salesforce_chunker 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 63428f8b047e970e10dab25118c83b9445e6c79b7d75cf3fd6552a37d8379cd5
- data.tar.gz: 543260b73b9c91bed55e7788527553419bc83459d7917119e6b2286d74880a71
+ metadata.gz: b9aabe3e30bb91a83ba214f11c224f003869420e4f5e2bdaf0fd7cc89ad4b669
+ data.tar.gz: fcb13aaa42d91efbfffaa047b6c3f8e21d378fa2a318e5ec4ab4cb29736b52d2
  SHA512:
- metadata.gz: 918d7303b522c79d351901073ac415fdb273b1e3582a9c6565cdb4878e76fb0744023766b6cacf7792a30f840f9bfe52c989790ceb7dad8e5de080503f34ef22
- data.tar.gz: 28883b66685b1071a31b826ab23f166f4ec6dd42a86f98d5ae50ce590c3a38bb43ce6a374170408f522835edef009c20cdb01e5198d5091ba7e4c366eb95f7ca
+ metadata.gz: ff8e83600ff22d71455a5ff84fa1e997e7badbb2b4a6db5c885d12748ea8b6867a7adf3570dbc361bc9fbe233f1c22ed7bec9e411815dae8b38026082726609a
+ data.tar.gz: 12023fee018edb1550414fbb16fd918896142fed885970afed558b55fffcd50b13c692ec499ef07a6c7bd3a0c3d0ef50ffb9287fdaa9431e4e08032d76f9a388
data/.travis.yml CHANGED
@@ -2,4 +2,4 @@ sudo: false
  language: ruby
  rvm:
  - 2.5.0
- before_install: gem install bundler -v 1.16.1
+ before_install: gem install bundler -v 1.17.1
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
  # CHANGELOG

+ ## 1.1.1 - 2018-11-26
+
+ - Reimplemented ManualChunkingQuery using CSV batch results.
+ - Changed sleeping and timeout error to only occur when no new results appear.
+ - Added more log info messages with regards to JSON parsing and yielding results.
+
  ## 1.1.0 - 2018-11-06

  - Added ManualChunkingQuery, which implements chunking within the gem for any Salesforce field.
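The 1.1.1 entries above correspond to the job.rb and manual_chunking_query.rb changes later in this diff. As a quick orientation, here is a minimal usage sketch of the reworked ManualChunkingQuery, based only on the keyword arguments and the download_results call visible in this diff; the Connection constructor arguments shown are assumptions (its signature is not part of this diff), so adjust them to the gem's actual API.

```ruby
require "salesforce_chunker"

# Hypothetical connection setup -- the Connection constructor is not shown in this diff.
connection = SalesforceChunker::Connection.new(
  username: "user@example.com",
  password: "password",
  security_token: "token",
  salesforce_version: "42.0",
)

# Keyword arguments taken from the ManualChunkingQuery#initialize signature in this diff.
job = SalesforceChunker::ManualChunkingQuery.new(
  connection: connection,
  object: "Account",
  operation: "query",
  query: "Select Name From Account",
  batch_size: 100_000, # a breakpoint is taken every batch_size Ids
)

# With 1.1.1 the poll loop only sleeps (and can only time out) when a pass
# finds no newly completed batches to download.
job.download_results(retry_seconds: 10) do |record|
  puts record["Name"]
end
```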
data/Gemfile.lock CHANGED
@@ -1,17 +1,21 @@
  PATH
  remote: .
  specs:
- salesforce_chunker (1.1.0)
+ salesforce_chunker (1.1.1)
  httparty (~> 0.13)

  GEM
  remote: https://rubygems.org/
  specs:
  coderay (1.1.2)
- httparty (0.16.2)
+ httparty (0.16.3)
+ mime-types (~> 3.0)
  multi_xml (>= 0.5.2)
  metaclass (0.0.4)
  method_source (0.9.0)
+ mime-types (3.2.2)
+ mime-types-data (~> 3.2015)
+ mime-types-data (3.2018.0812)
  minitest (5.11.3)
  mocha (1.5.0)
  metaclass (~> 0.0.1)
@@ -27,10 +31,10 @@ PLATFORMS
  DEPENDENCIES
  bundler (~> 1.16)
  minitest (~> 5.0)
- mocha (~> 1.5.0)
- pry (~> 0.11.1)
+ mocha (~> 1.5)
+ pry (~> 0.11)
  rake (~> 10.0)
  salesforce_chunker!

  BUNDLED WITH
- 1.16.6
+ 1.17.1
data/lib/salesforce_chunker.rb CHANGED
@@ -4,6 +4,7 @@ require "salesforce_chunker/job.rb"
  require "salesforce_chunker/single_batch_job.rb"
  require "salesforce_chunker/primary_key_chunking_query.rb"
  require "salesforce_chunker/manual_chunking_query.rb"
+ require "salesforce_chunker/manual_chunking_breakpoint_query.rb"
  require 'logger'

  module SalesforceChunker
data/lib/salesforce_chunker/connection.rb CHANGED
@@ -43,6 +43,11 @@ module SalesforceChunker
  self.class.check_response_error(response.parsed_response)
  end

+ def get(url, headers={})
+ @log.info "GET: #{url}"
+ HTTParty.get(@base_url + url, headers: @default_headers.merge(headers)).body
+ end
+
  private

  def self.login_soap_request_body(username, password, security_token)
data/lib/salesforce_chunker/job.rb CHANGED
@@ -1,3 +1,5 @@
+ require "json"
+
  module SalesforceChunker
  class Job
  attr_reader :batches_count
@@ -27,20 +29,27 @@ module SalesforceChunker
  downloaded_batches = []

  loop do
+ results_downloaded = false
  @log.info "Retrieving batch status information"
  get_completed_batches.each do |batch|
  next if downloaded_batches.include?(batch["id"])
  @log.info "Batch #{downloaded_batches.length + 1} of #{@batches_count || '?'}: " \
  "retrieving #{batch["numberRecordsProcessed"]} records"
- get_batch_results(batch["id"]) { |result| yield(result) } if batch["numberRecordsProcessed"] > 0
+ if batch["numberRecordsProcessed"].to_i > 0
+ get_batch_results(batch["id"]) { |result| yield(result) }
+ results_downloaded = true
+ end
  downloaded_batches.append(batch["id"])
  end

  break if @batches_count && downloaded_batches.length == @batches_count
- raise TimeoutError, "Timeout during batch processing" if Time.now.utc > timeout_at

- @log.info "Waiting #{retry_seconds} seconds"
- sleep(retry_seconds)
+ unless results_downloaded
+ raise TimeoutError, "Timeout during batch processing" if Time.now.utc > timeout_at
+
+ @log.info "Waiting #{retry_seconds} seconds"
+ sleep(retry_seconds)
+ end
  end

  @log.info "Completed"
@@ -49,14 +58,20 @@ module SalesforceChunker
  def get_completed_batches
  get_batch_statuses.select do |batch|
  raise BatchError, "Batch failed: #{batch["stateMessage"]}" if batch["state"] == "Failed"
- raise RecordError, "Failed records in batch" if batch["state"] == "Completed" && batch["numberRecordsFailed"] > 0
+ raise RecordError, "Failed records in batch" if batch["state"] == "Completed" && batch["numberRecordsFailed"].to_i > 0
  batch["state"] == "Completed"
  end
  end

  def get_batch_results(batch_id)
  retrieve_batch_results(batch_id).each do |result_id|
- retrieve_results(batch_id, result_id).each do |result|
+ results = retrieve_raw_results(batch_id, result_id)
+
+ @log.info "Parsing JSON response"
+ parsed_results = JSON.parse(results)
+
+ @log.info "Yielding records"
+ parsed_results.each do |result|
  result.tap { |h| h.delete("attributes") }
  yield(result)
  end
@@ -85,6 +100,10 @@ module SalesforceChunker
  @connection.get_json("job/#{@job_id}/batch/#{batch_id}/result/#{result_id}")
  end

+ def retrieve_raw_results(batch_id, result_id)
+ @connection.get("job/#{@job_id}/batch/#{batch_id}/result/#{result_id}")
+ end
+
  def close
  body = {"state": "Closed"}
  @connection.post_json("job/#{@job_id}/", body)
@@ -96,7 +115,7 @@ module SalesforceChunker
  body = {
  "operation": @operation,
  "object": object,
- "contentType": "JSON",
+ "contentType": options[:content_type] || "JSON",
  }
  body[:externalIdFieldName] = options[:external_id] if @operation == "upsert"
  @connection.post_json("job", body, options[:headers].to_h)["id"]
data/lib/salesforce_chunker/manual_chunking_breakpoint_query.rb ADDED
@@ -0,0 +1,60 @@
+ module SalesforceChunker
+ class ManualChunkingBreakpointQuery < Job
+
+ def initialize(connection:, object:, operation:, query:, **options)
+ @batch_size = options[:batch_size] || 100000
+ super(connection: connection, object: object, operation: operation, **options)
+
+ create_batch(query)
+ @batches_count = 1
+
+ close
+ end
+
+ def get_batch_results(batch_id)
+ retrieve_batch_results(batch_id).each do |result_id|
+ results = retrieve_raw_results(batch_id, result_id)
+
+ @log.info "Generating breakpoints from CSV results"
+ process_csv_results(results) { |result| yield result }
+ end
+ end
+
+ def process_csv_results(result)
+ lines = result.each_line
+ headers = lines.next
+
+ loop do
+ @batch_size.times { lines.next }
+ yield(lines.peek.chomp.gsub("\"", ""))
+ end
+ rescue StopIteration
+ nil
+ end
+
+ def create_batch(payload)
+ @log.info "Creating Id Batch: \"#{payload.gsub(/\n/, " ").strip}\""
+ response = @connection.post("job/#{@job_id}/batch", payload.to_s, {"Content-Type": "text/csv"})
+ response["batchInfo"]["id"]
+ end
+
+ def retrieve_batch_results(batch_id)
+ # XML to JSON wrangling
+ response = super(batch_id)
+ if response["result_list"]["result"].is_a? Array
+ response["result_list"]["result"]
+ else
+ [response["result_list"]["result"]]
+ end
+ end
+
+ def get_batch_statuses
+ # XML to JSON wrangling
+ [@connection.get_json("job/#{@job_id}/batch")["batchInfoList"]["batchInfo"]]
+ end
+
+ def create_job(object, options)
+ super(object, options.merge(content_type: "CSV"))
+ end
+ end
+ end
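The breakpoint selection in process_csv_results above boils down to: skip the CSV header row, then emit every batch_size-th Id, stripped of quotes. A small self-contained sketch of that logic, with invented sample data:

```ruby
# Illustration of the process_csv_results logic above (sample CSV and batch size are made up).
csv = <<~CSV
  "Id"
  "001A"
  "001B"
  "001C"
  "001D"
  "001E"
CSV

batch_size = 2
lines = csv.each_line
lines.next # discard the header row

breakpoints = []
loop do
  batch_size.times { lines.next }                # advance past one batch worth of Ids
  breakpoints << lines.peek.chomp.gsub('"', "")  # the next Id becomes a batch boundary
end
# Kernel#loop ends cleanly when the enumerator raises StopIteration.

p breakpoints # => ["001C", "001E"]
```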
data/lib/salesforce_chunker/manual_chunking_query.rb CHANGED
@@ -2,14 +2,23 @@ module SalesforceChunker
  class ManualChunkingQuery < Job

  def initialize(connection:, object:, operation:, query:, **options)
+ @log = options.delete(:logger) || Logger.new(options[:log_output])
+ @log.progname = "salesforce_chunker"
  batch_size = options[:batch_size] || 100000
  where_clause = self.class.query_where_clause(query)

- super(connection: connection, object: object, operation: operation, **options)
  @log.info "Using Manual Chunking"
-
- @log.info "Retrieving Ids from records"
- breakpoints = breakpoints(object, where_clause, batch_size)
+ breakpoint_creation_job = SalesforceChunker::ManualChunkingBreakpointQuery.new(
+ connection: connection,
+ object: object,
+ operation: operation,
+ logger: @log,
+ batch_size: batch_size,
+ query: "Select Id From #{object} #{where_clause} Order By Id Asc",
+ )
+ breakpoints = breakpoint_creation_job.download_results(retry_seconds: 10).to_a
+
+ super(connection: connection, object: object, operation: operation, logger: @log, **options)

  @log.info "Creating Query Batches"
  create_batches(query, breakpoints, where_clause)
@@ -17,21 +26,6 @@ module SalesforceChunker
  close
  end

- def get_batch_statuses
- batches = super
- batches.delete_if { |batch| batch["id"] == @initial_batch_id && batches.count > 1 }
- end
-
- def breakpoints(object, where_clause, batch_size)
- @batches_count = 1
- @initial_batch_id = create_batch("Select Id From #{object} #{where_clause} Order By Id Asc")
-
- download_results(retry_seconds: 10)
- .with_index
- .select { |_, i| i % batch_size == 0 && i != 0 }
- .map { |result, _| result["Id"] }
- end
-
  def create_batches(query, breakpoints, where_clause)
  if breakpoints.empty?
  create_batch(query)
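The breakpoints produced by ManualChunkingBreakpointQuery feed create_batches above, whose body is truncated in this diff. Purely as an illustration of the chunking idea (not the gem's verbatim code), the Id breakpoints could be turned into per-batch sub-queries roughly as follows, assuming a query with no existing WHERE clause:

```ruby
# Hypothetical sketch: split a query into Id ranges bounded by the breakpoints.
def batch_queries(query, breakpoints)
  return [query] if breakpoints.empty?

  bounds = [nil, *breakpoints, nil]
  bounds.each_cons(2).map do |lower, upper|
    conditions = []
    conditions << "Id >= '#{lower}'" if lower
    conditions << "Id < '#{upper}'" if upper
    "#{query} Where #{conditions.join(" And ")}"
  end
end

# batch_queries("Select Name From Account", ["001C", "001E"])
# => ["Select Name From Account Where Id < '001C'",
#     "Select Name From Account Where Id >= '001C' And Id < '001E'",
#     "Select Name From Account Where Id >= '001E'"]
```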
data/lib/salesforce_chunker/version.rb CHANGED
@@ -1,3 +1,3 @@
  module SalesforceChunker
- VERSION = "1.1.0"
+ VERSION = "1.1.1"
  end
data/salesforce_chunker.gemspec CHANGED
@@ -25,6 +25,6 @@ Gem::Specification.new do |spec|
  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
- spec.add_development_dependency "mocha", "~> 1.5.0"
- spec.add_development_dependency "pry", "~> 0.11.1"
+ spec.add_development_dependency "mocha", "~> 1.5"
+ spec.add_development_dependency "pry", "~> 0.11"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: salesforce_chunker
  version: !ruby/object:Gem::Version
- version: 1.1.0
+ version: 1.1.1
  platform: ruby
  authors:
  - Curtis Holmes
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-11-07 00:00:00.000000000 Z
+ date: 2018-11-26 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: httparty
@@ -72,28 +72,28 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.5.0
+ version: '1.5'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 1.5.0
+ version: '1.5'
  - !ruby/object:Gem::Dependency
  name: pry
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 0.11.1
+ version: '0.11'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: 0.11.1
+ version: '0.11'
  description: Salesforce client and extractor designed for handling large amounts of
  data
  email:
@@ -117,6 +117,7 @@ files:
  - lib/salesforce_chunker/connection.rb
  - lib/salesforce_chunker/exceptions.rb
  - lib/salesforce_chunker/job.rb
+ - lib/salesforce_chunker/manual_chunking_breakpoint_query.rb
  - lib/salesforce_chunker/manual_chunking_query.rb
  - lib/salesforce_chunker/primary_key_chunking_query.rb
  - lib/salesforce_chunker/single_batch_job.rb