salesforce_chunker 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +9 -5
- data/lib/salesforce_chunker.rb +1 -0
- data/lib/salesforce_chunker/connection.rb +5 -0
- data/lib/salesforce_chunker/job.rb +26 -7
- data/lib/salesforce_chunker/manual_chunking_breakpoint_query.rb +60 -0
- data/lib/salesforce_chunker/manual_chunking_query.rb +13 -19
- data/lib/salesforce_chunker/version.rb +1 -1
- data/salesforce_chunker.gemspec +2 -2
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9aabe3e30bb91a83ba214f11c224f003869420e4f5e2bdaf0fd7cc89ad4b669
|
4
|
+
data.tar.gz: fcb13aaa42d91efbfffaa047b6c3f8e21d378fa2a318e5ec4ab4cb29736b52d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff8e83600ff22d71455a5ff84fa1e997e7badbb2b4a6db5c885d12748ea8b6867a7adf3570dbc361bc9fbe233f1c22ed7bec9e411815dae8b38026082726609a
|
7
|
+
data.tar.gz: 12023fee018edb1550414fbb16fd918896142fed885970afed558b55fffcd50b13c692ec499ef07a6c7bd3a0c3d0ef50ffb9287fdaa9431e4e08032d76f9a388
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 1.1.1 - 2018-11-26
|
4
|
+
|
5
|
+
- Reimplemented ManualChunkingQuery using CSV batch results.
|
6
|
+
- Changed sleeping and timeout error to only occur when no new results appear.
|
7
|
+
- Added more log info messages with regards to JSON parsing and yielding results.
|
8
|
+
|
3
9
|
## 1.1.0 - 2018-11-06
|
4
10
|
|
5
11
|
- Added ManualChunkingQuery, which implements chunking within the gem for any Salesforce field.
|
data/Gemfile.lock
CHANGED
@@ -1,17 +1,21 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
salesforce_chunker (1.1.0)
|
4
|
+
salesforce_chunker (1.1.1)
|
5
5
|
httparty (~> 0.13)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
10
|
coderay (1.1.2)
|
11
|
-
httparty (0.16.
|
11
|
+
httparty (0.16.3)
|
12
|
+
mime-types (~> 3.0)
|
12
13
|
multi_xml (>= 0.5.2)
|
13
14
|
metaclass (0.0.4)
|
14
15
|
method_source (0.9.0)
|
16
|
+
mime-types (3.2.2)
|
17
|
+
mime-types-data (~> 3.2015)
|
18
|
+
mime-types-data (3.2018.0812)
|
15
19
|
minitest (5.11.3)
|
16
20
|
mocha (1.5.0)
|
17
21
|
metaclass (~> 0.0.1)
|
@@ -27,10 +31,10 @@ PLATFORMS
|
|
27
31
|
DEPENDENCIES
|
28
32
|
bundler (~> 1.16)
|
29
33
|
minitest (~> 5.0)
|
30
|
-
mocha (~> 1.5
|
31
|
-
pry (~> 0.11
|
34
|
+
mocha (~> 1.5)
|
35
|
+
pry (~> 0.11)
|
32
36
|
rake (~> 10.0)
|
33
37
|
salesforce_chunker!
|
34
38
|
|
35
39
|
BUNDLED WITH
|
36
|
-
1.
|
40
|
+
1.17.1
|
data/lib/salesforce_chunker.rb
CHANGED
@@ -4,6 +4,7 @@ require "salesforce_chunker/job.rb"
|
|
4
4
|
require "salesforce_chunker/single_batch_job.rb"
|
5
5
|
require "salesforce_chunker/primary_key_chunking_query.rb"
|
6
6
|
require "salesforce_chunker/manual_chunking_query.rb"
|
7
|
+
require "salesforce_chunker/manual_chunking_breakpoint_query.rb"
|
7
8
|
require 'logger'
|
8
9
|
|
9
10
|
module SalesforceChunker
|
@@ -43,6 +43,11 @@ module SalesforceChunker
|
|
43
43
|
self.class.check_response_error(response.parsed_response)
|
44
44
|
end
|
45
45
|
|
46
|
+
def get(url, headers={})
|
47
|
+
@log.info "GET: #{url}"
|
48
|
+
HTTParty.get(@base_url + url, headers: @default_headers.merge(headers)).body
|
49
|
+
end
|
50
|
+
|
46
51
|
private
|
47
52
|
|
48
53
|
def self.login_soap_request_body(username, password, security_token)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require "json"
|
2
|
+
|
1
3
|
module SalesforceChunker
|
2
4
|
class Job
|
3
5
|
attr_reader :batches_count
|
@@ -27,20 +29,27 @@ module SalesforceChunker
|
|
27
29
|
downloaded_batches = []
|
28
30
|
|
29
31
|
loop do
|
32
|
+
results_downloaded = false
|
30
33
|
@log.info "Retrieving batch status information"
|
31
34
|
get_completed_batches.each do |batch|
|
32
35
|
next if downloaded_batches.include?(batch["id"])
|
33
36
|
@log.info "Batch #{downloaded_batches.length + 1} of #{@batches_count || '?'}: " \
|
34
37
|
"retrieving #{batch["numberRecordsProcessed"]} records"
|
35
|
-
|
38
|
+
if batch["numberRecordsProcessed"].to_i > 0
|
39
|
+
get_batch_results(batch["id"]) { |result| yield(result) }
|
40
|
+
results_downloaded = true
|
41
|
+
end
|
36
42
|
downloaded_batches.append(batch["id"])
|
37
43
|
end
|
38
44
|
|
39
45
|
break if @batches_count && downloaded_batches.length == @batches_count
|
40
|
-
raise TimeoutError, "Timeout during batch processing" if Time.now.utc > timeout_at
|
41
46
|
|
42
|
-
|
43
|
-
|
47
|
+
unless results_downloaded
|
48
|
+
raise TimeoutError, "Timeout during batch processing" if Time.now.utc > timeout_at
|
49
|
+
|
50
|
+
@log.info "Waiting #{retry_seconds} seconds"
|
51
|
+
sleep(retry_seconds)
|
52
|
+
end
|
44
53
|
end
|
45
54
|
|
46
55
|
@log.info "Completed"
|
@@ -49,14 +58,20 @@ module SalesforceChunker
|
|
49
58
|
def get_completed_batches
|
50
59
|
get_batch_statuses.select do |batch|
|
51
60
|
raise BatchError, "Batch failed: #{batch["stateMessage"]}" if batch["state"] == "Failed"
|
52
|
-
raise RecordError, "Failed records in batch" if batch["state"] == "Completed" && batch["numberRecordsFailed"] > 0
|
61
|
+
raise RecordError, "Failed records in batch" if batch["state"] == "Completed" && batch["numberRecordsFailed"].to_i > 0
|
53
62
|
batch["state"] == "Completed"
|
54
63
|
end
|
55
64
|
end
|
56
65
|
|
57
66
|
def get_batch_results(batch_id)
|
58
67
|
retrieve_batch_results(batch_id).each do |result_id|
|
59
|
-
|
68
|
+
results = retrieve_raw_results(batch_id, result_id)
|
69
|
+
|
70
|
+
@log.info "Parsing JSON response"
|
71
|
+
parsed_results = JSON.parse(results)
|
72
|
+
|
73
|
+
@log.info "Yielding records"
|
74
|
+
parsed_results.each do |result|
|
60
75
|
result.tap { |h| h.delete("attributes") }
|
61
76
|
yield(result)
|
62
77
|
end
|
@@ -85,6 +100,10 @@ module SalesforceChunker
|
|
85
100
|
@connection.get_json("job/#{@job_id}/batch/#{batch_id}/result/#{result_id}")
|
86
101
|
end
|
87
102
|
|
103
|
+
def retrieve_raw_results(batch_id, result_id)
|
104
|
+
@connection.get("job/#{@job_id}/batch/#{batch_id}/result/#{result_id}")
|
105
|
+
end
|
106
|
+
|
88
107
|
def close
|
89
108
|
body = {"state": "Closed"}
|
90
109
|
@connection.post_json("job/#{@job_id}/", body)
|
@@ -96,7 +115,7 @@ module SalesforceChunker
|
|
96
115
|
body = {
|
97
116
|
"operation": @operation,
|
98
117
|
"object": object,
|
99
|
-
"contentType": "JSON",
|
118
|
+
"contentType": options[:content_type] || "JSON",
|
100
119
|
}
|
101
120
|
body[:externalIdFieldName] = options[:external_id] if @operation == "upsert"
|
102
121
|
@connection.post_json("job", body, options[:headers].to_h)["id"]
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module SalesforceChunker
|
2
|
+
class ManualChunkingBreakpointQuery < Job
|
3
|
+
|
4
|
+
def initialize(connection:, object:, operation:, query:, **options)
|
5
|
+
@batch_size = options[:batch_size] || 100000
|
6
|
+
super(connection: connection, object: object, operation: operation, **options)
|
7
|
+
|
8
|
+
create_batch(query)
|
9
|
+
@batches_count = 1
|
10
|
+
|
11
|
+
close
|
12
|
+
end
|
13
|
+
|
14
|
+
def get_batch_results(batch_id)
|
15
|
+
retrieve_batch_results(batch_id).each do |result_id|
|
16
|
+
results = retrieve_raw_results(batch_id, result_id)
|
17
|
+
|
18
|
+
@log.info "Generating breakpoints from CSV results"
|
19
|
+
process_csv_results(results) { |result| yield result }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def process_csv_results(result)
|
24
|
+
lines = result.each_line
|
25
|
+
headers = lines.next
|
26
|
+
|
27
|
+
loop do
|
28
|
+
@batch_size.times { lines.next }
|
29
|
+
yield(lines.peek.chomp.gsub("\"", ""))
|
30
|
+
end
|
31
|
+
rescue StopIteration
|
32
|
+
nil
|
33
|
+
end
|
34
|
+
|
35
|
+
def create_batch(payload)
|
36
|
+
@log.info "Creating Id Batch: \"#{payload.gsub(/\n/, " ").strip}\""
|
37
|
+
response = @connection.post("job/#{@job_id}/batch", payload.to_s, {"Content-Type": "text/csv"})
|
38
|
+
response["batchInfo"]["id"]
|
39
|
+
end
|
40
|
+
|
41
|
+
def retrieve_batch_results(batch_id)
|
42
|
+
# XML to JSON wrangling
|
43
|
+
response = super(batch_id)
|
44
|
+
if response["result_list"]["result"].is_a? Array
|
45
|
+
response["result_list"]["result"]
|
46
|
+
else
|
47
|
+
[response["result_list"]["result"]]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def get_batch_statuses
|
52
|
+
# XML to JSON wrangling
|
53
|
+
[@connection.get_json("job/#{@job_id}/batch")["batchInfoList"]["batchInfo"]]
|
54
|
+
end
|
55
|
+
|
56
|
+
def create_job(object, options)
|
57
|
+
super(object, options.merge(content_type: "CSV"))
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -2,14 +2,23 @@ module SalesforceChunker
|
|
2
2
|
class ManualChunkingQuery < Job
|
3
3
|
|
4
4
|
def initialize(connection:, object:, operation:, query:, **options)
|
5
|
+
@log = options.delete(:logger) || Logger.new(options[:log_output])
|
6
|
+
@log.progname = "salesforce_chunker"
|
5
7
|
batch_size = options[:batch_size] || 100000
|
6
8
|
where_clause = self.class.query_where_clause(query)
|
7
9
|
|
8
|
-
super(connection: connection, object: object, operation: operation, **options)
|
9
10
|
@log.info "Using Manual Chunking"
|
10
|
-
|
11
|
-
|
12
|
-
|
11
|
+
breakpoint_creation_job = SalesforceChunker::ManualChunkingBreakpointQuery.new(
|
12
|
+
connection: connection,
|
13
|
+
object: object,
|
14
|
+
operation: operation,
|
15
|
+
logger: @log,
|
16
|
+
batch_size: batch_size,
|
17
|
+
query: "Select Id From #{object} #{where_clause} Order By Id Asc",
|
18
|
+
)
|
19
|
+
breakpoints = breakpoint_creation_job.download_results(retry_seconds: 10).to_a
|
20
|
+
|
21
|
+
super(connection: connection, object: object, operation: operation, logger: @log, **options)
|
13
22
|
|
14
23
|
@log.info "Creating Query Batches"
|
15
24
|
create_batches(query, breakpoints, where_clause)
|
@@ -17,21 +26,6 @@ module SalesforceChunker
|
|
17
26
|
close
|
18
27
|
end
|
19
28
|
|
20
|
-
def get_batch_statuses
|
21
|
-
batches = super
|
22
|
-
batches.delete_if { |batch| batch["id"] == @initial_batch_id && batches.count > 1 }
|
23
|
-
end
|
24
|
-
|
25
|
-
def breakpoints(object, where_clause, batch_size)
|
26
|
-
@batches_count = 1
|
27
|
-
@initial_batch_id = create_batch("Select Id From #{object} #{where_clause} Order By Id Asc")
|
28
|
-
|
29
|
-
download_results(retry_seconds: 10)
|
30
|
-
.with_index
|
31
|
-
.select { |_, i| i % batch_size == 0 && i != 0 }
|
32
|
-
.map { |result, _| result["Id"] }
|
33
|
-
end
|
34
|
-
|
35
29
|
def create_batches(query, breakpoints, where_clause)
|
36
30
|
if breakpoints.empty?
|
37
31
|
create_batch(query)
|
data/salesforce_chunker.gemspec
CHANGED
@@ -25,6 +25,6 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_development_dependency "bundler", "~> 1.16"
|
26
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
27
27
|
spec.add_development_dependency "minitest", "~> 5.0"
|
28
|
-
spec.add_development_dependency "mocha", "~> 1.5
|
29
|
-
spec.add_development_dependency "pry", "~> 0.11
|
28
|
+
spec.add_development_dependency "mocha", "~> 1.5"
|
29
|
+
spec.add_development_dependency "pry", "~> 0.11"
|
30
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: salesforce_chunker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Curtis Holmes
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-06 00:00:00.000000000 Z
|
11
|
+
date: 2018-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -72,28 +72,28 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.5
|
75
|
+
version: '1.5'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.5
|
82
|
+
version: '1.5'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: pry
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.11
|
89
|
+
version: '0.11'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.11
|
96
|
+
version: '0.11'
|
97
97
|
description: Salesforce client and extractor designed for handling large amounts of
|
98
98
|
data
|
99
99
|
email:
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/salesforce_chunker/connection.rb
|
118
118
|
- lib/salesforce_chunker/exceptions.rb
|
119
119
|
- lib/salesforce_chunker/job.rb
|
120
|
+
- lib/salesforce_chunker/manual_chunking_breakpoint_query.rb
|
120
121
|
- lib/salesforce_chunker/manual_chunking_query.rb
|
121
122
|
- lib/salesforce_chunker/primary_key_chunking_query.rb
|
122
123
|
- lib/salesforce_chunker/single_batch_job.rb
|