salesforce_chunker 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +36 -0
- data/LICENSE.txt +21 -0
- data/README.md +167 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/dev.yml +16 -0
- data/lib/salesforce_chunker/connection.rb +76 -0
- data/lib/salesforce_chunker/exceptions.rb +18 -0
- data/lib/salesforce_chunker/job.rb +105 -0
- data/lib/salesforce_chunker/manual_chunking_query.rb +54 -0
- data/lib/salesforce_chunker/primary_key_chunking_query.rb +34 -0
- data/lib/salesforce_chunker/single_batch_job.rb +12 -0
- data/lib/salesforce_chunker/version.rb +3 -0
- data/lib/salesforce_chunker.rb +56 -0
- data/salesforce_chunker.gemspec +30 -0
- metadata +149 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 63428f8b047e970e10dab25118c83b9445e6c79b7d75cf3fd6552a37d8379cd5
|
4
|
+
data.tar.gz: 543260b73b9c91bed55e7788527553419bc83459d7917119e6b2286d74880a71
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 918d7303b522c79d351901073ac415fdb273b1e3582a9c6565cdb4878e76fb0744023766b6cacf7792a30f840f9bfe52c989790ceb7dad8e5de080503f34ef22
|
7
|
+
data.tar.gz: 28883b66685b1071a31b826ab23f166f4ec6dd42a86f98d5ae50ce590c3a38bb43ce6a374170408f522835edef009c20cdb01e5198d5091ba7e4c366eb95f7ca
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
salesforce_chunker (1.1.0)
|
5
|
+
httparty (~> 0.13)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
coderay (1.1.2)
|
11
|
+
httparty (0.16.2)
|
12
|
+
multi_xml (>= 0.5.2)
|
13
|
+
metaclass (0.0.4)
|
14
|
+
method_source (0.9.0)
|
15
|
+
minitest (5.11.3)
|
16
|
+
mocha (1.5.0)
|
17
|
+
metaclass (~> 0.0.1)
|
18
|
+
multi_xml (0.6.0)
|
19
|
+
pry (0.11.3)
|
20
|
+
coderay (~> 1.1.0)
|
21
|
+
method_source (~> 0.9.0)
|
22
|
+
rake (10.5.0)
|
23
|
+
|
24
|
+
PLATFORMS
|
25
|
+
ruby
|
26
|
+
|
27
|
+
DEPENDENCIES
|
28
|
+
bundler (~> 1.16)
|
29
|
+
minitest (~> 5.0)
|
30
|
+
mocha (~> 1.5.0)
|
31
|
+
pry (~> 0.11.1)
|
32
|
+
rake (~> 10.0)
|
33
|
+
salesforce_chunker!
|
34
|
+
|
35
|
+
BUNDLED WITH
|
36
|
+
1.16.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Shopify
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
# SalesforceChunker
|
2
|
+
|
3
|
+
The `salesforce_chunker` gem is a ruby library for interacting with the Salesforce Bulk API. It was primarily designed as an extractor to handle queries using batching and [Primary Key Chunking](https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/async_api_headers_enable_pk_chunking.htm).
|
4
|
+
|
5
|
+
Currently, only querying is built into `SalesforceChunker::Client`, but non-query jobs can be created with `SalesforceChunker::Job`.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'salesforce_chunker'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install salesforce_chunker
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
### SalesforceChunker::Client
|
26
|
+
|
27
|
+
#### Simple Example
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
client = SalesforceChunker::Client.new(
|
31
|
+
username: "username",
|
32
|
+
password: "password",
|
33
|
+
security_token: "security_token",
|
34
|
+
)
|
35
|
+
|
36
|
+
names = client.query(query: "Select Name From User", object: "User").map { |result| result["Name"] }
|
37
|
+
```
|
38
|
+
|
39
|
+
#### Initialize
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
client = SalesforceChunker::Client.new(
|
43
|
+
username: "username",
|
44
|
+
password: "password",
|
45
|
+
security_token: "security_token",
|
46
|
+
domain: "login",
|
47
|
+
salesforce_version: "42.0",
|
48
|
+
)
|
49
|
+
```
|
50
|
+
|
51
|
+
| Parameter | |
|
52
|
+
| --- | --- |
|
53
|
+
| username | required |
|
54
|
+
| password | required |
|
55
|
+
| security_token | may be required depending on your Salesforce setup |
|
56
|
+
| domain | optional. defaults to `"login"`. |
|
57
|
+
| salesforce_version | optional. defaults to `"42.0"`. Must be >= `"33.0"` to use PK Chunking. |
|
58
|
+
|
59
|
+
#### Functions
|
60
|
+
|
61
|
+
| function | |
|
62
|
+
| --- | --- |
|
63
|
+
| query | runs a query using the job type given by `job_type` (defaults to `"primary_key_chunking"`) |
|
64
|
+
| single_batch_query | calls `query(job_type: "single_batch", **options)` |
|
65
|
+
| primary_key_chunking_query | calls `query(job_type: "primary_key_chunking", **options)` |
|
66
|
+
|
67
|
+
#### Query
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
options = {
|
71
|
+
query: "Select Name from Account",
|
72
|
+
object: "Account",
|
73
|
+
batch_size: 100000,
|
74
|
+
retry_seconds: 10,
|
75
|
+
timeout_seconds: 3600,
|
76
|
+
logger: nil,
|
77
|
+
log_output: STDOUT,
|
78
|
+
job_type: "primary_key_chunking",
|
79
|
+
}
|
80
|
+
|
81
|
+
client.query(options) do |result|
|
82
|
+
process(result)
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
| Parameter | | |
|
87
|
+
| --- | --- | --- |
|
88
|
+
| query | required | SOQL query. |
|
89
|
+
| object | required | Salesforce Object type. |
|
90
|
+
| batch_size | optional | defaults to `100000`. Number of records to process in a batch. (Only for PK Chunking) |
|
91
|
+
| retry_seconds | optional | defaults to `10`. Number of seconds to wait before querying API for updated results. |
|
92
|
+
| timeout_seconds | optional | defaults to `3600`. Number of seconds to wait before query is killed. |
|
93
|
+
| logger | optional | logger to use. Must be instance of or similar to rails logger. |
|
94
|
+
| log_output | optional | log output to use. i.e. `STDOUT`. |
|
95
|
+
| job_type | optional | defaults to `"primary_key_chunking"`. Can also be set to `"single_batch"` or `"manual_chunking"`. |
|
96
|
+
|
97
|
+
`query` can either be called with a block, or will return an enumerator:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
names = client.query(**options).map { |result| result["Name"] }
|
101
|
+
```
|
102
|
+
|
103
|
+
### Under the hood: SalesforceChunker::Job
|
104
|
+
|
105
|
+
Using `SalesforceChunker::Job`, you have more direct access to the Salesforce Bulk API functions, such as `create_batch`, `get_batch_statuses`, and `retrieve_batch_results`. This can be used to perform custom tasks, such as upserts or multiple batch queries.
|
106
|
+
|
107
|
+
This should be used in coordination with `SalesforceChunker::Connection`, which has the same initialization process as `SalesforceChunker::Client`.
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
connection = SalesforceChunker::Connection.new(
|
111
|
+
username: "username",
|
112
|
+
password: "password",
|
113
|
+
security_token: "security_token",
|
114
|
+
)
|
115
|
+
|
116
|
+
job = SalesforceChunker::Job.new(
|
117
|
+
connection: connection,
|
118
|
+
object: "Account",
|
119
|
+
operation: "query",
|
120
|
+
log_output: STDOUT,
|
121
|
+
)
|
122
|
+
|
123
|
+
job.create_batch("Select Id From Account Order By Id Desc Limit 1")
|
124
|
+
job.create_batch("Select Id From Account Order By Id Asc Limit 1")
|
125
|
+
job.close
|
126
|
+
|
127
|
+
job.instance_variable_set(:@batches_count, 2)
|
128
|
+
ids = job.download_results.to_a
|
129
|
+
```
|
130
|
+
|
131
|
+
Also, `SalesforceChunker::SingleBatchJob` can be used to create a Job with only a single batch. This automatically handles the batch creation, closing, and setting `@batches_count`.
|
132
|
+
|
133
|
+
```ruby
|
134
|
+
job = SalesforceChunker::SingleBatchJob.new(
|
135
|
+
connection: connection,
|
136
|
+
object: "Account",
|
137
|
+
operation: "upsert",
|
138
|
+
payload: [{ "Name" => "Random Account", "IdField__c" => "123456" }],
|
139
|
+
external_id: "IdField__c",
|
140
|
+
log_output: STDOUT,
|
141
|
+
)
|
142
|
+
|
143
|
+
loop do
|
144
|
+
batch = job.get_batch_statuses.first
|
145
|
+
if batch["state"] == "Completed"
|
146
|
+
break
|
147
|
+
elsif batch["state"] == "Failed"
|
148
|
+
raise "batch failed"
|
149
|
+
end
|
150
|
+
sleep 5
|
151
|
+
end
|
152
|
+
```
|
153
|
+
|
154
|
+
## Development
|
155
|
+
|
156
|
+
After checking out the repo,
|
157
|
+
- run `bin/setup` to install dependencies.
|
158
|
+
- run `rake test` to run the tests.
|
159
|
+
- run `bin/console` for an interactive prompt that will allow you to experiment.
|
160
|
+
|
161
|
+
## Contributing
|
162
|
+
|
163
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/Shopify/salesforce_chunker.
|
164
|
+
|
165
|
+
## License
|
166
|
+
|
167
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "salesforce_chunker"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/dev.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# For internal Shopify employee use
|
2
|
+
# the requirements below describe the required dependencies
|
3
|
+
---
|
4
|
+
name: salesforce-chunker
|
5
|
+
|
6
|
+
up:
|
7
|
+
- ruby: 2.5.0
|
8
|
+
- bundler
|
9
|
+
|
10
|
+
commands:
|
11
|
+
console:
|
12
|
+
desc: 'start a rails console'
|
13
|
+
run: bin/console
|
14
|
+
test:
|
15
|
+
desc: 'run the tests'
|
16
|
+
run: bundle exec rake test
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require "httparty"
|
2
|
+
|
3
|
+
module SalesforceChunker
  # Authenticated connection to the Salesforce Bulk (async) API.
  #
  # Constructing a Connection performs a SOAP login request. The session id
  # and server URL from the login response are used to build the base URL and
  # default headers for every subsequent request made through this object.
  class Connection

    # Logs in to Salesforce and prepares the base URL / default headers.
    #
    # @param username [String] Salesforce username
    # @param password [String] Salesforce password
    # @param security_token [String] appended to the password in the login request
    # @param domain [String] subdomain of salesforce.com used for the login request
    # @param salesforce_version [String] API version used in both login and async URLs
    # @param options [Hash] :logger (a logger object) or :log_output (passed to Logger.new)
    # @raise [ConnectionError] when the login response has no login result, or
    #   the instance cannot be extracted from the returned server URL
    def initialize(username: "", password: "", security_token: "", domain: "login", salesforce_version: "42.0", **options)
      @log = options[:logger] || Logger.new(options[:log_output])
      @log.progname = "salesforce_chunker"

      response = HTTParty.post(
        "https://#{domain}.salesforce.com/services/Soap/u/#{salesforce_version}",
        headers: { "SOAPAction": "login", "Content-Type": "text/xml; charset=UTF-8" },
        body: self.class.login_soap_request_body(username, password, security_token)
      ).parsed_response

      # Walk the response defensively: a failed login carries a Fault instead
      # of a loginResponse, and a non-SOAP reply may not be a Hash at all.
      soap_body = self.class.dig_hash(response, "Envelope", "Body")
      result = self.class.dig_hash(soap_body, "loginResponse", "result")
      unless result.is_a?(Hash)
        fault = self.class.dig_hash(soap_body, "Fault", "faultstring")
        raise ConnectionError, (fault || "Unexpected login response from Salesforce").to_s
      end

      instance = self.class.get_instance(result["serverUrl"])

      @base_url = "https://#{instance}.salesforce.com/services/async/#{salesforce_version}/"
      @default_headers = {
        "Content-Type": "application/json",
        "X-SFDC-Session": result["sessionId"],
        "Accept-Encoding": "gzip",
      }
    end

    # Serializes +body+ with #to_json and POSTs it to +url+ (relative to the base URL).
    #
    # @return the parsed response
    # @raise [ResponseError] when the response carries an "exceptionCode"
    def post_json(url, body, headers={})
      post(url, body.to_json, headers)
    end

    # POSTs +body+ as-is to +url+ (relative to the base URL).
    #
    # @param headers [Hash] merged over the default headers
    # @return the parsed response
    # @raise [ResponseError] when the response carries an "exceptionCode"
    def post(url, body, headers={})
      @log.info "POST: #{url}"
      response = HTTParty.post(@base_url + url, headers: @default_headers.merge(headers), body: body)
      self.class.check_response_error(response.parsed_response)
    end

    # GETs +url+ (relative to the base URL).
    #
    # @param headers [Hash] merged over the default headers
    # @return the parsed response
    # @raise [ResponseError] when the response carries an "exceptionCode"
    def get_json(url, headers={})
      @log.info "GET: #{url}"
      response = HTTParty.get(@base_url + url, headers: @default_headers.merge(headers))
      self.class.check_response_error(response.parsed_response)
    end

    # NOTE: the helpers below are class methods and are publicly callable.
    # A bare `private` does not apply to `def self.` definitions, so it is
    # intentionally not used here.

    # Builds the SOAP login envelope. The username, password and security
    # token are XML-escaped; the token is appended directly to the password.
    def self.login_soap_request_body(username, password, security_token)
      [
        %(<?xml version="1.0" encoding="utf-8" ?>),
        %(<env:Envelope),
        %(xmlns:xsd="http://www.w3.org/2001/XMLSchema"),
        %(xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"),
        %(xmlns:env="http://schemas.xmlsoap.org/soap/envelope/"),
        %(xmlns:urn="urn:partner.soap.sforce.com">),
        %(<env:Body>),
        %(<n1:login xmlns:n1="urn:partner.soap.sforce.com">),
        "<n1:username>#{username.encode(xml: :text)}</n1:username>",
        "<n1:password>#{password.encode(xml: :text)}#{security_token.encode(xml: :text)}</n1:password>",
        %(</n1:login>),
        %(</env:Body>),
        %(</env:Envelope>),
      ].join("\n")
    end

    # Extracts the part of +server_url+ between "https://" and ".salesforce.com".
    #
    # @raise [ConnectionError] when +server_url+ does not have that shape
    def self.get_instance(server_url)
      # Dots are escaped so that only a literal ".salesforce.com" host matches.
      match = %r{https://(.*)\.salesforce\.com}.match(server_url.to_s)
      unless match
        raise ConnectionError, "Unable to determine Salesforce instance from server URL: #{server_url.inspect}"
      end
      match[1]
    end

    # Returns +response+ unchanged unless it is a Hash with an "exceptionCode" key.
    #
    # @raise [ResponseError] with "<exceptionCode>: <exceptionMessage>"
    def self.check_response_error(response)
      if response.is_a?(Hash) && response.key?("exceptionCode")
        raise ResponseError, "#{response["exceptionCode"]}: #{response["exceptionMessage"]}"
      else
        response
      end
    end

    # Follows +keys+ through nested Hashes, returning nil as soon as a level
    # is not a Hash (instead of raising).
    def self.dig_hash(value, *keys)
      keys.reduce(value) { |node, key| node.is_a?(Hash) ? node[key] : nil }
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module SalesforceChunker
  # Base class for every error defined by this module.
  Error = Class.new(StandardError)

  # Raised when connecting with Salesforce fails
  ConnectionError = Class.new(Error)

  # Raised when a request sent to Salesforce is invalid
  ResponseError = Class.new(Error)

  # Raised when Salesforce returns a failed batch
  BatchError = Class.new(Error)

  # Raised when Salesforce returns a successful batch with failed record(s)
  RecordError = Class.new(Error)

  # Raised when batch job exceeds time limit
  TimeoutError = Class.new(Error)
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module SalesforceChunker
  # A single Bulk API job, driven through a connection object that responds
  # to #post, #post_json and #get_json. The job is created on construction;
  # batches are then added with #create_batch and results for query
  # operations are streamed with #download_results.
  class Job
    attr_reader :batches_count

    QUERY_OPERATIONS = ["query", "queryall"].freeze
    DEFAULT_RETRY_SECONDS = 10
    DEFAULT_TIMEOUT_SECONDS = 3600

    # Creates the job via the connection and remembers the returned job id.
    #
    # @param connection [#post, #post_json, #get_json]
    # @param object [String] sent as the job's "object"
    # @param operation [String] sent as the job's "operation"
    # @param options [Hash] :logger / :log_output for logging; :headers and
    #   :external_id are forwarded to job creation
    def initialize(connection:, object:, operation:, **options)
      @log = options[:logger] || Logger.new(options[:log_output])
      @log.progname = "salesforce_chunker"

      @connection = connection
      @operation = operation
      @batches_count = nil

      @log.info "Creating Bulk API Job"
      @job_id = create_job(object, options.slice(:headers, :external_id))
    end

    # Polls batch statuses and yields every record of every completed batch
    # exactly once, until @batches_count batches have been downloaded.
    #
    # Returns nil for non-query operations and an enumerator when no block is
    # given.
    #
    # @param options [Hash] :retry_seconds (pause between polls) and
    #   :timeout_seconds (overall limit)
    # @raise [TimeoutError] when the time limit passes before all batches are downloaded
    def download_results(**options)
      return nil unless QUERY_OPERATIONS.include?(@operation)
      return to_enum(:download_results, **options) unless block_given?

      wait_seconds = options[:retry_seconds] || DEFAULT_RETRY_SECONDS
      deadline = Time.now.utc + (options[:timeout_seconds] || DEFAULT_TIMEOUT_SECONDS)
      finished_ids = []

      loop do
        @log.info "Retrieving batch status information"
        get_completed_batches.each do |batch|
          batch_id = batch["id"]
          next if finished_ids.include?(batch_id)

          @log.info "Batch #{finished_ids.length + 1} of #{@batches_count || '?'}: " \
            "retrieving #{batch["numberRecordsProcessed"]} records"
          if batch["numberRecordsProcessed"] > 0
            get_batch_results(batch_id) { |record| yield(record) }
          end
          finished_ids.append(batch_id)
        end

        # @batches_count may still be nil; keep polling until it is known.
        break if @batches_count && finished_ids.length == @batches_count
        raise TimeoutError, "Timeout during batch processing" if Time.now.utc > deadline

        @log.info "Waiting #{wait_seconds} seconds"
        sleep(wait_seconds)
      end

      @log.info "Completed"
    end

    # Returns the batches whose state is "Completed".
    #
    # @raise [BatchError] when any batch has state "Failed"
    # @raise [RecordError] when a completed batch reports failed records
    def get_completed_batches
      get_batch_statuses.select do |batch|
        state = batch["state"]
        raise BatchError, "Batch failed: #{batch["stateMessage"]}" if state == "Failed"

        completed = state == "Completed"
        raise RecordError, "Failed records in batch" if completed && batch["numberRecordsFailed"] > 0
        completed
      end
    end

    # Yields each record of each result set of the batch, with the
    # "attributes" key removed from the record.
    def get_batch_results(batch_id)
      retrieve_batch_results(batch_id).each do |result_id|
        retrieve_results(batch_id, result_id).each do |record|
          record.delete("attributes")
          yield(record)
        end
      end
    end

    # Adds a batch to the job and returns the "id" from the response.
    # Query operations post the payload as a raw string; all other operations
    # post it as JSON.
    def create_batch(payload)
      batch_path = "job/#{@job_id}/batch"

      response =
        if QUERY_OPERATIONS.include?(@operation)
          @log.info "Creating #{@operation.capitalize} Batch: \"#{payload.gsub(/\n/, " ").strip}\""
          @connection.post(batch_path, payload.to_s)
        else
          @log.info "Creating #{@operation.capitalize} Batch"
          @connection.post_json(batch_path, payload)
        end

      response["id"]
    end

    # Returns the "batchInfo" entry of the job's batch listing.
    def get_batch_statuses
      @connection.get_json("job/#{@job_id}/batch")["batchInfo"]
    end

    # Fetches the result listing for one batch.
    def retrieve_batch_results(batch_id)
      @connection.get_json("job/#{@job_id}/batch/#{batch_id}/result")
    end

    # Fetches one result set of one batch.
    def retrieve_results(batch_id, result_id)
      @connection.get_json("job/#{@job_id}/batch/#{batch_id}/result/#{result_id}")
    end

    # Posts a state change of "Closed" for the job.
    def close
      @connection.post_json("job/#{@job_id}/", {"state": "Closed"})
    end

    private

    # Posts the job definition and returns the "id" from the response.
    # The external id field name is only included for "upsert" operations.
    def create_job(object, options)
      job_definition = {
        "operation": @operation,
        "object": object,
        "contentType": "JSON",
      }
      job_definition[:externalIdFieldName] = options[:external_id] if @operation == "upsert"
      @connection.post_json("job", job_definition, options[:headers].to_h)["id"]
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module SalesforceChunker
  # Query job that splits the work into Id ranges itself: it first downloads
  # the ordered Ids matching the query's where clause, picks every
  # batch_size-th Id as a breakpoint, and then creates one query batch per
  # Id range.
  class ManualChunkingQuery < Job

    # Creates the job, computes the Id breakpoints, creates the range batches
    # and closes the job.
    #
    # @param query [String] the query to split into Id ranges
    # @param options [Hash] :batch_size (defaults to 100000) plus the options accepted by Job
    def initialize(connection:, object:, operation:, query:, **options)
      chunk_size = options[:batch_size] || 100000
      where_clause = self.class.query_where_clause(query)

      super(connection: connection, object: object, operation: operation, **options)
      @log.info "Using Manual Chunking"

      @log.info "Retrieving Ids from records"
      id_breakpoints = breakpoints(object, where_clause, chunk_size)

      @log.info "Creating Query Batches"
      create_batches(query, id_breakpoints, where_clause)

      close
    end

    # Batch statuses with the initial Id-listing batch removed, unless it is
    # the only batch in the list.
    def get_batch_statuses
      statuses = super
      statuses.delete_if { |status| status["id"] == @initial_batch_id && statuses.count > 1 }
    end

    # Runs the Id-listing batch and returns the Id found at every
    # batch_size-th position (excluding position 0).
    def breakpoints(object, where_clause, batch_size)
      @batches_count = 1
      @initial_batch_id = create_batch("Select Id From #{object} #{where_clause} Order By Id Asc")

      indexed_records = download_results(retry_seconds: 10).with_index
      boundary_records = indexed_records.select { |_, position| position % batch_size == 0 && position != 0 }
      boundary_records.map { |record, _| record["Id"] }
    end

    # Creates one batch per Id range delimited by +breakpoints+ (or a single
    # batch with the unmodified query when there are none) and records the
    # resulting number of batches.
    def create_batches(query, breakpoints, where_clause)
      if breakpoints.empty?
        create_batch(query)
      else
        connector = where_clause.empty? ? " Where" : " And"
        ranged_query = query + connector

        create_batch("#{ranged_query} Id < '#{breakpoints.first}'")
        breakpoints.each_cons(2) do |lower, upper|
          create_batch("#{ranged_query} Id >= '#{lower}' And Id < '#{upper}'")
        end
        create_batch("#{ranged_query} Id >= '#{breakpoints.last}'")
      end
      @batches_count = breakpoints.length + 1
    end

    # Returns the query text from the first "where" keyword (case-insensitive,
    # followed by whitespace) to the end, or "" when there is none.
    def self.query_where_clause(query)
      _before, keyword, remainder = query.partition(/where\s/i)
      [keyword, remainder].join
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module SalesforceChunker
  # Query job that asks Salesforce to split the query into batches by sending
  # the "Sforce-Enable-PKChunking" header when the job is created.
  class PrimaryKeyChunkingQuery < Job

    # Creates the job with the PK chunking header and submits the query as the
    # initial batch.
    #
    # @param query [String] the query to run
    # @param options [Hash] :batch_size (chunk size, defaults to 100000),
    #   :headers (extra job-creation headers; entries here take precedence
    #   over the default chunking header), plus the options accepted by Job
    def initialize(connection:, object:, operation:, query:, **options)
      batch_size = options[:batch_size] || 100000

      # Plain Hash#merge with the defaults as receiver: caller-supplied
      # headers win, the caller's hash is not mutated, and no ActiveSupport
      # extension (reverse_merge!) is needed.
      pk_chunking_header = { "Sforce-Enable-PKChunking": "true; chunkSize=#{batch_size};" }
      options[:headers] = pk_chunking_header.merge(options[:headers].to_h)

      super(connection: connection, object: object, operation: operation, **options)
      @log.info "Using Primary Key Chunking"
      @initial_batch_id = create_batch(query)
    end

    # Batch statuses from the parent class. Until @batches_count is known,
    # each call also checks whether chunking setup has finished.
    def get_batch_statuses
      batches = super
      finalize_chunking_setup(batches) if @batches_count.nil?
      batches
    end

    private

    # Once the initial batch reports state "NotProcessed", records the number
    # of remaining batches (all but the initial one) and closes the job.
    def finalize_chunking_setup(batches)
      initial_batch = batches.find { |batch| batch["id"] == @initial_batch_id }
      if initial_batch && initial_batch["state"] == "NotProcessed"
        @batches_count = batches.length - 1
        close
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module SalesforceChunker
  # Job made of exactly one batch: the batch is created from the :payload
  # option (or :query when no payload is given), the batch count is set to 1
  # and the job is closed, all during construction.
  class SingleBatchJob < Job
    # @param options [Hash] :payload or :query for the batch body, plus the
    #   options accepted by Job
    def initialize(connection:, object:, operation:, **options)
      super(connection: connection, object: object, operation: operation, **options)

      batch_body = options[:payload] || options[:query]

      @log.info "Using Single Batch"
      @batch_id = create_batch(batch_body)
      @batches_count = 1

      close
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "salesforce_chunker/connection.rb"
|
2
|
+
require "salesforce_chunker/exceptions.rb"
|
3
|
+
require "salesforce_chunker/job.rb"
|
4
|
+
require "salesforce_chunker/single_batch_job.rb"
|
5
|
+
require "salesforce_chunker/primary_key_chunking_query.rb"
|
6
|
+
require "salesforce_chunker/manual_chunking_query.rb"
|
7
|
+
require 'logger'
|
8
|
+
|
9
|
+
module SalesforceChunker
|
10
|
+
class Client
|
11
|
+
|
12
|
+
def initialize(**options)
|
13
|
+
@log = options[:logger] || Logger.new(options[:log_output])
|
14
|
+
@log.progname = "salesforce_chunker"
|
15
|
+
|
16
|
+
@connection = SalesforceChunker::Connection.new(**options, logger: @log)
|
17
|
+
end
|
18
|
+
|
19
|
+
def query(query:, object:, **options)
|
20
|
+
return to_enum(:query, query: query, object: object, **options) unless block_given?
|
21
|
+
|
22
|
+
case options[:job_type]
|
23
|
+
when "single_batch"
|
24
|
+
job_class = SalesforceChunker::SingleBatchJob
|
25
|
+
when "manual_chunking"
|
26
|
+
job_class = SalesforceChunker::ManualChunkingQuery
|
27
|
+
when "primary_key_chunking", nil # for backwards compatibility
|
28
|
+
job_class = SalesforceChunker::PrimaryKeyChunkingQuery
|
29
|
+
end
|
30
|
+
|
31
|
+
job_params = {
|
32
|
+
connection: @connection,
|
33
|
+
object: object,
|
34
|
+
operation: "query",
|
35
|
+
query: query,
|
36
|
+
**options.slice(:batch_size, :logger, :log_output)
|
37
|
+
}
|
38
|
+
job_params[:logger] = @log if job_params[:logger].nil? && job_params[:log_output].nil?
|
39
|
+
|
40
|
+
job = job_class.new(**job_params)
|
41
|
+
job.download_results(**options.slice(:timeout, :retry_seconds)) { |result| yield(result) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def single_batch_query(**options)
|
45
|
+
query(**options.merge(job_type: "single_batch"))
|
46
|
+
end
|
47
|
+
|
48
|
+
def primary_key_chunking_query(**options)
|
49
|
+
query(**options.merge(job_type: "primary_key_chunking"))
|
50
|
+
end
|
51
|
+
|
52
|
+
def manual_chunking_query(**options)
|
53
|
+
query(**options.merge(job_type: "manual_chunking"))
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "salesforce_chunker/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name    = "salesforce_chunker"
  spec.version = SalesforceChunker::VERSION
  spec.authors = ["Curtis Holmes"]
  spec.email   = ["curtis.holmes@shopify.com"]

  # Description
  spec.summary     = "Salesforce Bulk API Client"
  spec.description = "Salesforce client and extractor designed for handling large amounts of data"
  spec.homepage    = "https://github.com/Shopify/salesforce_chunker"
  spec.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependency
  spec.add_dependency "httparty", "~> 0.13"

  # Development dependencies
  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "mocha", "~> 1.5.0"
  spec.add_development_dependency "pry", "~> 0.11.1"
end
|
metadata
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: salesforce_chunker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Curtis Holmes
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-11-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: httparty
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.13'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.16'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mocha
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.5.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.5.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.11.1
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.11.1
|
97
|
+
description: Salesforce client and extractor designed for handling large amounts of
|
98
|
+
data
|
99
|
+
email:
|
100
|
+
- curtis.holmes@shopify.com
|
101
|
+
executables: []
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".travis.yml"
|
107
|
+
- CHANGELOG.md
|
108
|
+
- Gemfile
|
109
|
+
- Gemfile.lock
|
110
|
+
- LICENSE.txt
|
111
|
+
- README.md
|
112
|
+
- Rakefile
|
113
|
+
- bin/console
|
114
|
+
- bin/setup
|
115
|
+
- dev.yml
|
116
|
+
- lib/salesforce_chunker.rb
|
117
|
+
- lib/salesforce_chunker/connection.rb
|
118
|
+
- lib/salesforce_chunker/exceptions.rb
|
119
|
+
- lib/salesforce_chunker/job.rb
|
120
|
+
- lib/salesforce_chunker/manual_chunking_query.rb
|
121
|
+
- lib/salesforce_chunker/primary_key_chunking_query.rb
|
122
|
+
- lib/salesforce_chunker/single_batch_job.rb
|
123
|
+
- lib/salesforce_chunker/version.rb
|
124
|
+
- salesforce_chunker.gemspec
|
125
|
+
homepage: https://github.com/Shopify/salesforce_chunker
|
126
|
+
licenses:
|
127
|
+
- MIT
|
128
|
+
metadata: {}
|
129
|
+
post_install_message:
|
130
|
+
rdoc_options: []
|
131
|
+
require_paths:
|
132
|
+
- lib
|
133
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
138
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: '0'
|
143
|
+
requirements: []
|
144
|
+
rubyforge_project:
|
145
|
+
rubygems_version: 2.7.6
|
146
|
+
signing_key:
|
147
|
+
specification_version: 4
|
148
|
+
summary: Salesforce Bulk API Client
|
149
|
+
test_files: []
|