salesforce_chunker 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +36 -0
- data/LICENSE.txt +21 -0
- data/README.md +167 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/dev.yml +16 -0
- data/lib/salesforce_chunker/connection.rb +76 -0
- data/lib/salesforce_chunker/exceptions.rb +18 -0
- data/lib/salesforce_chunker/job.rb +105 -0
- data/lib/salesforce_chunker/manual_chunking_query.rb +54 -0
- data/lib/salesforce_chunker/primary_key_chunking_query.rb +34 -0
- data/lib/salesforce_chunker/single_batch_job.rb +12 -0
- data/lib/salesforce_chunker/version.rb +3 -0
- data/lib/salesforce_chunker.rb +56 -0
- data/salesforce_chunker.gemspec +30 -0
- metadata +149 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 63428f8b047e970e10dab25118c83b9445e6c79b7d75cf3fd6552a37d8379cd5
|
4
|
+
data.tar.gz: 543260b73b9c91bed55e7788527553419bc83459d7917119e6b2286d74880a71
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 918d7303b522c79d351901073ac415fdb273b1e3582a9c6565cdb4878e76fb0744023766b6cacf7792a30f840f9bfe52c989790ceb7dad8e5de080503f34ef22
|
7
|
+
data.tar.gz: 28883b66685b1071a31b826ab23f166f4ec6dd42a86f98d5ae50ce590c3a38bb43ce6a374170408f522835edef009c20cdb01e5198d5091ba7e4c366eb95f7ca
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
salesforce_chunker (1.1.0)
|
5
|
+
httparty (~> 0.13)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
coderay (1.1.2)
|
11
|
+
httparty (0.16.2)
|
12
|
+
multi_xml (>= 0.5.2)
|
13
|
+
metaclass (0.0.4)
|
14
|
+
method_source (0.9.0)
|
15
|
+
minitest (5.11.3)
|
16
|
+
mocha (1.5.0)
|
17
|
+
metaclass (~> 0.0.1)
|
18
|
+
multi_xml (0.6.0)
|
19
|
+
pry (0.11.3)
|
20
|
+
coderay (~> 1.1.0)
|
21
|
+
method_source (~> 0.9.0)
|
22
|
+
rake (10.5.0)
|
23
|
+
|
24
|
+
PLATFORMS
|
25
|
+
ruby
|
26
|
+
|
27
|
+
DEPENDENCIES
|
28
|
+
bundler (~> 1.16)
|
29
|
+
minitest (~> 5.0)
|
30
|
+
mocha (~> 1.5.0)
|
31
|
+
pry (~> 0.11.1)
|
32
|
+
rake (~> 10.0)
|
33
|
+
salesforce_chunker!
|
34
|
+
|
35
|
+
BUNDLED WITH
|
36
|
+
1.16.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Shopify
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
# SalesforceChunker
|
2
|
+
|
3
|
+
The `salesforce_chunker` gem is a ruby library for interacting with the Salesforce Bulk API. It was primarily designed as an extractor to handle queries using batching and [Primary Key Chunking](https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/async_api_headers_enable_pk_chunking.htm).
|
4
|
+
|
5
|
+
Currently, only querying is built into `SalesforceChunker::Client`, but non-query jobs can be created with `SalesforceChunker::Job`.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'salesforce_chunker'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install salesforce_chunker
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
### SalesforceChunker::Client
|
26
|
+
|
27
|
+
#### Simple Example
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
client = SalesforceChunker::Client.new(
|
31
|
+
username: "username",
|
32
|
+
password: "password",
|
33
|
+
security_token: "security_token",
|
34
|
+
)
|
35
|
+
|
36
|
+
names = client.query(query: "Select Name From User", object: "User").map { |result| result["Name"] }
|
37
|
+
```
|
38
|
+
|
39
|
+
#### Initialize
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
client = SalesforceChunker::Client.new(
|
43
|
+
username: "username",
|
44
|
+
password: "password",
|
45
|
+
security_token: "security_token",
|
46
|
+
domain: "login",
|
47
|
+
salesforce_version: "42.0",
|
48
|
+
)
|
49
|
+
```
|
50
|
+
|
51
|
+
| Parameter | |
|
52
|
+
| --- | --- |
|
53
|
+
| username | required |
|
54
|
+
| password | required |
|
55
|
+
| security_token | may be required depending on your Salesforce setup |
|
56
|
+
| domain | optional. defaults to `"login"`. |
|
57
|
+
| salesforce_version | optional. defaults to `"42.0"`. Must be >= `"33.0"` to use PK Chunking. |
|
58
|
+
|
59
|
+
#### Functions
|
60
|
+
|
61
|
+
| function | |
|
62
|
+
| --- | --- |
|
63
|
+
| query | runs a SOQL query; see the Query section below |
|
64
|
+
| single_batch_query | calls `query(job_type: "single_batch", **options)` |
|
65
|
+
| primary_key_chunking_query | calls `query(job_type: "primary_key_chunking", **options)` |
|
66
|
+
|
67
|
+
#### Query
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
options = {
|
71
|
+
query: "Select Name from Account",
|
72
|
+
object: "Account",
|
73
|
+
batch_size: 100000,
|
74
|
+
retry_seconds: 10,
|
75
|
+
timeout_seconds: 3600,
|
76
|
+
logger: nil,
|
77
|
+
log_output: STDOUT,
|
78
|
+
job_type: "primary_key_chunking",
|
79
|
+
}
|
80
|
+
|
81
|
+
client.query(**options) do |result|
|
82
|
+
process(result)
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
| Parameter | | |
|
87
|
+
| --- | --- | --- |
|
88
|
+
| query | required | SOQL query. |
|
89
|
+
| object | required | Salesforce Object type. |
|
90
|
+
| batch_size | optional | defaults to `100000`. Number of records to process in a batch. (Only for PK Chunking) |
|
91
|
+
| retry_seconds | optional | defaults to `10`. Number of seconds to wait before querying API for updated results. |
|
92
|
+
| timeout_seconds | optional | defaults to `3600`. Number of seconds to wait before query is killed. |
|
93
|
+
| logger | optional | logger to use. Must be instance of or similar to rails logger. |
|
94
|
+
| log_output | optional | log output to use. i.e. `STDOUT`. |
|
95
|
+
| job_type | optional | defaults to `"primary_key_chunking"`. Can also be set to `"single_batch"` or `"manual_chunking"`. |
|
96
|
+
|
97
|
+
`query` can either be called with a block, or will return an enumerator:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
names = client.query(query: query, object: object, **options).map { |result| result["Name"] }
|
101
|
+
```
|
102
|
+
|
103
|
+
### Under the hood: SalesforceChunker::Job
|
104
|
+
|
105
|
+
Using `SalesforceChunker::Job`, you have more direct access to the Salesforce Bulk API functions, such as `create_batch`, `get_batch_statuses`, and `retrieve_batch_results`. This can be used to perform custom tasks, such as upserts or multiple batch queries.
|
106
|
+
|
107
|
+
This should be used in coordination with `SalesforceChunker::Connection`, which has the same initialization process as `SalesforceChunker::Client`.
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
connection = SalesforceChunker::Connection.new(
|
111
|
+
username: "username",
|
112
|
+
password: "password",
|
113
|
+
security_token: "security_token",
|
114
|
+
)
|
115
|
+
|
116
|
+
job = SalesforceChunker::Job.new(
|
117
|
+
connection: connection,
|
118
|
+
object: "Account",
|
119
|
+
operation: "query",
|
120
|
+
log_output: STDOUT,
|
121
|
+
)
|
122
|
+
|
123
|
+
job.create_batch("Select Id From Account Order By Id Desc Limit 1")
|
124
|
+
job.create_batch("Select Id From Account Order By Id Asc Limit 1")
|
125
|
+
job.close
|
126
|
+
|
127
|
+
job.instance_variable_set(:@batches_count, 2)
|
128
|
+
ids = job.download_results.to_a
|
129
|
+
```
|
130
|
+
|
131
|
+
Also, `SalesforceChunker::SingleBatchJob` can be used to create a Job with only a single batch. This automatically handles the batch creation, closing, and setting `@batches_count`.
|
132
|
+
|
133
|
+
```ruby
|
134
|
+
job = SalesforceChunker::SingleBatchJob.new(
|
135
|
+
connection: connection,
|
136
|
+
object: "Account",
|
137
|
+
operation: "upsert",
|
138
|
+
payload: [{ "Name" => "Random Account", "IdField__c" => "123456" }],
|
139
|
+
external_id: "IdField__c",
|
140
|
+
log_output: STDOUT,
|
141
|
+
)
|
142
|
+
|
143
|
+
loop do
|
144
|
+
batch = job.get_batch_statuses.first
|
145
|
+
if batch["state"] == "Completed"
|
146
|
+
break
|
147
|
+
elsif batch["state"] == "Failed"
|
148
|
+
raise "batch failed"
|
149
|
+
end
|
150
|
+
sleep 5
|
151
|
+
end
|
152
|
+
```
|
153
|
+
|
154
|
+
## Development
|
155
|
+
|
156
|
+
After checking out the repo,
|
157
|
+
- run `bin/setup` to install dependencies.
|
158
|
+
- run `rake test` to run the tests.
|
159
|
+
- run `bin/console` for an interactive prompt that will allow you to experiment.
|
160
|
+
|
161
|
+
## Contributing
|
162
|
+
|
163
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/Shopify/salesforce_chunker.
|
164
|
+
|
165
|
+
## License
|
166
|
+
|
167
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "salesforce_chunker"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/dev.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# For internal Shopify employee use
|
2
|
+
# the requirements below describe the required dependencies
|
3
|
+
---
|
4
|
+
name: salesforce-chunker
|
5
|
+
|
6
|
+
up:
|
7
|
+
- ruby: 2.5.0
|
8
|
+
- bundler
|
9
|
+
|
10
|
+
commands:
|
11
|
+
console:
|
12
|
+
desc: 'start a rails console'
|
13
|
+
run: bin/console
|
14
|
+
test:
|
15
|
+
desc: 'run the tests'
|
16
|
+
run: bundle exec rake test
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require "httparty"
|
2
|
+
|
3
|
+
module SalesforceChunker
  # HTTP connection to the Salesforce Bulk API.
  #
  # On construction it posts a SOAP login request, then remembers the session
  # id and the instance-specific base URL that every later request uses.
  class Connection
    # Captures the host prefix of a login serverUrl. The dots are escaped so
    # only a literal ".salesforce.com" host suffix matches.
    INSTANCE_PATTERN = %r{https://(.*)\.salesforce\.com}

    # @param username [String] Salesforce username
    # @param password [String] Salesforce password
    # @param security_token [String] appended to the password in the login request
    # @param domain [String] host prefix of the login endpoint
    # @param salesforce_version [String] API version used in the endpoint URLs
    # @param options [Hash] :logger (a ready logger) or :log_output (passed to Logger.new)
    # @raise [ConnectionError] when the login response carries no usable result
    def initialize(username: "", password: "", security_token: "", domain: "login", salesforce_version: "42.0", **options)
      @log = options[:logger] || Logger.new(options[:log_output])
      @log.progname = "salesforce_chunker"

      response = HTTParty.post(
        "https://#{domain}.salesforce.com/services/Soap/u/#{salesforce_version}",
        headers: { "SOAPAction": "login", "Content-Type": "text/xml; charset=UTF-8" },
        body: self.class.login_soap_request_body(username, password, security_token)
      ).parsed_response

      result = dig_hash(response, "Envelope", "Body", "loginResponse", "result")
      instance = self.class.get_instance(result["serverUrl"]) if result.is_a?(Hash)

      # Inspect the response explicitly instead of rescuing NoMethodError, so an
      # unexpected payload surfaces as ConnectionError rather than a second
      # NoMethodError raised while looking for the fault string.
      unless instance
        fault = dig_hash(response, "Envelope", "Body", "Fault", "faultstring")
        raise ConnectionError, fault || "Unexpected login response from Salesforce"
      end

      @base_url = "https://#{instance}.salesforce.com/services/async/#{salesforce_version}/"
      @default_headers = {
        "Content-Type": "application/json",
        "X-SFDC-Session": result["sessionId"],
        "Accept-Encoding": "gzip",
      }
    end

    # Serializes +body+ to JSON and POSTs it to +url+ (relative to the base URL).
    def post_json(url, body, headers={})
      post(url, body.to_json, headers)
    end

    # POSTs a raw +body+ to +url+ and returns the parsed response.
    #
    # @raise [ResponseError] when the response carries an "exceptionCode"
    def post(url, body, headers={})
      @log.info "POST: #{url}"
      response = HTTParty.post(@base_url + url, headers: @default_headers.merge(headers), body: body)
      self.class.check_response_error(response.parsed_response)
    end

    # GETs +url+ (relative to the base URL) and returns the parsed response.
    #
    # @raise [ResponseError] when the response carries an "exceptionCode"
    def get_json(url, headers={})
      @log.info "GET: #{url}"
      response = HTTParty.get(@base_url + url, headers: @default_headers.merge(headers))
      self.class.check_response_error(response.parsed_response)
    end

    # The class-level helpers below are public: a bare `private` does not
    # apply to `def self.` methods, and #initialize calls them through
    # `self.class`.

    # Builds the SOAP login envelope. Credentials are XML-escaped; the security
    # token is concatenated directly after the password.
    def self.login_soap_request_body(username, password, security_token)
      "<?xml version=\"1.0\" encoding=\"utf-8\" ?>
      <env:Envelope
          xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"
          xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
          xmlns:env=\"http://schemas.xmlsoap.org/soap/envelope/\"
          xmlns:urn=\"urn:partner.soap.sforce.com\">
        <env:Body>
          <n1:login xmlns:n1=\"urn:partner.soap.sforce.com\">
            <n1:username>#{username.encode(xml: :text)}</n1:username>
            <n1:password>#{password.encode(xml: :text)}#{security_token.encode(xml: :text)}</n1:password>
          </n1:login>
        </env:Body>
      </env:Envelope>"
    end

    # Extracts the host prefix from a login serverUrl.
    #
    # @return [String, nil] the prefix, or nil when the URL does not match
    def self.get_instance(server_url)
      server_url.to_s[INSTANCE_PATTERN, 1]
    end

    # Returns +response+ unchanged unless it is a Hash with an "exceptionCode".
    #
    # @raise [ResponseError] built from "exceptionCode" and "exceptionMessage"
    def self.check_response_error(response)
      if response.is_a?(Hash) && response.key?("exceptionCode")
        raise ResponseError, "#{response["exceptionCode"]}: #{response["exceptionMessage"]}"
      else
        response
      end
    end

    private

    # Walks +keys+ through nested hashes; yields nil as soon as a level is not a Hash.
    def dig_hash(data, *keys)
      keys.reduce(data) { |node, key| node.is_a?(Hash) ? node[key] : nil }
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module SalesforceChunker
  # Common ancestor of every error defined by this library.
  class Error < StandardError
  end

  # Connecting to Salesforce failed.
  class ConnectionError < Error
  end

  # A request sent to Salesforce was rejected as invalid.
  class ResponseError < Error
  end

  # Salesforce reported a failed batch.
  class BatchError < Error
  end

  # Salesforce reported a successful batch that contains failed record(s).
  class RecordError < Error
  end

  # A batch job ran past its time limit.
  class TimeoutError < Error
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module SalesforceChunker
  # One Bulk API job: creates the job on construction and exposes helpers to
  # add batches, poll their status, download query results and close the job.
  class Job
    # Number of batches #download_results waits for; nil until someone sets it.
    attr_reader :batches_count

    QUERY_OPERATIONS = ["query", "queryall"].freeze
    DEFAULT_RETRY_SECONDS = 10
    DEFAULT_TIMEOUT_SECONDS = 3600

    # @param connection [#post, #post_json, #get_json] connection used for all requests
    # @param object [String] Salesforce object type sent in the job body
    # @param operation [String] job operation, e.g. "query" or "upsert"
    # @param options [Hash] :logger / :log_output for logging; :headers and
    #   :external_id are forwarded to job creation
    def initialize(connection:, object:, operation:, **options)
      @log = options[:logger] || Logger.new(options[:log_output])
      @log.progname = "salesforce_chunker"

      @connection = connection
      @operation = operation
      @batches_count = nil

      @log.info "Creating Bulk API Job"
      @job_id = create_job(object, options.slice(:headers, :external_id))
    end

    # Yields every record of every completed batch, polling until
    # @batches_count batches have been downloaded.
    #
    # @param options [Hash] :retry_seconds (pause between polls) and
    #   :timeout_seconds (overall limit)
    # @return [nil, Enumerator] nil for non-query operations; an Enumerator
    #   when no block is given
    # @raise [TimeoutError] when the limit passes before all batches are downloaded
    def download_results(**options)
      return nil unless QUERY_OPERATIONS.include?(@operation)
      return to_enum(:download_results, **options) unless block_given?

      retry_seconds = options[:retry_seconds] || DEFAULT_RETRY_SECONDS
      timeout_at = Time.now.utc + (options[:timeout_seconds] || DEFAULT_TIMEOUT_SECONDS)
      downloaded_batches = []

      loop do
        @log.info "Retrieving batch status information"
        get_completed_batches.each do |batch|
          next if downloaded_batches.include?(batch["id"])
          @log.info "Batch #{downloaded_batches.length + 1} of #{@batches_count || '?'}: " \
            "retrieving #{batch["numberRecordsProcessed"]} records"
          # Batches that processed no records have nothing to fetch.
          get_batch_results(batch["id"]) { |result| yield(result) } if batch["numberRecordsProcessed"] > 0
          downloaded_batches << batch["id"]
        end

        # Completion is checked before the timeout so a job that finishes on
        # the final poll is not reported as timed out.
        break if @batches_count && downloaded_batches.length == @batches_count
        raise TimeoutError, "Timeout during batch processing" if Time.now.utc > timeout_at

        @log.info "Waiting #{retry_seconds} seconds"
        sleep(retry_seconds)
      end

      @log.info "Completed"
    end

    # Returns the batches whose state is "Completed".
    #
    # @raise [BatchError] when any batch is in state "Failed"
    # @raise [RecordError] when a completed batch reports failed records
    def get_completed_batches
      get_batch_statuses.select do |batch|
        raise BatchError, "Batch failed: #{batch["stateMessage"]}" if batch["state"] == "Failed"
        raise RecordError, "Failed records in batch" if batch["state"] == "Completed" && batch["numberRecordsFailed"] > 0
        batch["state"] == "Completed"
      end
    end

    # Yields each record of +batch_id+ with its "attributes" key removed.
    def get_batch_results(batch_id)
      retrieve_batch_results(batch_id).each do |result_id|
        retrieve_results(batch_id, result_id).each do |result|
          result.tap { |h| h.delete("attributes") }
          yield(result)
        end
      end
    end

    # Adds a batch to the job and returns its id.
    #
    # Query operations post the payload as text; every other operation posts
    # it as JSON.
    def create_batch(payload)
      if QUERY_OPERATIONS.include?(@operation)
        # Convert once so the log line and the request agree and a non-String
        # payload cannot fail inside the logging call.
        query = payload.to_s
        @log.info "Creating #{@operation.capitalize} Batch: \"#{query.gsub(/\n/, " ").strip}\""
        @connection.post("job/#{@job_id}/batch", query)["id"]
      else
        @log.info "Creating #{@operation.capitalize} Batch"
        @connection.post_json("job/#{@job_id}/batch", payload)["id"]
      end
    end

    # Returns the "batchInfo" entry of the job's batch listing.
    def get_batch_statuses
      @connection.get_json("job/#{@job_id}/batch")["batchInfo"]
    end

    # Returns the parsed response of the batch's result listing.
    def retrieve_batch_results(batch_id)
      @connection.get_json("job/#{@job_id}/batch/#{batch_id}/result")
    end

    # Returns the parsed response for one result of a batch.
    def retrieve_results(batch_id, result_id)
      @connection.get_json("job/#{@job_id}/batch/#{batch_id}/result/#{result_id}")
    end

    # Posts the "Closed" state for this job.
    def close
      body = {"state": "Closed"}
      @connection.post_json("job/#{@job_id}/", body)
    end

    private

    # Creates the job and returns its id. The external id field name is only
    # included for the "upsert" operation.
    def create_job(object, options)
      body = {
        "operation": @operation,
        "object": object,
        "contentType": "JSON",
      }
      body[:externalIdFieldName] = options[:external_id] if @operation == "upsert"
      @connection.post_json("job", body, options[:headers].to_h)["id"]
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module SalesforceChunker
  # Query job that chunks by hand: a first batch lists the Ids in ascending
  # order, every batch_size-th Id becomes a boundary, and one query batch is
  # created per Id range.
  class ManualChunkingQuery < Job
    # @param query [String] query to split into Id ranges
    # @param options [Hash] :batch_size (defaults to 100000) plus the Job options
    def initialize(connection:, object:, operation:, query:, **options)
      chunk_size = options[:batch_size] || 100000
      filter = self.class.query_where_clause(query)

      super(connection: connection, object: object, operation: operation, **options)
      @log.info "Using Manual Chunking"

      @log.info "Retrieving Ids from records"
      boundary_ids = breakpoints(object, filter, chunk_size)

      @log.info "Creating Query Batches"
      create_batches(query, boundary_ids, filter)

      close
    end

    # Batch statuses without the Id-listing batch, once other batches exist.
    def get_batch_statuses
      statuses = super
      several = statuses.count > 1
      statuses.delete_if { |status| several && status["id"] == @initial_batch_id }
    end

    # Runs the Id-listing batch and returns the Id found at every non-zero
    # multiple of +batch_size+ in the result stream.
    def breakpoints(object, where_clause, batch_size)
      @batches_count = 1
      @initial_batch_id = create_batch("Select Id From #{object} #{where_clause} Order By Id Asc")

      boundary_ids = []
      download_results(retry_seconds: 10).each_with_index do |record, position|
        boundary_ids << record["Id"] if (position % batch_size).zero? && position != 0
      end
      boundary_ids
    end

    # Creates one batch per Id range delimited by +breakpoints+ (or a single
    # batch for the untouched query when there are none) and records how many
    # batches to expect.
    def create_batches(query, breakpoints, where_clause)
      if breakpoints.empty?
        create_batch(query)
      else
        prefix = query + (where_clause.empty? ? " Where" : " And")

        conditions = ["Id < '#{breakpoints.first}'"]
        breakpoints.each_cons(2) do |lower, upper|
          conditions << "Id >= '#{lower}' And Id < '#{upper}'"
        end
        conditions << "Id >= '#{breakpoints.last}'"

        conditions.each { |condition| create_batch("#{prefix} #{condition}") }
      end
      @batches_count = breakpoints.length + 1
    end

    # Returns the query text from the first "where" keyword onward, or "" when
    # the query has none.
    def self.query_where_clause(query)
      _head, keyword, tail = query.partition(/where\s/i)
      keyword + tail
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module SalesforceChunker
  # Query job that sends the Sforce-Enable-PKChunking header on job creation
  # and works out how many batches to expect from the batch listing.
  class PrimaryKeyChunkingQuery < Job
    # @param query [String] query submitted as the initial batch
    # @param options [Hash] :batch_size (chunkSize in the header, defaults to
    #   100000), :headers (extra job-creation headers) plus the Job options
    def initialize(connection:, object:, operation:, query:, **options)
      batch_size = options[:batch_size] || 100000
      chunking_header = { "Sforce-Enable-PKChunking": "true; chunkSize=#{batch_size};" }

      # Core Hash#merge replaces the ActiveSupport-only reverse_merge!, which
      # is undefined in a plain Ruby process. Caller-supplied headers still
      # win on conflict, and the caller's hash is no longer mutated.
      options[:headers] = chunking_header.merge(options[:headers].to_h)

      super(connection: connection, object: object, operation: operation, **options)
      @log.info "Using Primary Key Chunking"
      @initial_batch_id = create_batch(query)
    end

    # Returns the batch statuses; until @batches_count is known, each call
    # also checks whether the batch count can be fixed.
    def get_batch_statuses
      batches = super
      finalize_chunking_setup(batches) if @batches_count.nil?
      batches
    end

    private

    # Once the initial batch is reported as "NotProcessed", every other listed
    # batch counts toward @batches_count and the job is closed.
    def finalize_chunking_setup(batches)
      initial_batch = batches.find { |batch| batch["id"] == @initial_batch_id }
      if initial_batch && initial_batch["state"] == "NotProcessed"
        @batches_count = batches.length - 1
        close
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module SalesforceChunker
  # Job made of exactly one batch: it creates the batch, sets the expected
  # batch count to 1 and closes the job, all during construction.
  class SingleBatchJob < Job
    # @param options [Hash] :payload, or :query when no payload is given, is
    #   the batch content; the remaining keys are the Job options
    def initialize(connection:, object:, operation:, **options)
      super(connection: connection, object: object, operation: operation, **options)

      payload, query = options.values_at(:payload, :query)
      @log.info "Using Single Batch"
      @batch_id = create_batch(payload || query)
      @batches_count = 1
      close
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "salesforce_chunker/connection.rb"
|
2
|
+
require "salesforce_chunker/exceptions.rb"
|
3
|
+
require "salesforce_chunker/job.rb"
|
4
|
+
require "salesforce_chunker/single_batch_job.rb"
|
5
|
+
require "salesforce_chunker/primary_key_chunking_query.rb"
|
6
|
+
require "salesforce_chunker/manual_chunking_query.rb"
|
7
|
+
require 'logger'
|
8
|
+
|
9
|
+
module SalesforceChunker
  # Entry point for running queries: opens a Connection on construction and
  # runs each query through one of the job classes.
  class Client
    # @param options [Hash] forwarded to Connection.new; :logger or
    #   :log_output select where log lines go
    def initialize(**options)
      @log = options[:logger] || Logger.new(options[:log_output])
      @log.progname = "salesforce_chunker"

      @connection = SalesforceChunker::Connection.new(**options, logger: @log)
    end

    # Runs +query+ against +object+ and yields each result.
    #
    # @param query [String] query text handed to the job
    # @param object [String] Salesforce object type
    # @param options [Hash] :job_type ("single_batch", "manual_chunking" or
    #   "primary_key_chunking", the default), :batch_size, :logger,
    #   :log_output, :retry_seconds, :timeout_seconds
    # @return [Enumerator] when called without a block
    # @raise [ArgumentError] when :job_type is not one of the known values
    def query(query:, object:, **options, &block)
      return to_enum(:query, query: query, object: object, **options) unless block_given?

      job_class = job_class_for(options[:job_type])

      job_params = {
        connection: @connection,
        object: object,
        operation: "query",
        query: query,
        **options.slice(:batch_size, :logger, :log_output)
      }
      # Fall back to the client's logger only when the caller configured neither.
      job_params[:logger] = @log if job_params[:logger].nil? && job_params[:log_output].nil?

      job = job_class.new(**job_params)
      # :timeout_seconds is the key Job#download_results reads; slicing
      # :timeout silently discarded the caller's limit.
      job.download_results(**options.slice(:timeout_seconds, :retry_seconds), &block)
    end

    # Same as #query with job_type "single_batch". The block is forwarded.
    def single_batch_query(**options, &block)
      query(**options.merge(job_type: "single_batch"), &block)
    end

    # Same as #query with job_type "primary_key_chunking". The block is forwarded.
    def primary_key_chunking_query(**options, &block)
      query(**options.merge(job_type: "primary_key_chunking"), &block)
    end

    # Same as #query with job_type "manual_chunking". The block is forwarded.
    def manual_chunking_query(**options, &block)
      query(**options.merge(job_type: "manual_chunking"), &block)
    end

    private

    # Maps a :job_type value to its job class.
    def job_class_for(job_type)
      case job_type
      when "single_batch"
        SalesforceChunker::SingleBatchJob
      when "manual_chunking"
        SalesforceChunker::ManualChunkingQuery
      when "primary_key_chunking", nil # for backwards compatibility
        SalesforceChunker::PrimaryKeyChunkingQuery
      else
        raise ArgumentError, "Unknown job_type: #{job_type.inspect}"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
# Put the gem's lib directory on the load path so the version constant can be
# read before the gem is installed.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "salesforce_chunker/version"

Gem::Specification.new do |gem|
  gem.name = "salesforce_chunker"
  gem.version = SalesforceChunker::VERSION
  gem.authors = ["Curtis Holmes"]
  gem.email = ["curtis.holmes@shopify.com"]

  gem.summary = "Salesforce Bulk API Client"
  gem.description = "Salesforce client and extractor designed for handling large amounts of data"
  gem.homepage = "https://github.com/Shopify/salesforce_chunker"
  gem.license = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir = "exe"
  gem.executables = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_dependency "httparty", "~> 0.13"
  gem.add_development_dependency "bundler", "~> 1.16"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.0"
  gem.add_development_dependency "mocha", "~> 1.5.0"
  gem.add_development_dependency "pry", "~> 0.11.1"
end
|
metadata
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: salesforce_chunker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Curtis Holmes
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-11-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: httparty
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.13'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.16'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mocha
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.5.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.5.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.11.1
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.11.1
|
97
|
+
description: Salesforce client and extractor designed for handling large amounts of
|
98
|
+
data
|
99
|
+
email:
|
100
|
+
- curtis.holmes@shopify.com
|
101
|
+
executables: []
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".travis.yml"
|
107
|
+
- CHANGELOG.md
|
108
|
+
- Gemfile
|
109
|
+
- Gemfile.lock
|
110
|
+
- LICENSE.txt
|
111
|
+
- README.md
|
112
|
+
- Rakefile
|
113
|
+
- bin/console
|
114
|
+
- bin/setup
|
115
|
+
- dev.yml
|
116
|
+
- lib/salesforce_chunker.rb
|
117
|
+
- lib/salesforce_chunker/connection.rb
|
118
|
+
- lib/salesforce_chunker/exceptions.rb
|
119
|
+
- lib/salesforce_chunker/job.rb
|
120
|
+
- lib/salesforce_chunker/manual_chunking_query.rb
|
121
|
+
- lib/salesforce_chunker/primary_key_chunking_query.rb
|
122
|
+
- lib/salesforce_chunker/single_batch_job.rb
|
123
|
+
- lib/salesforce_chunker/version.rb
|
124
|
+
- salesforce_chunker.gemspec
|
125
|
+
homepage: https://github.com/Shopify/salesforce_chunker
|
126
|
+
licenses:
|
127
|
+
- MIT
|
128
|
+
metadata: {}
|
129
|
+
post_install_message:
|
130
|
+
rdoc_options: []
|
131
|
+
require_paths:
|
132
|
+
- lib
|
133
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
138
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: '0'
|
143
|
+
requirements: []
|
144
|
+
rubyforge_project:
|
145
|
+
rubygems_version: 2.7.6
|
146
|
+
signing_key:
|
147
|
+
specification_version: 4
|
148
|
+
summary: Salesforce Bulk API Client
|
149
|
+
test_files: []
|