salesforce_bulk_api 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.sample +7 -0
- data/.github/workflows/ci.yml +36 -0
- data/.gitignore +1 -0
- data/.rspec +1 -1
- data/.rubocop.yml +1927 -0
- data/CHANGELOG.md +0 -0
- data/README.md +332 -18
- data/lib/salesforce_bulk_api/concerns/throttling.rb +15 -6
- data/lib/salesforce_bulk_api/connection.rb +55 -51
- data/lib/salesforce_bulk_api/job.rb +193 -74
- data/lib/salesforce_bulk_api/version.rb +1 -1
- data/lib/salesforce_bulk_api.rb +32 -21
- data/salesforce_bulk_api.gemspec +5 -2
- data/spec/salesforce_bulk_api/salesforce_bulk_api_spec.rb +70 -105
- data/spec/spec_helper.rb +3 -0
- metadata +52 -6
data/lib/salesforce_bulk_api/job.rb
CHANGED

@@ -1,5 +1,3 @@
-require "timeout"
-
 module SalesforceBulkApi
   class Job
     attr_reader :job_id
@@ -23,121 +21,242 @@ module SalesforceBulkApi
       @send_nulls = send_nulls
       @no_null_list = no_null_list

-      xml =
-
-
+      xml = "#{XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
+      xml << "<operation>#{@operation}</operation>"
+      xml << "<object>#{@sobject}</object>"
+      # This only happens on upsert
+      if !@external_field.nil?
+        xml << "<externalIdFieldName>#{@external_field}</externalIdFieldName>"
+      end
+      xml << "<contentType>XML</contentType>"
+      xml << "</jobInfo>"
+
+      path = "job"
+      headers = {"Content-Type" => "application/xml; charset=utf-8"}
+
+      response = @connection.post_xml(nil, path, xml, headers)
+      response_parsed = XmlSimple.xml_in(response)
+
+      # response may contain an exception, so raise it
+      raise SalesforceException.new("#{response_parsed["exceptionMessage"][0]} (#{response_parsed["exceptionCode"][0]})") if response_parsed["exceptionCode"]
+
+      @job_id = response_parsed["id"][0]
     end

     def close_job
-      xml =
-
+      xml = "#{XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
+      xml << "<state>Closed</state>"
+      xml << "</jobInfo>"
+
+      path = "job/#{@job_id}"
+      headers = {"Content-Type" => "application/xml; charset=utf-8"}
+
+      response = @connection.post_xml(nil, path, xml, headers)
       XmlSimple.xml_in(response)
     end

     def add_query
-
+      path = "job/#{@job_id}/batch/"
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}
+
+      response = @connection.post_xml(nil, path, @records, headers)
       response_parsed = XmlSimple.xml_in(response)
+
       @batch_ids << response_parsed["id"][0]
     end

     def add_batches
-      raise ArgumentError, "Records must be an array of hashes." unless @records.is_a?
+      raise ArgumentError, "Records must be an array of hashes." unless @records.is_a? Array
+      keys = @records.each_with_object({}) { |pairs, h|
+        pairs.each { |k, v| (h[k] ||= []) << v }
+      }.keys

-
-      batches = @records.each_slice(@batch_size).to_a
+      @records_dup = @records.clone

-
+      super_records = []
+      (@records_dup.size / @batch_size).to_i.times do
+        super_records << @records_dup.pop(@batch_size)
+      end
+      super_records << @records_dup unless @records_dup.empty?
+
+      super_records.each do |batch|
         @batch_ids << add_batch(keys, batch)
       end
     end

-    def
-
-
-
-      job_status = check_job_status
-      break unless job_closed_and_batches_completed?(job_status, state)
-      break if @batch_ids.empty?
-    end
+    def add_batch(keys, batch)
+      xml = "#{XML_HEADER}<sObjects xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"
+      batch.each do |r|
+        xml << create_sobject(keys, r)
       end
-
-
-
-
-
+      xml << "</sObjects>"
+      path = "job/#{@job_id}/batch/"
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}
+      response = @connection.post_xml(nil, path, xml, headers)
+      response_parsed = XmlSimple.xml_in(response)
+      response_parsed["id"][0] if response_parsed["id"]
     end

-
-
-
-
-
-
-
-
-
+    def build_sobject(data)
+      xml = "<sObject>"
+      data.keys.each do |k|
+        if k.is_a?(Hash)
+          xml << build_sobject(k)
+        elsif k.to_s.include? "."
+          relations = k.to_s.split(".")
+          parent = relations[0]
+          child = relations[1..].join(".")
+          xml << "<#{parent}>#{build_sobject({child => data[k]})}</#{parent}>"
+        elsif data[k] != :type
+          xml << "<#{k}>#{data[k]}</#{k}>"
+        end
+      end
+      xml << "</sObject>"
     end

-    def
-
+    def build_relationship_sobject(key, value)
+      if key.to_s.include? "."
+        relations = key.to_s.split(".")
+        parent = relations[0]
+        child = relations[1..].join(".")
+        xml = "<#{parent}>"
+        xml << "<sObject>"
+        xml << build_relationship_sobject(child, value)
+        xml << "</sObject>"
+        xml << "</#{parent}>"
+      else
+        "<#{key}>#{value}</#{key}>"
+      end
     end

-    def
-
-
+    def create_sobject(keys, r)
+      sobject_xml = "<sObject>"
+      keys.each do |k|
+        if r[k].is_a?(Hash)
+          sobject_xml << "<#{k}>"
+          sobject_xml << build_sobject(r[k])
+          sobject_xml << "</#{k}>"
+        elsif k.to_s.include? "."
+          sobject_xml << build_relationship_sobject(k, r[k])
+        elsif !r[k].to_s.empty?
+          sobject_xml << "<#{k}>"
+          sobject_xml << if r[k].respond_to?(:encode)
+            r[k].encode(xml: :text)
+          elsif r[k].respond_to?(:iso8601) # timestamps
+            r[k].iso8601.to_s
+          else
+            r[k].to_s
+          end
+          sobject_xml << "</#{k}>"
+        elsif @send_nulls && !@no_null_list.include?(k) && r.key?(k)
+          sobject_xml << "<#{k} xsi:nil=\"true\"/>"
+        end
+      end
+      sobject_xml << "</sObject>"
+      sobject_xml
     end

-    def
-
-
-
+    def check_job_status
+      path = "job/#{@job_id}"
+      headers = {}
+      response = @connection.get_request(nil, path, headers)
+
+      begin
+        response_parsed = XmlSimple.xml_in(response) if response
+        response_parsed
+      rescue => e
+        puts "Error parsing XML response for #{@job_id}"
+        puts e
+        puts e.backtrace
       end
-      @job_id = response_parsed["id"][0]
     end

-    def
-
-
-      xml << "</sObjects>"
+    def check_batch_status(batch_id)
+      path = "job/#{@job_id}/batch/#{batch_id}"
+      headers = {}

-      response =
-
-
+      response = @connection.get_request(nil, path, headers)
+
+      begin
+        response_parsed = XmlSimple.xml_in(response) if response
+        response_parsed
+      rescue => e
+        puts "Error parsing XML response for #{@job_id}, batch #{batch_id}"
+        puts e
+        puts e.backtrace
+      end
     end

-    def
-
+    def get_job_result(return_result, timeout)
+      # timeout is in seconds
+      begin
+        state = []
+        Timeout.timeout(timeout, SalesforceBulkApi::JobTimeout) do
+          loop do
+            job_status = check_job_status
+            if job_status && job_status["state"] && job_status["state"][0] == "Closed"
+              batch_statuses = {}

-
-
-
-
-      end
+              batches_ready = @batch_ids.all? do |batch_id|
+                batch_state = batch_statuses[batch_id] = check_batch_status(batch_id)
+                batch_state && batch_state["state"] && batch_state["state"][0] && !["Queued", "InProgress"].include?(batch_state["state"][0])
+              end

-
-
-
-
+              if batches_ready
+                @batch_ids.each do |batch_id|
+                  state.insert(0, batch_statuses[batch_id])
+                  @batch_ids.delete(batch_id)
+                end
+              end
+              break if @batch_ids.empty?
+            else
+              break
+            end
+          end
         end
+      rescue SalesforceBulkApi::JobTimeout => e
+        puts "Timeout waiting for Salesforce to process job batches #{@batch_ids} of job #{@job_id}."
+        puts e
+        raise
       end

-
+      state.each_with_index do |batch_state, i|
+        if batch_state["state"][0] == "Completed" && return_result == true
+          state[i].merge!({"response" => get_batch_result(batch_state["id"][0])})
+        end
+      end
+      state
     end

-    def
-
-
-      raise
-    end
+    def get_batch_result(batch_id)
+      path = "job/#{@job_id}/batch/#{batch_id}/result"
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}

-
-
-
-
-
+      response = @connection.get_request(nil, path, headers)
+      response_parsed = XmlSimple.xml_in(response)
+      results = response_parsed["result"] unless @operation == "query"
+
+      if @operation == "query" # The query op requires us to do another request to get the results
+        result_id = response_parsed["result"][0]
+        path = "job/#{@job_id}/batch/#{batch_id}/result/#{result_id}"
+        headers = {"Content-Type" => "application/xml; charset=UTF-8"}
+        response = @connection.get_request(nil, path, headers)
+        response_parsed = XmlSimple.xml_in(response)
+        results = response_parsed["records"]
       end
+      results
+    end
+
+    def get_batch_records(batch_id)
+      path = "job/#{@job_id}/batch/#{batch_id}/request"
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}
+
+      response = @connection.get_request(nil, path, headers)
+      response_parsed = XmlSimple.xml_in(response)
+      response_parsed["sObject"]
     end
   end

-  class JobTimeout < StandardError
+  class JobTimeout < StandardError
+  end
 end
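Two of the new behaviors in job.rb above are easy to sanity-check in isolation. First, add_batches splits records by popping batch_size-sized chunks off the end of a cloned array (replacing the previous each_slice approach), so the final partial batch comes from the front of the array. A minimal standalone sketch of that arithmetic, with placeholder records:

    records_dup = (1..25).to_a   # 25 placeholder records
    batch_size = 10
    super_records = []
    (records_dup.size / batch_size).to_i.times do
      super_records << records_dup.pop(batch_size)          # chunks taken from the end
    end
    super_records << records_dup unless records_dup.empty?  # leftover from the front
    p super_records.map(&:size)  # => [10, 10, 5]

Second, the new build_relationship_sobject serializes dotted keys into nested relationship XML. Copying that method verbatim out of the diff and calling it with a hypothetical field and value shows the shape of the output:

    def build_relationship_sobject(key, value)
      if key.to_s.include? "."
        relations = key.to_s.split(".")
        parent = relations[0]
        child = relations[1..].join(".")
        xml = "<#{parent}>"
        xml << "<sObject>"
        xml << build_relationship_sobject(child, value)
        xml << "</sObject>"
        xml << "</#{parent}>"
      else
        "<#{key}>#{value}</#{key}>"
      end
    end

    puts build_relationship_sobject("Account.Name", "Acme")
    # => <Account><sObject><Name>Acme</Name></sObject></Account>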
data/lib/salesforce_bulk_api.rb
CHANGED

@@ -16,28 +16,37 @@ module SalesforceBulkApi
     def initialize(client, salesforce_api_version = "46.0")
       @connection = SalesforceBulkApi::Connection.new(salesforce_api_version, client)
       @listeners = {job_created: []}
-      @counters = Hash.new(0)
     end

-
-
-      do_operation(operation, sobject, records, external_field, **options)
-    end
+    def upsert(sobject, records, external_field, get_response = false, send_nulls = false, no_null_list = [], batch_size = 10000, timeout = 1500)
+      do_operation("upsert", sobject, records, external_field, get_response, timeout, batch_size, send_nulls, no_null_list)
     end

-    def
-      do_operation("
+    def update(sobject, records, get_response = false, send_nulls = false, no_null_list = [], batch_size = 10000, timeout = 1500)
+      do_operation("update", sobject, records, nil, get_response, timeout, batch_size, send_nulls, no_null_list)
+    end
+
+    def create(sobject, records, get_response = false, send_nulls = false, batch_size = 10000, timeout = 1500)
+      do_operation("insert", sobject, records, nil, get_response, timeout, batch_size, send_nulls)
+    end
+
+    def delete(sobject, records, get_response = false, batch_size = 10000, timeout = 1500)
+      do_operation("delete", sobject, records, nil, get_response, timeout, batch_size)
+    end
+
+    def query(sobject, query, batch_size = 10000, timeout = 1500)
+      do_operation("query", sobject, query, nil, true, timeout, batch_size)
     end

     def counters
       {
         http_get: @connection.counters[:get],
         http_post: @connection.counters[:post],
-        upsert:
-        update:
-        create:
-        delete:
-        query:
+        upsert: get_counters[:upsert],
+        update: get_counters[:update],
+        create: get_counters[:create],
+        delete: get_counters[:delete],
+        query: get_counters[:query]
       }
     end

@@ -54,10 +63,8 @@ module SalesforceBulkApi
       SalesforceBulkApi::Job.new(job_id: job_id, connection: @connection)
     end

-
-
-    def do_operation(operation, sobject, records, external_field, **options)
-      count(operation.to_sym)
+    def do_operation(operation, sobject, records, external_field, get_response, timeout, batch_size, send_nulls = false, no_null_list = [])
+      count operation.to_sym

       job = SalesforceBulkApi::Job.new(
         operation: operation,

@@ -67,18 +74,22 @@ module SalesforceBulkApi
         connection: @connection
       )

-      job.create_job(
+      job.create_job(batch_size, send_nulls, no_null_list)
       @listeners[:job_created].each { |callback| callback.call(job) }
-
       (operation == "query") ? job.add_query : job.add_batches
-
       response = job.close_job
-      response
+      response["batches"] = job.get_job_result(get_response, timeout) if get_response == true
       response
     end

+    private
+
+    def get_counters
+      @counters ||= Hash.new { |hash, key| hash[key] = 0 }
+    end
+
     def count(name)
-
+      get_counters[name] += 1
     end
   end
 end
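The restored positional signatures above read most clearly from a call site. A hedged usage sketch, assuming the gem's usual SalesforceBulkApi::Api entry point (the environment variable names, Restforce credentials, and Contact fields are placeholders, not part of this diff):

    require "restforce"
    require "salesforce_bulk_api"

    client = Restforce.new(
      username: ENV["SALESFORCE_USERNAME"],
      password: ENV["SALESFORCE_PASSWORD"],
      security_token: ENV["SALESFORCE_SECURITY_TOKEN"],
      client_id: ENV["SALESFORCE_CLIENT_ID"],
      client_secret: ENV["SALESFORCE_CLIENT_SECRET"]
    )

    salesforce = SalesforceBulkApi::Api.new(client)

    # Passing get_response = true makes do_operation attach the batch results
    # under response["batches"] via job.get_job_result(get_response, timeout).
    response = salesforce.upsert(
      "Contact",
      [{"Email" => "jane@example.com", "LastName" => "Doe"}],
      "Email",  # external_field used to match existing records
      true      # get_response
    )
    response["batches"].each { |batch| puts batch["state"][0] }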
data/salesforce_bulk_api.gemspec
CHANGED

@@ -14,12 +14,15 @@ Gem::Specification.new do |s|
   s.add_dependency("json", [">= 0"])
   s.add_dependency("xml-simple", [">= 0"])
   s.add_dependency("csv", [">= 0"])
+  s.add_dependency("logger", [">= 0"])

   s.add_development_dependency "rspec"
-  s.add_development_dependency "restforce", "~>
+  s.add_development_dependency "restforce", "~> 8.0.0"
   s.add_development_dependency "rake", ">= 12.3.3"
   s.add_development_dependency "pry"
-  s.add_development_dependency "
+  s.add_development_dependency "rubocop"
+  s.add_development_dependency "rubocop-rake"
+  s.add_development_dependency "dotenv"

   s.files = `git ls-files`.split("\n")
   s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }