salesforce_bulk_api 1.1.0 → 1.3.0
This diff reflects the changes between publicly released versions of the package as they appear in its public registry, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.env.sample +7 -0
- data/.github/workflows/ci.yml +36 -0
- data/.gitignore +1 -0
- data/.rspec +1 -1
- data/.rubocop.yml +1927 -0
- data/CHANGELOG.md +0 -0
- data/LICENCE +1 -1
- data/README.md +426 -71
- data/Rakefile +3 -3
- data/lib/salesforce_bulk_api/concerns/throttling.rb +1 -3
- data/lib/salesforce_bulk_api/connection.rb +12 -14
- data/lib/salesforce_bulk_api/job.rb +83 -85
- data/lib/salesforce_bulk_api/version.rb +1 -1
- data/lib/salesforce_bulk_api.rb +21 -22
- data/salesforce_bulk_api.gemspec +20 -18
- data/spec/salesforce_bulk_api/salesforce_bulk_api_spec.rb +100 -143
- data/spec/spec_helper.rb +7 -4
- metadata +81 -12
data/lib/salesforce_bulk_api/job.rb
CHANGED
@@ -1,19 +1,19 @@
 module SalesforceBulkApi
-
   class Job
     attr_reader :job_id

     class SalesforceException < StandardError; end

+    XML_HEADER = '<?xml version="1.0" encoding="utf-8" ?>'.freeze
+
     def initialize(args)
-      @job_id         = args[:job_id]
-      @operation      = args[:operation]
-      @sobject        = args[:sobject]
+      @job_id = args[:job_id]
+      @operation = args[:operation]
+      @sobject = args[:sobject]
       @external_field = args[:external_field]
-      @records        = args[:records]
-      @connection     = args[:connection]
-      @batch_ids      = []
-      @XML_HEADER     = '<?xml version="1.0" encoding="utf-8" ?>'
+      @records = args[:records]
+      @connection = args[:connection]
+      @batch_ids = []
     end

     def create_job(batch_size, send_nulls, no_null_list)
@@ -21,35 +21,35 @@ module SalesforceBulkApi
       @send_nulls = send_nulls
       @no_null_list = no_null_list

-      xml = "#{@XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
-      xml += "<operation>#{@operation}</operation>"
-      xml += "<object>#{@sobject}</object>"
+      xml = "#{XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
+      xml << "<operation>#{@operation}</operation>"
+      xml << "<object>#{@sobject}</object>"
       # This only happens on upsert
       if !@external_field.nil?
-        xml += "<externalIdFieldName>#{@external_field}</externalIdFieldName>"
+        xml << "<externalIdFieldName>#{@external_field}</externalIdFieldName>"
       end
-      xml += "<contentType>XML</contentType>"
-      xml += "</jobInfo>"
+      xml << "<contentType>XML</contentType>"
+      xml << "</jobInfo>"

       path = "job"
-      headers = Hash["Content-Type" => "application/xml; charset=utf-8"]
+      headers = {"Content-Type" => "application/xml; charset=utf-8"}

       response = @connection.post_xml(nil, path, xml, headers)
       response_parsed = XmlSimple.xml_in(response)

       # response may contain an exception, so raise it
-      raise SalesforceException.new("#{response_parsed['exceptionMessage'][0]} (#{response_parsed['exceptionCode'][0]})") if response_parsed['exceptionCode']
+      raise SalesforceException.new("#{response_parsed["exceptionMessage"][0]} (#{response_parsed["exceptionCode"][0]})") if response_parsed["exceptionCode"]

-      @job_id = response_parsed['id'][0]
+      @job_id = response_parsed["id"][0]
     end

-    def close_job()
-      xml = "#{@XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
-      xml += "<state>Closed</state>"
-      xml += "</jobInfo>"
+    def close_job
+      xml = "#{XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
+      xml << "<state>Closed</state>"
+      xml << "</jobInfo>"

       path = "job/#{@job_id}"
-      headers = Hash["Content-Type" => "application/xml; charset=utf-8"]
+      headers = {"Content-Type" => "application/xml; charset=utf-8"}

       response = @connection.post_xml(nil, path, xml, headers)
       XmlSimple.xml_in(response)
@@ -57,17 +57,19 @@ module SalesforceBulkApi

     def add_query
       path = "job/#{@job_id}/batch/"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}

       response = @connection.post_xml(nil, path, @records, headers)
       response_parsed = XmlSimple.xml_in(response)

-      @batch_ids << response_parsed['id'][0]
+      @batch_ids << response_parsed["id"][0]
     end

     def add_batches
-      raise 'Records must be an array of hashes.' unless @records.is_a? Array
-      keys = @records.inject({}){|h, pairs| pairs.each {|k, v| (h[k] ||= []) << v}; h}.keys
+      raise ArgumentError, "Records must be an array of hashes." unless @records.is_a? Array
+      keys = @records.each_with_object({}) { |pairs, h|
+        pairs.each { |k, v| (h[k] ||= []) << v }
+      }.keys

       @records_dup = @records.clone

@@ -83,86 +85,86 @@ module SalesforceBulkApi
     end

     def add_batch(keys, batch)
-      xml = "#{@XML_HEADER}<sObjects xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"
+      xml = "#{XML_HEADER}<sObjects xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"
       batch.each do |r|
-        xml += create_sobject(keys, r)
+        xml << create_sobject(keys, r)
       end
-      xml += '</sObjects>'
+      xml << "</sObjects>"
       path = "job/#{@job_id}/batch/"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}
       response = @connection.post_xml(nil, path, xml, headers)
       response_parsed = XmlSimple.xml_in(response)
-      response_parsed['id'][0] if response_parsed['id']
+      response_parsed["id"][0] if response_parsed["id"]
     end

     def build_sobject(data)
-      xml = '<sObject>'
+      xml = "<sObject>"
       data.keys.each do |k|
         if k.is_a?(Hash)
-          xml += build_sobject(k)
-        elsif k.to_s.include? '.'
-          relations = k.to_s.split('.')
+          xml << build_sobject(k)
+        elsif k.to_s.include? "."
+          relations = k.to_s.split(".")
           parent = relations[0]
-          child = relations[1..-1].join('.')
-          xml += "<#{parent}>#{build_sobject({child => data[k]})}</#{parent}>"
+          child = relations[1..].join(".")
+          xml << "<#{parent}>#{build_sobject({child => data[k]})}</#{parent}>"
         elsif data[k] != :type
-          xml += "<#{k}>#{data[k]}</#{k}>"
+          xml << "<#{k}>#{data[k]}</#{k}>"
         end
       end
-      xml += '</sObject>'
+      xml << "</sObject>"
     end

     def build_relationship_sobject(key, value)
-      if key.to_s.include? '.'
-        relations = key.to_s.split('.')
+      if key.to_s.include? "."
+        relations = key.to_s.split(".")
         parent = relations[0]
-        child = relations[1..-1].join('.')
+        child = relations[1..].join(".")
         xml = "<#{parent}>"
-        xml += '<sObject>'
-        xml += build_relationship_sobject(child, value)
-        xml += '</sObject>'
-        xml += "</#{parent}>"
+        xml << "<sObject>"
+        xml << build_relationship_sobject(child, value)
+        xml << "</sObject>"
+        xml << "</#{parent}>"
       else
-        xml = "<#{key}>#{value}</#{key}>"
+        "<#{key}>#{value}</#{key}>"
       end
     end

     def create_sobject(keys, r)
-      sobject_xml = '<sObject>'
+      sobject_xml = "<sObject>"
       keys.each do |k|
         if r[k].is_a?(Hash)
-          sobject_xml += "<#{k}>"
-          sobject_xml += build_sobject(r[k])
-          sobject_xml += "</#{k}>"
-        elsif k.to_s.include? '.'
-          sobject_xml += build_relationship_sobject(k, r[k])
+          sobject_xml << "<#{k}>"
+          sobject_xml << build_sobject(r[k])
+          sobject_xml << "</#{k}>"
+        elsif k.to_s.include? "."
+          sobject_xml << build_relationship_sobject(k, r[k])
         elsif !r[k].to_s.empty?
-          sobject_xml += "<#{k}>"
-          if r[k].respond_to?(:encode)
-            sobject_xml += r[k].encode(:xml => :text)
+          sobject_xml << "<#{k}>"
+          sobject_xml << if r[k].respond_to?(:encode)
+            r[k].encode(xml: :text)
           elsif r[k].respond_to?(:iso8601) # timestamps
-            sobject_xml += r[k].iso8601.to_s
+            r[k].iso8601.to_s
           else
-            sobject_xml += r[k].to_s
+            r[k].to_s
           end
-          sobject_xml += "</#{k}>"
+          sobject_xml << "</#{k}>"
         elsif @send_nulls && !@no_null_list.include?(k) && r.key?(k)
-          sobject_xml += "<#{k} xsi:nil=\"true\"/>"
+          sobject_xml << "<#{k} xsi:nil=\"true\"/>"
         end
       end
-      sobject_xml += '</sObject>'
+      sobject_xml << "</sObject>"
       sobject_xml
     end

     def check_job_status
       path = "job/#{@job_id}"
-      headers = Hash.new
+      headers = {}
       response = @connection.get_request(nil, path, headers)

       begin
         response_parsed = XmlSimple.xml_in(response) if response
         response_parsed
-      rescue Exception => e
+      rescue => e
         puts "Error parsing XML response for #{@job_id}"
         puts e
         puts e.backtrace
@@ -171,14 +173,14 @@ module SalesforceBulkApi

     def check_batch_status(batch_id)
       path = "job/#{@job_id}/batch/#{batch_id}"
-      headers = Hash.new
+      headers = {}

       response = @connection.get_request(nil, path, headers)

       begin
         response_parsed = XmlSimple.xml_in(response) if response
         response_parsed
-      rescue Exception => e
+      rescue => e
         puts "Error parsing XML response for #{@job_id}, batch #{batch_id}"
         puts e
         puts e.backtrace
@@ -189,15 +191,15 @@ module SalesforceBulkApi
       # timeout is in seconds
       begin
         state = []
-        Timeout::timeout(timeout, SalesforceBulkApi::JobTimeout) do
-          while true
-            job_status = self.check_job_status
-            if job_status && job_status['state'] && job_status['state'][0] == 'Closed'
+        Timeout.timeout(timeout, SalesforceBulkApi::JobTimeout) do
+          loop do
+            job_status = check_job_status
+            if job_status && job_status["state"] && job_status["state"][0] == "Closed"
               batch_statuses = {}

               batches_ready = @batch_ids.all? do |batch_id|
-                batch_state = batch_statuses[batch_id] = self.check_batch_status(batch_id)
-                batch_state && batch_state['state'] && batch_state['state'][0] && !['Queued', 'InProgress'].include?(batch_state['state'][0])
+                batch_state = batch_statuses[batch_id] = check_batch_status(batch_id)
+                batch_state && batch_state["state"] && batch_state["state"][0] && !["Queued", "InProgress"].include?(batch_state["state"][0])
               end

               if batches_ready
@@ -213,14 +215,14 @@ module SalesforceBulkApi
           end
         end
       rescue SalesforceBulkApi::JobTimeout => e
-        puts 'Timeout waiting for Salesforce to process job batches #{@batch_ids} of job #{@job_id}.'
+        puts "Timeout waiting for Salesforce to process job batches #{@batch_ids} of job #{@job_id}."
         puts e
         raise
       end

       state.each_with_index do |batch_state, i|
-        if batch_state['state'][0] == 'Completed' && return_result == true
-          state[i].merge!({'response' => self.get_batch_result(batch_state['id'][0])})
+        if batch_state["state"][0] == "Completed" && return_result == true
+          state[i].merge!({"response" => get_batch_result(batch_state["id"][0])})
         end
       end
       state
@@ -228,35 +230,31 @@ module SalesforceBulkApi

     def get_batch_result(batch_id)
       path = "job/#{@job_id}/batch/#{batch_id}/result"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}

       response = @connection.get_request(nil, path, headers)
       response_parsed = XmlSimple.xml_in(response)
-      results = response_parsed['result'] unless @operation == 'query'
+      results = response_parsed["result"] unless @operation == "query"

-      if(@operation == 'query') # The query op requires us to do another request to get the results
+      if @operation == "query" # The query op requires us to do another request to get the results
         result_id = response_parsed["result"][0]
         path = "job/#{@job_id}/batch/#{batch_id}/result/#{result_id}"
-        headers = Hash.new
-        headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
+        headers = {"Content-Type" => "application/xml; charset=UTF-8"}
         response = @connection.get_request(nil, path, headers)
         response_parsed = XmlSimple.xml_in(response)
-        results = response_parsed['records']
+        results = response_parsed["records"]
       end
       results
     end

     def get_batch_records(batch_id)
       path = "job/#{@job_id}/batch/#{batch_id}/request"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
+      headers = {"Content-Type" => "application/xml; charset=UTF-8"}

       response = @connection.get_request(nil, path, headers)
       response_parsed = XmlSimple.xml_in(response)
-      results = response_parsed['sObject']
-
-      results
+      response_parsed["sObject"]
     end
-
   end

   class JobTimeout < StandardError
data/lib/salesforce_bulk_api.rb
CHANGED
@@ -1,13 +1,13 @@
-require 'rubygems'
-require 'bundler'
-require 'net/https'
-require 'xmlsimple'
-require 'csv'
+require "rubygems"
+require "bundler"
+require "net/https"
+require "xmlsimple"
+require "csv"

-require 'salesforce_bulk_api/version'
-require 'salesforce_bulk_api/concerns/throttling'
-require 'salesforce_bulk_api/job'
-require 'salesforce_bulk_api/connection'
+require "salesforce_bulk_api/version"
+require "salesforce_bulk_api/concerns/throttling"
+require "salesforce_bulk_api/job"
+require "salesforce_bulk_api/connection"

 module SalesforceBulkApi
   class Api
@@ -15,27 +15,27 @@ module SalesforceBulkApi

     def initialize(client, salesforce_api_version = "46.0")
       @connection = SalesforceBulkApi::Connection.new(salesforce_api_version, client)
-      @listeners = { job_created: [] }
+      @listeners = {job_created: []}
     end

     def upsert(sobject, records, external_field, get_response = false, send_nulls = false, no_null_list = [], batch_size = 10000, timeout = 1500)
-      do_operation('upsert', sobject, records, external_field, get_response, timeout, batch_size, send_nulls, no_null_list)
+      do_operation("upsert", sobject, records, external_field, get_response, timeout, batch_size, send_nulls, no_null_list)
     end

     def update(sobject, records, get_response = false, send_nulls = false, no_null_list = [], batch_size = 10000, timeout = 1500)
-      do_operation('update', sobject, records, nil, get_response, timeout, batch_size, send_nulls, no_null_list)
+      do_operation("update", sobject, records, nil, get_response, timeout, batch_size, send_nulls, no_null_list)
     end

     def create(sobject, records, get_response = false, send_nulls = false, batch_size = 10000, timeout = 1500)
-      do_operation('insert', sobject, records, nil, get_response, timeout, batch_size, send_nulls)
+      do_operation("insert", sobject, records, nil, get_response, timeout, batch_size, send_nulls)
     end

     def delete(sobject, records, get_response = false, batch_size = 10000, timeout = 1500)
-      do_operation('delete', sobject, records, nil, get_response, timeout, batch_size)
+      do_operation("delete", sobject, records, nil, get_response, timeout, batch_size)
     end

     def query(sobject, query, batch_size = 10000, timeout = 1500)
-      do_operation('query', sobject, query, nil, true, timeout, batch_size)
+      do_operation("query", sobject, query, nil, true, timeout, batch_size)
     end

     def counters
@@ -50,8 +50,8 @@ module SalesforceBulkApi
       }
     end

-    # Allows you to attach a listener that accepts the created job (which has a useful #job_id field). This is useful
-    # for recording a job ID persistently before you begin batch work (i.e. start modifying the salesforce database),
+    # Allows you to attach a listener that accepts the created job (which has a useful #job_id field).
+    # This is useful for recording a job ID persistently before you begin batch work (i.e. start modifying the salesforce database),
     # so if the load process you are writing needs to recover, it can be aware of previous jobs it started and wait
     # for them to finish.
     #
@@ -75,22 +75,21 @@ module SalesforceBulkApi
       )

       job.create_job(batch_size, send_nulls, no_null_list)
-      @listeners[:job_created].each {|callback| callback.call(job)}
-      operation == "query" ? job.add_query() : job.add_batches()
+      @listeners[:job_created].each { |callback| callback.call(job) }
+      (operation == "query") ? job.add_query : job.add_batches
       response = job.close_job
-      response['batches'] = job.get_job_result(get_response, timeout) if get_response == true
+      response["batches"] = job.get_job_result(get_response, timeout) if get_response == true
       response
     end

     private

     def get_counters
-      @counters ||= Hash.new {|hash, key| hash[key] = 0}
+      @counters ||= Hash.new { |hash, key| hash[key] = 0 }
     end

     def count(name)
       get_counters[name] += 1
     end
-
   end
 end
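
The `Api` diff above preserves the public interface: `upsert`/`update`/`create`/`delete`/`query` still delegate to `do_operation` with the operation name as a string. A usage sketch, under the assumption (per the gem's README) that job listeners are registered with `on_job_created`; all credentials are placeholders:

require "restforce"
require "salesforce_bulk_api"

# Any Restforce-style client object is accepted by Api.new.
client = Restforce.new(
  username: ENV["SALESFORCE_USERNAME"],
  password: ENV["SALESFORCE_PASSWORD"],
  security_token: ENV["SALESFORCE_SECURITY_TOKEN"],
  client_id: ENV["SALESFORCE_CLIENT_ID"],
  client_secret: ENV["SALESFORCE_CLIENT_SECRET"]
)
salesforce = SalesforceBulkApi::Api.new(client)

# Fires after create_job but before any batches are added, so the job id
# can be persisted for crash recovery (see the comment in the diff above).
salesforce.on_job_created { |job| puts "created job #{job.job_id}" }

salesforce.create("Account", [{"Name" => "Test Account"}])
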
data/salesforce_bulk_api.gemspec
CHANGED
@@ -1,28 +1,30 @@
-# -*- encoding: utf-8 -*-
 $:.push File.expand_path("../lib", __FILE__)
 require "salesforce_bulk_api/version"

 Gem::Specification.new do |s|
-  s.name        = 'salesforce_bulk_api'
-  s.version     = SalesforceBulkApi::VERSION
-  s.authors     = ['Yatish Mehta']
-  s.email       = ['yatish27@users.noreply.github.com']
+  s.name = "salesforce_bulk_api"
+  s.version = SalesforceBulkApi::VERSION
+  s.authors = ["Yatish Mehta"]
+  s.email = ["yatish27@users.noreply.github.com"]

-  s.homepage    = 'https://github.com/yatishmehta27/salesforce_bulk_api'
-  s.summary     = %q{It uses the bulk api of salesforce to communicate with Salesforce CRM}
-  s.description = %q{Salesforce Bulk API with governor limits taken care of}
+  s.homepage = "https://github.com/yatishmehta27/salesforce_bulk_api"
+  s.summary = "It uses the bulk api of salesforce to communicate with Salesforce CRM"
+  s.description = "Salesforce Bulk API with governor limits taken care of"

-  s.add_dependency('json', ['>= 0'])
-  s.add_dependency('xml-simple', ['>= 0'])
+  s.add_dependency("json", [">= 0"])
+  s.add_dependency("xml-simple", [">= 0"])
+  s.add_dependency("csv", [">= 0"])
+  s.add_dependency("logger", [">= 0"])

-  s.add_development_dependency 'rspec'
-  s.add_development_dependency 'restforce'
+  s.add_development_dependency "rspec"
+  s.add_development_dependency "restforce", "~> 8.0.0"
   s.add_development_dependency "rake", ">= 12.3.3"
-  s.add_development_dependency 'pry'
-
-  s.files = `git ls-files`.split("\n")
-  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
-  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
-  s.require_paths = ['lib']
+  s.add_development_dependency "pry"
+  s.add_development_dependency "rubocop"
+  s.add_development_dependency "rubocop-rake"
+  s.add_development_dependency "dotenv"

+  s.files = `git ls-files`.split("\n")
+  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
+  s.require_paths = ["lib"]
 end
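
The gemspec changes drop the obsolete magic encoding comment, pin restforce for development, and declare `csv` and `logger` as runtime dependencies, consistent with newer Ruby versions moving both out of the default standard library. A consumer needs nothing beyond the gem itself; a minimal Gemfile sketch (the version constraint is illustrative):

# Gemfile
source "https://rubygems.org"

gem "salesforce_bulk_api", "~> 1.3"
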