salesforce_bulk_api 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,21 @@
-module SalesforceBulkApi
+require "timeout"
 
+module SalesforceBulkApi
   class Job
     attr_reader :job_id
 
     class SalesforceException < StandardError; end
 
+    XML_HEADER = '<?xml version="1.0" encoding="utf-8" ?>'.freeze
+
     def initialize(args)
-      @job_id = args[:job_id]
-      @operation = args[:operation]
-      @sobject = args[:sobject]
+      @job_id = args[:job_id]
+      @operation = args[:operation]
+      @sobject = args[:sobject]
       @external_field = args[:external_field]
-      @records = args[:records]
-      @connection = args[:connection]
-      @batch_ids = []
-      @XML_HEADER = '<?xml version="1.0" encoding="utf-8" ?>'
+      @records = args[:records]
+      @connection = args[:connection]
+      @batch_ids = []
     end
 
     def create_job(batch_size, send_nulls, no_null_list)
@@ -21,244 +23,121 @@ module SalesforceBulkApi
       @send_nulls = send_nulls
       @no_null_list = no_null_list
 
-      xml = "#{@XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
-      xml += "<operation>#{@operation}</operation>"
-      xml += "<object>#{@sobject}</object>"
-      # This only happens on upsert
-      if !@external_field.nil?
-        xml += "<externalIdFieldName>#{@external_field}</externalIdFieldName>"
-      end
-      xml += "<contentType>XML</contentType>"
-      xml += "</jobInfo>"
-
-      path = "job"
-      headers = Hash['Content-Type' => 'application/xml; charset=utf-8']
-
-      response = @connection.post_xml(nil, path, xml, headers)
-      response_parsed = XmlSimple.xml_in(response)
-
-      # response may contain an exception, so raise it
-      raise SalesforceException.new("#{response_parsed['exceptionMessage'][0]} (#{response_parsed['exceptionCode'][0]})") if response_parsed['exceptionCode']
-
-      @job_id = response_parsed['id'][0]
+      xml = build_job_xml
+      response = post_xml("job", xml)
+      parse_job_response(response)
     end
 
-    def close_job()
-      xml = "#{@XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
-      xml += "<state>Closed</state>"
-      xml += "</jobInfo>"
-
-      path = "job/#{@job_id}"
-      headers = Hash['Content-Type' => 'application/xml; charset=utf-8']
-
-      response = @connection.post_xml(nil, path, xml, headers)
+    def close_job
+      xml = build_close_job_xml
+      response = post_xml("job/#{@job_id}", xml)
       XmlSimple.xml_in(response)
     end
 
     def add_query
-      path = "job/#{@job_id}/batch/"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
-
-      response = @connection.post_xml(nil, path, @records, headers)
+      response = post_xml("job/#{@job_id}/batch/", @records)
       response_parsed = XmlSimple.xml_in(response)
-
-      @batch_ids << response_parsed['id'][0]
+      @batch_ids << response_parsed["id"][0]
     end
 
     def add_batches
-      raise 'Records must be an array of hashes.' unless @records.is_a? Array
-      keys = @records.reduce({}) {|h, pairs| pairs.each {|k, v| (h[k] ||= []) << v}; h}.keys
+      raise ArgumentError, "Records must be an array of hashes." unless @records.is_a?(Array)
 
-      @records_dup = @records.clone
+      keys = @records.each_with_object({}) { |pairs, h| pairs.each { |k, v| (h[k] ||= []) << v } }.keys
+      batches = @records.each_slice(@batch_size).to_a
 
-      super_records = []
-      (@records_dup.size / @batch_size).to_i.times do
-        super_records << @records_dup.pop(@batch_size)
-      end
-      super_records << @records_dup unless @records_dup.empty?
-
-      super_records.each do |batch|
+      batches.each do |batch|
         @batch_ids << add_batch(keys, batch)
       end
     end
 
-    def add_batch(keys, batch)
-      xml = "#{@XML_HEADER}<sObjects xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"
-      batch.each do |r|
-        xml += create_sobject(keys, r)
-      end
-      xml += '</sObjects>'
-      path = "job/#{@job_id}/batch/"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
-      response = @connection.post_xml(nil, path, xml, headers)
-      response_parsed = XmlSimple.xml_in(response)
-      response_parsed['id'][0] if response_parsed['id']
-    end
-
-    def build_sobject(data)
-      xml = '<sObject>'
-      data.keys.each do |k|
-        if k.is_a?(Hash)
-          xml += build_sobject(k)
-        elsif k.to_s.include? '.'
-          relations = k.to_s.split('.')
-          parent = relations[0]
-          child = relations[1..-1].join('.')
-          xml += "<#{parent}>#{build_sobject({ child => data[k] })}</#{parent}>"
-        elsif data[k] != :type
-          xml += "<#{k}>#{data[k]}</#{k}>"
+    def get_job_result(return_result, timeout)
+      state = []
+      Timeout.timeout(timeout, JobTimeout) do
+        loop do
+          job_status = check_job_status
+          break unless job_closed_and_batches_completed?(job_status, state)
+          break if @batch_ids.empty?
         end
       end
-      xml += '</sObject>'
+    rescue JobTimeout => e
+      handle_timeout(e)
+    ensure
+      process_batch_results(state) if return_result
+      state
     end
 
-    def build_relationship_sobject(key, value)
-      if key.to_s.include? '.'
-        relations = key.to_s.split('.')
-        parent = relations[0]
-        child = relations[1..-1].join('.')
-        xml = "<#{parent}>"
-        xml += "<sObject>"
-        xml += build_relationship_sobject(child, value)
-        xml += "</sObject>"
-        xml += "</#{parent}>"
-      else
-        xml = "<#{key}>#{value}</#{key}>"
-      end
+    private
+
+    def build_job_xml
+      xml = "#{XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
+      xml << "<operation>#{@operation}</operation>"
+      xml << "<object>#{@sobject}</object>"
+      xml << "<externalIdFieldName>#{@external_field}</externalIdFieldName>" if @external_field
+      xml << "<contentType>XML</contentType>"
+      xml << "</jobInfo>"
     end
 
-    def create_sobject(keys, r)
-      sobject_xml = '<sObject>'
-      keys.each do |k|
-        if r[k].is_a?(Hash)
-          sobject_xml += "<#{k}>"
-          sobject_xml += build_sobject(r[k])
-          sobject_xml += "</#{k}>"
-        elsif k.to_s.include? '.'
-          sobject_xml += build_relationship_sobject(k, r[k])
-        elsif !r[k].to_s.empty?
-          sobject_xml += "<#{k}>"
-          if r[k].respond_to?(:encode)
-            sobject_xml += r[k].encode(:xml => :text)
-          elsif r[k].respond_to?(:iso8601) # timestamps
-            sobject_xml += r[k].iso8601.to_s
-          else
-            sobject_xml += r[k].to_s
-          end
-          sobject_xml += "</#{k}>"
-        elsif @send_nulls && !@no_null_list.include?(k) && r.key?(k)
-          sobject_xml += "<#{k} xsi:nil=\"true\"/>"
-        end
-      end
-      sobject_xml += '</sObject>'
-      sobject_xml
+    def build_close_job_xml
+      "#{XML_HEADER}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\"><state>Closed</state></jobInfo>"
    end
 
-    def check_job_status
-      path = "job/#{@job_id}"
-      headers = Hash.new
-      response = @connection.get_request(nil, path, headers)
+    def post_xml(path, xml)
+      headers = {"Content-Type" => "application/xml; charset=utf-8"}
+      @connection.post_xml(nil, path, xml, headers)
+    end
 
-      begin
-        response_parsed = XmlSimple.xml_in(response) if response
-        response_parsed
-      rescue StandardError => e
-        puts "Error parsing XML response for #{@job_id}"
-        puts e
-        puts e.backtrace
+    def parse_job_response(response)
+      response_parsed = XmlSimple.xml_in(response)
+      if response_parsed["exceptionCode"]
+        raise SalesforceException, "#{response_parsed["exceptionMessage"][0]} (#{response_parsed["exceptionCode"][0]})"
      end
+      @job_id = response_parsed["id"][0]
    end
 
-    def check_batch_status(batch_id)
-      path = "job/#{@job_id}/batch/#{batch_id}"
-      headers = Hash.new
-
-      response = @connection.get_request(nil, path, headers)
+    def add_batch(keys, batch)
+      xml = "#{XML_HEADER}<sObjects xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"
+      batch.each { |r| xml << create_sobject(keys, r) }
+      xml << "</sObjects>"
 
-      begin
-        response_parsed = XmlSimple.xml_in(response) if response
-        response_parsed
-      rescue StandardError => e
-        puts "Error parsing XML response for #{@job_id}, batch #{batch_id}"
-        puts e
-        puts e.backtrace
-      end
+      response = post_xml("job/#{@job_id}/batch/", xml)
+      response_parsed = XmlSimple.xml_in(response)
+      response_parsed["id"]&.first
    end
 
-    def get_job_result(return_result, timeout)
-      # timeout is in seconds
-      begin
-        state = []
-        Timeout::timeout(timeout, SalesforceBulkApi::JobTimeout) do
-          while true
-            job_status = self.check_job_status
-            if job_status && job_status['state'] && job_status['state'][0] == 'Closed'
-              batch_statuses = {}
-
-              batches_ready = @batch_ids.all? do |batch_id|
-                batch_state = batch_statuses[batch_id] = self.check_batch_status(batch_id)
-                batch_state && batch_state['state'] && batch_state['state'][0] && !['Queued', 'InProgress'].include?(batch_state['state'][0])
-              end
+    def job_closed_and_batches_completed?(job_status, state)
+      return false unless job_status && job_status["state"] && job_status["state"][0] == "Closed"
 
-              if batches_ready
-                @batch_ids.each do |batch_id|
-                  state.insert(0, batch_statuses[batch_id])
-                  @batch_ids.delete(batch_id)
-                end
-              end
-              break if @batch_ids.empty?
-            else
-              break
-            end
-          end
-        end
-      rescue SalesforceBulkApi::JobTimeout => e
-        puts 'Timeout waiting for Salesforce to process job batches #{@batch_ids} of job #{@job_id}.'
-        puts e
-        raise
+      batch_statuses = {}
+      batches_ready = @batch_ids.all? do |batch_id|
+        batch_state = batch_statuses[batch_id] = check_batch_status(batch_id)
+        batch_state && batch_state["state"] && batch_state["state"][0] && !["Queued", "InProgress"].include?(batch_state["state"][0])
      end
 
-      state.each_with_index do |batch_state, i|
-        if batch_state['state'][0] == 'Completed' && return_result == true
-          state[i].merge!({'response' => self.get_batch_result(batch_state['id'][0])})
+      if batches_ready
+        @batch_ids.each do |batch_id|
+          state.unshift(batch_statuses[batch_id])
+          @batch_ids.delete(batch_id)
        end
      end
-      state
-    end
 
-    def get_batch_result(batch_id)
-      path = "job/#{@job_id}/batch/#{batch_id}/result"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
-
-      response = @connection.get_request(nil, path, headers)
-      response_parsed = XmlSimple.xml_in(response)
-      results = response_parsed['result'] unless @operation == 'query'
-
-      if(@operation == 'query') # The query op requires us to do another request to get the results
-        result_id = response_parsed["result"][0]
-        path = "job/#{@job_id}/batch/#{batch_id}/result/#{result_id}"
-        headers = Hash.new
-        headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
-        response = @connection.get_request(nil, path, headers)
-        response_parsed = XmlSimple.xml_in(response)
-        results = response_parsed['records']
-      end
-      results
+      true
    end
 
-    def get_batch_records(batch_id)
-      path = "job/#{@job_id}/batch/#{batch_id}/request"
-      headers = Hash["Content-Type" => "application/xml; charset=UTF-8"]
-
-      response = @connection.get_request(nil, path, headers)
-      response_parsed = XmlSimple.xml_in(response)
-      results = response_parsed['sObject']
-
-      results
+    def handle_timeout(error)
+      puts "Timeout waiting for Salesforce to process job batches #{@batch_ids} of job #{@job_id}."
+      puts error
+      raise
    end
 
+    def process_batch_results(state)
+      state.each_with_index do |batch_state, i|
+        if batch_state["state"][0] == "Completed"
+          state[i].merge!("response" => get_batch_result(batch_state["id"][0]))
+        end
+      end
+    end
  end
 
-  class JobTimeout < StandardError
-  end
+  class JobTimeout < StandardError; end
 end
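
For orientation, the lifecycle the refactored Job class drives is unchanged: create the job, add batches (or a query), close the job, then poll for results. The following is a rough usage sketch rather than code from either release; `connection` stands in for a SalesforceBulkApi::Connection instance and the Contact fields are made up.

  # Hypothetical driver code for SalesforceBulkApi::Job (illustrative only)
  job = SalesforceBulkApi::Job.new(
    operation: "upsert",
    sobject: "Contact",
    external_field: "External_Id__c",
    records: [{"External_Id__c" => "A-1", "LastName" => "Doe"}],
    connection: connection
  )
  job.create_job(10_000, false, [])        # batch_size, send_nulls, no_null_list
  job.add_batches                          # job.add_query for the "query" operation
  response = job.close_job
  batches = job.get_job_result(true, 1500) # return_result, timeout in seconds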
@@ -1,3 +1,3 @@
 module SalesforceBulkApi
-  VERSION = '1.1.0'
+  VERSION = "1.2.0"
 end
@@ -1,13 +1,13 @@
-require 'rubygems'
-require 'bundler'
-require 'net/https'
-require 'xmlsimple'
-require 'csv'
+require "rubygems"
+require "bundler"
+require "net/https"
+require "xmlsimple"
+require "csv"
 
-require 'salesforce_bulk_api/version'
-require 'salesforce_bulk_api/concerns/throttling'
-require 'salesforce_bulk_api/job'
-require 'salesforce_bulk_api/connection'
+require "salesforce_bulk_api/version"
+require "salesforce_bulk_api/concerns/throttling"
+require "salesforce_bulk_api/job"
+require "salesforce_bulk_api/connection"
 
 module SalesforceBulkApi
   class Api
@@ -15,43 +15,34 @@ module SalesforceBulkApi
 
     def initialize(client, salesforce_api_version = "46.0")
       @connection = SalesforceBulkApi::Connection.new(salesforce_api_version, client)
-      @listeners = { job_created: [] }
+      @listeners = {job_created: []}
+      @counters = Hash.new(0)
     end
 
-    def upsert(sobject, records, external_field, get_response = false, send_nulls = false, no_null_list = [], batch_size = 10000, timeout = 1500)
-      do_operation('upsert', sobject, records, external_field, get_response, timeout, batch_size, send_nulls, no_null_list)
+    %w[upsert update create delete].each do |operation|
+      define_method(operation) do |sobject, records, external_field = nil, **options|
+        do_operation(operation, sobject, records, external_field, **options)
+      end
     end
 
-    def update(sobject, records, get_response = false, send_nulls = false, no_null_list = [], batch_size = 10000, timeout = 1500)
-      do_operation('update', sobject, records, nil, get_response, timeout, batch_size, send_nulls, no_null_list)
-    end
-
-    def create(sobject, records, get_response = false, send_nulls = false, batch_size = 10000, timeout = 1500)
-      do_operation('insert', sobject, records, nil, get_response, timeout, batch_size, send_nulls)
-    end
-
-    def delete(sobject, records, get_response = false, batch_size = 10000, timeout = 1500)
-      do_operation('delete', sobject, records, nil, get_response, timeout, batch_size)
-    end
-
-    def query(sobject, query, batch_size = 10000, timeout = 1500)
-      do_operation('query', sobject, query, nil, true, timeout, batch_size)
+    def query(sobject, query, **)
+      do_operation("query", sobject, query, nil, get_response: true, **)
     end
 
     def counters
       {
         http_get: @connection.counters[:get],
         http_post: @connection.counters[:post],
-        upsert: get_counters[:upsert],
-        update: get_counters[:update],
-        create: get_counters[:create],
-        delete: get_counters[:delete],
-        query: get_counters[:query]
+        upsert: @counters[:upsert],
+        update: @counters[:update],
+        create: @counters[:create],
+        delete: @counters[:delete],
+        query: @counters[:query]
       }
     end
 
-    # Allows you to attach a listener that accepts the created job (which has a useful #job_id field). This is useful
-    # for recording a job ID persistently before you begin batch work (i.e. start modifying the salesforce database),
+    # Allows you to attach a listener that accepts the created job (which has a useful #job_id field).
+    # This is useful for recording a job ID persistently before you begin batch work (i.e. start modifying the salesforce database),
     # so if the load process you are writing needs to recover, it can be aware of previous jobs it started and wait
     # for them to finish.
     #
@@ -63,8 +54,10 @@ module SalesforceBulkApi
       SalesforceBulkApi::Job.new(job_id: job_id, connection: @connection)
     end
 
-    def do_operation(operation, sobject, records, external_field, get_response, timeout, batch_size, send_nulls = false, no_null_list = [])
-      count operation.to_sym
+    private
+
+    def do_operation(operation, sobject, records, external_field, **options)
+      count(operation.to_sym)
 
       job = SalesforceBulkApi::Job.new(
         operation: operation,
@@ -74,23 +67,18 @@ module SalesforceBulkApi
         connection: @connection
       )
 
-      job.create_job(batch_size, send_nulls, no_null_list)
-      @listeners[:job_created].each {|callback| callback.call(job)}
-      operation == "query" ? job.add_query() : job.add_batches()
-      response = job.close_job
-      response.merge!({'batches' => job.get_job_result(get_response, timeout)}) if get_response == true
-      response
-    end
+      job.create_job(options[:batch_size], options[:send_nulls], options[:no_null_list])
+      @listeners[:job_created].each { |callback| callback.call(job) }
 
-    private
+      (operation == "query") ? job.add_query : job.add_batches
 
-    def get_counters
-      @counters ||= Hash.new(0)
+      response = job.close_job
+      response.merge!("batches" => job.get_job_result(options[:get_response], options[:timeout])) if options[:get_response]
+      response
     end
 
     def count(name)
-      get_counters[name] += 1
+      @counters[name] += 1
     end
-
   end
 end
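
The collapsed entry points above replace five near-identical method definitions with define_method-generated variants, so batch_size, timeout, get_response, send_nulls, and no_null_list are now keyword options rather than positional arguments. The following is a rough usage sketch rather than code from either release; the Restforce credentials and field names are placeholders.

  # Hypothetical caller code (illustrative only)
  client = Restforce.new(username: "...", password: "...")
  api = SalesforceBulkApi::Api.new(client)

  records = [{"External_Id__c" => "A-1", "LastName" => "Doe"}]
  api.upsert("Contact", records, "External_Id__c",
    get_response: true, send_nulls: false, batch_size: 10_000, timeout: 1500)
  api.query("Contact", "SELECT Id FROM Contact", batch_size: 10_000, timeout: 1500)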
@@ -1,28 +1,27 @@
-# -*- encoding: utf-8 -*-
 $:.push File.expand_path("../lib", __FILE__)
 require "salesforce_bulk_api/version"
 
 Gem::Specification.new do |s|
-  s.name = 'salesforce_bulk_api'
-  s.version = SalesforceBulkApi::VERSION
-  s.authors = ['Yatish Mehta']
-  s.email = ['hi@example.com']
+  s.name = "salesforce_bulk_api"
+  s.version = SalesforceBulkApi::VERSION
+  s.authors = ["Yatish Mehta"]
+  s.email = ["yatish27@users.noreply.github.com"]
 
-  s.homepage = 'https://github.com/yatishmehta27/salesforce_bulk_api'
-  s.summary = %q{It uses the bulk api of salesforce to communicate with Salesforce CRM}
-  s.description = %q{Salesforce Bulk API with governor limits taken care of}
+  s.homepage = "https://github.com/yatishmehta27/salesforce_bulk_api"
+  s.summary = "It uses the bulk api of salesforce to communicate with Salesforce CRM"
+  s.description = "Salesforce Bulk API with governor limits taken care of"
 
-  s.add_dependency('json', ['>= 0'])
-  s.add_dependency('xml-simple', ['>= 0'])
+  s.add_dependency("json", [">= 0"])
+  s.add_dependency("xml-simple", [">= 0"])
+  s.add_dependency("csv", [">= 0"])
 
-  s.add_development_dependency 'rspec'
-  s.add_development_dependency 'restforce', '~> 3.0.0'
+  s.add_development_dependency "rspec"
+  s.add_development_dependency "restforce", "~> 3.0.0"
   s.add_development_dependency "rake", ">= 12.3.3"
-  s.add_development_dependency 'pry'
-
-  s.files = `git ls-files`.split("\n")
-  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
-  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
-  s.require_paths = ['lib']
+  s.add_development_dependency "pry"
+  s.add_development_dependency "standardrb"
 
+  s.files = `git ls-files`.split("\n")
+  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
+  s.require_paths = ["lib"]
 end