salesforcebulk 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/.gitignore +4 -0
  2. data/.rbenv-version +1 -0
  3. data/Gemfile +2 -0
  4. data/README.md +143 -0
  5. data/Rakefile +22 -0
  6. data/lib/salesforce_bulk.rb +15 -0
  7. data/lib/salesforce_bulk/batch.rb +50 -0
  8. data/lib/salesforce_bulk/batch_result.rb +39 -0
  9. data/lib/salesforce_bulk/batch_result_collection.rb +29 -0
  10. data/lib/salesforce_bulk/client.rb +254 -0
  11. data/lib/salesforce_bulk/core_extensions/string.rb +14 -0
  12. data/lib/salesforce_bulk/job.rb +70 -0
  13. data/lib/salesforce_bulk/query_result_collection.rb +48 -0
  14. data/lib/salesforce_bulk/salesforce_error.rb +30 -0
  15. data/lib/salesforce_bulk/version.rb +3 -0
  16. data/salesforcebulk.gemspec +28 -0
  17. data/test/fixtures/batch_create_request.csv +3 -0
  18. data/test/fixtures/batch_create_response.xml +13 -0
  19. data/test/fixtures/batch_info_list_response.xml +27 -0
  20. data/test/fixtures/batch_info_response.xml +13 -0
  21. data/test/fixtures/batch_result_list_response.csv +3 -0
  22. data/test/fixtures/config.yml +7 -0
  23. data/test/fixtures/invalid_batch_error.xml +5 -0
  24. data/test/fixtures/invalid_error.xml +5 -0
  25. data/test/fixtures/invalid_job_error.xml +5 -0
  26. data/test/fixtures/invalid_session_error.xml +5 -0
  27. data/test/fixtures/job_abort_request.xml +1 -0
  28. data/test/fixtures/job_abort_response.xml +25 -0
  29. data/test/fixtures/job_close_request.xml +1 -0
  30. data/test/fixtures/job_close_response.xml +25 -0
  31. data/test/fixtures/job_create_request.xml +1 -0
  32. data/test/fixtures/job_create_response.xml +25 -0
  33. data/test/fixtures/job_info_response.xml +25 -0
  34. data/test/fixtures/login_error.xml +1 -0
  35. data/test/fixtures/login_request.xml +1 -0
  36. data/test/fixtures/login_response.xml +39 -0
  37. data/test/fixtures/query_result_list_response.xml +1 -0
  38. data/test/fixtures/query_result_response.csv +5 -0
  39. data/test/lib/test_batch.rb +258 -0
  40. data/test/lib/test_batch_result.rb +40 -0
  41. data/test/lib/test_core_extensions.rb +15 -0
  42. data/test/lib/test_initialization.rb +86 -0
  43. data/test/lib/test_job.rb +256 -0
  44. data/test/lib/test_query_result_collection.rb +87 -0
  45. data/test/test_helper.rb +32 -0
  46. metadata +222 -0
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
@@ -0,0 +1 @@
1
+ 1.9.3-p194
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "http://rubygems.org"
2
+ gemspec
@@ -0,0 +1,143 @@
1
+ # SalesforceBulk
2
+
3
+ ## Overview
4
+
5
+ Salesforce Bulk is a simple Ruby gem for connecting to and using the [Salesforce Bulk API](http://www.salesforce.com/us/developer/docs/api_asynch/index.htm). This is a rewrite of Jorge Valdivia's salesforce_bulk gem with unit tests and full API capability.
6
+
7
+ ## Installation
8
+
9
+ Install SalesforceBulk from RubyGems:
10
+
11
+ gem install salesforcebulk
12
+
13
+ Or include it in your project's `Gemfile` with Bundler:
14
+
15
+ gem 'salesforcebulk'
16
+
17
+ ## Contribute
18
+
19
+ To contribute, fork this repo, create a topic branch, make changes, then send a pull request. Pull requests without accompanying tests will *not* be accepted. To run tests in your fork, just do:
20
+
21
+ bundle install
22
+ rake
23
+
24
+ ## Configuration and Initialization
25
+
26
+ ### Basic Configuration
27
+
28
+ require 'salesforce_bulk'
29
+
30
+ client = SalesforceBulk::Client.new(username: 'MyUsername', password: 'MyPassword', token: 'MySecurityToken')
31
+ client.authenticate
32
+
33
+ Optional keys include host (default: login.salesforce.com), version (default: 24.0) and debugging (default: false).
34
+
35
+ ### Configuring from a YAML file
36
+
37
+ The optional keys mentioned in the Basic Configuration section can also be used here.
38
+
39
+ ---
40
+ username: MyUsername
41
+ password: MyPassword
42
+ token: MySecurityToken
43
+
44
+ Then in a Ruby script:
45
+
46
+ require 'salesforce_bulk'
47
+
48
+ client = SalesforceBulk::Client.new("config/salesforce_bulk.yml")
49
+ client.authenticate
50
+
51
+ ## Usage Examples
52
+
53
+ An important note about the data in any of the examples: each hash in a data set must have the same set of keys. If you need to have logic to not include certain values simply specify a nil value for a key.
54
+
55
+ ### Basic Overall Example
56
+
57
+ data1 = [{:Name__c => 'Test 1'}, {:Name__c => 'Test 2'}]
58
+ data2 = [{:Name__c => 'Test 3'}, {:Name__c => 'Test 4'}]
59
+
60
+ job = client.add_job(:insert, :MyObject__c)
61
+
62
+ # easily add multiple batches to a job
63
+ batch = client.add_batch(job.id, data1)
64
+ batch = client.add_batch(job.id, data2)
65
+
66
+ job = client.close_job(job.id) # or use the abort_job(id) method
67
+
68
+ ### Adding a Job
69
+
70
+ When adding a job you can specify the following operations for the first argument:
71
+ - :delete
72
+ - :insert
73
+ - :update
74
+ - :upsert
75
+ - :query
76
+
77
+ When using the :upsert operation you must specify an external ID field name:
78
+
79
+ job = client.add_job(:upsert, :MyObject__c, :external_id_field_name => :MyId__c)
80
+
81
+ For any operation you should be able to specify a concurrency mode. The default is Parallel. The other choice is Serial.
82
+
83
+ job = client.add_job(:upsert, :MyObject__c, :concurrency_mode => :Serial, :external_id_field_name => :MyId__c)
84
+
85
+ ### Retrieving Job Information (e.g. Status)
86
+
87
+ job = client.job_info(jobId) # returns a Job object
88
+
89
+ puts "Job #{job.id} is closed." if job.closed? # other: open?, aborted?
90
+
91
+ ### Retrieving Info for all Batches
92
+
93
+ batches = client.batch_info_list(jobId) # returns an Array of Batch objects
94
+
95
+ batches.each do |batch|
96
+ puts "Batch #{batch.id} failed." if batch.failed? # other: completed?, failed?, in_progress?, queued?
97
+ end
98
+
99
+ ### Retrieving Info for a single Batch
100
+
101
+ batch = client.batch_info(jobId, batchId) # returns a Batch object
102
+
103
+ puts "Batch #{batch.id} is in progress." if batch.in_progress?
104
+
105
+ ### Retrieving Batch Results (for Delete, Insert, Update and Upsert)
106
+
107
+ To verify that a batch completed successfully or failed, call the `batch_info` or `batch_info_list` methods first; otherwise, if you call `batch_result` without verifying and the batch has failed, the method will raise an error.
108
+
109
+ The object returned from the following example only applies to the operations: delete, insert, update and upsert. Query results are handled differently.
110
+
111
+ results = client.batch_result(jobId, batchId) # returns an Array of BatchResult objects
112
+
113
+ results.each do |result|
114
+ puts "Item #{result.id} had an error of: #{result.error}" if result.error?
115
+ end
116
+
117
+ ### Retrieving Query based Batch Results
118
+
119
+ To verify that a batch completed successfully or failed call the `batch_info` or `batch_info_list` methods first, otherwise if you call `batch_result` without verifying and the batch failed the method will raise an error.
120
+
121
+ Query results are handled differently as the response will not contain the full result set. You'll have to page through sets if you added multiple batches to a job.
122
+
123
+ # returns a QueryResultCollection object (an Array)
124
+ results = client.batch_result(jobId, batchId)
125
+
126
+ while results.any?
127
+
128
+ # Assuming query was: SELECT Id, Name, CustomField__c FROM Account
129
+ results.each do |result|
130
+ puts result[:Id], result[:Name], result[:CustomField__c]
131
+ end
132
+
133
+ puts "Another set is available." if results.next?
134
+
135
+ results.next
136
+
137
+ end
138
+
139
+ Note: By reviewing the API docs and response format, my understanding was that the API would return multiple result sets for a single batch if the query result was too large, but this does not seem to be the case in my live testing. It seems to be capped at 10000 records (as when inserting data), but I haven't been able to verify this through the documentation. If you know anything about that, your input is appreciated. In the meantime the gem was built to support multiple result sets for a query batch, but it seems that will change, which will simplify that method.
140
+
141
+ ## Copyright
142
+
143
+ Copyright (c) 2012 Javier Julio.
@@ -0,0 +1,22 @@
1
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# Run the unit test suite with `rake test` (also the default task).
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

task :default => :test

namespace :doc do
  require 'rdoc/task'
  require File.expand_path('../lib/salesforce_bulk/version', __FILE__)

  # Generate RDoc documentation into the rdoc/ directory.
  RDoc::Task.new do |rdoc|
    rdoc.rdoc_dir = 'rdoc'
    rdoc.title = "SalesforceBulk #{SalesforceBulk::VERSION}"
    rdoc.main = 'README.md'
    rdoc.rdoc_files.include('README.md', 'LICENSE.md', 'lib/**/*.rb')
  end
end
@@ -0,0 +1,15 @@
1
+ require 'net/https'
2
+ require 'xmlsimple'
3
+ require 'csv'
4
+ require 'active_support'
5
+ require 'active_support/core_ext/object/blank'
6
+ require 'active_support/core_ext/hash/keys'
7
+ require 'salesforce_bulk/version'
8
+ require 'salesforce_bulk/core_extensions/string'
9
+ require 'salesforce_bulk/salesforce_error'
10
+ require 'salesforce_bulk/client'
11
+ require 'salesforce_bulk/job'
12
+ require 'salesforce_bulk/batch'
13
+ require 'salesforce_bulk/batch_result'
14
+ require 'salesforce_bulk/batch_result_collection'
15
+ require 'salesforce_bulk/query_result_collection'
@@ -0,0 +1,50 @@
1
module SalesforceBulk
  # Represents the state of a single batch within a Bulk API job, as
  # reported by the batchInfo resource.
  class Batch

    # Processing time spent in Apex, in milliseconds.
    attr_accessor :apex_processing_time
    # Active API processing time, in milliseconds.
    attr_accessor :api_active_processing_time
    # Timestamp from systemModstamp (last modification of the batch).
    attr_accessor :completed_at
    # Timestamp the batch was created.
    attr_accessor :created_at
    # Number of records that failed in this batch.
    attr_accessor :failed_records
    # The batch's unique id.
    attr_accessor :id
    # Id of the job this batch belongs to.
    attr_accessor :job_id
    # Number of records processed in this batch.
    attr_accessor :processed_records
    # Current state string: Queued, InProgress, Completed or Failed.
    attr_accessor :state
    # Total processing time, in milliseconds.
    attr_accessor :total_processing_time

    # Builds a Batch from a parsed batchInfo XML hash. Keys are the
    # camelCase strings returned by the Salesforce Bulk API.
    def self.new_from_xml(data)
      batch = self.new
      batch.id = data['id']
      batch.job_id = data['jobId']
      batch.state = data['state']
      batch.created_at = DateTime.parse(data['createdDate'])
      # NOTE(review): systemModstamp is the last-modified time, used here
      # as a completion timestamp.
      batch.completed_at = DateTime.parse(data['systemModstamp'])
      batch.processed_records = data['numberRecordsProcessed'].to_i
      batch.failed_records = data['numberRecordsFailed'].to_i
      batch.total_processing_time = data['totalProcessingTime'].to_i
      batch.api_active_processing_time = data['apiActiveProcessingTime'].to_i
      # Fixed: the API returns 'apexProcessingTime' (camelCase), not
      # 'apex_processing_time'; the old key always yielded 0.
      batch.apex_processing_time = data['apexProcessingTime'].to_i
      batch
    end

    def in_progress?
      state? 'InProgress'
    end

    def queued?
      state? 'Queued'
    end

    def completed?
      state? 'Completed'
    end

    def failed?
      state? 'Failed'
    end

    # Case-insensitive comparison of the current state against +value+.
    def state?(value)
      self.state.present? && self.state.casecmp(value) == 0
    end
  end
end
@@ -0,0 +1,39 @@
1
module SalesforceBulk
  # The outcome of a single record processed by a non-query batch
  # (delete, insert, update or upsert).
  class BatchResult

    # Boolean: true when the record was newly created, false when it was
    # updated instead.
    attr_accessor :created

    # The error message for this record, if any.
    attr_accessor :error

    # The record's unique id.
    attr_accessor :id

    # Boolean: true when the operation on this record succeeded; when
    # false, an error message is provided.
    attr_accessor :success

    def initialize(id, success, created, error)
      @id = id
      @success = success
      @created = created
      @error = error
    end

    # True when an error message is present for this record.
    def error?
      error.present?
    end

    # True when the record was newly created.
    def created?
      created
    end

    # True when the operation on this record succeeded.
    def successful?
      success
    end

    # True when the record already existed and was modified.
    def updated?
      !created && success
    end
  end
end
@@ -0,0 +1,29 @@
1
module SalesforceBulk
  # An Array of BatchResult objects for a single batch, tagged with the
  # owning job and batch ids.
  class BatchResultCollection < Array

    # Id of the batch these results belong to.
    attr_reader :batch_id
    # Id of the job these results belong to.
    attr_reader :job_id

    def initialize(job_id, batch_id)
      # Initialize the underlying Array before setting our own state.
      super()
      @job_id = job_id
      @batch_id = batch_id
    end

    # True if any result carries an error message. `error` is nil for
    # successful rows, so coerce via to_s to avoid a NoMethodError.
    def any_failures?
      self.any? { |result| result.error.to_s.length > 0 }
    end

    # Results that carry an error message.
    def failed
      self.select { |result| result.error.to_s.length > 0 }
    end

    # Results whose operation succeeded.
    def completed
      self.select { |result| result.success }
    end

    # Results whose operation succeeded and created a new record.
    def created
      self.select { |result| result.success && result.created }
    end

  end
end
@@ -0,0 +1,254 @@
1
module SalesforceBulk
  # Interface for operating the Salesforce Bulk REST API.
  class Client
    # If true, print API debugging information to stdout. Defaults to false.
    attr_accessor :debugging

    # The host to use for authentication. Defaults to login.salesforce.com.
    attr_accessor :host

    # The instance host to use for API calls. Determined from login response.
    attr_accessor :instance_host

    # The Salesforce password (with the security token appended).
    attr_accessor :password

    # The Salesforce security token.
    attr_accessor :token

    # The Salesforce username.
    attr_accessor :username

    # The API version the client is using. Defaults to 24.0.
    attr_accessor :version

    # Accepts either an options Hash or a path to a YAML file containing
    # the same keys. Valid keys: :username, :password, :token, :debugging,
    # :host, :version.
    def initialize(options={})
      if options.is_a?(String)
        options = YAML.load_file(options)
        options.symbolize_keys!
      end

      options = {:debugging => false, :host => 'login.salesforce.com', :version => 24.0}.merge(options)

      options.assert_valid_keys(:username, :password, :token, :debugging, :host, :version)

      self.username = options[:username]
      # Salesforce expects the security token appended to the password.
      self.password = "#{options[:password]}#{options[:token]}"
      self.token = options[:token]
      self.debugging = options[:debugging]
      self.host = options[:host]
      self.version = options[:version]

      @api_path_prefix = "/services/async/#{self.version}/"
      @valid_operations = [:delete, :insert, :update, :upsert, :query]
      @valid_concurrency_modes = ['Parallel', 'Serial']
    end

    # Logs in via the SOAP partner API, storing the session id and the
    # instance host used for all subsequent Bulk API calls.
    def authenticate
      xml = '<?xml version="1.0" encoding="utf-8"?>'
      xml += '<env:Envelope xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
      xml += ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
      xml += ' xmlns:env="http://schemas.xmlsoap.org/soap/envelope/">'
      xml += "<env:Body>"
      xml += '<n1:login xmlns:n1="urn:partner.soap.sforce.com">'
      xml += "<n1:username>#{self.username}</n1:username>"
      xml += "<n1:password>#{self.password}</n1:password>"
      xml += "</n1:login>"
      xml += "</env:Body>"
      xml += "</env:Envelope>"

      response = http_post("/services/Soap/u/#{self.version}", xml, 'Content-Type' => 'text/xml', 'SOAPAction' => 'login')

      data = XmlSimple.xml_in(response.body, :ForceArray => false)
      result = data['Body']['loginResponse']['result']

      @session_id = result['sessionId']

      self.instance_host = "#{instance_id(result['serverUrl'])}.salesforce.com"
    end

    # Aborts the job with the given id. Returns the updated Job.
    def abort_job(jobId)
      xml = '<?xml version="1.0" encoding="utf-8"?>'
      xml += '<jobInfo xmlns="http://www.force.com/2009/06/asyncapi/dataload">'
      xml += "<state>Aborted</state>"
      xml += "</jobInfo>"

      response = http_post("job/#{jobId}", xml)
      data = XmlSimple.xml_in(response.body, :ForceArray => false)
      Job.new_from_xml(data)
    end

    # Adds a batch to a job. +data+ is either an Array of Hashes (each
    # with an identical key set, converted to CSV here) or a raw String
    # body (e.g. a SOQL query for a :query job). Returns a Batch.
    def add_batch(jobId, data)
      body = data

      if data.is_a?(Array)
        raise ArgumentError, "Data set exceeds 10000 record limit by #{data.length - 10000}" if data.length > 10000

        keys = data.first.keys
        body = keys.to_csv

        data.each do |item|
          item_values = keys.map { |key| item[key] }
          body += item_values.to_csv
        end
      end

      # Despite the content for a query operation batch being plain text we
      # still have to specify CSV content type per API docs.
      response = http_post("job/#{jobId}/batch", body, "Content-Type" => "text/csv; charset=UTF-8")
      result = XmlSimple.xml_in(response.body, 'ForceArray' => false)
      Batch.new_from_xml(result)
    end

    # Creates a new job for +sobject+. +operation+ is one of :delete,
    # :insert, :update, :upsert or :query. Options: :external_id_field_name
    # (required for :upsert) and :concurrency_mode (:Parallel or :Serial).
    # Returns a Job.
    def add_job(operation, sobject, options={})
      operation = operation.to_sym.downcase

      raise ArgumentError.new("Invalid operation: #{operation}") unless @valid_operations.include?(operation)

      options.assert_valid_keys(:external_id_field_name, :concurrency_mode)

      if options[:concurrency_mode]
        concurrency_mode = options[:concurrency_mode].capitalize
        raise ArgumentError.new("Invalid concurrency mode: #{concurrency_mode}") unless @valid_concurrency_modes.include?(concurrency_mode)
      end

      xml = '<?xml version="1.0" encoding="utf-8"?>'
      xml += '<jobInfo xmlns="http://www.force.com/2009/06/asyncapi/dataload">'
      xml += "<operation>#{operation}</operation>"
      xml += "<object>#{sobject}</object>"
      xml += "<externalIdFieldName>#{options[:external_id_field_name]}</externalIdFieldName>" if options[:external_id_field_name]
      xml += "<concurrencyMode>#{options[:concurrency_mode]}</concurrencyMode>" if options[:concurrency_mode]
      xml += "<contentType>CSV</contentType>"
      xml += "</jobInfo>"

      response = http_post("job", xml)
      data = XmlSimple.xml_in(response.body, :ForceArray => false)
      Job.new_from_xml(data)
    end

    # Returns an Array of Batch objects for every batch in the job.
    def batch_info_list(jobId)
      response = http_get("job/#{jobId}/batch")
      result = XmlSimple.xml_in(response.body, 'ForceArray' => false)

      # A single batch parses to a Hash rather than an Array; normalize.
      if result['batchInfo'].is_a?(Array)
        result['batchInfo'].collect do |info|
          Batch.new_from_xml(info)
        end
      else
        [Batch.new_from_xml(result['batchInfo'])]
      end
    end

    # Returns a Batch object describing one batch in a job.
    def batch_info(jobId, batchId)
      response = http_get("job/#{jobId}/batch/#{batchId}")
      result = XmlSimple.xml_in(response.body, 'ForceArray' => false)
      Batch.new_from_xml(result)
    end

    # Returns the results for a batch. Query batches return a
    # QueryResultCollection; all other operations return a
    # BatchResultCollection of BatchResult objects.
    def batch_result(jobId, batchId)
      response = http_get("job/#{jobId}/batch/#{batchId}/result")

      # Query batches respond with XML listing result ids; other
      # operations respond with CSV rows.
      if response.body =~ /<.*?>/m
        result = XmlSimple.xml_in(response.body)

        # NOTE(review): when the XML contains no <result> elements this
        # method returns nil — callers should verify the batch completed
        # first (see README).
        if result['result'].present?
          results = query_result(jobId, batchId, result['result'].first)

          collection = QueryResultCollection.new(self, jobId, batchId, result['result'].first, result['result'])
          collection.replace(results)
        end
      else
        result = BatchResultCollection.new(jobId, batchId)

        CSV.parse(response.body, :headers => true) do |row|
          result << BatchResult.new(row[0], row[1].to_b, row[2].to_b, row[3])
        end

        result
      end
    end

    # Fetches a single result set of a query batch and parses the CSV
    # into an Array of Hashes keyed by symbolized column headers.
    def query_result(job_id, batch_id, result_id)
      headers = {"Content-Type" => "text/csv; charset=UTF-8"}
      response = http_get("job/#{job_id}/batch/#{batch_id}/result/#{result_id}", headers)

      lines = response.body.lines.to_a
      headers = CSV.parse_line(lines.shift).collect { |header| header.to_sym }

      result = []

      CSV.parse(lines.join, :headers => headers) do |row|
        result << Hash[row.headers.zip(row.fields)]
      end

      result
    end

    # Closes the job with the given id. Returns the updated Job.
    def close_job(jobId)
      xml = '<?xml version="1.0" encoding="utf-8"?>'
      xml += '<jobInfo xmlns="http://www.force.com/2009/06/asyncapi/dataload">'
      xml += "<state>Closed</state>"
      xml += "</jobInfo>"

      response = http_post("job/#{jobId}", xml)
      data = XmlSimple.xml_in(response.body, :ForceArray => false)
      Job.new_from_xml(data)
    end

    # Returns a Job object describing the job's current state.
    def job_info(jobId)
      response = http_get("job/#{jobId}")
      data = XmlSimple.xml_in(response.body, :ForceArray => false)
      Job.new_from_xml(data)
    end

    # POSTs +body+ to +path+. Before authentication the login host is
    # used as-is; afterwards requests go to the instance host under the
    # async API prefix with the session id header attached.
    # Raises SalesforceError on any non-2xx response.
    def http_post(path, body, headers={})
      host = self.host

      headers = {'Content-Type' => 'application/xml'}.merge(headers)

      if @session_id
        headers['X-SFDC-Session'] = @session_id
        host = self.instance_host
        path = "#{@api_path_prefix}#{path}"
      end

      response = https_request(host).post(path, body, headers)

      if response.is_a?(Net::HTTPSuccess)
        response
      else
        raise SalesforceError.new(response)
      end
    end

    # GETs +path+ from the instance host under the async API prefix.
    # Raises SalesforceError on any non-2xx response.
    def http_get(path, headers={})
      path = "#{@api_path_prefix}#{path}"

      headers = {'Content-Type' => 'application/xml'}.merge(headers)

      if @session_id
        headers['X-SFDC-Session'] = @session_id
      end

      response = https_request(self.instance_host).get(path, headers)

      if response.is_a?(Net::HTTPSuccess)
        response
      else
        raise SalesforceError.new(response)
      end
    end

    # Builds an HTTPS connection to +host+ on port 443.
    def https_request(host)
      req = Net::HTTP.new(host, 443)
      req.use_ssl = true
      # Fixed: was VERIFY_NONE, which disabled TLS certificate checking
      # and exposed credentials to man-in-the-middle attacks.
      req.verify_mode = OpenSSL::SSL::VERIFY_PEER
      req
    end

    # Extracts the instance subdomain (e.g. "na1") from a server URL.
    def instance_id(url)
      # Fixed: dot escaped so a literal "." before "salesforce" is required.
      url.match(/:\/\/([a-zA-Z0-9-]{2,})\.salesforce/)[1]
    end
  end
end