salesforce_bulk_query-edge 0.2.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/.travis.yml +26 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +168 -0
- data/Rakefile +20 -0
- data/env_setup-example.sh +13 -0
- data/lib/salesforce_bulk_query.rb +108 -0
- data/lib/salesforce_bulk_query/batch.rb +153 -0
- data/lib/salesforce_bulk_query/connection.rb +140 -0
- data/lib/salesforce_bulk_query/job.rb +199 -0
- data/lib/salesforce_bulk_query/logger.rb +44 -0
- data/lib/salesforce_bulk_query/query.rb +192 -0
- data/lib/salesforce_bulk_query/utils.rb +16 -0
- data/lib/salesforce_bulk_query/version.rb +3 -0
- data/new-version.sh +22 -0
- data/salesforce_bulk_query.gemspec +34 -0
- data/spec/salesforce_bulk_query_spec.rb +227 -0
- data/spec/spec_helper.rb +9 -0
- metadata +207 -0
data/lib/salesforce_bulk_query/connection.rb

@@ -0,0 +1,140 @@

```ruby
require 'xmlsimple'
require 'net/http'

module SalesforceBulkQuery

  # Connection to the Salesforce API
  # shared in all classes that do some requests
  class Connection
    def initialize(client, api_version, logger=nil, filename_prefix=nil, ssl_version=nil)
      @client = client
      @logger = logger
      @filename_prefix = filename_prefix
      @ssl_version = ssl_version

      @@API_VERSION = api_version
      @@PATH_PREFIX = "/services/async/#{@@API_VERSION}/"
    end

    attr_reader :client

    XML_REQUEST_HEADER = {'Content-Type' => 'application/xml; charset=utf-8'}
    CSV_REQUEST_HEADER = {'Content-Type' => 'text/csv; charset=UTF-8'}

    def session_header
      {'X-SFDC-Session' => @client.options[:oauth_token]}
    end

    def parse_xml(xml)
      parsed = nil
      begin
        parsed = XmlSimple.xml_in(xml)
      rescue => e
        @logger.error "Error parsing xml: #{xml}\n#{e}\n#{e.backtrace}"
        raise
      end

      return parsed
    end

    def post_xml(path, xml, options={})
      path = "#{@@PATH_PREFIX}#{path}"
      headers = options[:csv_content_type] ? CSV_REQUEST_HEADER : XML_REQUEST_HEADER

      response = nil
      # do the request
      with_retries do
        begin
          response = @client.post(path, xml, headers.merge(session_header))
        rescue JSON::ParserError => e
          if e.message.index('ExceededQuota')
            raise "You've run out of sfdc batch api quota. Original error: #{e}\n #{e.backtrace}"
          end
          raise e
        end
      end

      return parse_xml(response.body)
    end

    def get_xml(path, options={})
      path = "#{@@PATH_PREFIX}#{path}"
      headers = XML_REQUEST_HEADER

      response = nil
      with_retries do
        response = @client.get(path, {}, headers.merge(session_header))
      end

      return options[:skip_parsing] ? response.body : parse_xml(response.body)
    end

    def get_to_file(path, filename)
      path = "#{@@PATH_PREFIX}#{path}"
      uri = URI.parse(@client.options[:instance_url])
      # open a file
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http.ssl_version = @ssl_version if !@ssl_version.nil?
      headers = XML_REQUEST_HEADER.merge(session_header)
      @logger.info "Doing GET to #{path}, headers #{headers}" if @logger

      if @filename_prefix
        filename = "#{@filename_prefix}_#{filename}"
      end

      # do the request
      http.request_get(path, headers) do |res|

        @logger.info "Got response #{res.inspect}, reading response body by chunks and writing to #{filename}" if @logger

        File.open(filename, 'w') do |file|
          # write the body to the file by chunks
          res.read_body do |segment|
            file.write(segment.encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => "?"))
          end
        end
      end
    end

    def with_retries
      i = 0
      begin
        yield
      rescue => e
        i += 1
        if i < 3
          @logger.warn "Retrying, got error: #{e}, #{e.backtrace}" if @logger
          retry
        else
          @logger.error "Failed 3 times, last error: #{e}, #{e.backtrace}" if @logger
          raise
        end
      end
    end

    def query_count(sobject, date_field, from, to)
      # do it with retries, if it doesn't succeed, return nil, don't fail.
      soql = "SELECT COUNT() FROM #{sobject} WHERE #{date_field} >= #{from} AND #{date_field} < #{to}"
      begin
        with_retries do
          q = @client.query(soql)
          return q.size
        end
      rescue Faraday::Error::TimeoutError => e
        @logger.warn "Timeout getting count: #{soql}. Error: #{e}. Taking it as failed verification" if @logger
        return nil
      end
    end

    def to_log
      return {
        :client => "Restforce asi",
        :filename_prefix => @filename_prefix,
        :api_version => @@API_VERSION,
        :path_prefix => @@PATH_PREFIX
      }
    end
  end
end
```
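Connection is an internal wrapper around a Restforce client: it signs every Bulk API call with the session header, prefixes paths with /services/async/<api_version>/, retries failed requests up to three times, and streams batch results straight to a file. A minimal usage sketch, assuming a Restforce client authenticated elsewhere; the credentials, environment variable names, and API version below are placeholders, not part of this diff, and the gem's real entry point lives in data/lib/salesforce_bulk_query.rb, which is listed above but not shown in this hunk:

```ruby
require 'logger'
require 'restforce'
require 'salesforce_bulk_query'

# Placeholder credentials -- assumptions for illustration only.
restforce = Restforce.new(
  :username       => ENV['SFDC_USERNAME'],
  :password       => ENV['SFDC_PASSWORD'],
  :security_token => ENV['SFDC_TOKEN'],
  :client_id      => ENV['SFDC_CLIENT_ID'],
  :client_secret  => ENV['SFDC_CLIENT_SECRET'],
  :api_version    => '29.0'
)

logger = Logger.new(STDOUT)

# The connection reuses the Restforce OAuth token for the Bulk (async) API.
connection = SalesforceBulkQuery::Connection.new(restforce, '29.0', logger)

# query_count is what the batch verification step uses to cross-check row counts.
puts connection.query_count('Opportunity', 'CreatedDate',
  '2014-01-01T00:00:00.000Z', '2014-02-01T00:00:00.000Z')
```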
data/lib/salesforce_bulk_query/job.rb

@@ -0,0 +1,199 @@

```ruby
require "salesforce_bulk_query/batch"

module SalesforceBulkQuery

  # Represents a Salesforce bulk api job, contains multiple batches.
  # Many jobs are contained in a Query
  class Job
    @@operation = 'query'
    @@xml_header = '<?xml version="1.0" encoding="utf-8" ?>'
    JOB_TIME_LIMIT = 15 * 60
    BATCH_COUNT = 15

    def initialize(sobject, connection, options={})
      @sobject = sobject
      @connection = connection
      @logger = options[:logger]
      @job_time_limit = options[:job_time_limit] || JOB_TIME_LIMIT
      @date_field = options[:date_field] or fail "date_field must be given when creating a batch"
      @batch_count = options[:batch_count] || BATCH_COUNT

      # all batches (static)
      @batches = []

      # unfinished batches as of the last get_available_results call
      @unfinished_batches = []

      # filenames for the already downloaded and verified batches
      @filenames = []
    end

    attr_reader :job_id

    # Do the API request
    def create_job(csv=true)
      content_type = csv ? "CSV" : "XML"
      xml = "#{@@xml_header}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
      xml += "<operation>#{@@operation}</operation>"
      xml += "<object>#{@sobject}</object>"
      xml += "<contentType>#{content_type}</contentType>"
      xml += "</jobInfo>"

      response_parsed = @connection.post_xml("job", xml)
      @job_id = response_parsed['id'][0]
    end

    def get_extended_soql(soql, from, to)
      return "#{soql} WHERE #{@date_field} >= #{from} AND #{@date_field} < #{to}"
    end

    def generate_batches(soql, start, stop, single_batch=false)
      # if there's just one batch wanted, add it and we're done
      if single_batch
        soql_extended = get_extended_soql(soql, start, stop)
        @logger.info "Adding soql #{soql_extended} as a batch to job" if @logger

        add_query(soql_extended,
          :start => start,
          :stop => stop
        )
        return
      end

      # if there's more, generate the time intervals and generate the batches
      step_size = (stop - start) / @batch_count

      interval_beginings = start.step(stop - step_size, step_size).map {|f| f}
      interval_ends = interval_beginings.clone
      interval_ends.shift
      interval_ends.push(stop)

      interval_beginings.zip(interval_ends).each do |from, to|
        soql_extended = get_extended_soql(soql, from, to)
        @logger.info "Adding soql #{soql_extended} as a batch to job" if @logger

        add_query(soql_extended,
          :start => from,
          :stop => to
        )
      end
    end

    def add_query(query, options={})
      # build a batch and create it via the API
      batch = SalesforceBulkQuery::Batch.new(
        :sobject => @sobject,
        :soql => query,
        :job_id => @job_id,
        :connection => @connection,
        :start => options[:start],
        :stop => options[:stop],
        :logger => @logger,
        :date_field => @date_field
      )
      batch.create

      # add the batch to the list
      @batches.push(batch)
      @unfinished_batches.push(batch)
    end

    def close_job
      xml = "#{@@xml_header}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
      xml += "<state>Closed</state>"
      xml += "</jobInfo>"

      path = "job/#{@job_id}"

      response_parsed = @connection.post_xml(path, xml)
      @job_closed_time = Time.now
    end

    def check_status
      path = "job/#{@job_id}"
      response_parsed = @connection.get_xml(path)
      @completed_count = Integer(response_parsed["numberBatchesCompleted"][0])
      @succeeded = @completed_count == Integer(response_parsed["numberBatchesTotal"][0])

      return {
        :succeeded => @succeeded,
        :some_records_failed => Integer(response_parsed["numberRecordsFailed"][0]) > 0,
        :some_batches_failed => Integer(response_parsed["numberBatchesFailed"][0]) > 0,
        :response => response_parsed
      }
    end

    def over_limit?
      (Time.now - @job_closed_time) > @job_time_limit
    end

    # downloads whatever is available, returns as unfinished whatever is not
    def get_available_results(options={})
      downloaded_filenames = []
      unfinished_batches = []
      verification_fail_batches = []
      failed_batches = []

      # get the result for each batch in the job
      @unfinished_batches.each do |batch|
        batch_status = batch.check_status

        # if the result is ready
        if batch_status[:succeeded]
          # each finished batch should go here only once

          # download the result
          result = batch.get_result(options)
          @logger.info "get_result result: #{result}" if @logger

          # if the verification failed, put it to failed,
          # will never ask about this one again.
          if result[:verification] == false
            verification_fail_batches << batch
          else
            # if verification is ok and finished, put it to filenames
            downloaded_filenames << result[:filename]
          end
        elsif batch_status[:failed]
          # put it to failed and raise an error at the end
          failed_batches << batch
        else
          # otherwise put it to unfinished
          unfinished_batches << batch
        end
      end

      unless failed_batches.empty?
        details = failed_batches.map {|b| "#{b.batch_id}: #{b.fail_message}"}.join("\n")
        fail ArgumentError, "#{failed_batches.length} batches failed. Details: #{details}"
      end

      # cache the unfinished_batches till the next run
      @unfinished_batches = unfinished_batches

      # accumulate filenames
      @filenames += downloaded_filenames

      @logger.info "unfinished batches: #{unfinished_batches}\nverification_fail_batches: #{verification_fail_batches}" if @logger

      return {
        :finished => @unfinished_batches.empty?,
        :filenames => @filenames,
        :unfinished_batches => @unfinished_batches,
        :verification_fail_batches => verification_fail_batches
      }
    end

    def to_log
      return {
        :sobject => @sobject,
        :connection => @connection.to_log,
        :batches => @batches.map {|b| b.to_log},
        :unfinished_batches => @unfinished_batches.map {|b| b.to_log}
      }
    end
  end
end
```
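The core of Job is generate_batches: it slices the requested date range into BATCH_COUNT equal sub-intervals and submits one batch (one SOQL subquery) per sub-interval. A standalone sketch of just that interval arithmetic, with made-up dates for illustration:

```ruby
require 'date'

batch_count = 15
start = DateTime.parse('2014-01-01T00:00:00+00:00')
stop  = DateTime.parse('2014-04-01T00:00:00+00:00')

# DateTime subtraction yields a Rational number of days,
# so step_size is the length of one sub-interval in days.
step_size = (stop - start) / batch_count

interval_beginnings = start.step(stop - step_size, step_size).to_a
interval_ends       = interval_beginnings.drop(1) + [stop]

interval_beginnings.zip(interval_ends).each do |from, to|
  puts "CreatedDate >= #{from} AND CreatedDate < #{to}"
end
```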
data/lib/salesforce_bulk_query/logger.rb

@@ -0,0 +1,44 @@

```ruby
require 'forwardable'
require 'faraday'

module SalesforceBulkQuery
  # Custom logger for Restforce that doesn't log tons of data.
  class Logger < Faraday::Response::Middleware
    extend Forwardable

    MAX_LOG_LENGTH = 2000

    def initialize(app, logger, options)
      super(app)
      @options = options
      @logger = logger || begin
        require 'logger'
        ::Logger.new(STDOUT)
      end
    end

    def_delegators :@logger, :debug, :info, :warn, :error, :fatal

    def call(env)
      debug('request') do
        dump :url => env[:url].to_s,
             :method => env[:method],
             :headers => env[:request_headers],
             :body => env[:body] ? env[:body][0..MAX_LOG_LENGTH] : nil
      end
      super
    end

    def on_complete(env)
      debug('response') do
        dump :status => env[:status].to_s,
             :headers => env[:response_headers],
             :body => env[:body] ? env[:body][0..MAX_LOG_LENGTH] : nil
      end
    end

    def dump(hash)
      "\n" + hash.map { |k, v| " #{k}: #{v.inspect}" }.join("\n")
    end
  end
end
```
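This is ordinary Faraday response middleware; truncating request and response bodies to MAX_LOG_LENGTH keeps multi-megabyte CSV payloads out of the log. A hedged sketch of mounting it on a hand-built Faraday connection (the gem itself inserts it into the Restforce client's middleware stack, which is not part of this hunk):

```ruby
require 'faraday'
require 'logger'
require 'salesforce_bulk_query/logger'

logger = Logger.new(STDOUT)

conn = Faraday.new(:url => 'https://example.my.salesforce.com') do |builder|
  # Faraday passes the extra arguments on to initialize(app, logger, options).
  builder.use SalesforceBulkQuery::Logger, logger, {}
  builder.adapter Faraday.default_adapter
end
```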
data/lib/salesforce_bulk_query/query.rb

@@ -0,0 +1,192 @@

```ruby
require 'salesforce_bulk_query/job'
require 'date'

module SalesforceBulkQuery

  # Abstraction of a single user-given query. It contains multiple jobs and is tied to a specific connection
  class Query

    # if no date_to is given we use the current time with this offset
    # subtracted (to make sure that the freshest changes, which can still be
    # inconsistent, aren't included). It's in minutes
    OFFSET_FROM_NOW = 10

    DEFAULT_DATE_FIELD = 'CreatedDate'

    def initialize(sobject, soql, connection, options={})
      @sobject = sobject
      @soql = soql
      @connection = connection
      @logger = options[:logger]
      @date_field = options[:date_field] || DEFAULT_DATE_FIELD
      @date_from = options[:date_from] || options[:created_from]
      @date_to = options[:date_to] || options[:created_to]
      @single_batch = options[:single_batch]

      # jobs currently running
      @jobs_in_progress = []

      # successfully finished jobs with no batches to split
      @jobs_done = []

      # finished or timed-out jobs with some batches split into other jobs
      @jobs_restarted = []

      @finished_batch_filenames = []
      @restarted_subqueries = []
    end

    attr_reader :jobs_in_progress, :jobs_restarted, :jobs_done

    DEFAULT_MIN_CREATED = "1999-01-01T00:00:00.000Z"

    # Creates the first job, divides the query into subqueries, puts all the subqueries as batches to the job
    def start(options={})
      # ORDER BY and WHERE are not allowed
      if (!@single_batch) && (@soql =~ / WHERE /i || @soql =~ /ORDER BY/i)
        raise "You can't have WHERE or ORDER BY in your soql. If you want to download just a specific date range use date_from / date_to"
      end

      # create the first job
      job = SalesforceBulkQuery::Job.new(
        @sobject,
        @connection,
        {:logger => @logger, :date_field => @date_field}.merge(options)
      )
      job.create_job

      # get the date when it should start
      min_date = get_min_date

      # generate intervals
      start = nil
      if min_date.instance_of?(Time)
        start = DateTime.parse(min_date.to_s)
      else
        start = DateTime.parse(min_date)
      end

      stop = nil
      if @date_to.nil?
        stop = DateTime.now - Rational(options[:offset_from_now] || OFFSET_FROM_NOW, 1440)
      else
        if @date_to.instance_of?(Time)
          stop = DateTime.parse(@date_to.to_s)
        else
          stop = DateTime.parse(@date_to)
        end
      end
      job.generate_batches(@soql, start, stop, @single_batch)

      job.close_job

      @jobs_in_progress.push(job)
    end

    # Get results for all finished jobs. If there are some unfinished batches, skip them and return them as unfinished.
    #
    # @param options[:directory_path]
    def get_available_results(options={})
      unfinished_subqueries = []
      jobs_in_progress = []
      jobs_restarted = []
      jobs_done = []

      # check all job statuses and split what should be split
      @jobs_in_progress.each do |job|

        # download what's available
        job_results = job.get_available_results(options)

        job_over_limit = job.over_limit?
        job_done = job_results[:finished] || job_over_limit

        @logger.debug "job_results: #{job_results}" if @logger

        unfinished_batches = job_results[:unfinished_batches]
        verification_fail_batches = job_results[:verification_fail_batches]

        unfinished_subqueries += unfinished_batches.map {|b| b.soql}

        # split into subqueries what needs to be split
        to_split = verification_fail_batches
        to_split += unfinished_batches if job_over_limit

        # delete files associated with batches that failed verification
        verification_fail_batches.each do |b|
          @logger.info "Deleting #{b.filename}, verification failed."
          File.delete(b.filename)
        end

        to_split.each do |batch|
          # for each batch to be split, create a new job and add it to the new jobs
          @logger.info "The following subquery didn't end in time / failed verification: #{batch.soql}. Dividing into multiple and running again" if @logger
          new_job = SalesforceBulkQuery::Job.new(
            @sobject,
            @connection,
            {:logger => @logger, :date_field => @date_field}.merge(options)
          )
          new_job.create_job
          new_job.generate_batches(@soql, batch.start, batch.stop)
          new_job.close_job
          jobs_in_progress.push(new_job)
        end

        # decide what to do with the current job
        if job_done
          if to_split.empty?
            # done, nothing left
            jobs_done.push(job)

            @logger.info "#{job.job_id} finished. Nothing to split. unfinished_batches: #{unfinished_batches}, verification_fail_batches: #{verification_fail_batches}" if @logger
          else
            # done, but some batches needed to be restarted
            jobs_restarted.push(job)
          end

          # store the filenames and the restarted subqueries
          @finished_batch_filenames += job_results[:filenames]
          @restarted_subqueries += to_split.map {|b| b.soql}
        else
          # still in progress
          jobs_in_progress.push(job)
        end
      end

      # remove the finished jobs from progress and add the new ones
      @jobs_in_progress = jobs_in_progress
      @jobs_done += jobs_done

      # we're done if there are no jobs in progress
      return {
        :succeeded => @jobs_in_progress.empty?,
        :filenames => @finished_batch_filenames,
        :unfinished_subqueries => unfinished_subqueries,
        :jobs_done => @jobs_done.map { |j| j.job_id }
      }
    end

    private

    def get_min_date
      if @date_from
        return @date_from
      end

      # get the date when the first record was created
      min_created = nil
      begin
        min_created_resp = @connection.client.query("SELECT #{@date_field} FROM #{@sobject} ORDER BY #{@date_field} LIMIT 1")
        min_created_resp.each {|s| min_created = s[@date_field.to_sym]}
      rescue Faraday::Error::TimeoutError => e
        @logger.warn "Timeout getting the oldest object for #{@sobject}. Error: #{e}. Using the default value" if @logger
        min_created = DEFAULT_MIN_CREATED
      rescue Faraday::Error::ClientError => e
        fail ArgumentError, "Error when trying to get the oldest record according to #{@date_field}, looks like #{@date_field} is not on #{@sobject}. Original error: #{e}\n #{e.message} \n #{e.backtrace}"
      end
      min_created
    end
  end
end
```
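Query ties the pieces together: start builds one Job spanning the whole date range, and get_available_results is meant to be polled; batches that time out or fail verification are re-split into fresh jobs until nothing is left in progress. A minimal polling sketch using these internal classes directly, assuming the connection and logger objects from the earlier sketch (data/README.md above is the place to look for the gem's supported public API):

```ruby
query = SalesforceBulkQuery::Query.new(
  'Opportunity',
  'SELECT Id, Name, CreatedDate FROM Opportunity',   # no WHERE / ORDER BY allowed
  connection,
  :logger => logger,
  :date_field => 'CreatedDate'
)
query.start

results = nil
loop do
  results = query.get_available_results(:directory_path => '/tmp/sfdc_export')
  break if results[:succeeded]          # true once no jobs are left in progress
  sleep 60
end

puts results[:filenames]                # CSV files downloaded for the finished batches
```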