salesforce_bulk_query-edge 0.2.1
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/.travis.yml +26 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +168 -0
- data/Rakefile +20 -0
- data/env_setup-example.sh +13 -0
- data/lib/salesforce_bulk_query.rb +108 -0
- data/lib/salesforce_bulk_query/batch.rb +153 -0
- data/lib/salesforce_bulk_query/connection.rb +140 -0
- data/lib/salesforce_bulk_query/job.rb +199 -0
- data/lib/salesforce_bulk_query/logger.rb +44 -0
- data/lib/salesforce_bulk_query/query.rb +192 -0
- data/lib/salesforce_bulk_query/utils.rb +16 -0
- data/lib/salesforce_bulk_query/version.rb +3 -0
- data/new-version.sh +22 -0
- data/salesforce_bulk_query.gemspec +34 -0
- data/spec/salesforce_bulk_query_spec.rb +227 -0
- data/spec/spec_helper.rb +9 -0
- metadata +207 -0
data/lib/salesforce_bulk_query/connection.rb
@@ -0,0 +1,140 @@
require 'xmlsimple'
require 'net/http'

module SalesforceBulkQuery

  # Connection to the Salesforce API,
  # shared by all classes that make requests.
  class Connection
    def initialize(client, api_version, logger=nil, filename_prefix=nil, ssl_version=nil)
      @client = client
      @logger = logger
      @filename_prefix = filename_prefix
      @ssl_version = ssl_version

      @@API_VERSION = api_version
      @@PATH_PREFIX = "/services/async/#{@@API_VERSION}/"
    end

    attr_reader :client

    XML_REQUEST_HEADER = {'Content-Type' => 'application/xml; charset=utf-8'}
    CSV_REQUEST_HEADER = {'Content-Type' => 'text/csv; charset=UTF-8'}

    def session_header
      {'X-SFDC-Session' => @client.options[:oauth_token]}
    end

    def parse_xml(xml)
      parsed = nil
      begin
        parsed = XmlSimple.xml_in(xml)
      rescue => e
        @logger.error "Error parsing xml: #{xml}\n#{e}\n#{e.backtrace}" if @logger
        raise
      end

      return parsed
    end

    def post_xml(path, xml, options={})
      path = "#{@@PATH_PREFIX}#{path}"
      headers = options[:csv_content_type] ? CSV_REQUEST_HEADER : XML_REQUEST_HEADER

      response = nil
      # do the request
      with_retries do
        begin
          response = @client.post(path, xml, headers.merge(session_header))
        rescue JSON::ParserError => e
          if e.message.index('ExceededQuota')
            raise "You've run out of sfdc batch api quota. Original error: #{e}\n #{e.backtrace}"
          end
          raise e
        end
      end

      return parse_xml(response.body)
    end

    def get_xml(path, options={})
      path = "#{@@PATH_PREFIX}#{path}"
      headers = XML_REQUEST_HEADER

      response = nil
      with_retries do
        response = @client.get(path, {}, headers.merge(session_header))
      end

      return options[:skip_parsing] ? response.body : parse_xml(response.body)
    end

    def get_to_file(path, filename)
      path = "#{@@PATH_PREFIX}#{path}"
      uri = URI.parse(@client.options[:instance_url])
      # open an HTTP connection to the instance
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http.ssl_version = @ssl_version if !@ssl_version.nil?
      headers = XML_REQUEST_HEADER.merge(session_header)
      @logger.info "Doing GET to #{path}, headers #{headers}" if @logger

      if @filename_prefix
        filename = "#{@filename_prefix}_#{filename}"
      end

      # do the request
      http.request_get(path, headers) do |res|
        @logger.info "Got response #{res.inspect}, reading response body by chunks and writing to #{filename}" if @logger

        File.open(filename, 'w') do |file|
          # write the body to the file in chunks
          res.read_body do |segment|
            file.write(segment.encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => "?"))
          end
        end
      end
    end

    def with_retries
      i = 0
      begin
        yield
      rescue => e
        i += 1
        if i < 3
          @logger.warn "Retrying, got error: #{e}, #{e.backtrace}" if @logger
          retry
        else
          @logger.error "Failed 3 times, last error: #{e}, #{e.backtrace}" if @logger
          raise
        end
      end
    end

    def query_count(sobject, date_field, from, to)
      # Runs with retries; if it still doesn't succeed, returns nil instead of failing.
      soql = "SELECT COUNT() FROM #{sobject} WHERE #{date_field} >= #{from} AND #{date_field} < #{to}"
      begin
        with_retries do
          q = @client.query(soql)
          return q.size
        end
      rescue Faraday::Error::TimeoutError => e
        @logger.warn "Timeout getting count: #{soql}. Error: #{e}. Taking it as failed verification" if @logger
        return nil
      end
    end

    def to_log
      return {
        :client => "Restforce (probably)",
        :filename_prefix => @filename_prefix,
        :api_version => @@API_VERSION,
        :path_prefix => @@PATH_PREFIX
      }
    end
  end
end
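For context, Connection wraps an already-authenticated Restforce client and only relies on its post, get, query and options methods. A minimal usage sketch follows; the credentials, logger and API version here are made up and not part of the gem:

require 'logger'
require 'restforce'
require 'salesforce_bulk_query'

# Hypothetical OAuth credentials; any authenticated Restforce client works.
restforce = Restforce.new(
  :username => 'user@example.com',
  :password => 'password',
  :security_token => 'security-token',
  :client_id => 'client-id',
  :client_secret => 'client-secret'
)

connection = SalesforceBulkQuery::Connection.new(restforce, '29.0', ::Logger.new(STDOUT))

The with_retries wrapper gives each request three attempts before re-raising, which is why post_xml and get_xml can afford to be thin.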
data/lib/salesforce_bulk_query/job.rb
@@ -0,0 +1,199 @@
require "salesforce_bulk_query/batch"

module SalesforceBulkQuery

  # Represents a Salesforce bulk api job, which contains multiple batches.
  # A Query may contain many jobs.
  class Job
    @@operation = 'query'
    @@xml_header = '<?xml version="1.0" encoding="utf-8" ?>'
    JOB_TIME_LIMIT = 15 * 60
    BATCH_COUNT = 15

    def initialize(sobject, connection, options={})
      @sobject = sobject
      @connection = connection
      @logger = options[:logger]
      @job_time_limit = options[:job_time_limit] || JOB_TIME_LIMIT
      @date_field = options[:date_field] or fail "date_field must be given when creating a batch"
      @batch_count = options[:batch_count] || BATCH_COUNT

      # all batches (static)
      @batches = []

      # unfinished batches as of the last get_available_results call
      @unfinished_batches = []

      # filenames for the already downloaded and verified batches
      @filenames = []
    end

    attr_reader :job_id

    # Do the API request
    def create_job(csv=true)
      content_type = csv ? "CSV" : "XML"
      xml = "#{@@xml_header}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
      xml += "<operation>#{@@operation}</operation>"
      xml += "<object>#{@sobject}</object>"
      xml += "<contentType>#{content_type}</contentType>"
      xml += "</jobInfo>"

      response_parsed = @connection.post_xml("job", xml)
      @job_id = response_parsed['id'][0]
    end

    def get_extended_soql(soql, from, to)
      return "#{soql} WHERE #{@date_field} >= #{from} AND #{@date_field} < #{to}"
    end

    def generate_batches(soql, start, stop, single_batch=false)
      # if just one batch is wanted, add it and we're done
      if single_batch
        soql_extended = get_extended_soql(soql, start, stop)
        @logger.info "Adding soql #{soql_extended} as a batch to job" if @logger

        add_query(soql_extended,
          :start => start,
          :stop => stop
        )
        return
      end

      # otherwise generate the time intervals and a batch for each
      step_size = (stop - start) / @batch_count

      interval_beginnings = start.step(stop - step_size, step_size).map {|f| f}
      interval_ends = interval_beginnings.clone
      interval_ends.shift
      interval_ends.push(stop)

      interval_beginnings.zip(interval_ends).each do |from, to|
        soql_extended = get_extended_soql(soql, from, to)
        @logger.info "Adding soql #{soql_extended} as a batch to job" if @logger

        add_query(soql_extended,
          :start => from,
          :stop => to
        )
      end
    end

    def add_query(query, options={})
      # build a batch and submit it
      batch = SalesforceBulkQuery::Batch.new(
        :sobject => @sobject,
        :soql => query,
        :job_id => @job_id,
        :connection => @connection,
        :start => options[:start],
        :stop => options[:stop],
        :logger => @logger,
        :date_field => @date_field
      )
      batch.create

      # add the batch to the list
      @batches.push(batch)
      @unfinished_batches.push(batch)
    end

    def close_job
      xml = "#{@@xml_header}<jobInfo xmlns=\"http://www.force.com/2009/06/asyncapi/dataload\">"
      xml += "<state>Closed</state>"
      xml += "</jobInfo>"

      path = "job/#{@job_id}"

      response_parsed = @connection.post_xml(path, xml)
      @job_closed_time = Time.now
    end

    def check_status
      path = "job/#{@job_id}"
      response_parsed = @connection.get_xml(path)
      @completed_count = Integer(response_parsed["numberBatchesCompleted"][0])
      @succeeded = @completed_count == Integer(response_parsed["numberBatchesTotal"][0])

      return {
        :succeeded => @succeeded,
        :some_records_failed => Integer(response_parsed["numberRecordsFailed"][0]) > 0,
        :some_batches_failed => Integer(response_parsed["numberBatchesFailed"][0]) > 0,
        :response => response_parsed
      }
    end

    def over_limit?
      (Time.now - @job_closed_time) > @job_time_limit
    end

    # downloads whatever is available, returns as unfinished whatever is not
    def get_available_results(options={})
      downloaded_filenames = []
      unfinished_batches = []
      verification_fail_batches = []
      failed_batches = []

      # get the result for each batch in the job
      @unfinished_batches.each do |batch|
        batch_status = batch.check_status

        # if the result is ready
        if batch_status[:succeeded]
          # each finished batch should go here only once

          # download the result
          result = batch.get_result(options)
          @logger.info "get_result result: #{result}" if @logger

          # if the verification failed, put the batch among the failed ones;
          # it will never be asked about again.
          if result[:verification] == false
            verification_fail_batches << batch
          else
            # verification ok and finished, so store the filename
            downloaded_filenames << result[:filename]
          end
        elsif batch_status[:failed]
          # put it among the failed and raise an error at the end
          failed_batches << batch
        else
          # otherwise it's unfinished
          unfinished_batches << batch
        end
      end

      unless failed_batches.empty?
        details = failed_batches.map {|b| "#{b.batch_id}: #{b.fail_message}"}.join("\n")
        fail ArgumentError, "#{failed_batches.length} batches failed. Details: #{details}"
      end

      # cache the unfinished batches till the next run
      @unfinished_batches = unfinished_batches

      # accumulate filenames
      @filenames += downloaded_filenames

      @logger.info "unfinished batches: #{unfinished_batches}\nverification_fail_batches: #{verification_fail_batches}" if @logger

      return {
        :finished => @unfinished_batches.empty?,
        :filenames => @filenames,
        :unfinished_batches => @unfinished_batches,
        :verification_fail_batches => verification_fail_batches
      }
    end

    def to_log
      return {
        :sobject => @sobject,
        :connection => @connection.to_log,
        :batches => @batches.map {|b| b.to_log},
        :unfinished_batches => @unfinished_batches.map {|b| b.to_log}
      }
    end
  end
end
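The date partitioning in generate_batches is plain Date arithmetic: divide [start, stop) into @batch_count half-open intervals, each of which becomes one batch with a ">= from AND < to" filter, so no record is counted twice. A standalone sketch of the same computation (the dates and batch count are made up):

require 'date'

start = DateTime.parse('2014-01-01T00:00:00Z')
stop  = DateTime.parse('2014-01-09T00:00:00Z')
batch_count = 4

step_size = (stop - start) / batch_count   # 2 days per interval here

interval_beginnings = start.step(stop - step_size, step_size).map {|d| d}
interval_ends = interval_beginnings.clone
interval_ends.shift
interval_ends.push(stop)

interval_beginnings.zip(interval_ends).each do |from, to|
  puts "#{from} .. #{to}"
end
# prints 4 intervals: Jan 1-3, 3-5, 5-7, 7-9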
data/lib/salesforce_bulk_query/logger.rb
@@ -0,0 +1,44 @@
require 'forwardable'
require 'faraday'

module SalesforceBulkQuery
  # Custom logger for Restforce that doesn't log tons of data.
  class Logger < Faraday::Response::Middleware
    extend Forwardable

    MAX_LOG_LENGTH = 2000

    def initialize(app, logger, options)
      super(app)
      @options = options
      @logger = logger || begin
        require 'logger'
        ::Logger.new(STDOUT)
      end
    end

    def_delegators :@logger, :debug, :info, :warn, :error, :fatal

    def call(env)
      debug('request') do
        dump :url => env[:url].to_s,
          :method => env[:method],
          :headers => env[:request_headers],
          :body => env[:body] ? env[:body][0..MAX_LOG_LENGTH] : nil
      end
      super
    end

    def on_complete(env)
      debug('response') do
        dump :status => env[:status].to_s,
          :headers => env[:response_headers],
          :body => env[:body] ? env[:body][0..MAX_LOG_LENGTH] : nil
      end
    end

    def dump(hash)
      "\n" + hash.map { |k, v| " #{k}: #{v.inspect}" }.join("\n")
    end
  end
end
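Being a Faraday response middleware, this logger has to be mounted on the client's middleware stack. A sketch of doing that for a Restforce client (the session values are made up, and the insertion point is an assumption; Restforce exposes its Faraday builder as client.middleware):

require 'logger'
require 'restforce'
require 'salesforce_bulk_query'

# Hypothetical, already-established session.
restforce = Restforce.new(
  :oauth_token => 'access-token',
  :instance_url => 'https://na1.salesforce.com'
)

# Faraday instantiates the middleware as Logger.new(app, logger, options).
restforce.middleware.insert_after(
  Restforce::Middleware::InstanceURL,
  SalesforceBulkQuery::Logger,
  ::Logger.new(STDOUT), {}
)

With MAX_LOG_LENGTH at 2000, even multi-megabyte CSV response bodies show up in the log as a short prefix instead of the whole payload.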
data/lib/salesforce_bulk_query/query.rb
@@ -0,0 +1,192 @@
require 'salesforce_bulk_query/job'
require 'date'

module SalesforceBulkQuery

  # Abstraction of a single user-given query. It contains multiple jobs and is tied to a specific connection.
  class Query

    # If no date_to is given, we use the current time minus this offset
    # (in minutes), so that the freshest changes, which can still be
    # inconsistent, aren't included.
    OFFSET_FROM_NOW = 10

    DEFAULT_DATE_FIELD = 'CreatedDate'

    def initialize(sobject, soql, connection, options={})
      @sobject = sobject
      @soql = soql
      @connection = connection
      @logger = options[:logger]
      @date_field = options[:date_field] || DEFAULT_DATE_FIELD
      @date_from = options[:date_from] || options[:created_from]
      @date_to = options[:date_to] || options[:created_to]
      @single_batch = options[:single_batch]

      # jobs currently running
      @jobs_in_progress = []

      # successfully finished jobs with no batches to split
      @jobs_done = []

      # finished or timed-out jobs with some batches split into other jobs
      @jobs_restarted = []

      @finished_batch_filenames = []
      @restarted_subqueries = []
    end

    attr_reader :jobs_in_progress, :jobs_restarted, :jobs_done

    DEFAULT_MIN_CREATED = "1999-01-01T00:00:00.000Z"

    # Creates the first job, divides the query into subqueries and adds all the subqueries to the job as batches.
    def start(options={})
      # WHERE and ORDER BY are not allowed
      if (!@single_batch) && (@soql =~ / WHERE /i || @soql =~ /ORDER BY/i)
        raise "You can't have WHERE or ORDER BY in your soql. If you want to download just a specific date range, use date_from / date_to"
      end

      # create the first job
      job = SalesforceBulkQuery::Job.new(
        @sobject,
        @connection,
        {:logger => @logger, :date_field => @date_field}.merge(options)
      )
      job.create_job

      # get the date when it should start
      min_date = get_min_date

      # generate intervals
      start = nil
      if min_date.instance_of?(Time)
        start = DateTime.parse(min_date.to_s)
      else
        start = DateTime.parse(min_date)
      end

      stop = nil
      if @date_to.nil?
        stop = DateTime.now - Rational(options[:offset_from_now] || OFFSET_FROM_NOW, 1440)
      else
        if @date_to.instance_of?(Time)
          stop = DateTime.parse(@date_to.to_s)
        else
          stop = DateTime.parse(@date_to)
        end
      end
      job.generate_batches(@soql, start, stop, @single_batch)

      job.close_job

      @jobs_in_progress.push(job)
    end

    # Get results for all finished jobs. If there are some unfinished batches, skip them and return them as unfinished.
    #
    # @param options[:directory_path]
    def get_available_results(options={})
      unfinished_subqueries = []
      jobs_in_progress = []
      jobs_restarted = []
      jobs_done = []

      # check the status of all jobs and split what needs to be split
      @jobs_in_progress.each do |job|

        # download what's available
        job_results = job.get_available_results(options)

        job_over_limit = job.over_limit?
        job_done = job_results[:finished] || job_over_limit

        @logger.debug "job_results: #{job_results}" if @logger

        unfinished_batches = job_results[:unfinished_batches]
        verification_fail_batches = job_results[:verification_fail_batches]

        unfinished_subqueries += unfinished_batches.map {|b| b.soql}

        # split into subqueries what needs to be split
        to_split = verification_fail_batches
        to_split += unfinished_batches if job_over_limit

        # delete files associated with batches that failed verification
        verification_fail_batches.each do |b|
          @logger.info "Deleting #{b.filename}, verification failed." if @logger
          File.delete(b.filename)
        end

        to_split.each do |batch|
          # for each batch to be split, create a new job and add it to the new jobs
          @logger.info "The following subquery didn't end in time / failed verification: #{batch.soql}. Dividing it into multiple batches and running it again" if @logger
          new_job = SalesforceBulkQuery::Job.new(
            @sobject,
            @connection,
            {:logger => @logger, :date_field => @date_field}.merge(options)
          )
          new_job.create_job
          new_job.generate_batches(@soql, batch.start, batch.stop)
          new_job.close_job
          jobs_in_progress.push(new_job)
        end

        # decide what to do with the current job
        if job_done
          if to_split.empty?
            # done, nothing left
            jobs_done.push(job)

            @logger.info "#{job.job_id} finished. Nothing to split. unfinished_batches: #{unfinished_batches}, verification_fail_batches: #{verification_fail_batches}" if @logger
          else
            # done, but some batches had to be restarted
            jobs_restarted.push(job)
          end

          # store the filenames and the restarted subqueries
          @finished_batch_filenames += job_results[:filenames]
          @restarted_subqueries += to_split.map {|b| b.soql}
        else
          # still in progress
          jobs_in_progress.push(job)
        end
      end

      # remove the finished jobs from progress and add the new ones
      @jobs_in_progress = jobs_in_progress
      @jobs_done += jobs_done

      # we're done if there are no jobs in progress
      return {
        :succeeded => @jobs_in_progress.empty?,
        :filenames => @finished_batch_filenames,
        :unfinished_subqueries => unfinished_subqueries,
        :jobs_done => @jobs_done.map { |j| j.job_id }
      }
    end

    private

    def get_min_date
      if @date_from
        return @date_from
      end

      # get the date when the first record was created
      min_created = nil
      begin
        min_created_resp = @connection.client.query("SELECT #{@date_field} FROM #{@sobject} ORDER BY #{@date_field} LIMIT 1")
        min_created_resp.each {|s| min_created = s[@date_field.to_sym]}
      rescue Faraday::Error::TimeoutError => e
        @logger.warn "Timeout getting the oldest object for #{@sobject}. Error: #{e}. Using the default value" if @logger
        min_created = DEFAULT_MIN_CREATED
      rescue Faraday::Error::ClientError => e
        fail ArgumentError, "Error when trying to get the oldest record according to #{@date_field}, looks like #{@date_field} is not on #{@sobject}. Original error: #{e}\n #{e.message} \n #{e.backtrace}"
      end
      min_created
    end
  end
end
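Putting the pieces together, here is a hedged end-to-end sketch of driving Query directly; in practice the gem's top-level API in lib/salesforce_bulk_query.rb does this polling for you, and the sobject, SOQL, output directory and sleep interval below are made up:

query = SalesforceBulkQuery::Query.new(
  'Opportunity',
  'SELECT Id, Name FROM Opportunity',
  connection,                # a SalesforceBulkQuery::Connection as sketched above
  :logger => ::Logger.new(STDOUT)
)
query.start

results = nil
loop do
  results = query.get_available_results(:directory_path => '/tmp')
  break if results[:succeeded]
  sleep 30                   # arbitrary polling interval
end

puts results[:filenames]     # CSV files with the downloaded records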