rocketjob 3.5.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +63 -1
- data/bin/rocketjob +1 -0
- data/bin/rocketjob_batch_perf +11 -0
- data/lib/rocket_job/batch.rb +32 -0
- data/lib/rocket_job/batch/callbacks.rb +40 -0
- data/lib/rocket_job/batch/io.rb +154 -0
- data/lib/rocket_job/batch/logger.rb +57 -0
- data/lib/rocket_job/batch/lower_priority.rb +54 -0
- data/lib/rocket_job/batch/model.rb +157 -0
- data/lib/rocket_job/batch/performance.rb +99 -0
- data/lib/rocket_job/batch/result.rb +8 -0
- data/lib/rocket_job/batch/results.rb +9 -0
- data/lib/rocket_job/batch/state_machine.rb +102 -0
- data/lib/rocket_job/batch/statistics.rb +88 -0
- data/lib/rocket_job/batch/tabular.rb +56 -0
- data/lib/rocket_job/batch/tabular/input.rb +123 -0
- data/lib/rocket_job/batch/tabular/output.rb +59 -0
- data/lib/rocket_job/batch/throttle.rb +91 -0
- data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
- data/lib/rocket_job/batch/worker.rb +288 -0
- data/lib/rocket_job/cli.rb +29 -7
- data/lib/rocket_job/config.rb +1 -1
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
- data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
- data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
- data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
- data/lib/rocket_job/jobs/performance_job.rb +18 -0
- data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
- data/lib/rocket_job/plugins/document.rb +2 -8
- data/lib/rocket_job/plugins/job/persistence.rb +6 -4
- data/lib/rocket_job/plugins/job/throttle.rb +3 -6
- data/lib/rocket_job/plugins/job/worker.rb +2 -2
- data/lib/rocket_job/server.rb +14 -3
- data/lib/rocket_job/sliced/input.rb +336 -0
- data/lib/rocket_job/sliced/output.rb +99 -0
- data/lib/rocket_job/sliced/slice.rb +166 -0
- data/lib/rocket_job/sliced/slices.rb +166 -0
- data/lib/rocket_job/sliced/writer/input.rb +60 -0
- data/lib/rocket_job/sliced/writer/output.rb +82 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +2 -2
- data/lib/rocketjob.rb +28 -0
- metadata +51 -62
- data/test/config/database.yml +0 -5
- data/test/config/mongoid.yml +0 -88
- data/test/config_test.rb +0 -10
- data/test/dirmon_entry_test.rb +0 -313
- data/test/dirmon_job_test.rb +0 -216
- data/test/files/text.txt +0 -3
- data/test/job_test.rb +0 -71
- data/test/jobs/housekeeping_job_test.rb +0 -102
- data/test/jobs/on_demand_job_test.rb +0 -59
- data/test/jobs/upload_file_job_test.rb +0 -107
- data/test/plugins/cron_test.rb +0 -166
- data/test/plugins/job/callbacks_test.rb +0 -166
- data/test/plugins/job/defaults_test.rb +0 -53
- data/test/plugins/job/logger_test.rb +0 -56
- data/test/plugins/job/model_test.rb +0 -94
- data/test/plugins/job/persistence_test.rb +0 -94
- data/test/plugins/job/state_machine_test.rb +0 -116
- data/test/plugins/job/throttle_test.rb +0 -111
- data/test/plugins/job/worker_test.rb +0 -199
- data/test/plugins/processing_window_test.rb +0 -109
- data/test/plugins/restart_test.rb +0 -193
- data/test/plugins/retry_test.rb +0 -88
- data/test/plugins/singleton_test.rb +0 -92
- data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
- data/test/plugins/state_machine_test.rb +0 -67
- data/test/plugins/transaction_test.rb +0 -84
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +0 -17
data/lib/rocket_job/plugins/document.rb
CHANGED
@@ -5,19 +5,13 @@ module RocketJob
     # Base class for storing models in MongoDB
     module Document
       extend ActiveSupport::Concern
-      include Mongoid::Document
+      include ::Mongoid::Document

       included do
         store_in client: 'rocketjob'
       end

       module ClassMethods
-        # V2 Backward compatibility
-        # DEPRECATED
-        def key(name, type, options = {})
-          field(name, options.merge(type: type))
-        end
-
         # Mongoid does not apply ordering, add sort
         def first
           all.sort('_id' => 1).first
@@ -35,7 +29,7 @@ module RocketJob
       # Allows other changes to be made on the server that will be loaded.
       def find_and_update(attrs)
         doc = collection.find(_id: id).find_one_and_update({'$set' => attrs}, return_document: :after)
-        raise(Mongoid::Errors::DocumentNotFound.new(self.class, id)) unless doc
+        raise(::Mongoid::Errors::DocumentNotFound.new(self.class, id)) unless doc

         # Clear out keys that are not returned during the reload from MongoDB
         (fields.keys + embedded_relations.keys - doc.keys).each { |key| send("#{key}=", nil) }
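The removed `key` macro was a Rocket Job v2 compatibility shim that simply delegated to Mongoid's `field`, as the removed lines show. A minimal migration sketch for jobs that still use it (the `user_name` field is hypothetical):

    # Before 4.0.0 (deprecated v2 shim):
    class MyJob < RocketJob::Job
      key :user_name, String
    end

    # 4.0.0 onwards, declare the Mongoid field directly:
    class MyJob < RocketJob::Job
      field :user_name, type: String
    end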
data/lib/rocket_job/plugins/job/persistence.rb
CHANGED
@@ -32,9 +32,11 @@ module RocketJob
       #   job = RocketJob::Job.rocket_job_retrieve('host:pid:worker', filter)
       def rocket_job_retrieve(worker_name, filter)
         SemanticLogger.silence(:info) do
-
-
-
+          scheduled = {'$or' => [{run_at: nil}, {:run_at.lte => Time.now}]}
+          working = {'$or' => [{state: :queued}, {state: :running, sub_state: :processing}]}
+          query = self.and(working, scheduled)
+          query = query.where(filter) unless filter.blank?
+          update = {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
           query.sort(priority: 1, _id: 1).find_one_and_update(update, bypass_document_validation: true)
         end
       end
@@ -101,7 +103,7 @@ module RocketJob
         return super unless destroy_on_complete
         begin
           super
-        rescue Mongoid::Errors::DocumentNotFound
+        rescue ::Mongoid::Errors::DocumentNotFound
           unless completed?
             self.state = :completed
            rocket_job_set_completed_at
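The rewritten `rocket_job_retrieve` combines two criteria before atomically claiming a job with `find_one_and_update`. A rough sketch of the selector this should produce, assuming Mongoid expands the `and`/`where` composition as written (the expanded shape is an inference, not taken from the diff):

    # Jobs that are due now (no run_at, or run_at in the past) AND are either
    # queued, or running but paused mid-processing:
    {
      '$and' => [
        {'$or' => [{'state' => 'queued'}, {'state' => 'running', 'sub_state' => 'processing'}]},
        {'$or' => [{'run_at' => nil}, {'run_at' => {'$lte' => Time.now}}]}
      ]
    }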
data/lib/rocket_job/plugins/job/throttle.rb
CHANGED
@@ -58,17 +58,14 @@ module RocketJob
       end

       # Undefine a previously defined throttle
-      def undefine_throttle(
-        rocket_job_throttles.delete_if
+      def undefine_throttle(method_name)
+        rocket_job_throttles.delete_if { |throttle| throttle.method_name == method_name }
       end

       # Has a throttle been defined?
       def throttle?(method_name)
-        rocket_job_throttles.
+        rocket_job_throttles.any? { |throttle| throttle.method_name == method_name }
       end
-
-      # DEPRECATED
-      alias has_throttle? throttle?
     end

     # Default throttle to use when the throttle is exceeded.
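With the deprecated `has_throttle?` alias removed, callers must use `throttle?` directly. A short usage sketch, assuming a throttle was previously registered under the name below (the method name is illustrative):

    MyJob.throttle?(:my_throttle_exceeded?)          # => true while defined
    MyJob.undefine_throttle(:my_throttle_exceeded?)
    MyJob.throttle?(:my_throttle_exceeded?)          # => false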
data/lib/rocket_job/plugins/job/worker.rb
CHANGED
@@ -51,7 +51,7 @@ module RocketJob
         else
           job.worker_name = worker_name
           job.rocket_job_fail_on_exception!(worker_name) do
-
+            job.start!
           end
           return job if job.running?
         end
@@ -105,7 +105,7 @@ module RocketJob
       #
       # Exceptions are _not_ suppressed and should be handled by the caller.
       def perform_now
-        raise(Mongoid::Errors::Validations, self) unless valid?
+        raise(::Mongoid::Errors::Validations, self) unless valid?

         worker = RocketJob::Worker.new(inline: true)
         start if may_start?
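`perform_now` now validates the job before running it in-process. A minimal sketch, assuming a `MyJob` subclass of `RocketJob::Job`:

    job = MyJob.new
    job.perform_now   # raises ::Mongoid::Errors::Validations when the job is invalid,
                      # otherwise runs it inline via RocketJob::Worker.new(inline: true)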
data/lib/rocket_job/server.rb
CHANGED
@@ -1,3 +1,4 @@
+require 'yaml'
 require 'concurrent'
 module RocketJob
   # Server
@@ -45,7 +46,7 @@ module RocketJob
     field :started_at, type: Time

     # Filter to apply to control which job classes this server can process
-    field :
+    field :yaml_filter, type: String

     # The heartbeat information for this server
     embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
@@ -188,7 +189,7 @@ module RocketJob
     def self.run(attrs = {})
       Thread.current.name = 'rocketjob main'
       # Create Indexes on server startup
-      Mongoid::Tasks::Database.create_indexes
+      ::Mongoid::Tasks::Database.create_indexes
       register_signal_handlers

       server = create!(attrs)
@@ -228,6 +229,15 @@ module RocketJob
       (Time.now - heartbeat.updated_at) >= dead_seconds
     end

+    # Where clause filter to apply to workers looking for jobs
+    def filter
+      YAML.load(yaml_filter) if yaml_filter
+    end
+
+    def filter=(hash)
+      self.yaml_filter = hash.nil? ? nil : hash.to_yaml
+    end
+
     private

     # Returns [Array<Worker>] collection of workers
@@ -238,6 +248,7 @@ module RocketJob
     # Management Thread
     def run
       logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
+      logger.info('Running with filter', filter) if filter
       build_heartbeat(updated_at: Time.now, workers: 0)
       started!
       logger.info 'Rocket Job Server started'
@@ -262,7 +273,7 @@ module RocketJob
     end

     logger.info 'Shutdown'
-  rescue Mongoid::Errors::DocumentNotFound
+  rescue ::Mongoid::Errors::DocumentNotFound
     logger.warn('Server has been destroyed. Going down hard!')
   rescue Exception => exc
     logger.error('RocketJob::Server is stopping due to an exception', exc)
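The new `filter` / `filter=` pair round-trips a criteria Hash through the `yaml_filter` String field, and the server passes that filter to workers looking for jobs. A small sketch of restricting a server to certain job classes, assuming any valid Mongoid where-clause Hash may be stored (the `_type` regexp is illustrative):

    server = RocketJob::Server.new
    server.filter = {'_type' => /Critical/}   # persisted as YAML in yaml_filter
    server.filter                             # => {'_type' => /Critical/}, parsed back from YAML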
data/lib/rocket_job/sliced/input.rb
ADDED
@@ -0,0 +1,336 @@
+module RocketJob
+  module Sliced
+    class Input < Slices
+      # Load lines for processing from the supplied filename or stream into this job.
+      #
+      # Returns [Integer] the number of lines loaded into this collection
+      #
+      # Parameters
+      #   file_name_or_io [String | IO]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO Stream that responds to: :read
+      #
+      #   streams [Symbol|Array]
+      #     Streams to convert the data whilst it is being read.
+      #     When nil, the file_name extensions will be inspected to determine what
+      #     streams should be applied.
+      #     Default: nil
+      #
+      #   delimiter [String]
+      #     Line / Record delimiter to use to break the stream up into records
+      #       Any string to break the stream up by
+      #       The records when saved will not include this delimiter
+      #     Default: nil
+      #       Automatically detect line endings and break up by line
+      #       Searches for the first "\r\n" or "\n" and then uses that as the
+      #       delimiter for all subsequent records
+      #
+      #   buffer_size [Integer]
+      #     Size of the blocks when reading from the input file / stream.
+      #     Default: 65536 ( 64K )
+      #
+      #   encoding: [String|Encoding]
+      #     Encode returned data with this encoding.
+      #     'US-ASCII':   Original 7 bit ASCII Format
+      #     'ASCII-8BIT': 8-bit ASCII Format
+      #     'UTF-8':      UTF-8 Format
+      #     Etc.
+      #     Default: 'UTF-8'
+      #
+      #   encode_replace: [String]
+      #     The character to replace with when a character cannot be converted to the target encoding.
+      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
+      #     Default: nil
+      #
+      #   encode_cleaner: [nil|symbol|Proc]
+      #     Cleanse data read from the input stream.
+      #     nil:        No cleansing
+      #     :printable  Cleanse all non-printable characters except \r and \n
+      #     Proc/lambda Proc to call after every read to cleanse the data
+      #     Default: :printable
+      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+      #
+      # Example:
+      #   # Load plain text records from a file
+      #   job.input.upload('hello.csv')
+      #
+      # Example:
+      #   # Load plain text records from a file, stripping all non-printable characters,
+      #   # as well as any characters that cannot be converted to UTF-8
+      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.input.upload('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.input.upload('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+      #
+      # Example: Supply custom options
+      #   job.input.upload('myfile.csv.enc', streams: [:enc])
+      #
+      # Example: Extract streams from filename but write to a temp file
+      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
+      #   t = Tempfile.new('my_project')
+      #   job.input.upload(t.to_path, streams: streams)
+      #
+      # Example: Upload by writing records one at a time to the upload stream
+      #   job.upload do |writer|
+      #     10.times { |i| writer << i }
+      #   end
+      #
+      # Notes:
+      # - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
+      #   is recommended since Mongo only supports UTF-8 strings.
+      # - When zip format, the Zip file/stream must contain only one file, the first file found will be
+      #   loaded into the job
+      # - If an io stream is supplied, it is read until it returns nil.
+      # - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
+      # - Only call from one thread at a time per job instance.
+      # - CSV parsing is slow, so it is left for the workers to do.
+      def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
+        raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
+
+        block ||= -> (io) do
+          iterator = "each_#{stream_mode}".to_sym
+          IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
+        end
+
+        create_indexes
+        Writer::Input.collect(self, on_first: on_first, &block)
+      end
+
+      # Upload the result of a MongoDB query to the input collection for processing
+      # Useful when an entire MongoDB collection, or part thereof needs to be
+      # processed by a job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # If a Block is supplied it is passed the document returned from the
+      # database and should return a record for processing
+      #
+      # If no Block is supplied then the record will be the :fields returned
+      # from MongoDB
+      #
+      # Note:
+      #   This method uses the collection and not the MongoMapper document to
+      #   avoid the overhead of constructing a Model with every document returned
+      #   by the query
+      #
+      # Note:
+      #   The Block must return types that can be serialized to BSON.
+      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
+      #   Invalid: Date, etc.
+      #
+      # Example: Upload document ids
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria)
+      #
+      # Example: Upload just the supplied column
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
+      def upload_mongo_query(criteria, *column_names, &block)
+        create_indexes
+        options = criteria.options
+
+        # Without a block extract the fields from the supplied criteria
+        if block
+          # Criteria is returning old school :fields instead of :projections
+          options[:projection] = options.delete(:fields) if options.key?(:fields)
+        else
+          column_names = column_names.collect(&:to_s)
+          column_names << '_id' if column_names.size.zero?
+
+          fields = options.delete(:fields) || {}
+          column_names.each { |col| fields[col] = 1 }
+          options[:projection] = fields
+
+          block =
+            if column_names.size == 1
+              column = column_names.first
+              ->(document) { document[column] }
+            else
+              ->(document) { column_names.collect { |c| document[c] } }
+            end
+        end
+
+        Writer::Input.collect(self) do |records|
+          # Drop down to the mongo driver level to avoid constructing a Model for each document returned
+          criteria.klass.collection.find(criteria.selector, options).each do |document|
+            records << block.call(document)
+          end
+        end
+      end
+
+      # Upload results from an Arel into RocketJob::SlicedJob.
+      #
+      # Params
+      #   column_names
+      #     When a block is not supplied, supply the names of the columns to be returned
+      #     and uploaded into the job
+      #     These columns are automatically added to the select list to reduce overhead
+      #
+      #   If a Block is supplied it is passed the model returned from the database and should
+      #   return the work item to be uploaded into the job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # Example: Upload id's for all users
+      #   arel = User.all
+      #   job.record_count = job.upload_arel(arel)
+      #
+      # Example: Upload selected user id's
+      #   arel = User.where(country_code: 'US')
+      #   job.record_count = job.upload_arel(arel)
+      #
+      # Example: Upload user_name and zip_code
+      #   arel = User.where(country_code: 'US')
+      #   job.record_count = job.upload_arel(arel, :user_name, :zip_code)
+      def upload_arel(arel, *column_names, &block)
+        create_indexes
+        unless block
+          column_names = column_names.collect(&:to_sym)
+          column_names << :id if column_names.size.zero?
+
+          block =
+            if column_names.size == 1
+              column = column_names.first
+              ->(model) { model.send(column) }
+            else
+              ->(model) { column_names.collect { |c| model.send(c) } }
+            end
+          # find_each requires the :id column in the query
+          selection = column_names.include?(:id) ? column_names : column_names + [:id]
+          arel = arel.select(selection)
+        end
+
+        Writer::Input.collect(self) do |records|
+          arel.find_each { |model| records << block.call(model) }
+        end
+      end
+
+      # Upload sliced ranges of integers as arrays of start and end ids
+      #
+      # Returns [Integer] the number of slices uploaded
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.record_count = job.upload_integer_range(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.record_count = job.insert([200,299])
+      #   job.record_count += job.insert([300,399])
+      #   job.record_count += job.insert([400,421])
+      def upload_integer_range(start_id, last_id)
+        create_indexes
+        count = 0
+        while start_id <= last_id
+          end_id = start_id + slice_size - 1
+          end_id = last_id if end_id > last_id
+          create!(records: [[start_id, end_id]])
+          start_id += slice_size
+          count += 1
+        end
+        count
+      end
+
+      # Upload sliced ranges of integers as arrays of start and end ids
+      # starting with the last range first
+      #
+      # Returns [Integer] the number of slices uploaded
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed.
+      # Useful for when the highest order integer values should be processed before
+      # the lower integer value ranges. For example when processing every record
+      # in a database based on the id column
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
+      #
+      #   # Equivalent to calling:
+      #   job.insert([400,421])
+      #   job.insert([300,399])
+      #   job.insert([200,299])
+      def upload_integer_range_in_reverse_order(start_id, last_id)
+        create_indexes
+        end_id = last_id
+        count = 0
+        while end_id >= start_id
+          first_id = end_id - slice_size + 1
+          first_id = start_id if first_id.negative? || (first_id < start_id)
+          create!(records: [[first_id, end_id]])
+          end_id -= slice_size
+          count += 1
+        end
+        count
+      end
+
+      # Iterate over each failed record, if any
+      # Since each slice can only contain 1 failed record, only the failed
+      # record is returned along with the slice containing the exception
+      # details
+      #
+      # Example:
+      #   job.each_failed_record do |record, slice|
+      #     ap slice
+      #   end
+      #
+      def each_failed_record
+        failed.each do |slice|
+          if slice.exception && (record_number = slice.exception.record_number)
+            yield(slice.at(record_number - 1), slice)
+          end
+        end
+      end
+
+      # Requeue all failed slices
+      def requeue_failed
+        failed.update_all(
+          '$unset' => {worker_name: nil, started_at: nil},
+          '$set' => {state: :queued}
+        )
+      end
+
+      # Requeue all running slices for a server or worker that is no longer available
+      def requeue_running(worker_name)
+        running.where(worker_name: /\A#{worker_name}/).update_all(
+          '$unset' => {worker_name: nil, started_at: nil},
+          '$set' => {state: :queued}
+        )
+      end
+
+      # Returns the next slice to work on in id order
+      # Returns nil if there are currently no queued slices
+      #
+      # If a slice is in queued state it will be started and assigned to this worker
+      def next_slice(worker_name)
+        # TODO: Will it perform faster without the id sort?
+        # I.e. Just process on a FIFO basis?
+        document = all.queued.
+          sort('_id' => 1).
+          find_one_and_update(
+            {'$set' => {worker_name: worker_name, state: :running, started_at: Time.now}},
+            return_document: :after
+          )
+        document.collection_name = collection_name if document
+        document
+      end
+    end
+  end
+end
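A short usage sketch tying the new `Sliced::Input` helpers together, assuming a saved batch job class that includes `RocketJob::Batch` (the class name is illustrative, mirroring the `job.input.upload` examples in the file's own documentation):

    job = MyBatchJob.new(slice_size: 100)
    job.save!

    # Slice an id range into [[1,100], [101,200], [201,250]]:
    job.input.upload_integer_range(1, 250)    # => 3 slices

    # Or stream records in one at a time:
    job.input.upload do |records|
      (1..250).each { |i| records << i }
    end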