rocketjob 3.5.2 → 4.0.0
- checksums.yaml +4 -4
- data/README.md +63 -1
- data/bin/rocketjob +1 -0
- data/bin/rocketjob_batch_perf +11 -0
- data/lib/rocket_job/batch.rb +32 -0
- data/lib/rocket_job/batch/callbacks.rb +40 -0
- data/lib/rocket_job/batch/io.rb +154 -0
- data/lib/rocket_job/batch/logger.rb +57 -0
- data/lib/rocket_job/batch/lower_priority.rb +54 -0
- data/lib/rocket_job/batch/model.rb +157 -0
- data/lib/rocket_job/batch/performance.rb +99 -0
- data/lib/rocket_job/batch/result.rb +8 -0
- data/lib/rocket_job/batch/results.rb +9 -0
- data/lib/rocket_job/batch/state_machine.rb +102 -0
- data/lib/rocket_job/batch/statistics.rb +88 -0
- data/lib/rocket_job/batch/tabular.rb +56 -0
- data/lib/rocket_job/batch/tabular/input.rb +123 -0
- data/lib/rocket_job/batch/tabular/output.rb +59 -0
- data/lib/rocket_job/batch/throttle.rb +91 -0
- data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
- data/lib/rocket_job/batch/worker.rb +288 -0
- data/lib/rocket_job/cli.rb +29 -7
- data/lib/rocket_job/config.rb +1 -1
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
- data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
- data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
- data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
- data/lib/rocket_job/jobs/performance_job.rb +18 -0
- data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
- data/lib/rocket_job/plugins/document.rb +2 -8
- data/lib/rocket_job/plugins/job/persistence.rb +6 -4
- data/lib/rocket_job/plugins/job/throttle.rb +3 -6
- data/lib/rocket_job/plugins/job/worker.rb +2 -2
- data/lib/rocket_job/server.rb +14 -3
- data/lib/rocket_job/sliced/input.rb +336 -0
- data/lib/rocket_job/sliced/output.rb +99 -0
- data/lib/rocket_job/sliced/slice.rb +166 -0
- data/lib/rocket_job/sliced/slices.rb +166 -0
- data/lib/rocket_job/sliced/writer/input.rb +60 -0
- data/lib/rocket_job/sliced/writer/output.rb +82 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +2 -2
- data/lib/rocketjob.rb +28 -0
- metadata +51 -62
- data/test/config/database.yml +0 -5
- data/test/config/mongoid.yml +0 -88
- data/test/config_test.rb +0 -10
- data/test/dirmon_entry_test.rb +0 -313
- data/test/dirmon_job_test.rb +0 -216
- data/test/files/text.txt +0 -3
- data/test/job_test.rb +0 -71
- data/test/jobs/housekeeping_job_test.rb +0 -102
- data/test/jobs/on_demand_job_test.rb +0 -59
- data/test/jobs/upload_file_job_test.rb +0 -107
- data/test/plugins/cron_test.rb +0 -166
- data/test/plugins/job/callbacks_test.rb +0 -166
- data/test/plugins/job/defaults_test.rb +0 -53
- data/test/plugins/job/logger_test.rb +0 -56
- data/test/plugins/job/model_test.rb +0 -94
- data/test/plugins/job/persistence_test.rb +0 -94
- data/test/plugins/job/state_machine_test.rb +0 -116
- data/test/plugins/job/throttle_test.rb +0 -111
- data/test/plugins/job/worker_test.rb +0 -199
- data/test/plugins/processing_window_test.rb +0 -109
- data/test/plugins/restart_test.rb +0 -193
- data/test/plugins/retry_test.rb +0 -88
- data/test/plugins/singleton_test.rb +0 -92
- data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
- data/test/plugins/state_machine_test.rb +0 -67
- data/test/plugins/transaction_test.rb +0 -84
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +0 -17
data/lib/rocket_job/plugins/document.rb
CHANGED
@@ -5,19 +5,13 @@ module RocketJob
     # Base class for storing models in MongoDB
     module Document
       extend ActiveSupport::Concern
-      include Mongoid::Document
+      include ::Mongoid::Document

       included do
         store_in client: 'rocketjob'
       end

       module ClassMethods
-        # V2 Backward compatibility
-        # DEPRECATED
-        def key(name, type, options = {})
-          field(name, options.merge(type: type))
-        end
-
         # Mongoid does not apply ordering, add sort
         def first
           all.sort('_id' => 1).first
@@ -35,7 +29,7 @@ module RocketJob
       # Allows other changes to be made on the server that will be loaded.
       def find_and_update(attrs)
         doc = collection.find(_id: id).find_one_and_update({'$set' => attrs}, return_document: :after)
-        raise(Mongoid::Errors::DocumentNotFound.new(self.class, id)) unless doc
+        raise(::Mongoid::Errors::DocumentNotFound.new(self.class, id)) unless doc

         # Clear out keys that are not returned during the reload from MongoDB
         (fields.keys + embedded_relations.keys - doc.keys).each { |key| send("#{key}=", nil) }
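Both hunks swap Mongoid for ::Mongoid. The leading :: forces constant lookup to start at the top level, so a constant nested under RocketJob cannot shadow the gem; a minimal illustration (Outer and its nested Mongoid module are hypothetical):

    require 'mongoid'

    module Outer
      module Mongoid; end # hypothetical nested module that shadows the gem

      def self.names
        # Unqualified lookup resolves to the nested Outer::Mongoid;
        # the :: prefix always resolves to the top-level gem namespace.
        [Mongoid.name, ::Mongoid.name]
      end
    end

    Outer.names # => ["Outer::Mongoid", "Mongoid"]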
data/lib/rocket_job/plugins/job/persistence.rb
CHANGED
@@ -32,9 +32,11 @@ module RocketJob
         #   job = RocketJob::Job.rocket_job_retrieve('host:pid:worker', filter)
         def rocket_job_retrieve(worker_name, filter)
           SemanticLogger.silence(:info) do
-
-
-
+            scheduled = {'$or' => [{run_at: nil}, {:run_at.lte => Time.now}]}
+            working   = {'$or' => [{state: :queued}, {state: :running, sub_state: :processing}]}
+            query     = self.and(working, scheduled)
+            query     = query.where(filter) unless filter.blank?
+            update    = {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
             query.sort(priority: 1, _id: 1).find_one_and_update(update, bypass_document_validation: true)
           end
         end
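The new block spells out the retrieve semantics: a job is eligible when it is queued, or running in the :processing sub-state, and its run_at is either unset or already due. A minimal sketch of claiming a job with it (the worker name and filter hash are illustrative):

    filter = {'_type' => 'MyJob'} # illustrative: only claim MyJob instances
    job    = RocketJob::Job.rocket_job_retrieve('host:1234:worker1', filter)
    puts "Claimed #{job.id}" if job # the job is now marked running for this worker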
@@ -101,7 +103,7 @@ module RocketJob
         return super unless destroy_on_complete
         begin
           super
-        rescue Mongoid::Errors::DocumentNotFound
+        rescue ::Mongoid::Errors::DocumentNotFound
           unless completed?
             self.state = :completed
             rocket_job_set_completed_at
data/lib/rocket_job/plugins/job/throttle.rb
CHANGED
@@ -58,17 +58,14 @@ module RocketJob
       end

       # Undefine a previously defined throttle
-      def undefine_throttle(
-        rocket_job_throttles.delete_if
+      def undefine_throttle(method_name)
+        rocket_job_throttles.delete_if { |throttle| throttle.method_name == method_name }
       end

       # Has a throttle been defined?
       def throttle?(method_name)
-        rocket_job_throttles.
+        rocket_job_throttles.any? { |throttle| throttle.method_name == method_name }
       end
-
-      # DEPRECATED
-      alias has_throttle? throttle?
     end

     # Default throttle to use when the throttle is exceeded.
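With the block bodies restored above, defining and later removing a throttle looks roughly like this (a sketch: MyJob and the 5-job limit are hypothetical, and it assumes define_throttle from this same plugin registers the named instance method):

    class MyJob < RocketJob::Job
      # Hypothetical throttle: hold back new instances when 5 are already running.
      define_throttle :too_many_running?

      def too_many_running?
        self.class.running.count >= 5
      end

      def perform
        # ...
      end
    end

    MyJob.throttle?(:too_many_running?)         # => true
    MyJob.undefine_throttle(:too_many_running?)
    MyJob.throttle?(:too_many_running?)         # => false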
data/lib/rocket_job/plugins/job/worker.rb
CHANGED
@@ -51,7 +51,7 @@ module RocketJob
         else
           job.worker_name = worker_name
           job.rocket_job_fail_on_exception!(worker_name) do
-
+            job.start!
           end
           return job if job.running?
         end
@@ -105,7 +105,7 @@ module RocketJob
     #
     # Exceptions are _not_ suppressed and should be handled by the caller.
     def perform_now
-      raise(Mongoid::Errors::Validations, self) unless valid?
+      raise(::Mongoid::Errors::Validations, self) unless valid?

       worker = RocketJob::Worker.new(inline: true)
       start if may_start?
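perform_now executes the job inline on the calling thread via an inline RocketJob::Worker, which is handy in tests and consoles. A usage sketch (HelloJob is hypothetical):

    class HelloJob < RocketJob::Job
      def perform
        logger.info 'Hello World'
      end
    end

    job = HelloJob.new
    job.perform_now # raises ::Mongoid::Errors::Validations when invalid, else runs inline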
data/lib/rocket_job/server.rb
CHANGED
@@ -1,3 +1,4 @@
+require 'yaml'
 require 'concurrent'
 module RocketJob
   # Server
@@ -45,7 +46,7 @@ module RocketJob
     field :started_at, type: Time

     # Filter to apply to control which job classes this server can process
-    field :
+    field :yaml_filter, type: String

     # The heartbeat information for this server
     embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
@@ -188,7 +189,7 @@ module RocketJob
     def self.run(attrs = {})
       Thread.current.name = 'rocketjob main'
       # Create Indexes on server startup
-      Mongoid::Tasks::Database.create_indexes
+      ::Mongoid::Tasks::Database.create_indexes
       register_signal_handlers

       server = create!(attrs)
@@ -228,6 +229,15 @@ module RocketJob
       (Time.now - heartbeat.updated_at) >= dead_seconds
     end

+    # Where clause filter to apply to workers looking for jobs
+    def filter
+      YAML.load(yaml_filter) if yaml_filter
+    end
+
+    def filter=(hash)
+      self.yaml_filter = hash.nil? ? nil : hash.to_yaml
+    end
+
     private

     # Returns [Array<Worker>] collection of workers
@@ -238,6 +248,7 @@ module RocketJob
     # Management Thread
     def run
       logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
+      logger.info('Running with filter', filter) if filter
       build_heartbeat(updated_at: Time.now, workers: 0)
       started!
       logger.info 'Rocket Job Server started'
@@ -262,7 +273,7 @@ module RocketJob
       end

       logger.info 'Shutdown'
-    rescue Mongoid::Errors::DocumentNotFound
+    rescue ::Mongoid::Errors::DocumentNotFound
       logger.warn('Server has been destroyed. Going down hard!')
     rescue Exception => exc
       logger.error('RocketJob::Server is stopping due to an exception', exc)
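The new filter accessors round-trip a plain Hash through YAML so the where-clause can persist in the String yaml_filter field. A quick sketch (the job class name is illustrative):

    server = RocketJob::Server.new
    server.filter = {'_type' => 'DataCleansingJob'} # illustrative where-clause
    server.yaml_filter # => "---\n_type: DataCleansingJob\n"
    server.filter      # => {"_type" => "DataCleansingJob"}

    server.filter = nil
    server.yaml_filter # => nil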
data/lib/rocket_job/sliced/input.rb
ADDED
@@ -0,0 +1,336 @@
+module RocketJob
+  module Sliced
+    class Input < Slices
+      # Load lines for processing from the supplied filename or stream into this job.
+      #
+      # Returns [Integer] the number of lines loaded into this collection
+      #
+      # Parameters
+      #   file_name_or_io [String | IO]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO Stream that responds to: :read
+      #
+      #   streams [Symbol|Array]
+      #     Streams to convert the data whilst it is being read.
+      #     When nil, the file_name extensions will be inspected to determine what
+      #     streams should be applied.
+      #     Default: nil
+      #
+      #   delimiter [String]
+      #     Line / Record delimiter to use to break the stream up into records
+      #       Any string to break the stream up by
+      #       The records when saved will not include this delimiter
+      #     Default: nil
+      #       Automatically detect line endings and break up by line
+      #       Searches for the first "\r\n" or "\n" and then uses that as the
+      #       delimiter for all subsequent records
+      #
+      #   buffer_size [Integer]
+      #     Size of the blocks when reading from the input file / stream.
+      #     Default: 65536 ( 64K )
+      #
+      #   encoding: [String|Encoding]
+      #     Encode returned data with this encoding.
+      #       'US-ASCII':   Original 7 bit ASCII Format
+      #       'ASCII-8BIT': 8-bit ASCII Format
+      #       'UTF-8':      UTF-8 Format
+      #       Etc.
+      #     Default: 'UTF-8'
+      #
+      #   encode_replace: [String]
+      #     The character to replace with when a character cannot be converted to the target encoding.
+      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
+      #     Default: nil
+      #
+      #   encode_cleaner: [nil|symbol|Proc]
+      #     Cleanse data read from the input stream.
+      #     nil:         No cleansing
+      #     :printable   Cleanse all non-printable characters except \r and \n
+      #     Proc/lambda  Proc to call after every read to cleanse the data
+      #     Default: :printable
+      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+      #
+      # Example:
+      #   # Load plain text records from a file
+      #   job.input.upload('hello.csv')
+      #
+      # Example:
+      #   # Load plain text records from a file, stripping all non-printable characters,
+      #   # as well as any characters that cannot be converted to UTF-8
+      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.input.upload('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.input.upload('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+      #
+      # Example: Supply custom options
+      #   job.input.upload('myfile.csv.enc', streams: [:enc])
+      #
+      # Example: Extract streams from filename but write to a temp file
+      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
+      #   t = Tempfile.new('my_project')
+      #   job.input.upload(t.to_path, streams: streams)
+      #
+      # Example: Upload by writing records one at a time to the upload stream
+      #   job.upload do |writer|
+      #     10.times { |i| writer << i }
+      #   end
+      #
+      # Notes:
+      # - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
+      #   is recommended since Mongo only supports UTF-8 strings.
+      # - When zip format, the Zip file/stream must contain only one file, the first file found will be
+      #   loaded into the job
+      # - If an io stream is supplied, it is read until it returns nil.
+      # - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
+      # - Only call from one thread at a time per job instance.
+      # - CSV parsing is slow, so it is left for the workers to do.
+      def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
+        raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
+
+        block ||= -> (io) do
+          iterator = "each_#{stream_mode}".to_sym
+          IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
+        end
+
+        create_indexes
+        Writer::Input.collect(self, on_first: on_first, &block)
+      end
+
+      # Upload the result of a MongoDB query to the input collection for processing
+      # Useful when an entire MongoDB collection, or part thereof needs to be
+      # processed by a job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # If a Block is supplied it is passed the document returned from the
+      # database and should return a record for processing
+      #
+      # If no Block is supplied then the record will be the :fields returned
+      # from MongoDB
+      #
+      # Note:
+      #   This method uses the collection and not the MongoMapper document to
+      #   avoid the overhead of constructing a Model with every document returned
+      #   by the query
+      #
+      # Note:
+      #   The Block must return types that can be serialized to BSON.
+      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
+      #   Invalid: Date, etc.
+      #
+      # Example: Upload document ids
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria)
+      #
+      # Example: Upload just the supplied column
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
+      def upload_mongo_query(criteria, *column_names, &block)
+        create_indexes
+        options = criteria.options
+
+        # Without a block extract the fields from the supplied criteria
+        if block
+          # Criteria is returning old school :fields instead of :projections
+          options[:projection] = options.delete(:fields) if options.key?(:fields)
+        else
+          column_names = column_names.collect(&:to_s)
+          column_names << '_id' if column_names.size.zero?
+
+          fields = options.delete(:fields) || {}
+          column_names.each { |col| fields[col] = 1 }
+          options[:projection] = fields
+
+          block =
+            if column_names.size == 1
+              column = column_names.first
+              ->(document) { document[column] }
+            else
+              ->(document) { column_names.collect { |c| document[c] } }
+            end
+        end
+
+        Writer::Input.collect(self) do |records|
+          # Drop down to the mongo driver level to avoid constructing a Model for each document returned
+          criteria.klass.collection.find(criteria.selector, options).each do |document|
+            records << block.call(document)
+          end
+        end
+      end
+
+      # Upload results from an Arel into RocketJob::SlicedJob.
+      #
+      # Params
+      #   column_names
+      #     When a block is not supplied, supply the names of the columns to be returned
+      #     and uploaded into the job
+      #     These columns are automatically added to the select list to reduce overhead
+      #
+      # If a Block is supplied it is passed the model returned from the database and should
+      # return the work item to be uploaded into the job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # Example: Upload id's for all users
+      #   arel = User.all
+      #   job.record_count = job.upload_arel(arel)
+      #
+      # Example: Upload selected user id's
+      #   arel = User.where(country_code: 'US')
+      #   job.record_count = job.upload_arel(arel)
+      #
+      # Example: Upload user_name and zip_code
+      #   arel = User.where(country_code: 'US')
+      #   job.record_count = job.upload_arel(arel, :user_name, :zip_code)
+      def upload_arel(arel, *column_names, &block)
+        create_indexes
+        unless block
+          column_names = column_names.collect(&:to_sym)
+          column_names << :id if column_names.size.zero?
+
+          block =
+            if column_names.size == 1
+              column = column_names.first
+              ->(model) { model.send(column) }
+            else
+              ->(model) { column_names.collect { |c| model.send(c) } }
+            end
+          # find_each requires the :id column in the query
+          selection = column_names.include?(:id) ? column_names : column_names + [:id]
+          arel      = arel.select(selection)
+        end
+
+        Writer::Input.collect(self) do |records|
+          arel.find_each { |model| records << block.call(model) }
+        end
+      end
+
+      # Upload sliced ranges of integers as arrays of start and end ids
+      #
+      # Returns [Integer] the number of slices uploaded
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.record_count = job.upload_integer_range(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.record_count =  job.insert([200,299])
+      #   job.record_count += job.insert([300,399])
+      #   job.record_count += job.insert([400,421])
+      def upload_integer_range(start_id, last_id)
+        create_indexes
+        count = 0
+        while start_id <= last_id
+          end_id = start_id + slice_size - 1
+          end_id = last_id if end_id > last_id
+          create!(records: [[start_id, end_id]])
+          start_id += slice_size
+          count += 1
+        end
+        count
+      end
+
+      # Upload sliced ranges of integers as arrays of start and end ids,
+      # starting with the last range first
+      #
+      # Returns [Integer] the number of slices uploaded
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed.
+      # Useful for when the highest order integer values should be processed before
+      # the lower integer value ranges. For example when processing every record
+      # in a database based on the id column
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
+      #
+      #   # Equivalent to calling:
+      #   job.insert([400,421])
+      #   job.insert([300,399])
+      #   job.insert([200,299])
+      def upload_integer_range_in_reverse_order(start_id, last_id)
+        create_indexes
+        end_id = last_id
+        count  = 0
+        while end_id >= start_id
+          first_id = end_id - slice_size + 1
+          first_id = start_id if first_id.negative? || (first_id < start_id)
+          create!(records: [[first_id, end_id]])
+          end_id -= slice_size
+          count += 1
+        end
+        count
+      end
+
+      # Iterate over each failed record, if any
+      # Since each slice can only contain 1 failed record, only the failed
+      # record is returned along with the slice containing the exception
+      # details
+      #
+      # Example:
+      #   job.each_failed_record do |record, slice|
+      #     ap slice
+      #   end
+      #
+      def each_failed_record
+        failed.each do |slice|
+          if slice.exception && (record_number = slice.exception.record_number)
+            yield(slice.at(record_number - 1), slice)
+          end
+        end
+      end
+
+      # Requeue all failed slices
+      def requeue_failed
+        failed.update_all(
+          '$unset' => {worker_name: nil, started_at: nil},
+          '$set'   => {state: :queued}
+        )
+      end
+
+      # Requeue all running slices for a server or worker that is no longer available
+      def requeue_running(worker_name)
+        running.where(worker_name: /\A#{worker_name}/).update_all(
+          '$unset' => {worker_name: nil, started_at: nil},
+          '$set'   => {state: :queued}
+        )
+      end
+
+      # Returns the next slice to work on in id order
+      # Returns nil if there are currently no queued slices
+      #
+      # If a slice is in queued state it will be started and assigned to this worker
+      def next_slice(worker_name)
+        # TODO: Will it perform faster without the id sort?
+        # I.e. Just process on a FIFO basis?
+        document = all.queued.
+          sort('_id' => 1).
+          find_one_and_update(
+            {'$set' => {worker_name: worker_name, state: :running, started_at: Time.now}},
+            return_document: :after
+          )
+        document.collection_name = collection_name if document
+        document
+      end
+    end
+  end
+end
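End to end, a v4 batch job would exercise this new Sliced::Input collection roughly as follows (a sketch: MyBatchJob and the file name are hypothetical, and it assumes the RocketJob::Batch plugin added in this release wires job.input to Sliced::Input and exposes upload on the job):

    class MyBatchJob < RocketJob::Job
      include RocketJob::Batch

      self.slice_size = 100

      def perform(line)
        line.upcase # process one uploaded record per call
      end
    end

    job = MyBatchJob.new
    job.upload('customers.csv.gz') # streams inferred from the file extensions
    job.save!

    # After a run: inspect any failed records, then put those slices back on the queue.
    job.input.each_failed_record { |record, slice| puts record }
    job.input.requeue_failed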