rocketjob 5.4.1 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +175 -5
- data/bin/rocketjob_batch_perf +1 -1
- data/bin/rocketjob_perf +1 -1
- data/lib/rocket_job/batch/categories.rb +345 -0
- data/lib/rocket_job/batch/io.rb +174 -106
- data/lib/rocket_job/batch/model.rb +20 -68
- data/lib/rocket_job/batch/performance.rb +19 -7
- data/lib/rocket_job/batch/statistics.rb +34 -12
- data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
- data/lib/rocket_job/batch/worker.rb +31 -26
- data/lib/rocket_job/batch.rb +3 -1
- data/lib/rocket_job/category/base.rb +81 -0
- data/lib/rocket_job/category/input.rb +170 -0
- data/lib/rocket_job/category/output.rb +34 -0
- data/lib/rocket_job/cli.rb +25 -17
- data/lib/rocket_job/dirmon_entry.rb +23 -13
- data/lib/rocket_job/event.rb +1 -1
- data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
- data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
- data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
- data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
- data/lib/rocket_job/jobs/conversion_job.rb +43 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
- data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
- data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
- data/lib/rocket_job/jobs/performance_job.rb +3 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
- data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
- data/lib/rocket_job/lookup_collection.rb +69 -0
- data/lib/rocket_job/plugins/cron.rb +60 -20
- data/lib/rocket_job/plugins/job/model.rb +25 -50
- data/lib/rocket_job/plugins/job/persistence.rb +36 -0
- data/lib/rocket_job/plugins/job/throttle.rb +2 -2
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/job/worker.rb +2 -7
- data/lib/rocket_job/plugins/restart.rb +3 -103
- data/lib/rocket_job/plugins/state_machine.rb +4 -3
- data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
- data/lib/rocket_job/ractor_worker.rb +42 -0
- data/lib/rocket_job/server/model.rb +1 -1
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
- data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
- data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
- data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
- data/lib/rocket_job/sliced/input.rb +42 -54
- data/lib/rocket_job/sliced/slice.rb +12 -16
- data/lib/rocket_job/sliced/slices.rb +26 -11
- data/lib/rocket_job/sliced/writer/input.rb +46 -18
- data/lib/rocket_job/sliced/writer/output.rb +33 -45
- data/lib/rocket_job/sliced.rb +1 -74
- data/lib/rocket_job/subscribers/server.rb +1 -1
- data/lib/rocket_job/thread_worker.rb +46 -0
- data/lib/rocket_job/throttle_definitions.rb +7 -1
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +21 -55
- data/lib/rocket_job/worker_pool.rb +5 -7
- data/lib/rocketjob.rb +53 -43
- metadata +36 -28
- data/lib/rocket_job/batch/tabular/input.rb +0 -131
- data/lib/rocket_job/batch/tabular/output.rb +0 -65
- data/lib/rocket_job/batch/tabular.rb +0 -56
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -9,34 +9,66 @@ module RocketJob
|
|
9
9
|
# Returns [RocketJob::Sliced::Input] input collection for holding input slices
|
10
10
|
#
|
11
11
|
# Parameters:
|
12
|
-
# category [Symbol]
|
13
|
-
# The name of the category to access or upload data into
|
12
|
+
# category [Symbol|RocketJob::Category::Input]
|
13
|
+
# The category or the name of the category to access or upload data into
|
14
14
|
# Default: None ( Uses the single default input collection for this job )
|
15
15
|
# Validates: This value must be one of those listed in #input_categories
|
16
16
|
def input(category = :main)
|
17
|
-
|
18
|
-
raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
|
19
|
-
end
|
17
|
+
category = input_category(category)
|
20
18
|
|
21
|
-
(@inputs ||= {})[category] ||=
|
19
|
+
(@inputs ||= {})[category.name] ||= category.data_store(self)
|
22
20
|
end
|
23
21
|
|
24
22
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
25
23
|
# Returns nil if no output is being collected
|
26
24
|
#
|
27
25
|
# Parameters:
|
28
|
-
# category [Symbol]
|
29
|
-
# The name of the category to access or download data from
|
26
|
+
# category [Symbol|RocketJob::Category::Output]
|
27
|
+
# The category or the name of the category to access or download data from
|
30
28
|
# Default: None ( Uses the single default output collection for this job )
|
31
29
|
# Validates: This value must be one of those listed in #output_categories
|
32
30
|
def output(category = :main)
|
33
|
-
|
34
|
-
raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
|
35
|
-
end
|
31
|
+
category = output_category(category)
|
36
32
|
|
37
|
-
(@outputs ||= {})[category] ||=
|
33
|
+
(@outputs ||= {})[category.name] ||= category.data_store(self)
|
38
34
|
end
|
39
35
|
|
36
|
+
# Rapidly upload individual records in batches.
|
37
|
+
#
|
38
|
+
# Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
|
39
|
+
# for each and every row.
|
40
|
+
#
|
41
|
+
# input_category(:my_lookup).find(id: 123).first
|
42
|
+
#
|
43
|
+
# Lookup collection.
|
44
|
+
#
|
45
|
+
# Upload side / secondary lookup tables that can be accessed during job processing.
|
46
|
+
#
|
47
|
+
# Example:
|
48
|
+
# lookup_collection(:my_lookup).upload do |io|
|
49
|
+
# io << {id: 123, data: "first record"}
|
50
|
+
# io << {id: 124, data: "second record"}
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
# Parameters:
|
54
|
+
# category [Symbol|RocketJob::Category::Input]
|
55
|
+
# The category or the name of the category to access or download data from
|
56
|
+
# Default: None ( Uses the single default output collection for this job )
|
57
|
+
# Validates: This value must be one of those listed in #input_categories
|
58
|
+
# def lookup_collection(category = :main)
|
59
|
+
# category = input_category(category) unless category.is_a?(Category::Input)
|
60
|
+
#
|
61
|
+
# collection = (@lookup_collections ||= {})[category.name]
|
62
|
+
#
|
63
|
+
# unless collection
|
64
|
+
# collection_name = "rocket_job.inputs.#{id}"
|
65
|
+
# collection_name << ".#{category.name}" unless category.name == :main
|
66
|
+
#
|
67
|
+
# @lookup_collections[category.name] ||=
|
68
|
+
# LookupCollection.new(Sliced::Slice.collection.database, collection_name)
|
69
|
+
# end
|
70
|
+
# end
|
71
|
+
|
40
72
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
41
73
|
#
|
42
74
|
# Returns [Integer] the number of records uploaded.
|
@@ -65,6 +97,11 @@ module RocketJob
|
|
65
97
|
# Parses each line from the file into a Hash and uploads each hash for processing by workers.
|
66
98
|
# See IOStreams::Stream#each.
|
67
99
|
#
|
100
|
+
# category [Symbol|RocketJob::Category::Input]
|
101
|
+
# The category or the name of the category to access or upload data into
|
102
|
+
# Default: None ( Uses the single default input collection for this job )
|
103
|
+
# Validates: This value must be one of those listed in #input_categories
|
104
|
+
#
|
68
105
|
# Example:
|
69
106
|
# # Load plain text records from a file
|
70
107
|
# job.upload('hello.csv')
|
@@ -113,29 +150,7 @@ module RocketJob
|
|
113
150
|
# * If an io stream is supplied, it is read until it returns nil.
|
114
151
|
# * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
|
115
152
|
# * CSV parsing is slow, so it is usually left for the workers to do.
|
116
|
-
|
117
|
-
raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
|
118
|
-
|
119
|
-
stream_mode = stream_mode.to_sym
|
120
|
-
# Backward compatibility with existing v4 jobs
|
121
|
-
stream_mode = :array if stream_mode == :row
|
122
|
-
stream_mode = :hash if stream_mode == :record
|
123
|
-
|
124
|
-
count =
|
125
|
-
if block
|
126
|
-
input(category).upload(on_first: on_first, &block)
|
127
|
-
else
|
128
|
-
path = IOStreams.new(stream)
|
129
|
-
path.file_name = file_name if file_name
|
130
|
-
self.upload_file_name = path.file_name
|
131
|
-
input(category).upload(on_first: on_first) do |io|
|
132
|
-
path.each(stream_mode, **args) { |line| io << line }
|
133
|
-
end
|
134
|
-
end
|
135
|
-
self.record_count = (record_count || 0) + count
|
136
|
-
count
|
137
|
-
end
|
138
|
-
|
153
|
+
#
|
139
154
|
# Upload results from an Arel into RocketJob::SlicedJob.
|
140
155
|
#
|
141
156
|
# Params
|
@@ -144,6 +159,9 @@ module RocketJob
|
|
144
159
|
# and uploaded into the job
|
145
160
|
# These columns are automatically added to the select list to reduce overhead
|
146
161
|
#
|
162
|
+
# category [Symbol|RocketJob::Category::Input]
|
163
|
+
# The category or the name of the category to upload to.
|
164
|
+
#
|
147
165
|
# If a Block is supplied it is passed the model returned from the database and should
|
148
166
|
# return the work item to be uploaded into the job.
|
149
167
|
#
|
@@ -159,18 +177,13 @@ module RocketJob
|
|
159
177
|
#
|
160
178
|
# Example: Upload user_name and zip_code
|
161
179
|
# arel = User.where(country_code: 'US')
|
162
|
-
# job.upload_arel(arel, :user_name, :zip_code)
|
180
|
+
# job.upload_arel(arel, columns: [:user_name, :zip_code])
|
163
181
|
#
|
164
182
|
# Notes:
|
165
183
|
# * Only call from one thread at a time against a single instance of this job.
|
166
184
|
# * The record_count for the job is set to the number of records returned by the arel.
|
167
185
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
168
186
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
169
|
-
def upload_arel(arel, *column_names, category: :main, &block)
|
170
|
-
count = input(category).upload_arel(arel, *column_names, &block)
|
171
|
-
self.record_count = (record_count || 0) + count
|
172
|
-
count
|
173
|
-
end
|
174
187
|
|
175
188
|
# Upload the result of a MongoDB query to the input collection for processing
|
176
189
|
# Useful when an entire MongoDB collection, or part thereof needs to be
|
@@ -198,30 +211,25 @@ module RocketJob
|
|
198
211
|
# criteria = User.where(state: 'FL')
|
199
212
|
# job.record_count = job.upload_mongo_query(criteria)
|
200
213
|
#
|
201
|
-
# Example: Upload
|
214
|
+
# Example: Upload only the specified column(s)
|
202
215
|
# criteria = User.where(state: 'FL')
|
203
|
-
# job.record_count = job.upload_mongo_query(criteria, :zip_code)
|
216
|
+
# job.record_count = job.upload_mongo_query(criteria, columns: [:zip_code])
|
204
217
|
#
|
205
218
|
# Notes:
|
206
219
|
# * Only call from one thread at a time against a single instance of this job.
|
207
220
|
# * The record_count for the job is set to the number of records returned by the mongo query.
|
208
221
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
209
222
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
210
|
-
def upload_mongo_query(criteria, *column_names, category: :main, &block)
|
211
|
-
count = input(category).upload_mongo_query(criteria, *column_names, &block)
|
212
|
-
self.record_count = (record_count || 0) + count
|
213
|
-
count
|
214
|
-
end
|
215
223
|
|
216
224
|
# Upload sliced range of integer requests as arrays of start and end ids.
|
217
225
|
#
|
218
|
-
# Returns [Integer]
|
226
|
+
# Returns [Integer] the number of slices uploaded.
|
219
227
|
#
|
220
228
|
# Uploads one range per slice so that the response can return multiple records
|
221
229
|
# for each slice processed
|
222
230
|
#
|
223
231
|
# Example
|
224
|
-
# job.slice_size = 100
|
232
|
+
# job.input_category.slice_size = 100
|
225
233
|
# job.upload_integer_range(200, 421)
|
226
234
|
#
|
227
235
|
# # Equivalent to calling:
|
@@ -234,17 +242,11 @@ module RocketJob
|
|
234
242
|
# * The record_count for the job is set to: last_id - start_id + 1.
|
235
243
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
236
244
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
237
|
-
def upload_integer_range(start_id, last_id, category: :main)
|
238
|
-
input(category).upload_integer_range(start_id, last_id)
|
239
|
-
count = last_id - start_id + 1
|
240
|
-
self.record_count = (record_count || 0) + count
|
241
|
-
count
|
242
|
-
end
|
243
245
|
|
244
246
|
# Upload sliced range of integer requests as arrays of start and end ids
|
245
247
|
# starting with the last range first
|
246
248
|
#
|
247
|
-
# Returns [Integer]
|
249
|
+
# Returns [Integer] the number of slices uploaded.
|
248
250
|
#
|
249
251
|
# Uploads one range per slice so that the response can return multiple records
|
250
252
|
# for each slice processed.
|
@@ -253,7 +255,7 @@ module RocketJob
|
|
253
255
|
# in a database based on the id column
|
254
256
|
#
|
255
257
|
# Example
|
256
|
-
# job.slice_size = 100
|
258
|
+
# job.input_category.slice_size = 100
|
257
259
|
# job.upload_integer_range_in_reverse_order(200, 421)
|
258
260
|
#
|
259
261
|
# # Equivalent to calling:
|
@@ -266,14 +268,102 @@ module RocketJob
|
|
266
268
|
# * The record_count for the job is set to: last_id - start_id + 1.
|
267
269
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
268
270
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
269
|
-
|
270
|
-
|
271
|
-
|
271
|
+
|
272
|
+
def upload(object = nil, category: :main, file_name: nil, stream_mode: nil, on_first: nil, columns: nil, slice_batch_size: nil, **args, &block)
|
273
|
+
input_collection = input(category)
|
274
|
+
|
275
|
+
if block
|
276
|
+
raise(ArgumentError, "Cannot supply both an object to upload, and a block.") if object
|
277
|
+
if stream_mode || columns || slice_batch_size || args.size > 0
|
278
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading a block. Only accepts :category, :file_name, or :on_first")
|
279
|
+
end
|
280
|
+
|
281
|
+
category = input_category(category)
|
282
|
+
category.file_name = file_name if file_name
|
283
|
+
|
284
|
+
# Extract the header line during the upload when applicable.
|
285
|
+
extract_header = category.extract_header_callback(on_first)
|
286
|
+
|
287
|
+
count = input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size, &block)
|
288
|
+
self.record_count = (record_count || 0) + count
|
289
|
+
return count
|
290
|
+
end
|
291
|
+
|
292
|
+
count =
|
293
|
+
case object
|
294
|
+
when Range
|
295
|
+
if file_name || stream_mode || on_first || args.size > 0
|
296
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading a Range. Only accepts :category, :columns, or :slice_batch_size")
|
297
|
+
end
|
298
|
+
|
299
|
+
first = object.first
|
300
|
+
last = object.last
|
301
|
+
if first < last
|
302
|
+
input_collection.upload_integer_range(first, last, slice_batch_size: slice_batch_size || 1_000)
|
303
|
+
else
|
304
|
+
input_collection.upload_integer_range_in_reverse_order(last, first, slice_batch_size: slice_batch_size || 1_000)
|
305
|
+
end
|
306
|
+
when Mongoid::Criteria
|
307
|
+
if file_name || stream_mode || on_first || args.size > 0
|
308
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading a Mongoid::Criteria. Only accepts :category, :columns, or :slice_batch_size")
|
309
|
+
end
|
310
|
+
|
311
|
+
input_collection.upload_mongo_query(object, columns: columns, slice_batch_size: slice_batch_size, &block)
|
312
|
+
when defined?(ActiveRecord::Relation) ? ActiveRecord::Relation : false
|
313
|
+
if file_name || stream_mode || on_first || args.size > 0
|
314
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading an ActiveRecord::Relation. Only accepts :category, :columns, or :slice_batch_size")
|
315
|
+
end
|
316
|
+
|
317
|
+
input_collection.upload_arel(object, columns: columns, slice_batch_size: slice_batch_size, &block)
|
318
|
+
|
319
|
+
else
|
320
|
+
raise(ArgumentError, "Unknown keyword argument :columns when uploading a file") if columns
|
321
|
+
|
322
|
+
category = input_category(category)
|
323
|
+
|
324
|
+
# Extract the header line during the upload when applicable.
|
325
|
+
extract_header = category.extract_header_callback(on_first)
|
326
|
+
path = category.upload_path(object, original_file_name: file_name)
|
327
|
+
|
328
|
+
input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size) do |io|
|
329
|
+
path.each(stream_mode || :line, **args) { |line| io << line }
|
330
|
+
end
|
331
|
+
|
332
|
+
end
|
333
|
+
|
334
|
+
self.record_count = (record_count || 0) + count
|
335
|
+
count
|
336
|
+
end
|
337
|
+
|
338
|
+
# @deprecated
|
339
|
+
def upload_arel(arel, *column_names, category: :main, &block)
|
340
|
+
count = input(category).upload_arel(arel, columns: column_names, &block)
|
341
|
+
self.record_count = (record_count || 0) + count
|
342
|
+
count
|
343
|
+
end
|
344
|
+
|
345
|
+
# @deprecated
|
346
|
+
def upload_mongo_query(criteria, *column_names, category: :main, &block)
|
347
|
+
count = input(category).upload_mongo_query(criteria, columns: column_names, &block)
|
272
348
|
self.record_count = (record_count || 0) + count
|
273
349
|
count
|
274
350
|
end
|
275
351
|
|
276
|
-
#
|
352
|
+
# @deprecated
|
353
|
+
def upload_integer_range(start_id, last_id, category: :main, slice_batch_size: 1_000)
|
354
|
+
count = input(category).upload_integer_range(start_id, last_id, slice_batch_size: slice_batch_size)
|
355
|
+
self.record_count = (record_count || 0) + count
|
356
|
+
count
|
357
|
+
end
|
358
|
+
|
359
|
+
# @deprecated
|
360
|
+
def upload_integer_range_in_reverse_order(start_id, last_id, category: :main, slice_batch_size: 1_000)
|
361
|
+
count = input(category).upload_integer_range_in_reverse_order(start_id, last_id, slice_batch_size: slice_batch_size)
|
362
|
+
self.record_count = (record_count || 0) + count
|
363
|
+
count
|
364
|
+
end
|
365
|
+
|
366
|
+
# Upload the supplied slice for processing by workers
|
277
367
|
#
|
278
368
|
# Updates the record_count after adding the records
|
279
369
|
#
|
@@ -285,12 +375,12 @@ module RocketJob
|
|
285
375
|
# For example the following types are not supported: Date
|
286
376
|
#
|
287
377
|
# Note:
|
288
|
-
# The caller should
|
378
|
+
# The caller should honor `:slice_size`, since the entire slice is saved as-is.
|
289
379
|
#
|
290
380
|
# Note:
|
291
381
|
# Not thread-safe. Only call from one thread at a time
|
292
|
-
def upload_slice(slice)
|
293
|
-
input.insert(slice)
|
382
|
+
def upload_slice(slice, category: :main)
|
383
|
+
input(category).insert(slice)
|
294
384
|
count = slice.size
|
295
385
|
self.record_count = (record_count || 0) + count
|
296
386
|
count
|
@@ -353,56 +443,34 @@ module RocketJob
|
|
353
443
|
def download(stream = nil, category: :main, header_line: nil, **args, &block)
|
354
444
|
raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
|
355
445
|
|
356
|
-
|
446
|
+
category = output_category(category) unless category.is_a?(Category::Output)
|
447
|
+
output_collection = output(category)
|
448
|
+
|
449
|
+
# Store the output file name in the category
|
450
|
+
category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
|
357
451
|
|
358
|
-
|
452
|
+
header_line ||= category.render_header
|
359
453
|
|
360
|
-
|
361
|
-
IOStreams.new(stream).stream(:none).writer(**args) do |io|
|
362
|
-
raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
|
454
|
+
return output_collection.download(header_line: header_line, &block) if block
|
363
455
|
|
364
|
-
|
456
|
+
raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
|
457
|
+
|
458
|
+
if output_collection.slice_class.binary_format
|
459
|
+
binary_header_line = output_collection.slice_class.to_binary(header_line) if header_line
|
460
|
+
|
461
|
+
# Don't overwrite supplied stream options if any
|
462
|
+
stream = stream&.is_a?(IOStreams::Stream) ? stream.dup : IOStreams.new(category.file_name)
|
463
|
+
stream.remove_from_pipeline(output_collection.slice_class.binary_format)
|
464
|
+
stream.writer(**args) do |io|
|
465
|
+
# TODO: Binary formats should return the record count, instead of the slice count.
|
466
|
+
output_collection.download(header_line: binary_header_line) { |record| io.write(record) }
|
365
467
|
end
|
366
468
|
else
|
367
|
-
IOStreams.new(stream).writer(:line, **args) do |io|
|
469
|
+
IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
|
368
470
|
output_collection.download(header_line: header_line) { |record| io << record }
|
369
471
|
end
|
370
472
|
end
|
371
473
|
end
|
372
|
-
|
373
|
-
# Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
|
374
|
-
#
|
375
|
-
# If a block is supplied, the block is supplied with a writer that should be used to
|
376
|
-
# accumulate the results.
|
377
|
-
#
|
378
|
-
# Examples
|
379
|
-
#
|
380
|
-
# job.write_output('hello world')
|
381
|
-
#
|
382
|
-
# job.write_output do |writer|
|
383
|
-
# writer << 'hello world'
|
384
|
-
# end
|
385
|
-
#
|
386
|
-
# job.write_output do |writer|
|
387
|
-
# result = RocketJob::Batch::Results
|
388
|
-
# result << RocketJob::Batch::Result.new(:main, 'hello world')
|
389
|
-
# result << RocketJob::Batch::Result.new(:errors, 'errors')
|
390
|
-
# writer << result
|
391
|
-
# end
|
392
|
-
#
|
393
|
-
# result = RocketJob::Batch::Results
|
394
|
-
# result << RocketJob::Batch::Result.new(:main, 'hello world')
|
395
|
-
# result << RocketJob::Batch::Result.new(:errors, 'errors')
|
396
|
-
# job.write_output(result)
|
397
|
-
def write_output(result = nil, input_slice = nil, &block)
|
398
|
-
if block
|
399
|
-
RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
|
400
|
-
else
|
401
|
-
raise(ArgumentError, "result parameter is required when no block is supplied") unless result
|
402
|
-
|
403
|
-
RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
|
404
|
-
end
|
405
|
-
end
|
406
474
|
end
|
407
475
|
end
|
408
476
|
end
|
@@ -11,46 +11,6 @@ module RocketJob
|
|
11
11
|
#
|
12
12
|
# The following attributes are set when the job is created
|
13
13
|
|
14
|
-
# Number of records to include in each slice that is processed
|
15
|
-
# Note:
|
16
|
-
# slice_size is only used by SlicedJob#upload & Sliced::Input#upload
|
17
|
-
# When slices are supplied directly, their size is not modified to match this number
|
18
|
-
field :slice_size, type: Integer, default: 100, class_attribute: true, user_editable: true, copy_on_restart: true
|
19
|
-
|
20
|
-
# Whether to retain nil results.
|
21
|
-
#
|
22
|
-
# Only applicable if `collect_output` is `true`
|
23
|
-
# Set to `false` to prevent collecting output from the perform
|
24
|
-
# method when it returns `nil`.
|
25
|
-
field :collect_nil_output, type: Boolean, default: true, class_attribute: true
|
26
|
-
|
27
|
-
# Optional Array<Symbol> list of categories that this job can output to
|
28
|
-
#
|
29
|
-
# By using categories the output from #perform can be placed in different
|
30
|
-
# output collections, and therefore different output files
|
31
|
-
#
|
32
|
-
# Categories must be declared in advance to avoid a #perform method
|
33
|
-
# accidentally writing its results to an unknown category
|
34
|
-
field :output_categories, type: Array, default: [:main], class_attribute: true
|
35
|
-
|
36
|
-
# Optional Array<Symbol> list of categories that this job can load input data into
|
37
|
-
field :input_categories, type: Array, default: [:main], class_attribute: true
|
38
|
-
|
39
|
-
# The file name of the uploaded file, if any.
|
40
|
-
# Set by #upload if a file name was supplied, but can also be set explicitly.
|
41
|
-
# May or may not include the fully qualified path name.
|
42
|
-
field :upload_file_name, type: String
|
43
|
-
|
44
|
-
# Compress uploaded records.
|
45
|
-
# The fields are not affected in any way, only the data stored in the
|
46
|
-
# records and results collections will be compressed
|
47
|
-
field :compress, type: Object, default: false, class_attribute: true
|
48
|
-
|
49
|
-
# Encrypt uploaded records.
|
50
|
-
# The fields are not affected in any way, only the data stored in the
|
51
|
-
# records and results collections will be encrypted
|
52
|
-
field :encrypt, type: Object, default: false, class_attribute: true
|
53
|
-
|
54
14
|
#
|
55
15
|
# Values that jobs can also update during processing
|
56
16
|
#
|
@@ -69,30 +29,7 @@ module RocketJob
|
|
69
29
|
|
70
30
|
# Breaks the :running state up into multiple sub-states:
|
71
31
|
# :running -> :before -> :processing -> :after -> :complete
|
72
|
-
field :sub_state, type:
|
73
|
-
|
74
|
-
validates_presence_of :slice_size
|
75
|
-
|
76
|
-
validates_each :output_categories, :input_categories do |record, attr, value|
|
77
|
-
# Under some circumstances ActiveModel is passing in a nil value even though the
|
78
|
-
# attributes have default values
|
79
|
-
Array(value).each do |category|
|
80
|
-
record.errors.add(attr, "must only contain Symbol values") unless category.is_a?(Symbol)
|
81
|
-
unless category.to_s =~ /\A[a-z_0-9]+\Z/
|
82
|
-
record.errors.add(attr, "must only consist of lowercase characters, digits, and _")
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
# Returns [true|false] whether the slices for this job are encrypted
|
89
|
-
def encrypted?
|
90
|
-
encrypt == true
|
91
|
-
end
|
92
|
-
|
93
|
-
# Returns [true|false] whether the slices for this job are compressed
|
94
|
-
def compressed?
|
95
|
-
compress == true
|
32
|
+
field :sub_state, type: Mongoid::StringifiedSymbol
|
96
33
|
end
|
97
34
|
|
98
35
|
# Returns [Integer] percent of records completed so far
|
@@ -102,10 +39,10 @@ module RocketJob
|
|
102
39
|
return 0 unless record_count.to_i.positive?
|
103
40
|
|
104
41
|
# Approximate number of input records
|
105
|
-
input_records = input.count.to_f * slice_size
|
42
|
+
input_records = input.count.to_f * input_category.slice_size
|
106
43
|
if input_records > record_count
|
107
44
|
# Sanity check in case slice_size is not being adhered to
|
108
|
-
|
45
|
+
0
|
109
46
|
else
|
110
47
|
((1.0 - (input_records.to_f / record_count)) * 100).to_i
|
111
48
|
end
|
@@ -120,6 +57,10 @@ module RocketJob
|
|
120
57
|
h["active_slices"] = worker_count
|
121
58
|
h["failed_slices"] = input.failed.count
|
122
59
|
h["queued_slices"] = input.queued.count
|
60
|
+
output_categories.each do |category|
|
61
|
+
name_str = category.name == :main ? "" : "_#{category.name}"
|
62
|
+
h["output_slices#{name_str}"] = output(category).count
|
63
|
+
end
|
123
64
|
# Very high level estimated time left
|
124
65
|
if record_count && running? && record_count.positive?
|
125
66
|
percent = percent_complete
|
@@ -129,10 +70,9 @@ module RocketJob
|
|
129
70
|
end
|
130
71
|
end
|
131
72
|
elsif completed?
|
132
|
-
secs
|
73
|
+
secs = seconds.to_f
|
133
74
|
h["records_per_hour"] = ((record_count.to_f / secs) * 60 * 60).round if record_count&.positive? && (secs > 0.0)
|
134
75
|
end
|
135
|
-
h["output_slices"] = output.count if collect_output? && !completed?
|
136
76
|
h.merge!(super(time_zone))
|
137
77
|
h.delete("result")
|
138
78
|
# Worker name should be retrieved from the slices when processing
|
@@ -172,6 +112,18 @@ module RocketJob
|
|
172
112
|
@worker_count_last = Time.now.to_i
|
173
113
|
@worker_count
|
174
114
|
end
|
115
|
+
|
116
|
+
# @deprecated
|
117
|
+
# For backward compatibility
|
118
|
+
def upload_file_name
|
119
|
+
input_category.file_name
|
120
|
+
end
|
121
|
+
|
122
|
+
# @deprecated
|
123
|
+
# For backward compatibility
|
124
|
+
def upload_file_name=(upload_file_name)
|
125
|
+
input_category.file_name = upload_file_name
|
126
|
+
end
|
175
127
|
end
|
176
128
|
end
|
177
129
|
end
|