rocketjob 5.4.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +175 -5
- data/bin/rocketjob_batch_perf +1 -1
- data/bin/rocketjob_perf +1 -1
- data/lib/rocket_job/batch/categories.rb +345 -0
- data/lib/rocket_job/batch/io.rb +174 -106
- data/lib/rocket_job/batch/model.rb +20 -68
- data/lib/rocket_job/batch/performance.rb +19 -7
- data/lib/rocket_job/batch/statistics.rb +34 -12
- data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
- data/lib/rocket_job/batch/worker.rb +31 -26
- data/lib/rocket_job/batch.rb +3 -1
- data/lib/rocket_job/category/base.rb +81 -0
- data/lib/rocket_job/category/input.rb +170 -0
- data/lib/rocket_job/category/output.rb +34 -0
- data/lib/rocket_job/cli.rb +25 -17
- data/lib/rocket_job/dirmon_entry.rb +23 -13
- data/lib/rocket_job/event.rb +1 -1
- data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
- data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
- data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
- data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
- data/lib/rocket_job/jobs/conversion_job.rb +43 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
- data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
- data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
- data/lib/rocket_job/jobs/performance_job.rb +3 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
- data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
- data/lib/rocket_job/lookup_collection.rb +69 -0
- data/lib/rocket_job/plugins/cron.rb +60 -20
- data/lib/rocket_job/plugins/job/model.rb +25 -50
- data/lib/rocket_job/plugins/job/persistence.rb +36 -0
- data/lib/rocket_job/plugins/job/throttle.rb +2 -2
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/job/worker.rb +2 -7
- data/lib/rocket_job/plugins/restart.rb +3 -103
- data/lib/rocket_job/plugins/state_machine.rb +4 -3
- data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
- data/lib/rocket_job/ractor_worker.rb +42 -0
- data/lib/rocket_job/server/model.rb +1 -1
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
- data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
- data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
- data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
- data/lib/rocket_job/sliced/input.rb +42 -54
- data/lib/rocket_job/sliced/slice.rb +12 -16
- data/lib/rocket_job/sliced/slices.rb +26 -11
- data/lib/rocket_job/sliced/writer/input.rb +46 -18
- data/lib/rocket_job/sliced/writer/output.rb +33 -45
- data/lib/rocket_job/sliced.rb +1 -74
- data/lib/rocket_job/subscribers/server.rb +1 -1
- data/lib/rocket_job/thread_worker.rb +46 -0
- data/lib/rocket_job/throttle_definitions.rb +7 -1
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +21 -55
- data/lib/rocket_job/worker_pool.rb +5 -7
- data/lib/rocketjob.rb +53 -43
- metadata +36 -28
- data/lib/rocket_job/batch/tabular/input.rb +0 -131
- data/lib/rocket_job/batch/tabular/output.rb +0 -65
- data/lib/rocket_job/batch/tabular.rb +0 -56
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
data/lib/rocket_job/batch/io.rb
CHANGED
```diff
@@ -9,34 +9,66 @@ module RocketJob
       # Returns [RocketJob::Sliced::Input] input collection for holding input slices
       #
       # Parameters:
-      #   category [Symbol]
-      #     The name of the category to access or upload data into
+      #   category [Symbol|RocketJob::Category::Input]
+      #     The category or the name of the category to access or upload data into
       #     Default: None ( Uses the single default input collection for this job )
       #     Validates: This value must be one of those listed in #input_categories
       def input(category = :main)
-        unless input_categories.include?(category)
-          raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
-        end
+        category = input_category(category)
 
-        (@inputs ||= {})[category] ||=
+        (@inputs ||= {})[category.name] ||= category.data_store(self)
       end
 
       # Returns [RocketJob::Sliced::Output] output collection for holding output slices
       # Returns nil if no output is being collected
       #
       # Parameters:
-      #   category [Symbol]
-      #     The name of the category to access or download data from
+      #   category [Symbol|RocketJob::Category::Input]
+      #     The category or the name of the category to access or download data from
       #     Default: None ( Uses the single default output collection for this job )
       #     Validates: This value must be one of those listed in #output_categories
       def output(category = :main)
-        unless output_categories.include?(category)
-          raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
-        end
+        category = output_category(category)
 
-        (@outputs ||= {})[category] ||=
+        (@outputs ||= {})[category.name] ||= category.data_store(self)
       end
 
+      # Rapidly upload individual records in batches.
+      #
+      # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
+      # for each and every row.
+      #
+      #   input_category(:my_lookup).find(id: 123).first
+      #
+      # Lookup collection.
+      #
+      # Upload side / secondary lookup tables that can be accessed during job processing.
+      #
+      # Example:
+      #   lookup_collection(:my_lookup).upload do |io|
+      #     io << {id: 123, data: "first record"}
+      #     io << {id: 124, data: "second record"}
+      #   end
+      #
+      # Parameters:
+      #   category [Symbol|RocketJob::Category::Input]
+      #     The category or the name of the category to access or download data from
+      #     Default: None ( Uses the single default output collection for this job )
+      #     Validates: This value must be one of those listed in #input_categories
+      # def lookup_collection(category = :main)
+      #   category = input_category(category) unless category.is_a?(Category::Input)
+      #
+      #   collection = (@lookup_collections ||= {})[category.name]
+      #
+      #   unless collection
+      #     collection_name = "rocket_job.inputs.#{id}"
+      #     collection_name << ".#{category.name}" unless category.name == :main
+      #
+      #     @lookup_collections[category.name] ||=
+      #       LookupCollection.new(Sliced::Slice.collection.database, collection_name)
+      #   end
+      # end
+
       # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
       #
       # Returns [Integer] the number of records uploaded.
```
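In v6 both accessors resolve their argument through the category registry and delegate collection creation to the category itself. A minimal usage sketch; the job class and the `:errors` category are hypothetical stand-ins:

```ruby
# Hypothetical batch job; assumes an :errors output category is registered on the class.
job = MyBatchJob.new

job.input                                 # Default :main input collection (RocketJob::Sliced::Input)
job.output(:errors)                       # Symbol is resolved via output_category(:errors)
job.output(job.output_category(:errors))  # A Category object is also accepted directly
```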
```diff
@@ -65,6 +97,11 @@ module RocketJob
       # Parses each line from the file into a Hash and uploads each hash for processing by workers.
       # See IOStreams::Stream#each.
       #
+      #   category [Symbol|RocketJob::Category::Input]
+      #     The category or the name of the category to access or download data from
+      #     Default: None ( Uses the single default output collection for this job )
+      #     Validates: This value must be one of those listed in #input_categories
+      #
       # Example:
       #   # Load plain text records from a file
       #   job.upload('hello.csv')
```
```diff
@@ -113,29 +150,7 @@ module RocketJob
       # * If an io stream is supplied, it is read until it returns nil.
       # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
       # * CSV parsing is slow, so it is usually left for the workers to do.
-      def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
-        raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
-
-        stream_mode = stream_mode.to_sym
-        # Backward compatibility with existing v4 jobs
-        stream_mode = :array if stream_mode == :row
-        stream_mode = :hash if stream_mode == :record
-
-        count =
-          if block
-            input(category).upload(on_first: on_first, &block)
-          else
-            path = IOStreams.new(stream)
-            path.file_name = file_name if file_name
-            self.upload_file_name = path.file_name
-            input(category).upload(on_first: on_first) do |io|
-              path.each(stream_mode, **args) { |line| io << line }
-            end
-          end
-        self.record_count = (record_count || 0) + count
-        count
-      end
-
+      #
       # Upload results from an Arel into RocketJob::SlicedJob.
       #
       # Params
```
```diff
@@ -144,6 +159,9 @@ module RocketJob
       #       and uploaded into the job
       #       These columns are automatically added to the select list to reduce overhead
       #
+      #   category [Symbol|RocketJob::Category::Input]
+      #     The category or the name of the category to upload to.
+      #
       # If a Block is supplied it is passed the model returned from the database and should
       # return the work item to be uploaded into the job.
       #
```
```diff
@@ -159,18 +177,13 @@ module RocketJob
       #
       # Example: Upload user_name and zip_code
       #   arel = User.where(country_code: 'US')
-      #   job.upload_arel(arel, :user_name, :zip_code)
+      #   job.upload_arel(arel, columns: [:user_name, :zip_code])
       #
       # Notes:
       # * Only call from one thread at a time against a single instance of this job.
       # * The record_count for the job is set to the number of records returned by the arel.
       # * If an exception is raised while uploading data, the input collection is cleared out
       #   so that if a job is retried during an upload failure, data is not duplicated.
-      def upload_arel(arel, *column_names, category: :main, &block)
-        count = input(category).upload_arel(arel, *column_names, &block)
-        self.record_count = (record_count || 0) + count
-        count
-      end
 
       # Upload the result of a MongoDB query to the input collection for processing
       # Useful when an entire MongoDB collection, or part thereof needs to be
```
```diff
@@ -198,30 +211,25 @@ module RocketJob
       #   criteria = User.where(state: 'FL')
       #   job.record_count = job.upload_mongo_query(criteria)
       #
-      # Example: Upload
+      # Example: Upload only the specified column(s)
       #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
+      #   job.record_count = job.upload_mongo_query(criteria, columns: [:zip_code])
       #
       # Notes:
       # * Only call from one thread at a time against a single instance of this job.
       # * The record_count for the job is set to the number of records returned by the monqo query.
       # * If an exception is raised while uploading data, the input collection is cleared out
       #   so that if a job is retried during an upload failure, data is not duplicated.
-      def upload_mongo_query(criteria, *column_names, category: :main, &block)
-        count = input(category).upload_mongo_query(criteria, *column_names, &block)
-        self.record_count = (record_count || 0) + count
-        count
-      end
 
       # Upload sliced range of integer requests as arrays of start and end ids.
       #
-      # Returns [Integer]
+      # Returns [Integer] the number of slices uploaded.
       #
       # Uploads one range per slice so that the response can return multiple records
       # for each slice processed
       #
       # Example
-      #   job.slice_size = 100
+      #   job.input_category.slice_size = 100
       #   job.upload_integer_range(200, 421)
       #
       #   # Equivalent to calling:
```
```diff
@@ -234,17 +242,11 @@ module RocketJob
       # * The record_count for the job is set to: last_id - start_id + 1.
       # * If an exception is raised while uploading data, the input collection is cleared out
       #   so that if a job is retried during an upload failure, data is not duplicated.
-      def upload_integer_range(start_id, last_id, category: :main)
-        input(category).upload_integer_range(start_id, last_id)
-        count = last_id - start_id + 1
-        self.record_count = (record_count || 0) + count
-        count
-      end
 
       # Upload sliced range of integer requests as an arrays of start and end ids
       # starting with the last range first
       #
-      # Returns [Integer]
+      # Returns [Integer] the number of slices uploaded.
       #
       # Uploads one range per slice so that the response can return multiple records
       # for each slice processed.
```
```diff
@@ -253,7 +255,7 @@ module RocketJob
       #   in a database based on the id column
       #
       # Example
-      #   job.slice_size = 100
+      #   job.input_category.slice_size = 100
       #   job.upload_integer_range_in_reverse_order(200, 421)
       #
       #   # Equivalent to calling:
```
```diff
@@ -266,14 +268,102 @@ module RocketJob
       # * The record_count for the job is set to: last_id - start_id + 1.
       # * If an exception is raised while uploading data, the input collection is cleared out
       #   so that if a job is retried during an upload failure, data is not duplicated.
-      def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
-        input(category).upload_integer_range_in_reverse_order(start_id, last_id)
-        count = last_id - start_id + 1
+
+      def upload(object = nil, category: :main, file_name: nil, stream_mode: nil, on_first: nil, columns: nil, slice_batch_size: nil, **args, &block)
+        input_collection = input(category)
+
+        if block
+          raise(ArgumentError, "Cannot supply both an object to upload, and a block.") if object
+          if stream_mode || columns || slice_batch_size || args.size > 0
+            raise(ArgumentError, "Unknown keyword arguments when uploading a block. Only accepts :category, :file_name, or :on_first")
+          end
+
+          category = input_category(category)
+          category.file_name = file_name if file_name
+
+          # Extract the header line during the upload when applicable.
+          extract_header = category.extract_header_callback(on_first)
+
+          count = input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size, &block)
+          self.record_count = (record_count || 0) + count
+          return count
+        end
+
+        count =
+          case object
+          when Range
+            if file_name || stream_mode || on_first || args.size > 0
+              raise(ArgumentError, "Unknown keyword arguments when uploading a Range. Only accepts :category, :columns, or :slice_batch_size")
+            end
+
+            first = object.first
+            last  = object.last
+            if first < last
+              input_collection.upload_integer_range(first, last, slice_batch_size: slice_batch_size || 1_000)
+            else
+              input_collection.upload_integer_range_in_reverse_order(last, first, slice_batch_size: slice_batch_size || 1_000)
+            end
+          when Mongoid::Criteria
+            if file_name || stream_mode || on_first || args.size > 0
+              raise(ArgumentError, "Unknown keyword arguments when uploading a Mongoid::Criteria. Only accepts :category, :columns, or :slice_batch_size")
+            end
+
+            input_collection.upload_mongo_query(object, columns: columns, slice_batch_size: slice_batch_size, &block)
+          when defined?(ActiveRecord::Relation) ? ActiveRecord::Relation : false
+            if file_name || stream_mode || on_first || args.size > 0
+              raise(ArgumentError, "Unknown keyword arguments when uploading an ActiveRecord::Relation. Only accepts :category, :columns, or :slice_batch_size")
+            end
+
+            input_collection.upload_arel(object, columns: columns, slice_batch_size: slice_batch_size, &block)
+
+          else
+            raise(ArgumentError, "Unknown keyword argument :columns when uploading a file") if columns
+
+            category = input_category(category)
+
+            # Extract the header line during the upload when applicable.
+            extract_header = category.extract_header_callback(on_first)
+            path           = category.upload_path(object, original_file_name: file_name)
+
+            input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size) do |io|
+              path.each(stream_mode || :line, **args) { |line| io << line }
+            end
+          end
+
+        self.record_count = (record_count || 0) + count
+        count
+      end
+
+      # @deprecated
+      def upload_arel(arel, *column_names, category: :main, &block)
+        count = input(category).upload_arel(arel, columns: column_names, &block)
+        self.record_count = (record_count || 0) + count
+        count
+      end
+
+      # @deprecated
+      def upload_mongo_query(criteria, *column_names, category: :main, &block)
+        count = input(category).upload_mongo_query(criteria, columns: column_names, &block)
         self.record_count = (record_count || 0) + count
         count
       end
 
-      #
+      # @deprecated
+      def upload_integer_range(start_id, last_id, category: :main, slice_batch_size: 1_000)
+        count = input(category).upload_integer_range(start_id, last_id, slice_batch_size: slice_batch_size)
+        self.record_count = (record_count || 0) + count
+        count
+      end
+
+      # @deprecated
+      def upload_integer_range_in_reverse_order(start_id, last_id, category: :main, slice_batch_size: 1_000)
+        count = input(category).upload_integer_range_in_reverse_order(start_id, last_id, slice_batch_size: slice_batch_size)
+        self.record_count = (record_count || 0) + count
+        count
+      end
+
+      # Upload the supplied slice for processing by workers
       #
       # Updates the record_count after adding the records
       #
```
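The replacement is a single `upload` entry point that dispatches on the class of its argument, as the case statement above shows, with the old per-type methods kept only as deprecated shims. A short sketch of the calling patterns; the job, file, and model names are illustrative:

```ruby
# File or stream: streamed line by line and sliced for the workers.
job.upload("data/users.csv")

# Ascending Range: uploaded as sliced integer ranges.
job.upload(1..100_000)

# Descending Range: uploaded last range first.
job.upload(100_000..1)

# Mongoid::Criteria (an ActiveRecord::Relation takes the equivalent upload_arel path).
job.upload(User.where(state: "FL"), columns: [:zip_code])

# Block form: push records directly.
job.upload do |io|
  io << "first record"
  io << "second record"
end
```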
```diff
@@ -285,12 +375,12 @@ module RocketJob
       #   For example the following types are not supported: Date
       #
       # Note:
-      #   The caller should
+      #   The caller should implement `:slice_size`, since the entire slice is saved as-is.
       #
       # Note:
       #   Not thread-safe. Only call from one thread at a time
-      def upload_slice(slice)
-        input.insert(slice)
+      def upload_slice(slice, category: :main)
+        input(category).insert(slice)
         count = slice.size
         self.record_count = (record_count || 0) + count
         count
```
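`upload_slice` keeps its store-as-is semantics but now accepts the target input category:

```ruby
# The array is stored as a single slice, exactly as supplied.
job.upload_slice(["record 1", "record 2", "record 3"])

# v6 addition: direct the slice at a named input category (:secondary is hypothetical).
job.upload_slice(["record 4"], category: :secondary)
```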
```diff
@@ -353,56 +443,34 @@ module RocketJob
       def download(stream = nil, category: :main, header_line: nil, **args, &block)
         raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
 
-        return output(category).download(header_line: header_line, &block) if block
+        category = output_category(category) unless category.is_a?(Category::Output)
+        output_collection = output(category)
+
+        # Store the output file name in the category
+        category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
 
-        output_collection = output(category)
+        header_line ||= category.render_header
 
-        if output_collection.binary?
-          IOStreams.new(stream).stream(:none).writer(**args) do |io|
-            raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
+        return output_collection.download(header_line: header_line, &block) if block
 
-            output_collection.download { |record| io.write(record) }
+        raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
+
+        if output_collection.slice_class.binary_format
+          binary_header_line = output_collection.slice_class.to_binary(header_line) if header_line
+
+          # Don't overwrite supplied stream options if any
+          stream = stream&.is_a?(IOStreams::Stream) ? stream.dup : IOStreams.new(category.file_name)
+          stream.remove_from_pipeline(output_collection.slice_class.binary_format)
+          stream.writer(**args) do |io|
+            # TODO: Binary formats should return the record count, instead of the slice count.
+            output_collection.download(header_line: binary_header_line) { |record| io.write(record) }
           end
         else
-          IOStreams.new(stream).writer(:line, **args) do |io|
+          IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
             output_collection.download(header_line: header_line) { |record| io << record }
           end
         end
       end
-
-      # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
-      #
-      # If a block is supplied, the block is supplied with a writer that should be used to
-      # accumulate the results.
-      #
-      # Examples
-      #
-      # job.write_output('hello world')
-      #
-      # job.write_output do |writer|
-      #   writer << 'hello world'
-      # end
-      #
-      # job.write_output do |writer|
-      #   result = RocketJob::Batch::Results
-      #   result << RocketJob::Batch::Result.new(:main, 'hello world')
-      #   result << RocketJob::Batch::Result.new(:errors, 'errors')
-      #   writer << result
-      # end
-      #
-      # result = RocketJob::Batch::Results
-      # result << RocketJob::Batch::Result.new(:main, 'hello world')
-      # result << RocketJob::Batch::Result.new(:errors, 'errors')
-      # job.write_output(result)
-      def write_output(result = nil, input_slice = nil, &block)
-        if block
-          RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
-        else
-          raise(ArgumentError, "result parameter is required when no block is supplied") unless result
-
-          RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
-        end
-      end
     end
   end
 end
```
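In v6, downloading falls back to the file name captured on the output category, and the removed `write_output` helper has no direct replacement in this file; output is written through the output categories (for example, via the values a `perform` method returns), per the category changes elsewhere in this diff. A sketch of the download calls, with illustrative file names:

```ruby
# Stream records line by line to a file.
job.download("results/output.csv")

# No argument: falls back to output_category.file_name, raising ArgumentError if unset.
job.download

# Block form: records are yielded directly, bypassing IOStreams.
job.download { |record| puts record }
```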
data/lib/rocket_job/batch/model.rb

CHANGED

```diff
@@ -11,46 +11,6 @@ module RocketJob
         #
         # The following attributes are set when the job is created
 
-        # Number of records to include in each slice that is processed
-        # Note:
-        #   slice_size is only used by SlicedJob#upload & Sliced::Input#upload
-        #   When slices are supplied directly, their size is not modified to match this number
-        field :slice_size, type: Integer, default: 100, class_attribute: true, user_editable: true, copy_on_restart: true
-
-        # Whether to retain nil results.
-        #
-        # Only applicable if `collect_output` is `true`
-        # Set to `false` to prevent collecting output from the perform
-        # method when it returns `nil`.
-        field :collect_nil_output, type: Boolean, default: true, class_attribute: true
-
-        # Optional Array<Symbol> list of categories that this job can output to
-        #
-        # By using categories the output from #perform can be placed in different
-        # output collections, and therefore different output files
-        #
-        # Categories must be declared in advance to avoid a #perform method
-        # accidentally writing its results to an unknown category
-        field :output_categories, type: Array, default: [:main], class_attribute: true
-
-        # Optional Array<Symbol> list of categories that this job can load input data into
-        field :input_categories, type: Array, default: [:main], class_attribute: true
-
-        # The file name of the uploaded file, if any.
-        # Set by #upload if a file name was supplied, but can also be set explicitly.
-        # May or may not include the fully qualified path name.
-        field :upload_file_name, type: String
-
-        # Compress uploaded records.
-        # The fields are not affected in any way, only the data stored in the
-        # records and results collections will compressed
-        field :compress, type: Object, default: false, class_attribute: true
-
-        # Encrypt uploaded records.
-        # The fields are not affected in any way, only the data stored in the
-        # records and results collections will be encrypted
-        field :encrypt, type: Object, default: false, class_attribute: true
-
         #
         # Values that jobs can also update during processing
         #
```
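All of the removed fields now live on category objects (see `category/base.rb`, `category/input.rb`, and `category/output.rb` in the file list). A rough migration sketch, assuming the class-level `input_category`/`output_category` DSL added in `batch/categories.rb`; the exact option names below are assumptions based on the new Category classes:

```ruby
class MyJob < RocketJob::Job
  include RocketJob::Batch

  # v5: self.slice_size = 1_000; self.compress = true
  input_category slice_size: 1_000        # assumed DSL form
  output_category serializer: :compress   # assumed serializer option
end

# Grounded in the doc comments earlier in this diff:
job = MyJob.new
job.input_category.slice_size = 1_000
```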
```diff
@@ -69,30 +29,7 @@ module RocketJob
 
         # Breaks the :running state up into multiple sub-states:
         #   :running -> :before -> :processing -> :after -> :complete
-        field :sub_state, type: Symbol
-
-        validates_presence_of :slice_size
-
-        validates_each :output_categories, :input_categories do |record, attr, value|
-          # Under some circumstances ActiveModel is passing in a nil value even though the
-          # attributes have default values
-          Array(value).each do |category|
-            record.errors.add(attr, "must only contain Symbol values") unless category.is_a?(Symbol)
-            unless category.to_s =~ /\A[a-z_0-9]+\Z/
-              record.errors.add(attr, "must only consist of lowercase characters, digits, and _")
-            end
-          end
-        end
-      end
-
-      # Returns [true|false] whether the slices for this job are encrypted
-      def encrypted?
-        encrypt == true
-      end
-
-      # Returns [true|false] whether the slices for this job are compressed
-      def compressed?
-        compress == true
+        field :sub_state, type: Mongoid::StringifiedSymbol
       end
 
       # Returns [Integer] percent of records completed so far
```
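`Mongoid::StringifiedSymbol` stores symbol values as strings in MongoDB and symbolizes them again on read; the backport added under `extensions/mongoid/stringified_symbol.rb` in the file list presumably mirrors the type introduced in Mongoid 7.2. A sketch of the behavior:

```ruby
job.sub_state = :processing
job.attributes["sub_state"]  # => "processing"  (mongoized as a String)
job.sub_state                # => :processing   (demongoized back to a Symbol)
```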
```diff
@@ -102,10 +39,10 @@ module RocketJob
         return 0 unless record_count.to_i.positive?
 
         # Approximate number of input records
-        input_records = input.count.to_f * slice_size
+        input_records = input.count.to_f * input_category.slice_size
         if input_records > record_count
           # Sanity check in case slice_size is not being adhered to
-          99
+          0
         else
           ((1.0 - (input_records.to_f / record_count)) * 100).to_i
         end
```
```diff
@@ -120,6 +57,10 @@ module RocketJob
           h["active_slices"] = worker_count
           h["failed_slices"] = input.failed.count
           h["queued_slices"] = input.queued.count
+          output_categories.each do |category|
+            name_str = category.name == :main ? "" : "_#{category.name}"
+            h["output_slices#{name_str}"] = output(category).count
+          end
           # Very high level estimated time left
           if record_count && running? && record_count.positive?
             percent = percent_complete
```
```diff
@@ -129,10 +70,9 @@ module RocketJob
             end
           end
         elsif completed?
-          secs                  = seconds.to_f
+          secs = seconds.to_f
           h["records_per_hour"] = ((record_count.to_f / secs) * 60 * 60).round if record_count&.positive? && (secs > 0.0)
         end
-        h["output_slices"] = output.count if collect_output? && !completed?
         h.merge!(super(time_zone))
         h.delete("result")
         # Worker name should be retrieved from the slices when processing
```
```diff
@@ -172,6 +112,18 @@ module RocketJob
         @worker_count_last = Time.now.to_i
         @worker_count
       end
+
+      # @deprecated
+      # For backward compatibility
+      def upload_file_name
+        input_category.file_name
+      end
+
+      # @deprecated
+      # For backward compatibility
+      def upload_file_name=(upload_file_name)
+        input_category.file_name = upload_file_name
+      end
     end
   end
 end
```
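The deprecated accessors simply delegate to the input category:

```ruby
# Old and new forms are equivalent in v6:
job.upload_file_name = "users.csv"           # deprecated shim
job.input_category.file_name = "users.csv"   # preferred v6 form

job.upload_file_name                         # reads input_category.file_name
```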