rocketjob 5.4.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +175 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch/categories.rb +345 -0
  6. data/lib/rocket_job/batch/io.rb +174 -106
  7. data/lib/rocket_job/batch/model.rb +20 -68
  8. data/lib/rocket_job/batch/performance.rb +19 -7
  9. data/lib/rocket_job/batch/statistics.rb +34 -12
  10. data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
  11. data/lib/rocket_job/batch/worker.rb +31 -26
  12. data/lib/rocket_job/batch.rb +3 -1
  13. data/lib/rocket_job/category/base.rb +81 -0
  14. data/lib/rocket_job/category/input.rb +170 -0
  15. data/lib/rocket_job/category/output.rb +34 -0
  16. data/lib/rocket_job/cli.rb +25 -17
  17. data/lib/rocket_job/dirmon_entry.rb +23 -13
  18. data/lib/rocket_job/event.rb +1 -1
  19. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  20. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  21. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  22. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  23. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  24. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  25. data/lib/rocket_job/jobs/conversion_job.rb +43 -0
  26. data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
  27. data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
  28. data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
  29. data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
  30. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  31. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
  32. data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
  33. data/lib/rocket_job/lookup_collection.rb +69 -0
  34. data/lib/rocket_job/plugins/cron.rb +60 -20
  35. data/lib/rocket_job/plugins/job/model.rb +25 -50
  36. data/lib/rocket_job/plugins/job/persistence.rb +36 -0
  37. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  38. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
  39. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  40. data/lib/rocket_job/plugins/restart.rb +3 -103
  41. data/lib/rocket_job/plugins/state_machine.rb +4 -3
  42. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
  43. data/lib/rocket_job/ractor_worker.rb +42 -0
  44. data/lib/rocket_job/server/model.rb +1 -1
  45. data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
  46. data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
  47. data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
  48. data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
  49. data/lib/rocket_job/sliced/input.rb +42 -54
  50. data/lib/rocket_job/sliced/slice.rb +12 -16
  51. data/lib/rocket_job/sliced/slices.rb +26 -11
  52. data/lib/rocket_job/sliced/writer/input.rb +46 -18
  53. data/lib/rocket_job/sliced/writer/output.rb +33 -45
  54. data/lib/rocket_job/sliced.rb +1 -74
  55. data/lib/rocket_job/subscribers/server.rb +1 -1
  56. data/lib/rocket_job/thread_worker.rb +46 -0
  57. data/lib/rocket_job/throttle_definitions.rb +7 -1
  58. data/lib/rocket_job/version.rb +1 -1
  59. data/lib/rocket_job/worker.rb +21 -55
  60. data/lib/rocket_job/worker_pool.rb +5 -7
  61. data/lib/rocketjob.rb +53 -43
  62. metadata +36 -28
  63. data/lib/rocket_job/batch/tabular/input.rb +0 -131
  64. data/lib/rocket_job/batch/tabular/output.rb +0 -65
  65. data/lib/rocket_job/batch/tabular.rb +0 -56
  66. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  67. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -9,34 +9,66 @@ module RocketJob
9
9
  # Returns [RocketJob::Sliced::Input] input collection for holding input slices
10
10
  #
11
11
  # Parameters:
12
- # category [Symbol]
13
- # The name of the category to access or upload data into
12
+ # category [Symbol|RocketJob::Category::Input]
13
+ # The category or the name of the category to access or upload data into
14
14
  # Default: None ( Uses the single default input collection for this job )
15
15
  # Validates: This value must be one of those listed in #input_categories
16
16
  def input(category = :main)
17
- unless input_categories.include?(category) || (category == :main)
18
- raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
- end
17
+ category = input_category(category)
20
18
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
19
+ (@inputs ||= {})[category.name] ||= category.data_store(self)
22
20
  end
23
21
 
24
22
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
25
23
  # Returns nil if no output is being collected
26
24
  #
27
25
  # Parameters:
28
- # category [Symbol]
29
- # The name of the category to access or download data from
26
+ # category [Symbol|RocketJob::Category::Output]
27
+ # The category or the name of the category to access or download data from
30
28
  # Default: None ( Uses the single default output collection for this job )
31
29
  # Validates: This value must be one of those listed in #output_categories
32
30
  def output(category = :main)
33
- unless output_categories.include?(category) || (category == :main)
34
- raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
- end
31
+ category = output_category(category)
36
32
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
33
+ (@outputs ||= {})[category.name] ||= category.data_store(self)
38
34
  end
39
35
 
36
+ # Rapidly upload individual records in batches.
37
+ #
38
+ # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
39
+ # for each and every row.
40
+ #
41
+ # input_category(:my_lookup).find(id: 123).first
42
+ #
43
+ # Lookup collection.
44
+ #
45
+ # Upload side / secondary lookup tables that can be accessed during job processing.
46
+ #
47
+ # Example:
48
+ # lookup_collection(:my_lookup).upload do |io|
49
+ # io << {id: 123, data: "first record"}
50
+ # io << {id: 124, data: "second record"}
51
+ # end
52
+ #
53
+ # Parameters:
54
+ # category [Symbol|RocketJob::Category::Input]
55
+ # The category or the name of the category to access or download data from
56
+ # Default: None ( Uses the single default input collection for this job )
57
+ # Validates: This value must be one of those listed in #input_categories
58
+ # def lookup_collection(category = :main)
59
+ # category = input_category(category) unless category.is_a?(Category::Input)
60
+ #
61
+ # collection = (@lookup_collections ||= {})[category.name]
62
+ #
63
+ # unless collection
64
+ # collection_name = "rocket_job.inputs.#{id}"
65
+ # collection_name << ".#{category.name}" unless category.name == :main
66
+ #
67
+ # @lookup_collections[category.name] ||=
68
+ # LookupCollection.new(Sliced::Slice.collection.database, collection_name)
69
+ # end
70
+ # end
71
+
40
72
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
41
73
  #
42
74
  # Returns [Integer] the number of records uploaded.
@@ -65,6 +97,11 @@ module RocketJob
65
97
  # Parses each line from the file into a Hash and uploads each hash for processing by workers.
66
98
  # See IOStreams::Stream#each.
67
99
  #
100
+ # category [Symbol|RocketJob::Category::Input]
101
+ # The category or the name of the category to access or upload data into
102
+ # Default: None ( Uses the single default input collection for this job )
103
+ # Validates: This value must be one of those listed in #input_categories
104
+ #
68
105
  # Example:
69
106
  # # Load plain text records from a file
70
107
  # job.upload('hello.csv')
@@ -113,29 +150,7 @@ module RocketJob
113
150
  # * If an io stream is supplied, it is read until it returns nil.
114
151
  # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
115
152
  # * CSV parsing is slow, so it is usually left for the workers to do.
116
- def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
117
- raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
118
-
119
- stream_mode = stream_mode.to_sym
120
- # Backward compatibility with existing v4 jobs
121
- stream_mode = :array if stream_mode == :row
122
- stream_mode = :hash if stream_mode == :record
123
-
124
- count =
125
- if block
126
- input(category).upload(on_first: on_first, &block)
127
- else
128
- path = IOStreams.new(stream)
129
- path.file_name = file_name if file_name
130
- self.upload_file_name = path.file_name
131
- input(category).upload(on_first: on_first) do |io|
132
- path.each(stream_mode, **args) { |line| io << line }
133
- end
134
- end
135
- self.record_count = (record_count || 0) + count
136
- count
137
- end
138
-
153
+ #
139
154
  # Upload results from an Arel into RocketJob::SlicedJob.
140
155
  #
141
156
  # Params
@@ -144,6 +159,9 @@ module RocketJob
144
159
  # and uploaded into the job
145
160
  # These columns are automatically added to the select list to reduce overhead
146
161
  #
162
+ # category [Symbol|RocketJob::Category::Input]
163
+ # The category or the name of the category to upload to.
164
+ #
147
165
  # If a Block is supplied it is passed the model returned from the database and should
148
166
  # return the work item to be uploaded into the job.
149
167
  #
@@ -159,18 +177,13 @@ module RocketJob
159
177
  #
160
178
  # Example: Upload user_name and zip_code
161
179
  # arel = User.where(country_code: 'US')
162
- # job.upload_arel(arel, :user_name, :zip_code)
180
+ # job.upload_arel(arel, columns: [:user_name, :zip_code])
163
181
  #
164
182
  # Notes:
165
183
  # * Only call from one thread at a time against a single instance of this job.
166
184
  # * The record_count for the job is set to the number of records returned by the arel.
167
185
  # * If an exception is raised while uploading data, the input collection is cleared out
168
186
  # so that if a job is retried during an upload failure, data is not duplicated.
169
- def upload_arel(arel, *column_names, category: :main, &block)
170
- count = input(category).upload_arel(arel, *column_names, &block)
171
- self.record_count = (record_count || 0) + count
172
- count
173
- end
174
187
 
175
188
  # Upload the result of a MongoDB query to the input collection for processing
176
189
  # Useful when an entire MongoDB collection, or part thereof needs to be
@@ -198,30 +211,25 @@ module RocketJob
198
211
  # criteria = User.where(state: 'FL')
199
212
  # job.record_count = job.upload_mongo_query(criteria)
200
213
  #
201
- # Example: Upload just the supplied column
214
+ # Example: Upload only the specified column(s)
202
215
  # criteria = User.where(state: 'FL')
203
- # job.record_count = job.upload_mongo_query(criteria, :zip_code)
216
+ # job.record_count = job.upload_mongo_query(criteria, columns: [:zip_code])
204
217
  #
205
218
  # Notes:
206
219
  # * Only call from one thread at a time against a single instance of this job.
207
220
  # * The record_count for the job is set to the number of records returned by the mongo query.
208
221
  # * If an exception is raised while uploading data, the input collection is cleared out
209
222
  # so that if a job is retried during an upload failure, data is not duplicated.
210
- def upload_mongo_query(criteria, *column_names, category: :main, &block)
211
- count = input(category).upload_mongo_query(criteria, *column_names, &block)
212
- self.record_count = (record_count || 0) + count
213
- count
214
- end
215
223
 
216
224
  # Upload sliced range of integer requests as arrays of start and end ids.
217
225
  #
218
- # Returns [Integer] last_id - start_id + 1.
226
+ # Returns [Integer] the number of slices uploaded.
219
227
  #
220
228
  # Uploads one range per slice so that the response can return multiple records
221
229
  # for each slice processed
222
230
  #
223
231
  # Example
224
- # job.slice_size = 100
232
+ # job.input_category.slice_size = 100
225
233
  # job.upload_integer_range(200, 421)
226
234
  #
227
235
  # # Equivalent to calling:
@@ -234,17 +242,11 @@ module RocketJob
234
242
  # * The record_count for the job is set to: last_id - start_id + 1.
235
243
  # * If an exception is raised while uploading data, the input collection is cleared out
236
244
  # so that if a job is retried during an upload failure, data is not duplicated.
237
- def upload_integer_range(start_id, last_id, category: :main)
238
- input(category).upload_integer_range(start_id, last_id)
239
- count = last_id - start_id + 1
240
- self.record_count = (record_count || 0) + count
241
- count
242
- end
243
245
 
244
246
  # Upload sliced range of integer requests as arrays of start and end ids
245
247
  # starting with the last range first
246
248
  #
247
- # Returns [Integer] last_id - start_id + 1.
249
+ # Returns [Integer] the number of slices uploaded.
248
250
  #
249
251
  # Uploads one range per slice so that the response can return multiple records
250
252
  # for each slice processed.
@@ -253,7 +255,7 @@ module RocketJob
253
255
  # in a database based on the id column
254
256
  #
255
257
  # Example
256
- # job.slice_size = 100
258
+ # job.input_category.slice_size = 100
257
259
  # job.upload_integer_range_in_reverse_order(200, 421)
258
260
  #
259
261
  # # Equivalent to calling:
@@ -266,14 +268,102 @@ module RocketJob
266
268
  # * The record_count for the job is set to: last_id - start_id + 1.
267
269
  # * If an exception is raised while uploading data, the input collection is cleared out
268
270
  # so that if a job is retried during an upload failure, data is not duplicated.
269
- def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
270
- input(category).upload_integer_range_in_reverse_order(start_id, last_id)
271
- count = last_id - start_id + 1
271
+
272
+ def upload(object = nil, category: :main, file_name: nil, stream_mode: nil, on_first: nil, columns: nil, slice_batch_size: nil, **args, &block)
273
+ input_collection = input(category)
274
+
275
+ if block
276
+ raise(ArgumentError, "Cannot supply both an object to upload, and a block.") if object
277
+ if stream_mode || columns || slice_batch_size || args.size > 0
278
+ raise(ArgumentError, "Unknown keyword arguments when uploading a block. Only accepts :category, :file_name, or :on_first")
279
+ end
280
+
281
+ category = input_category(category)
282
+ category.file_name = file_name if file_name
283
+
284
+ # Extract the header line during the upload when applicable.
285
+ extract_header = category.extract_header_callback(on_first)
286
+
287
+ count = input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size, &block)
288
+ self.record_count = (record_count || 0) + count
289
+ return count
290
+ end
291
+
292
+ count =
293
+ case object
294
+ when Range
295
+ if file_name || stream_mode || on_first || args.size > 0
296
+ raise(ArgumentError, "Unknown keyword arguments when uploading a Range. Only accepts :category, :columns, or :slice_batch_size")
297
+ end
298
+
299
+ first = object.first
300
+ last = object.last
301
+ if first < last
302
+ input_collection.upload_integer_range(first, last, slice_batch_size: slice_batch_size || 1_000)
303
+ else
304
+ input_collection.upload_integer_range_in_reverse_order(last, first, slice_batch_size: slice_batch_size || 1_000)
305
+ end
306
+ when Mongoid::Criteria
307
+ if file_name || stream_mode || on_first || args.size > 0
308
+ raise(ArgumentError, "Unknown keyword arguments when uploading a Mongoid::Criteria. Only accepts :category, :columns, or :slice_batch_size")
309
+ end
310
+
311
+ input_collection.upload_mongo_query(object, columns: columns, slice_batch_size: slice_batch_size, &block)
312
+ when defined?(ActiveRecord::Relation) ? ActiveRecord::Relation : false
313
+ if file_name || stream_mode || on_first || args.size > 0
314
+ raise(ArgumentError, "Unknown keyword arguments when uploading an ActiveRecord::Relation. Only accepts :category, :columns, or :slice_batch_size")
315
+ end
316
+
317
+ input_collection.upload_arel(object, columns: columns, slice_batch_size: slice_batch_size, &block)
318
+
319
+ else
320
+ raise(ArgumentError, "Unknown keyword argument :columns when uploading a file") if columns
321
+
322
+ category = input_category(category)
323
+
324
+ # Extract the header line during the upload when applicable.
325
+ extract_header = category.extract_header_callback(on_first)
326
+ path = category.upload_path(object, original_file_name: file_name)
327
+
328
+ input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size) do |io|
329
+ path.each(stream_mode || :line, **args) { |line| io << line }
330
+ end
331
+
332
+ end
333
+
334
+ self.record_count = (record_count || 0) + count
335
+ count
336
+ end
337
+
338
+ # @deprecated
339
+ def upload_arel(arel, *column_names, category: :main, &block)
340
+ count = input(category).upload_arel(arel, columns: column_names, &block)
341
+ self.record_count = (record_count || 0) + count
342
+ count
343
+ end
344
+
345
+ # @deprecated
346
+ def upload_mongo_query(criteria, *column_names, category: :main, &block)
347
+ count = input(category).upload_mongo_query(criteria, columns: column_names, &block)
272
348
  self.record_count = (record_count || 0) + count
273
349
  count
274
350
  end
275
351
 
276
- # Upload the supplied slices for processing by workers
352
+ # @deprecated
353
+ def upload_integer_range(start_id, last_id, category: :main, slice_batch_size: 1_000)
354
+ count = input(category).upload_integer_range(start_id, last_id, slice_batch_size: slice_batch_size)
355
+ self.record_count = (record_count || 0) + count
356
+ count
357
+ end
358
+
359
+ # @deprecated
360
+ def upload_integer_range_in_reverse_order(start_id, last_id, category: :main, slice_batch_size: 1_000)
361
+ count = input(category).upload_integer_range_in_reverse_order(start_id, last_id, slice_batch_size: slice_batch_size)
362
+ self.record_count = (record_count || 0) + count
363
+ count
364
+ end
365
+
366
+ # Upload the supplied slice for processing by workers
277
367
  #
278
368
  # Updates the record_count after adding the records
279
369
  #
@@ -285,12 +375,12 @@ module RocketJob
285
375
  # For example the following types are not supported: Date
286
376
  #
287
377
  # Note:
288
- # The caller should honor `:slice_size`, the entire slice is loaded as-is.
378
+ # The caller should implement `:slice_size`, since the entire slice is saved as-is.
289
379
  #
290
380
  # Note:
291
381
  # Not thread-safe. Only call from one thread at a time
292
- def upload_slice(slice)
293
- input.insert(slice)
382
+ def upload_slice(slice, category: :main)
383
+ input(category).insert(slice)
294
384
  count = slice.size
295
385
  self.record_count = (record_count || 0) + count
296
386
  count
@@ -353,56 +443,34 @@ module RocketJob
353
443
  def download(stream = nil, category: :main, header_line: nil, **args, &block)
354
444
  raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
355
445
 
356
- return output(category).download(header_line: header_line, &block) if block
446
+ category = output_category(category) unless category.is_a?(Category::Output)
447
+ output_collection = output(category)
448
+
449
+ # Store the output file name in the category
450
+ category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
357
451
 
358
- output_collection = output(category)
452
+ header_line ||= category.render_header
359
453
 
360
- if output_collection.binary?
361
- IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
- raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
454
+ return output_collection.download(header_line: header_line, &block) if block
363
455
 
364
- output_collection.download { |record| io << record[:binary] }
456
+ raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
457
+
458
+ if output_collection.slice_class.binary_format
459
+ binary_header_line = output_collection.slice_class.to_binary(header_line) if header_line
460
+
461
+ # Don't overwrite supplied stream options if any
462
+ stream = stream&.is_a?(IOStreams::Stream) ? stream.dup : IOStreams.new(category.file_name)
463
+ stream.remove_from_pipeline(output_collection.slice_class.binary_format)
464
+ stream.writer(**args) do |io|
465
+ # TODO: Binary formats should return the record count, instead of the slice count.
466
+ output_collection.download(header_line: binary_header_line) { |record| io.write(record) }
365
467
  end
366
468
  else
367
- IOStreams.new(stream).writer(:line, **args) do |io|
469
+ IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
368
470
  output_collection.download(header_line: header_line) { |record| io << record }
369
471
  end
370
472
  end
371
473
  end
372
-
373
- # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
374
- #
375
- # If a block is supplied, the block is supplied with a writer that should be used to
376
- # accumulate the results.
377
- #
378
- # Examples
379
- #
380
- # job.write_output('hello world')
381
- #
382
- # job.write_output do |writer|
383
- # writer << 'hello world'
384
- # end
385
- #
386
- # job.write_output do |writer|
387
- # result = RocketJob::Batch::Results
388
- # result << RocketJob::Batch::Result.new(:main, 'hello world')
389
- # result << RocketJob::Batch::Result.new(:errors, 'errors')
390
- # writer << result
391
- # end
392
- #
393
- # result = RocketJob::Batch::Results
394
- # result << RocketJob::Batch::Result.new(:main, 'hello world')
395
- # result << RocketJob::Batch::Result.new(:errors, 'errors')
396
- # job.write_output(result)
397
- def write_output(result = nil, input_slice = nil, &block)
398
- if block
399
- RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
400
- else
401
- raise(ArgumentError, "result parameter is required when no block is supplied") unless result
402
-
403
- RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
404
- end
405
- end
406
474
  end
407
475
  end
408
476
  end
@@ -11,46 +11,6 @@ module RocketJob
11
11
  #
12
12
  # The following attributes are set when the job is created
13
13
 
14
- # Number of records to include in each slice that is processed
15
- # Note:
16
- # slice_size is only used by SlicedJob#upload & Sliced::Input#upload
17
- # When slices are supplied directly, their size is not modified to match this number
18
- field :slice_size, type: Integer, default: 100, class_attribute: true, user_editable: true, copy_on_restart: true
19
-
20
- # Whether to retain nil results.
21
- #
22
- # Only applicable if `collect_output` is `true`
23
- # Set to `false` to prevent collecting output from the perform
24
- # method when it returns `nil`.
25
- field :collect_nil_output, type: Boolean, default: true, class_attribute: true
26
-
27
- # Optional Array<Symbol> list of categories that this job can output to
28
- #
29
- # By using categories the output from #perform can be placed in different
30
- # output collections, and therefore different output files
31
- #
32
- # Categories must be declared in advance to avoid a #perform method
33
- # accidentally writing its results to an unknown category
34
- field :output_categories, type: Array, default: [:main], class_attribute: true
35
-
36
- # Optional Array<Symbol> list of categories that this job can load input data into
37
- field :input_categories, type: Array, default: [:main], class_attribute: true
38
-
39
- # The file name of the uploaded file, if any.
40
- # Set by #upload if a file name was supplied, but can also be set explicitly.
41
- # May or may not include the fully qualified path name.
42
- field :upload_file_name, type: String
43
-
44
- # Compress uploaded records.
45
- # The fields are not affected in any way, only the data stored in the
46
- # records and results collections will compressed
47
- field :compress, type: Object, default: false, class_attribute: true
48
-
49
- # Encrypt uploaded records.
50
- # The fields are not affected in any way, only the data stored in the
51
- # records and results collections will be encrypted
52
- field :encrypt, type: Object, default: false, class_attribute: true
53
-
54
14
  #
55
15
  # Values that jobs can also update during processing
56
16
  #
@@ -69,30 +29,7 @@ module RocketJob
69
29
 
70
30
  # Breaks the :running state up into multiple sub-states:
71
31
  # :running -> :before -> :processing -> :after -> :complete
72
- field :sub_state, type: Symbol
73
-
74
- validates_presence_of :slice_size
75
-
76
- validates_each :output_categories, :input_categories do |record, attr, value|
77
- # Under some circumstances ActiveModel is passing in a nil value even though the
78
- # attributes have default values
79
- Array(value).each do |category|
80
- record.errors.add(attr, "must only contain Symbol values") unless category.is_a?(Symbol)
81
- unless category.to_s =~ /\A[a-z_0-9]+\Z/
82
- record.errors.add(attr, "must only consist of lowercase characters, digits, and _")
83
- end
84
- end
85
- end
86
- end
87
-
88
- # Returns [true|false] whether the slices for this job are encrypted
89
- def encrypted?
90
- encrypt == true
91
- end
92
-
93
- # Returns [true|false] whether the slices for this job are compressed
94
- def compressed?
95
- compress == true
32
+ field :sub_state, type: Mongoid::StringifiedSymbol
96
33
  end
97
34
 
98
35
  # Returns [Integer] percent of records completed so far
@@ -102,10 +39,10 @@ module RocketJob
102
39
  return 0 unless record_count.to_i.positive?
103
40
 
104
41
  # Approximate number of input records
105
- input_records = input.count.to_f * slice_size
42
+ input_records = input.count.to_f * input_category.slice_size
106
43
  if input_records > record_count
107
44
  # Sanity check in case slice_size is not being adhered to
108
- 99
45
+ 0
109
46
  else
110
47
  ((1.0 - (input_records.to_f / record_count)) * 100).to_i
111
48
  end
@@ -120,6 +57,10 @@ module RocketJob
120
57
  h["active_slices"] = worker_count
121
58
  h["failed_slices"] = input.failed.count
122
59
  h["queued_slices"] = input.queued.count
60
+ output_categories.each do |category|
61
+ name_str = category.name == :main ? "" : "_#{category.name}"
62
+ h["output_slices#{name_str}"] = output(category).count
63
+ end
123
64
  # Very high level estimated time left
124
65
  if record_count && running? && record_count.positive?
125
66
  percent = percent_complete
@@ -129,10 +70,9 @@ module RocketJob
129
70
  end
130
71
  end
131
72
  elsif completed?
132
- secs = seconds.to_f
73
+ secs = seconds.to_f
133
74
  h["records_per_hour"] = ((record_count.to_f / secs) * 60 * 60).round if record_count&.positive? && (secs > 0.0)
134
75
  end
135
- h["output_slices"] = output.count if collect_output? && !completed?
136
76
  h.merge!(super(time_zone))
137
77
  h.delete("result")
138
78
  # Worker name should be retrieved from the slices when processing
@@ -172,6 +112,18 @@ module RocketJob
172
112
  @worker_count_last = Time.now.to_i
173
113
  @worker_count
174
114
  end
115
+
116
+ # @deprecated
117
+ # For backward compatibility
118
+ def upload_file_name
119
+ input_category.file_name
120
+ end
121
+
122
+ # @deprecated
123
+ # For backward compatibility
124
+ def upload_file_name=(upload_file_name)
125
+ input_category.file_name = upload_file_name
126
+ end
175
127
  end
176
128
  end
177
129
  end