rocketjob 5.4.1 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +175 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch/categories.rb +345 -0
  6. data/lib/rocket_job/batch/io.rb +174 -106
  7. data/lib/rocket_job/batch/model.rb +20 -68
  8. data/lib/rocket_job/batch/performance.rb +19 -7
  9. data/lib/rocket_job/batch/statistics.rb +34 -12
  10. data/lib/rocket_job/batch/throttle_running_workers.rb +2 -6
  11. data/lib/rocket_job/batch/worker.rb +31 -26
  12. data/lib/rocket_job/batch.rb +3 -1
  13. data/lib/rocket_job/category/base.rb +81 -0
  14. data/lib/rocket_job/category/input.rb +170 -0
  15. data/lib/rocket_job/category/output.rb +34 -0
  16. data/lib/rocket_job/cli.rb +25 -17
  17. data/lib/rocket_job/dirmon_entry.rb +23 -13
  18. data/lib/rocket_job/event.rb +1 -1
  19. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  20. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  21. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  22. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  23. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  24. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  25. data/lib/rocket_job/jobs/conversion_job.rb +43 -0
  26. data/lib/rocket_job/jobs/dirmon_job.rb +25 -36
  27. data/lib/rocket_job/jobs/housekeeping_job.rb +11 -12
  28. data/lib/rocket_job/jobs/on_demand_batch_job.rb +24 -11
  29. data/lib/rocket_job/jobs/on_demand_job.rb +3 -4
  30. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  31. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
  32. data/lib/rocket_job/jobs/upload_file_job.rb +48 -8
  33. data/lib/rocket_job/lookup_collection.rb +69 -0
  34. data/lib/rocket_job/plugins/cron.rb +60 -20
  35. data/lib/rocket_job/plugins/job/model.rb +25 -50
  36. data/lib/rocket_job/plugins/job/persistence.rb +36 -0
  37. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  38. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
  39. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  40. data/lib/rocket_job/plugins/restart.rb +3 -103
  41. data/lib/rocket_job/plugins/state_machine.rb +4 -3
  42. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +37 -0
  43. data/lib/rocket_job/ractor_worker.rb +42 -0
  44. data/lib/rocket_job/server/model.rb +1 -1
  45. data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
  46. data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
  47. data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
  48. data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
  49. data/lib/rocket_job/sliced/input.rb +42 -54
  50. data/lib/rocket_job/sliced/slice.rb +12 -16
  51. data/lib/rocket_job/sliced/slices.rb +26 -11
  52. data/lib/rocket_job/sliced/writer/input.rb +46 -18
  53. data/lib/rocket_job/sliced/writer/output.rb +33 -45
  54. data/lib/rocket_job/sliced.rb +1 -74
  55. data/lib/rocket_job/subscribers/server.rb +1 -1
  56. data/lib/rocket_job/thread_worker.rb +46 -0
  57. data/lib/rocket_job/throttle_definitions.rb +7 -1
  58. data/lib/rocket_job/version.rb +1 -1
  59. data/lib/rocket_job/worker.rb +21 -55
  60. data/lib/rocket_job/worker_pool.rb +5 -7
  61. data/lib/rocketjob.rb +53 -43
  62. metadata +36 -28
  63. data/lib/rocket_job/batch/tabular/input.rb +0 -131
  64. data/lib/rocket_job/batch/tabular/output.rb +0 -65
  65. data/lib/rocket_job/batch/tabular.rb +0 -56
  66. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  67. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
@@ -9,34 +9,66 @@ module RocketJob
9
9
  # Returns [RocketJob::Sliced::Input] input collection for holding input slices
10
10
  #
11
11
  # Parameters:
12
- # category [Symbol]
13
- # The name of the category to access or upload data into
12
+ # category [Symbol|RocketJob::Category::Input]
13
+ # The category or the name of the category to access or upload data into
14
14
  # Default: None ( Uses the single default input collection for this job )
15
15
  # Validates: This value must be one of those listed in #input_categories
16
16
  def input(category = :main)
17
- unless input_categories.include?(category) || (category == :main)
18
- raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
- end
17
+ category = input_category(category)
20
18
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
19
+ (@inputs ||= {})[category.name] ||= category.data_store(self)
22
20
  end
23
21
 
24
22
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
25
23
  # Returns nil if no output is being collected
26
24
  #
27
25
  # Parameters:
28
- # category [Symbol]
29
- # The name of the category to access or download data from
26
+ # category [Symbol|RocketJob::Category::Output]
27
+ # The category or the name of the category to access or download data from
30
28
  # Default: None ( Uses the single default output collection for this job )
31
29
  # Validates: This value must be one of those listed in #output_categories
32
30
  def output(category = :main)
33
- unless output_categories.include?(category) || (category == :main)
34
- raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
- end
31
+ category = output_category(category)
36
32
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
33
+ (@outputs ||= {})[category.name] ||= category.data_store(self)
38
34
  end
39
35
 
36
+ # Rapidly upload individual records in batches.
37
+ #
38
+ # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
39
+ # for each and every row.
40
+ #
41
+ # input_category(:my_lookup).find(id: 123).first
42
+ #
43
+ # Lookup collection.
44
+ #
45
+ # Upload side / secondary lookup tables that can be accessed during job processing.
46
+ #
47
+ # Example:
48
+ # lookup_collection(:my_lookup).upload do |io|
49
+ # io << {id: 123, data: "first record"}
50
+ # io << {id: 124, data: "second record"}
51
+ # end
52
+ #
53
+ # Parameters:
54
+ # category [Symbol|RocketJob::Category::Input]
55
+ # The category or the name of the category to access or download data from
56
+ # Default: None ( Uses the single default input collection for this job )
57
+ # Validates: This value must be one of those listed in #input_categories
58
+ # def lookup_collection(category = :main)
59
+ # category = input_category(category) unless category.is_a?(Category::Input)
60
+ #
61
+ # collection = (@lookup_collections ||= {})[category.name]
62
+ #
63
+ # unless collection
64
+ # collection_name = "rocket_job.inputs.#{id}"
65
+ # collection_name << ".#{category.name}" unless category.name == :main
66
+ #
67
+ # @lookup_collections[category.name] ||=
68
+ # LookupCollection.new(Sliced::Slice.collection.database, collection_name)
69
+ # end
70
+ # end
71
+
40
72
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
41
73
  #
42
74
  # Returns [Integer] the number of records uploaded.
@@ -65,6 +97,11 @@ module RocketJob
65
97
  # Parses each line from the file into a Hash and uploads each hash for processing by workers.
66
98
  # See IOStreams::Stream#each.
67
99
  #
100
+ # category [Symbol|RocketJob::Category::Input]
101
+ # The category or the name of the category to upload data into
102
+ # Default: None ( Uses the single default input collection for this job )
103
+ # Validates: This value must be one of those listed in #input_categories
104
+ #
68
105
  # Example:
69
106
  # # Load plain text records from a file
70
107
  # job.upload('hello.csv')
@@ -113,29 +150,7 @@ module RocketJob
113
150
  # * If an io stream is supplied, it is read until it returns nil.
114
151
  # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
115
152
  # * CSV parsing is slow, so it is usually left for the workers to do.
116
- def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
117
- raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
118
-
119
- stream_mode = stream_mode.to_sym
120
- # Backward compatibility with existing v4 jobs
121
- stream_mode = :array if stream_mode == :row
122
- stream_mode = :hash if stream_mode == :record
123
-
124
- count =
125
- if block
126
- input(category).upload(on_first: on_first, &block)
127
- else
128
- path = IOStreams.new(stream)
129
- path.file_name = file_name if file_name
130
- self.upload_file_name = path.file_name
131
- input(category).upload(on_first: on_first) do |io|
132
- path.each(stream_mode, **args) { |line| io << line }
133
- end
134
- end
135
- self.record_count = (record_count || 0) + count
136
- count
137
- end
138
-
153
+ #
139
154
  # Upload results from an Arel into RocketJob::SlicedJob.
140
155
  #
141
156
  # Params
@@ -144,6 +159,9 @@ module RocketJob
144
159
  # and uploaded into the job
145
160
  # These columns are automatically added to the select list to reduce overhead
146
161
  #
162
+ # category [Symbol|RocketJob::Category::Input]
163
+ # The category or the name of the category to upload to.
164
+ #
147
165
  # If a Block is supplied it is passed the model returned from the database and should
148
166
  # return the work item to be uploaded into the job.
149
167
  #
@@ -159,18 +177,13 @@ module RocketJob
159
177
  #
160
178
  # Example: Upload user_name and zip_code
161
179
  # arel = User.where(country_code: 'US')
162
- # job.upload_arel(arel, :user_name, :zip_code)
180
+ # job.upload_arel(arel, columns: [:user_name, :zip_code])
163
181
  #
164
182
  # Notes:
165
183
  # * Only call from one thread at a time against a single instance of this job.
166
184
  # * The record_count for the job is set to the number of records returned by the arel.
167
185
  # * If an exception is raised while uploading data, the input collection is cleared out
168
186
  # so that if a job is retried during an upload failure, data is not duplicated.
169
- def upload_arel(arel, *column_names, category: :main, &block)
170
- count = input(category).upload_arel(arel, *column_names, &block)
171
- self.record_count = (record_count || 0) + count
172
- count
173
- end
174
187
 
175
188
  # Upload the result of a MongoDB query to the input collection for processing
176
189
  # Useful when an entire MongoDB collection, or part thereof needs to be
@@ -198,30 +211,25 @@ module RocketJob
198
211
  # criteria = User.where(state: 'FL')
199
212
  # job.record_count = job.upload_mongo_query(criteria)
200
213
  #
201
- # Example: Upload just the supplied column
214
+ # Example: Upload only the specified column(s)
202
215
  # criteria = User.where(state: 'FL')
203
- # job.record_count = job.upload_mongo_query(criteria, :zip_code)
216
+ # job.record_count = job.upload_mongo_query(criteria, columns: [:zip_code])
204
217
  #
205
218
  # Notes:
206
219
  # * Only call from one thread at a time against a single instance of this job.
207
220
  # * The record_count for the job is set to the number of records returned by the mongo query.
208
221
  # * If an exception is raised while uploading data, the input collection is cleared out
209
222
  # so that if a job is retried during an upload failure, data is not duplicated.
210
- def upload_mongo_query(criteria, *column_names, category: :main, &block)
211
- count = input(category).upload_mongo_query(criteria, *column_names, &block)
212
- self.record_count = (record_count || 0) + count
213
- count
214
- end
215
223
 
216
224
  # Upload sliced range of integer requests as arrays of start and end ids.
217
225
  #
218
- # Returns [Integer] last_id - start_id + 1.
226
+ # Returns [Integer] the number of slices uploaded.
219
227
  #
220
228
  # Uploads one range per slice so that the response can return multiple records
221
229
  # for each slice processed
222
230
  #
223
231
  # Example
224
- # job.slice_size = 100
232
+ # job.input_category.slice_size = 100
225
233
  # job.upload_integer_range(200, 421)
226
234
  #
227
235
  # # Equivalent to calling:
@@ -234,17 +242,11 @@ module RocketJob
234
242
  # * The record_count for the job is set to: last_id - start_id + 1.
235
243
  # * If an exception is raised while uploading data, the input collection is cleared out
236
244
  # so that if a job is retried during an upload failure, data is not duplicated.
237
- def upload_integer_range(start_id, last_id, category: :main)
238
- input(category).upload_integer_range(start_id, last_id)
239
- count = last_id - start_id + 1
240
- self.record_count = (record_count || 0) + count
241
- count
242
- end
243
245
 
244
246
  # Upload sliced range of integer requests as arrays of start and end ids
245
247
  # starting with the last range first
246
248
  #
247
- # Returns [Integer] last_id - start_id + 1.
249
+ # Returns [Integer] the number of slices uploaded.
248
250
  #
249
251
  # Uploads one range per slice so that the response can return multiple records
250
252
  # for each slice processed.
@@ -253,7 +255,7 @@ module RocketJob
253
255
  # in a database based on the id column
254
256
  #
255
257
  # Example
256
- # job.slice_size = 100
258
+ # job.input_category.slice_size = 100
257
259
  # job.upload_integer_range_in_reverse_order(200, 421)
258
260
  #
259
261
  # # Equivalent to calling:
@@ -266,14 +268,102 @@ module RocketJob
266
268
  # * The record_count for the job is set to: last_id - start_id + 1.
267
269
  # * If an exception is raised while uploading data, the input collection is cleared out
268
270
  # so that if a job is retried during an upload failure, data is not duplicated.
269
- def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
270
- input(category).upload_integer_range_in_reverse_order(start_id, last_id)
271
- count = last_id - start_id + 1
271
+
272
+ def upload(object = nil, category: :main, file_name: nil, stream_mode: nil, on_first: nil, columns: nil, slice_batch_size: nil, **args, &block)
273
+ input_collection = input(category)
274
+
275
+ if block
276
+ raise(ArgumentError, "Cannot supply both an object to upload, and a block.") if object
277
+ if stream_mode || columns || slice_batch_size || args.size > 0
278
+ raise(ArgumentError, "Unknown keyword arguments when uploading a block. Only accepts :category, :file_name, or :on_first")
279
+ end
280
+
281
+ category = input_category(category)
282
+ category.file_name = file_name if file_name
283
+
284
+ # Extract the header line during the upload when applicable.
285
+ extract_header = category.extract_header_callback(on_first)
286
+
287
+ count = input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size, &block)
288
+ self.record_count = (record_count || 0) + count
289
+ return count
290
+ end
291
+
292
+ count =
293
+ case object
294
+ when Range
295
+ if file_name || stream_mode || on_first || args.size > 0
296
+ raise(ArgumentError, "Unknown keyword arguments when uploading a Range. Only accepts :category, :columns, or :slice_batch_size")
297
+ end
298
+
299
+ first = object.first
300
+ last = object.last
301
+ if first < last
302
+ input_collection.upload_integer_range(first, last, slice_batch_size: slice_batch_size || 1_000)
303
+ else
304
+ input_collection.upload_integer_range_in_reverse_order(last, first, slice_batch_size: slice_batch_size || 1_000)
305
+ end
306
+ when Mongoid::Criteria
307
+ if file_name || stream_mode || on_first || args.size > 0
308
+ raise(ArgumentError, "Unknown keyword arguments when uploading a Mongoid::Criteria. Only accepts :category, :columns, or :slice_batch_size")
309
+ end
310
+
311
+ input_collection.upload_mongo_query(object, columns: columns, slice_batch_size: slice_batch_size, &block)
312
+ when defined?(ActiveRecord::Relation) ? ActiveRecord::Relation : false
313
+ if file_name || stream_mode || on_first || args.size > 0
314
+ raise(ArgumentError, "Unknown keyword arguments when uploading an ActiveRecord::Relation. Only accepts :category, :columns, or :slice_batch_size")
315
+ end
316
+
317
+ input_collection.upload_arel(object, columns: columns, slice_batch_size: slice_batch_size, &block)
318
+
319
+ else
320
+ raise(ArgumentError, "Unknown keyword argument :columns when uploading a file") if columns
321
+
322
+ category = input_category(category)
323
+
324
+ # Extract the header line during the upload when applicable.
325
+ extract_header = category.extract_header_callback(on_first)
326
+ path = category.upload_path(object, original_file_name: file_name)
327
+
328
+ input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size) do |io|
329
+ path.each(stream_mode || :line, **args) { |line| io << line }
330
+ end
331
+
332
+ end
333
+
334
+ self.record_count = (record_count || 0) + count
335
+ count
336
+ end
337
+
338
+ # @deprecated
339
+ def upload_arel(arel, *column_names, category: :main, &block)
340
+ count = input(category).upload_arel(arel, columns: column_names, &block)
341
+ self.record_count = (record_count || 0) + count
342
+ count
343
+ end
344
+
345
+ # @deprecated
346
+ def upload_mongo_query(criteria, *column_names, category: :main, &block)
347
+ count = input(category).upload_mongo_query(criteria, columns: column_names, &block)
272
348
  self.record_count = (record_count || 0) + count
273
349
  count
274
350
  end
275
351
 
276
- # Upload the supplied slices for processing by workers
352
+ # @deprecated
353
+ def upload_integer_range(start_id, last_id, category: :main, slice_batch_size: 1_000)
354
+ count = input(category).upload_integer_range(start_id, last_id, slice_batch_size: slice_batch_size)
355
+ self.record_count = (record_count || 0) + count
356
+ count
357
+ end
358
+
359
+ # @deprecated
360
+ def upload_integer_range_in_reverse_order(start_id, last_id, category: :main, slice_batch_size: 1_000)
361
+ count = input(category).upload_integer_range_in_reverse_order(start_id, last_id, slice_batch_size: slice_batch_size)
362
+ self.record_count = (record_count || 0) + count
363
+ count
364
+ end
365
+
366
+ # Upload the supplied slice for processing by workers
277
367
  #
278
368
  # Updates the record_count after adding the records
279
369
  #
@@ -285,12 +375,12 @@ module RocketJob
285
375
  # For example the following types are not supported: Date
286
376
  #
287
377
  # Note:
288
- # The caller should honor `:slice_size`, the entire slice is loaded as-is.
378
+ # The caller should implement `:slice_size`, since the entire slice is saved as-is.
289
379
  #
290
380
  # Note:
291
381
  # Not thread-safe. Only call from one thread at a time
292
- def upload_slice(slice)
293
- input.insert(slice)
382
+ def upload_slice(slice, category: :main)
383
+ input(category).insert(slice)
294
384
  count = slice.size
295
385
  self.record_count = (record_count || 0) + count
296
386
  count
@@ -353,56 +443,34 @@ module RocketJob
353
443
  def download(stream = nil, category: :main, header_line: nil, **args, &block)
354
444
  raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
355
445
 
356
- return output(category).download(header_line: header_line, &block) if block
446
+ category = output_category(category) unless category.is_a?(Category::Output)
447
+ output_collection = output(category)
448
+
449
+ # Store the output file name in the category
450
+ category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
357
451
 
358
- output_collection = output(category)
452
+ header_line ||= category.render_header
359
453
 
360
- if output_collection.binary?
361
- IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
- raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
454
+ return output_collection.download(header_line: header_line, &block) if block
363
455
 
364
- output_collection.download { |record| io << record[:binary] }
456
+ raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
457
+
458
+ if output_collection.slice_class.binary_format
459
+ binary_header_line = output_collection.slice_class.to_binary(header_line) if header_line
460
+
461
+ # Don't overwrite supplied stream options if any
462
+ stream = stream&.is_a?(IOStreams::Stream) ? stream.dup : IOStreams.new(category.file_name)
463
+ stream.remove_from_pipeline(output_collection.slice_class.binary_format)
464
+ stream.writer(**args) do |io|
465
+ # TODO: Binary formats should return the record count, instead of the slice count.
466
+ output_collection.download(header_line: binary_header_line) { |record| io.write(record) }
365
467
  end
366
468
  else
367
- IOStreams.new(stream).writer(:line, **args) do |io|
469
+ IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
368
470
  output_collection.download(header_line: header_line) { |record| io << record }
369
471
  end
370
472
  end
371
473
  end
372
-
373
- # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
374
- #
375
- # If a block is supplied, the block is supplied with a writer that should be used to
376
- # accumulate the results.
377
- #
378
- # Examples
379
- #
380
- # job.write_output('hello world')
381
- #
382
- # job.write_output do |writer|
383
- # writer << 'hello world'
384
- # end
385
- #
386
- # job.write_output do |writer|
387
- # result = RocketJob::Batch::Results
388
- # result << RocketJob::Batch::Result.new(:main, 'hello world')
389
- # result << RocketJob::Batch::Result.new(:errors, 'errors')
390
- # writer << result
391
- # end
392
- #
393
- # result = RocketJob::Batch::Results
394
- # result << RocketJob::Batch::Result.new(:main, 'hello world')
395
- # result << RocketJob::Batch::Result.new(:errors, 'errors')
396
- # job.write_output(result)
397
- def write_output(result = nil, input_slice = nil, &block)
398
- if block
399
- RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
400
- else
401
- raise(ArgumentError, "result parameter is required when no block is supplied") unless result
402
-
403
- RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
404
- end
405
- end
406
474
  end
407
475
  end
408
476
  end
@@ -11,46 +11,6 @@ module RocketJob
11
11
  #
12
12
  # The following attributes are set when the job is created
13
13
 
14
- # Number of records to include in each slice that is processed
15
- # Note:
16
- # slice_size is only used by SlicedJob#upload & Sliced::Input#upload
17
- # When slices are supplied directly, their size is not modified to match this number
18
- field :slice_size, type: Integer, default: 100, class_attribute: true, user_editable: true, copy_on_restart: true
19
-
20
- # Whether to retain nil results.
21
- #
22
- # Only applicable if `collect_output` is `true`
23
- # Set to `false` to prevent collecting output from the perform
24
- # method when it returns `nil`.
25
- field :collect_nil_output, type: Boolean, default: true, class_attribute: true
26
-
27
- # Optional Array<Symbol> list of categories that this job can output to
28
- #
29
- # By using categories the output from #perform can be placed in different
30
- # output collections, and therefore different output files
31
- #
32
- # Categories must be declared in advance to avoid a #perform method
33
- # accidentally writing its results to an unknown category
34
- field :output_categories, type: Array, default: [:main], class_attribute: true
35
-
36
- # Optional Array<Symbol> list of categories that this job can load input data into
37
- field :input_categories, type: Array, default: [:main], class_attribute: true
38
-
39
- # The file name of the uploaded file, if any.
40
- # Set by #upload if a file name was supplied, but can also be set explicitly.
41
- # May or may not include the fully qualified path name.
42
- field :upload_file_name, type: String
43
-
44
- # Compress uploaded records.
45
- # The fields are not affected in any way, only the data stored in the
46
- # records and results collections will compressed
47
- field :compress, type: Object, default: false, class_attribute: true
48
-
49
- # Encrypt uploaded records.
50
- # The fields are not affected in any way, only the data stored in the
51
- # records and results collections will be encrypted
52
- field :encrypt, type: Object, default: false, class_attribute: true
53
-
54
14
  #
55
15
  # Values that jobs can also update during processing
56
16
  #
@@ -69,30 +29,7 @@ module RocketJob
69
29
 
70
30
  # Breaks the :running state up into multiple sub-states:
71
31
  # :running -> :before -> :processing -> :after -> :complete
72
- field :sub_state, type: Symbol
73
-
74
- validates_presence_of :slice_size
75
-
76
- validates_each :output_categories, :input_categories do |record, attr, value|
77
- # Under some circumstances ActiveModel is passing in a nil value even though the
78
- # attributes have default values
79
- Array(value).each do |category|
80
- record.errors.add(attr, "must only contain Symbol values") unless category.is_a?(Symbol)
81
- unless category.to_s =~ /\A[a-z_0-9]+\Z/
82
- record.errors.add(attr, "must only consist of lowercase characters, digits, and _")
83
- end
84
- end
85
- end
86
- end
87
-
88
- # Returns [true|false] whether the slices for this job are encrypted
89
- def encrypted?
90
- encrypt == true
91
- end
92
-
93
- # Returns [true|false] whether the slices for this job are compressed
94
- def compressed?
95
- compress == true
32
+ field :sub_state, type: Mongoid::StringifiedSymbol
96
33
  end
97
34
 
98
35
  # Returns [Integer] percent of records completed so far
@@ -102,10 +39,10 @@ module RocketJob
102
39
  return 0 unless record_count.to_i.positive?
103
40
 
104
41
  # Approximate number of input records
105
- input_records = input.count.to_f * slice_size
42
+ input_records = input.count.to_f * input_category.slice_size
106
43
  if input_records > record_count
107
44
  # Sanity check in case slice_size is not being adhered to
108
- 99
45
+ 0
109
46
  else
110
47
  ((1.0 - (input_records.to_f / record_count)) * 100).to_i
111
48
  end
@@ -120,6 +57,10 @@ module RocketJob
120
57
  h["active_slices"] = worker_count
121
58
  h["failed_slices"] = input.failed.count
122
59
  h["queued_slices"] = input.queued.count
60
+ output_categories.each do |category|
61
+ name_str = category.name == :main ? "" : "_#{category.name}"
62
+ h["output_slices#{name_str}"] = output(category).count
63
+ end
123
64
  # Very high level estimated time left
124
65
  if record_count && running? && record_count.positive?
125
66
  percent = percent_complete
@@ -129,10 +70,9 @@ module RocketJob
129
70
  end
130
71
  end
131
72
  elsif completed?
132
- secs = seconds.to_f
73
+ secs = seconds.to_f
133
74
  h["records_per_hour"] = ((record_count.to_f / secs) * 60 * 60).round if record_count&.positive? && (secs > 0.0)
134
75
  end
135
- h["output_slices"] = output.count if collect_output? && !completed?
136
76
  h.merge!(super(time_zone))
137
77
  h.delete("result")
138
78
  # Worker name should be retrieved from the slices when processing
@@ -172,6 +112,18 @@ module RocketJob
172
112
  @worker_count_last = Time.now.to_i
173
113
  @worker_count
174
114
  end
115
+
116
+ # @deprecated
117
+ # For backward compatibility
118
+ def upload_file_name
119
+ input_category.file_name
120
+ end
121
+
122
+ # @deprecated
123
+ # For backward compatibility
124
+ def upload_file_name=(upload_file_name)
125
+ input_category.file_name = upload_file_name
126
+ end
175
127
  end
176
128
  end
177
129
  end