rocketjob 5.4.0.beta1 → 6.0.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +19 -5
  3. data/bin/rocketjob_batch_perf +1 -1
  4. data/bin/rocketjob_perf +1 -1
  5. data/lib/rocket_job/batch.rb +3 -0
  6. data/lib/rocket_job/batch/categories.rb +341 -0
  7. data/lib/rocket_job/batch/io.rb +128 -60
  8. data/lib/rocket_job/batch/model.rb +20 -68
  9. data/lib/rocket_job/batch/performance.rb +19 -7
  10. data/lib/rocket_job/batch/statistics.rb +34 -12
  11. data/lib/rocket_job/batch/tabular.rb +2 -0
  12. data/lib/rocket_job/batch/tabular/input.rb +8 -6
  13. data/lib/rocket_job/batch/tabular/output.rb +4 -2
  14. data/lib/rocket_job/batch/throttle_running_workers.rb +8 -17
  15. data/lib/rocket_job/batch/worker.rb +27 -24
  16. data/lib/rocket_job/category/base.rb +78 -0
  17. data/lib/rocket_job/category/input.rb +110 -0
  18. data/lib/rocket_job/category/output.rb +25 -0
  19. data/lib/rocket_job/cli.rb +25 -17
  20. data/lib/rocket_job/dirmon_entry.rb +22 -12
  21. data/lib/rocket_job/event.rb +1 -1
  22. data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
  23. data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
  24. data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
  25. data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
  26. data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
  27. data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
  28. data/lib/rocket_job/jobs/dirmon_job.rb +2 -2
  29. data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
  30. data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
  31. data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
  32. data/lib/rocket_job/jobs/performance_job.rb +3 -1
  33. data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -96
  34. data/lib/rocket_job/jobs/upload_file_job.rb +44 -8
  35. data/lib/rocket_job/lookup_collection.rb +69 -0
  36. data/lib/rocket_job/plugins/job/model.rb +25 -50
  37. data/lib/rocket_job/plugins/job/throttle.rb +2 -2
  38. data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +12 -4
  39. data/lib/rocket_job/plugins/job/worker.rb +2 -7
  40. data/lib/rocket_job/plugins/restart.rb +12 -5
  41. data/lib/rocket_job/plugins/state_machine.rb +2 -1
  42. data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +38 -0
  43. data/lib/rocket_job/ractor_worker.rb +42 -0
  44. data/lib/rocket_job/server/model.rb +1 -1
  45. data/lib/rocket_job/sliced.rb +15 -70
  46. data/lib/rocket_job/sliced/bzip2_output_slice.rb +2 -2
  47. data/lib/rocket_job/sliced/input.rb +1 -1
  48. data/lib/rocket_job/sliced/slice.rb +5 -13
  49. data/lib/rocket_job/sliced/slices.rb +14 -2
  50. data/lib/rocket_job/sliced/writer/output.rb +33 -45
  51. data/lib/rocket_job/subscribers/server.rb +1 -1
  52. data/lib/rocket_job/thread_worker.rb +46 -0
  53. data/lib/rocket_job/throttle_definitions.rb +7 -1
  54. data/lib/rocket_job/version.rb +1 -1
  55. data/lib/rocket_job/worker.rb +21 -55
  56. data/lib/rocket_job/worker_pool.rb +5 -7
  57. data/lib/rocketjob.rb +53 -43
  58. metadata +35 -26
  59. data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
  60. data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55ab6ca2b3f76cdb4ddf679a2d3e88e1d6a6f3106a69349129a267b629ef4a53
4
- data.tar.gz: 4e0d07878fb4265179b4a270650cc9b89ca4bca55f1d8f9a3451cb3064062c35
3
+ metadata.gz: 6568c1307b7d42a0968df0335e3177967a8223838eceecbe0b3c0cab72c398af
4
+ data.tar.gz: 1109611ffce2fe4aed881f5bd9d017681546f06d62330ffc21873adbef12c179
5
5
  SHA512:
6
- metadata.gz: fdc7ae3827d987404f431410cb81161fbfa269dfe7575a4a86a6abf362c59c6acd0d2c7e9856273ced396eeaf20e50561f1be10a7de0d5c4ae45e5648d15d083
7
- data.tar.gz: 7bb9e9ac90569e78e135293efcd357a0d75037b5417f5f530f18ae3f17f44acf28d7de5b4aeef880a0315117f2992623958a4fb4c9df93ca7273369ac052e759
6
+ metadata.gz: 073adf2196d6d0cfd5c06ad8776374a1a14618b5fbd49a550e9d8df587b0a17fd95b4dad34a60d8812a2c5f4046974e22bcf616512d77442ea28145d9bd374d2
7
+ data.tar.gz: ee9b6d35149f7d7799071485f2e4032d47303d3452a4b7c70c5422019a66615938dba90e1e27090a4f33b635df6f565871efd3152f5a2f2a5cb03a61168b3755
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  # Rocket Job
2
- [![Gem Version](https://img.shields.io/gem/v/rocketjob.svg)](https://rubygems.org/gems/rocketjob) [![Build Status](https://travis-ci.org/rocketjob/rocketjob.svg?branch=master)](https://travis-ci.org/rocketjob/rocketjob) [![Downloads](https://img.shields.io/gem/dt/rocketjob.svg)](https://rubygems.org/gems/semantic_logger) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Support](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
2
+ [![Gem Version](https://img.shields.io/gem/v/rocketjob.svg)](https://rubygems.org/gems/rocketjob) [![Downloads](https://img.shields.io/gem/dt/rocketjob.svg)](https://rubygems.org/gems/rocketjob) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Support](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
3
3
 
4
4
  Ruby's missing batch system
5
5
 
@@ -17,11 +17,23 @@ Checkout https://rocketjob.io/
17
17
  * Questions? Join the chat room on Gitter for [rocketjob support](https://gitter.im/rocketjob/support)
18
18
  * [Report bugs](https://github.com/rocketjob/rocketjob/issues)
19
19
 
20
- ## Rocket Job 4
20
+ ## Rocket Job v5
21
21
 
22
- Rocket Job Pro is now open sourced and included within Rocket Job.
22
+ - Support for Ruby v3 and Rails 6.
23
+ - Multiple output file support through extended `output_categories` capability.
24
+ - File output formats for each category. For example: CSV, PSV, JSON, etc.
25
+ - Support for AWS DocumentDB as the data store.
26
+ - Removed use of Symbols to meet Symbol deprecation in MongoDB and Mongoid.
23
27
 
24
- The `RocketJob::Batch` plugin now adds batch processing capabilites to break up a single task into many
28
+ The following plugins have been deprecated and will be removed in Rocket Job v5.1
29
+ - RocketJob::Batch::Tabular::Input
30
+ - RocketJob::Batch::Tabular::Output
31
+
32
+ ## Rocket Job v4
33
+
34
+ Rocket Job Pro is now open source and included in Rocket Job.
35
+
36
+ The `RocketJob::Batch` plugin now adds batch processing capabilities to break up a single task into many
25
37
  concurrent workers processing slices of the entire job at the same time.
26
38
 
27
39
 
@@ -33,7 +45,9 @@ class MyJob < RocketJob::Job
33
45
 
34
46
  self.description = "Reverse names"
35
47
  self.destroy_on_complete = false
36
- self.collect_output = true
48
+
49
+ # Collect the output for this job in the default output category: `:main`
50
+ output_category
37
51
 
38
52
  # Method to call by all available workers at the same time.
39
53
  # Reverse the characters for each line:
@@ -2,7 +2,7 @@
2
2
  require "rocketjob_batch"
3
3
 
4
4
  # Log to console
5
- SemanticLogger.add_appender(io: STDOUT, formatter: :color)
5
+ SemanticLogger.add_appender(io: $stdout, formatter: :color)
6
6
 
7
7
  perf = RocketJob::Batch::Performance.new
8
8
  perf.parse(ARGV)
data/bin/rocketjob_perf CHANGED
@@ -2,7 +2,7 @@
2
2
  require "rocketjob"
3
3
 
4
4
  # Log to console
5
- SemanticLogger.add_appender(io: STDOUT, formatter: :color)
5
+ SemanticLogger.add_appender(io: $stdout, formatter: :color)
6
6
 
7
7
  perf = RocketJob::Performance.new
8
8
  perf.parse(ARGV)
@@ -7,6 +7,8 @@ require "rocket_job/batch/state_machine"
7
7
  require "rocket_job/batch/throttle"
8
8
  require "rocket_job/batch/throttle_running_workers"
9
9
  require "rocket_job/batch/worker"
10
+ # Ensure after_perform is run first and #upload override is after IO#upload is defined.
11
+ require "rocket_job/batch/categories"
10
12
 
11
13
  module RocketJob
12
14
  module Batch
@@ -17,6 +19,7 @@ module RocketJob
17
19
  include Callbacks
18
20
  include Logger
19
21
  include Worker
22
+ include Categories
20
23
  include Throttle
21
24
  include ThrottleRunningWorkers
22
25
  include IO
@@ -0,0 +1,341 @@
1
+ require "active_support/concern"
2
+
3
+ module RocketJob
4
+ module Batch
5
+ module Categories
6
+ extend ActiveSupport::Concern
7
+
8
+ included do
9
+ after_initialize :rocketjob_categories_assign, if: :new_record?
10
+ after_initialize :rocketjob_categories_migrate, unless: :new_record?
11
+ before_perform :rocketjob_categories_input_render
12
+ after_perform :rocketjob_categories_output_render
13
+
14
+ # List of categories that this job can load input data into
15
+ embeds_many :input_categories, class_name: "RocketJob::Category::Input"
16
+
17
+ # List of categories that this job can save output data into
18
+ embeds_many :output_categories, class_name: "RocketJob::Category::Output"
19
+
20
+ # Internal attributes
21
+ class_attribute :defined_input_categories, instance_accessor: false, instance_predicate: false
22
+ class_attribute :defined_output_categories, instance_accessor: false, instance_predicate: false
23
+
24
+ # For RJMC to be able to edit jobs
25
+ accepts_nested_attributes_for :input_categories, :output_categories
26
+ end
27
+
28
+ module ClassMethods
29
+ # Define a new input category
30
+ # @see RocketJob::Category::Input
31
+ def input_category(**args)
32
+ category = RocketJob::Category::Input.new(**args)
33
+ if defined_input_categories.nil?
34
+ self.defined_input_categories = [category]
35
+ else
36
+ rocketjob_categories_set(category, defined_input_categories)
37
+ end
38
+ end
39
+
40
+ # Define a new output category
41
+ # @see RocketJob::Category::Output
42
+ def output_category(**args)
43
+ category = RocketJob::Category::Output.new(**args)
44
+ if defined_output_categories.nil?
45
+ self.defined_output_categories = [category]
46
+ else
47
+ rocketjob_categories_set(category, defined_output_categories)
48
+ end
49
+ end
50
+
51
+ # Builds this job instance from the supplied properties hash that may contain input and output categories.
52
+ # Keeps the defaults and merges in settings without replacing existing categories.
53
+ def from_properties(properties)
54
+ return super(properties) unless properties.key?("input_categories") || properties.key?("output_categories")
55
+
56
+ properties = properties.dup
57
+ input_categories = properties.delete("input_categories")
58
+ output_categories = properties.delete("output_categories")
59
+ job = super(properties)
60
+ job.merge_input_categories(input_categories)
61
+ job.merge_output_categories(output_categories)
62
+ job
63
+ end
64
+
65
+ private
66
+
67
+ def rocketjob_categories_set(category, categories)
68
+ index = categories.find_index { |cat| cat.name == category.name }
69
+ index ? categories[index] = category : categories << category
70
+ category
71
+ end
72
+ end
73
+
74
+ def input_category(category_name = :main)
75
+ category_name = category_name.to_sym
76
+ category = nil
77
+ # .find does not work against this association
78
+ input_categories.each { |catg| category = catg if catg.name == category_name }
79
+ unless category
80
+ # Auto-register main input category if missing
81
+ if category_name == :main
82
+ category = Category::Input.new
83
+ self.input_categories = [category]
84
+ else
85
+ raise(ArgumentError,
86
+ "Unknown Input Category: #{category_name.inspect}. Registered categories: #{input_categories.collect(&:name).join(',')}")
87
+ end
88
+ end
89
+ category
90
+ end
91
+
92
+ def output_category(category_name = :main)
93
+ category_name = category_name.to_sym
94
+ category = nil
95
+ # .find does not work against this association
96
+ output_categories.each { |catg| category = catg if catg.name == category_name }
97
+ unless category
98
+ raise(ArgumentError,
99
+ "Unknown Output Category: #{category_name.inspect}. Registered categories: #{output_categories.collect(&:name).join(',')}")
100
+ end
101
+
102
+ category
103
+ end
104
+
105
+ # Returns [true|false] whether the named category has already been defined
106
+ def input_category?(category_name)
107
+ category_name = category_name.to_sym
108
+ # .find does not work against this association
109
+ input_categories.each { |catg| return true if catg.name == category_name }
110
+ false
111
+ end
112
+
113
+ def output_category?(category_name)
114
+ category_name = category_name.to_sym
115
+ # .find does not work against this association
116
+ output_categories.each { |catg| return true if catg.name == category_name }
117
+ false
118
+ end
119
+
120
+ def merge_input_categories(categories)
121
+ return if categories.blank?
122
+
123
+ categories.each do |properties|
124
+ category_name = (properties["name"] || properties[:name] || :main).to_sym
125
+ category = input_category(category_name)
126
+ properties.each { |key, value| category.public_send("#{key}=".to_sym, value) }
127
+ end
128
+ end
129
+
130
+ def merge_output_categories(categories)
131
+ return if categories.blank?
132
+
133
+ categories.each do |properties|
134
+ category_name = (properties["name"] || properties[:name] || :main).to_sym
135
+ category = output_category(category_name)
136
+ properties.each { |key, value| category.public_send("#{key}=".to_sym, value) }
137
+ end
138
+ end
139
+
140
+ private
141
+
142
+ def rocketjob_categories_assign
143
+ # Input categories defaults to :main if none was set in the class
144
+ if input_categories.empty?
145
+ self.input_categories =
146
+ if self.class.defined_input_categories
147
+ self.class.defined_input_categories.deep_dup
148
+ else
149
+ [RocketJob::Category::Input.new]
150
+ end
151
+ end
152
+
153
+ return if !self.class.defined_output_categories || !output_categories.empty?
154
+
155
+ # Output categories default to none if none were set in the class
156
+ self.output_categories = self.class.defined_output_categories.deep_dup
157
+ end
158
+
159
+ # Render the output from the perform.
160
+ def rocketjob_categories_output_render
161
+ return if @rocket_job_output.nil?
162
+
163
+ # TODO: ..
164
+ return unless output_categories
165
+ return if output_categories.empty?
166
+
167
+ @rocket_job_output = rocketjob_categories_output_render_row(@rocket_job_output)
168
+ end
169
+
170
+ # Parse the input data before passing to the perform method
171
+ def rocketjob_categories_input_render
172
+ return if @rocket_job_input.nil?
173
+
174
+ @rocket_job_input = rocketjob_categories_input_render_row(@rocket_job_input)
175
+ end
176
+
177
+ def rocketjob_categories_input_render_row(row)
178
+ return if row.nil?
179
+
180
+ category = input_category
181
+ return row if category.nil? || !category.tabular?
182
+ return nil if row.blank?
183
+
184
+ tabular = category.tabular
185
+
186
+ # Fail when the tabular format still requires a header that has not yet been set.
187
+ if tabular.header?
188
+ raise(ArgumentError,
189
+ "The tabular header columns _must_ be set before attempting to parse data that requires it.")
190
+ end
191
+
192
+ tabular.record_parse(row)
193
+ end
194
+
195
+ def rocketjob_categories_output_render_row(row)
196
+ return if row.nil?
197
+
198
+ if row.is_a?(Batch::Result)
199
+ category = output_category(row.category)
200
+ row.value = category.tabular.render(row.value) if category.tabular?
201
+ return row
202
+ end
203
+
204
+ if row.is_a?(Batch::Results)
205
+ results = Batch::Results.new
206
+ row.each { |result| results << rocketjob_categories_output_render_row(result) }
207
+ return results
208
+ end
209
+
210
+ category = output_category
211
+ return row unless category.tabular?
212
+ return nil if row.blank?
213
+
214
+ category.tabular.render(row)
215
+ end
216
+
217
+ # Migrate existing v4 batch jobs to v5.0
218
+ def rocketjob_categories_migrate
219
+ return unless attribute_present?(:input_categories) && self[:input_categories]&.first.is_a?(Symbol)
220
+
221
+ serializer = :none
222
+ if attribute_present?(:compress)
223
+ serializer = :compress if self[:compress]
224
+ remove_attribute(:compress)
225
+ end
226
+
227
+ if attribute_present?(:encrypt)
228
+ serializer = :encrypt if self[:encrypt]
229
+ remove_attribute(:encrypt)
230
+ end
231
+
232
+ slice_size = 100
233
+ if attribute_present?(:slice_size)
234
+ slice_size = self[:slice_size].to_i
235
+ remove_attribute(:slice_size)
236
+ end
237
+
238
+ main_input_format = nil
239
+ main_input_mode = :line
240
+ main_input_columns = nil
241
+ # Only migrate tabular attributes if the job also removed the tabular plugin.
242
+ unless respond_to?(:tabular_input_render)
243
+ if attribute_present?(:tabular_input_format)
244
+ main_input_format = self[:tabular_input_format]
245
+ remove_attribute(:tabular_input_format)
246
+ end
247
+
248
+ if attribute_present?(:tabular_input_mode)
249
+ main_input_mode = self[:tabular_input_mode]
250
+ remove_attribute(:tabular_input_mode)
251
+ end
252
+
253
+ if attribute_present?(:tabular_input_header)
254
+ main_input_columns = self[:tabular_input_header]
255
+ remove_attribute(:tabular_input_header)
256
+ end
257
+ end
258
+
259
+ file_name = nil
260
+ if attribute_present?(:upload_file_name)
261
+ file_name = self[:upload_file_name]
262
+ remove_attribute(:upload_file_name)
263
+ end
264
+
265
+ existing = self[:input_categories]
266
+ self[:input_categories] = []
267
+ self[:input_categories] = existing.collect do |category_name|
268
+ RocketJob::Category::Input.new(
269
+ name: category_name,
270
+ file_name: file_name,
271
+ serializer: serializer,
272
+ slice_size: slice_size,
273
+ format: [:main, "main"].include?(category_name) ? main_input_format : nil,
274
+ columns: [:main, "main"].include?(category_name) ? main_input_columns : nil,
275
+ mode: [:main, "main"].include?(category_name) ? main_input_mode : nil
276
+ ).as_document
277
+ end
278
+
279
+ collect_output = false
280
+ if attribute_present?(:collect_output)
281
+ collect_output = self[:collect_output]
282
+ remove_attribute(:collect_output)
283
+ end
284
+
285
+ collect_nil_output = true
286
+ if attribute_present?(:collect_nil_output)
287
+ collect_nil_output = self[:collect_nil_output]
288
+ remove_attribute(:collect_nil_output)
289
+ end
290
+
291
+ main_output_format = nil
292
+ main_output_columns = nil
293
+ main_output_options = nil
294
+
295
+ # Only migrate tabular attributes if the job also removed the tabular plugin.
296
+ unless respond_to?(:tabular_output_render)
297
+ if attribute_present?(:tabular_output_format)
298
+ main_output_format = self[:tabular_output_format]
299
+ remove_attribute(:tabular_output_format)
300
+ end
301
+
302
+ if attribute_present?(:tabular_output_header)
303
+ main_output_columns = self[:tabular_output_header]
304
+ remove_attribute(:tabular_output_header)
305
+ end
306
+
307
+ if attribute_present?(:tabular_output_options)
308
+ main_output_options = self[:tabular_output_options]
309
+ remove_attribute(:tabular_output_options)
310
+ end
311
+ end
312
+
313
+ existing = self[:output_categories]
314
+ self[:output_categories] = []
315
+ if collect_output
316
+ if existing.blank?
317
+ self[:output_categories] = [
318
+ RocketJob::Category::Output.new(
319
+ nils: collect_nil_output,
320
+ format: main_output_format,
321
+ columns: main_output_columns,
322
+ format_options: main_output_options
323
+ ).as_document
324
+ ]
325
+ elsif existing.first.is_a?(Symbol)
326
+ self[:output_categories] = existing.collect do |category_name|
327
+ RocketJob::Category::Output.new(
328
+ name: category_name,
329
+ serializer: serializer,
330
+ nils: collect_nil_output,
331
+ format: [:main, "main"].include?(category_name) ? main_output_format : nil,
332
+ columns: [:main, "main"].include?(category_name) ? main_output_columns : nil,
333
+ format_options: [:main, "main"].include?(category_name) ? main_output_options : nil
334
+ ).as_document
335
+ end
336
+ end
337
+ end
338
+ end
339
+ end
340
+ end
341
+ end
@@ -9,32 +9,68 @@ module RocketJob
9
9
  # Returns [RocketJob::Sliced::Input] input collection for holding input slices
10
10
  #
11
11
  # Parameters:
12
- # category [Symbol]
13
- # The name of the category to access or upload data into
12
+ # category [Symbol|RocketJob::Category::Input]
13
+ # The category or the name of the category to access or upload data into
14
14
  # Default: None ( Uses the single default input collection for this job )
15
15
  # Validates: This value must be one of those listed in #input_categories
16
16
  def input(category = :main)
17
- unless input_categories.include?(category) || (category == :main)
18
- raise "Category #{category.inspect}, must be registered in input_categories: #{input_categories.inspect}"
19
- end
17
+ raise(ArgumentError, "Cannot supply Output Category to input category") if category.is_a?(Category::Output)
18
+
19
+ category = input_category(category) unless category.is_a?(Category::Input)
20
20
 
21
- (@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
21
+ (@inputs ||= {})[category.name] ||= RocketJob::Sliced.factory(:input, category, self)
22
22
  end
23
23
 
24
24
  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
25
25
  # Returns nil if no output is being collected
26
26
  #
27
27
  # Parameters:
28
- # category [Symbol]
29
- # The name of the category to access or download data from
28
+ # category [Symbol|RocketJob::Category::Output]
29
+ # The category or the name of the category to access or download data from
30
30
  # Default: None ( Uses the single default output collection for this job )
31
31
  # Validates: This value must be one of those listed in #output_categories
32
32
  def output(category = :main)
33
- unless output_categories.include?(category) || (category == :main)
34
- raise "Category #{category.inspect}, must be registered in output_categories: #{output_categories.inspect}"
35
- end
33
+ raise(ArgumentError, "Cannot supply Input Category to output category") if category.is_a?(Category::Input)
34
+
35
+ category = output_category(category) unless category.is_a?(Category::Output)
36
+
37
+ (@outputs ||= {})[category.name] ||= RocketJob::Sliced.factory(:output, category, self)
38
+ end
39
+
40
+ # Rapidly upload individual records in batches.
41
+ #
42
+ # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
43
+ # for each and every row.
44
+ #
45
+ # input_category(:my_lookup).find(id: 123).first
46
+ #
47
+ # Lookup collection.
48
+ #
49
+ # Upload side / secondary lookup tables that can be accessed during job processing.
50
+ #
51
+ # Example:
52
+ # lookup_collection(:my_lookup).upload do |io|
53
+ # io << {id: 123, data: "first record"}
54
+ # io << {id: 124, data: "second record"}
55
+ # end
56
+ #
57
+ # Parameters:
58
+ # category [Symbol|RocketJob::Category::Input]
59
+ # The category or the name of the category to access or download data from
60
+ # Default: None ( Uses the single default input collection for this job )
61
+ # Validates: This value must be one of those listed in #input_categories
62
+ def lookup_collection(category = :main)
63
+ category = input_category(category) unless category.is_a?(Category::Input)
36
64
 
37
- (@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
65
+ collection = (@lookup_collections ||= {})[category.name]
66
+
67
+ unless collection
68
+ collection_name = "rocket_job.inputs.#{id}"
69
+ collection_name << ".#{category.name}" unless category.name == :main
70
+
71
+ @lookup_collections[category.name] ||=
72
+ LookupCollection.new(Sliced::Slice.collection.database, collection_name)
73
+ end
38
74
  end
39
75
 
40
76
  # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
@@ -65,6 +101,11 @@ module RocketJob
65
101
  # Parses each line from the file into a Hash and uploads each hash for processing by workers.
66
102
  # See IOStreams::Stream#each.
67
103
  #
104
+ # category [Symbol|RocketJob::Category::Input]
105
+ # The category or the name of the category to upload data into
106
+ # Default: None ( Uses the single default input collection for this job )
107
+ # Validates: This value must be one of those listed in #input_categories
108
+ #
68
109
  # Example:
69
110
  # # Load plain text records from a file
70
111
  # job.upload('hello.csv')
@@ -116,22 +157,46 @@ module RocketJob
116
157
  def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
117
158
  raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
118
159
 
119
- stream_mode = stream_mode.to_sym
120
- # Backward compatibility with existing v4 jobs
121
- stream_mode = :array if stream_mode == :row
122
- stream_mode = :hash if stream_mode == :record
160
+ category = input_category(category) unless category.is_a?(Category::Input)
161
+ stream ||= category.file_name
162
+ path = nil
123
163
 
124
- count =
164
+ if stream
165
+ path = IOStreams.new(stream)
166
+ path.file_name = file_name if file_name
167
+ category.file_name = path.file_name
168
+
169
+ # Auto detect the format based on the upload file name if present.
170
+ if category.format == :auto
171
+ format = path.format
172
+ if format
173
+ # Rebuild tabular with the above file name
174
+ category.reset_tabular
175
+ category.format = format
176
+ end
177
+ end
178
+ end
179
+
180
+ # Tabular transformations required for upload?
181
+ if category.tabular?
182
+ # Remove non-printable characters from tabular input formats
183
+ # Cannot change the length of fixed width lines
184
+ replace = category.format == :fixed ? " " : ""
185
+ path&.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
186
+
187
+ # Extract the header line during the file upload when needed.
188
+ on_first = rocket_job_upload_header_lambda(category, on_first) if category.tabular.header?
189
+ end
190
+
191
+ count =
125
192
  if block
126
193
  input(category).upload(on_first: on_first, &block)
127
194
  else
128
- path = IOStreams.new(stream)
129
- path.file_name = file_name if file_name
130
- self.upload_file_name = path.file_name
131
195
  input(category).upload(on_first: on_first) do |io|
132
196
  path.each(stream_mode, **args) { |line| io << line }
133
197
  end
134
198
  end
199
+
135
200
  self.record_count = (record_count || 0) + count
136
201
  count
137
202
  end
@@ -144,6 +209,9 @@ module RocketJob
144
209
  # and uploaded into the job
145
210
  # These columns are automatically added to the select list to reduce overhead
146
211
  #
212
+ # category [Symbol|RocketJob::Category::Input]
213
+ # The category or the name of the category to upload to.
214
+ #
147
215
  # If a Block is supplied it is passed the model returned from the database and should
148
216
  # return the work item to be uploaded into the job.
149
217
  #
@@ -221,7 +289,7 @@ module RocketJob
221
289
  # for each slice processed
222
290
  #
223
291
  # Example
224
- # job.slice_size = 100
292
+ # job.input_category.slice_size = 100
225
293
  # job.upload_integer_range(200, 421)
226
294
  #
227
295
  # # Equivalent to calling:
@@ -253,7 +321,7 @@ module RocketJob
253
321
  # in a database based on the id column
254
322
  #
255
323
  # Example
256
- # job.slice_size = 100
324
+ # job.input_category.slice_size = 100
257
325
  # job.upload_integer_range_in_reverse_order(200, 421)
258
326
  #
259
327
  # # Equivalent to calling:
@@ -285,12 +353,12 @@ module RocketJob
285
353
  # For example the following types are not supported: Date
286
354
  #
287
355
  # Note:
288
- # The caller should honor `:slice_size`, the entire slice is loaded as-is.
356
+ # The caller should implement `:slice_size`, since the entire slice is saved as-is.
289
357
  #
290
358
  # Note:
291
359
  # Not thread-safe. Only call from one thread at a time
292
- def upload_slice(slice)
293
- input.insert(slice)
360
+ def upload_slice(slice, category: :main)
361
+ input(category).insert(slice)
294
362
  count = slice.size
295
363
  self.record_count = (record_count || 0) + count
296
364
  count
@@ -353,54 +421,54 @@ module RocketJob
353
421
  def download(stream = nil, category: :main, header_line: nil, **args, &block)
354
422
  raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
355
423
 
356
- return output(category).download(header_line: header_line, &block) if block
424
+ category = output_category(category) unless category.is_a?(Category::Output)
425
+ output_collection = output(category)
357
426
 
358
- output_collection = output(category)
427
+ # Store the output file name in the category
428
+ category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
359
429
 
360
430
  if output_collection.binary?
361
- IOStreams.new(stream).stream(:none).writer(**args) do |io|
362
- raise(ArgumenError, "A `header_line` is not supported with binary output collections") if header_line
431
+ raise(ArgumentError, "A `header_line` is not supported with binary output collections") if header_line
432
+
433
+ return output_collection.download(&block) if block
363
434
 
435
+ IOStreams.new(stream || category.file_name).stream(:none).writer(**args) do |io|
364
436
  output_collection.download { |record| io << record[:binary] }
365
437
  end
366
438
  else
367
- IOStreams.new(stream).writer(:line, **args) do |io|
439
+ header_line ||= category.render_header
440
+
441
+ return output_collection.download(header_line: header_line, &block) if block
442
+
443
+ raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
444
+
445
+ IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
368
446
  output_collection.download(header_line: header_line) { |record| io << record }
369
447
  end
370
448
  end
371
449
  end
372
450
 
373
- # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
374
- #
375
- # If a block is supplied, the block is supplied with a writer that should be used to
376
- # accumulate the results.
377
- #
378
- # Examples
379
- #
380
- # job.write_output('hello world')
381
- #
382
- # job.write_output do |writer|
383
- # writer << 'hello world'
384
- # end
385
- #
386
- # job.write_output do |writer|
387
- # result = RocketJob::Batch::Results
388
- # result << RocketJob::Batch::Result.new(:main, 'hello world')
389
- # result << RocketJob::Batch::Result.new(:errors, 'errors')
390
- # writer << result
391
- # end
392
- #
393
- # result = RocketJob::Batch::Results
394
- # result << RocketJob::Batch::Result.new(:main, 'hello world')
395
- # result << RocketJob::Batch::Result.new(:errors, 'errors')
396
- # job.write_output(result)
397
- def write_output(result = nil, input_slice = nil, &block)
398
- if block
399
- RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
400
- else
401
- raise(ArgumentError, "result parameter is required when no block is supplied") unless result
451
+ private
402
452
 
403
- RocketJob::Sliced::Writer::Output.collect(self, input_slice) { |writer| writer << result }
453
+ # Return a lambda to extract the header row from the uploaded file.
454
+ def rocket_job_upload_header_lambda(category, on_first)
455
+ case category.mode
456
+ when :line
457
+ lambda do |line|
458
+ category.tabular.parse_header(line)
459
+ category.cleanse_header!
460
+ category.columns = category.tabular.header.columns
461
+ # Call chained on_first if present
462
+ on_first&.call(line)
463
+ end
464
+ when :array
465
+ lambda do |row|
466
+ category.tabular.header.columns = row
467
+ category.cleanse_header!
468
+ category.columns = category.tabular.header.columns
469
+ # Call chained on_first if present
470
+ on_first&.call(line)
471
+ end
404
472
  end
405
473
  end
406
474
  end