rocketjob 5.4.1 → 6.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -5
- data/bin/rocketjob_batch_perf +1 -1
- data/bin/rocketjob_perf +1 -1
- data/lib/rocket_job/batch.rb +3 -0
- data/lib/rocket_job/batch/categories.rb +338 -0
- data/lib/rocket_job/batch/io.rb +128 -60
- data/lib/rocket_job/batch/model.rb +20 -68
- data/lib/rocket_job/batch/performance.rb +20 -8
- data/lib/rocket_job/batch/statistics.rb +35 -13
- data/lib/rocket_job/batch/tabular.rb +2 -0
- data/lib/rocket_job/batch/tabular/input.rb +8 -6
- data/lib/rocket_job/batch/tabular/output.rb +4 -2
- data/lib/rocket_job/batch/throttle_running_workers.rb +1 -5
- data/lib/rocket_job/batch/worker.rb +27 -24
- data/lib/rocket_job/category/base.rb +78 -0
- data/lib/rocket_job/category/input.rb +110 -0
- data/lib/rocket_job/category/output.rb +25 -0
- data/lib/rocket_job/cli.rb +24 -16
- data/lib/rocket_job/dirmon_entry.rb +22 -12
- data/lib/rocket_job/event.rb +1 -1
- data/lib/rocket_job/extensions/iostreams/path.rb +32 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -12
- data/lib/rocket_job/extensions/mongoid/stringified_symbol.rb +50 -0
- data/lib/rocket_job/extensions/psych/yaml_tree.rb +8 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +1 -1
- data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -6
- data/lib/rocket_job/jobs/on_demand_job.rb +1 -2
- data/lib/rocket_job/jobs/performance_job.rb +3 -1
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +5 -4
- data/lib/rocket_job/jobs/upload_file_job.rb +46 -9
- data/lib/rocket_job/lookup_collection.rb +68 -0
- data/lib/rocket_job/plugins/job/model.rb +25 -50
- data/lib/rocket_job/plugins/job/throttle_running_jobs.rb +1 -1
- data/lib/rocket_job/plugins/job/worker.rb +2 -7
- data/lib/rocket_job/plugins/restart.rb +12 -5
- data/lib/rocket_job/plugins/state_machine.rb +2 -1
- data/lib/rocket_job/ractor_worker.rb +42 -0
- data/lib/rocket_job/server/model.rb +1 -1
- data/lib/rocket_job/sliced.rb +15 -70
- data/lib/rocket_job/sliced/input.rb +1 -1
- data/lib/rocket_job/sliced/slice.rb +5 -13
- data/lib/rocket_job/sliced/slices.rb +14 -2
- data/lib/rocket_job/sliced/writer/output.rb +33 -44
- data/lib/rocket_job/subscribers/server.rb +1 -1
- data/lib/rocket_job/thread_worker.rb +46 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +21 -55
- data/lib/rocket_job/worker_pool.rb +5 -7
- data/lib/rocketjob.rb +52 -41
- metadata +35 -27
- data/lib/rocket_job/extensions/mongoid/remove_warnings.rb +0 -12
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +0 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2794f5dc5e0ada3ffdc3da9a13fd0cb6c5713f89254d93b69d60677283bc2d64
|
4
|
+
data.tar.gz: 3a208b181aca760b07432348bc2e51443a9da03cc6a143be81765ca2b3c0e37a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44816973f2f63dc300fe41e168ae485cd8b7def5e3bbab173501f6ef2935d3f65707ec4c2f9eb7cb6b43bab2d464b163f3e10216be50babf2dab5e82a7998439
|
7
|
+
data.tar.gz: c0b2d210a3bb3faa49f30eeaf687052ed79e9da802635c6434e374c4f1ccc3538a71a4db9391f41e18a6265106aa7fedeaf920edf4e9d11acf81c9bc632534bd
|
data/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# Rocket Job
|
2
|
-
[![Gem Version](https://img.shields.io/gem/v/rocketjob.svg)](https://rubygems.org/gems/rocketjob) [![
|
2
|
+
[![Gem Version](https://img.shields.io/gem/v/rocketjob.svg)](https://rubygems.org/gems/rocketjob) [![Downloads](https://img.shields.io/gem/dt/rocketjob.svg)](https://rubygems.org/gems/rocketjob) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Support](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
|
3
3
|
|
4
4
|
Ruby's missing batch system
|
5
5
|
|
@@ -17,11 +17,23 @@ Checkout https://rocketjob.io/
|
|
17
17
|
* Questions? Join the chat room on Gitter for [rocketjob support](https://gitter.im/rocketjob/support)
|
18
18
|
* [Report bugs](https://github.com/rocketjob/rocketjob/issues)
|
19
19
|
|
20
|
-
## Rocket Job
|
20
|
+
## Rocket Job v5
|
21
21
|
|
22
|
-
|
22
|
+
- Support for Ruby v3 and Rails 6.
|
23
|
+
- Multiple output file support through extended `output_categories` capability.
|
24
|
+
- File output formats for each category. For example: CSV, PSV, JSON, etc.
|
25
|
+
- Support for AWS DocumentDB as the data store.
|
26
|
+
- Removed use of Symbols to meet Symbol deprecation in MongoDB and Mongoid.
|
23
27
|
|
24
|
-
The
|
28
|
+
The following plugins have been deprecated and will be removed in Rocket Job v5.1
|
29
|
+
- RocketJob::Batch::Tabular::Input
|
30
|
+
- RocketJob::Batch::Tabular::Output
|
31
|
+
|
32
|
+
## Rocket Job v4
|
33
|
+
|
34
|
+
Rocket Job Pro is now open source and included in Rocket Job.
|
35
|
+
|
36
|
+
The `RocketJob::Batch` plugin now adds batch processing capabilities to break up a single task into many
|
25
37
|
concurrent workers processing slices of the entire job at the same time.
|
26
38
|
|
27
39
|
|
@@ -33,7 +45,9 @@ class MyJob < RocketJob::Job
|
|
33
45
|
|
34
46
|
self.description = "Reverse names"
|
35
47
|
self.destroy_on_complete = false
|
36
|
-
|
48
|
+
|
49
|
+
# Collect the output for this job in the default output category: `:main`
|
50
|
+
output_category
|
37
51
|
|
38
52
|
# Method to call by all available workers at the same time.
|
39
53
|
# Reverse the characters for each line:
|
data/bin/rocketjob_batch_perf
CHANGED
data/bin/rocketjob_perf
CHANGED
data/lib/rocket_job/batch.rb
CHANGED
@@ -7,6 +7,8 @@ require "rocket_job/batch/state_machine"
|
|
7
7
|
require "rocket_job/batch/throttle"
|
8
8
|
require "rocket_job/batch/throttle_running_workers"
|
9
9
|
require "rocket_job/batch/worker"
|
10
|
+
# Ensure after_perform is run first and #upload override is after IO#upload is defined.
|
11
|
+
require "rocket_job/batch/categories"
|
10
12
|
|
11
13
|
module RocketJob
|
12
14
|
module Batch
|
@@ -17,6 +19,7 @@ module RocketJob
|
|
17
19
|
include Callbacks
|
18
20
|
include Logger
|
19
21
|
include Worker
|
22
|
+
include Categories
|
20
23
|
include Throttle
|
21
24
|
include ThrottleRunningWorkers
|
22
25
|
include IO
|
@@ -0,0 +1,338 @@
|
|
1
|
+
require "active_support/concern"
|
2
|
+
|
3
|
+
module RocketJob
|
4
|
+
module Batch
|
5
|
+
module Categories
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
|
8
|
+
included do
|
9
|
+
after_initialize :rocketjob_categories_assign, if: :new_record?
|
10
|
+
after_initialize :rocketjob_categories_migrate, unless: :new_record?
|
11
|
+
before_perform :rocketjob_categories_input_render
|
12
|
+
after_perform :rocketjob_categories_output_render
|
13
|
+
|
14
|
+
# List of categories that this job can load input data into
|
15
|
+
embeds_many :input_categories, class_name: "RocketJob::Category::Input"
|
16
|
+
|
17
|
+
# List of categories that this job can save output data into
|
18
|
+
embeds_many :output_categories, class_name: "RocketJob::Category::Output"
|
19
|
+
|
20
|
+
# Internal attributes
|
21
|
+
class_attribute :defined_input_categories, instance_accessor: false, instance_predicate: false
|
22
|
+
class_attribute :defined_output_categories, instance_accessor: false, instance_predicate: false
|
23
|
+
|
24
|
+
# For RJMC to be able to edit jobs
|
25
|
+
accepts_nested_attributes_for :input_categories, :output_categories
|
26
|
+
end
|
27
|
+
|
28
|
+
module ClassMethods
|
29
|
+
# Define a new input category
|
30
|
+
# @see RocketJob::Category::Input
|
31
|
+
def input_category(**args)
|
32
|
+
category = RocketJob::Category::Input.new(**args)
|
33
|
+
if defined_input_categories.nil?
|
34
|
+
self.defined_input_categories = [category]
|
35
|
+
else
|
36
|
+
rocketjob_categories_set(category, defined_input_categories)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Define a new output category
|
41
|
+
# @see RocketJob::Category::Output
|
42
|
+
def output_category(**args)
|
43
|
+
category = RocketJob::Category::Output.new(**args)
|
44
|
+
if defined_output_categories.nil?
|
45
|
+
self.defined_output_categories = [category]
|
46
|
+
else
|
47
|
+
rocketjob_categories_set(category, defined_output_categories)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Builds this job instance from the supplied properties hash that may contain input and output categories.
|
52
|
+
# Keeps the defaults and merges in settings without replacing existing categories.
|
53
|
+
def from_properties(properties)
|
54
|
+
return super(properties) unless properties.key?("input_categories") || properties.key?("output_categories")
|
55
|
+
|
56
|
+
properties = properties.dup
|
57
|
+
input_categories = properties.delete("input_categories")
|
58
|
+
output_categories = properties.delete("output_categories")
|
59
|
+
job = super(properties)
|
60
|
+
job.merge_input_categories(input_categories)
|
61
|
+
job.merge_output_categories(output_categories)
|
62
|
+
job
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def rocketjob_categories_set(category, categories)
|
68
|
+
index = categories.find_index { |cat| cat.name == category.name }
|
69
|
+
index ? categories[index] = category : categories << category
|
70
|
+
category
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def input_category(category_name = :main)
|
75
|
+
category_name = category_name.to_sym
|
76
|
+
category = nil
|
77
|
+
# .find does not work against this association
|
78
|
+
input_categories.each { |catg| category = catg if catg.name == category_name }
|
79
|
+
unless category
|
80
|
+
# Auto-register main input category if missing
|
81
|
+
if category_name == :main
|
82
|
+
category = Category::Input.new
|
83
|
+
self.input_categories = [category]
|
84
|
+
else
|
85
|
+
raise(ArgumentError, "Unknown Input Category: #{category_name.inspect}. Registered categories: #{input_categories.collect(&:name).join(',')}")
|
86
|
+
end
|
87
|
+
end
|
88
|
+
category
|
89
|
+
end
|
90
|
+
|
91
|
+
def output_category(category_name = :main)
|
92
|
+
category_name = category_name.to_sym
|
93
|
+
category = nil
|
94
|
+
# .find does not work against this association
|
95
|
+
output_categories.each { |catg| category = catg if catg.name == category_name }
|
96
|
+
unless category
|
97
|
+
raise(ArgumentError, "Unknown Output Category: #{category_name.inspect}. Registered categories: #{output_categories.collect(&:name).join(',')}")
|
98
|
+
end
|
99
|
+
category
|
100
|
+
end
|
101
|
+
|
102
|
+
# Returns [true|false] whether the named category has already been defined
|
103
|
+
def input_category?(category_name)
|
104
|
+
category_name = category_name.to_sym
|
105
|
+
# .find does not work against this association
|
106
|
+
input_categories.each { |catg| return true if catg.name == category_name }
|
107
|
+
false
|
108
|
+
end
|
109
|
+
|
110
|
+
def output_category?(category_name)
|
111
|
+
category_name = category_name.to_sym
|
112
|
+
# .find does not work against this association
|
113
|
+
output_categories.each { |catg| return true if catg.name == category_name }
|
114
|
+
false
|
115
|
+
end
|
116
|
+
|
117
|
+
def merge_input_categories(categories)
|
118
|
+
return if categories.blank?
|
119
|
+
|
120
|
+
categories.each do |properties|
|
121
|
+
category_name = (properties["name"] || properties[:name] || :main).to_sym
|
122
|
+
category = input_category(category_name)
|
123
|
+
properties.each { |key, value| category.public_send("#{key}=".to_sym, value) }
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def merge_output_categories(categories)
|
128
|
+
return if categories.blank?
|
129
|
+
|
130
|
+
categories.each do |properties|
|
131
|
+
category_name = (properties["name"] || properties[:name] || :main).to_sym
|
132
|
+
category = output_category(category_name)
|
133
|
+
properties.each { |key, value| category.public_send("#{key}=".to_sym, value) }
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
private
|
138
|
+
|
139
|
+
def rocketjob_categories_assign
|
140
|
+
# Input categories defaults to :main if none was set in the class
|
141
|
+
if input_categories.empty?
|
142
|
+
self.input_categories =
|
143
|
+
if self.class.defined_input_categories
|
144
|
+
self.class.defined_input_categories.deep_dup
|
145
|
+
else
|
146
|
+
[RocketJob::Category::Input.new]
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
return if !self.class.defined_output_categories || !output_categories.empty?
|
151
|
+
|
152
|
+
# Output categories default to nil if none was set in the class
|
153
|
+
self.output_categories = self.class.defined_output_categories.deep_dup
|
154
|
+
end
|
155
|
+
|
156
|
+
# Render the output from the perform.
|
157
|
+
def rocketjob_categories_output_render
|
158
|
+
return if @rocket_job_output.nil?
|
159
|
+
|
160
|
+
# TODO: ..
|
161
|
+
return unless output_categories
|
162
|
+
return if output_categories.empty?
|
163
|
+
|
164
|
+
@rocket_job_output = rocketjob_categories_output_render_row(@rocket_job_output)
|
165
|
+
end
|
166
|
+
|
167
|
+
# Parse the input data before passing to the perform method
|
168
|
+
def rocketjob_categories_input_render
|
169
|
+
return if @rocket_job_input.nil?
|
170
|
+
|
171
|
+
@rocket_job_input = rocketjob_categories_input_render_row(@rocket_job_input)
|
172
|
+
end
|
173
|
+
|
174
|
+
def rocketjob_categories_input_render_row(row)
|
175
|
+
return if row.nil?
|
176
|
+
|
177
|
+
category = input_category
|
178
|
+
return row if category.nil? || !category.tabular?
|
179
|
+
return nil if row.blank?
|
180
|
+
|
181
|
+
tabular = category.tabular
|
182
|
+
|
183
|
+
# Fail when the header that is required to parse this row has not yet been set.
|
184
|
+
if tabular.header?
|
185
|
+
raise(ArgumentError,
|
186
|
+
"The tabular header columns _must_ be set before attempting to parse data that requires it.")
|
187
|
+
end
|
188
|
+
|
189
|
+
tabular.record_parse(row)
|
190
|
+
end
|
191
|
+
|
192
|
+
def rocketjob_categories_output_render_row(row)
|
193
|
+
return if row.nil?
|
194
|
+
|
195
|
+
if row.is_a?(Batch::Result)
|
196
|
+
category = output_category(row.category)
|
197
|
+
row.value = category.tabular.render(row.value) if category.tabular?
|
198
|
+
return row
|
199
|
+
end
|
200
|
+
|
201
|
+
if row.is_a?(Batch::Results)
|
202
|
+
results = Batch::Results.new
|
203
|
+
row.each { |result| results << rocketjob_categories_output_render_row(result) }
|
204
|
+
return results
|
205
|
+
end
|
206
|
+
|
207
|
+
category = output_category
|
208
|
+
return row unless category.tabular?
|
209
|
+
return nil if row.blank?
|
210
|
+
|
211
|
+
category.tabular.render(row)
|
212
|
+
end
|
213
|
+
|
214
|
+
# Migrate existing v4 batch jobs to v5.0
|
215
|
+
def rocketjob_categories_migrate
|
216
|
+
return unless attribute_present?(:input_categories) && self[:input_categories]&.first.is_a?(Symbol)
|
217
|
+
|
218
|
+
serializer = :none
|
219
|
+
if attribute_present?(:compress)
|
220
|
+
serializer = :compress if self[:compress]
|
221
|
+
remove_attribute(:compress)
|
222
|
+
end
|
223
|
+
|
224
|
+
if attribute_present?(:encrypt)
|
225
|
+
serializer = :encrypt if self[:encrypt]
|
226
|
+
remove_attribute(:encrypt)
|
227
|
+
end
|
228
|
+
|
229
|
+
slice_size = 100
|
230
|
+
if attribute_present?(:slice_size)
|
231
|
+
slice_size = self[:slice_size].to_i
|
232
|
+
remove_attribute(:slice_size)
|
233
|
+
end
|
234
|
+
|
235
|
+
main_input_format = nil
|
236
|
+
main_input_mode = :line
|
237
|
+
main_input_columns = nil
|
238
|
+
# Only migrate tabular attributes if the job also removed the tabular plugin.
|
239
|
+
unless respond_to?(:tabular_input_render)
|
240
|
+
if attribute_present?(:tabular_input_format)
|
241
|
+
main_input_format = self[:tabular_input_format]
|
242
|
+
remove_attribute(:tabular_input_format)
|
243
|
+
end
|
244
|
+
|
245
|
+
if attribute_present?(:tabular_input_mode)
|
246
|
+
main_input_mode = self[:tabular_input_mode]
|
247
|
+
remove_attribute(:tabular_input_mode)
|
248
|
+
end
|
249
|
+
|
250
|
+
if attribute_present?(:tabular_input_header)
|
251
|
+
main_input_columns = self[:tabular_input_header]
|
252
|
+
remove_attribute(:tabular_input_header)
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
file_name = nil
|
257
|
+
if attribute_present?(:upload_file_name)
|
258
|
+
file_name = self[:upload_file_name]
|
259
|
+
remove_attribute(:upload_file_name)
|
260
|
+
end
|
261
|
+
|
262
|
+
existing = self[:input_categories]
|
263
|
+
self[:input_categories] = []
|
264
|
+
self[:input_categories] = existing.collect do |category_name|
|
265
|
+
RocketJob::Category::Input.new(
|
266
|
+
name: category_name,
|
267
|
+
file_name: file_name,
|
268
|
+
serializer: serializer,
|
269
|
+
slice_size: slice_size,
|
270
|
+
format: [:main, "main"].include?(category_name) ? main_input_format : nil,
|
271
|
+
columns: [:main, "main"].include?(category_name) ? main_input_columns : nil,
|
272
|
+
mode: [:main, "main"].include?(category_name) ? main_input_mode : nil
|
273
|
+
).as_document
|
274
|
+
end
|
275
|
+
|
276
|
+
collect_output = false
|
277
|
+
if attribute_present?(:collect_output)
|
278
|
+
collect_output = self[:collect_output]
|
279
|
+
remove_attribute(:collect_output)
|
280
|
+
end
|
281
|
+
|
282
|
+
collect_nil_output = true
|
283
|
+
if attribute_present?(:collect_nil_output)
|
284
|
+
collect_nil_output = self[:collect_nil_output]
|
285
|
+
remove_attribute(:collect_nil_output)
|
286
|
+
end
|
287
|
+
|
288
|
+
main_output_format = nil
|
289
|
+
main_output_columns = nil
|
290
|
+
main_output_options = nil
|
291
|
+
|
292
|
+
# Only migrate tabular attributes if the job also removed the tabular plugin.
|
293
|
+
unless respond_to?(:tabular_output_render)
|
294
|
+
if attribute_present?(:tabular_output_format)
|
295
|
+
main_output_format = self[:tabular_output_format]
|
296
|
+
remove_attribute(:tabular_output_format)
|
297
|
+
end
|
298
|
+
|
299
|
+
if attribute_present?(:tabular_output_header)
|
300
|
+
main_output_columns = self[:tabular_output_header]
|
301
|
+
remove_attribute(:tabular_output_header)
|
302
|
+
end
|
303
|
+
|
304
|
+
if attribute_present?(:tabular_output_options)
|
305
|
+
main_output_options = self[:tabular_output_options]
|
306
|
+
remove_attribute(:tabular_output_options)
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
existing = self[:output_categories]
|
311
|
+
self[:output_categories] = []
|
312
|
+
if collect_output
|
313
|
+
if existing.blank?
|
314
|
+
self[:output_categories] = [
|
315
|
+
RocketJob::Category::Output.new(
|
316
|
+
nils: collect_nil_output,
|
317
|
+
format: main_output_format,
|
318
|
+
columns: main_output_columns,
|
319
|
+
format_options: main_output_options
|
320
|
+
).as_document
|
321
|
+
]
|
322
|
+
elsif existing.first.is_a?(Symbol)
|
323
|
+
self[:output_categories] = existing.collect do |category_name|
|
324
|
+
RocketJob::Category::Output.new(
|
325
|
+
name: category_name,
|
326
|
+
serializer: serializer,
|
327
|
+
nils: collect_nil_output,
|
328
|
+
format: [:main, "main"].include?(category_name) ? main_output_format : nil,
|
329
|
+
columns: [:main, "main"].include?(category_name) ? main_output_columns : nil,
|
330
|
+
format_options: [:main, "main"].include?(category_name) ? main_output_options : nil
|
331
|
+
).as_document
|
332
|
+
end
|
333
|
+
end
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|
337
|
+
end
|
338
|
+
end
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -9,32 +9,68 @@ module RocketJob
|
|
9
9
|
# Returns [RocketJob::Sliced::Input] input collection for holding input slices
|
10
10
|
#
|
11
11
|
# Parameters:
|
12
|
-
# category [Symbol]
|
13
|
-
# The name of the category to access or upload data into
|
12
|
+
# category [Symbol|RocketJob::Category::Input]
|
13
|
+
# The category or the name of the category to access or upload data into
|
14
14
|
# Default: None ( Uses the single default input collection for this job )
|
15
15
|
# Validates: This value must be one of those listed in #input_categories
|
16
16
|
def input(category = :main)
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
raise(ArgumentError, "Cannot supply Output Category to input category") if category.is_a?(Category::Output)
|
18
|
+
|
19
|
+
category = input_category(category) unless category.is_a?(Category::Input)
|
20
20
|
|
21
|
-
(@inputs ||= {})[category] ||= RocketJob::Sliced.factory(:input, category, self)
|
21
|
+
(@inputs ||= {})[category.name] ||= RocketJob::Sliced.factory(:input, category, self)
|
22
22
|
end
|
23
23
|
|
24
24
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
25
25
|
# Returns nil if no output is being collected
|
26
26
|
#
|
27
27
|
# Parameters:
|
28
|
-
# category [Symbol]
|
29
|
-
# The name of the category to access or download data from
|
28
|
+
# category [Symbol|RocketJob::Category::Output]
|
29
|
+
# The category or the name of the category to access or download data from
|
30
30
|
# Default: None ( Uses the single default output collection for this job )
|
31
31
|
# Validates: This value must be one of those listed in #output_categories
|
32
32
|
def output(category = :main)
|
33
|
-
|
34
|
-
|
35
|
-
|
33
|
+
raise(ArgumentError, "Cannot supply Input Category to output category") if category.is_a?(Category::Input)
|
34
|
+
|
35
|
+
category = output_category(category) unless category.is_a?(Category::Output)
|
36
36
|
|
37
|
-
(@outputs ||= {})[category] ||= RocketJob::Sliced.factory(:output, category, self)
|
37
|
+
(@outputs ||= {})[category.name] ||= RocketJob::Sliced.factory(:output, category, self)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Rapidly upload individual records in batches.
|
41
|
+
#
|
42
|
+
# Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
|
43
|
+
# for each and every row.
|
44
|
+
#
|
45
|
+
# input_category(:my_lookup).find(id: 123).first
|
46
|
+
#
|
47
|
+
# Lookup collection.
|
48
|
+
#
|
49
|
+
# Upload side / secondary lookup tables that can be accessed during job processing.
|
50
|
+
#
|
51
|
+
# Example:
|
52
|
+
# lookup_collection(:my_lookup).upload do |io|
|
53
|
+
# io << {id: 123, data: "first record"}
|
54
|
+
# io << {id: 124, data: "second record"}
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# Parameters:
|
58
|
+
# category [Symbol|RocketJob::Category::Input]
|
59
|
+
# The category or the name of the category to access or download data from
|
60
|
+
# Default: None ( Uses the single default input collection for this job )
|
61
|
+
# Validates: This value must be one of those listed in #input_categories
|
62
|
+
# Returns the LookupCollection for the supplied input category.
#
# The collection is created on first use and cached per category name, so every
# subsequent call for the same category returns that same instance.
#
# The underlying collection is named "rocket_job.inputs.<job id>", with
# ".<category name>" appended for every category other than :main.
def lookup_collection(category = :main)
  category = input_category(category) unless category.is_a?(Category::Input)

  # Memoize and always return the collection, including on a cache hit.
  (@lookup_collections ||= {})[category.name] ||= begin
    collection_name = "rocket_job.inputs.#{id}"
    # Build a new string instead of appending in place, so this also works when
    # interpolated string literals are frozen.
    collection_name = "#{collection_name}.#{category.name}" unless category.name == :main

    LookupCollection.new(Sliced::Slice.collection.database, collection_name)
  end
end
|
39
75
|
|
40
76
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
@@ -65,6 +101,11 @@ module RocketJob
|
|
65
101
|
# Parses each line from the file into a Hash and uploads each hash for processing by workers.
|
66
102
|
# See IOStreams::Stream#each.
|
67
103
|
#
|
104
|
+
# category [Symbol|RocketJob::Category::Input]
|
105
|
+
# The category or the name of the category to upload data into
|
106
|
+
# Default: None ( Uses the single default input collection for this job )
|
107
|
+
# Validates: This value must be one of those listed in #input_categories
|
108
|
+
#
|
68
109
|
# Example:
|
69
110
|
# # Load plain text records from a file
|
70
111
|
# job.upload('hello.csv')
|
@@ -116,22 +157,46 @@ module RocketJob
|
|
116
157
|
def upload(stream = nil, file_name: nil, category: :main, stream_mode: :line, on_first: nil, **args, &block)
|
117
158
|
raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
|
118
159
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
160
|
+
category = input_category(category) unless category.is_a?(Category::Input)
|
161
|
+
stream ||= category.file_name
|
162
|
+
path = nil
|
163
|
+
|
164
|
+
if stream
|
165
|
+
path = IOStreams.new(stream)
|
166
|
+
path.file_name = file_name if file_name
|
167
|
+
category.file_name = path.file_name
|
123
168
|
|
124
|
-
|
169
|
+
# Auto detect the format based on the upload file name if present.
|
170
|
+
if category.format == :auto
|
171
|
+
format = path.format
|
172
|
+
if format
|
173
|
+
# Rebuild tabular with the above file name
|
174
|
+
category.reset_tabular
|
175
|
+
category.format = format
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Tabular transformations required for upload?
|
181
|
+
if category.tabular?
|
182
|
+
# Remove non-printable characters from tabular input formats
|
183
|
+
# Cannot change the length of fixed width lines
|
184
|
+
replace = category.format == :fixed ? " " : ""
|
185
|
+
path&.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
|
186
|
+
|
187
|
+
# Extract the header line during the file upload when needed.
|
188
|
+
on_first = rocket_job_upload_header_lambda(category, on_first) if category.tabular.header?
|
189
|
+
end
|
190
|
+
|
191
|
+
count =
|
125
192
|
if block
|
126
193
|
input(category).upload(on_first: on_first, &block)
|
127
194
|
else
|
128
|
-
path = IOStreams.new(stream)
|
129
|
-
path.file_name = file_name if file_name
|
130
|
-
self.upload_file_name = path.file_name
|
131
195
|
input(category).upload(on_first: on_first) do |io|
|
132
196
|
path.each(stream_mode, **args) { |line| io << line }
|
133
197
|
end
|
134
198
|
end
|
199
|
+
|
135
200
|
self.record_count = (record_count || 0) + count
|
136
201
|
count
|
137
202
|
end
|
@@ -144,6 +209,9 @@ module RocketJob
|
|
144
209
|
# and uploaded into the job
|
145
210
|
# These columns are automatically added to the select list to reduce overhead
|
146
211
|
#
|
212
|
+
# category [Symbol|RocketJob::Category::Input]
|
213
|
+
# The category or the name of the category to upload to.
|
214
|
+
#
|
147
215
|
# If a Block is supplied it is passed the model returned from the database and should
|
148
216
|
# return the work item to be uploaded into the job.
|
149
217
|
#
|
@@ -221,7 +289,7 @@ module RocketJob
|
|
221
289
|
# for each slice processed
|
222
290
|
#
|
223
291
|
# Example
|
224
|
-
# job.slice_size = 100
|
292
|
+
# job.input_category.slice_size = 100
|
225
293
|
# job.upload_integer_range(200, 421)
|
226
294
|
#
|
227
295
|
# # Equivalent to calling:
|
@@ -253,7 +321,7 @@ module RocketJob
|
|
253
321
|
# in a database based on the id column
|
254
322
|
#
|
255
323
|
# Example
|
256
|
-
# job.slice_size = 100
|
324
|
+
# job.input_category.slice_size = 100
|
257
325
|
# job.upload_integer_range_in_reverse_order(200, 421)
|
258
326
|
#
|
259
327
|
# # Equivalent to calling:
|
@@ -285,12 +353,12 @@ module RocketJob
|
|
285
353
|
# For example the following types are not supported: Date
|
286
354
|
#
|
287
355
|
# Note:
|
288
|
-
# The caller should
|
356
|
+
# The caller should honor `:slice_size`, since the entire slice is saved as-is.
|
289
357
|
#
|
290
358
|
# Note:
|
291
359
|
# Not thread-safe. Only call from one thread at a time
|
292
|
-
def upload_slice(slice)
|
293
|
-
input.insert(slice)
|
360
|
+
def upload_slice(slice, category: :main)
|
361
|
+
input(category).insert(slice)
|
294
362
|
count = slice.size
|
295
363
|
self.record_count = (record_count || 0) + count
|
296
364
|
count
|
@@ -353,54 +421,54 @@ module RocketJob
|
|
353
421
|
def download(stream = nil, category: :main, header_line: nil, **args, &block)
|
354
422
|
raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
|
355
423
|
|
356
|
-
|
357
|
-
|
424
|
+
category = output_category(category) unless category.is_a?(Category::Output)
|
358
425
|
output_collection = output(category)
|
359
426
|
|
427
|
+
# Store the output file name in the category
|
428
|
+
category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
|
429
|
+
|
360
430
|
if output_collection.binary?
|
361
|
-
|
362
|
-
|
431
|
+
raise(ArgumentError, "A `header_line` is not supported with binary output collections") if header_line
|
432
|
+
|
433
|
+
return output_collection.download(&block) if block
|
363
434
|
|
435
|
+
IOStreams.new(stream || category.file_name).stream(:none).writer(**args) do |io|
|
364
436
|
output_collection.download { |record| io << record[:binary] }
|
365
437
|
end
|
366
438
|
else
|
367
|
-
|
439
|
+
header_line ||= category.render_header
|
440
|
+
|
441
|
+
return output_collection.download(header_line: header_line, &block) if block
|
442
|
+
|
443
|
+
raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
|
444
|
+
|
445
|
+
IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
|
368
446
|
output_collection.download(header_line: header_line) { |record| io << record }
|
369
447
|
end
|
370
448
|
end
|
371
449
|
end
|
372
450
|
|
373
|
-
|
374
|
-
#
|
375
|
-
# If a block is supplied, the block is supplied with a writer that should be used to
|
376
|
-
# accumulate the results.
|
377
|
-
#
|
378
|
-
# Examples
|
379
|
-
#
|
380
|
-
# job.write_output('hello world')
|
381
|
-
#
|
382
|
-
# job.write_output do |writer|
|
383
|
-
# writer << 'hello world'
|
384
|
-
# end
|
385
|
-
#
|
386
|
-
# job.write_output do |writer|
|
387
|
-
# result = RocketJob::Batch::Results
|
388
|
-
# result << RocketJob::Batch::Result.new(:main, 'hello world')
|
389
|
-
# result << RocketJob::Batch::Result.new(:errors, 'errors')
|
390
|
-
# writer << result
|
391
|
-
# end
|
392
|
-
#
|
393
|
-
# result = RocketJob::Batch::Results
|
394
|
-
# result << RocketJob::Batch::Result.new(:main, 'hello world')
|
395
|
-
# result << RocketJob::Batch::Result.new(:errors, 'errors')
|
396
|
-
# job.write_output(result)
|
397
|
-
def write_output(result = nil, input_slice = nil, &block)
|
398
|
-
if block
|
399
|
-
RocketJob::Sliced::Writer::Output.collect(self, input_slice, &block)
|
400
|
-
else
|
401
|
-
raise(ArgumentError, "result parameter is required when no block is supplied") unless result
|
451
|
+
private
|
402
452
|
|
403
|
-
|
453
|
+
# Return a lambda to extract the header row from the uploaded file.
#
# The lambda is meant to be used as the `on_first` callback of an upload. It receives the
# first line / row, sets the header on `category.tabular`, cleanses it, copies the resulting
# columns onto the category, and then calls the caller supplied `on_first`, when present,
# with that same first line / row.
#
# Parameters:
#   category
#     Input category whose `mode` selects how the first record is interpreted:
#     `:line` parses the raw header line, `:array` uses the row as the header columns.
#   on_first [Proc|nil]
#     Optional caller supplied callback that is chained after the header is extracted.
#
# For any other mode the supplied `on_first` is returned unchanged.
def rocket_job_upload_header_lambda(category, on_first)
  case category.mode
  when :line
    lambda do |line|
      category.tabular.parse_header(line)
      category.cleanse_header!
      category.columns = category.tabular.header.columns
      # Call chained on_first if present
      on_first&.call(line)
    end
  when :array
    lambda do |row|
      category.tabular.header.columns = row
      category.cleanse_header!
      category.columns = category.tabular.header.columns
      # Call chained on_first if present, passing the row this lambda received.
      on_first&.call(row)
    end
  else
    # No header extraction for this mode: keep the caller's callback instead of dropping it.
    on_first
  end
end
|
406
474
|
end
|