rocketjob 6.0.0.rc1 → 6.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +164 -8
- data/lib/rocket_job/batch/categories.rb +25 -18
- data/lib/rocket_job/batch/io.rb +130 -130
- data/lib/rocket_job/batch/performance.rb +2 -2
- data/lib/rocket_job/batch/statistics.rb +2 -2
- data/lib/rocket_job/batch/throttle_running_workers.rb +1 -1
- data/lib/rocket_job/batch/worker.rb +14 -12
- data/lib/rocket_job/batch.rb +0 -1
- data/lib/rocket_job/category/base.rb +10 -7
- data/lib/rocket_job/category/input.rb +61 -1
- data/lib/rocket_job/category/output.rb +9 -0
- data/lib/rocket_job/cli.rb +1 -1
- data/lib/rocket_job/dirmon_entry.rb +1 -1
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +2 -2
- data/lib/rocket_job/extensions/rocket_job_adapter.rb +2 -2
- data/lib/rocket_job/job_exception.rb +1 -1
- data/lib/rocket_job/jobs/conversion_job.rb +43 -0
- data/lib/rocket_job/jobs/dirmon_job.rb +24 -35
- data/lib/rocket_job/jobs/housekeeping_job.rb +4 -5
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +15 -11
- data/lib/rocket_job/jobs/on_demand_job.rb +2 -2
- data/lib/rocket_job/jobs/re_encrypt/relational_job.rb +103 -97
- data/lib/rocket_job/jobs/upload_file_job.rb +6 -3
- data/lib/rocket_job/lookup_collection.rb +4 -3
- data/lib/rocket_job/plugins/cron.rb +60 -20
- data/lib/rocket_job/plugins/job/persistence.rb +36 -0
- data/lib/rocket_job/plugins/job/throttle.rb +2 -2
- data/lib/rocket_job/plugins/restart.rb +3 -110
- data/lib/rocket_job/plugins/state_machine.rb +2 -2
- data/lib/rocket_job/plugins/throttle_dependent_jobs.rb +43 -0
- data/lib/rocket_job/sliced/bzip2_output_slice.rb +18 -19
- data/lib/rocket_job/sliced/compressed_slice.rb +3 -6
- data/lib/rocket_job/sliced/encrypted_bzip2_output_slice.rb +49 -0
- data/lib/rocket_job/sliced/encrypted_slice.rb +4 -6
- data/lib/rocket_job/sliced/input.rb +42 -54
- data/lib/rocket_job/sliced/slice.rb +7 -3
- data/lib/rocket_job/sliced/slices.rb +12 -9
- data/lib/rocket_job/sliced/writer/input.rb +46 -18
- data/lib/rocket_job/sliced/writer/output.rb +0 -1
- data/lib/rocket_job/sliced.rb +1 -19
- data/lib/rocket_job/throttle_definitions.rb +7 -1
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocketjob.rb +4 -5
- metadata +12 -12
- data/lib/rocket_job/batch/tabular/input.rb +0 -133
- data/lib/rocket_job/batch/tabular/output.rb +0 -67
- data/lib/rocket_job/batch/tabular.rb +0 -58
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 305189df5d57cf64c3d771bc05f940df6be6fd8c322ae9f3f796166fe99e1b75
|
4
|
+
data.tar.gz: a16ed0d77f1d1cb4e0ec4beefba922278a8b05e90ab5ecaffe6a3c347abdfed0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2afe61652e3b6b225515b95e7b370f8c4a8853c30cad45d0fc93d49a86571764b480aaca5a5cfb264ea1ea86912b6b680e4e122952383f2425585e3683e8581
|
7
|
+
data.tar.gz: eb1b041fbe425143c5f2b64aa5e799c35bf6885c00020964cf87d87485ba359e19796f7e5c5378baefe9f4ca93fe5de900b78e6669c7ffe83838c87e197ab0e1
|
data/README.md
CHANGED
@@ -17,21 +17,177 @@ Checkout https://rocketjob.io/
|
|
17
17
|
* Questions? Join the chat room on Gitter for [rocketjob support](https://gitter.im/rocketjob/support)
|
18
18
|
* [Report bugs](https://github.com/rocketjob/rocketjob/issues)
|
19
19
|
|
20
|
-
## Rocket Job
|
20
|
+
## Rocket Job v6
|
21
21
|
|
22
22
|
- Support for Ruby v3 and Rails 6.
|
23
|
-
-
|
24
|
-
-
|
25
|
-
-
|
23
|
+
- Major enhancements in Batch job support:
|
24
|
+
- Direct built-in Tabular support for all input and output categories.
|
25
|
+
- Multiple output file support, each with its own settings for:
|
26
|
+
- Compression
|
27
|
+
- GZip, Zip, BZip2 (Chunked for much faster loading into Apache Spark).
|
28
|
+
- Encryption
|
29
|
+
- PGP, Symmetric Encryption.
|
30
|
+
- File format
|
31
|
+
- CSV, PSV, JSON, Fixed Format, xlsx.
|
32
|
+
- Significant error handling improvements, especially around throttle failures
|
33
|
+
that used to result in "hanging" jobs.
|
34
|
+
- Support AWS DocumentDB in addition to MongoDB as the data store.
|
26
35
|
- Removed use of Symbols to meet Symbol deprecation in MongoDB and Mongoid.
|
27
36
|
|
28
|
-
|
29
|
-
|
30
|
-
|
37
|
+
### Upgrading to Rocket Job v6
|
38
|
+
|
39
|
+
The following plugins have been deprecated and are no longer loaded by default.
|
40
|
+
- `RocketJob::Batch::Tabular::Input`
|
41
|
+
- `RocketJob::Batch::Tabular::Output`
|
42
|
+
|
43
|
+
If your code relies on these plugins and you still want to upgrade to Rocket Job v6,
|
44
|
+
add the following require statement to any jobs that still use them:
|
45
|
+
|
46
|
+
~~~ruby
|
47
|
+
require "rocket_job/batch/tabular"
|
48
|
+
~~~
|
49
|
+
|
50
|
+
It is important to migrate away from these plugins, since they will be removed in a future release.
|
51
|
+
|
52
|
+
#### Scheduled Jobs
|
53
|
+
|
54
|
+
For any scheduled jobs that include the `RocketJob::Plugins::Cron` plugin, the default behavior has changed
|
55
|
+
so that the scheduled job instance is created immediately after the currently scheduled instance starts.
|
56
|
+
|
57
|
+
To maintain the old behavior of creating the job when it fails, aborts, or completes, add the following line
|
58
|
+
to each of the applicable jobs:
|
59
|
+
|
60
|
+
~~~ruby
|
61
|
+
self.cron_after_start = false
|
62
|
+
~~~
|
63
|
+
|
64
|
+
Additionally, scheduled jobs will now prevent a new one from being created when another scheduled instance
|
65
|
+
of the same job is already queued, or running with the _same_ `cron_schedule`.
|
66
|
+
|
67
|
+
To maintain the old behavior of allowing multiple instances with the same cron schedule, add the following
|
68
|
+
line to each of the applicable jobs:
|
69
|
+
|
70
|
+
~~~ruby
|
71
|
+
self.cron_singleton = false
|
72
|
+
~~~
|
73
|
+
|
74
|
+
##### Singleton
|
75
|
+
|
76
|
+
Since Scheduled jobs now implement their own singleton logic, remove the singleton plugin from any scheduled jobs.
|
77
|
+
|
78
|
+
#### Upgrading Batch Jobs to Rocket Job v6
|
79
|
+
|
80
|
+
Rocket Job v6 replaces the array of symbol type for `input_categories` and `output_categories`
|
81
|
+
with an array of `RocketJob::Category::Input` and `RocketJob::Category::Output`.
|
82
|
+
|
83
|
+
Jobs that added or modified the input or output categories need to be upgraded. For example:
|
84
|
+
~~~ruby
|
85
|
+
class MyJob < RocketJob::Job
|
86
|
+
include RocketJob::Batch
|
87
|
+
|
88
|
+
self.output_categories = [:main, :errors, :ignored]
|
89
|
+
end
|
90
|
+
~~~
|
91
|
+
|
92
|
+
Needs to be changed to:
|
93
|
+
~~~ruby
|
94
|
+
class MyJob < RocketJob::Job
|
95
|
+
include RocketJob::Batch
|
96
|
+
|
97
|
+
output_category name: :main
|
98
|
+
output_category name: :errors
|
99
|
+
output_category name: :ignored
|
100
|
+
end
|
101
|
+
~~~
|
102
|
+
|
103
|
+
##### slice_size, encrypt, compress
|
104
|
+
|
105
|
+
These fields have been removed from the job itself:
|
106
|
+
~~~ruby
|
107
|
+
class MyJob < RocketJob::Job
|
108
|
+
include RocketJob::Batch
|
109
|
+
|
110
|
+
self.slice_size = 1_000
|
111
|
+
self.encrypt = true
|
112
|
+
self.compress = true
|
113
|
+
end
|
114
|
+
~~~
|
115
|
+
|
116
|
+
They are now specified on the `input_category` as follows:
|
117
|
+
- `slice_size` just moves under `input_category`.
|
118
|
+
- `encrypt` becomes an option to `serializer`.
|
119
|
+
- `compress` is now the default for all batch jobs so is not needed.
|
120
|
+
|
121
|
+
If the serializer is set to `encrypt` then it is automatically compressed.
|
122
|
+
|
123
|
+
~~~ruby
|
124
|
+
class MyJob < RocketJob::Job
|
125
|
+
include RocketJob::Batch
|
126
|
+
|
127
|
+
input_category slice_size: 1_000, serializer: :encrypt
|
128
|
+
end
|
129
|
+
~~~
|
130
|
+
|
131
|
+
##### collect_output, collect_nil_output
|
132
|
+
|
133
|
+
The following fields have been moved from the job itself:
|
134
|
+
~~~ruby
|
135
|
+
class MyJob < RocketJob::Job
|
136
|
+
include RocketJob::Batch
|
137
|
+
|
138
|
+
self.collect_output = true
|
139
|
+
self.collect_nil_output = true
|
140
|
+
end
|
141
|
+
~~~
|
142
|
+
|
143
|
+
Into the corresponding `output_category`:
|
144
|
+
- `collect_output` no longer has any meaning. Output is collected anytime an `output_category` is defined.
|
145
|
+
- `collect_nil_output` is now the option `nils` on the `output_category`.
|
146
|
+
It defaults to `false` so that by default any `nil` output from the `perform` method is not collected.
|
147
|
+
~~~ruby
|
148
|
+
class MyJob < RocketJob::Job
|
149
|
+
include RocketJob::Batch
|
150
|
+
|
151
|
+
output_category nils: true
|
152
|
+
end
|
153
|
+
~~~
|
154
|
+
|
155
|
+
##### name
|
156
|
+
|
157
|
+
For both `input_category` and `output_category`, when the `name` argument is not supplied
|
158
|
+
it defaults to `:main`.
|
159
|
+
|
160
|
+
For Example:
|
161
|
+
~~~ruby
|
162
|
+
class MyJob < RocketJob::Job
|
163
|
+
include RocketJob::Batch
|
164
|
+
|
165
|
+
input_category name: :main, serializer: :encrypt
|
166
|
+
output_category name: :main
|
167
|
+
end
|
168
|
+
~~~
|
169
|
+
|
170
|
+
Is the same as:
|
171
|
+
~~~ruby
|
172
|
+
class MyJob < RocketJob::Job
|
173
|
+
include RocketJob::Batch
|
174
|
+
|
175
|
+
input_category serializer: :encrypt
|
176
|
+
output_category
|
177
|
+
end
|
178
|
+
~~~
|
179
|
+
|
180
|
+
##### Existing and inflight jobs
|
181
|
+
|
182
|
+
When migrating to Rocket Job 6, it is recommended to load every job and then save it back again as part of the
|
183
|
+
deployment. When the job loads it will automatically convert itself from the old schema to the new v6 schema.
|
184
|
+
|
185
|
+
In flight jobs should not be affected, other than it is important to shutdown all running batch
|
186
|
+
servers _before_ running any new instances.
|
31
187
|
|
32
188
|
## Rocket Job v4
|
33
189
|
|
34
|
-
Rocket Job Pro is now open source and included in Rocket Job.
|
190
|
+
Rocket Job Pro is now fully open source and included in Rocket Job under the Apache License.
|
35
191
|
|
36
192
|
The `RocketJob::Batch` plugin now adds batch processing capabilities to break up a single task into many
|
37
193
|
concurrent workers processing slices of the entire job at the same time.
|
@@ -72,31 +72,38 @@ module RocketJob
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def input_category(category_name = :main)
|
75
|
+
return category_name if category_name.is_a?(Category::Input)
|
76
|
+
raise(ArgumentError, "Cannot supply Output Category to input category") if category_name.is_a?(Category::Output)
|
77
|
+
|
75
78
|
category_name = category_name.to_sym
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
unless
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
else
|
85
|
-
raise(ArgumentError, "Unknown Input Category: #{category_name.inspect}. Registered categories: #{input_categories.collect(&:name).join(',')}")
|
86
|
-
end
|
79
|
+
# find does not work against this association
|
80
|
+
input_categories.each { |category| return category if category.name == category_name }
|
81
|
+
|
82
|
+
unless category_name == :main
|
83
|
+
raise(
|
84
|
+
ArgumentError,
|
85
|
+
"Unknown Input Category: #{category_name.inspect}. Registered categories: #{input_categories.collect(&:name).join(',')}"
|
86
|
+
)
|
87
87
|
end
|
88
|
+
|
89
|
+
# Auto-register main input category when not defined
|
90
|
+
category = Category::Input.new(job: self)
|
91
|
+
self.input_categories << category
|
88
92
|
category
|
89
93
|
end
|
90
94
|
|
91
95
|
def output_category(category_name = :main)
|
96
|
+
return category_name if category_name.is_a?(Category::Output)
|
97
|
+
raise(ArgumentError, "Cannot supply Input Category to output category") if category_name.is_a?(Category::Input)
|
98
|
+
|
92
99
|
category_name = category_name.to_sym
|
93
|
-
category = nil
|
94
100
|
# .find does not work against this association
|
95
|
-
output_categories.each { |
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
101
|
+
output_categories.each { |category| return category if category.name == category_name }
|
102
|
+
|
103
|
+
raise(
|
104
|
+
ArgumentError,
|
105
|
+
"Unknown Output Category: #{category_name.inspect}. Registered categories: #{output_categories.collect(&:name).join(',')}"
|
106
|
+
)
|
100
107
|
end
|
101
108
|
|
102
109
|
# Returns [true|false] whether the named category has already been defined
|
@@ -211,7 +218,7 @@ module RocketJob
|
|
211
218
|
category.tabular.render(row)
|
212
219
|
end
|
213
220
|
|
214
|
-
# Migrate existing
|
221
|
+
# Migrate existing v5 batch jobs to v6
|
215
222
|
def rocketjob_categories_migrate
|
216
223
|
return unless attribute_present?(:input_categories) && self[:input_categories]&.first.is_a?(Symbol)
|
217
224
|
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -14,11 +14,9 @@ module RocketJob
|
|
14
14
|
# Default: None ( Uses the single default input collection for this job )
|
15
15
|
# Validates: This value must be one of those listed in #input_categories
|
16
16
|
def input(category = :main)
|
17
|
-
|
17
|
+
category = input_category(category)
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
(@inputs ||= {})[category.name] ||= RocketJob::Sliced.factory(:input, category, self)
|
19
|
+
(@inputs ||= {})[category.name] ||= category.data_store(self)
|
22
20
|
end
|
23
21
|
|
24
22
|
# Returns [RocketJob::Sliced::Output] output collection for holding output slices
|
@@ -30,11 +28,9 @@ module RocketJob
|
|
30
28
|
# Default: None ( Uses the single default output collection for this job )
|
31
29
|
# Validates: This value must be one of those listed in #output_categories
|
32
30
|
def output(category = :main)
|
33
|
-
|
34
|
-
|
35
|
-
category = output_category(category) unless category.is_a?(Category::Output)
|
31
|
+
category = output_category(category)
|
36
32
|
|
37
|
-
(@outputs ||= {})[category.name] ||=
|
33
|
+
(@outputs ||= {})[category.name] ||= category.data_store(self)
|
38
34
|
end
|
39
35
|
|
40
36
|
# Rapidly upload individual records in batches.
|
@@ -59,19 +55,19 @@ module RocketJob
|
|
59
55
|
# The category or the name of the category to access or download data from
|
60
56
|
# Default: None ( Uses the single default output collection for this job )
|
61
57
|
# Validates: This value must be one of those listed in #input_categories
|
62
|
-
def lookup_collection(category = :main)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
end
|
58
|
+
# def lookup_collection(category = :main)
|
59
|
+
# category = input_category(category) unless category.is_a?(Category::Input)
|
60
|
+
#
|
61
|
+
# collection = (@lookup_collections ||= {})[category.name]
|
62
|
+
#
|
63
|
+
# unless collection
|
64
|
+
# collection_name = "rocket_job.inputs.#{id}"
|
65
|
+
# collection_name << ".#{category.name}" unless category.name == :main
|
66
|
+
#
|
67
|
+
# @lookup_collections[category.name] ||=
|
68
|
+
# LookupCollection.new(Sliced::Slice.collection.database, collection_name)
|
69
|
+
# end
|
70
|
+
# end
|
75
71
|
|
76
72
|
# Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
|
77
73
|
#
|
@@ -154,53 +150,7 @@ module RocketJob
|
|
154
150
|
# * If an io stream is supplied, it is read until it returns nil.
|
155
151
|
# * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
|
156
152
|
# * CSV parsing is slow, so it is usually left for the workers to do.
|
157
|
-
|
158
|
-
raise(ArgumentError, "Either stream, or a block must be supplied") unless stream || block
|
159
|
-
|
160
|
-
category = input_category(category) unless category.is_a?(Category::Input)
|
161
|
-
stream ||= category.file_name
|
162
|
-
path = nil
|
163
|
-
|
164
|
-
if stream
|
165
|
-
path = IOStreams.new(stream)
|
166
|
-
path.file_name = file_name if file_name
|
167
|
-
category.file_name = path.file_name
|
168
|
-
|
169
|
-
# Auto detect the format based on the upload file name if present.
|
170
|
-
if category.format == :auto
|
171
|
-
format = path.format
|
172
|
-
if format
|
173
|
-
# Rebuild tabular with the above file name
|
174
|
-
category.reset_tabular
|
175
|
-
category.format = format
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
# Tabular transformations required for upload?
|
181
|
-
if category.tabular?
|
182
|
-
# Remove non-printable characters from tabular input formats
|
183
|
-
# Cannot change the length of fixed width lines
|
184
|
-
replace = category.format == :fixed ? " " : ""
|
185
|
-
path&.option_or_stream(:encode, encoding: "UTF-8", cleaner: :printable, replace: replace)
|
186
|
-
|
187
|
-
# Extract the header line during the file upload when needed.
|
188
|
-
on_first = rocket_job_upload_header_lambda(category, on_first) if category.tabular.header?
|
189
|
-
end
|
190
|
-
|
191
|
-
count =
|
192
|
-
if block
|
193
|
-
input(category).upload(on_first: on_first, &block)
|
194
|
-
else
|
195
|
-
input(category).upload(on_first: on_first) do |io|
|
196
|
-
path.each(stream_mode, **args) { |line| io << line }
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
self.record_count = (record_count || 0) + count
|
201
|
-
count
|
202
|
-
end
|
203
|
-
|
153
|
+
#
|
204
154
|
# Upload results from an Arel into RocketJob::SlicedJob.
|
205
155
|
#
|
206
156
|
# Params
|
@@ -227,18 +177,13 @@ module RocketJob
|
|
227
177
|
#
|
228
178
|
# Example: Upload user_name and zip_code
|
229
179
|
# arel = User.where(country_code: 'US')
|
230
|
-
# job.upload_arel(arel, :user_name, :zip_code)
|
180
|
+
# job.upload_arel(arel, columns: [:user_name, :zip_code])
|
231
181
|
#
|
232
182
|
# Notes:
|
233
183
|
# * Only call from one thread at a time against a single instance of this job.
|
234
184
|
# * The record_count for the job is set to the number of records returned by the arel.
|
235
185
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
236
186
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
237
|
-
def upload_arel(arel, *column_names, category: :main, &block)
|
238
|
-
count = input(category).upload_arel(arel, *column_names, &block)
|
239
|
-
self.record_count = (record_count || 0) + count
|
240
|
-
count
|
241
|
-
end
|
242
187
|
|
243
188
|
# Upload the result of a MongoDB query to the input collection for processing
|
244
189
|
# Useful when an entire MongoDB collection, or part thereof needs to be
|
@@ -266,24 +211,19 @@ module RocketJob
|
|
266
211
|
# criteria = User.where(state: 'FL')
|
267
212
|
# job.record_count = job.upload_mongo_query(criteria)
|
268
213
|
#
|
269
|
-
# Example: Upload
|
214
|
+
# Example: Upload only the specified column(s)
|
270
215
|
# criteria = User.where(state: 'FL')
|
271
|
-
# job.record_count = job.upload_mongo_query(criteria, :zip_code)
|
216
|
+
# job.record_count = job.upload_mongo_query(criteria, columns: [:zip_code])
|
272
217
|
#
|
273
218
|
# Notes:
|
274
219
|
# * Only call from one thread at a time against a single instance of this job.
|
275
220
|
# * The record_count for the job is set to the number of records returned by the mongo query.
|
276
221
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
277
222
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
278
|
-
def upload_mongo_query(criteria, *column_names, category: :main, &block)
|
279
|
-
count = input(category).upload_mongo_query(criteria, *column_names, &block)
|
280
|
-
self.record_count = (record_count || 0) + count
|
281
|
-
count
|
282
|
-
end
|
283
223
|
|
284
224
|
# Upload sliced range of integer requests as arrays of start and end ids.
|
285
225
|
#
|
286
|
-
# Returns [Integer]
|
226
|
+
# Returns [Integer] the number of slices uploaded.
|
287
227
|
#
|
288
228
|
# Uploads one range per slice so that the response can return multiple records
|
289
229
|
# for each slice processed
|
@@ -302,17 +242,11 @@ module RocketJob
|
|
302
242
|
# * The record_count for the job is set to: last_id - start_id + 1.
|
303
243
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
304
244
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
305
|
-
def upload_integer_range(start_id, last_id, category: :main)
|
306
|
-
input(category).upload_integer_range(start_id, last_id)
|
307
|
-
count = last_id - start_id + 1
|
308
|
-
self.record_count = (record_count || 0) + count
|
309
|
-
count
|
310
|
-
end
|
311
245
|
|
312
246
|
# Upload sliced range of integer requests as an arrays of start and end ids
|
313
247
|
# starting with the last range first
|
314
248
|
#
|
315
|
-
# Returns [Integer]
|
249
|
+
# Returns [Integer] the number of slices uploaded.
|
316
250
|
#
|
317
251
|
# Uploads one range per slice so that the response can return multiple records
|
318
252
|
# for each slice processed.
|
@@ -334,14 +268,102 @@ module RocketJob
|
|
334
268
|
# * The record_count for the job is set to: last_id - start_id + 1.
|
335
269
|
# * If an exception is raised while uploading data, the input collection is cleared out
|
336
270
|
# so that if a job is retried during an upload failure, data is not duplicated.
|
337
|
-
|
338
|
-
|
339
|
-
|
271
|
+
|
272
|
+
def upload(object = nil, category: :main, file_name: nil, stream_mode: nil, on_first: nil, columns: nil, slice_batch_size: nil, **args, &block)
|
273
|
+
input_collection = input(category)
|
274
|
+
|
275
|
+
if block
|
276
|
+
raise(ArgumentError, "Cannot supply both an object to upload, and a block.") if object
|
277
|
+
if stream_mode || columns || slice_batch_size || args.size > 0
|
278
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading a block. Only accepts :category, :file_name, or :on_first")
|
279
|
+
end
|
280
|
+
|
281
|
+
category = input_category(category)
|
282
|
+
category.file_name = file_name if file_name
|
283
|
+
|
284
|
+
# Extract the header line during the upload when applicable.
|
285
|
+
extract_header = category.extract_header_callback(on_first)
|
286
|
+
|
287
|
+
count = input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size, &block)
|
288
|
+
self.record_count = (record_count || 0) + count
|
289
|
+
return count
|
290
|
+
end
|
291
|
+
|
292
|
+
count =
|
293
|
+
case object
|
294
|
+
when Range
|
295
|
+
if file_name || stream_mode || on_first || args.size > 0
|
296
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading a Range. Only accepts :category, :columns, or :slice_batch_size")
|
297
|
+
end
|
298
|
+
|
299
|
+
first = object.first
|
300
|
+
last = object.last
|
301
|
+
if first < last
|
302
|
+
input_collection.upload_integer_range(first, last, slice_batch_size: slice_batch_size || 1_000)
|
303
|
+
else
|
304
|
+
input_collection.upload_integer_range_in_reverse_order(last, first, slice_batch_size: slice_batch_size || 1_000)
|
305
|
+
end
|
306
|
+
when Mongoid::Criteria
|
307
|
+
if file_name || stream_mode || on_first || args.size > 0
|
308
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading a Mongoid::Criteria. Only accepts :category, :columns, or :slice_batch_size")
|
309
|
+
end
|
310
|
+
|
311
|
+
input_collection.upload_mongo_query(object, columns: columns, slice_batch_size: slice_batch_size, &block)
|
312
|
+
when defined?(ActiveRecord::Relation) ? ActiveRecord::Relation : false
|
313
|
+
if file_name || stream_mode || on_first || args.size > 0
|
314
|
+
raise(ArgumentError, "Unknown keyword arguments when uploading an ActiveRecord::Relation. Only accepts :category, :columns, or :slice_batch_size")
|
315
|
+
end
|
316
|
+
|
317
|
+
input_collection.upload_arel(object, columns: columns, slice_batch_size: slice_batch_size, &block)
|
318
|
+
|
319
|
+
else
|
320
|
+
raise(ArgumentError, "Unknown keyword argument :columns when uploading a file") if columns
|
321
|
+
|
322
|
+
category = input_category(category)
|
323
|
+
|
324
|
+
# Extract the header line during the upload when applicable.
|
325
|
+
extract_header = category.extract_header_callback(on_first)
|
326
|
+
path = category.upload_path(object, original_file_name: file_name)
|
327
|
+
|
328
|
+
input_collection.upload(on_first: extract_header, slice_batch_size: slice_batch_size) do |io|
|
329
|
+
path.each(stream_mode || :line, **args) { |line| io << line }
|
330
|
+
end
|
331
|
+
|
332
|
+
end
|
333
|
+
|
334
|
+
self.record_count = (record_count || 0) + count
|
335
|
+
count
|
336
|
+
end
|
337
|
+
|
338
|
+
# @deprecated
|
339
|
+
def upload_arel(arel, *column_names, category: :main, &block)
|
340
|
+
count = input(category).upload_arel(arel, columns: column_names, &block)
|
340
341
|
self.record_count = (record_count || 0) + count
|
341
342
|
count
|
342
343
|
end
|
343
344
|
|
344
|
-
#
|
345
|
+
# @deprecated
|
346
|
+
def upload_mongo_query(criteria, *column_names, category: :main, &block)
|
347
|
+
count = input(category).upload_mongo_query(criteria, columns: column_names, &block)
|
348
|
+
self.record_count = (record_count || 0) + count
|
349
|
+
count
|
350
|
+
end
|
351
|
+
|
352
|
+
# @deprecated
|
353
|
+
def upload_integer_range(start_id, last_id, category: :main, slice_batch_size: 1_000)
|
354
|
+
count = input(category).upload_integer_range(start_id, last_id, slice_batch_size: slice_batch_size)
|
355
|
+
self.record_count = (record_count || 0) + count
|
356
|
+
count
|
357
|
+
end
|
358
|
+
|
359
|
+
# @deprecated
|
360
|
+
def upload_integer_range_in_reverse_order(start_id, last_id, category: :main, slice_batch_size: 1_000)
|
361
|
+
count = input(category).upload_integer_range_in_reverse_order(start_id, last_id, slice_batch_size: slice_batch_size)
|
362
|
+
self.record_count = (record_count || 0) + count
|
363
|
+
count
|
364
|
+
end
|
365
|
+
|
366
|
+
# Upload the supplied slice for processing by workers
|
345
367
|
#
|
346
368
|
# Updates the record_count after adding the records
|
347
369
|
#
|
@@ -421,56 +443,34 @@ module RocketJob
|
|
421
443
|
def download(stream = nil, category: :main, header_line: nil, **args, &block)
|
422
444
|
raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
|
423
445
|
|
424
|
-
category
|
425
|
-
output_collection
|
446
|
+
category = output_category(category) unless category.is_a?(Category::Output)
|
447
|
+
output_collection = output(category)
|
426
448
|
|
427
449
|
# Store the output file name in the category
|
428
450
|
category.file_name = stream if !block && (stream.is_a?(String) || stream.is_a?(IOStreams::Path))
|
429
451
|
|
430
|
-
|
431
|
-
raise(ArgumentError, "A `header_line` is not supported with binary output collections") if header_line
|
432
|
-
|
433
|
-
return output_collection.download(&block) if block
|
452
|
+
header_line ||= category.render_header
|
434
453
|
|
435
|
-
|
436
|
-
output_collection.download { |record| io << record[:binary] }
|
437
|
-
end
|
438
|
-
else
|
439
|
-
header_line ||= category.render_header
|
454
|
+
return output_collection.download(header_line: header_line, &block) if block
|
440
455
|
|
441
|
-
|
456
|
+
raise(ArgumentError, "Missing mandatory `stream` or `category.file_name`") unless stream || category.file_name
|
442
457
|
|
443
|
-
|
458
|
+
if output_collection.slice_class.binary_format
|
459
|
+
binary_header_line = output_collection.slice_class.to_binary(header_line) if header_line
|
444
460
|
|
461
|
+
# Don't overwrite supplied stream options if any
|
462
|
+
stream = stream&.is_a?(IOStreams::Stream) ? stream.dup : IOStreams.new(category.file_name)
|
463
|
+
stream.remove_from_pipeline(output_collection.slice_class.binary_format)
|
464
|
+
stream.writer(**args) do |io|
|
465
|
+
# TODO: Binary formats should return the record count, instead of the slice count.
|
466
|
+
output_collection.download(header_line: binary_header_line) { |record| io.write(record) }
|
467
|
+
end
|
468
|
+
else
|
445
469
|
IOStreams.new(stream || category.file_name).writer(:line, **args) do |io|
|
446
470
|
output_collection.download(header_line: header_line) { |record| io << record }
|
447
471
|
end
|
448
472
|
end
|
449
473
|
end
|
450
|
-
|
451
|
-
private
|
452
|
-
|
453
|
-
# Return a lambda to extract the header row from the uploaded file.
|
454
|
-
def rocket_job_upload_header_lambda(category, on_first)
|
455
|
-
case category.mode
|
456
|
-
when :line
|
457
|
-
lambda do |line|
|
458
|
-
category.tabular.parse_header(line)
|
459
|
-
category.cleanse_header!
|
460
|
-
category.columns = category.tabular.header.columns
|
461
|
-
# Call chained on_first if present
|
462
|
-
on_first&.call(line)
|
463
|
-
end
|
464
|
-
when :array
|
465
|
-
lambda do |row|
|
466
|
-
category.tabular.header.columns = row
|
467
|
-
category.cleanse_header!
|
468
|
-
category.columns = category.tabular.header.columns
|
469
|
-
# Call chained on_first if present
|
470
|
-
on_first&.call(line)
|
471
|
-
end
|
472
|
-
end
|
473
|
-
end
|
474
474
|
end
|
475
475
|
end
|
476
476
|
end
|
@@ -22,7 +22,7 @@ module RocketJob
|
|
22
22
|
count_running_workers
|
23
23
|
|
24
24
|
puts "Loading job with #{count} records/lines"
|
25
|
-
job
|
25
|
+
job = RocketJob::Jobs::PerformanceJob.new(log_level: :warn)
|
26
26
|
job.input_category.slice_size = slice_size
|
27
27
|
if encrypt
|
28
28
|
job.input_category.serializer = :encrypt
|
@@ -64,7 +64,7 @@ module RocketJob
|
|
64
64
|
|
65
65
|
# Parse command line options
|
66
66
|
def parse(argv)
|
67
|
-
parser
|
67
|
+
parser = OptionParser.new do |o|
|
68
68
|
o.on("-c", "--count COUNT", "Count of records to enqueue") do |arg|
|
69
69
|
self.count = arg.to_i
|
70
70
|
end
|
@@ -49,7 +49,7 @@ module RocketJob
|
|
49
49
|
last = paths.pop
|
50
50
|
return unless last
|
51
51
|
|
52
|
-
last_target
|
52
|
+
last_target = paths.inject(in_memory) do |target, sub_key|
|
53
53
|
target.key?(sub_key) ? target[sub_key] : target[sub_key] = Hash.new(0)
|
54
54
|
end
|
55
55
|
last_target[last] += increment
|
@@ -99,7 +99,7 @@ module RocketJob
|
|
99
99
|
|
100
100
|
# Overrides RocketJob::Batch::Logger#rocket_job_batch_log_payload
|
101
101
|
def rocket_job_batch_log_payload
|
102
|
-
h
|
102
|
+
h = {
|
103
103
|
from: aasm.from_state,
|
104
104
|
to: aasm.to_state,
|
105
105
|
event: aasm.current_event
|
@@ -53,7 +53,7 @@ module RocketJob
|
|
53
53
|
# Allows another job with a higher priority to start even though this one is running already
|
54
54
|
# @overrides RocketJob::Plugins::Job::ThrottleRunningJobs#throttle_running_jobs_base_query
|
55
55
|
def throttle_running_jobs_base_query
|
56
|
-
query
|
56
|
+
query = super
|
57
57
|
query[:priority.lte] = priority if throttle_running_workers&.positive?
|
58
58
|
query
|
59
59
|
end
|