rocketjob 4.1.1 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +236 -10
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +3 -3
- data/lib/rocket_job/sliced/input.rb +2 -188
- data/lib/rocket_job/sliced/slice.rb +8 -0
- data/lib/rocket_job/sliced/slices.rb +1 -0
- data/lib/rocket_job/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4a9d008dd87609ead82e1ddb964aa798fc412e40e0e9634bb0ac0ee1a136a6b
|
4
|
+
data.tar.gz: ea8f96c4791b84175488e7ab9cc0e31b05b62403e98c4853cafb339f85c118d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1eb4a41765c4096fd6ac9c664da2bf27afebc37ce82cc4fc7545e22609443bd263e8a8bb04f22a986bc0bc4babf0797109fc958b3ca4122b3fc226ab9c9db8bc
|
7
|
+
data.tar.gz: 4507a2de381ddef1dee859cc906564d59167e7336002e568ff5cac06d4281cd1b214329a434375ba9c81bfc3ff69e03edf9a3edf4bab1703986b86feda95d907
|
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -39,21 +39,107 @@ module RocketJob
|
|
39
39
|
(@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
|
40
40
|
end
|
41
41
|
|
42
|
-
# Upload the supplied file_name or stream
|
42
|
+
# Upload the supplied file_name or stream.
|
43
43
|
#
|
44
|
-
#
|
44
|
+
# Returns [Integer] the number of records uploaded.
|
45
45
|
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
46
|
+
# Parameters
|
47
|
+
# file_name_or_io [String | IO]
|
48
|
+
# Full path and file name to stream into the job,
|
49
|
+
# Or, an IO Stream that responds to: :read
|
50
50
|
#
|
51
|
-
#
|
51
|
+
# streams [Symbol|Array]
|
52
|
+
# Streams to convert the data whilst it is being read.
|
53
|
+
# When nil, the file_name extensions will be inspected to determine what
|
54
|
+
# streams should be applied.
|
55
|
+
# Default: nil
|
52
56
|
#
|
53
|
-
#
|
57
|
+
# delimiter [String]
|
58
|
+
# Line / Record delimiter to use to break the stream up into records
|
59
|
+
# Any string to break the stream up by
|
60
|
+
# The records when saved will not include this delimiter
|
61
|
+
# Default: nil
|
62
|
+
# Automatically detect line endings and break up by line
|
63
|
+
# Searches for the first "\r\n" or "\n" and then uses that as the
|
64
|
+
# delimiter for all subsequent records
|
54
65
|
#
|
55
|
-
#
|
56
|
-
#
|
66
|
+
# buffer_size [Integer]
|
67
|
+
# Size of the blocks when reading from the input file / stream.
|
68
|
+
# Default: 65536 ( 64K )
|
69
|
+
#
|
70
|
+
# encoding: [String|Encoding]
|
71
|
+
# Encode returned data with this encoding.
|
72
|
+
# 'US-ASCII': Original 7 bit ASCII Format
|
73
|
+
# 'ASCII-8BIT': 8-bit ASCII Format
|
74
|
+
# 'UTF-8': UTF-8 Format
|
75
|
+
# Etc.
|
76
|
+
# Default: 'UTF-8'
|
77
|
+
#
|
78
|
+
# encode_replace: [String]
|
79
|
+
# The character to replace with when a character cannot be converted to the target encoding.
|
80
|
+
# nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
|
81
|
+
# Default: nil
|
82
|
+
#
|
83
|
+
# encode_cleaner: [nil|symbol|Proc]
|
84
|
+
# Cleanse data read from the input stream.
|
85
|
+
# nil: No cleansing
|
86
|
+
# :printable Cleanse all non-printable characters except \r and \n
|
87
|
+
# Proc/lambda Proc to call after every read to cleanse the data
|
88
|
+
# Default: :printable
|
89
|
+
#
|
90
|
+
# stream_mode: [:line | :row | :record]
|
91
|
+
# :line
|
92
|
+
# Uploads the file a line (String) at a time for processing by workers.
|
93
|
+
# :row
|
94
|
+
# Parses each line from the file as an Array and uploads each array for processing by workers.
|
95
|
+
# :record
|
96
|
+
# Parses each line from the file into a Hash and uploads each hash for processing by workers.
|
97
|
+
# See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
|
98
|
+
#
|
99
|
+
# Example:
|
100
|
+
# # Load plain text records from a file
|
101
|
+
# job.input.upload('hello.csv')
|
102
|
+
#
|
103
|
+
# Example:
|
104
|
+
# # Load plain text records from a file, stripping all non-printable characters,
|
105
|
+
# # as well as any characters that cannot be converted to UTF-8
|
106
|
+
# job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
|
107
|
+
#
|
108
|
+
# Example: Zip
|
109
|
+
# # Since csv is not known to RocketJob it is ignored
|
110
|
+
# job.input.upload('myfile.csv.zip')
|
111
|
+
#
|
112
|
+
# Example: Encrypted Zip
|
113
|
+
# job.input.upload('myfile.csv.zip.enc')
|
114
|
+
#
|
115
|
+
# Example: Explicitly set the streams
|
116
|
+
# job.input.upload('myfile.ze', streams: [:zip, :enc])
|
117
|
+
#
|
118
|
+
# Example: Supply custom options
|
119
|
+
# job.input.upload('myfile.csv.enc', streams: :enc)
|
120
|
+
#
|
121
|
+
# Example: Extract streams from filename but write to a temp file
|
122
|
+
# streams = IOStreams.streams_for_file_name('myfile.gz.enc')
|
123
|
+
# t = Tempfile.new('my_project')
|
124
|
+
# job.input.upload(t.to_path, streams: streams)
|
125
|
+
#
|
126
|
+
# Example: Upload by writing records one at a time to the upload stream
|
127
|
+
# job.upload do |writer|
|
128
|
+
# 10.times { |i| writer << i }
|
129
|
+
# end
|
130
|
+
#
|
131
|
+
# Notes:
|
132
|
+
# * Only call from one thread at a time against a single instance of this job.
|
133
|
+
# * The record_count for the job is incremented by the number of records uploaded.
|
134
|
+
# * If an exception is raised while uploading data, the input collection is cleared out
|
135
|
+
# so that if a job is retried during an upload failure, data is not duplicated.
|
136
|
+
# * By default all data read from the file/stream is converted into UTF-8 before being persisted. This
|
137
|
+
# is recommended since Mongo only supports UTF-8 strings.
|
138
|
+
# * When zip format, the Zip file/stream must contain only one file, the first file found will be
|
139
|
+
# loaded into the job
|
140
|
+
# * If an io stream is supplied, it is read until it returns nil.
|
141
|
+
# * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
|
142
|
+
# * CSV parsing is slow, so it is usually left for the workers to do.
|
57
143
|
def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
|
58
144
|
if file_name
|
59
145
|
self.upload_file_name = file_name
|
@@ -63,18 +149,158 @@ module RocketJob
|
|
63
149
|
count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
|
64
150
|
self.record_count = (record_count || 0) + count
|
65
151
|
count
|
152
|
+
rescue StandardError => exc
|
153
|
+
input(category).delete_all
|
154
|
+
raise(exc)
|
66
155
|
end
|
67
156
|
|
157
|
+
# Upload results from an Arel into RocketJob::SlicedJob.
|
158
|
+
#
|
159
|
+
# Params
|
160
|
+
# column_names
|
161
|
+
# When a block is not supplied, supply the names of the columns to be returned
|
162
|
+
# and uploaded into the job
|
163
|
+
# These columns are automatically added to the select list to reduce overhead
|
164
|
+
#
|
165
|
+
# If a Block is supplied it is passed the model returned from the database and should
|
166
|
+
# return the work item to be uploaded into the job.
|
167
|
+
#
|
168
|
+
# Returns [Integer] the number of records uploaded
|
169
|
+
#
|
170
|
+
# Example: Upload id's for all users
|
171
|
+
# arel = User.all
|
172
|
+
# job.upload_arel(arel)
|
173
|
+
#
|
174
|
+
# Example: Upload selected user id's
|
175
|
+
# arel = User.where(country_code: 'US')
|
176
|
+
# job.upload_arel(arel)
|
177
|
+
#
|
178
|
+
# Example: Upload user_name and zip_code
|
179
|
+
# arel = User.where(country_code: 'US')
|
180
|
+
# job.upload_arel(arel, :user_name, :zip_code)
|
181
|
+
#
|
182
|
+
# Notes:
|
183
|
+
# * Only call from one thread at a time against a single instance of this job.
|
184
|
+
# * The record_count for the job is set to the number of records returned by the arel.
|
185
|
+
# * If an exception is raised while uploading data, the input collection is cleared out
|
186
|
+
# so that if a job is retried during an upload failure, data is not duplicated.
|
68
187
|
def upload_arel(arel, *column_names, category: :main, &block)
|
69
188
|
count = input(category).upload_arel(arel, *column_names, &block)
|
70
189
|
self.record_count = (record_count || 0) + count
|
71
190
|
count
|
191
|
+
rescue StandardError => exc
|
192
|
+
input(category).delete_all
|
193
|
+
raise(exc)
|
72
194
|
end
|
73
195
|
|
196
|
+
# Upload the result of a MongoDB query to the input collection for processing
|
197
|
+
# Useful when an entire MongoDB collection, or part thereof needs to be
|
198
|
+
# processed by a job.
|
199
|
+
#
|
200
|
+
# Returns [Integer] the number of records uploaded
|
201
|
+
#
|
202
|
+
# If a Block is supplied it is passed the document returned from the
|
203
|
+
# database and should return a record for processing
|
204
|
+
#
|
205
|
+
# If no Block is supplied then the record will be the :fields returned
|
206
|
+
# from MongoDB
|
207
|
+
#
|
208
|
+
# Note:
|
209
|
+
# This method uses the collection and not the MongoMapper document to
|
210
|
+
# avoid the overhead of constructing a Model with every document returned
|
211
|
+
# by the query
|
212
|
+
#
|
213
|
+
# Note:
|
214
|
+
# The Block must return types that can be serialized to BSON.
|
215
|
+
# Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
|
216
|
+
# Invalid: Date, etc.
|
217
|
+
#
|
218
|
+
# Example: Upload document ids
|
219
|
+
# criteria = User.where(state: 'FL')
|
220
|
+
# job.upload_mongo_query(criteria)
|
221
|
+
#
|
222
|
+
# Example: Upload just the supplied column
|
223
|
+
# criteria = User.where(state: 'FL')
|
224
|
+
# job.upload_mongo_query(criteria, :zip_code)
|
225
|
+
#
|
226
|
+
# Notes:
|
227
|
+
# * Only call from one thread at a time against a single instance of this job.
|
228
|
+
# * The record_count for the job is set to the number of records returned by the mongo query.
|
229
|
+
# * If an exception is raised while uploading data, the input collection is cleared out
|
230
|
+
# so that if a job is retried during an upload failure, data is not duplicated.
|
74
231
|
def upload_mongo_query(criteria, *column_names, category: :main, &block)
|
75
232
|
count = input(category).upload_mongo_query(criteria, *column_names, &block)
|
76
233
|
self.record_count = (record_count || 0) + count
|
77
234
|
count
|
235
|
+
rescue StandardError => exc
|
236
|
+
input(category).delete_all
|
237
|
+
raise(exc)
|
238
|
+
end
|
239
|
+
|
240
|
+
# Upload sliced range of integer requests as arrays of start and end ids.
|
241
|
+
#
|
242
|
+
# Returns [Integer] last_id - start_id + 1.
|
243
|
+
#
|
244
|
+
# Uploads one range per slice so that the response can return multiple records
|
245
|
+
# for each slice processed
|
246
|
+
#
|
247
|
+
# Example
|
248
|
+
# job.slice_size = 100
|
249
|
+
# job.upload_integer_range(200, 421)
|
250
|
+
#
|
251
|
+
# # Equivalent to calling:
|
252
|
+
# job.input.insert([200,299])
|
253
|
+
# job.input.insert([300,399])
|
254
|
+
# job.input.insert([400,421])
|
255
|
+
#
|
256
|
+
# Notes:
|
257
|
+
# * Only call from one thread at a time against a single instance of this job.
|
258
|
+
# * The record_count for the job is set to: last_id - start_id + 1.
|
259
|
+
# * If an exception is raised while uploading data, the input collection is cleared out
|
260
|
+
# so that if a job is retried during an upload failure, data is not duplicated.
|
261
|
+
def upload_integer_range(start_id, last_id, category: :main)
|
262
|
+
input(category).upload_integer_range(start_id, last_id)
|
263
|
+
count = last_id - start_id + 1
|
264
|
+
self.record_count = (record_count || 0) + count
|
265
|
+
count
|
266
|
+
rescue StandardError => exc
|
267
|
+
input(category).delete_all
|
268
|
+
raise(exc)
|
269
|
+
end
|
270
|
+
|
271
|
+
# Upload sliced range of integer requests as arrays of start and end ids
|
272
|
+
# starting with the last range first
|
273
|
+
#
|
274
|
+
# Returns [Integer] last_id - start_id + 1.
|
275
|
+
#
|
276
|
+
# Uploads one range per slice so that the response can return multiple records
|
277
|
+
# for each slice processed.
|
278
|
+
# Useful for when the highest order integer values should be processed before
|
279
|
+
# the lower integer value ranges. For example when processing every record
|
280
|
+
# in a database based on the id column
|
281
|
+
#
|
282
|
+
# Example
|
283
|
+
# job.slice_size = 100
|
284
|
+
# job.upload_integer_range_in_reverse_order(200, 421)
|
285
|
+
#
|
286
|
+
# # Equivalent to calling:
|
287
|
+
# job.input.insert([400,421])
|
288
|
+
# job.input.insert([300,399])
|
289
|
+
# job.input.insert([200,299])
|
290
|
+
#
|
291
|
+
# Notes:
|
292
|
+
# * Only call from one thread at a time against a single instance of this job.
|
293
|
+
# * The record_count for the job is set to: last_id - start_id + 1.
|
294
|
+
# * If an exception is raised while uploading data, the input collection is cleared out
|
295
|
+
# so that if a job is retried during an upload failure, data is not duplicated.
|
296
|
+
def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
|
297
|
+
input(category).upload_integer_range_in_reverse_order(start_id, last_id)
|
298
|
+
count = last_id - start_id + 1
|
299
|
+
self.record_count = (record_count || 0) + count
|
300
|
+
count
|
301
|
+
rescue StandardError => exc
|
302
|
+
input(category).delete_all
|
303
|
+
raise(exc)
|
78
304
|
end
|
79
305
|
|
80
306
|
# Upload the supplied slices for processing by workers
|
@@ -11,7 +11,7 @@
|
|
11
11
|
# CODE
|
12
12
|
# job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
|
13
13
|
# arel = User.unscoped.all.order('updated_at DESC')
|
14
|
-
# job.
|
14
|
+
# job.upload_arel(arel)
|
15
15
|
# job.save!
|
16
16
|
#
|
17
17
|
# Console Testing:
|
@@ -25,7 +25,7 @@
|
|
25
25
|
#
|
26
26
|
# # Run against a sub-set using a limit
|
27
27
|
# arel = User.unscoped.all.order('updated_at DESC').limit(100)
|
28
|
-
# job.
|
28
|
+
# job.upload_arel(arel)
|
29
29
|
#
|
30
30
|
# # Run the subset directly within the console
|
31
31
|
# job.perform_now
|
@@ -38,7 +38,7 @@
|
|
38
38
|
# Example: Move the upload operation into a before_batch.
|
39
39
|
# upload_code = <<-CODE
|
40
40
|
# arel = User.unscoped.all.order('updated_at DESC')
|
41
|
-
#
|
41
|
+
# upload_arel(arel)
|
42
42
|
# CODE
|
43
43
|
#
|
44
44
|
# code = <<-CODE
|
@@ -1,104 +1,6 @@
|
|
1
1
|
module RocketJob
|
2
2
|
module Sliced
|
3
3
|
class Input < Slices
|
4
|
-
# Load lines for processing from the supplied filename or stream into this job.
|
5
|
-
#
|
6
|
-
# Returns [Integer] the number of lines loaded into this collection
|
7
|
-
#
|
8
|
-
# Parameters
|
9
|
-
# file_name_or_io [String | IO]
|
10
|
-
# Full path and file name to stream into the job,
|
11
|
-
# Or, an IO Stream that responds to: :read
|
12
|
-
#
|
13
|
-
# streams [Symbol|Array]
|
14
|
-
# Streams to convert the data whilst it is being read.
|
15
|
-
# When nil, the file_name extensions will be inspected to determine what
|
16
|
-
# streams should be applied.
|
17
|
-
# Default: nil
|
18
|
-
#
|
19
|
-
# delimiter[String]
|
20
|
-
# Line / Record delimiter to use to break the stream up into records
|
21
|
-
# Any string to break the stream up by
|
22
|
-
# The records when saved will not include this delimiter
|
23
|
-
# Default: nil
|
24
|
-
# Automatically detect line endings and break up by line
|
25
|
-
# Searches for the first "\r\n" or "\n" and then uses that as the
|
26
|
-
# delimiter for all subsequent records
|
27
|
-
#
|
28
|
-
# buffer_size [Integer]
|
29
|
-
# Size of the blocks when reading from the input file / stream.
|
30
|
-
# Default: 65536 ( 64K )
|
31
|
-
#
|
32
|
-
# encoding: [String|Encoding]
|
33
|
-
# Encode returned data with this encoding.
|
34
|
-
# 'US-ASCII': Original 7 bit ASCII Format
|
35
|
-
# 'ASCII-8BIT': 8-bit ASCII Format
|
36
|
-
# 'UTF-8': UTF-8 Format
|
37
|
-
# Etc.
|
38
|
-
# Default: 'UTF-8'
|
39
|
-
#
|
40
|
-
# encode_replace: [String]
|
41
|
-
# The character to replace with when a character cannot be converted to the target encoding.
|
42
|
-
# nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
|
43
|
-
# Default: nil
|
44
|
-
#
|
45
|
-
# encode_cleaner: [nil|symbol|Proc]
|
46
|
-
# Cleanse data read from the input stream.
|
47
|
-
# nil: No cleansing
|
48
|
-
# :printable Cleanse all non-printable characters except \r and \n
|
49
|
-
# Proc/lambda Proc to call after every read to cleanse the data
|
50
|
-
# Default: :printable
|
51
|
-
#
|
52
|
-
# stream_mode: [:line | :row | :record]
|
53
|
-
# :line
|
54
|
-
# Uploads the file a line (String) at a time for processing by workers.
|
55
|
-
# :row
|
56
|
-
# Parses each line from the file as an Array and uploads each array for processing by workers.
|
57
|
-
# :record
|
58
|
-
# Parses each line from the file into a Hash and uploads each hash for processing by workers.
|
59
|
-
# See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
|
60
|
-
#
|
61
|
-
# Example:
|
62
|
-
# # Load plain text records from a file
|
63
|
-
# job.input.upload('hello.csv')
|
64
|
-
#
|
65
|
-
# Example:
|
66
|
-
# # Load plain text records from a file, stripping all non-printable characters,
|
67
|
-
# # as well as any characters that cannot be converted to UTF-8
|
68
|
-
# job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
|
69
|
-
#
|
70
|
-
# Example: Zip
|
71
|
-
# # Since csv is not known to RocketJob it is ignored
|
72
|
-
# job.input.upload('myfile.csv.zip')
|
73
|
-
#
|
74
|
-
# Example: Encrypted Zip
|
75
|
-
# job.input.upload('myfile.csv.zip.enc')
|
76
|
-
#
|
77
|
-
# Example: Explicitly set the streams
|
78
|
-
# job.input.upload('myfile.ze', streams: [:zip, :enc])
|
79
|
-
#
|
80
|
-
# Example: Supply custom options
|
81
|
-
# job.input.upload('myfile.csv.enc', streams: :enc])
|
82
|
-
#
|
83
|
-
# Example: Extract streams from filename but write to a temp file
|
84
|
-
# streams = IOStreams.streams_for_file_name('myfile.gz.enc')
|
85
|
-
# t = Tempfile.new('my_project')
|
86
|
-
# job.input.upload(t.to_path, streams: streams)
|
87
|
-
#
|
88
|
-
# Example: Upload by writing records one at a time to the upload stream
|
89
|
-
# job.upload do |writer|
|
90
|
-
# 10.times { |i| writer << i }
|
91
|
-
# end
|
92
|
-
#
|
93
|
-
# Notes:
|
94
|
-
# - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
|
95
|
-
# is recommended since Mongo only supports UTF-8 strings.
|
96
|
-
# - When zip format, the Zip file/stream must contain only one file, the first file found will be
|
97
|
-
# loaded into the job
|
98
|
-
# - If an io stream is supplied, it is read until it returns nil.
|
99
|
-
# - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
|
100
|
-
# - Only call from one thread at a time per job instance.
|
101
|
-
# - CSV parsing is slow, so it is left for the workers to do.
|
102
4
|
def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
|
103
5
|
raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
|
104
6
|
|
@@ -110,35 +12,6 @@ module RocketJob
|
|
110
12
|
Writer::Input.collect(self, on_first: on_first, &block)
|
111
13
|
end
|
112
14
|
|
113
|
-
# Upload the result of a MongoDB query to the input collection for processing
|
114
|
-
# Useful when an entire MongoDB collection, or part thereof needs to be
|
115
|
-
# processed by a job.
|
116
|
-
#
|
117
|
-
# Returns [Integer] the number of records uploaded
|
118
|
-
#
|
119
|
-
# If a Block is supplied it is passed the document returned from the
|
120
|
-
# database and should return a record for processing
|
121
|
-
#
|
122
|
-
# If no Block is supplied then the record will be the :fields returned
|
123
|
-
# from MongoDB
|
124
|
-
#
|
125
|
-
# Note:
|
126
|
-
# This method uses the collection and not the MongoMapper document to
|
127
|
-
# avoid the overhead of constructing a Model with every document returned
|
128
|
-
# by the query
|
129
|
-
#
|
130
|
-
# Note:
|
131
|
-
# The Block must return types that can be serialized to BSON.
|
132
|
-
# Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
|
133
|
-
# Invalid: Date, etc.
|
134
|
-
#
|
135
|
-
# Example: Upload document ids
|
136
|
-
# criteria = User.where(state: 'FL')
|
137
|
-
# job.record_count = job.upload_mongo_query(criteria)
|
138
|
-
#
|
139
|
-
# Example: Upload just the supplied column
|
140
|
-
# criteria = User.where(state: 'FL')
|
141
|
-
# job.record_count = job.upload_mongo_query(criteria, :zip_code)
|
142
15
|
def upload_mongo_query(criteria, *column_names, &block)
|
143
16
|
options = criteria.options
|
144
17
|
|
@@ -171,30 +44,6 @@ module RocketJob
|
|
171
44
|
end
|
172
45
|
end
|
173
46
|
|
174
|
-
# Upload results from an Arel into RocketJob::SlicedJob.
|
175
|
-
#
|
176
|
-
# Params
|
177
|
-
# column_names
|
178
|
-
# When a block is not supplied, supply the names of the columns to be returned
|
179
|
-
# and uploaded into the job
|
180
|
-
# These columns are automatically added to the select list to reduce overhead
|
181
|
-
#
|
182
|
-
# If a Block is supplied it is passed the model returned from the database and should
|
183
|
-
# return the work item to be uploaded into the job.
|
184
|
-
#
|
185
|
-
# Returns [Integer] the number of records uploaded
|
186
|
-
#
|
187
|
-
# Example: Upload id's for all users
|
188
|
-
# arel = User.all
|
189
|
-
# job.record_count = job.upload_arel(arel)
|
190
|
-
#
|
191
|
-
# Example: Upload selected user id's
|
192
|
-
# arel = User.where(country_code: 'US')
|
193
|
-
# job.record_count = job.upload_arel(arel)
|
194
|
-
#
|
195
|
-
# Example: Upload user_name and zip_code
|
196
|
-
# arel = User.where(country_code: 'US')
|
197
|
-
# job.record_count = job.upload_arel(arel, :user_name, :zip_code)
|
198
47
|
def upload_arel(arel, *column_names, &block)
|
199
48
|
unless block
|
200
49
|
column_names = column_names.collect(&:to_sym)
|
@@ -217,21 +66,6 @@ module RocketJob
|
|
217
66
|
end
|
218
67
|
end
|
219
68
|
|
220
|
-
# Upload sliced range of integer requests as a an arrays of start and end ids
|
221
|
-
#
|
222
|
-
# Returns [Integer] the number of slices uploaded
|
223
|
-
#
|
224
|
-
# Uploads one range per slice so that the response can return multiple records
|
225
|
-
# for each slice processed
|
226
|
-
#
|
227
|
-
# Example
|
228
|
-
# job.slice_size = 100
|
229
|
-
# job.record_count = job.upload_integer_range(200, 421)
|
230
|
-
#
|
231
|
-
# # Equivalent to calling:
|
232
|
-
# job.record_count = job.insert([200,299])
|
233
|
-
# job.record_count += job.insert([300,399])
|
234
|
-
# job.record_count += job.insert([400,421])
|
235
69
|
def upload_integer_range(start_id, last_id)
|
236
70
|
create_indexes
|
237
71
|
count = 0
|
@@ -245,25 +79,6 @@ module RocketJob
|
|
245
79
|
count
|
246
80
|
end
|
247
81
|
|
248
|
-
# Upload sliced range of integer requests as an arrays of start and end ids
|
249
|
-
# starting with the last range first
|
250
|
-
#
|
251
|
-
# Returns [Integer] the number of slices uploaded
|
252
|
-
#
|
253
|
-
# Uploads one range per slice so that the response can return multiple records
|
254
|
-
# for each slice processed.
|
255
|
-
# Useful for when the highest order integer values should be processed before
|
256
|
-
# the lower integer value ranges. For example when processing every record
|
257
|
-
# in a database based on the id column
|
258
|
-
#
|
259
|
-
# Example
|
260
|
-
# job.slice_size = 100
|
261
|
-
# job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
|
262
|
-
#
|
263
|
-
# # Equivalent to calling:
|
264
|
-
# job.insert([400,421])
|
265
|
-
# job.insert([300,399])
|
266
|
-
# job.insert([200,299])
|
267
82
|
def upload_integer_range_in_reverse_order(start_id, last_id)
|
268
83
|
create_indexes
|
269
84
|
end_id = last_id
|
@@ -290,9 +105,8 @@ module RocketJob
|
|
290
105
|
#
|
291
106
|
def each_failed_record
|
292
107
|
failed.each do |slice|
|
293
|
-
|
294
|
-
|
295
|
-
end
|
108
|
+
record = slice.failed_record
|
109
|
+
yield(record, slice) unless record.nil?
|
296
110
|
end
|
297
111
|
end
|
298
112
|
|
@@ -119,6 +119,14 @@ module RocketJob
|
|
119
119
|
self.worker_name = nil
|
120
120
|
end
|
121
121
|
|
122
|
+
# Returns the failed record.
|
123
|
+
# Returns [nil] if there is no failed record
|
124
|
+
def failed_record
|
125
|
+
if exception && (record_number = exception.record_number)
|
126
|
+
at(record_number - 1)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
122
130
|
# Returns [Hash] the slice as a Hash for storage purposes
|
123
131
|
# Compresses / Encrypts the slice according to the job setting
|
124
132
|
if ::Mongoid::VERSION.to_i >= 6
|
@@ -98,6 +98,7 @@ module RocketJob
|
|
98
98
|
all.collection.indexes.create_one(state: 1, _id: 1)
|
99
99
|
end
|
100
100
|
|
101
|
+
# Forward additional methods.
|
101
102
|
def_instance_delegators :@all, :collection, :count, :delete_all, :first, :find, :last, :nor, :not, :or, :to_a, :where
|
102
103
|
|
103
104
|
# Drop this collection when it is no longer needed
|
data/lib/rocket_job/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rocketjob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aasm
|