rocketjob 4.1.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +236 -10
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +3 -3
- data/lib/rocket_job/sliced/input.rb +2 -188
- data/lib/rocket_job/sliced/slice.rb +8 -0
- data/lib/rocket_job/sliced/slices.rb +1 -0
- data/lib/rocket_job/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f4a9d008dd87609ead82e1ddb964aa798fc412e40e0e9634bb0ac0ee1a136a6b
+  data.tar.gz: ea8f96c4791b84175488e7ab9cc0e31b05b62403e98c4853cafb339f85c118d9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1eb4a41765c4096fd6ac9c664da2bf27afebc37ce82cc4fc7545e22609443bd263e8a8bb04f22a986bc0bc4babf0797109fc958b3ca4122b3fc226ab9c9db8bc
+  data.tar.gz: 4507a2de381ddef1dee859cc906564d59167e7336002e568ff5cac06d4281cd1b214329a434375ba9c81bfc3ff69e03edf9a3edf4bab1703986b86feda95d907
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -39,21 +39,107 @@ module RocketJob
         (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
       end

-      # Upload the supplied file_name or stream
+      # Upload the supplied file_name or stream.
       #
-      #
+      # Returns [Integer] the number of records uploaded.
       #
-      #
-      #
-      #
-      #
+      # Parameters
+      #   file_name_or_io [String | IO]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO Stream that responds to: :read
       #
-      #
+      #   streams [Symbol|Array]
+      #     Streams to convert the data whilst it is being read.
+      #     When nil, the file_name extensions will be inspected to determine what
+      #     streams should be applied.
+      #     Default: nil
       #
-      #
+      #   delimiter[String]
+      #     Line / Record delimiter to use to break the stream up into records
+      #       Any string to break the stream up by
+      #       The records when saved will not include this delimiter
+      #     Default: nil
+      #       Automatically detect line endings and break up by line
+      #       Searches for the first "\r\n" or "\n" and then uses that as the
+      #       delimiter for all subsequent records
       #
-      #
-      #
+      #   buffer_size [Integer]
+      #     Size of the blocks when reading from the input file / stream.
+      #     Default: 65536 ( 64K )
+      #
+      #   encoding: [String|Encoding]
+      #     Encode returned data with this encoding.
+      #     'US-ASCII': Original 7 bit ASCII Format
+      #     'ASCII-8BIT': 8-bit ASCII Format
+      #     'UTF-8': UTF-8 Format
+      #     Etc.
+      #     Default: 'UTF-8'
+      #
+      #   encode_replace: [String]
+      #     The character to replace with when a character cannot be converted to the target encoding.
+      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
+      #     Default: nil
+      #
+      #   encode_cleaner: [nil|symbol|Proc]
+      #     Cleanse data read from the input stream.
+      #     nil: No cleansing
+      #     :printable Cleanse all non-printable characters except \r and \n
+      #     Proc/lambda Proc to call after every read to cleanse the data
+      #     Default: :printable
+      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+      #
+      # Example:
+      #   # Load plain text records from a file
+      #   job.input.upload('hello.csv')
+      #
+      # Example:
+      #   # Load plain text records from a file, stripping all non-printable characters,
+      #   # as well as any characters that cannot be converted to UTF-8
+      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.input.upload('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.input.upload('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+      #
+      # Example: Supply custom options
+      #   job.input.upload('myfile.csv.enc', streams: :enc])
+      #
+      # Example: Extract streams from filename but write to a temp file
+      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
+      #   t = Tempfile.new('my_project')
+      #   job.input.upload(t.to_path, streams: streams)
+      #
+      # Example: Upload by writing records one at a time to the upload stream
+      #   job.upload do |writer|
+      #     10.times { |i| writer << i }
+      #   end
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to the number of records returned by the arel.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      # * By default all data read from the file/stream is converted into UTF-8 before being persisted. This
+      #   is recommended since Mongo only supports UTF-8 strings.
+      # * When zip format, the Zip file/stream must contain only one file, the first file found will be
+      #   loaded into the job
+      # * If an io stream is supplied, it is read until it returns nil.
+      # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
+      # * CSV parsing is slow, so it is usually left for the workers to do.
       def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
         if file_name
           self.upload_file_name = file_name
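The options documented in this hunk are handled by the IOStreams gem during the read. A minimal sketch of a batch job using them (the MyJob class and the users.csv path are hypothetical; the upload signature and the encode_cleaner/encode_replace options come from the hunk above):

    class MyJob < RocketJob::Job
      include RocketJob::Batch

      def perform(line)
        # Each uploaded record arrives here; with stream_mode: :line it is one
        # line of the file, already cleansed and converted to UTF-8.
        line.upcase
      end
    end

    job = MyJob.new
    job.slice_size = 100
    # Strip non-printable characters and drop any bytes that cannot be converted
    # to UTF-8, instead of raising Encoding::UndefinedConversionError.
    job.upload('users.csv', stream_mode: :line, encode_cleaner: :printable, encode_replace: '')
    job.save!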
@@ -63,18 +149,158 @@ module RocketJob
         count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end

+      # Upload results from an Arel into RocketJob::SlicedJob.
+      #
+      # Params
+      #   column_names
+      #     When a block is not supplied, supply the names of the columns to be returned
+      #     and uploaded into the job
+      #     These columns are automatically added to the select list to reduce overhead
+      #
+      #   If a Block is supplied it is passed the model returned from the database and should
+      #   return the work item to be uploaded into the job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # Example: Upload id's for all users
+      #   arel = User.all
+      #   job.upload_arel(arel)
+      #
+      # Example: Upload selected user id's
+      #   arel = User.where(country_code: 'US')
+      #   job.upload_arel(arel)
+      #
+      # Example: Upload user_name and zip_code
+      #   arel = User.where(country_code: 'US')
+      #   job.upload_arel(arel, :user_name, :zip_code)
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to the number of records returned by the arel.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
       def upload_arel(arel, *column_names, category: :main, &block)
         count = input(category).upload_arel(arel, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end

+      # Upload the result of a MongoDB query to the input collection for processing
+      # Useful when an entire MongoDB collection, or part thereof needs to be
+      # processed by a job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # If a Block is supplied it is passed the document returned from the
+      # database and should return a record for processing
+      #
+      # If no Block is supplied then the record will be the :fields returned
+      # from MongoDB
+      #
+      # Note:
+      #   This method uses the collection and not the MongoMapper document to
+      #   avoid the overhead of constructing a Model with every document returned
+      #   by the query
+      #
+      # Note:
+      #   The Block must return types that can be serialized to BSON.
+      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
+      #   Invalid: Date, etc.
+      #
+      # Example: Upload document ids
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria)
+      #
+      # Example: Upload just the supplied column
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to the number of records returned by the monqo query.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
       def upload_mongo_query(criteria, *column_names, category: :main, &block)
         count = input(category).upload_mongo_query(criteria, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
+      end
+
+      # Upload sliced range of integer requests as arrays of start and end ids.
+      #
+      # Returns [Integer] last_id - start_id + 1.
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.upload_integer_range(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.input.insert([200,299])
+      #   job.input.insert([300,399])
+      #   job.input.insert([400,421])
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to: last_id - start_id + 1.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      def upload_integer_range(start_id, last_id, category: :main)
+        input(category).upload_integer_range(start_id, last_id)
+        count = last_id - start_id + 1
+        self.record_count = (record_count || 0) + count
+        count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
+      end
+
+      # Upload sliced range of integer requests as an arrays of start and end ids
+      # starting with the last range first
+      #
+      # Returns [Integer] last_id - start_id + 1.
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed.
+      # Useful for when the highest order integer values should be processed before
+      # the lower integer value ranges. For example when processing every record
+      # in a database based on the id column
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.upload_integer_range_in_reverse_order(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.input.insert([400,421])
+      #   job.input.insert([300,399])
+      #   job.input.insert([200,299])
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to: last_id - start_id + 1.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
+        input(category).upload_integer_range_in_reverse_order(start_id, last_id)
+        count = last_id - start_id + 1
+        self.record_count = (record_count || 0) + count
+        count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end

       # Upload the supplied slices for processing by workers
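Every upload method in this hunk now follows the same pattern: on any StandardError the partially filled input collection is deleted before the exception is re-raised, so retrying an upload cannot duplicate records. A sketch of the behaviour this buys, reusing the hypothetical MyJob from above:

    job = MyJob.new
    begin
      # encode_replace: nil makes bytes that cannot be converted to UTF-8 raise
      # instead of being replaced.
      job.upload('corrupt.csv', encode_replace: nil)
    rescue Encoding::UndefinedConversionError
      # The rescue/delete_all inside #upload already ran: no partial slices
      # remain, and record_count was never incremented for the failed attempt.
      job.input.count # => 0, so the upload can simply be retried
    end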
data/lib/rocket_job/jobs/on_demand_batch_job.rb
CHANGED
@@ -11,7 +11,7 @@
 #   CODE
 #   job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
 #   arel = User.unscoped.all.order('updated_at DESC')
-#   job.
+#   job.upload_arel(arel)
 #   job.save!
 #
 # Console Testing:
@@ -25,7 +25,7 @@
 #
 #   # Run against a sub-set using a limit
 #   arel = User.unscoped.all.order('updated_at DESC').limit(100)
-#   job.
+#   job.upload_arel(arel)
 #
 #   # Run the subset directly within the console
 #   job.perform_now
@@ -38,7 +38,7 @@
 # Example: Move the upload operation into a before_batch.
 #   upload_code = <<-CODE
 #     arel = User.unscoped.all.order('updated_at DESC')
-#
+#     upload_arel(arel)
 #   CODE
 #
 #   code = <<-CODE
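With the documentation fixed, the full flow reads end to end as below. This is only a sketch assembled from the comments in this hunk; the heredoc body is a stand-in, since any Ruby that handles one uploaded record will do:

    code = <<-CODE
      User.find(row).touch
    CODE
    job  = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
    arel = User.unscoped.all.order('updated_at DESC')
    job.upload_arel(arel)
    job.save!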
data/lib/rocket_job/sliced/input.rb
CHANGED
@@ -1,104 +1,6 @@
 module RocketJob
   module Sliced
     class Input < Slices
-      # Load lines for processing from the supplied filename or stream into this job.
-      #
-      # Returns [Integer] the number of lines loaded into this collection
-      #
-      # Parameters
-      #   file_name_or_io [String | IO]
-      #     Full path and file name to stream into the job,
-      #     Or, an IO Stream that responds to: :read
-      #
-      #   streams [Symbol|Array]
-      #     Streams to convert the data whilst it is being read.
-      #     When nil, the file_name extensions will be inspected to determine what
-      #     streams should be applied.
-      #     Default: nil
-      #
-      #   delimiter[String]
-      #     Line / Record delimiter to use to break the stream up into records
-      #       Any string to break the stream up by
-      #       The records when saved will not include this delimiter
-      #     Default: nil
-      #       Automatically detect line endings and break up by line
-      #       Searches for the first "\r\n" or "\n" and then uses that as the
-      #       delimiter for all subsequent records
-      #
-      #   buffer_size [Integer]
-      #     Size of the blocks when reading from the input file / stream.
-      #     Default: 65536 ( 64K )
-      #
-      #   encoding: [String|Encoding]
-      #     Encode returned data with this encoding.
-      #     'US-ASCII': Original 7 bit ASCII Format
-      #     'ASCII-8BIT': 8-bit ASCII Format
-      #     'UTF-8': UTF-8 Format
-      #     Etc.
-      #     Default: 'UTF-8'
-      #
-      #   encode_replace: [String]
-      #     The character to replace with when a character cannot be converted to the target encoding.
-      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
-      #     Default: nil
-      #
-      #   encode_cleaner: [nil|symbol|Proc]
-      #     Cleanse data read from the input stream.
-      #     nil: No cleansing
-      #     :printable Cleanse all non-printable characters except \r and \n
-      #     Proc/lambda Proc to call after every read to cleanse the data
-      #     Default: :printable
-      #
-      #   stream_mode: [:line | :row | :record]
-      #     :line
-      #       Uploads the file a line (String) at a time for processing by workers.
-      #     :row
-      #       Parses each line from the file as an Array and uploads each array for processing by workers.
-      #     :record
-      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
-      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
-      #
-      # Example:
-      #   # Load plain text records from a file
-      #   job.input.upload('hello.csv')
-      #
-      # Example:
-      #   # Load plain text records from a file, stripping all non-printable characters,
-      #   # as well as any characters that cannot be converted to UTF-8
-      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
-      #
-      # Example: Zip
-      #   # Since csv is not known to RocketJob it is ignored
-      #   job.input.upload('myfile.csv.zip')
-      #
-      # Example: Encrypted Zip
-      #   job.input.upload('myfile.csv.zip.enc')
-      #
-      # Example: Explicitly set the streams
-      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
-      #
-      # Example: Supply custom options
-      #   job.input.upload('myfile.csv.enc', streams: :enc])
-      #
-      # Example: Extract streams from filename but write to a temp file
-      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
-      #   t = Tempfile.new('my_project')
-      #   job.input.upload(t.to_path, streams: streams)
-      #
-      # Example: Upload by writing records one at a time to the upload stream
-      #   job.upload do |writer|
-      #     10.times { |i| writer << i }
-      #   end
-      #
-      # Notes:
-      # - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
-      #   is recommended since Mongo only supports UTF-8 strings.
-      # - When zip format, the Zip file/stream must contain only one file, the first file found will be
-      #   loaded into the job
-      # - If an io stream is supplied, it is read until it returns nil.
-      # - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
-      # - Only call from one thread at a time per job instance.
-      # - CSV parsing is slow, so it is left for the workers to do.
       def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block

@@ -110,35 +12,6 @@ module RocketJob
         Writer::Input.collect(self, on_first: on_first, &block)
       end

-      # Upload the result of a MongoDB query to the input collection for processing
-      # Useful when an entire MongoDB collection, or part thereof needs to be
-      # processed by a job.
-      #
-      # Returns [Integer] the number of records uploaded
-      #
-      # If a Block is supplied it is passed the document returned from the
-      # database and should return a record for processing
-      #
-      # If no Block is supplied then the record will be the :fields returned
-      # from MongoDB
-      #
-      # Note:
-      #   This method uses the collection and not the MongoMapper document to
-      #   avoid the overhead of constructing a Model with every document returned
-      #   by the query
-      #
-      # Note:
-      #   The Block must return types that can be serialized to BSON.
-      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
-      #   Invalid: Date, etc.
-      #
-      # Example: Upload document ids
-      #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria)
-      #
-      # Example: Upload just the supplied column
-      #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
       def upload_mongo_query(criteria, *column_names, &block)
         options = criteria.options

@@ -171,30 +44,6 @@ module RocketJob
         end
       end

-      # Upload results from an Arel into RocketJob::SlicedJob.
-      #
-      # Params
-      #   column_names
-      #     When a block is not supplied, supply the names of the columns to be returned
-      #     and uploaded into the job
-      #     These columns are automatically added to the select list to reduce overhead
-      #
-      #   If a Block is supplied it is passed the model returned from the database and should
-      #   return the work item to be uploaded into the job.
-      #
-      # Returns [Integer] the number of records uploaded
-      #
-      # Example: Upload id's for all users
-      #   arel = User.all
-      #   job.record_count = job.upload_arel(arel)
-      #
-      # Example: Upload selected user id's
-      #   arel = User.where(country_code: 'US')
-      #   job.record_count = job.upload_arel(arel)
-      #
-      # Example: Upload user_name and zip_code
-      #   arel = User.where(country_code: 'US')
-      #   job.record_count = job.upload_arel(arel, :user_name, :zip_code)
       def upload_arel(arel, *column_names, &block)
         unless block
           column_names = column_names.collect(&:to_sym)
@@ -217,21 +66,6 @@ module RocketJob
         end
       end

-      # Upload sliced range of integer requests as a an arrays of start and end ids
-      #
-      # Returns [Integer] the number of slices uploaded
-      #
-      # Uploads one range per slice so that the response can return multiple records
-      # for each slice processed
-      #
-      # Example
-      #   job.slice_size = 100
-      #   job.record_count = job.upload_integer_range(200, 421)
-      #
-      #   # Equivalent to calling:
-      #   job.record_count = job.insert([200,299])
-      #   job.record_count += job.insert([300,399])
-      #   job.record_count += job.insert([400,421])
       def upload_integer_range(start_id, last_id)
         create_indexes
         count = 0
@@ -245,25 +79,6 @@ module RocketJob
         count
       end

-      # Upload sliced range of integer requests as an arrays of start and end ids
-      # starting with the last range first
-      #
-      # Returns [Integer] the number of slices uploaded
-      #
-      # Uploads one range per slice so that the response can return multiple records
-      # for each slice processed.
-      # Useful for when the highest order integer values should be processed before
-      # the lower integer value ranges. For example when processing every record
-      # in a database based on the id column
-      #
-      # Example
-      #   job.slice_size = 100
-      #   job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
-      #
-      #   # Equivalent to calling:
-      #   job.insert([400,421])
-      #   job.insert([300,399])
-      #   job.insert([200,299])
       def upload_integer_range_in_reverse_order(start_id, last_id)
         create_indexes
         end_id = last_id
@@ -290,9 +105,8 @@ module RocketJob
       #
       def each_failed_record
         failed.each do |slice|
-
-
-          end
+          record = slice.failed_record
+          yield(record, slice) unless record.nil?
         end
       end

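The bulk of this file's removals are the doc comments relocated to RocketJob::Batch::IO above; the one behavioural change is that each_failed_record now delegates the lookup to the new Slice#failed_record (added in the slice.rb hunk below) instead of computing it inline. A usage sketch, assuming a job whose workers have already failed some slices:

    job.input.each_failed_record do |record, slice|
      # record is the exact input record that raised; slice carries the
      # exception detail captured for that attempt.
      puts "#{record.inspect} failed: #{slice.exception.message}"
    end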
data/lib/rocket_job/sliced/slice.rb
CHANGED
@@ -119,6 +119,14 @@ module RocketJob
         self.worker_name = nil
       end

+      # Returns the failed record.
+      # Returns [nil] if there is no failed record
+      def failed_record
+        if exception && (record_number = exception.record_number)
+          at(record_number - 1)
+        end
+      end
+
       # Returns [Hash] the slice as a Hash for storage purposes
       # Compresses / Encrypts the slice according to the job setting
       if ::Mongoid::VERSION.to_i >= 6
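failed_record treats exception.record_number as 1-based, so at(record_number - 1) returns the record within the slice that was being processed when the exception occurred, and nil when no record number was captured. A sketch of the lookup, assuming at least one failed slice exists:

    slice = job.input.failed.first
    if (record = slice.failed_record)
      # The record at zero-based index record_number - 1 inside this slice
      # is the one that raised during processing.
      record
    end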
data/lib/rocket_job/sliced/slices.rb
CHANGED
@@ -98,6 +98,7 @@ module RocketJob
         all.collection.indexes.create_one(state: 1, _id: 1)
       end

+      # Forward additional methods.
       def_instance_delegators :@all, :collection, :count, :delete_all, :first, :find, :last, :nor, :not, :or, :to_a, :where

       # Drop this collection when it is no longer needed
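The delegator line forwards common query methods from the Slices wrapper to the underlying Mongoid criteria, so slice collections can be queried directly. For example (method names taken from the delegator line above; the state: :failed query assumes the field name Slice uses for its state machine):

    job.input.count                      # total number of slices
    job.input.where(state: :failed).to_a # query slices via the delegated :where and :to_a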
data/lib/rocket_job/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rocketjob
 version: !ruby/object:Gem::Version
-  version: 4.1.1
+  version: 4.2.0
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aasm
|