rocketjob 4.2.0 → 4.3.0.beta

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: f4a9d008dd87609ead82e1ddb964aa798fc412e40e0e9634bb0ac0ee1a136a6b
-   data.tar.gz: ea8f96c4791b84175488e7ab9cc0e31b05b62403e98c4853cafb339f85c118d9
+   metadata.gz: 8ae22e2ca14255089a3500e9294fc689847bb92525da45ba90426cae38a45378
+   data.tar.gz: fc0f6dd9c042020a01d47f4c2be35df3896db47e3cc07d875a27f057a8e64ab6
  SHA512:
-   metadata.gz: 1eb4a41765c4096fd6ac9c664da2bf27afebc37ce82cc4fc7545e22609443bd263e8a8bb04f22a986bc0bc4babf0797109fc958b3ca4122b3fc226ab9c9db8bc
-   data.tar.gz: 4507a2de381ddef1dee859cc906564d59167e7336002e568ff5cac06d4281cd1b214329a434375ba9c81bfc3ff69e03edf9a3edf4bab1703986b86feda95d907
+   metadata.gz: cb612469360af546d76ea1d024e80cbdf50f40693533fd3e608927911d62c86da6ad4ba290da0186ce98b3be95b3cfad21ceed3bf22091d6e80cf2adc7b2387d
+   data.tar.gz: 4038eb8af3353d6358f3dc74c50410d1f16cf96fe716c19b29aa8843428e2a8938267b3d130c4023b7234ca45ad5a546d65836ea69775d354889e6cb50121fd6
@@ -19,7 +19,7 @@ module RocketJob
    collection_name = "rocket_job.inputs.#{id}"
    collection_name << ".#{category}" unless category == :main

-   (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(slice_arguments(collection_name))
+   (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(collection_name: collection_name, slice_size: slice_size)
  end

  # Returns [RocketJob::Sliced::Output] output collection for holding output slices
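
Note: the private slice_arguments helper that previously built this argument hash is replaced later in this diff by build_path; the sliced collections now receive explicit keyword arguments. A minimal sketch of what the new call amounts to, assuming a job whose slice_size is 100 (illustrative, not part of the diff):

    input = RocketJob::Sliced::Input.new(
      collection_name: "rocket_job.inputs.#{job.id}",
      slice_size:      100 # taken from the job's slice_size field
    )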
@@ -36,23 +36,18 @@ module RocketJob
    collection_name = "rocket_job.outputs.#{id}"
    collection_name << ".#{category}" unless category == :main

-   (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
+   (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(collection_name: collection_name, slice_size: slice_size)
  end

- # Upload the supplied file_name or stream.
+ # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
  #
  # Returns [Integer] the number of records uploaded.
  #
  # Parameters
-
#   file_name_or_io [String | IO]
+ #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
  #     Full path and file name to stream into the job,
  #     Or, an IO Stream that responds to: :read
- #
- #   streams [Symbol|Array]
- #     Streams to convert the data whilst it is being read.
- #     When nil, the file_name extensions will be inspected to determine what
- #     streams should be applied.
- #     Default: nil
+ #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
  #
  #   delimiter[String]
  #     Line / Record delimiter to use to break the stream up into records
@@ -63,9 +58,14 @@ module RocketJob
  #     Searches for the first "\r\n" or "\n" and then uses that as the
  #     delimiter for all subsequent records
  #
- #   buffer_size [Integer]
- #     Size of the blocks when reading from the input file / stream.
- #     Default: 65536 ( 64K )
+ #   stream_mode: [:line | :row | :record]
+ #     :line
+ #       Uploads the file a line (String) at a time for processing by workers.
+ #     :row
+ #       Parses each line from the file as an Array and uploads each array for processing by workers.
+ #     :record
+ #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+ #     See IOStreams::Stream#each_line, IOStreams::Stream#each_row, and IOStreams::Stream#each_record.
  #
  #   encoding: [String|Encoding]
  #     Encode returned data with this encoding.
@@ -74,11 +74,15 @@ module RocketJob
  #     'UTF-8': UTF-8 Format
  #     Etc.
  #     Default: 'UTF-8'
+ #     NOTE: If an IOStreams::Path or IOStreams::Stream was supplied, then the encoding will be set
+ #           if not already set in the supplied stream.
  #
  #   encode_replace: [String]
  #     The character to replace with when a character cannot be converted to the target encoding.
  #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
  #     Default: nil
+ #     NOTE: If an IOStreams::Path or IOStreams::Stream was supplied, then the encoding will be set
+ #           if not already set in the supplied stream.
  #
  #   encode_cleaner: [nil|symbol|Proc]
  #     Cleanse data read from the input stream.
@@ -86,42 +90,38 @@ module RocketJob
  #     :printable Cleanse all non-printable characters except \r and \n
  #     Proc/lambda Proc to call after every read to cleanse the data
  #     Default: :printable
- #
- #   stream_mode: [:line | :row | :record]
- #     :line
- #       Uploads the file a line (String) at a time for processing by workers.
- #     :row
- #       Parses each line from the file as an Array and uploads each array for processing by workers.
- #     :record
- #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
- #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+ #     NOTE: If an IOStreams::Path or IOStreams::Stream was supplied, then the encoding will be set
+ #           if not already set in the supplied stream.
  #
  # Example:
  #   # Load plain text records from a file
- #   job.input.upload('hello.csv')
+ #   job.upload('hello.csv')
  #
  # Example:
  #   # Load plain text records from a file, stripping all non-printable characters,
  #   # as well as any characters that cannot be converted to UTF-8
- #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+ #   job.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
  #
  # Example: Zip
  #   # Since csv is not known to RocketJob it is ignored
- #   job.input.upload('myfile.csv.zip')
+ #   job.upload('myfile.csv.zip')
  #
  # Example: Encrypted Zip
- #   job.input.upload('myfile.csv.zip.enc')
+ #   job.upload('myfile.csv.zip.enc')
  #
  # Example: Explicitly set the streams
- #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+ #   path = IOStreams.path('myfile.ze').stream(:encode, encoding: 'UTF-8').stream(:zip).stream(:enc)
+ #   job.upload(path)
  #
  # Example: Supply custom options
- #   job.input.upload('myfile.csv.enc', streams: :enc])
+ #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false).option(:encode, encoding: 'UTF-8')
+ #   job.upload(path)
  #
- # Example: Extract streams from filename but write to a temp file
- #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
- #   t = Tempfile.new('my_project')
- #   job.input.upload(t.to_path, streams: streams)
+ # Example: Read from a tempfile and use the original file name to determine which streams to apply
+ #   temp_file = Tempfile.new('my_project')
+ #   temp_file.write(gzip_and_encrypted_data)
+ #   stream = IOStreams.stream(temp_file).file_name('myfile.gz.enc')
+ #   job.upload(stream)
  #
  # Example: Upload by writing records one at a time to the upload stream
  #   job.upload do |writer|
@@ -140,18 +140,22 @@ module RocketJob
  # * If an io stream is supplied, it is read until it returns nil.
  # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
  # * CSV parsing is slow, so it is usually left for the workers to do.
- def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
-   if file_name
-     self.upload_file_name = file_name
-   elsif file_name_or_io.is_a?(String)
-     self.upload_file_name = file_name_or_io
-   end
-   count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
+ def upload(stream = nil, file_name: nil, category: :main, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, stream_mode: :line, on_first: nil, **args, &block)
+   raise(ArgumentError, 'Either stream, or a block must be supplied') unless stream || block
+
+   count =
+     if block
+       input(category).upload(on_first: on_first, &block)
+     else
+       path = build_path(stream, file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+
+       self.upload_file_name = path.file_name
+       input(category).upload(on_first: on_first) do |io|
+         path.public_send("each_#{stream_mode}".to_sym, **args) { |line| io << line }
+       end
+     end
    self.record_count = (record_count || 0) + count
    count
- rescue StandardError => exc
-   input(category).delete_all
-   raise(exc)
  end

  # Upload results from an Arel into RocketJob::SlicedJob.
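
For orientation, a hedged sketch of the new upload signature end to end. The job class, bucket, and file names are invented, and it assumes the iostreams 1.0.0.beta API where IOStreams.path recognizes s3:// URLs when its S3 support is loaded:

    require 'rocketjob'

    job = MyBatchJob.new # any RocketJob::Batch job class (hypothetical)

    # Plain file name: streams are inferred from the extensions ('.gz' gzips, '.csv' is ignored)
    job.upload('inbound/users.csv.gz')

    # Explicit IOStreams path, parsing each line into an Array for the workers:
    path = IOStreams.path('s3://example-bucket/inbound/users.csv')
    job.upload(path, stream_mode: :row)
    job.save!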
@@ -188,9 +192,6 @@ module RocketJob
    count = input(category).upload_arel(arel, *column_names, &block)
    self.record_count = (record_count || 0) + count
    count
- rescue StandardError => exc
-   input(category).delete_all
-   raise(exc)
  end

  # Upload the result of a MongoDB query to the input collection for processing
@@ -232,9 +233,6 @@ module RocketJob
    count = input(category).upload_mongo_query(criteria, *column_names, &block)
    self.record_count = (record_count || 0) + count
    count
- rescue StandardError => exc
-   input(category).delete_all
-   raise(exc)
  end

  # Upload sliced range of integer requests as arrays of start and end ids.
@@ -263,9 +261,6 @@ module RocketJob
    count = last_id - start_id + 1
    self.record_count = (record_count || 0) + count
    count
- rescue StandardError => exc
-   input(category).delete_all
-   raise(exc)
  end

  # Upload sliced range of integer requests as arrays of start and end ids
@@ -298,9 +293,6 @@ module RocketJob
    count = last_id - start_id + 1
    self.record_count = (record_count || 0) + count
    count
- rescue StandardError => exc
-   input(category).delete_all
-   raise(exc)
  end

  # Upload the supplied slices for processing by workers
@@ -326,24 +318,71 @@ module RocketJob
    count
  end

- # Download the output data into the supplied file_name or stream
+ # Download the output data into the supplied file, io, IOStreams::Path, or IOStreams::Stream.
+ # Returns [Integer] the number of records / lines downloaded.
  #
  # Parameters
- #   file_name_or_io [String|IO]
- #     The file_name of the file to write to, or an IO Stream that implements #write.
+ #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
+ #     Full path and file name to stream into the job,
+ #     Or, an IO stream that responds to: :write
+ #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
+ #
+ # Example: Zip
+ #   # Since csv is not known to RocketJob it is ignored
+ #   job.download('myfile.csv.zip')
+ #
+ # Example: Encrypted Zip
+ #   job.download('myfile.csv.zip.enc')
+ #
+ # Example: Explicitly set the streams
+ #   path = IOStreams.path('myfile.ze').stream(:zip).stream(:enc)
+ #   job.download(path)
  #
- # options:
- #   category [Symbol]
- #     The category of output to download
- #     Default: :main
+ # Example: Supply custom options
+ #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false)
+ #   job.download(path)
+ #
+ # Example: Supply custom options. Set the file name within the zip file.
+ #   path = IOStreams.path('myfile.csv.zip').option(:zip, zip_file_name: 'myfile.csv')
+ #   job.download(path)
+ #
+ # Example: Download into a tempfile, or stream, using the original file name to determine the streams to apply:
+ #   tempfile = Tempfile.new('my_project')
+ #   stream = IOStreams.stream(tempfile).file_name('myfile.gz.enc')
+ #   job.download(stream)
+ #
+ # Example: Add a header and/or trailer record to the downloaded file:
+ #   IOStreams.path('/tmp/file.txt.gz').writer do |writer|
+ #     writer << "Header\n"
+ #     job.download do |line|
+ #       writer << line + "\n"
+ #     end
+ #     writer << "Trailer\n"
+ #   end
  #
- # See RocketJob::Sliced::Output#download for remaining options
+ # Example: Add a header and/or trailer record to the downloaded file, letting the line writer add the line breaks:
+ #   IOStreams.path('/tmp/file.txt.gz').line_writer do |writer|
+ #     writer << "Header"
+ #     job.download do |line|
+ #       writer << line
+ #     end
+ #     writer << "Trailer"
+ #   end
  #
- # Returns [Integer] the number of records downloaded
- def download(file_name_or_io = nil, category: :main, **args, &block)
+ # Notes:
+ # - The records are returned in '_id' order. Usually this is the order in
+ #   which the records were originally loaded.
+ def download(stream = nil, category: :main, header_line: nil, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, **args, &block)
    raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?

-   output(category).download(file_name_or_io, **args, &block)
+   if block
+     output(category).download(header_line: header_line, &block)
+   else
+     path = build_path(stream, nil, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+     path.line_writer(**args) do |io|
+       output(category).download(header_line: header_line) { |record| io << record }
+     end
+   end
  end

  # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
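
A hedged sketch of the reworked download flow; the job lookup and file names are illustrative:

    # Once the job completes (created with destroy_on_complete: false):
    job   = RocketJob::Job.find(job_id)
    count = job.download('exports/results.csv.gz') # gzip inferred from '.gz'

    # Or stream each record through a block instead of writing a file:
    job.download(header_line: 'id,name') { |record| $stdout.puts(record) }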
@@ -381,11 +420,13 @@ module RocketJob

  private

- def slice_arguments(collection_name)
-   {
-     collection_name: collection_name,
-     slice_size: slice_size
-   }
+ def build_path(stream, file_name, encoding: nil, encode_cleaner: nil, encode_replace: nil)
+   path = IOStreams.new(stream)
+   path.file_name = file_name if file_name
+   if (encoding || encode_cleaner || encode_replace) && !path.setting(:encode)
+     path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
+   end
+   path
  end
  end
  end
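
Worth noting: build_path layers an :encode stream onto the path only when the caller has not already configured one, so explicitly built IOStreams paths win. A hedged sketch of both spellings, assuming the iostreams 1.0.0.beta API:

    # Let build_path add UTF-8 encoding with cleansing:
    job.upload('inbound/users.csv', encode_cleaner: :printable, encode_replace: '')

    # Pre-built path: build_path sees the existing :encode setting and leaves it untouched
    path = IOStreams.path('inbound/users.csv').stream(:encode, encoding: 'BINARY')
    job.upload(path)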
@@ -20,7 +20,7 @@ module RocketJob
  #     Parses each line from the file as an Array and uploads each array for processing by workers.
  #   :record
  #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
- #   See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+ #   See IOStreams#each_line, IOStreams#each_row, and IOStreams#each_record.
  field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true

  validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
@@ -1,5 +1,4 @@
  require 'concurrent'
- require 'pathname'
  require 'fileutils'
  module RocketJob
    class DirmonEntry
@@ -143,7 +142,7 @@ module RocketJob
  # Raises: Errno::ENOENT: No such file or directory
  def self.add_whitelist_path(path)
    # Confirms that path exists
-   path = Pathname.new(path).realpath.to_s
+   path = IOStreams.path(path).realpath.to_s
    whitelist_paths << path
    whitelist_paths.uniq!
    path
@@ -153,7 +152,7 @@ module RocketJob
  # Raises: Errno::ENOENT: No such file or directory
  def self.delete_whitelist_path(path)
    # Confirms that path exists
-   path = Pathname.new(path).realpath.to_s
+   path = IOStreams.path(path).realpath.to_s
    whitelist_paths.delete(path)
    whitelist_paths.uniq!
    path
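
With Pathname swapped for IOStreams.path, the whitelist helpers behave as before for local paths (realpath still resolves symlinks). A short usage reminder; the directory is made up:

    # Typically run from an initializer:
    RocketJob::DirmonEntry.add_whitelist_path('/exports/inbound')
    # Files outside every whitelisted path are now skipped with a warning instead of an error.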
@@ -186,32 +185,23 @@ module RocketJob
  def each
    SemanticLogger.named_tagged(dirmon_entry: id.to_s) do
      # Case insensitive filename matching
-     Pathname.glob(pattern, File::FNM_CASEFOLD).each do |pathname|
-       next if pathname.directory?
-       pathname = begin
-         pathname.realpath
-       rescue Errno::ENOENT
-         logger.warn("Unable to expand the realpath for #{pathname.inspect}. Skipping file.")
-         next
-       end
-
-       file_name = pathname.to_s
-
+     IOStreams.each_child(pattern) do |path|
+       path = path.realpath
        # Skip archive directories
-       next if file_name.include?(self.class.default_archive_directory)
+       next if path.to_s.include?(archive_directory || self.class.default_archive_directory)

        # Security check?
-       if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath| file_name.to_s.start_with?(whitepath) }
-         logger.error "Skipping file: #{file_name} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
+       if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath| path.to_s.start_with?(whitepath) }
+         logger.warn "Skipping file: #{path} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
          next
        end

        # File must be writable so it can be removed after processing
-       unless pathname.writable?
-         logger.error "Skipping file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
+       if path.respond_to?(:writable?) && !path.writable?
+         logger.warn "Skipping file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
          next
        end
-       yield(pathname)
+       yield(path)
      end
    end
  end
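
Since IOStreams.each_child now drives the glob, a Dirmon pattern can point at any store iostreams understands, not just the local filesystem. A hedged sketch of an entry (the bucket and job class are invented):

    RocketJob::DirmonEntry.create!(
      name:           'Nightly user import',
      pattern:        's3://example-bucket/inbound/*.csv', # local globs such as '/exports/inbound/*.csv' still work
      job_class_name: 'ImportUsersJob'
    )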
@@ -239,17 +229,18 @@ module RocketJob
    nil
  end

- # Archives the file and kicks off a proxy job to upload the file.
- def later(pathname)
-   job_id = BSON::ObjectId.new
-   archived_file_name = archive_file(job_id, pathname)
+ # Archives the file, then kicks off a file upload job to upload the archived file.
+ def later(iopath)
+   job_id = BSON::ObjectId.new
+   archive_path = archive_iopath(iopath).join("#{job_id}_#{iopath.basename}")
+   iopath.move_to(archive_path)

    job = RocketJob::Jobs::UploadFileJob.create!(
      job_class_name: job_class_name,
      properties: properties,
-     description: "#{name}: #{pathname.basename}",
-     upload_file_name: archived_file_name.to_s,
-     original_file_name: pathname.to_s,
+     description: "#{name}: #{iopath.basename}",
+     upload_file_name: archive_path.to_s,
+     original_file_name: iopath.to_s,
      job_id: job_id
    )

@@ -257,8 +248,8 @@ module RocketJob
      message: 'Created RocketJob::Jobs::UploadFileJob',
      payload: {
        dirmon_entry_name: name,
-       upload_file_name: archived_file_name.to_s,
-       original_file_name: pathname.to_s,
+       upload_file_name: archive_path.to_s,
+       original_file_name: iopath.to_s,
        job_class_name: job_class_name,
        job_id: job_id.to_s,
        upload_job_id: job.id.to_s
@@ -278,37 +269,13 @@ module RocketJob
  class_attribute :whitelist_paths
  self.whitelist_paths = Concurrent::Array.new

- # Move the file to the archive directory
- #
- # The archived file name is prefixed with the job id
- #
- # Returns [String] the fully qualified archived file name
- #
- # Note:
- # - Works across partitions when the file and the archive are on different partitions
- def archive_file(job_id, pathname)
-   target_path = archive_pathname(pathname)
-   target_path.mkpath
-   target_file_name = target_path.join("#{job_id}_#{pathname.basename}")
-   # In case the file is being moved across partitions
-   FileUtils.move(pathname.to_s, target_file_name.to_s)
-   target_file_name.to_s
- end
-
  # Returns [Pathname] to the archive directory, and creates it if it does not exist.
  #
  # If `archive_directory` is a relative path, it is appended to the `file_pathname`.
  # If `archive_directory` is an absolute path, it is returned as-is.
- def archive_pathname(file_pathname)
-   path = Pathname.new(archive_directory)
-   path = file_pathname.dirname.join(archive_directory) if path.relative?
-
-   begin
-     path.mkpath unless path.exist?
-   rescue Errno::ENOENT => exc
-     raise(Errno::ENOENT, "DirmonJob failed to create archive directory: #{path}, #{exc.message}")
-   end
-   path.realpath
+ def archive_iopath(iopath)
+   path = IOStreams.path(archive_directory)
+   path.relative? ? iopath.directory.join(archive_directory) : path
  end

  # Validates job_class is a Rocket Job
@@ -70,11 +70,18 @@ module RocketJob
  def check_directories
    new_file_names = {}
    DirmonEntry.enabled.each do |entry|
-     entry.each do |pathname|
+     entry.each do |iopath|
+       # S3 files are only visible once completely uploaded.
+       if iopath.is_a?(IOStreams::Paths::S3)
+         logger.info("S3 File: #{iopath}. Starting: #{entry.job_class_name}")
+         entry.later(iopath)
+         next
+       end
+
        # BSON Keys cannot contain periods
-       key = pathname.to_s.tr('.', '_')
+       key = iopath.to_s.tr('.', '_')
        previous_size = previous_file_names[key]
-       size = check_file(entry, pathname, previous_size)
+       size = check_file(entry, iopath, previous_size)
        new_file_names[key] = size if size
      end
    end
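
The S3 short-circuit above works because S3 objects only become listable once their upload completes, so the size-stabilization pass used for ordinary files is unnecessary there. An illustrative trace of that two-pass contract (not actual output):

    # scan 1: check_file(entry, iopath, nil)    => 42_000 (size remembered, no job yet)
    # scan 2: check_file(entry, iopath, 42_000) => nil    (size stable, entry.later(iopath) queues the job)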
@@ -83,14 +90,14 @@ module RocketJob

  # Checks if a file should result in starting a job
  # Returns [Integer] file size, or nil if the file started a job
- def check_file(entry, pathname, previous_size)
-   size = pathname.size
+ def check_file(entry, iopath, previous_size)
+   size = iopath.size
    if previous_size && (previous_size == size)
-     logger.info("File stabilized: #{pathname}. Starting: #{entry.job_class_name}")
-     entry.later(pathname)
+     logger.info("File stabilized: #{iopath}. Starting: #{entry.job_class_name}")
+     entry.later(iopath)
      nil
    else
-     logger.info("Found file: #{pathname}. File size: #{size}")
+     logger.info("Found file: #{iopath}. File size: #{size}")
      # Keep for the next run
      size
    end
@@ -0,0 +1,27 @@
+ # Job to dynamically perform ruby code on demand as a Batch,
+ # with input and/or output from CSV/JSON or other format supported by Tabular.
+ #
+ # Notes:
+ # - Need to specify `destroy_on_complete: false` to collect output from this job.
+ # - `after_code` can be used to automatically download the output of this job to a file on completion.
+ #
+ # Example: Iterate over all rows in a table:
+ #   code = <<-CODE
+ #     if user = User.find(row)
+ #       user.cleanse_attributes!
+ #       user.save(validate: false)
+ #     end
+ #   CODE
+ #   job = RocketJob::Jobs::OnDemandBatchTabularJob.new(code: code, description: 'cleanse users', destroy_on_complete: false)
+ #   job.upload("users.csv")
+ #   job.save!
+ #
+ # On completion export the output:
+ #   job.download("output.csv")
+ module RocketJob
+   module Jobs
+     class OnDemandBatchTabularJob < OnDemandBatchJob
+       include RocketJob::Batch::Tabular
+     end
+   end
+ end
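
The comment above mentions after_code; a hedged sketch of wiring it up, assuming after_code is evaluated once the batch completes (the output path is invented):

    job = RocketJob::Jobs::OnDemandBatchTabularJob.new(
      code:                code, # the heredoc from the example above
      after_code:          "download('exports/users_cleansed.csv')",
      destroy_on_complete: false
    )
    job.upload('users.csv')
    job.save!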
@@ -1,15 +1,13 @@
  module RocketJob
    module Sliced
      class Input < Slices
-       def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
-         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
-
-         block ||= -> (io) do
-           iterator = "each_#{stream_mode}".to_sym
-           IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
-         end
-
+       def upload(on_first: nil, &block)
+         # Create indexes before uploading
+         create_indexes
          Writer::Input.collect(self, on_first: on_first, &block)
+       rescue StandardError => exc
+         drop
+         raise(exc)
        end

        def upload_mongo_query(criteria, *column_names, &block)
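
Failure cleanup has moved down a layer: instead of every Batch::IO upload variant rescuing and calling delete_all, Sliced::Input#upload now drops the partially written collection itself, so all upload_* paths share one recovery route. A sketch of the observable behavior:

    job.upload do |records|
      records << 'line 1'
      raise(IOError, 'source went away') # input collection is dropped, IOError propagates
    end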
@@ -36,7 +34,7 @@ module RocketJob
    end
  end

- Writer::Input.collect(self) do |records|
+ upload do |records|
    # Drop down to the mongo driver level to avoid constructing a Model for each document returned
    criteria.klass.collection.find(criteria.selector, options).each do |document|
      records << block.call(document)
@@ -46,8 +44,7 @@ module RocketJob

  def upload_arel(arel, *column_names, &block)
    unless block
-     column_names = column_names.collect(&:to_sym)
-     column_names << :id if column_names.size.zero?
+     column_names = column_names.empty? ? [:id] : column_names.collect(&:to_sym)

      block =
        if column_names.size == 1
@@ -61,12 +58,11 @@ module RocketJob
      arel = arel.select(selection)
    end

-   Writer::Input.collect(self) do |records|
-     arel.find_each { |model| records << block.call(model) }
-   end
+   upload { |records| arel.find_each { |model| records << block.call(model) } }
  end

  def upload_integer_range(start_id, last_id)
+   # Create indexes before uploading
    create_indexes
    count = 0
    while start_id <= last_id
@@ -77,9 +73,13 @@ module RocketJob
      count += 1
    end
    count
+ rescue StandardError => exc
+   drop
+   raise(exc)
  end

  def upload_integer_range_in_reverse_order(start_id, last_id)
+   # Create indexes before uploading
    create_indexes
    end_id = last_id
    count = 0
@@ -91,6 +91,9 @@ module RocketJob
      count += 1
    end
    count
+ rescue StandardError => exc
+   drop
+   raise(exc)
  end

  # Iterate over each failed record, if any
@@ -3,93 +3,18 @@ require 'tempfile'
  module RocketJob
    module Sliced
      class Output < Slices
-       # Write this output collection to the specified file/io stream
-       #
-       # Returns [Integer] the number of records returned from the collection
-       #
-       # Parameters
-       #   file_name_or_io [String|IO]
-       #     The file_name of the file to write to, or an IO Stream that implements
-       #     #write.
-       #
-       # options:
-       #   streams [Symbol|Array]
-       #     The formats/streams that be used to convert the data whilst it is
-       #     being written.
-       #     When nil, `file_name_or_io` will be inspected to try and determine what
-       #     streams should be applied.
-       #     Default: nil
-       #
-       #   Any other option that can be supplied to IOStreams::Line::Writer
-       #
-       # Stream types / extensions supported:
-       #   .zip        Zip File                                   [ :zip ]
-       #   .gz, .gzip  GZip File                                  [ :gzip ]
-       #   .enc        File Encrypted using symmetric encryption  [ :enc ]
-       #
-       # When a file is encrypted, it may also be compressed:
-       #   .zip.enc  [ :zip, :enc ]
-       #   .gz.enc   [ :gz, :enc ]
-       #
-       # Example: Zip
-       #   # Since csv is not known to RocketJob it is ignored
-       #   job.output.download('myfile.csv.zip')
-       #
-       # Example: Encrypted Zip
-       #   job.output.download('myfile.csv.zip.enc')
-       #
-       # Example: Explicitly set the streams
-       #   job.output.download('myfile.ze', streams: [:zip, :enc])
-       #
-       # Example: Supply custom options
-       #   job.output.download('myfile.csv.enc', streams: [enc: { compress: true }])
-       #
-       # Example: Supply custom options
-       #   job.output.download('myfile.csv.zip', streams: [ zip: { zip_file_name: 'myfile.csv' } ])
-       #
-       # Example: Extract streams from filename but write to a temp file
-       #   t = Tempfile.new('my_project')
-       #   job.output.download(t.to_path, file_name: 'myfile.gz.enc')
-       #
-       # Example: Add a header and/or trailer record to the downloaded file:
-       #   IOStreams.writer('/tmp/file.txt.gz') do |writer|
-       #     writer << "Header\n"
-       #     job.download do |line|
-       #       writer << line
-       #     end
-       #     writer << "Trailer\n"
-       #   end
-       #
-       # Notes:
-       # - The records are returned in '_id' order. Usually this is the order in
-       #   which the records were originally loaded.
-       def download(file_name_or_io = nil, header_line: nil, **args)
-         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block_given?
+       def download(header_line: nil)
+         raise(ArgumentError, 'Block is mandatory') unless block_given?

-         record_count = 0
-
-         if block_given?
-           # Write the header line
-           yield(header_line) if header_line
+         # Write the header line
+         yield(header_line) if header_line

-           # Call the supplied block for every record returned
-           each do |slice|
-             slice.each do |record|
-               record_count += 1
-               yield(record)
-             end
-           end
-         else
-           IOStreams.line_writer(file_name_or_io, **args) do |io|
-             # Write the header line
-             io << header_line if header_line
-
-             each do |slice|
-               slice.each do |record|
-                 record_count += 1
-                 io << record
-               end
-             end
+         # Call the supplied block for every record returned
+         record_count = 0
+         each do |slice|
+           slice.each do |record|
+             record_count += 1
+             yield(record)
            end
          end
          record_count
@@ -12,16 +12,10 @@ module RocketJob
  #   Block to call on the first line only, instead of storing in the slice.
  #   Useful for extracting the header row
  #   Default: nil
- def self.collect(input, **args, &block)
+ def self.collect(input, **args)
    writer = new(input, **args)
-   # Create indexes before uploading
-   input.create_indexes if input.respond_to?(:create_indexes)
-   block.call(writer)
+   yield(writer)
    writer.record_count
- rescue Exception => exc
-   # Drop input collection when upload fails
-   input.drop
-   raise exc
  ensure
    writer&.close
  end
@@ -1,3 +1,3 @@
  module RocketJob
-   VERSION = '4.2.0'.freeze
+   VERSION = '4.3.0.beta'.freeze
  end
@@ -119,6 +119,9 @@ module RocketJob

    SemanticLogger.named_tagged(job: job.id.to_s) do
      processed = true unless job.rocket_job_work(self, false, current_filter)
+
+     # Return the database connections for this thread back to the connection pool
+     ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
    end
  end
  processed
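
Background on the new clear_active_connections! call: every worker thread that touches ActiveRecord checks a connection out of the pool, and long-lived threads that never return it can exhaust the pool. A generic illustration, not RocketJob code; assumes a Rails app with a User model:

    require 'active_record'

    5.times.map do
      Thread.new do
        User.count                                   # implicitly checks out a connection
        ActiveRecord::Base.clear_active_connections! # returns it to the pool for other threads
      end
    end.each(&:join)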
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rocketjob
  version: !ruby/object:Gem::Version
-   version: 4.2.0
+   version: 4.3.0.beta
  platform: ruby
  authors:
  - Reid Morrison
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-19 00:00:00.000000000 Z
+ date: 2019-10-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: aasm
@@ -44,14 +44,14 @@ dependencies:
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.16'
+         version: 1.0.0.beta
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.16'
+         version: 1.0.0.beta
  - !ruby/object:Gem::Dependency
    name: mongoid
    requirement: !ruby/object:Gem::Requirement
@@ -131,6 +131,7 @@ files:
  - lib/rocket_job/jobs/dirmon_job.rb
  - lib/rocket_job/jobs/housekeeping_job.rb
  - lib/rocket_job/jobs/on_demand_batch_job.rb
+ - lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
  - lib/rocket_job/jobs/on_demand_job.rb
  - lib/rocket_job/jobs/performance_job.rb
  - lib/rocket_job/jobs/simple_job.rb
@@ -189,11 +190,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '2.3'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ">="
+   - - ">"
      - !ruby/object:Gem::Version
-       version: '0'
+       version: 1.3.1
  requirements: []
- rubygems_version: 3.0.3
+ rubygems_version: 3.0.6
  signing_key:
  specification_version: 4
  summary: Ruby's missing batch system.
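
The required_rubygems_version flip from ">= 0" to "> 1.3.1" is the standard RubyGems marker for a prerelease gem, so this beta will not be picked up by an unpinned dependency. A Gemfile sketch for trying it out:

    # Gemfile: pin the betas explicitly, since bundler skips prereleases by default
    gem 'rocketjob', '4.3.0.beta'
    gem 'iostreams', '1.0.0.beta'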