rocketjob 4.2.0 → 4.3.0.beta

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: f4a9d008dd87609ead82e1ddb964aa798fc412e40e0e9634bb0ac0ee1a136a6b
-   data.tar.gz: ea8f96c4791b84175488e7ab9cc0e31b05b62403e98c4853cafb339f85c118d9
+   metadata.gz: 8ae22e2ca14255089a3500e9294fc689847bb92525da45ba90426cae38a45378
+   data.tar.gz: fc0f6dd9c042020a01d47f4c2be35df3896db47e3cc07d875a27f057a8e64ab6
  SHA512:
-   metadata.gz: 1eb4a41765c4096fd6ac9c664da2bf27afebc37ce82cc4fc7545e22609443bd263e8a8bb04f22a986bc0bc4babf0797109fc958b3ca4122b3fc226ab9c9db8bc
-   data.tar.gz: 4507a2de381ddef1dee859cc906564d59167e7336002e568ff5cac06d4281cd1b214329a434375ba9c81bfc3ff69e03edf9a3edf4bab1703986b86feda95d907
+   metadata.gz: cb612469360af546d76ea1d024e80cbdf50f40693533fd3e608927911d62c86da6ad4ba290da0186ce98b3be95b3cfad21ceed3bf22091d6e80cf2adc7b2387d
+   data.tar.gz: 4038eb8af3353d6358f3dc74c50410d1f16cf96fe716c19b29aa8843428e2a8938267b3d130c4023b7234ca45ad5a546d65836ea69775d354889e6cb50121fd6
lib/rocket_job/batch/io.rb CHANGED
@@ -19,7 +19,7 @@ module RocketJob
        collection_name = "rocket_job.inputs.#{id}"
        collection_name << ".#{category}" unless category == :main

-       (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(slice_arguments(collection_name))
+       (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(collection_name: collection_name, slice_size: slice_size)
      end

      # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -36,23 +36,18 @@ module RocketJob
        collection_name = "rocket_job.outputs.#{id}"
        collection_name << ".#{category}" unless category == :main

-       (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
+       (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(collection_name: collection_name, slice_size: slice_size)
      end

-     # Upload the supplied file_name or stream.
+     # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
      #
      # Returns [Integer] the number of records uploaded.
      #
      # Parameters
-     #   file_name_or_io [String | IO]
+     #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
      #     Full path and file name to stream into the job,
      #     Or, an IO Stream that responds to: :read
-     #
-     #   streams [Symbol|Array]
-     #     Streams to convert the data whilst it is being read.
-     #     When nil, the file_name extensions will be inspected to determine what
-     #     streams should be applied.
-     #     Default: nil
+     #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
      #
      #   delimiter[String]
      #     Line / Record delimiter to use to break the stream up into records
@@ -63,9 +58,14 @@ module RocketJob
      #     Searches for the first "\r\n" or "\n" and then uses that as the
      #     delimiter for all subsequent records
      #
-     #   buffer_size [Integer]
-     #     Size of the blocks when reading from the input file / stream.
-     #     Default: 65536 ( 64K )
+     #   stream_mode: [:line | :row | :record]
+     #     :line
+     #       Uploads the file a line (String) at a time for processing by workers.
+     #     :row
+     #       Parses each line from the file as an Array and uploads each array for processing by workers.
+     #     :record
+     #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+     #     See IOStreams::Stream#each_line, IOStreams::Stream#each_row, and IOStreams::Stream#each_record.
      #
      #   encoding: [String|Encoding]
      #     Encode returned data with this encoding.
@@ -74,11 +74,15 @@ module RocketJob
      #     'UTF-8': UTF-8 Format
      #     Etc.
      #     Default: 'UTF-8'
+     #     NOTE: If an IOStreams::Path or IOStreams::Stream was supplied, then the encoding will be set
+     #       if not already set in the supplied stream.
      #
      #   encode_replace: [String]
      #     The character to replace with when a character cannot be converted to the target encoding.
      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
      #     Default: nil
+     #     NOTE: If an IOStreams::Path or IOStreams::Stream was supplied, then this option will be set
+     #       if not already set in the supplied stream.
      #
      #   encode_cleaner: [nil|symbol|Proc]
      #     Cleanse data read from the input stream.
@@ -86,42 +90,38 @@ module RocketJob
      #     :printable Cleanse all non-printable characters except \r and \n
      #     Proc/lambda Proc to call after every read to cleanse the data
      #     Default: :printable
-     #
-     #   stream_mode: [:line | :row | :record]
-     #     :line
-     #       Uploads the file a line (String) at a time for processing by workers.
-     #     :row
-     #       Parses each line from the file as an Array and uploads each array for processing by workers.
-     #     :record
-     #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
-     #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+     #     NOTE: If an IOStreams::Path or IOStreams::Stream was supplied, then this option will be set
+     #       if not already set in the supplied stream.
      #
      # Example:
      #   # Load plain text records from a file
-     #   job.input.upload('hello.csv')
+     #   job.upload('hello.csv')
      #
      # Example:
      #   # Load plain text records from a file, stripping all non-printable characters,
      #   # as well as any characters that cannot be converted to UTF-8
-     #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+     #   job.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
      #
      # Example: Zip
      #   # Since csv is not known to RocketJob it is ignored
-     #   job.input.upload('myfile.csv.zip')
+     #   job.upload('myfile.csv.zip')
      #
      # Example: Encrypted Zip
-     #   job.input.upload('myfile.csv.zip.enc')
+     #   job.upload('myfile.csv.zip.enc')
      #
      # Example: Explicitly set the streams
-     #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+     #   path = IOStreams.path('myfile.ze').stream(:encode, encoding: 'UTF-8').stream(:zip).stream(:enc)
+     #   job.upload(path)
      #
      # Example: Supply custom options
-     #   job.input.upload('myfile.csv.enc', streams: :enc])
+     #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false).option(:encode, encoding: 'UTF-8')
+     #   job.upload(path)
      #
-     # Example: Extract streams from filename but write to a temp file
-     #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
-     #   t = Tempfile.new('my_project')
-     #   job.input.upload(t.to_path, streams: streams)
+     # Example: Read from a tempfile and use the original file name to determine which streams to apply
+     #   temp_file = Tempfile.new('my_project')
+     #   temp_file.write(gzip_and_encrypted_data)
+     #   stream = IOStreams.stream(temp_file).file_name('myfile.gz.enc')
+     #   job.upload(stream)
      #
      # Example: Upload by writing records one at a time to the upload stream
      #   job.upload do |writer|
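The final example above continues past this hunk's context window. For illustration only, a complete block-form upload might read as follows; `MyJob` is a hypothetical job class that includes RocketJob::Batch, and the record strings are made up:

    # Each << appends one record to the job's input collection.
    job = MyJob.new
    job.upload do |writer|
      writer << 'first record'
      writer << 'second record'
    end
    job.save!
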
@@ -140,18 +140,22 @@ module RocketJob
      # * If an io stream is supplied, it is read until it returns nil.
      # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
      # * CSV parsing is slow, so it is usually left for the workers to do.
-     def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
-       if file_name
-         self.upload_file_name = file_name
-       elsif file_name_or_io.is_a?(String)
-         self.upload_file_name = file_name_or_io
-       end
-       count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
+     def upload(stream = nil, file_name: nil, category: :main, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, stream_mode: :line, on_first: nil, **args, &block)
+       raise(ArgumentError, 'Either stream, or a block must be supplied') unless stream || block
+
+       count =
+         if block
+           input(category).upload(on_first: on_first, &block)
+         else
+           path = build_path(stream, file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+
+           self.upload_file_name = path.file_name
+           input(category).upload(on_first: on_first) do |io|
+             path.public_send("each_#{stream_mode}".to_sym, **args) { |line| io << line }
+           end
+         end
        self.record_count = (record_count || 0) + count
        count
-     rescue StandardError => exc
-       input(category).delete_all
-       raise(exc)
      end

      # Upload results from an Arel into RocketJob::SlicedJob.
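To make the refactored signature concrete, a minimal usage sketch, assuming a hypothetical `MyJob` batch job class and an illustrative input file. Per the docs above, `stream_mode: :row` hands each line to workers as a parsed Array:

    # The IOStreams path infers the gunzip stream from the file extension.
    path = IOStreams.path('users.csv.gz')
    job  = MyJob.new
    job.upload(path, stream_mode: :row)
    job.save!
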
@@ -188,9 +192,6 @@ module RocketJob
        count = input(category).upload_arel(arel, *column_names, &block)
        self.record_count = (record_count || 0) + count
        count
-     rescue StandardError => exc
-       input(category).delete_all
-       raise(exc)
      end

      # Upload the result of a MongoDB query to the input collection for processing
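For reference, a hedged sketch of calling the Arel form shown above; `User` is a hypothetical ActiveRecord model and the columns are illustrative:

    # Uploads one [id, email] pair per record for the workers to process.
    job = MyJob.new
    job.upload_arel(User.where(active: true), :id, :email)
    job.save!
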
@@ -232,9 +233,6 @@ module RocketJob
        count = input(category).upload_mongo_query(criteria, *column_names, &block)
        self.record_count = (record_count || 0) + count
        count
-     rescue StandardError => exc
-       input(category).delete_all
-       raise(exc)
      end

      # Upload sliced range of integer requests as arrays of start and end ids.
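The equivalent Mongoid form, sketched under the same assumptions; `Account` is a hypothetical Mongoid model:

    # Uploads the _id of each matching document for the workers to process.
    job = MyJob.new
    job.upload_mongo_query(Account.where(state: 'failed'))
    job.save!
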
@@ -263,9 +261,6 @@ module RocketJob
        count = last_id - start_id + 1
        self.record_count = (record_count || 0) + count
        count
-     rescue StandardError => exc
-       input(category).delete_all
-       raise(exc)
      end

      # Upload sliced range of integer requests as an arrays of start and end ids
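And a hedged sketch of the integer-range form; the bounds are illustrative:

    # Each uploaded record is a [first_id, last_id] range covering slice_size ids.
    job = MyJob.new
    job.upload_integer_range(1, 1_000_000)
    job.save!
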
@@ -298,9 +293,6 @@ module RocketJob
        count = last_id - start_id + 1
        self.record_count = (record_count || 0) + count
        count
-     rescue StandardError => exc
-       input(category).delete_all
-       raise(exc)
      end

      # Upload the supplied slices for processing by workers
@@ -326,24 +318,71 @@ module RocketJob
        count
      end

-     # Download the output data into the supplied file_name or stream
+     # Download the output data into the supplied file, io, IOStreams::Path, or IOStreams::Stream.
+     # Returns [Integer] the number of records / lines downloaded.
      #
      # Parameters
-     #   file_name_or_io [String|IO]
-     #     The file_name of the file to write to, or an IO Stream that implements #write.
+     #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
+     #     Full path and file name to write the output to,
+     #     Or, an IO stream that responds to: :write
+     #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
+     #
+     # Example: Zip
+     #   # Since csv is not known to RocketJob it is ignored
+     #   job.download('myfile.csv.zip')
+     #
+     # Example: Encrypted Zip
+     #   job.download('myfile.csv.zip.enc')
+     #
+     # Example: Explicitly set the streams
+     #   path = IOStreams.path('myfile.ze').stream(:zip).stream(:enc)
+     #   job.download(path)
      #
-     # options:
-     #   category [Symbol]
-     #     The category of output to download
-     #     Default: :main
+     # Example: Supply custom options
+     #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false)
+     #   job.download(path)
+     #
+     # Example: Supply custom options. Set the file name within the zip file.
+     #   path = IOStreams.path('myfile.csv.zip').option(:zip, zip_file_name: 'myfile.csv')
+     #   job.download(path)
+     #
+     # Example: Download into a tempfile, or stream, using the original file name to determine the streams to apply:
+     #   tempfile = Tempfile.new('my_project')
+     #   stream = IOStreams.stream(tempfile).file_name('myfile.gz.enc')
+     #   job.download(stream)
+     #
+     # Example: Add a header and/or trailer record to the downloaded file:
+     #   IOStreams.path('/tmp/file.txt.gz').writer do |writer|
+     #     writer << "Header\n"
+     #     job.download do |line|
+     #       writer << line + "\n"
+     #     end
+     #     writer << "Trailer\n"
+     #   end
      #
-     # See RocketJob::Sliced::Output#download for remaining options
+     # Example: Add a header and/or trailer record to the downloaded file, letting the line writer add the line breaks:
+     #   IOStreams.path('/tmp/file.txt.gz').line_writer do |writer|
+     #     writer << "Header"
+     #     job.download do |line|
+     #       writer << line
+     #     end
+     #     writer << "Trailer"
+     #   end
      #
-     # Returns [Integer] the number of records downloaded
-     def download(file_name_or_io = nil, category: :main, **args, &block)
+     # Notes:
+     # - The records are returned in '_id' order. Usually this is the order in
+     #   which the records were originally loaded.
+     def download(stream = nil, category: :main, header_line: nil, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, **args, &block)
        raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?

-       output(category).download(file_name_or_io, **args, &block)
+       if block
+         output(category).download(header_line: header_line, &block)
+       else
+         path = build_path(stream, nil, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+         path.line_writer(**args) do |io|
+           output(category).download(header_line: header_line) { |record| io << record }
+         end
+       end
      end

      # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
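Tying the new download API together, a hedged sketch; the job lookup and output file name are illustrative, and per the notes elsewhere in this release the job must keep its output (destroy_on_complete: false) for there to be anything to download:

    # Writes all output records to a gzipped CSV file; the gzip stream is
    # inferred from the file extension.
    job = MyJob.find(job_id)   # job_id obtained elsewhere
    job.download('results.csv.gz') if job.completed?
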
@@ -381,11 +420,13 @@ module RocketJob

      private

-     def slice_arguments(collection_name)
-       {
-         collection_name: collection_name,
-         slice_size: slice_size
-       }
+     def build_path(stream, file_name, encoding: nil, encode_cleaner: nil, encode_replace: nil)
+       path = IOStreams.new(stream)
+       path.file_name = file_name if file_name
+       if (encoding || encode_cleaner || encode_replace) && !path.setting(:encode)
+         path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
+       end
+       path
      end
    end
  end
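The new private helper leans on the IOStreams v1 API that this release now requires. A hedged sketch of the same calls outside RocketJob; the file name is illustrative, and the claim that IOStreams.new accepts a String, IO, Path, or Stream is an assumption based on how build_path above uses it:

    # Wrap a file name, then add an encode layer only when one is not already configured.
    path = IOStreams.new('export.csv.gz')
    path.option_or_stream(:encode, encoding: 'UTF-8') unless path.setting(:encode)
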
lib/rocket_job/batch/tabular/input.rb CHANGED
@@ -20,7 +20,7 @@ module RocketJob
      #     Parses each line from the file as an Array and uploads each array for processing by workers.
      #   :record
      #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
-     # See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+     # See IOStreams#each_line, IOStreams#each_row, and IOStreams#each_record.
      field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true

      validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
lib/rocket_job/dirmon_entry.rb CHANGED
@@ -1,5 +1,4 @@
  require 'concurrent'
- require 'pathname'
  require 'fileutils'
  module RocketJob
    class DirmonEntry
@@ -143,7 +142,7 @@ module RocketJob
    # Raises: Errno::ENOENT: No such file or directory
    def self.add_whitelist_path(path)
      # Confirms that path exists
-     path = Pathname.new(path).realpath.to_s
+     path = IOStreams.path(path).realpath.to_s
      whitelist_paths << path
      whitelist_paths.uniq!
      path
@@ -153,7 +152,7 @@ module RocketJob
    # Raises: Errno::ENOENT: No such file or directory
    def self.delete_whitelist_path(path)
      # Confirms that path exists
-     path = Pathname.new(path).realpath.to_s
+     path = IOStreams.path(path).realpath.to_s
      whitelist_paths.delete(path)
      whitelist_paths.uniq!
      path
@@ -186,32 +185,23 @@ module RocketJob
    def each
      SemanticLogger.named_tagged(dirmon_entry: id.to_s) do
        # Case insensitive filename matching
-       Pathname.glob(pattern, File::FNM_CASEFOLD).each do |pathname|
-         next if pathname.directory?
-         pathname = begin
-           pathname.realpath
-         rescue Errno::ENOENT
-           logger.warn("Unable to expand the realpath for #{pathname.inspect}. Skipping file.")
-           next
-         end
-
-         file_name = pathname.to_s
-
+       IOStreams.each_child(pattern) do |path|
+         path = path.realpath
          # Skip archive directories
-         next if file_name.include?(self.class.default_archive_directory)
+         next if path.to_s.include?(archive_directory || self.class.default_archive_directory)

          # Security check?
-         if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath| file_name.to_s.start_with?(whitepath) }
-           logger.error "Skipping file: #{file_name} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
+         if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath| path.to_s.start_with?(whitepath) }
+           logger.warn "Skipping file: #{path} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
            next
          end

          # File must be writable so it can be removed after processing
-         unless pathname.writable?
-           logger.error "Skipping file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
+         if path.respond_to?(:writable?) && !path.writable?
+           logger.warn "Skipping file: #{path} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
            next
          end
-         yield(pathname)
+         yield(path)
        end
      end
    end
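DirmonEntry now walks matches through IOStreams.each_child, so a pattern can also point at remote stores. A hedged sketch of the call it relies on; the bucket and prefix are made up:

    # Yields an IOStreams path for every object matching the pattern.
    IOStreams.each_child('s3://my-bucket/inbound/*.csv') do |path|
      puts "Found: #{path}"
    end
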
@@ -239,17 +229,18 @@ module RocketJob
      nil
    end

-   # Archives the file and kicks off a proxy job to upload the file.
-   def later(pathname)
-     job_id = BSON::ObjectId.new
-     archived_file_name = archive_file(job_id, pathname)
+   # Archives the file, then kicks off a file upload job to upload the archived file.
+   def later(iopath)
+     job_id = BSON::ObjectId.new
+     archive_path = archive_iopath(iopath).join("#{job_id}_#{iopath.basename}")
+     iopath.move_to(archive_path)

      job = RocketJob::Jobs::UploadFileJob.create!(
        job_class_name: job_class_name,
        properties: properties,
-       description: "#{name}: #{pathname.basename}",
-       upload_file_name: archived_file_name.to_s,
-       original_file_name: pathname.to_s,
+       description: "#{name}: #{iopath.basename}",
+       upload_file_name: archive_path.to_s,
+       original_file_name: iopath.to_s,
        job_id: job_id
      )

@@ -257,8 +248,8 @@ module RocketJob
      message: 'Created RocketJob::Jobs::UploadFileJob',
      payload: {
        dirmon_entry_name: name,
-       upload_file_name: archived_file_name.to_s,
-       original_file_name: pathname.to_s,
+       upload_file_name: archive_path.to_s,
+       original_file_name: iopath.to_s,
        job_class_name: job_class_name,
        job_id: job_id.to_s,
        upload_job_id: job.id.to_s
@@ -278,37 +269,13 @@ module RocketJob
    class_attribute :whitelist_paths
    self.whitelist_paths = Concurrent::Array.new

-   # Move the file to the archive directory
-   #
-   # The archived file name is prefixed with the job id
-   #
-   # Returns [String] the fully qualified archived file name
-   #
-   # Note:
-   # - Works across partitions when the file and the archive are on different partitions
-   def archive_file(job_id, pathname)
-     target_path = archive_pathname(pathname)
-     target_path.mkpath
-     target_file_name = target_path.join("#{job_id}_#{pathname.basename}")
-     # In case the file is being moved across partitions
-     FileUtils.move(pathname.to_s, target_file_name.to_s)
-     target_file_name.to_s
-   end
-
    # Returns [Pathname] to the archive directory, and creates it if it does not exist.
    #
    # If `archive_directory` is a relative path, it is appended to the `file_pathname`.
    # If `archive_directory` is an absolute path, it is returned as-is.
-   def archive_pathname(file_pathname)
-     path = Pathname.new(archive_directory)
-     path = file_pathname.dirname.join(archive_directory) if path.relative?
-
-     begin
-       path.mkpath unless path.exist?
-     rescue Errno::ENOENT => exc
-       raise(Errno::ENOENT, "DirmonJob failed to create archive directory: #{path}, #{exc.message}")
-     end
-     path.realpath
+   def archive_iopath(iopath)
+     path = IOStreams.path(archive_directory)
+     path.relative? ? iopath.directory.join(archive_directory) : path
    end

    # Validates job_class is a Rocket Job
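To illustrate the relative-versus-absolute resolution above: a relative archive_directory is joined onto the watched file's directory, while an absolute one is used as-is. A hedged sketch; all paths and the field values are made up, and the exact DirmonEntry fields are assumed from this diff:

    # With archive_directory: 'archive', /data/in/users.csv archives under /data/in/archive/.
    # With archive_directory: '/var/archive', every file archives under /var/archive/.
    entry = RocketJob::DirmonEntry.new(
      name:              'Import users',
      pattern:           '/data/in/*.csv',
      job_class_name:    'MyJob',
      archive_directory: 'archive'
    )
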
lib/rocket_job/jobs/dirmon_job.rb CHANGED
@@ -70,11 +70,18 @@ module RocketJob
      def check_directories
        new_file_names = {}
        DirmonEntry.enabled.each do |entry|
-         entry.each do |pathname|
+         entry.each do |iopath|
+           # S3 files are only visible once completely uploaded.
+           if iopath.is_a?(IOStreams::Paths::S3)
+             logger.info("S3 File: #{iopath}. Starting: #{entry.job_class_name}")
+             entry.later(iopath)
+             next
+           end
+
            # BSON Keys cannot contain periods
-           key = pathname.to_s.tr('.', '_')
+           key = iopath.to_s.tr('.', '_')
            previous_size = previous_file_names[key]
-           size = check_file(entry, pathname, previous_size)
+           size = check_file(entry, iopath, previous_size)
            new_file_names[key] = size if size
          end
        end
@@ -83,14 +90,14 @@ module RocketJob

      # Checks if a file should result in starting a job
      # Returns [Integer] file size, or nil if the file started a job
-     def check_file(entry, pathname, previous_size)
-       size = pathname.size
+     def check_file(entry, iopath, previous_size)
+       size = iopath.size
        if previous_size && (previous_size == size)
-         logger.info("File stabilized: #{pathname}. Starting: #{entry.job_class_name}")
-         entry.later(pathname)
+         logger.info("File stabilized: #{iopath}. Starting: #{entry.job_class_name}")
+         entry.later(iopath)
          nil
        else
-         logger.info("Found file: #{pathname}. File size: #{size}")
+         logger.info("Found file: #{iopath}. File size: #{size}")
          # Keep for the next run
          size
        end
lib/rocket_job/jobs/on_demand_batch_tabular_job.rb ADDED
@@ -0,0 +1,27 @@
+ # Job to dynamically perform ruby code on demand as a Batch,
+ # with input and/or output from CSV/JSON or other format supported by Tabular.
+ #
+ # Notes:
+ # - Need to specify `destroy_on_complete: false` to collect output from this job.
+ # - `after_code` can be used to automatically download the output of this job to a file on completion.
+ #
+ # Example: Iterate over all rows in a table:
+ #   code = <<-CODE
+ #     if user = User.find(row)
+ #       user.cleanse_attributes!
+ #       user.save(validate: false)
+ #     end
+ #   CODE
+ #   job = RocketJob::Jobs::OnDemandBatchTabularJob.new(code: code, description: 'cleanse users', destroy_on_complete: false)
+ #   job.upload("users.csv")
+ #   job.save!
+ #
+ # On completion export the output:
+ #   job.download("output.csv")
+ module RocketJob
+   module Jobs
+     class OnDemandBatchTabularJob < OnDemandBatchJob
+       include RocketJob::Batch::Tabular
+     end
+   end
+ end
lib/rocket_job/sliced/input.rb CHANGED
@@ -1,15 +1,13 @@
  module RocketJob
    module Sliced
      class Input < Slices
-       def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
-         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
-
-         block ||= -> (io) do
-           iterator = "each_#{stream_mode}".to_sym
-           IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
-         end
-
+       def upload(on_first: nil, &block)
+         # Create indexes before uploading
+         create_indexes
          Writer::Input.collect(self, on_first: on_first, &block)
+       rescue StandardError => exc
+         drop
+         raise(exc)
        end

        def upload_mongo_query(criteria, *column_names, &block)
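Note the changed failure semantics: a failed upload now drops the input collection here, where previously the rescue in Batch::IO deleted its documents instead. A hedged sketch of the observable behavior; the error is contrived:

    # Any error raised inside the block drops the partially written input
    # collection, then propagates to the caller.
    begin
      job.upload { |writer| raise IOError, 'source disappeared' }
    rescue IOError
      # job.input has been dropped; the upload can be retried from scratch.
    end
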
@@ -36,7 +34,7 @@ module RocketJob
            end
          end

-         Writer::Input.collect(self) do |records|
+         upload do |records|
            # Drop down to the mongo driver level to avoid constructing a Model for each document returned
            criteria.klass.collection.find(criteria.selector, options).each do |document|
              records << block.call(document)
@@ -46,8 +44,7 @@ module RocketJob

        def upload_arel(arel, *column_names, &block)
          unless block
-           column_names = column_names.collect(&:to_sym)
-           column_names << :id if column_names.size.zero?
+           column_names = column_names.empty? ? [:id] : column_names.collect(&:to_sym)

            block =
              if column_names.size == 1
@@ -61,12 +58,11 @@ module RocketJob
            arel = arel.select(selection)
          end

-         Writer::Input.collect(self) do |records|
-           arel.find_each { |model| records << block.call(model) }
-         end
+         upload { |records| arel.find_each { |model| records << block.call(model) } }
        end

        def upload_integer_range(start_id, last_id)
+         # Create indexes before uploading
          create_indexes
          count = 0
          while start_id <= last_id
@@ -77,9 +73,13 @@ module RocketJob
            count += 1
          end
          count
+       rescue StandardError => exc
+         drop
+         raise(exc)
        end

        def upload_integer_range_in_reverse_order(start_id, last_id)
+         # Create indexes before uploading
          create_indexes
          end_id = last_id
          count = 0
@@ -91,6 +91,9 @@ module RocketJob
            count += 1
          end
          count
+       rescue StandardError => exc
+         drop
+         raise(exc)
        end

        # Iterate over each failed record, if any
lib/rocket_job/sliced/output.rb CHANGED
@@ -3,93 +3,18 @@ require 'tempfile'
  module RocketJob
    module Sliced
      class Output < Slices
-       # Write this output collection to the specified file/io stream
-       #
-       # Returns [Integer] the number of records returned from the collection
-       #
-       # Parameters
-       #   file_name_or_io [String|IO]
-       #     The file_name of the file to write to, or an IO Stream that implements
-       #     #write.
-       #
-       # options:
-       #   streams [Symbol|Array]
-       #     The formats/streams that be used to convert the data whilst it is
-       #     being written.
-       #     When nil, `file_name_or_io` will be inspected to try and determine what
-       #     streams should be applied.
-       #     Default: nil
-       #
-       #   Any other option that can be supplied to IOStreams::Line::Writer
-       #
-       # Stream types / extensions supported:
-       #   .zip Zip File [ :zip ]
-       #   .gz, .gzip GZip File [ :gzip ]
-       #   .enc File Encrypted using symmetric encryption [ :enc ]
-       #
-       # When a file is encrypted, it may also be compressed:
-       #   .zip.enc [ :zip, :enc ]
-       #   .gz.enc [ :gz, :enc ]
-       #
-       # Example: Zip
-       #   # Since csv is not known to RocketJob it is ignored
-       #   job.output.download('myfile.csv.zip')
-       #
-       # Example: Encrypted Zip
-       #   job.output.download('myfile.csv.zip.enc')
-       #
-       # Example: Explicitly set the streams
-       #   job.output.download('myfile.ze', streams: [:zip, :enc])
-       #
-       # Example: Supply custom options
-       #   job.output.download('myfile.csv.enc', streams: [enc: { compress: true }])
-       #
-       # Example: Supply custom options
-       #   job.output.download('myfile.csv.zip', streams: [ zip: { zip_file_name: 'myfile.csv' } ])
-       #
-       # Example: Extract streams from filename but write to a temp file
-       #   t = Tempfile.new('my_project')
-       #   job.output.download(t.to_path, file_name: 'myfile.gz.enc')
-       #
-       # Example: Add a header and/or trailer record to the downloaded file:
-       #   IOStreams.writer('/tmp/file.txt.gz') do |writer|
-       #     writer << "Header\n"
-       #     job.download do |line|
-       #       writer << line
-       #     end
-       #     writer << "Trailer\n"
-       #   end
-       #
-       # Notes:
-       # - The records are returned in '_id' order. Usually this is the order in
-       #   which the records were originally loaded.
-       def download(file_name_or_io = nil, header_line: nil, **args)
-         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block_given?
+       def download(header_line: nil)
+         raise(ArgumentError, 'Block is mandatory') unless block_given?

-         record_count = 0
-
-         if block_given?
-           # Write the header line
-           yield(header_line) if header_line
+         # Write the header line
+         yield(header_line) if header_line

-           # Call the supplied block for every record returned
-           each do |slice|
-             slice.each do |record|
-               record_count += 1
-               yield(record)
-             end
-           end
-         else
-           IOStreams.line_writer(file_name_or_io, **args) do |io|
-             # Write the header line
-             io << header_line if header_line
-
-             each do |slice|
-               slice.each do |record|
-                 record_count += 1
-                 io << record
-               end
-             end
+         # Call the supplied block for every record returned
+         record_count = 0
+         each do |slice|
+           slice.each do |record|
+             record_count += 1
+             yield(record)
            end
          end
          record_count
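Since Sliced::Output#download is now block-only, with file handling moved up into Batch::IO#download, the low-level form looks like the following sketch; the header text is illustrative:

    # Streams every output record, in '_id' order, to the block; returns the count.
    count = job.output.download(header_line: 'name,email') do |record|
      puts record
    end
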
lib/rocket_job/sliced/writer/input.rb CHANGED
@@ -12,16 +12,10 @@ module RocketJob
        #   Block to call on the first line only, instead of storing in the slice.
        #   Useful for extracting the header row
        #   Default: nil
-       def self.collect(input, **args, &block)
+       def self.collect(input, **args)
          writer = new(input, **args)
-         # Create indexes before uploading
-         input.create_indexes if input.respond_to?(:create_indexes)
-         block.call(writer)
+         yield(writer)
          writer.record_count
-       rescue Exception => exc
-         # Drop input collection when upload fails
-         input.drop
-         raise exc
        ensure
          writer&.close
        end
lib/rocket_job/version.rb CHANGED
@@ -1,3 +1,3 @@
  module RocketJob
-   VERSION = '4.2.0'.freeze
+   VERSION = '4.3.0.beta'.freeze
  end
lib/rocket_job/worker.rb CHANGED
@@ -119,6 +119,9 @@ module RocketJob

        SemanticLogger.named_tagged(job: job.id.to_s) do
          processed = true unless job.rocket_job_work(self, false, current_filter)
+
+         # Return the database connections for this thread back to the connection pool
+         ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
        end
      end
      processed
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rocketjob
  version: !ruby/object:Gem::Version
-   version: 4.2.0
+   version: 4.3.0.beta
  platform: ruby
  authors:
  - Reid Morrison
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-19 00:00:00.000000000 Z
+ date: 2019-10-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: aasm
@@ -44,14 +44,14 @@ dependencies:
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
-       version: '0.16'
+       version: 1.0.0.beta
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '0.16'
+         version: 1.0.0.beta
  - !ruby/object:Gem::Dependency
    name: mongoid
    requirement: !ruby/object:Gem::Requirement
@@ -131,6 +131,7 @@ files:
  - lib/rocket_job/jobs/dirmon_job.rb
  - lib/rocket_job/jobs/housekeeping_job.rb
  - lib/rocket_job/jobs/on_demand_batch_job.rb
+ - lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
  - lib/rocket_job/jobs/on_demand_job.rb
  - lib/rocket_job/jobs/performance_job.rb
  - lib/rocket_job/jobs/simple_job.rb
@@ -189,11 +190,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '2.3'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ">="
+   - - ">"
      - !ruby/object:Gem::Version
-       version: '0'
+       version: 1.3.1
  requirements: []
- rubygems_version: 3.0.3
+ rubygems_version: 3.0.6
  signing_key:
  specification_version: 4
  summary: Ruby's missing batch system.