rocketjob 4.2.0 → 4.3.0.beta
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +111 -70
- data/lib/rocket_job/batch/tabular/input.rb +1 -1
- data/lib/rocket_job/dirmon_entry.rb +23 -56
- data/lib/rocket_job/jobs/dirmon_job.rb +15 -8
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +27 -0
- data/lib/rocket_job/sliced/input.rb +17 -14
- data/lib/rocket_job/sliced/output.rb +10 -85
- data/lib/rocket_job/sliced/writer/input.rb +2 -8
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +3 -0
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8ae22e2ca14255089a3500e9294fc689847bb92525da45ba90426cae38a45378
+  data.tar.gz: fc0f6dd9c042020a01d47f4c2be35df3896db47e3cc07d875a27f057a8e64ab6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cb612469360af546d76ea1d024e80cbdf50f40693533fd3e608927911d62c86da6ad4ba290da0186ce98b3be95b3cfad21ceed3bf22091d6e80cf2adc7b2387d
+  data.tar.gz: 4038eb8af3353d6358f3dc74c50410d1f16cf96fe716c19b29aa8843428e2a8938267b3d130c4023b7234ca45ad5a546d65836ea69775d354889e6cb50121fd6
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -19,7 +19,7 @@ module RocketJob
       collection_name = "rocket_job.inputs.#{id}"
       collection_name << ".#{category}" unless category == :main
 
-      (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(
+      (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(collection_name: collection_name, slice_size: slice_size)
     end
 
     # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -36,23 +36,18 @@ module RocketJob
       collection_name = "rocket_job.outputs.#{id}"
       collection_name << ".#{category}" unless category == :main
 
-      (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(
+      (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(collection_name: collection_name, slice_size: slice_size)
     end
 
-    # Upload the supplied
+    # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
     #
     # Returns [Integer] the number of records uploaded.
    #
    # Parameters
-    #
+    #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
    #     Full path and file name to stream into the job,
    #     Or, an IO Stream that responds to: :read
-    #
-    #   streams [Symbol|Array]
-    #     Streams to convert the data whilst it is being read.
-    #     When nil, the file_name extensions will be inspected to determine what
-    #     streams should be applied.
-    #     Default: nil
+    #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
    #
    #   delimiter[String]
    #     Line / Record delimiter to use to break the stream up into records
@@ -63,9 +58,14 @@ module RocketJob
    #     Searches for the first "\r\n" or "\n" and then uses that as the
    #     delimiter for all subsequent records
    #
-    #
-    #
-    #
+    #   stream_mode: [:line | :row | :record]
+    #     :line
+    #       Uploads the file a line (String) at a time for processing by workers.
+    #     :row
+    #       Parses each line from the file as an Array and uploads each array for processing by workers.
+    #     :record
+    #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+    #     See IOStreams::Stream#each_line, IOStreams::Stream#each_row, and IOStreams::Stream#each_record.
    #
    #   encoding: [String|Encoding]
    #     Encode returned data with this encoding.
@@ -74,11 +74,15 @@ module RocketJob
    #     'UTF-8': UTF-8 Format
    #     Etc.
    #     Default: 'UTF-8'
+    #     NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
+    #       if not already set in the supplied stream.
    #
    #   encode_replace: [String]
    #     The character to replace with when a character cannot be converted to the target encoding.
    #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
    #     Default: nil
+    #     NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
+    #       if not already set in the supplied stream.
    #
    #   encode_cleaner: [nil|symbol|Proc]
    #     Cleanse data read from the input stream.
@@ -86,42 +90,38 @@ module RocketJob
    #     :printable Cleanse all non-printable characters except \r and \n
    #     Proc/lambda Proc to call after every read to cleanse the data
    #     Default: :printable
-    #
-    #
-    #   :line
-    #     Uploads the file a line (String) at a time for processing by workers.
-    #   :row
-    #     Parses each line from the file as an Array and uploads each array for processing by workers.
-    #   :record
-    #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
-    #   See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+    #     NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
+    #       if not already set in the supplied stream.
    #
    # Example:
    #   # Load plain text records from a file
-    #   job.
+    #   job.upload('hello.csv')
    #
    # Example:
    #   # Load plain text records from a file, stripping all non-printable characters,
    #   # as well as any characters that cannot be converted to UTF-8
-    #   job.
+    #   job.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
    #
    # Example: Zip
    #   # Since csv is not known to RocketJob it is ignored
-    #   job.
+    #   job.upload('myfile.csv.zip')
    #
    # Example: Encrypted Zip
-    #   job.
+    #   job.upload('myfile.csv.zip.enc')
    #
    # Example: Explicitly set the streams
-    #
+    #   path = IOStreams.path('myfile.ze').stream(:encode, encoding: 'UTF-8').stream(:zip).stream(:enc)
+    #   job.upload(path)
    #
    # Example: Supply custom options
-    #
+    #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false).option(:encode, encoding: 'UTF-8')
+    #   job.upload(path)
    #
-    # Example:
-    #
-    #
-    #
+    # Example: Read from a tempfile and use the original file name to determine which streams to apply
+    #   temp_file = Tempfile.new('my_project')
+    #   temp_file.write(gzip_and_encrypted_data)
+    #   stream = IOStreams.stream(temp_file).file_name('myfile.gz.enc')
+    #   job.upload(stream)
    #
    # Example: Upload by writing records one at a time to the upload stream
    #   job.upload do |writer|
@@ -140,18 +140,22 @@ module RocketJob
    # * If an io stream is supplied, it is read until it returns nil.
    # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
    # * CSV parsing is slow, so it is usually left for the workers to do.
-    def upload(
-
-
-
-
-
-
+    def upload(stream = nil, file_name: nil, category: :main, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, stream_mode: :line, on_first: nil, **args, &block)
+      raise(ArgumentError, 'Either stream, or a block must be supplied') unless stream || block
+
+      count =
+        if block
+          input(category).upload(on_first: on_first, &block)
+        else
+          path = build_path(stream, file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+
+          self.upload_file_name = path.file_name
+          input(category).upload(on_first: on_first) do |io|
+            path.public_send("each_#{stream_mode}".to_sym, **args) { |line| io << line }
+          end
+        end
       self.record_count = (record_count || 0) + count
       count
-    rescue StandardError => exc
-      input(category).delete_all
-      raise(exc)
    end
 
    # Upload results from an Arel into RocketJob::SlicedJob.
@@ -188,9 +192,6 @@ module RocketJob
      count = input(category).upload_arel(arel, *column_names, &block)
      self.record_count = (record_count || 0) + count
      count
-    rescue StandardError => exc
-      input(category).delete_all
-      raise(exc)
    end
 
    # Upload the result of a MongoDB query to the input collection for processing
@@ -232,9 +233,6 @@ module RocketJob
      count = input(category).upload_mongo_query(criteria, *column_names, &block)
      self.record_count = (record_count || 0) + count
      count
-    rescue StandardError => exc
-      input(category).delete_all
-      raise(exc)
    end
 
    # Upload sliced range of integer requests as arrays of start and end ids.
@@ -263,9 +261,6 @@ module RocketJob
      count = last_id - start_id + 1
      self.record_count = (record_count || 0) + count
      count
-    rescue StandardError => exc
-      input(category).delete_all
-      raise(exc)
    end
 
    # Upload sliced range of integer requests as an arrays of start and end ids
@@ -298,9 +293,6 @@ module RocketJob
      count = last_id - start_id + 1
      self.record_count = (record_count || 0) + count
      count
-    rescue StandardError => exc
-      input(category).delete_all
-      raise(exc)
    end
 
    # Upload the supplied slices for processing by workers
@@ -326,24 +318,71 @@ module RocketJob
      count
    end
 
-    # Download the output data into the supplied
+    # Download the output data into the supplied file, io, IOStreams::Path, or IOStreams::Stream.
+    # Returns [Integer] the number of records / lines downloaded.
    #
    # Parameters
-    #
-    #
+    #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
+    #     Full path and file name to stream into the job,
+    #     Or, an IO stream that responds to: :write
+    #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
+    #
+    # Example: Zip
+    #   # Since csv is not known to RocketJob it is ignored
+    #   job.download('myfile.csv.zip')
+    #
+    # Example: Encrypted Zip
+    #   job.download('myfile.csv.zip.enc')
+    #
+    # Example: Explicitly set the streams
+    #   path = IOStreams.path('myfile.ze').stream(:zip).stream(:enc)
+    #   job.download(path)
    #
-    #
-    #
-    #
-    #
+    # Example: Supply custom options
+    #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false)
+    #   job.download(path)
+    #
+    # Example: Supply custom options. Set the file name within the zip file.
+    #   path = IOStreams.path('myfile.csv.zip').option(:zip, zip_file_name: 'myfile.csv')
+    #   job.download(path)
+    #
+    # Example: Download into a tempfile, or stream, using the original file name to determine the streams to apply:
+    #   tempfile = Tempfile.new('my_project')
+    #   stream = IOStreams.stream(tempfile).file_name('myfile.gz.enc')
+    #   job.download(stream)
+    #
+    # Example: Add a header and/or trailer record to the downloaded file:
+    #   IOStreams.path('/tmp/file.txt.gz').writer do |writer|
+    #     writer << "Header\n"
+    #     job.download do |line|
+    #       writer << line + "\n"
+    #     end
+    #     writer << "Trailer\n"
+    #   end
    #
-    #
+    # Example: Add a header and/or trailer record to the downloaded file, letting the line writer add the line breaks:
+    #   IOStreams.path('/tmp/file.txt.gz').line_writer do |writer|
+    #     writer << "Header"
+    #     job.download do |line|
+    #       writer << line
+    #     end
+    #     writer << "Trailer"
+    #   end
    #
-    #
-
+    # Notes:
+    # - The records are returned in '_id' order. Usually this is the order in
+    #   which the records were originally loaded.
+    def download(stream = nil, category: :main, header_line: nil, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, **args, &block)
      raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
 
-
+      if block
+        output(category).download(header_line: header_line, &block)
+      else
+        path = build_path(stream, nil, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+        path.line_writer(**args) do |io|
+          output(category).download(header_line: header_line) { |record| io << record }
+        end
+      end
    end
 
    # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
@@ -381,11 +420,13 @@ module RocketJob
 
    private
 
-    def
-
-
-
-
+    def build_path(stream, file_name, encoding: nil, encode_cleaner: nil, encode_replace: nil)
+      path = IOStreams.new(stream)
+      path.file_name = file_name if file_name
+      if (encoding || encode_cleaner || encode_replace) && !path.setting(:encode)
+        path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
+      end
+      path
    end
  end
end
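To make the reworked upload/download API above concrete, here is a minimal sketch; `MyJob`, the file names, and the `collect_output`/`destroy_on_complete` settings are illustrative assumptions, while `upload`, `download`, and `stream_mode:` come straight from the diff:

    require 'rocketjob'

    # Hypothetical batch job, for illustration only.
    class MyJob < RocketJob::Job
      include RocketJob::Batch

      self.collect_output      = true  # gather results into the output collection
      self.destroy_on_complete = false # keep the job so its output can be downloaded

      def perform(line)
        line.upcase
      end
    end

    job = MyJob.new
    job.upload('users.csv.zip')                  # zip stream inferred from the extension
    # job.upload('users.csv', stream_mode: :row) # or parse each line into an Array first
    job.save!

    # Later, once the job has completed:
    job.download('results.csv.gz')               # gzip inferred from the extension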
data/lib/rocket_job/batch/tabular/input.rb
CHANGED
@@ -20,7 +20,7 @@ module RocketJob
      #   Parses each line from the file as an Array and uploads each array for processing by workers.
      # :record
      #   Parses each line from the file into a Hash and uploads each hash for processing by workers.
-      # See
+      # See IOStreams#each_line, IOStreams#each_row, and IOStreams#each_record.
      field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
 
      validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
data/lib/rocket_job/dirmon_entry.rb
CHANGED
@@ -1,5 +1,4 @@
 require 'concurrent'
-require 'pathname'
 require 'fileutils'
 module RocketJob
   class DirmonEntry
@@ -143,7 +142,7 @@ module RocketJob
    # Raises: Errno::ENOENT: No such file or directory
    def self.add_whitelist_path(path)
      # Confirms that path exists
-      path =
+      path = IOStreams.path(path).realpath.to_s
      whitelist_paths << path
      whitelist_paths.uniq!
      path
@@ -153,7 +152,7 @@ module RocketJob
    # Raises: Errno::ENOENT: No such file or directory
    def self.delete_whitelist_path(path)
      # Confirms that path exists
-      path =
+      path = IOStreams.path(path).realpath.to_s
      whitelist_paths.delete(path)
      whitelist_paths.uniq!
      path
@@ -186,32 +185,23 @@ module RocketJob
    def each
      SemanticLogger.named_tagged(dirmon_entry: id.to_s) do
        # Case insensitive filename matching
-
-
-        pathname = begin
-          pathname.realpath
-        rescue Errno::ENOENT
-          logger.warn("Unable to expand the realpath for #{pathname.inspect}. Skipping file.")
-          next
-        end
-
-        file_name = pathname.to_s
-
+        IOStreams.each_child(pattern) do |path|
+          path = path.realpath
          # Skip archive directories
-          next if
+          next if path.to_s.include?(archive_directory || self.class.default_archive_directory)
 
          # Security check?
-          if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath|
-            logger.
+          if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath| path.to_s.start_with?(whitepath) }
+            logger.warn "Skipping file: #{path} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
            next
          end
 
          # File must be writable so it can be removed after processing
-
-          logger.
+          if path.respond_to?(:writable?) && !path.writable?
+            logger.warn "Skipping file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
            next
          end
-          yield(
+          yield(path)
        end
      end
    end
@@ -239,17 +229,18 @@ module RocketJob
      nil
    end
 
-    # Archives the file
-    def later(
-      job_id
-
+    # Archives the file, then kicks off a file upload job to upload the archived file.
+    def later(iopath)
+      job_id       = BSON::ObjectId.new
+      archive_path = archive_iopath(iopath).join("#{job_id}_#{iopath.basename}")
+      iopath.move_to(archive_path)
 
      job = RocketJob::Jobs::UploadFileJob.create!(
        job_class_name: job_class_name,
        properties: properties,
-        description: "#{name}: #{
-        upload_file_name:
-        original_file_name:
+        description: "#{name}: #{iopath.basename}",
+        upload_file_name: archive_path.to_s,
+        original_file_name: iopath.to_s,
        job_id: job_id
      )
 
@@ -257,8 +248,8 @@ module RocketJob
        message: 'Created RocketJob::Jobs::UploadFileJob',
        payload: {
          dirmon_entry_name: name,
-          upload_file_name:
-          original_file_name:
+          upload_file_name: archive_path.to_s,
+          original_file_name: iopath.to_s,
          job_class_name: job_class_name,
          job_id: job_id.to_s,
          upload_job_id: job.id.to_s
@@ -278,37 +269,13 @@ module RocketJob
    class_attribute :whitelist_paths
    self.whitelist_paths = Concurrent::Array.new
 
-    # Move the file to the archive directory
-    #
-    # The archived file name is prefixed with the job id
-    #
-    # Returns [String] the fully qualified archived file name
-    #
-    # Note:
-    # - Works across partitions when the file and the archive are on different partitions
-    def archive_file(job_id, pathname)
-      target_path = archive_pathname(pathname)
-      target_path.mkpath
-      target_file_name = target_path.join("#{job_id}_#{pathname.basename}")
-      # In case the file is being moved across partitions
-      FileUtils.move(pathname.to_s, target_file_name.to_s)
-      target_file_name.to_s
-    end
-
    # Returns [Pathname] to the archive directory, and creates it if it does not exist.
    #
    # If `archive_directory` is a relative path, it is appended to the `file_pathname`.
    # If `archive_directory` is an absolute path, it is returned as-is.
-    def
-      path =
-      path
-
-      begin
-        path.mkpath unless path.exist?
-      rescue Errno::ENOENT => exc
-        raise(Errno::ENOENT, "DirmonJob failed to create archive directory: #{path}, #{exc.message}")
-      end
-      path.realpath
+    def archive_iopath(iopath)
+      path = IOStreams.path(archive_directory)
+      path.relative? ? iopath.directory.join(archive_directory) : path
    end
 
    # Validates job_class is a Rocket Job
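For context, a sketch of registering a DirmonEntry that the rewritten, IOStreams-based scan above would process; the name, pattern, and job class are hypothetical:

    entry = RocketJob::DirmonEntry.create!(
      name:              'Nightly user import',
      pattern:           'files/inbound/*.csv',
      job_class_name:    'UserImportJob',
      archive_directory: 'archive' # relative, so resolved against each file's directory via archive_iopath
    )
    entry.enable!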
data/lib/rocket_job/jobs/dirmon_job.rb
CHANGED
@@ -70,11 +70,18 @@ module RocketJob
      def check_directories
        new_file_names = {}
        DirmonEntry.enabled.each do |entry|
-          entry.each do |
+          entry.each do |iopath|
+            # S3 files are only visible once completely uploaded.
+            if iopath.is_a?(IOStreams::Paths::S3)
+              logger.info("S3 File: #{iopath}. Starting: #{entry.job_class_name}")
+              entry.later(iopath)
+              next
+            end
+
            # BSON Keys cannot contain periods
-            key =
+            key = iopath.to_s.tr('.', '_')
            previous_size = previous_file_names[key]
-            size = check_file(entry,
+            size = check_file(entry, iopath, previous_size)
            new_file_names[key] = size if size
          end
        end
@@ -83,14 +90,14 @@ module RocketJob
 
      # Checks if a file should result in starting a job
      # Returns [Integer] file size, or nil if the file started a job
-      def check_file(entry,
-        size =
+      def check_file(entry, iopath, previous_size)
+        size = iopath.size
        if previous_size && (previous_size == size)
-          logger.info("File stabilized: #{
-          entry.later(
+          logger.info("File stabilized: #{iopath}. Starting: #{entry.job_class_name}")
+          entry.later(iopath)
          nil
        else
-          logger.info("Found file: #{
+          logger.info("Found file: #{iopath}. File size: #{size}")
          # Keep for the next run
          size
        end
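DirmonJob itself runs as a self-perpetuating singleton: each run re-checks every enabled entry and only queues a file once its size is unchanged between two consecutive runs, while S3 paths are queued immediately as shown above. A sketch of starting it; the `check_seconds` poll interval is a field on the job, and the value here is arbitrary:

    RocketJob::Jobs::DirmonJob.create!(check_seconds: 300)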
data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
ADDED
@@ -0,0 +1,27 @@
+# Job to dynamically perform ruby code on demand as a Batch,
+# with input and/or output from CSV/JSON or other format supported by Tabular.
+#
+# Notes:
+# - Need to specify `destroy_on_complete: false` to collect output from this job.
+# - `after_code` can be used to automatically download the output of this job to a file on completion.
+#
+# Example: Iterate over all rows in a table:
+#   code = <<-CODE
+#     if user = User.find(row)
+#       user.cleanse_attributes!
+#       user.save(validate: false)
+#     end
+#   CODE
+#   job = RocketJob::Jobs::OnDemandBatchTabularJob.new(code: code, description: 'cleanse users', destroy_on_complete: false)
+#   job.upload("users.csv")
+#   job.save!
+#
+# On completion export the output:
+#   job.download("output.csv")
+module RocketJob
+  module Jobs
+    class OnDemandBatchTabularJob < OnDemandBatchJob
+      include RocketJob::Batch::Tabular
+    end
+  end
+end
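A sketch of the `after_code` approach mentioned in the header comment above, so the output is exported automatically on completion; the exact code string and file names are hypothetical:

    job = RocketJob::Jobs::OnDemandBatchTabularJob.new(
      code:                code,                     # per-row code string, as in the example above
      description:         'cleanse users',
      destroy_on_complete: false,
      after_code:          "download('output.csv')" # hypothetical; runs once the batch completes
    )
    job.upload('users.csv')
    job.save!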
data/lib/rocket_job/sliced/input.rb
CHANGED
@@ -1,15 +1,13 @@
 module RocketJob
   module Sliced
     class Input < Slices
-      def upload(
-
-
-        block ||= -> (io) do
-          iterator = "each_#{stream_mode}".to_sym
-          IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
-        end
-
+      def upload(on_first: nil, &block)
+        # Create indexes before uploading
+        create_indexes
         Writer::Input.collect(self, on_first: on_first, &block)
+      rescue StandardError => exc
+        drop
+        raise(exc)
       end
 
       def upload_mongo_query(criteria, *column_names, &block)
@@ -36,7 +34,7 @@ module RocketJob
          end
        end
 
-
+        upload do |records|
          # Drop down to the mongo driver level to avoid constructing a Model for each document returned
          criteria.klass.collection.find(criteria.selector, options).each do |document|
            records << block.call(document)
@@ -46,8 +44,7 @@ module RocketJob
 
      def upload_arel(arel, *column_names, &block)
        unless block
-          column_names = column_names.collect(&:to_sym)
-          column_names << :id if column_names.size.zero?
+          column_names = column_names.empty? ? [:id] : column_names.collect(&:to_sym)
 
          block =
            if column_names.size == 1
@@ -61,12 +58,11 @@ module RocketJob
          arel = arel.select(selection)
        end
 
-
-          arel.find_each { |model| records << block.call(model) }
-        end
+        upload { |records| arel.find_each { |model| records << block.call(model) } }
      end
 
      def upload_integer_range(start_id, last_id)
+        # Create indexes before uploading
        create_indexes
        count = 0
        while start_id <= last_id
@@ -77,9 +73,13 @@ module RocketJob
          count += 1
        end
        count
+      rescue StandardError => exc
+        drop
+        raise(exc)
      end
 
      def upload_integer_range_in_reverse_order(start_id, last_id)
+        # Create indexes before uploading
        create_indexes
        end_id = last_id
        count = 0
@@ -91,6 +91,9 @@ module RocketJob
          count += 1
        end
        count
+      rescue StandardError => exc
+        drop
+        raise(exc)
      end
 
      # Iterate over each failed record, if any
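The practical effect of the relocated rescue blocks: a failed upload now drops the partially written input collection inside Sliced::Input itself, rather than relying on each caller to clean up. A sketch (the file name is hypothetical):

    begin
      job.upload('corrupt.csv.gz')
    rescue StandardError
      # Sliced::Input#upload already dropped the input collection,
      # so no partial slices remain and the upload can simply be retried.
      raise
    end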
data/lib/rocket_job/sliced/output.rb
CHANGED
@@ -3,93 +3,18 @@ require 'tempfile'
 module RocketJob
   module Sliced
     class Output < Slices
-
-
-      # Returns [Integer] the number of records returned from the collection
-      #
-      # Parameters
-      #   file_name_or_io [String|IO]
-      #     The file_name of the file to write to, or an IO Stream that implements
-      #     #write.
-      #
-      #   options:
-      #     streams [Symbol|Array]
-      #       The formats/streams that be used to convert the data whilst it is
-      #       being written.
-      #       When nil, `file_name_or_io` will be inspected to try and determine what
-      #       streams should be applied.
-      #       Default: nil
-      #
-      #   Any other option that can be supplied to IOStreams::Line::Writer
-      #
-      #   Stream types / extensions supported:
-      #     .zip       Zip File                                  [ :zip ]
-      #     .gz, .gzip GZip File                                 [ :gzip ]
-      #     .enc       File Encrypted using symmetric encryption [ :enc ]
-      #
-      #   When a file is encrypted, it may also be compressed:
-      #     .zip.enc [ :zip, :enc ]
-      #     .gz.enc  [ :gz, :enc ]
-      #
-      # Example: Zip
-      #   # Since csv is not known to RocketJob it is ignored
-      #   job.output.download('myfile.csv.zip')
-      #
-      # Example: Encrypted Zip
-      #   job.output.download('myfile.csv.zip.enc')
-      #
-      # Example: Explicitly set the streams
-      #   job.output.download('myfile.ze', streams: [:zip, :enc])
-      #
-      # Example: Supply custom options
-      #   job.output.download('myfile.csv.enc', streams: [enc: { compress: true }])
-      #
-      # Example: Supply custom options
-      #   job.output.download('myfile.csv.zip', streams: [ zip: { zip_file_name: 'myfile.csv' } ])
-      #
-      # Example: Extract streams from filename but write to a temp file
-      #   t = Tempfile.new('my_project')
-      #   job.output.download(t.to_path, file_name: 'myfile.gz.enc')
-      #
-      # Example: Add a header and/or trailer record to the downloaded file:
-      #   IOStreams.writer('/tmp/file.txt.gz') do |writer|
-      #     writer << "Header\n"
-      #     job.download do |line|
-      #       writer << line
-      #     end
-      #     writer << "Trailer\n"
-      #   end
-      #
-      # Notes:
-      # - The records are returned in '_id' order. Usually this is the order in
-      #   which the records were originally loaded.
-      def download(file_name_or_io = nil, header_line: nil, **args)
-        raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block_given?
+      def download(header_line: nil)
+        raise(ArgumentError, 'Block is mandatory') unless block_given?
 
-
-
-        if block_given?
-          # Write the header line
-          yield(header_line) if header_line
+        # Write the header line
+        yield(header_line) if header_line
 
-
-
-
-
-
-
-          end
-        else
-          IOStreams.line_writer(file_name_or_io, **args) do |io|
-            # Write the header line
-            io << header_line if header_line
-
-            each do |slice|
-              slice.each do |record|
-                record_count += 1
-                io << record
-              end
-            end
+        # Call the supplied block for every record returned
+        record_count = 0
+        each do |slice|
+          slice.each do |record|
+            record_count += 1
+            yield(record)
          end
        end
        record_count
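With the file and stream handling moved up into Batch::IO#download, Sliced::Output#download is now block-only. A sketch of calling it directly; the header value is hypothetical:

    count = job.output.download(header_line: 'name,email') do |record|
      puts record
    end
    puts "#{count} records downloaded"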
data/lib/rocket_job/sliced/writer/input.rb
CHANGED
@@ -12,16 +12,10 @@
      #   Block to call on the first line only, instead of storing in the slice.
      #   Useful for extracting the header row
      #   Default: nil
-      def self.collect(input, **args
+      def self.collect(input, **args)
        writer = new(input, **args)
-
-        input.create_indexes if input.respond_to?(:create_indexes)
-        block.call(writer)
+        yield(writer)
        writer.record_count
-      rescue Exception => exc
-        # Drop input collection when upload fails
-        input.drop
-        raise exc
      ensure
        writer&.close
      end
data/lib/rocket_job/version.rb
CHANGED
data/lib/rocket_job/worker.rb
CHANGED
@@ -119,6 +119,9 @@ module RocketJob
 
      SemanticLogger.named_tagged(job: job.id.to_s) do
        processed = true unless job.rocket_job_work(self, false, current_filter)
+
+        # Return the database connections for this thread back to the connection pool
+        ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
      end
    end
    processed
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rocketjob
 version: !ruby/object:Gem::Version
-  version: 4.
+  version: 4.3.0.beta
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-10-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aasm
@@ -44,14 +44,14 @@ dependencies:
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: 1.0.0.beta
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: 1.0.0.beta
- !ruby/object:Gem::Dependency
  name: mongoid
  requirement: !ruby/object:Gem::Requirement
@@ -131,6 +131,7 @@ files:
 - lib/rocket_job/jobs/dirmon_job.rb
 - lib/rocket_job/jobs/housekeeping_job.rb
 - lib/rocket_job/jobs/on_demand_batch_job.rb
+- lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
 - lib/rocket_job/jobs/on_demand_job.rb
 - lib/rocket_job/jobs/performance_job.rb
 - lib/rocket_job/jobs/simple_job.rb
@@ -189,11 +190,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '2.3'
 required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
-  - - "
+  - - ">"
    - !ruby/object:Gem::Version
-      version:
+      version: 1.3.1
 requirements: []
-rubygems_version: 3.0.
+rubygems_version: 3.0.6
 signing_key:
 specification_version: 4
 summary: Ruby's missing batch system.