rocketjob 4.2.0 → 4.3.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +111 -70
- data/lib/rocket_job/batch/tabular/input.rb +1 -1
- data/lib/rocket_job/dirmon_entry.rb +23 -56
- data/lib/rocket_job/jobs/dirmon_job.rb +15 -8
- data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb +27 -0
- data/lib/rocket_job/sliced/input.rb +17 -14
- data/lib/rocket_job/sliced/output.rb +10 -85
- data/lib/rocket_job/sliced/writer/input.rb +2 -8
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +3 -0
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8ae22e2ca14255089a3500e9294fc689847bb92525da45ba90426cae38a45378
+  data.tar.gz: fc0f6dd9c042020a01d47f4c2be35df3896db47e3cc07d875a27f057a8e64ab6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cb612469360af546d76ea1d024e80cbdf50f40693533fd3e608927911d62c86da6ad4ba290da0186ce98b3be95b3cfad21ceed3bf22091d6e80cf2adc7b2387d
+  data.tar.gz: 4038eb8af3353d6358f3dc74c50410d1f16cf96fe716c19b29aa8843428e2a8938267b3d130c4023b7234ca45ad5a546d65836ea69775d354889e6cb50121fd6
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -19,7 +19,7 @@ module RocketJob
         collection_name = "rocket_job.inputs.#{id}"
         collection_name << ".#{category}" unless category == :main
 
-        (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(
+        (@inputs ||= {})[category] ||= RocketJob::Sliced::Input.new(collection_name: collection_name, slice_size: slice_size)
       end
 
       # Returns [RocketJob::Sliced::Output] output collection for holding output slices
@@ -36,23 +36,18 @@ module RocketJob
         collection_name = "rocket_job.outputs.#{id}"
         collection_name << ".#{category}" unless category == :main
 
-        (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(
+        (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(collection_name: collection_name, slice_size: slice_size)
       end
 
-      # Upload the supplied
+      # Upload the supplied file, io, IOStreams::Path, or IOStreams::Stream.
       #
       # Returns [Integer] the number of records uploaded.
       #
       # Parameters
-      #
+      #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
       #     Full path and file name to stream into the job,
       #     Or, an IO Stream that responds to: :read
-      #
-      #   streams [Symbol|Array]
-      #     Streams to convert the data whilst it is being read.
-      #     When nil, the file_name extensions will be inspected to determine what
-      #     streams should be applied.
-      #     Default: nil
+      #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
       #
       #   delimiter[String]
       #     Line / Record delimiter to use to break the stream up into records
@@ -63,9 +58,14 @@ module RocketJob
       #     Searches for the first "\r\n" or "\n" and then uses that as the
       #     delimiter for all subsequent records
       #
-      #
-      #
-      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStreams::Stream#each_line, IOStreams::Stream#each_row, and IOStreams::Stream#each_record.
       #
       #   encoding: [String|Encoding]
       #     Encode returned data with this encoding.
@@ -74,11 +74,15 @@ module RocketJob
       #     'UTF-8': UTF-8 Format
      #     Etc.
      #     Default: 'UTF-8'
+      #     NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
+      #       if not already set in the supplied stream.
       #
       #   encode_replace: [String]
       #     The character to replace with when a character cannot be converted to the target encoding.
       #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
       #     Default: nil
+      #     NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
+      #       if not already set in the supplied stream.
       #
       #   encode_cleaner: [nil|symbol|Proc]
       #     Cleanse data read from the input stream.
@@ -86,42 +90,38 @@ module RocketJob
       #     :printable Cleanse all non-printable characters except \r and \n
       #     Proc/lambda Proc to call after every read to cleanse the data
       #     Default: :printable
-      #
-      #
-      #   :line
-      #     Uploads the file a line (String) at a time for processing by workers.
-      #   :row
-      #     Parses each line from the file as an Array and uploads each array for processing by workers.
-      #   :record
-      #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
-      #   See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+      #     NOTE: If a IOStreams::Path, or IOStreams::Stream was supplied then the encoding will be set
+      #       if not already set in the supplied stream.
       #
       # Example:
       #   # Load plain text records from a file
-      #   job.
+      #   job.upload('hello.csv')
       #
       # Example:
       #   # Load plain text records from a file, stripping all non-printable characters,
       #   # as well as any characters that cannot be converted to UTF-8
-      #   job.
+      #   job.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
       #
       # Example: Zip
       #   # Since csv is not known to RocketJob it is ignored
-      #   job.
+      #   job.upload('myfile.csv.zip')
       #
       # Example: Encrypted Zip
-      #   job.
+      #   job.upload('myfile.csv.zip.enc')
       #
       # Example: Explicitly set the streams
-      #
+      #   path = IOStreams.path('myfile.ze').stream(:encode, encoding: 'UTF-8').stream(:zip).stream(:enc)
+      #   job.upload(path)
       #
       # Example: Supply custom options
-      #
+      #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false).option(:encode, encoding: 'UTF-8')
+      #   job.upload(path)
       #
-      # Example:
-      #
-      #
-      #
+      # Example: Read from a tempfile and use the original file name to determine which streams to apply
+      #   temp_file = Tempfile.new('my_project')
+      #   temp_file.write(gzip_and_encrypted_data)
+      #   stream = IOStreams.stream(temp_file).file_name('myfile.gz.enc')
+      #   job.upload(stream)
       #
       # Example: Upload by writing records one at a time to the upload stream
       #   job.upload do |writer|
@@ -140,18 +140,22 @@ module RocketJob
       # * If an io stream is supplied, it is read until it returns nil.
       # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
       # * CSV parsing is slow, so it is usually left for the workers to do.
-      def upload(
-
-
-
-
-
-
+      def upload(stream = nil, file_name: nil, category: :main, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, stream_mode: :line, on_first: nil, **args, &block)
+        raise(ArgumentError, 'Either stream, or a block must be supplied') unless stream || block
+
+        count =
+          if block
+            input(category).upload(on_first: on_first, &block)
+          else
+            path = build_path(stream, file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+
+            self.upload_file_name = path.file_name
+            input(category).upload(on_first: on_first) do |io|
+              path.public_send("each_#{stream_mode}".to_sym, **args) { |line| io << line }
+            end
+          end
         self.record_count = (record_count || 0) + count
         count
-      rescue StandardError => exc
-        input(category).delete_all
-        raise(exc)
       end
 
       # Upload results from an Arel into RocketJob::SlicedJob.
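The reworked `upload` above accepts a plain file name, an IO, or an IOStreams path/stream, with `stream_mode` selecting `each_line`, `each_row`, or `each_record`. A minimal usage sketch, assuming a hypothetical batch job class `MyJob` that includes `RocketJob::Batch`, and hypothetical file and S3 paths:

    # Plain file name: the .csv.gz extensions select the streams to apply.
    job = MyJob.new
    job.upload('users.csv.gz')

    # Explicit IOStreams path (for example S3); :row uploads each line as a parsed Array.
    path = IOStreams.path('s3://my-bucket/users.csv')
    job.upload(path, stream_mode: :row)

    # Block form: write records directly into the input collection.
    job.upload do |writer|
      writer << 'first record'
      writer << 'second record'
    end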
@@ -188,9 +192,6 @@ module RocketJob
         count = input(category).upload_arel(arel, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
-      rescue StandardError => exc
-        input(category).delete_all
-        raise(exc)
       end
 
       # Upload the result of a MongoDB query to the input collection for processing
@@ -232,9 +233,6 @@ module RocketJob
         count = input(category).upload_mongo_query(criteria, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
-      rescue StandardError => exc
-        input(category).delete_all
-        raise(exc)
       end
 
       # Upload sliced range of integer requests as arrays of start and end ids.
@@ -263,9 +261,6 @@ module RocketJob
         count = last_id - start_id + 1
         self.record_count = (record_count || 0) + count
         count
-      rescue StandardError => exc
-        input(category).delete_all
-        raise(exc)
       end
 
      # Upload sliced range of integer requests as an arrays of start and end ids
@@ -298,9 +293,6 @@ module RocketJob
         count = last_id - start_id + 1
         self.record_count = (record_count || 0) + count
         count
-      rescue StandardError => exc
-        input(category).delete_all
-        raise(exc)
       end
 
       # Upload the supplied slices for processing by workers
@@ -326,24 +318,71 @@ module RocketJob
         count
       end
 
-      # Download the output data into the supplied
+      # Download the output data into the supplied file, io, IOStreams::Path, or IOStreams::Stream.
+      # Returns [Integer] the number of records / lines downloaded.
       #
       # Parameters
-      #
-      #
+      #   stream [String | IO | IOStreams::Path | IOStreams::Stream]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO stream that responds to: :write
+      #     Or, an IOStreams path such as IOStreams::Paths::File, or IOStreams::Paths::S3
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.download('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.download('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   path = IOStreams.path('myfile.ze').stream(:zip).stream(:enc)
+      #   job.download(path)
       #
-      #
-      #
-      #
-      #
+      # Example: Supply custom options
+      #   path = IOStreams.path('myfile.csv.enc').option(:enc, compress: false)
+      #   job.download(path)
+      #
+      # Example: Supply custom options. Set the file name within the zip file.
+      #   path = IOStreams.path('myfile.csv.zip').option(:zip, zip_file_name: 'myfile.csv')
+      #   job.download(path)
+      #
+      # Example: Download into a tempfile, or stream, using the original file name to determine the streams to apply:
+      #   tempfile = Tempfile.new('my_project')
+      #   stream = IOStreams.stream(tempfile).file_name('myfile.gz.enc')
+      #   job.download(stream)
+      #
+      # Example: Add a header and/or trailer record to the downloaded file:
+      #   IOStreams.path('/tmp/file.txt.gz').writer do |writer|
+      #     writer << "Header\n"
+      #     job.download do |line|
+      #       writer << line + "\n"
+      #     end
+      #     writer << "Trailer\n"
+      #   end
       #
-      #
+      # Example: Add a header and/or trailer record to the downloaded file, letting the line writer add the line breaks:
+      #   IOStreams.path('/tmp/file.txt.gz').line_writer do |writer|
+      #     writer << "Header"
+      #     job.download do |line|
+      #       writer << line
+      #     end
+      #     writer << "Trailer"
+      #   end
       #
-      #
-
+      # Notes:
+      # - The records are returned in '_id' order. Usually this is the order in
+      #   which the records were originally loaded.
+      def download(stream = nil, category: :main, header_line: nil, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil, **args, &block)
         raise "Cannot download incomplete job: #{id}. Currently in state: #{state}-#{sub_state}" if rocket_job_processing?
 
-
+        if block
+          output(category).download(header_line: header_line, &block)
+        else
+          path = build_path(stream, nil, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace)
+          path.line_writer(**args) do |io|
+            output(category).download(header_line: header_line) { |record| io << record }
+          end
+        end
       end
 
       # Writes the supplied result, Batch::Result or Batch::Results to the relevant collections.
@@ -381,11 +420,13 @@ module RocketJob
 
       private
 
-      def
-
-
-
-
+      def build_path(stream, file_name, encoding: nil, encode_cleaner: nil, encode_replace: nil)
+        path = IOStreams.new(stream)
+        path.file_name = file_name if file_name
+        if (encoding || encode_cleaner || encode_replace) && !path.setting(:encode)
+          path.option_or_stream(:encode, encoding: encoding, cleaner: encode_cleaner, replace: encode_replace)
+        end
+        path
       end
     end
   end
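With `build_path` in place, `download` mirrors `upload`: pass a file name or IOStreams path and the output collection is streamed through it, or pass a block and write the records yourself. A minimal sketch, assuming a hypothetical completed batch job `job` and hypothetical output file names:

    # File form: gzip is inferred from the extension; encoding defaults to UTF-8.
    job.download('results.csv.gz')

    # Block form: the caller owns the stream, for example to prepend a header line.
    IOStreams.path('results.csv').line_writer do |io|
      io << 'id,name'
      job.download { |line| io << line }
    end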
data/lib/rocket_job/batch/tabular/input.rb
CHANGED
@@ -20,7 +20,7 @@ module RocketJob
       #     Parses each line from the file as an Array and uploads each array for processing by workers.
       #   :record
       #     Parses each line from the file into a Hash and uploads each hash for processing by workers.
-      #   See
+      #   See IOStreams#each_line, IOStreams#each_row, and IOStreams#each_record.
       field :tabular_input_mode, type: Symbol, default: :line, class_attribute: true, user_editable: true, copy_on_restart: true
 
       validates_inclusion_of :tabular_input_format, in: IOStreams::Tabular.registered_formats
data/lib/rocket_job/dirmon_entry.rb
CHANGED
@@ -1,5 +1,4 @@
 require 'concurrent'
-require 'pathname'
 require 'fileutils'
 module RocketJob
   class DirmonEntry
@@ -143,7 +142,7 @@ module RocketJob
     # Raises: Errno::ENOENT: No such file or directory
     def self.add_whitelist_path(path)
       # Confirms that path exists
-      path =
+      path = IOStreams.path(path).realpath.to_s
       whitelist_paths << path
       whitelist_paths.uniq!
       path
@@ -153,7 +152,7 @@ module RocketJob
     # Raises: Errno::ENOENT: No such file or directory
     def self.delete_whitelist_path(path)
       # Confirms that path exists
-      path =
+      path = IOStreams.path(path).realpath.to_s
       whitelist_paths.delete(path)
       whitelist_paths.uniq!
       path
@@ -186,32 +185,23 @@ module RocketJob
     def each
       SemanticLogger.named_tagged(dirmon_entry: id.to_s) do
         # Case insensitive filename matching
-
-
-        pathname = begin
-          pathname.realpath
-        rescue Errno::ENOENT
-          logger.warn("Unable to expand the realpath for #{pathname.inspect}. Skipping file.")
-          next
-        end
-
-        file_name = pathname.to_s
-
+        IOStreams.each_child(pattern) do |path|
+          path = path.realpath
           # Skip archive directories
-          next if
+          next if path.to_s.include?(archive_directory || self.class.default_archive_directory)
 
           # Security check?
-          if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath|
-            logger.
+          if whitelist_paths.size.positive? && whitelist_paths.none? { |whitepath| path.to_s.start_with?(whitepath) }
+            logger.warn "Skipping file: #{path} since it is not in any of the whitelisted paths: #{whitelist_paths.join(', ')}"
             next
           end
 
           # File must be writable so it can be removed after processing
-
-          logger.
+          if path.respond_to?(:writable?) && !path.writable?
+            logger.warn "Skipping file: #{file_name} since it is not writable by the current user. Must be able to delete/move the file after queueing the job"
            next
          end
-          yield(
+          yield(path)
         end
       end
     end
@@ -239,17 +229,18 @@ module RocketJob
      nil
    end
 
-    # Archives the file
-    def later(
-      job_id
-
+    # Archives the file, then kicks off a file upload job to upload the archived file.
+    def later(iopath)
+      job_id       = BSON::ObjectId.new
+      archive_path = archive_iopath(iopath).join("#{job_id}_#{iopath.basename}")
+      iopath.move_to(archive_path)
 
      job = RocketJob::Jobs::UploadFileJob.create!(
        job_class_name: job_class_name,
        properties: properties,
-        description: "#{name}: #{
-        upload_file_name:
-        original_file_name:
+        description: "#{name}: #{iopath.basename}",
+        upload_file_name: archive_path.to_s,
+        original_file_name: iopath.to_s,
        job_id: job_id
      )
 
@@ -257,8 +248,8 @@ module RocketJob
        message: 'Created RocketJob::Jobs::UploadFileJob',
        payload: {
          dirmon_entry_name: name,
-          upload_file_name:
-          original_file_name:
+          upload_file_name: archive_path.to_s,
+          original_file_name: iopath.to_s,
          job_class_name: job_class_name,
          job_id: job_id.to_s,
          upload_job_id: job.id.to_s
@@ -278,37 +269,13 @@ module RocketJob
    class_attribute :whitelist_paths
    self.whitelist_paths = Concurrent::Array.new
 
-    # Move the file to the archive directory
-    #
-    # The archived file name is prefixed with the job id
-    #
-    # Returns [String] the fully qualified archived file name
-    #
-    # Note:
-    # - Works across partitions when the file and the archive are on different partitions
-    def archive_file(job_id, pathname)
-      target_path = archive_pathname(pathname)
-      target_path.mkpath
-      target_file_name = target_path.join("#{job_id}_#{pathname.basename}")
-      # In case the file is being moved across partitions
-      FileUtils.move(pathname.to_s, target_file_name.to_s)
-      target_file_name.to_s
-    end
-
    # Returns [Pathname] to the archive directory, and creates it if it does not exist.
    #
    # If `archive_directory` is a relative path, it is appended to the `file_pathname`.
    # If `archive_directory` is an absolute path, it is returned as-is.
-    def
-      path =
-      path
-
-      begin
-        path.mkpath unless path.exist?
-      rescue Errno::ENOENT => exc
-        raise(Errno::ENOENT, "DirmonJob failed to create archive directory: #{path}, #{exc.message}")
-      end
-      path.realpath
+    def archive_iopath(iopath)
+      path = IOStreams.path(archive_directory)
+      path.relative? ? iopath.directory.join(archive_directory) : path
    end
 
    # Validates job_class is a Rocket Job
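`archive_iopath` above replaces the old `archive_file`/`archive_pathname` pair and resolves the archive location through IOStreams instead of Pathname/FileUtils. A sketch of the relative/absolute rule described in the comment, using hypothetical paths:

    iopath = IOStreams.path('/data/in/users.csv')

    # A relative archive_directory lands under the watched file's directory:
    IOStreams.path('archive').relative?        # => true
    iopath.directory.join('archive')           # => .../data/in/archive

    # An absolute archive_directory is used as-is:
    IOStreams.path('/data/archive').relative?  # => false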
data/lib/rocket_job/jobs/dirmon_job.rb
CHANGED
@@ -70,11 +70,18 @@ module RocketJob
    def check_directories
      new_file_names = {}
      DirmonEntry.enabled.each do |entry|
-        entry.each do |
+        entry.each do |iopath|
+          # S3 files are only visible once completely uploaded.
+          if iopath.is_a?(IOStreams::Paths::S3)
+            logger.info("S3 File: #{iopath}. Starting: #{entry.job_class_name}")
+            entry.later(iopath)
+            next
+          end
+
          # BSON Keys cannot contain periods
-          key =
+          key = iopath.to_s.tr('.', '_')
          previous_size = previous_file_names[key]
-          size = check_file(entry,
+          size = check_file(entry, iopath, previous_size)
          new_file_names[key] = size if size
        end
      end
@@ -83,14 +90,14 @@ module RocketJob
 
    # Checks if a file should result in starting a job
    # Returns [Integer] file size, or nil if the file started a job
-    def check_file(entry,
-      size =
+    def check_file(entry, iopath, previous_size)
+      size = iopath.size
      if previous_size && (previous_size == size)
-        logger.info("File stabilized: #{
-        entry.later(
+        logger.info("File stabilized: #{iopath}. Starting: #{entry.job_class_name}")
+        entry.later(iopath)
        nil
      else
-        logger.info("Found file: #{
+        logger.info("Found file: #{iopath}. File size: #{size}")
        # Keep for the next run
        size
      end
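`check_file` hands a file to `entry.later` only once its size is unchanged between two consecutive scans, so partially written files are not picked up; S3 paths bypass this check above because S3 objects only become visible once fully uploaded. The rule as a standalone sketch (hypothetical helper, not part of the gem):

    # A file is considered stable once its size stops changing between scans.
    def stable?(previous_size, current_size)
      !previous_size.nil? && previous_size == current_size
    end

    stable?(nil, 1024)   # => false: first sighting, remember the size
    stable?(1024, 2048)  # => false: still being written
    stable?(2048, 2048)  # => true:  unchanged, start the job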
data/lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
ADDED
@@ -0,0 +1,27 @@
+# Job to dynamically perform ruby code on demand as a Batch,
+# with input and/or output from CSV/JSON or other format supported by Tabular.
+#
+# Notes:
+# - Need to specify `destroy_on_complete: false` to collect output from this job.
+# - `after_code` can be used to automatically download the output of this job to a file on completion.
+#
+# Example: Iterate over all rows in a table:
+#   code = <<-CODE
+#     if user = User.find(row)
+#       user.cleanse_attributes!
+#       user.save(validate: false)
+#     end
+#   CODE
+#   job = RocketJob::Jobs::OnDemandBatchTabularJob.new(code: code, description: 'cleanse users', destroy_on_complete: false)
+#   job.upload("users.csv")
+#   job.save!
+#
+# On completion export the output:
+#   job.download("output.csv")
+module RocketJob
+  module Jobs
+    class OnDemandBatchTabularJob < OnDemandBatchJob
+      include RocketJob::Batch::Tabular
+    end
+  end
+end
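A hedged sketch of the `after_code` hook mentioned in the file comments, assuming it accepts a Ruby string the same way `code` does, so the output is exported as soon as the job completes:

    code = <<-CODE
      if user = User.find(row)
        user.cleanse_attributes!
        user.save(validate: false)
      end
    CODE
    job = RocketJob::Jobs::OnDemandBatchTabularJob.new(
      code:                code,
      after_code:          'download("output.csv")', # assumption: evaluated on completion
      destroy_on_complete: false
    )
    job.upload('users.csv')
    job.save!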
data/lib/rocket_job/sliced/input.rb
CHANGED
@@ -1,15 +1,13 @@
 module RocketJob
   module Sliced
     class Input < Slices
-      def upload(
-
-
-        block ||= -> (io) do
-          iterator = "each_#{stream_mode}".to_sym
-          IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
-        end
-
+      def upload(on_first: nil, &block)
+        # Create indexes before uploading
+        create_indexes
         Writer::Input.collect(self, on_first: on_first, &block)
+      rescue StandardError => exc
+        drop
+        raise(exc)
       end
 
       def upload_mongo_query(criteria, *column_names, &block)
@@ -36,7 +34,7 @@ module RocketJob
           end
         end
 
-
+        upload do |records|
           # Drop down to the mongo driver level to avoid constructing a Model for each document returned
           criteria.klass.collection.find(criteria.selector, options).each do |document|
             records << block.call(document)
|
|
46
44
|
|
47
45
|
def upload_arel(arel, *column_names, &block)
|
48
46
|
unless block
|
49
|
-
column_names = column_names.collect(&:to_sym)
|
50
|
-
column_names << :id if column_names.size.zero?
|
47
|
+
column_names = column_names.empty? ? [:id] : column_names.collect(&:to_sym)
|
51
48
|
|
52
49
|
block =
|
53
50
|
if column_names.size == 1
|
@@ -61,12 +58,11 @@ module RocketJob
|
|
61
58
|
arel = arel.select(selection)
|
62
59
|
end
|
63
60
|
|
64
|
-
|
65
|
-
arel.find_each { |model| records << block.call(model) }
|
66
|
-
end
|
61
|
+
upload { |records| arel.find_each { |model| records << block.call(model) } }
|
67
62
|
end
|
68
63
|
|
69
64
|
def upload_integer_range(start_id, last_id)
|
65
|
+
# Create indexes before uploading
|
70
66
|
create_indexes
|
71
67
|
count = 0
|
72
68
|
while start_id <= last_id
|
@@ -77,9 +73,13 @@ module RocketJob
           count += 1
         end
         count
+      rescue StandardError => exc
+        drop
+        raise(exc)
       end
 
       def upload_integer_range_in_reverse_order(start_id, last_id)
+        # Create indexes before uploading
         create_indexes
         end_id = last_id
         count = 0
@@ -91,6 +91,9 @@ module RocketJob
           count += 1
         end
         count
+      rescue StandardError => exc
+        drop
+        raise(exc)
       end
 
       # Iterate over each failed record, if any
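Upload error handling has moved down a layer in this release: instead of `Batch::IO` calling `input(category).delete_all` on failure, each upload method on `Sliced::Input` now drops its collection before re-raising. The effect, sketched with a hypothetical job and file name:

    begin
      job.upload('missing_file.csv')
    rescue StandardError
      # The partially uploaded input collection was dropped,
      # so a retry starts from a clean input collection.
    end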
data/lib/rocket_job/sliced/output.rb
CHANGED
@@ -3,93 +3,18 @@ require 'tempfile'
 module RocketJob
   module Sliced
     class Output < Slices
-
-
-      # Returns [Integer] the number of records returned from the collection
-      #
-      # Parameters
-      #   file_name_or_io [String|IO]
-      #     The file_name of the file to write to, or an IO Stream that implements
-      #     #write.
-      #
-      #   options:
-      #     streams [Symbol|Array]
-      #       The formats/streams that be used to convert the data whilst it is
-      #       being written.
-      #       When nil, `file_name_or_io` will be inspected to try and determine what
-      #       streams should be applied.
-      #       Default: nil
-      #
-      #     Any other option that can be supplied to IOStreams::Line::Writer
-      #
-      # Stream types / extensions supported:
-      #   .zip Zip File [ :zip ]
-      #   .gz, .gzip GZip File [ :gzip ]
-      #   .enc File Encrypted using symmetric encryption [ :enc ]
-      #
-      # When a file is encrypted, it may also be compressed:
-      #   .zip.enc [ :zip, :enc ]
-      #   .gz.enc [ :gz, :enc ]
-      #
-      # Example: Zip
-      #   # Since csv is not known to RocketJob it is ignored
-      #   job.output.download('myfile.csv.zip')
-      #
-      # Example: Encrypted Zip
-      #   job.output.download('myfile.csv.zip.enc')
-      #
-      # Example: Explicitly set the streams
-      #   job.output.download('myfile.ze', streams: [:zip, :enc])
-      #
-      # Example: Supply custom options
-      #   job.output.download('myfile.csv.enc', streams: [enc: { compress: true }])
-      #
-      # Example: Supply custom options
-      #   job.output.download('myfile.csv.zip', streams: [ zip: { zip_file_name: 'myfile.csv' } ])
-      #
-      # Example: Extract streams from filename but write to a temp file
-      #   t = Tempfile.new('my_project')
-      #   job.output.download(t.to_path, file_name: 'myfile.gz.enc')
-      #
-      # Example: Add a header and/or trailer record to the downloaded file:
-      #   IOStreams.writer('/tmp/file.txt.gz') do |writer|
-      #     writer << "Header\n"
-      #     job.download do |line|
-      #       writer << line
-      #     end
-      #     writer << "Trailer\n"
-      #   end
-      #
-      # Notes:
-      # - The records are returned in '_id' order. Usually this is the order in
-      #   which the records were originally loaded.
-      def download(file_name_or_io = nil, header_line: nil, **args)
-        raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block_given?
+      def download(header_line: nil)
+        raise(ArgumentError, 'Block is mandatory') unless block_given?
 
-
-
-        if block_given?
-          # Write the header line
-          yield(header_line) if header_line
+        # Write the header line
+        yield(header_line) if header_line
 
-
-
-
-
-
-
-          end
-        else
-          IOStreams.line_writer(file_name_or_io, **args) do |io|
-            # Write the header line
-            io << header_line if header_line
-
-            each do |slice|
-              slice.each do |record|
-                record_count += 1
-                io << record
-              end
-            end
+        # Call the supplied block for every record returned
+        record_count = 0
+        each do |slice|
+          slice.each do |record|
+            record_count += 1
+            yield(record)
           end
         end
         record_count
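`Sliced::Output#download` is now block-only: all file and stream handling lives one level up in `RocketJob::Batch::IO#download`, and the output collection simply yields each record in '_id' order. A minimal sketch with a hypothetical completed job:

    count = job.output.download(header_line: 'id,name') do |record|
      puts record
    end
    # count is the number of records yielded, excluding the header line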
data/lib/rocket_job/sliced/writer/input.rb
CHANGED
@@ -12,16 +12,10 @@ module RocketJob
      #   Block to call on the first line only, instead of storing in the slice.
      #   Useful for extracting the header row
      #   Default: nil
-      def self.collect(input, **args
+      def self.collect(input, **args)
        writer = new(input, **args)
-
-        input.create_indexes if input.respond_to?(:create_indexes)
-        block.call(writer)
+        yield(writer)
        writer.record_count
-      rescue Exception => exc
-        # Drop input collection when upload fails
-        input.drop
-        raise exc
      ensure
        writer&.close
      end
data/lib/rocket_job/version.rb
CHANGED
data/lib/rocket_job/worker.rb
CHANGED
@@ -119,6 +119,9 @@ module RocketJob
 
      SemanticLogger.named_tagged(job: job.id.to_s) do
        processed = true unless job.rocket_job_work(self, false, current_filter)
+
+        # Return the database connections for this thread back to the connection pool
+        ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
      end
    end
    processed
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rocketjob
 version: !ruby/object:Gem::Version
-  version: 4.2.0
+  version: 4.3.0.beta
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-10-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aasm
@@ -44,14 +44,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 1.0.0.beta
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 1.0.0.beta
 - !ruby/object:Gem::Dependency
   name: mongoid
   requirement: !ruby/object:Gem::Requirement
@@ -131,6 +131,7 @@ files:
 - lib/rocket_job/jobs/dirmon_job.rb
 - lib/rocket_job/jobs/housekeeping_job.rb
 - lib/rocket_job/jobs/on_demand_batch_job.rb
+- lib/rocket_job/jobs/on_demand_batch_tabular_job.rb
 - lib/rocket_job/jobs/on_demand_job.rb
 - lib/rocket_job/jobs/performance_job.rb
 - lib/rocket_job/jobs/simple_job.rb
@@ -189,11 +190,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '2.3'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "
+  - - ">"
     - !ruby/object:Gem::Version
-      version:
+      version: 1.3.1
 requirements: []
-rubygems_version: 3.0.
+rubygems_version: 3.0.6
 signing_key:
 specification_version: 4
 summary: Ruby's missing batch system.