rocketjob 4.1.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rocket_job/batch/io.rb +236 -10
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +3 -3
- data/lib/rocket_job/sliced/input.rb +2 -188
- data/lib/rocket_job/sliced/slice.rb +8 -0
- data/lib/rocket_job/sliced/slices.rb +1 -0
- data/lib/rocket_job/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f4a9d008dd87609ead82e1ddb964aa798fc412e40e0e9634bb0ac0ee1a136a6b
+  data.tar.gz: ea8f96c4791b84175488e7ab9cc0e31b05b62403e98c4853cafb339f85c118d9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1eb4a41765c4096fd6ac9c664da2bf27afebc37ce82cc4fc7545e22609443bd263e8a8bb04f22a986bc0bc4babf0797109fc958b3ca4122b3fc226ab9c9db8bc
+  data.tar.gz: 4507a2de381ddef1dee859cc906564d59167e7336002e568ff5cac06d4281cd1b214329a434375ba9c81bfc3ff69e03edf9a3edf4bab1703986b86feda95d907
data/lib/rocket_job/batch/io.rb
CHANGED
@@ -39,21 +39,107 @@ module RocketJob
         (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
       end

-      # Upload the supplied file_name or stream
+      # Upload the supplied file_name or stream.
       #
-      #
+      # Returns [Integer] the number of records uploaded.
       #
-      #
-      #
-      #
-      #
+      # Parameters
+      #   file_name_or_io [String | IO]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO Stream that responds to: :read
       #
-      #
+      #   streams [Symbol|Array]
+      #     Streams to convert the data whilst it is being read.
+      #     When nil, the file_name extensions will be inspected to determine what
+      #     streams should be applied.
+      #     Default: nil
       #
-      #
+      #   delimiter[String]
+      #     Line / Record delimiter to use to break the stream up into records
+      #       Any string to break the stream up by
+      #       The records when saved will not include this delimiter
+      #     Default: nil
+      #       Automatically detect line endings and break up by line
+      #       Searches for the first "\r\n" or "\n" and then uses that as the
+      #       delimiter for all subsequent records
       #
-      #
-      #
+      #   buffer_size [Integer]
+      #     Size of the blocks when reading from the input file / stream.
+      #     Default: 65536 ( 64K )
+      #
+      #   encoding: [String|Encoding]
+      #     Encode returned data with this encoding.
+      #     'US-ASCII': Original 7 bit ASCII Format
+      #     'ASCII-8BIT': 8-bit ASCII Format
+      #     'UTF-8': UTF-8 Format
+      #     Etc.
+      #     Default: 'UTF-8'
+      #
+      #   encode_replace: [String]
+      #     The character to replace with when a character cannot be converted to the target encoding.
+      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
+      #     Default: nil
+      #
+      #   encode_cleaner: [nil|symbol|Proc]
+      #     Cleanse data read from the input stream.
+      #     nil: No cleansing
+      #     :printable Cleanse all non-printable characters except \r and \n
+      #     Proc/lambda Proc to call after every read to cleanse the data
+      #     Default: :printable
+      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+      #
+      # Example:
+      #   # Load plain text records from a file
+      #   job.input.upload('hello.csv')
+      #
+      # Example:
+      #   # Load plain text records from a file, stripping all non-printable characters,
+      #   # as well as any characters that cannot be converted to UTF-8
+      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.input.upload('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.input.upload('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+      #
+      # Example: Supply custom options
+      #   job.input.upload('myfile.csv.enc', streams: :enc])
+      #
+      # Example: Extract streams from filename but write to a temp file
+      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
+      #   t = Tempfile.new('my_project')
+      #   job.input.upload(t.to_path, streams: streams)
+      #
+      # Example: Upload by writing records one at a time to the upload stream
+      #   job.upload do |writer|
+      #     10.times { |i| writer << i }
+      #   end
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to the number of records returned by the arel.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      # * By default all data read from the file/stream is converted into UTF-8 before being persisted. This
+      #   is recommended since Mongo only supports UTF-8 strings.
+      # * When zip format, the Zip file/stream must contain only one file, the first file found will be
+      #   loaded into the job
+      # * If an io stream is supplied, it is read until it returns nil.
+      # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
+      # * CSV parsing is slow, so it is usually left for the workers to do.
       def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
         if file_name
           self.upload_file_name = file_name
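The options documented in this hunk are handled by the IOStreams gem during the read. A minimal sketch of a batch job using them (the MyJob class and the users.csv path are hypothetical; the upload signature and the encode_cleaner/encode_replace options come from the hunk above):

    class MyJob < RocketJob::Job
      include RocketJob::Batch

      def perform(line)
        # Each uploaded record arrives here; with stream_mode: :line it is one
        # line of the file, already cleansed and converted to UTF-8.
        line.upcase
      end
    end

    job = MyJob.new
    job.slice_size = 100
    # Strip non-printable characters and drop any bytes that cannot be converted
    # to UTF-8, instead of raising Encoding::UndefinedConversionError.
    job.upload('users.csv', stream_mode: :line, encode_cleaner: :printable, encode_replace: '')
    job.save!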
@@ -63,18 +149,158 @@ module RocketJob
         count = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end

+      # Upload results from an Arel into RocketJob::SlicedJob.
+      #
+      # Params
+      #   column_names
+      #     When a block is not supplied, supply the names of the columns to be returned
+      #     and uploaded into the job
+      #     These columns are automatically added to the select list to reduce overhead
+      #
+      #   If a Block is supplied it is passed the model returned from the database and should
+      #   return the work item to be uploaded into the job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # Example: Upload id's for all users
+      #   arel = User.all
+      #   job.upload_arel(arel)
+      #
+      # Example: Upload selected user id's
+      #   arel = User.where(country_code: 'US')
+      #   job.upload_arel(arel)
+      #
+      # Example: Upload user_name and zip_code
+      #   arel = User.where(country_code: 'US')
+      #   job.upload_arel(arel, :user_name, :zip_code)
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to the number of records returned by the arel.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
       def upload_arel(arel, *column_names, category: :main, &block)
         count = input(category).upload_arel(arel, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end

+      # Upload the result of a MongoDB query to the input collection for processing
+      # Useful when an entire MongoDB collection, or part thereof needs to be
+      # processed by a job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # If a Block is supplied it is passed the document returned from the
+      # database and should return a record for processing
+      #
+      # If no Block is supplied then the record will be the :fields returned
+      # from MongoDB
+      #
+      # Note:
+      #   This method uses the collection and not the MongoMapper document to
+      #   avoid the overhead of constructing a Model with every document returned
+      #   by the query
+      #
+      # Note:
+      #   The Block must return types that can be serialized to BSON.
+      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
+      #   Invalid: Date, etc.
+      #
+      # Example: Upload document ids
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria)
+      #
+      # Example: Upload just the supplied column
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to the number of records returned by the monqo query.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
       def upload_mongo_query(criteria, *column_names, category: :main, &block)
         count = input(category).upload_mongo_query(criteria, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
+      end
+
+      # Upload sliced range of integer requests as arrays of start and end ids.
+      #
+      # Returns [Integer] last_id - start_id + 1.
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.upload_integer_range(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.input.insert([200,299])
+      #   job.input.insert([300,399])
+      #   job.input.insert([400,421])
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to: last_id - start_id + 1.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      def upload_integer_range(start_id, last_id, category: :main)
+        input(category).upload_integer_range(start_id, last_id)
+        count = last_id - start_id + 1
+        self.record_count = (record_count || 0) + count
+        count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
+      end
+
+      # Upload sliced range of integer requests as an arrays of start and end ids
+      # starting with the last range first
+      #
+      # Returns [Integer] last_id - start_id + 1.
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed.
+      # Useful for when the highest order integer values should be processed before
+      # the lower integer value ranges. For example when processing every record
+      # in a database based on the id column
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.upload_integer_range_in_reverse_order(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.input.insert([400,421])
+      #   job.input.insert([300,399])
+      #   job.input.insert([200,299])
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is set to: last_id - start_id + 1.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
+        input(category).upload_integer_range_in_reverse_order(start_id, last_id)
+        count = last_id - start_id + 1
+        self.record_count = (record_count || 0) + count
+        count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end

       # Upload the supplied slices for processing by workers
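Every upload method in this hunk now follows the same pattern: on any StandardError the partially filled input collection is deleted before the exception is re-raised, so retrying an upload cannot duplicate records. A sketch of the behaviour this buys, reusing the hypothetical MyJob from above:

    job = MyJob.new
    begin
      # encode_replace: nil makes bytes that cannot be converted to UTF-8 raise
      # instead of being replaced.
      job.upload('corrupt.csv', encode_replace: nil)
    rescue Encoding::UndefinedConversionError
      # The rescue/delete_all inside #upload already ran: no partial slices
      # remain, and record_count was never incremented for the failed attempt.
      job.input.count # => 0, so the upload can simply be retried
    end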
data/lib/rocket_job/jobs/on_demand_batch_job.rb
CHANGED
@@ -11,7 +11,7 @@
 #   CODE
 #   job = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
 #   arel = User.unscoped.all.order('updated_at DESC')
-#   job.
+#   job.upload_arel(arel)
 #   job.save!
 #
 # Console Testing:
@@ -25,7 +25,7 @@
 #
 #   # Run against a sub-set using a limit
 #   arel = User.unscoped.all.order('updated_at DESC').limit(100)
-#   job.
+#   job.upload_arel(arel)
 #
 #   # Run the subset directly within the console
 #   job.perform_now
@@ -38,7 +38,7 @@
 # Example: Move the upload operation into a before_batch.
 #   upload_code = <<-CODE
 #     arel = User.unscoped.all.order('updated_at DESC')
-#
+#     upload_arel(arel)
 #   CODE
 #
 #   code = <<-CODE
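With the documentation fixed, the full flow reads end to end as below. This is only a sketch assembled from the comments in this hunk; the heredoc body is a stand-in, since any Ruby that handles one uploaded record will do:

    code = <<-CODE
      User.find(row).touch
    CODE
    job  = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
    arel = User.unscoped.all.order('updated_at DESC')
    job.upload_arel(arel)
    job.save!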
data/lib/rocket_job/sliced/input.rb
CHANGED
@@ -1,104 +1,6 @@
 module RocketJob
   module Sliced
     class Input < Slices
-      # Load lines for processing from the supplied filename or stream into this job.
-      #
-      # Returns [Integer] the number of lines loaded into this collection
-      #
-      # Parameters
-      #   file_name_or_io [String | IO]
-      #     Full path and file name to stream into the job,
-      #     Or, an IO Stream that responds to: :read
-      #
-      #   streams [Symbol|Array]
-      #     Streams to convert the data whilst it is being read.
-      #     When nil, the file_name extensions will be inspected to determine what
-      #     streams should be applied.
-      #     Default: nil
-      #
-      #   delimiter[String]
-      #     Line / Record delimiter to use to break the stream up into records
-      #       Any string to break the stream up by
-      #       The records when saved will not include this delimiter
-      #     Default: nil
-      #       Automatically detect line endings and break up by line
-      #       Searches for the first "\r\n" or "\n" and then uses that as the
-      #       delimiter for all subsequent records
-      #
-      #   buffer_size [Integer]
-      #     Size of the blocks when reading from the input file / stream.
-      #     Default: 65536 ( 64K )
-      #
-      #   encoding: [String|Encoding]
-      #     Encode returned data with this encoding.
-      #     'US-ASCII': Original 7 bit ASCII Format
-      #     'ASCII-8BIT': 8-bit ASCII Format
-      #     'UTF-8': UTF-8 Format
-      #     Etc.
-      #     Default: 'UTF-8'
-      #
-      #   encode_replace: [String]
-      #     The character to replace with when a character cannot be converted to the target encoding.
-      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
-      #     Default: nil
-      #
-      #   encode_cleaner: [nil|symbol|Proc]
-      #     Cleanse data read from the input stream.
-      #     nil: No cleansing
-      #     :printable Cleanse all non-printable characters except \r and \n
-      #     Proc/lambda Proc to call after every read to cleanse the data
-      #     Default: :printable
-      #
-      #   stream_mode: [:line | :row | :record]
-      #     :line
-      #       Uploads the file a line (String) at a time for processing by workers.
-      #     :row
-      #       Parses each line from the file as an Array and uploads each array for processing by workers.
-      #     :record
-      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
-      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
-      #
-      # Example:
-      #   # Load plain text records from a file
-      #   job.input.upload('hello.csv')
-      #
-      # Example:
-      #   # Load plain text records from a file, stripping all non-printable characters,
-      #   # as well as any characters that cannot be converted to UTF-8
-      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
-      #
-      # Example: Zip
-      #   # Since csv is not known to RocketJob it is ignored
-      #   job.input.upload('myfile.csv.zip')
-      #
-      # Example: Encrypted Zip
-      #   job.input.upload('myfile.csv.zip.enc')
-      #
-      # Example: Explicitly set the streams
-      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
-      #
-      # Example: Supply custom options
-      #   job.input.upload('myfile.csv.enc', streams: :enc])
-      #
-      # Example: Extract streams from filename but write to a temp file
-      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
-      #   t = Tempfile.new('my_project')
-      #   job.input.upload(t.to_path, streams: streams)
-      #
-      # Example: Upload by writing records one at a time to the upload stream
-      #   job.upload do |writer|
-      #     10.times { |i| writer << i }
-      #   end
-      #
-      # Notes:
-      # - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
-      #   is recommended since Mongo only supports UTF-8 strings.
-      # - When zip format, the Zip file/stream must contain only one file, the first file found will be
-      #   loaded into the job
-      # - If an io stream is supplied, it is read until it returns nil.
-      # - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
-      # - Only call from one thread at a time per job instance.
-      # - CSV parsing is slow, so it is left for the workers to do.
       def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block

@@ -110,35 +12,6 @@ module RocketJob
         Writer::Input.collect(self, on_first: on_first, &block)
       end

-      # Upload the result of a MongoDB query to the input collection for processing
-      # Useful when an entire MongoDB collection, or part thereof needs to be
-      # processed by a job.
-      #
-      # Returns [Integer] the number of records uploaded
-      #
-      # If a Block is supplied it is passed the document returned from the
-      # database and should return a record for processing
-      #
-      # If no Block is supplied then the record will be the :fields returned
-      # from MongoDB
-      #
-      # Note:
-      #   This method uses the collection and not the MongoMapper document to
-      #   avoid the overhead of constructing a Model with every document returned
-      #   by the query
-      #
-      # Note:
-      #   The Block must return types that can be serialized to BSON.
-      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
-      #   Invalid: Date, etc.
-      #
-      # Example: Upload document ids
-      #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria)
-      #
-      # Example: Upload just the supplied column
-      #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
       def upload_mongo_query(criteria, *column_names, &block)
         options = criteria.options

@@ -171,30 +44,6 @@ module RocketJob
         end
       end

-      # Upload results from an Arel into RocketJob::SlicedJob.
-      #
-      # Params
-      #   column_names
-      #     When a block is not supplied, supply the names of the columns to be returned
-      #     and uploaded into the job
-      #     These columns are automatically added to the select list to reduce overhead
-      #
-      #   If a Block is supplied it is passed the model returned from the database and should
-      #   return the work item to be uploaded into the job.
-      #
-      # Returns [Integer] the number of records uploaded
-      #
-      # Example: Upload id's for all users
-      #   arel = User.all
-      #   job.record_count = job.upload_arel(arel)
-      #
-      # Example: Upload selected user id's
-      #   arel = User.where(country_code: 'US')
-      #   job.record_count = job.upload_arel(arel)
-      #
-      # Example: Upload user_name and zip_code
-      #   arel = User.where(country_code: 'US')
-      #   job.record_count = job.upload_arel(arel, :user_name, :zip_code)
       def upload_arel(arel, *column_names, &block)
         unless block
           column_names = column_names.collect(&:to_sym)
@@ -217,21 +66,6 @@ module RocketJob
         end
       end

-      # Upload sliced range of integer requests as a an arrays of start and end ids
-      #
-      # Returns [Integer] the number of slices uploaded
-      #
-      # Uploads one range per slice so that the response can return multiple records
-      # for each slice processed
-      #
-      # Example
-      #   job.slice_size = 100
-      #   job.record_count = job.upload_integer_range(200, 421)
-      #
-      #   # Equivalent to calling:
-      #   job.record_count = job.insert([200,299])
-      #   job.record_count += job.insert([300,399])
-      #   job.record_count += job.insert([400,421])
       def upload_integer_range(start_id, last_id)
         create_indexes
         count = 0
@@ -245,25 +79,6 @@ module RocketJob
         count
       end

-      # Upload sliced range of integer requests as an arrays of start and end ids
-      # starting with the last range first
-      #
-      # Returns [Integer] the number of slices uploaded
-      #
-      # Uploads one range per slice so that the response can return multiple records
-      # for each slice processed.
-      # Useful for when the highest order integer values should be processed before
-      # the lower integer value ranges. For example when processing every record
-      # in a database based on the id column
-      #
-      # Example
-      #   job.slice_size = 100
-      #   job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
-      #
-      #   # Equivalent to calling:
-      #   job.insert([400,421])
-      #   job.insert([300,399])
-      #   job.insert([200,299])
       def upload_integer_range_in_reverse_order(start_id, last_id)
         create_indexes
         end_id = last_id
@@ -290,9 +105,8 @@ module RocketJob
       #
       def each_failed_record
         failed.each do |slice|
-
-
-          end
+          record = slice.failed_record
+          yield(record, slice) unless record.nil?
         end
       end

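The bulk of this file's removals are the doc comments relocated to RocketJob::Batch::IO above; the one behavioural change is that each_failed_record now delegates the lookup to the new Slice#failed_record (added in the slice.rb hunk below) instead of computing it inline. A usage sketch, assuming a job whose workers have already failed some slices:

    job.input.each_failed_record do |record, slice|
      # record is the exact input record that raised; slice carries the
      # exception detail captured for that attempt.
      puts "#{record.inspect} failed: #{slice.exception.message}"
    end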
data/lib/rocket_job/sliced/slice.rb
CHANGED
@@ -119,6 +119,14 @@ module RocketJob
         self.worker_name = nil
       end

+      # Returns the failed record.
+      # Returns [nil] if there is no failed record
+      def failed_record
+        if exception && (record_number = exception.record_number)
+          at(record_number - 1)
+        end
+      end
+
       # Returns [Hash] the slice as a Hash for storage purposes
       # Compresses / Encrypts the slice according to the job setting
       if ::Mongoid::VERSION.to_i >= 6
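failed_record treats exception.record_number as 1-based, so at(record_number - 1) returns the record within the slice that was being processed when the exception occurred, and nil when no record number was captured. A sketch of the lookup, assuming at least one failed slice exists:

    slice = job.input.failed.first
    if (record = slice.failed_record)
      # The record at zero-based index record_number - 1 inside this slice
      # is the one that raised during processing.
      record
    end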
data/lib/rocket_job/sliced/slices.rb
CHANGED
@@ -98,6 +98,7 @@ module RocketJob
         all.collection.indexes.create_one(state: 1, _id: 1)
       end

+      # Forward additional methods.
       def_instance_delegators :@all, :collection, :count, :delete_all, :first, :find, :last, :nor, :not, :or, :to_a, :where

       # Drop this collection when it is no longer needed
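The delegator line forwards common query methods from the Slices wrapper to the underlying Mongoid criteria, so slice collections can be queried directly. For example (method names taken from the delegator line above; the state: :failed query assumes the field name Slice uses for its state machine):

    job.input.count                      # total number of slices
    job.input.where(state: :failed).to_a # query slices via the delegated :where and :to_a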
data/lib/rocket_job/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rocketjob
 version: !ruby/object:Gem::Version
-  version: 4.1.1
+  version: 4.2.0
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aasm
|