RubyGems - rocketjob - Versions diffs - 4.1.1 → 4.2.0 - Mend

rocketjob 4.1.1 → 4.2.0

Files changed (8) hide show

checksums.yaml +4 -4
data/lib/rocket_job/batch/io.rb +236 -10
data/lib/rocket_job/jobs/on_demand_batch_job.rb +3 -3
data/lib/rocket_job/sliced/input.rb +2 -188
data/lib/rocket_job/sliced/slice.rb +8 -0
data/lib/rocket_job/sliced/slices.rb +1 -0
data/lib/rocket_job/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a88179130b3f8b1570a98c02b22b1daa93d1cfff9e91231f12c08af3eb47c750
-  data.tar.gz: e6fc5a6cf000dc9faa61b61bb232aaef6bf77dec144959e6bfb2114e193845db
+  metadata.gz: f4a9d008dd87609ead82e1ddb964aa798fc412e40e0e9634bb0ac0ee1a136a6b
+  data.tar.gz: ea8f96c4791b84175488e7ab9cc0e31b05b62403e98c4853cafb339f85c118d9
 SHA512:
-  metadata.gz: cef575bafd6e780beb0c235bfae3efab3c79ae456cf907c1335372f664ccab1ea125cfd9cbcca17a7791b328b97f78750bc039adf80786bf48834d9d1e6079af
-  data.tar.gz: dd88230b68ecf82433c641f903fcec276b0e873c8849dd1873ed4ee14cd88d3e99fe93ca4ffaf3f973efca16d6837bb1daf3cb91202176ec909a8c19bf28fc08
+  metadata.gz: 1eb4a41765c4096fd6ac9c664da2bf27afebc37ce82cc4fc7545e22609443bd263e8a8bb04f22a986bc0bc4babf0797109fc958b3ca4122b3fc226ab9c9db8bc
+  data.tar.gz: 4507a2de381ddef1dee859cc906564d59167e7336002e568ff5cac06d4281cd1b214329a434375ba9c81bfc3ff69e03edf9a3edf4bab1703986b86feda95d907

data/lib/rocket_job/batch/io.rb CHANGED Viewed

@@ -39,21 +39,107 @@ module RocketJob
         (@outputs ||= {})[category] ||= RocketJob::Sliced::Output.new(slice_arguments(collection_name))
       end
-      # Upload the supplied file_name or stream
+      # Upload the supplied file_name or stream.
       #
-      # Updates the record_count after adding the records
+      # Returns [Integer] the number of records uploaded.
       #
-      # Options
-      #     :file_name [String]
-      #       When file_name_or_io is an IO, the original base file name if any.
-      #       Default: nil
+      # Parameters
+      #   file_name_or_io [String | IO]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO Stream that responds to: :read
       #
-      # See RocketJob::Sliced::Input#upload for remaining options
+      #   streams [Symbol|Array]
+      #     Streams to convert the data whilst it is being read.
+      #     When nil, the file_name extensions will be inspected to determine what
+      #     streams should be applied.
+      #     Default: nil
       #
-      # Returns [Integer] the number of records uploaded
+      #   delimiter [String]
+      #     Line / Record delimiter to use to break the stream up into records
+      #       Any string to break the stream up by
+      #       The records when saved will not include this delimiter
+      #     Default: nil
+      #       Automatically detect line endings and break up by line
+      #       Searches for the first "\r\n" or "\n" and then uses that as the
+      #       delimiter for all subsequent records
       #
-      # Note:
-      # * Not thread-safe. Only call from one thread at a time
+      #   buffer_size [Integer]
+      #     Size of the blocks when reading from the input file / stream.
+      #     Default: 65536 ( 64K )
+      #
+      #   encoding: [String|Encoding]
+      #     Encode returned data with this encoding.
+      #     'US-ASCII':   Original 7 bit ASCII Format
+      #     'ASCII-8BIT': 8-bit ASCII Format
+      #     'UTF-8':      UTF-8 Format
+      #     Etc.
+      #     Default: 'UTF-8'
+      #
+      #   encode_replace: [String]
+      #     The character to replace with when a character cannot be converted to the target encoding.
+      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
+      #     Default: nil
+      #
+      #   encode_cleaner: [nil|Symbol|Proc]
+      #     Cleanse data read from the input stream.
+      #     nil:           No cleansing
+      #     :printable     Cleanse all non-printable characters except \r and \n
+      #     Proc/lambda    Proc to call after every read to cleanse the data
+      #     Default: :printable
+      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStreams#each_line, IOStreams#each_row, and IOStreams#each_record.
+      #
+      # Example:
+      #   # Load plain text records from a file
+      #   job.upload('hello.csv')
+      #
+      # Example:
+      #   # Load plain text records from a file, stripping all non-printable characters,
+      #   # as well as any characters that cannot be converted to UTF-8
+      #   job.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.upload('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.upload('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   job.upload('myfile.ze', streams: [:zip, :enc])
+      #
+      # Example: Supply custom options
+      #   job.upload('myfile.csv.enc', streams: :enc)
+      #
+      # Example: Extract streams from filename but write to a temp file
+      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
+      #   t = Tempfile.new('my_project')
+      #   job.upload(t.to_path, streams: streams)
+      #
+      # Example: Upload by writing records one at a time to the upload stream
+      #   job.upload do |writer|
+      #     10.times { |i| writer << i }
+      #   end
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is incremented by the number of records uploaded.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      # * By default all data read from the file/stream is converted into UTF-8 before being persisted. This
+      #   is recommended since Mongo only supports UTF-8 strings.
+      # * When zip format, the Zip file/stream must contain only one file, the first file found will be
+      #   loaded into the job
+      # * If an io stream is supplied, it is read until it returns nil.
+      # * Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
+      # * CSV parsing is slow, so it is usually left for the workers to do.
       def upload(file_name_or_io = nil, file_name: nil, category: :main, **args, &block)
         if file_name
           self.upload_file_name = file_name
@@ -63,18 +149,158 @@ module RocketJob
         count             = input(category).upload(file_name_or_io, file_name: file_name, **args, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end
+      # Upload results from an Arel into RocketJob::SlicedJob.
+      #
+      # Params
+      #   column_names
+      #     When a block is not supplied, supply the names of the columns to be returned
+      #     and uploaded into the job
+      #     These columns are automatically added to the select list to reduce overhead
+      #
+      # If a Block is supplied it is passed the model returned from the database and should
+      # return the work item to be uploaded into the job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # Example: Upload id's for all users
+      #   arel = User.all
+      #   job.upload_arel(arel)
+      #
+      # Example: Upload selected user id's
+      #   arel = User.where(country_code: 'US')
+      #   job.upload_arel(arel)
+      #
+      # Example: Upload user_name and zip_code
+      #   arel = User.where(country_code: 'US')
+      #   job.upload_arel(arel, :user_name, :zip_code)
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is incremented by the number of records returned by the arel.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
       def upload_arel(arel, *column_names, category: :main, &block)
         count             = input(category).upload_arel(arel, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end
+      # Upload the result of a MongoDB query to the input collection for processing
+      # Useful when an entire MongoDB collection, or part thereof needs to be
+      # processed by a job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # If a Block is supplied it is passed the document returned from the
+      # database and should return a record for processing
+      #
+      # If no Block is supplied then the record will be the :fields returned
+      # from MongoDB
+      #
+      # Note:
+      #   This method uses the collection and not the MongoMapper document to
+      #   avoid the overhead of constructing a Model with every document returned
+      #   by the query
+      #
+      # Note:
+      #   The Block must return types that can be serialized to BSON.
+      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
+      #   Invalid: Date, etc.
+      #
+      # Example: Upload document ids
+      #   criteria = User.where(state: 'FL')
+      #   job.upload_mongo_query(criteria)
+      #
+      # Example: Upload just the supplied column
+      #   criteria = User.where(state: 'FL')
+      #   job.upload_mongo_query(criteria, :zip_code)
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is incremented by the number of records returned by the mongo query.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
       def upload_mongo_query(criteria, *column_names, category: :main, &block)
         count             = input(category).upload_mongo_query(criteria, *column_names, &block)
         self.record_count = (record_count || 0) + count
         count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
+      end
+      # Upload sliced range of integer requests as arrays of start and end ids.
+      #
+      # Returns [Integer] last_id - start_id + 1.
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.upload_integer_range(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.input.insert([200,299])
+      #   job.input.insert([300,399])
+      #   job.input.insert([400,421])
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is incremented by: last_id - start_id + 1.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      def upload_integer_range(start_id, last_id, category: :main)
+        input(category).upload_integer_range(start_id, last_id)
+        count             = last_id - start_id + 1
+        self.record_count = (record_count || 0) + count
+        count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
+      end
+      # Upload sliced range of integer requests as arrays of start and end ids
+      # starting with the last range first
+      #
+      # Returns [Integer] last_id - start_id + 1.
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed.
+      # Useful for when the highest order integer values should be processed before
+      # the lower integer value ranges. For example when processing every record
+      # in a database based on the id column
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.upload_integer_range_in_reverse_order(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.input.insert([400,421])
+      #   job.input.insert([300,399])
+      #   job.input.insert([200,299])
+      #
+      # Notes:
+      # * Only call from one thread at a time against a single instance of this job.
+      # * The record_count for the job is incremented by: last_id - start_id + 1.
+      # * If an exception is raised while uploading data, the input collection is cleared out
+      #   so that if a job is retried during an upload failure, data is not duplicated.
+      def upload_integer_range_in_reverse_order(start_id, last_id, category: :main)
+        input(category).upload_integer_range_in_reverse_order(start_id, last_id)
+        count             = last_id - start_id + 1
+        self.record_count = (record_count || 0) + count
+        count
+      rescue StandardError => exc
+        input(category).delete_all
+        raise(exc)
       end
       # Upload the supplied slices for processing by workers

data/lib/rocket_job/jobs/on_demand_batch_job.rb CHANGED Viewed

@@ -11,7 +11,7 @@
 #   CODE
 #   job  = RocketJob::Jobs::OnDemandBatchJob.new(code: code, description: 'cleanse users')
 #   arel = User.unscoped.all.order('updated_at DESC')
-#   job.record_count = input.upload_arel(arel)
+#   job.upload_arel(arel)
 #   job.save!
 #
 # Console Testing:
@@ -25,7 +25,7 @@
 #
 #   # Run against a sub-set using a limit
 #   arel = User.unscoped.all.order('updated_at DESC').limit(100)
-#   job.record_count = job.input.upload_arel(arel)
+#   job.upload_arel(arel)
 #
 #   # Run the subset directly within the console
 #   job.perform_now
@@ -38,7 +38,7 @@
 # Example: Move the upload operation into a before_batch.
 #   upload_code = <<-CODE
 #     arel = User.unscoped.all.order('updated_at DESC')
-#     self.record_count = input.upload_arel(arel)
+#     upload_arel(arel)
 #   CODE
 #
 #   code = <<-CODE

data/lib/rocket_job/sliced/input.rb CHANGED Viewed

@@ -1,104 +1,6 @@
 module RocketJob
   module Sliced
     class Input < Slices
-      # Load lines for processing from the supplied filename or stream into this job.
-      #
-      # Returns [Integer] the number of lines loaded into this collection
-      #
-      # Parameters
-      #   file_name_or_io [String | IO]
-      #     Full path and file name to stream into the job,
-      #     Or, an IO Stream that responds to: :read
-      #
-      #   streams [Symbol|Array]
-      #     Streams to convert the data whilst it is being read.
-      #     When nil, the file_name extensions will be inspected to determine what
-      #     streams should be applied.
-      #     Default: nil
-      #
-      #   delimiter[String]
-      #     Line / Record delimiter to use to break the stream up into records
-      #       Any string to break the stream up by
-      #       The records when saved will not include this delimiter
-      #     Default: nil
-      #       Automatically detect line endings and break up by line
-      #       Searches for the first "\r\n" or "\n" and then uses that as the
-      #       delimiter for all subsequent records
-      #
-      #   buffer_size [Integer]
-      #     Size of the blocks when reading from the input file / stream.
-      #     Default: 65536 ( 64K )
-      #
-      #   encoding: [String|Encoding]
-      #     Encode returned data with this encoding.
-      #     'US-ASCII':   Original 7 bit ASCII Format
-      #     'ASCII-8BIT': 8-bit ASCII Format
-      #     'UTF-8':      UTF-8 Format
-      #     Etc.
-      #     Default: 'UTF-8'
-      #
-      #   encode_replace: [String]
-      #     The character to replace with when a character cannot be converted to the target encoding.
-      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
-      #     Default: nil
-      #
-      #   encode_cleaner: [nil|symbol|Proc]
-      #     Cleanse data read from the input stream.
-      #     nil:           No cleansing
-      #     :printable Cleanse all non-printable characters except \r and \n
-      #     Proc/lambda    Proc to call after every read to cleanse the data
-      #     Default: :printable
-      #
-      #   stream_mode: [:line | :row | :record]
-      #     :line
-      #       Uploads the file a line (String) at a time for processing by workers.
-      #     :row
-      #       Parses each line from the file as an Array and uploads each array for processing by workers.
-      #     :record
-      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
-      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
-      #
-      # Example:
-      #   # Load plain text records from a file
-      #   job.input.upload('hello.csv')
-      #
-      # Example:
-      #   # Load plain text records from a file, stripping all non-printable characters,
-      #   # as well as any characters that cannot be converted to UTF-8
-      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
-      #
-      # Example: Zip
-      #   # Since csv is not known to RocketJob it is ignored
-      #   job.input.upload('myfile.csv.zip')
-      #
-      # Example: Encrypted Zip
-      #   job.input.upload('myfile.csv.zip.enc')
-      #
-      # Example: Explicitly set the streams
-      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
-      #
-      # Example: Supply custom options
-      #   job.input.upload('myfile.csv.enc', streams: :enc])
-      #
-      # Example: Extract streams from filename but write to a temp file
-      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
-      #   t = Tempfile.new('my_project')
-      #   job.input.upload(t.to_path, streams: streams)
-      #
-      # Example: Upload by writing records one at a time to the upload stream
-      #   job.upload do |writer|
-      #     10.times { |i| writer << i }
-      #   end
-      #
-      # Notes:
-      # - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
-      #   is recommended since Mongo only supports UTF-8 strings.
-      # - When zip format, the Zip file/stream must contain only one file, the first file found will be
-      #   loaded into the job
-      # - If an io stream is supplied, it is read until it returns nil.
-      # - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
-      # - Only call from one thread at a time per job instance.
-      # - CSV parsing is slow, so it is left for the workers to do.
       def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
         raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
@@ -110,35 +12,6 @@ module RocketJob
         Writer::Input.collect(self, on_first: on_first, &block)
       end
-      # Upload the result of a MongoDB query to the input collection for processing
-      # Useful when an entire MongoDB collection, or part thereof needs to be
-      # processed by a job.
-      #
-      # Returns [Integer] the number of records uploaded
-      #
-      # If a Block is supplied it is passed the document returned from the
-      # database and should return a record for processing
-      #
-      # If no Block is supplied then the record will be the :fields returned
-      # from MongoDB
-      #
-      # Note:
-      #   This method uses the collection and not the MongoMapper document to
-      #   avoid the overhead of constructing a Model with every document returned
-      #   by the query
-      #
-      # Note:
-      #   The Block must return types that can be serialized to BSON.
-      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
-      #   Invalid: Date, etc.
-      #
-      # Example: Upload document ids
-      #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria)
-      #
-      # Example: Upload just the supplied column
-      #   criteria = User.where(state: 'FL')
-      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
       def upload_mongo_query(criteria, *column_names, &block)
         options = criteria.options
@@ -171,30 +44,6 @@ module RocketJob
         end
       end
-      # Upload results from an Arel into RocketJob::SlicedJob.
-      #
-      # Params
-      #   column_names
-      #     When a block is not supplied, supply the names of the columns to be returned
-      #     and uploaded into the job
-      #     These columns are automatically added to the select list to reduce overhead
-      #
-      # If a Block is supplied it is passed the model returned from the database and should
-      # return the work item to be uploaded into the job.
-      #
-      # Returns [Integer] the number of records uploaded
-      #
-      # Example: Upload id's for all users
-      #   arel = User.all
-      #   job.record_count = job.upload_arel(arel)
-      #
-      # Example: Upload selected user id's
-      #   arel = User.where(country_code: 'US')
-      #   job.record_count = job.upload_arel(arel)
-      #
-      # Example: Upload user_name and zip_code
-      #   arel = User.where(country_code: 'US')
-      #   job.record_count = job.upload_arel(arel, :user_name, :zip_code)
       def upload_arel(arel, *column_names, &block)
         unless block
           column_names = column_names.collect(&:to_sym)
@@ -217,21 +66,6 @@ module RocketJob
         end
       end
-      # Upload sliced range of integer requests as a an arrays of start and end ids
-      #
-      # Returns [Integer] the number of slices uploaded
-      #
-      # Uploads one range per slice so that the response can return multiple records
-      # for each slice processed
-      #
-      # Example
-      #   job.slice_size = 100
-      #   job.record_count = job.upload_integer_range(200, 421)
-      #
-      #   # Equivalent to calling:
-      #   job.record_count = job.insert([200,299])
-      #   job.record_count += job.insert([300,399])
-      #   job.record_count += job.insert([400,421])
       def upload_integer_range(start_id, last_id)
         create_indexes
         count = 0
@@ -245,25 +79,6 @@ module RocketJob
         count
       end
-      # Upload sliced range of integer requests as an arrays of start and end ids
-      # starting with the last range first
-      #
-      # Returns [Integer] the number of slices uploaded
-      #
-      # Uploads one range per slice so that the response can return multiple records
-      # for each slice processed.
-      # Useful for when the highest order integer values should be processed before
-      # the lower integer value ranges. For example when processing every record
-      # in a database based on the id column
-      #
-      # Example
-      #   job.slice_size = 100
-      #   job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
-      #
-      #   # Equivalent to calling:
-      #   job.insert([400,421])
-      #   job.insert([300,399])
-      #   job.insert([200,299])
       def upload_integer_range_in_reverse_order(start_id, last_id)
         create_indexes
         end_id = last_id
@@ -290,9 +105,8 @@ module RocketJob
       #
       def each_failed_record
         failed.each do |slice|
-          if slice.exception && (record_number = slice.exception.record_number)
-            yield(slice.at(record_number - 1), slice)
-          end
+          record = slice.failed_record
+          yield(record, slice) unless record.nil?
         end
       end

data/lib/rocket_job/sliced/slice.rb CHANGED Viewed

@@ -119,6 +119,14 @@ module RocketJob
         self.worker_name   = nil
       end
+      # Returns the failed record.
+      # Returns [nil] if there is no failed record
+      def failed_record
+        if exception && (record_number = exception.record_number)
+          at(record_number - 1)
+        end
+      end
       # Returns [Hash] the slice as a Hash for storage purposes
       # Compresses / Encrypts the slice according to the job setting
       if ::Mongoid::VERSION.to_i >= 6

data/lib/rocket_job/sliced/slices.rb CHANGED Viewed

@@ -98,6 +98,7 @@ module RocketJob
         all.collection.indexes.create_one(state: 1, _id: 1)
       end
+      # Forward additional methods.
       def_instance_delegators :@all, :collection, :count, :delete_all, :first, :find, :last, :nor, :not, :or, :to_a, :where
       # Drop this collection when it is no longer needed

data/lib/rocket_job/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module RocketJob
-  VERSION = '4.1.1'.freeze
+  VERSION = '4.2.0'.freeze
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rocketjob
 version: !ruby/object:Gem::Version
-  version: 4.1.1
+  version: 4.2.0
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-07-01 00:00:00.000000000 Z
+date: 2019-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aasm