rocketjob 3.5.2 → 4.0.0
- checksums.yaml +4 -4
- data/README.md +63 -1
- data/bin/rocketjob +1 -0
- data/bin/rocketjob_batch_perf +11 -0
- data/lib/rocket_job/batch.rb +32 -0
- data/lib/rocket_job/batch/callbacks.rb +40 -0
- data/lib/rocket_job/batch/io.rb +154 -0
- data/lib/rocket_job/batch/logger.rb +57 -0
- data/lib/rocket_job/batch/lower_priority.rb +54 -0
- data/lib/rocket_job/batch/model.rb +157 -0
- data/lib/rocket_job/batch/performance.rb +99 -0
- data/lib/rocket_job/batch/result.rb +8 -0
- data/lib/rocket_job/batch/results.rb +9 -0
- data/lib/rocket_job/batch/state_machine.rb +102 -0
- data/lib/rocket_job/batch/statistics.rb +88 -0
- data/lib/rocket_job/batch/tabular.rb +56 -0
- data/lib/rocket_job/batch/tabular/input.rb +123 -0
- data/lib/rocket_job/batch/tabular/output.rb +59 -0
- data/lib/rocket_job/batch/throttle.rb +91 -0
- data/lib/rocket_job/batch/throttle_running_slices.rb +53 -0
- data/lib/rocket_job/batch/worker.rb +288 -0
- data/lib/rocket_job/cli.rb +29 -7
- data/lib/rocket_job/config.rb +1 -1
- data/lib/rocket_job/extensions/mongoid/clients/options.rb +37 -0
- data/lib/rocket_job/extensions/mongoid/contextual/mongo.rb +17 -0
- data/lib/rocket_job/extensions/mongoid/factory.rb +4 -4
- data/lib/rocket_job/extensions/mongoid_5/clients/options.rb +38 -0
- data/lib/rocket_job/extensions/mongoid_5/contextual/mongo.rb +64 -0
- data/lib/rocket_job/extensions/mongoid_5/factory.rb +13 -0
- data/lib/rocket_job/jobs/on_demand_batch_job.rb +127 -0
- data/lib/rocket_job/jobs/performance_job.rb +18 -0
- data/lib/rocket_job/jobs/upload_file_job.rb +2 -5
- data/lib/rocket_job/plugins/document.rb +2 -8
- data/lib/rocket_job/plugins/job/persistence.rb +6 -4
- data/lib/rocket_job/plugins/job/throttle.rb +3 -6
- data/lib/rocket_job/plugins/job/worker.rb +2 -2
- data/lib/rocket_job/server.rb +14 -3
- data/lib/rocket_job/sliced/input.rb +336 -0
- data/lib/rocket_job/sliced/output.rb +99 -0
- data/lib/rocket_job/sliced/slice.rb +166 -0
- data/lib/rocket_job/sliced/slices.rb +166 -0
- data/lib/rocket_job/sliced/writer/input.rb +60 -0
- data/lib/rocket_job/sliced/writer/output.rb +82 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +2 -2
- data/lib/rocketjob.rb +28 -0
- metadata +51 -62
- data/test/config/database.yml +0 -5
- data/test/config/mongoid.yml +0 -88
- data/test/config_test.rb +0 -10
- data/test/dirmon_entry_test.rb +0 -313
- data/test/dirmon_job_test.rb +0 -216
- data/test/files/text.txt +0 -3
- data/test/job_test.rb +0 -71
- data/test/jobs/housekeeping_job_test.rb +0 -102
- data/test/jobs/on_demand_job_test.rb +0 -59
- data/test/jobs/upload_file_job_test.rb +0 -107
- data/test/plugins/cron_test.rb +0 -166
- data/test/plugins/job/callbacks_test.rb +0 -166
- data/test/plugins/job/defaults_test.rb +0 -53
- data/test/plugins/job/logger_test.rb +0 -56
- data/test/plugins/job/model_test.rb +0 -94
- data/test/plugins/job/persistence_test.rb +0 -94
- data/test/plugins/job/state_machine_test.rb +0 -116
- data/test/plugins/job/throttle_test.rb +0 -111
- data/test/plugins/job/worker_test.rb +0 -199
- data/test/plugins/processing_window_test.rb +0 -109
- data/test/plugins/restart_test.rb +0 -193
- data/test/plugins/retry_test.rb +0 -88
- data/test/plugins/singleton_test.rb +0 -92
- data/test/plugins/state_machine_event_callbacks_test.rb +0 -102
- data/test/plugins/state_machine_test.rb +0 -67
- data/test/plugins/transaction_test.rb +0 -84
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +0 -17
data/lib/rocket_job/plugins/document.rb
CHANGED
@@ -5,19 +5,13 @@ module RocketJob
     # Base class for storing models in MongoDB
     module Document
       extend ActiveSupport::Concern
-      include Mongoid::Document
+      include ::Mongoid::Document

       included do
         store_in client: 'rocketjob'
       end

       module ClassMethods
-        # V2 Backward compatibility
-        # DEPRECATED
-        def key(name, type, options = {})
-          field(name, options.merge(type: type))
-        end
-
         # Mongoid does not apply ordering, add sort
         def first
           all.sort('_id' => 1).first
@@ -35,7 +29,7 @@ module RocketJob
       # Allows other changes to be made on the server that will be loaded.
       def find_and_update(attrs)
         doc = collection.find(_id: id).find_one_and_update({'$set' => attrs}, return_document: :after)
-        raise(Mongoid::Errors::DocumentNotFound.new(self.class, id)) unless doc
+        raise(::Mongoid::Errors::DocumentNotFound.new(self.class, id)) unless doc

         # Clear out keys that are not returned during the reload from MongoDB
         (fields.keys + embedded_relations.keys - doc.keys).each { |key| send("#{key}=", nil) }
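Both hunks swap Mongoid for ::Mongoid. The leading :: forces constant lookup to start at the top level, so a constant nested under RocketJob cannot shadow the gem; a minimal illustration (Outer and its nested Mongoid module are hypothetical):

    require 'mongoid'

    module Outer
      module Mongoid; end # hypothetical nested module that shadows the gem

      def self.names
        # Unqualified lookup resolves to the nested Outer::Mongoid;
        # the :: prefix always resolves to the top-level gem namespace.
        [Mongoid.name, ::Mongoid.name]
      end
    end

    Outer.names # => ["Outer::Mongoid", "Mongoid"]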
data/lib/rocket_job/plugins/job/persistence.rb
CHANGED
@@ -32,9 +32,11 @@ module RocketJob
         #   job = RocketJob::Job.rocket_job_retrieve('host:pid:worker', filter)
         def rocket_job_retrieve(worker_name, filter)
           SemanticLogger.silence(:info) do
-
-
-
+            scheduled = {'$or' => [{run_at: nil}, {:run_at.lte => Time.now}]}
+            working   = {'$or' => [{state: :queued}, {state: :running, sub_state: :processing}]}
+            query     = self.and(working, scheduled)
+            query     = query.where(filter) unless filter.blank?
+            update    = {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
             query.sort(priority: 1, _id: 1).find_one_and_update(update, bypass_document_validation: true)
           end
         end
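The new block spells out the retrieve semantics: a job is eligible when it is queued, or running in the :processing sub-state, and its run_at is either unset or already due. A minimal sketch of claiming a job with it (the worker name and filter hash are illustrative):

    filter = {'_type' => 'MyJob'} # illustrative: only claim MyJob instances
    job    = RocketJob::Job.rocket_job_retrieve('host:1234:worker1', filter)
    puts "Claimed #{job.id}" if job # the job is now marked running for this worker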
@@ -101,7 +103,7 @@ module RocketJob
         return super unless destroy_on_complete
         begin
           super
-        rescue Mongoid::Errors::DocumentNotFound
+        rescue ::Mongoid::Errors::DocumentNotFound
           unless completed?
             self.state = :completed
             rocket_job_set_completed_at
data/lib/rocket_job/plugins/job/throttle.rb
CHANGED
@@ -58,17 +58,14 @@ module RocketJob
       end

       # Undefine a previously defined throttle
-      def undefine_throttle(
-        rocket_job_throttles.delete_if
+      def undefine_throttle(method_name)
+        rocket_job_throttles.delete_if { |throttle| throttle.method_name == method_name }
       end

       # Has a throttle been defined?
       def throttle?(method_name)
-        rocket_job_throttles.
+        rocket_job_throttles.any? { |throttle| throttle.method_name == method_name }
       end
-
-      # DEPRECATED
-      alias has_throttle? throttle?
     end

     # Default throttle to use when the throttle is exceeded.
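With the block bodies restored above, defining and later removing a throttle looks roughly like this (a sketch: MyJob and the 5-job limit are hypothetical, and it assumes define_throttle from this same plugin registers the named instance method):

    class MyJob < RocketJob::Job
      # Hypothetical throttle: hold back new instances when 5 are already running.
      define_throttle :too_many_running?

      def too_many_running?
        self.class.running.count >= 5
      end

      def perform
        # ...
      end
    end

    MyJob.throttle?(:too_many_running?)         # => true
    MyJob.undefine_throttle(:too_many_running?)
    MyJob.throttle?(:too_many_running?)         # => false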
data/lib/rocket_job/plugins/job/worker.rb
CHANGED
@@ -51,7 +51,7 @@ module RocketJob
         else
           job.worker_name = worker_name
           job.rocket_job_fail_on_exception!(worker_name) do
-
+            job.start!
           end
           return job if job.running?
         end
@@ -105,7 +105,7 @@ module RocketJob
     #
     # Exceptions are _not_ suppressed and should be handled by the caller.
     def perform_now
-      raise(Mongoid::Errors::Validations, self) unless valid?
+      raise(::Mongoid::Errors::Validations, self) unless valid?

       worker = RocketJob::Worker.new(inline: true)
       start if may_start?
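perform_now executes the job inline on the calling thread via an inline RocketJob::Worker, which is handy in tests and consoles. A usage sketch (HelloJob is hypothetical):

    class HelloJob < RocketJob::Job
      def perform
        logger.info 'Hello World'
      end
    end

    job = HelloJob.new
    job.perform_now # raises ::Mongoid::Errors::Validations when invalid, else runs inline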
data/lib/rocket_job/server.rb
CHANGED
@@ -1,3 +1,4 @@
+require 'yaml'
 require 'concurrent'
 module RocketJob
   # Server
@@ -45,7 +46,7 @@ module RocketJob
     field :started_at, type: Time

     # Filter to apply to control which job classes this server can process
-    field :
+    field :yaml_filter, type: String

     # The heartbeat information for this server
     embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
@@ -188,7 +189,7 @@ module RocketJob
     def self.run(attrs = {})
       Thread.current.name = 'rocketjob main'
       # Create Indexes on server startup
-      Mongoid::Tasks::Database.create_indexes
+      ::Mongoid::Tasks::Database.create_indexes
       register_signal_handlers

       server = create!(attrs)
@@ -228,6 +229,15 @@ module RocketJob
       (Time.now - heartbeat.updated_at) >= dead_seconds
     end

+    # Where clause filter to apply to workers looking for jobs
+    def filter
+      YAML.load(yaml_filter) if yaml_filter
+    end
+
+    def filter=(hash)
+      self.yaml_filter = hash.nil? ? nil : hash.to_yaml
+    end
+
     private

     # Returns [Array<Worker>] collection of workers
@@ -238,6 +248,7 @@ module RocketJob
     # Management Thread
     def run
       logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
+      logger.info('Running with filter', filter) if filter
       build_heartbeat(updated_at: Time.now, workers: 0)
       started!
       logger.info 'Rocket Job Server started'
@@ -262,7 +273,7 @@ module RocketJob
       end

       logger.info 'Shutdown'
-    rescue Mongoid::Errors::DocumentNotFound
+    rescue ::Mongoid::Errors::DocumentNotFound
       logger.warn('Server has been destroyed. Going down hard!')
     rescue Exception => exc
       logger.error('RocketJob::Server is stopping due to an exception', exc)
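The new filter accessors round-trip a plain Hash through YAML so the where-clause can persist in the String yaml_filter field. A quick sketch (the job class name is illustrative):

    server = RocketJob::Server.new
    server.filter = {'_type' => 'DataCleansingJob'} # illustrative where-clause
    server.yaml_filter # => "---\n_type: DataCleansingJob\n"
    server.filter      # => {"_type" => "DataCleansingJob"}

    server.filter = nil
    server.yaml_filter # => nil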
data/lib/rocket_job/sliced/input.rb
ADDED
@@ -0,0 +1,336 @@
+module RocketJob
+  module Sliced
+    class Input < Slices
+      # Load lines for processing from the supplied filename or stream into this job.
+      #
+      # Returns [Integer] the number of lines loaded into this collection
+      #
+      # Parameters
+      #   file_name_or_io [String | IO]
+      #     Full path and file name to stream into the job,
+      #     Or, an IO Stream that responds to: :read
+      #
+      #   streams [Symbol|Array]
+      #     Streams to convert the data whilst it is being read.
+      #     When nil, the file_name extensions will be inspected to determine what
+      #     streams should be applied.
+      #     Default: nil
+      #
+      #   delimiter [String]
+      #     Line / Record delimiter to use to break the stream up into records
+      #       Any string to break the stream up by
+      #       The records when saved will not include this delimiter
+      #     Default: nil
+      #       Automatically detect line endings and break up by line
+      #       Searches for the first "\r\n" or "\n" and then uses that as the
+      #       delimiter for all subsequent records
+      #
+      #   buffer_size [Integer]
+      #     Size of the blocks when reading from the input file / stream.
+      #     Default: 65536 ( 64K )
+      #
+      #   encoding: [String|Encoding]
+      #     Encode returned data with this encoding.
+      #       'US-ASCII':   Original 7 bit ASCII Format
+      #       'ASCII-8BIT': 8-bit ASCII Format
+      #       'UTF-8':      UTF-8 Format
+      #       Etc.
+      #     Default: 'UTF-8'
+      #
+      #   encode_replace: [String]
+      #     The character to replace with when a character cannot be converted to the target encoding.
+      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
+      #     Default: nil
+      #
+      #   encode_cleaner: [nil|symbol|Proc]
+      #     Cleanse data read from the input stream.
+      #     nil:         No cleansing
+      #     :printable   Cleanse all non-printable characters except \r and \n
+      #     Proc/lambda  Proc to call after every read to cleanse the data
+      #     Default: :printable
+      #
+      #   stream_mode: [:line | :row | :record]
+      #     :line
+      #       Uploads the file a line (String) at a time for processing by workers.
+      #     :row
+      #       Parses each line from the file as an Array and uploads each array for processing by workers.
+      #     :record
+      #       Parses each line from the file into a Hash and uploads each hash for processing by workers.
+      #     See IOStream#each_line, IOStream#each_row, and IOStream#each_record.
+      #
+      # Example:
+      #   # Load plain text records from a file
+      #   job.input.upload('hello.csv')
+      #
+      # Example:
+      #   # Load plain text records from a file, stripping all non-printable characters,
+      #   # as well as any characters that cannot be converted to UTF-8
+      #   job.input.upload('hello.csv', encode_cleaner: :printable, encode_replace: '')
+      #
+      # Example: Zip
+      #   # Since csv is not known to RocketJob it is ignored
+      #   job.input.upload('myfile.csv.zip')
+      #
+      # Example: Encrypted Zip
+      #   job.input.upload('myfile.csv.zip.enc')
+      #
+      # Example: Explicitly set the streams
+      #   job.input.upload('myfile.ze', streams: [:zip, :enc])
+      #
+      # Example: Supply custom options
+      #   job.input.upload('myfile.csv.enc', streams: [:enc])
+      #
+      # Example: Extract streams from filename but write to a temp file
+      #   streams = IOStreams.streams_for_file_name('myfile.gz.enc')
+      #   t = Tempfile.new('my_project')
+      #   job.input.upload(t.to_path, streams: streams)
+      #
+      # Example: Upload by writing records one at a time to the upload stream
+      #   job.upload do |writer|
+      #     10.times { |i| writer << i }
+      #   end
+      #
+      # Notes:
+      # - By default all data read from the file/stream is converted into UTF-8 before being persisted. This
+      #   is recommended since Mongo only supports UTF-8 strings.
+      # - When zip format, the Zip file/stream must contain only one file, the first file found will be
+      #   loaded into the job
+      # - If an io stream is supplied, it is read until it returns nil.
+      # - Only use this method for UTF-8 data, for binary data use #input_slice or #input_records.
+      # - Only call from one thread at a time per job instance.
+      # - CSV parsing is slow, so it is left for the workers to do.
+      def upload(file_name_or_io = nil, encoding: 'UTF-8', stream_mode: :line, on_first: nil, **args, &block)
+        raise(ArgumentError, 'Either file_name_or_io, or a block must be supplied') unless file_name_or_io || block
+
+        block ||= -> (io) do
+          iterator = "each_#{stream_mode}".to_sym
+          IOStreams.public_send(iterator, file_name_or_io, encoding: encoding, **args) { |line| io << line }
+        end
+
+        create_indexes
+        Writer::Input.collect(self, on_first: on_first, &block)
+      end
+
+      # Upload the result of a MongoDB query to the input collection for processing
+      # Useful when an entire MongoDB collection, or part thereof needs to be
+      # processed by a job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # If a Block is supplied it is passed the document returned from the
+      # database and should return a record for processing
+      #
+      # If no Block is supplied then the record will be the :fields returned
+      # from MongoDB
+      #
+      # Note:
+      #   This method uses the collection and not the MongoMapper document to
+      #   avoid the overhead of constructing a Model with every document returned
+      #   by the query
+      #
+      # Note:
+      #   The Block must return types that can be serialized to BSON.
+      #   Valid Types: Hash | Array | String | Integer | Float | Symbol | Regexp | Time
+      #   Invalid: Date, etc.
+      #
+      # Example: Upload document ids
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria)
+      #
+      # Example: Upload just the supplied column
+      #   criteria = User.where(state: 'FL')
+      #   job.record_count = job.upload_mongo_query(criteria, :zip_code)
+      def upload_mongo_query(criteria, *column_names, &block)
+        create_indexes
+        options = criteria.options
+
+        # Without a block extract the fields from the supplied criteria
+        if block
+          # Criteria is returning old school :fields instead of :projections
+          options[:projection] = options.delete(:fields) if options.key?(:fields)
+        else
+          column_names = column_names.collect(&:to_s)
+          column_names << '_id' if column_names.size.zero?
+
+          fields = options.delete(:fields) || {}
+          column_names.each { |col| fields[col] = 1 }
+          options[:projection] = fields
+
+          block =
+            if column_names.size == 1
+              column = column_names.first
+              ->(document) { document[column] }
+            else
+              ->(document) { column_names.collect { |c| document[c] } }
+            end
+        end
+
+        Writer::Input.collect(self) do |records|
+          # Drop down to the mongo driver level to avoid constructing a Model for each document returned
+          criteria.klass.collection.find(criteria.selector, options).each do |document|
+            records << block.call(document)
+          end
+        end
+      end
+
+      # Upload results from an Arel into RocketJob::SlicedJob.
+      #
+      # Params
+      #   column_names
+      #     When a block is not supplied, supply the names of the columns to be returned
+      #     and uploaded into the job
+      #     These columns are automatically added to the select list to reduce overhead
+      #
+      # If a Block is supplied it is passed the model returned from the database and should
+      # return the work item to be uploaded into the job.
+      #
+      # Returns [Integer] the number of records uploaded
+      #
+      # Example: Upload id's for all users
+      #   arel = User.all
+      #   job.record_count = job.upload_arel(arel)
+      #
+      # Example: Upload selected user id's
+      #   arel = User.where(country_code: 'US')
+      #   job.record_count = job.upload_arel(arel)
+      #
+      # Example: Upload user_name and zip_code
+      #   arel = User.where(country_code: 'US')
+      #   job.record_count = job.upload_arel(arel, :user_name, :zip_code)
+      def upload_arel(arel, *column_names, &block)
+        create_indexes
+        unless block
+          column_names = column_names.collect(&:to_sym)
+          column_names << :id if column_names.size.zero?
+
+          block =
+            if column_names.size == 1
+              column = column_names.first
+              ->(model) { model.send(column) }
+            else
+              ->(model) { column_names.collect { |c| model.send(c) } }
+            end
+          # find_each requires the :id column in the query
+          selection = column_names.include?(:id) ? column_names : column_names + [:id]
+          arel      = arel.select(selection)
+        end
+
+        Writer::Input.collect(self) do |records|
+          arel.find_each { |model| records << block.call(model) }
+        end
+      end
+
+      # Upload sliced ranges of integers as arrays of start and end ids
+      #
+      # Returns [Integer] the number of slices uploaded
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.record_count = job.upload_integer_range(200, 421)
+      #
+      #   # Equivalent to calling:
+      #   job.record_count =  job.insert([200,299])
+      #   job.record_count += job.insert([300,399])
+      #   job.record_count += job.insert([400,421])
+      def upload_integer_range(start_id, last_id)
+        create_indexes
+        count = 0
+        while start_id <= last_id
+          end_id = start_id + slice_size - 1
+          end_id = last_id if end_id > last_id
+          create!(records: [[start_id, end_id]])
+          start_id += slice_size
+          count += 1
+        end
+        count
+      end
+
+      # Upload sliced ranges of integers as arrays of start and end ids,
+      # starting with the last range first
+      #
+      # Returns [Integer] the number of slices uploaded
+      #
+      # Uploads one range per slice so that the response can return multiple records
+      # for each slice processed.
+      # Useful for when the highest order integer values should be processed before
+      # the lower integer value ranges. For example when processing every record
+      # in a database based on the id column
+      #
+      # Example
+      #   job.slice_size = 100
+      #   job.record_count = job.upload_integer_range_in_reverse_order(200, 421) * job.slice_size
+      #
+      #   # Equivalent to calling:
+      #   job.insert([400,421])
+      #   job.insert([300,399])
+      #   job.insert([200,299])
+      def upload_integer_range_in_reverse_order(start_id, last_id)
+        create_indexes
+        end_id = last_id
+        count  = 0
+        while end_id >= start_id
+          first_id = end_id - slice_size + 1
+          first_id = start_id if first_id.negative? || (first_id < start_id)
+          create!(records: [[first_id, end_id]])
+          end_id -= slice_size
+          count += 1
+        end
+        count
+      end
+
+      # Iterate over each failed record, if any
+      # Since each slice can only contain 1 failed record, only the failed
+      # record is returned along with the slice containing the exception
+      # details
+      #
+      # Example:
+      #   job.each_failed_record do |record, slice|
+      #     ap slice
+      #   end
+      #
+      def each_failed_record
+        failed.each do |slice|
+          if slice.exception && (record_number = slice.exception.record_number)
+            yield(slice.at(record_number - 1), slice)
+          end
+        end
+      end
+
+      # Requeue all failed slices
+      def requeue_failed
+        failed.update_all(
+          '$unset' => {worker_name: nil, started_at: nil},
+          '$set'   => {state: :queued}
+        )
+      end
+
+      # Requeue all running slices for a server or worker that is no longer available
+      def requeue_running(worker_name)
+        running.where(worker_name: /\A#{worker_name}/).update_all(
+          '$unset' => {worker_name: nil, started_at: nil},
+          '$set'   => {state: :queued}
+        )
+      end
+
+      # Returns the next slice to work on in id order
+      # Returns nil if there are currently no queued slices
+      #
+      # If a slice is in queued state it will be started and assigned to this worker
+      def next_slice(worker_name)
+        # TODO: Will it perform faster without the id sort?
+        # I.e. Just process on a FIFO basis?
+        document = all.queued.
+          sort('_id' => 1).
+          find_one_and_update(
+            {'$set' => {worker_name: worker_name, state: :running, started_at: Time.now}},
+            return_document: :after
+          )
+        document.collection_name = collection_name if document
+        document
+      end
+    end
+  end
+end
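End to end, a v4 batch job would exercise this new Sliced::Input collection roughly as follows (a sketch: MyBatchJob and the file name are hypothetical, and it assumes the RocketJob::Batch plugin added in this release wires job.input to Sliced::Input and exposes upload on the job):

    class MyBatchJob < RocketJob::Job
      include RocketJob::Batch

      self.slice_size = 100

      def perform(line)
        line.upcase # process one uploaded record per call
      end
    end

    job = MyBatchJob.new
    job.upload('customers.csv.gz') # streams inferred from the file extensions
    job.save!

    # After a run: inspect any failed records, then put those slices back on the queue.
    job.input.each_failed_record { |record, slice| puts record }
    job.input.requeue_failed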