rocketjob 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
+ # encoding: UTF-8
+ require 'sync_attr'
+ module RocketJob
+   # Centralized Configuration for Rocket Jobs
+   class Config
+     include MongoMapper::Document
+     include SyncAttr
+
+     # Prevent data in MongoDB from re-defining the model behavior
+     #self.static_keys = true
+
+     # Returns the single instance of the Rocket Job Configuration for this site
+     # in a thread-safe way
+     sync_cattr_reader(:instance) do
+       begin
+         first || create
+       rescue Exception => exc
+         # In case another process has already created the first document
+         first
+       end
+     end
+
+     # By enabling inline_mode jobs will be called in-line
+     # No server processes will be created, nor threads created
+     sync_cattr_accessor(:inline_mode) { false }
+
+     # The maximum number of worker threads to create on any one server
+     key :max_worker_threads, Integer, default: 10
+
+     # Number of seconds between heartbeats from Rocket Job Server processes
+     key :heartbeat_seconds, Integer, default: 15
+
+     # Maximum number of seconds a Server will wait before checking for new jobs
+     key :max_poll_seconds, Integer, default: 5
+
+     # Number of seconds between checking for:
+     # - Jobs with a higher priority
+     # - If the current job has been paused, or aborted
+     #
+     # Making this interval too short results in too many checks for job status
+     # changes instead of focusing on completing the active tasks
+     #
+     # Note:
+     #   Not all job types support pausing in the middle
+     key :re_check_seconds, Integer, default: 60
+
+     # Limit the number of workers per job class per server
+     #   'class_name' / group => 100
+     #key :limits, Hash
+
+     # Replace the MongoMapper default mongo connection for holding jobs
+     def self.mongo_connection=(connection)
+       connection(connection)
+       Server.connection(connection)
+       Job.connection(connection)
+
+       db_name = connection.db.name
+       set_database_name(db_name)
+       Server.set_database_name(db_name)
+       Job.set_database_name(db_name)
+     end
+
+     # Use a separate Mongo connection for the Records and Results
+     # Allows the records and results to be stored in a separate Mongo database
+     # from the Jobs themselves.
+     #
+     # It is recommended to set the work_connection to a local Mongo Server that
+     # is not replicated to another data center to prevent flooding the network
+     # with replication of data records and results.
+     # The jobs themselves can/should be replicated across data centers so that
+     # they are never lost.
+     def self.mongo_work_connection=(connection)
+       @@mongo_work_connection = connection
+     end
+
+     # Returns the Mongo connection for the Records and Results
+     def self.mongo_work_connection
+       @@mongo_work_connection || connection
+     end
+
+   end
+ end
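
The Config class above exposes two connection hooks: mongo_connection= points the Config, Server and Job collections at a (typically replicated) cluster, while mongo_work_connection= keeps the high-volume records and results on a separate server. A minimal initializer sketch of how an application might wire these up; the Mongo::MongoClient calls are an assumption about the legacy mongo 1.x driver that MongoMapper used at the time, and the URIs are placeholders:

    require 'rocketjob'

    # Jobs, servers and configuration live on a replicated cluster so they are never lost.
    # Assumption: the legacy 1.x driver API (Mongo::MongoClient.from_uri); substitute
    # whatever client object your MongoMapper setup already provides.
    RocketJob::Config.mongo_connection = Mongo::MongoClient.from_uri(
      'mongodb://jobs.example.org:27017/rocket_job'
    )

    # Records and results stay on a local, non-replicated server to avoid flooding
    # cross-data-center replication, as recommended in the comments above.
    RocketJob::Config.mongo_work_connection = Mongo::MongoClient.from_uri(
      'mongodb://localhost:27017/rocket_job_work'
    )

    # For tests or debugging, run jobs in-line without any server processes or threads.
    RocketJob::Config.inline_mode = true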
@@ -0,0 +1,44 @@
+ # encoding: UTF-8
+ module RocketJob
+   # Heartbeat
+   #
+   # Information from the server as at its last heartbeat
+   class Heartbeat
+     include MongoMapper::EmbeddedDocument
+
+     embedded_in :server
+
+     # Time of the last heartbeat received from this server
+     key :updated_at, Time
+
+     # Number of threads running as at the last heartbeat interval
+     key :active_threads, Integer
+     # Number of threads in the pool
+     # This number should grow and shrink between 1 and :max_threads
+     key :current_threads, Integer
+
+     #
+     # Process Information
+     #
+
+     # Percentage utilization for the server process alone
+     key :process_cpu, Integer
+     # Kilobytes used by the server process (Virtual & Physical)
+     key :process_mem_phys_kb, Integer
+     key :process_mem_virt_kb, Integer
+
+     #
+     # System Information
+     #
+
+     # Percentage utilization for the host machine
+     key :host_cpu, Integer
+     # Kilobytes available on the host machine (Physical)
+     key :host_mem_avail_phys_kbytes, Float
+     key :host_mem_avail_virt_kbytes, Float
+
+     # If available
+     key :load_average, Float
+   end
+ end
+
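Each Server document embeds one of these Heartbeat documents and refreshes it every Config#heartbeat_seconds. A console sketch for spotting servers that have stopped reporting, purely for illustration: it assumes the RocketJob::Server model (not part of this diff) exposes the embedded document as #heartbeat, and the staleness multiple is arbitrary:

    # Flag servers whose last heartbeat is older than a few heartbeat intervals.
    stale_after = 4 * RocketJob::Config.instance.heartbeat_seconds   # seconds

    RocketJob::Server.all.each do |server|
      hb = server.heartbeat                 # assumed accessor for the embedded document
      next if hb.nil? || hb.updated_at.nil?
      if Time.now - hb.updated_at > stale_after
        puts "Server #{server.id} appears dead: last heartbeat #{hb.updated_at}, " \
             "#{hb.active_threads}/#{hb.current_threads} threads, " \
             "host CPU #{hb.host_cpu}%, load #{hb.load_average}"
      end
    end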
@@ -0,0 +1,442 @@
+ # encoding: UTF-8
+ require 'aasm'
+ module RocketJob
+   # The base job from which all jobs are created
+   class Job
+     include MongoMapper::Document
+     include AASM
+     include SemanticLogger::Loggable
+     include Concerns::Worker
+
+     # Prevent data in MongoDB from re-defining the model behavior
+     #self.static_keys = true
+
+     #
+     # User definable attributes
+     #
+     # The following attributes are set when the job is created
+
+     # Description for this job instance
+     key :description, String
+
+     # Method that must be invoked to complete this job
+     key :perform_method, Symbol, default: :perform
+
+     # Priority of this job as it relates to other jobs [1..100]
+     #   1: Lowest Priority
+     # 100: Highest Priority
+     #  50: Default Priority
+     key :priority, Integer, default: 50
+
+     # Support running this job in the future
+     #   Also set when a job fails and needs to be re-tried in the future
+     key :run_at, Time
+
+     # If a job has not started by this time, destroy it
+     key :expires_at, Time
+
+     # When specified a job will be re-scheduled to run at its next scheduled interval
+     #   Format is the same as cron
+     key :schedule, String
+
+     # Job should be marked as repeatable when it can be run multiple times
+     # without changing the system state or modifying database contents.
+     # Setting to false will result in an additional lookup on the results collection
+     # before processing the record to ensure it was not previously processed.
+     # This is necessary for retrying a job.
+     key :repeatable, Boolean, default: true
+
+     # When the job completes destroy it from both the database and the UI
+     key :destroy_on_complete, Boolean, default: true
+
+     # Any user supplied arguments for the method invocation
+     # All keys must be UTF-8 strings. The values can be any valid BSON type:
+     #   Integer
+     #   Float
+     #   Time    (UTC)
+     #   String  (UTF-8)
+     #   Array
+     #   Hash
+     #   True
+     #   False
+     #   Symbol
+     #   nil
+     #   Regular Expression
+     #
+     # Note: Date is not supported, convert it to a UTC time
+     key :arguments, Array, default: []
+
+     # Whether to store the results from this job
+     key :collect_output, Boolean, default: false
+
+     # Raise or lower the log level when calling the job
+     #   Can be used to reduce log noise, especially during high volume calls
+     #   For debugging a single job can be logged at a low level such as :trace
+     #   Levels supported: :trace, :debug, :info, :warn, :error, :fatal
+     key :log_level, Symbol
+
+     # Only give access through the Web UI to this group identifier
+     #key :group, String
+
+     #
+     # Read-only attributes
+     #
+
+     # Current state, as set by AASM
+     key :state, Symbol, default: :queued
+
+     # When the job was created
+     key :created_at, Time, default: -> { Time.now }
+
+     # When processing started on this job
+     key :started_at, Time
+
+     # When the job completed processing
+     key :completed_at, Time
+
+     # Number of times that this job has failed to process
+     key :failure_count, Integer, default: 0
+
+     # The name of the server that this job is being processed by, or was processed by
+     key :server_name, String
+
+     #
+     # Values that jobs can update during processing
+     #
+
+     # Allow a job to update its estimated progress
+     # Any integer from 0 to 100
+     key :percent_complete, Integer, default: 0
+
+     # Store the last exception for this job
+     one :exception, class_name: 'RocketJob::JobException'
+
+     # Store the Hash result from this job if collect_output is true,
+     # and the job actually returned a Hash, otherwise nil
+     # Not applicable to SlicedJob jobs, since its output is stored in a
+     # separate collection
+     key :result, Hash
+
+     # Store all job types in this collection
+     set_collection_name 'rocket_job.jobs'
+
+     validates_presence_of :state, :failure_count, :created_at, :perform_method
+     # :repeatable, :destroy_on_complete, :collect_output, :arguments
+     validates :priority, inclusion: 1..100
+
+     # State Machine events and transitions
+     #
+     # For Job Record jobs, usual processing:
+     #   :queued -> :running -> :completed
+     #                       -> :paused  -> :running  ( manual )
+     #                       -> :failed  -> :running  ( manual )
+     #                       -> :retry   -> :running  ( future date )
+     #
+     # Any state other than :completed can transition manually to :aborted
+     #
+     # The work queue is priority based and then FIFO thereafter. This
+     # means that records from existing multi-record jobs will be completed before
+     # new jobs are started with the same priority.
+     # The exception is when the loader is not fast enough and the
+     # records queue is empty. In this case the next multi-record job will
+     # start loading too.
+     #
+     # Where: state: [:queued, :running], run_at: $lte: Time.now
+     # Sort: priority, created_at
+     #
+     # Index: state, run_at
+     aasm column: :state do
+       # Job has been created and is queued for processing ( Initial state )
+       state :queued, initial: true
+
+       # Job is running
+       state :running
+
+       # Job has completed processing ( End state )
+       state :completed
+
+       # Job is temporarily paused and no further processing will be completed
+       # until this job has been resumed
+       state :paused
+
+       # Job failed to process and needs to be manually re-tried or aborted
+       state :failed
+
+       # Job failed to process previously and is scheduled to be retried at a
+       # future date
+       state :retry
+
+       # Job was aborted and cannot be resumed ( End state )
+       state :aborted
+
+       event :start, before: :before_start do
+         transitions from: :queued, to: :running
+       end
+
+       event :complete, before: :before_complete do
+         after do
+           destroy if destroy_on_complete
+         end
+         transitions from: :running, to: :completed
+       end
+
+       event :fail, before: :before_fail do
+         transitions from: :queued, to: :failed
+         transitions from: :running, to: :failed
+         transitions from: :paused, to: :failed
+       end
+
+       event :retry, before: :before_retry do
+         transitions from: :failed, to: :running
+       end
+
+       event :pause, before: :before_pause do
+         transitions from: :running, to: :paused
+       end
+
+       event :resume, before: :before_resume do
+         transitions from: :paused, to: :running
+       end
+
+       event :abort, before: :before_abort do
+         transitions from: :running, to: :aborted
+         transitions from: :queued, to: :aborted
+         transitions from: :failed, to: :aborted
+         transitions from: :paused, to: :aborted
+       end
+     end
+
+     # Create indexes
+     def self.create_indexes
+       # Used by find_and_modify in .next_job
+       ensure_index({ state:1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
+       # Remove outdated index if present
+       drop_index("state_1_priority_1_created_at_1_sub_state_1") rescue nil
+       # Used by Mission Control
+       ensure_index [[:created_at, 1]]
+     end
+
+     # Requeue all jobs for the specified dead server
+     def self.requeue_dead_server(server_name)
+       collection.update(
+         { 'server_name' => server_name, 'state' => :running },
+         { '$unset' => { 'server_name' => true, 'started_at' => true }, '$set' => { 'state' => :queued } },
+         multi: true
+       )
+     end
+
+     # Pause all running jobs
+     def self.pause_all
+       where(state: 'running').each { |job| job.pause! }
+     end
+
+     # Resume all paused jobs
+     def self.resume_all
+       where(state: 'paused').each { |job| job.resume! }
+     end
+
+     # Returns [true|false] whether to collect the results from running this batch
+     def collect_output?
+       collect_output == true
+     end
+
+     # Returns [Time] how long the job has taken to complete
+     # If not started then it is the time spent waiting in the queue
+     def duration
+       seconds = if completed_at
+         completed_at - (started_at || created_at)
+       elsif started_at
+         Time.now - started_at
+       else
+         Time.now - created_at
+       end
+       Time.at(seconds)
+     end
+
+     # Returns [Hash] status of this job
+     def status(time_zone='Eastern Time (US & Canada)')
+       h = {
+         state: state,
+         description: description
+       }
+       h[:started_at] = started_at.in_time_zone(time_zone) if started_at
+
+       case
+       when running? || paused?
+         h[:paused_at] = completed_at.in_time_zone(time_zone) if paused?
+         h[:percent_complete] = percent_complete if percent_complete
+       when completed?
+         h[:completed_at] = completed_at.in_time_zone(time_zone)
+       when aborted?
+         h[:aborted_at] = completed_at.in_time_zone(time_zone)
+         h[:percent_complete] = percent_complete if percent_complete
+       when failed?
+         h[:failed_at] = completed_at.in_time_zone(time_zone)
+         h[:percent_complete] = percent_complete if percent_complete
+         h[:exception] = exception.nil? ? {} : exception.attributes
+       end
+       h[:duration] = duration.strftime('%H:%M:%S')
+       h
+     end
+
+     # Same basic formula for calculating retry interval as delayed_job and Sidekiq
+     # TODO Consider lowering the priority automatically after every retry?
+     def seconds_to_delay(count)
+       (count ** 4) + 15 + (rand(30)*(count+1))
+     end
+
+     # Patch the way MongoMapper reloads a model
+     def reload
+       if doc = collection.find_one(:_id => id)
+         load_from_database(doc)
+         self
+       else
+         raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
+       end
+     end
+
+     # After this model is read, convert any hashes in the arguments list to HashWithIndifferentAccess
+     def load_from_database(*args)
+       super
+       self.arguments = arguments.collect {|i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i } if arguments.present?
+     end
+
+     protected
+
+     # Before events that can be overridden by child classes
+     def before_start
+       self.started_at = Time.now
+     end
+
+     def before_complete
+       self.percent_complete = 100
+       self.completed_at = Time.now
+     end
+
+     def before_fail
+       self.completed_at = Time.now
+     end
+
+     def before_retry
+       self.completed_at = nil
+     end
+
+     def before_pause
+       self.completed_at = Time.now
+     end
+
+     def before_resume
+       self.completed_at = nil
+     end
+
+     def before_abort
+       self.completed_at = Time.now
+     end
+
+     ############################################################################
+     protected
+
+     # Returns the next job to work on in priority based order
+     # Returns nil if there are currently no queued jobs, and no batch jobs
+     #   already being processed that have records requiring processing
+     #
+     # Parameters
+     #   server_name [String]
+     #     Name of the server that will be processing this job
+     #
+     #   skip_job_ids [Array<BSON::ObjectId>]
+     #     Job ids to exclude when looking for the next job
+     #
+     # Note:
+     #   If a job is in queued state it will be started
+     def self.next_job(server_name, skip_job_ids = nil)
+       query = {
+         '$and' => [
+           {
+             '$or' => [
+               { 'state' => 'queued' }, # Jobs
+               { 'state' => 'running', 'sub_state' => :processing } # Slices
+             ]
+           },
+           {
+             '$or' => [
+               { run_at: { '$exists' => false } },
+               { run_at: { '$lte' => Time.now } }
+             ]
+           },
+         ]
+       }
+       query['_id'] = { '$nin' => skip_job_ids } if skip_job_ids && skip_job_ids.size > 0
+
+       if doc = find_and_modify(
+           query: query,
+           sort: [['priority', 'asc'], ['created_at', 'asc']],
+           update: { '$set' => { 'server_name' => server_name, 'state' => 'running' } }
+         )
+         job = load(doc)
+         unless job.running?
+           # Also update in-memory state and run call-backs
+           job.start
+           job.set(started_at: job.started_at)
+         end
+         job
+       end
+     end
+
+     ############################################################################
+     private
+
+     # Set exception information for this job
+     def set_exception(server_name, exc)
+       self.server_name = nil
+       self.failure_count += 1
+       self.exception = JobException.from_exception(exc)
+       exception.server_name = server_name
+       fail! unless failed?
+       logger.error("Exception running #{self.class.name}##{perform_method}", exc)
+     end
+
+     # Calls a method on this job, if it is defined
+     # Adds the event name to the method call if supplied
+     #
+     # Returns [Object] the result of calling the method
+     #
+     # Parameters
+     #   method [Symbol]
+     #     The method to call on this job
+     #
+     #   arguments [Array]
+     #     Arguments to pass to the method call
+     #
+     #   Options:
+     #     event: [Symbol]
+     #       Any one of: :before, :after
+     #       Default: None, just calls the method itself
+     #
+     #     log_level: [Symbol]
+     #       Log level to apply to silence logging during the call
+     #       Default: nil ( no change )
+     #
+     def call_method(method, arguments, options={})
+       options = options.dup
+       event = options.delete(:event)
+       log_level = options.delete(:log_level)
+       raise(ArgumentError, "Unknown #{self.class.name}#call_method options: #{options.inspect}") if options.size > 0
+
+       the_method = event.nil? ? method : "#{event}_#{method}".to_sym
+       if respond_to?(the_method)
+         method_name = "#{self.class.name}##{the_method}"
+         logger.info "Start #{method_name}"
+         logger.benchmark_info("Completed #{method_name}",
+           metric: "rocketjob/#{self.class.name.underscore}/#{the_method}",
+           log_exception: :full,
+           on_exception_level: :error,
+           silence: log_level
+         ) do
+           self.send(the_method, *arguments)
+         end
+       end
+     end
+
+   end
+ end
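
Everything a worker needs is carried on this one document: perform_method and arguments say what to run, priority and run_at say when, and the AASM block defines the lifecycle that Server processes drive via next_job. A minimal sketch of defining, queueing and steering a job; ImportJob and its argument are hypothetical, and the enqueue helpers added by Concerns::Worker are outside this diff, so the example sticks to the MongoMapper and AASM methods visible above:

    # Hypothetical job: a server invokes perform_method (:perform by default)
    # with the stored arguments after picking the job up via Job.next_job.
    class ImportJob < RocketJob::Job
      def perform(file_name)
        # ... import the file ...
      end
    end

    # Queue an instance using MongoMapper's create!; fields are the keys defined above.
    job = ImportJob.create!(
      description:         'Nightly customer import',
      arguments:           ['customers.csv'],
      priority:            20,         # validated to be within 1..100
      destroy_on_complete: false,      # keep the document around after completion
      collect_output:      true        # store a Hash result in job.result
    )

    # The AASM events defined above can also be driven manually, e.g. from a console:
    job.pause!  if job.running?
    job.resume! if job.paused?
    puts job.status                    # => { state: ..., description: ..., duration: ... }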