procrastinator 0.6.1 → 1.0.0.pre.rc2

@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ require 'rake'
+
+ module Procrastinator
+    module Rake
+       # Rake task builder. Provide this in your Rakefile:
+       #
+       #    require 'procrastinator/rake/task'
+       #
+       #    # where scheduler is your Procrastinator::Scheduler instance
+       #    Procrastinator::Rake::DaemonTasks.define(scheduler: scheduler, pid_path: '/var/run')
+       #
+       class DaemonTasks
+          include ::Rake::Cloneable
+          include ::Rake::DSL
+
+          # Shorthand for DaemonTasks.new.define
+          #
+          # @param (see #define)
+          # @see DaemonTasks#define
+          def self.define(**args)
+             new.define(**args)
+          end
+
+          # Defines procrastinator:start, procrastinator:status, and procrastinator:stop Rake tasks that operate on the given scheduler.
+          # If provided a block, that block will run in the daemon process.
+          #
+          # @param scheduler [Procrastinator::Scheduler]
+          # @param pid_path [Pathname, File, String, nil]
+          def define(scheduler:, pid_path: nil, &block)
+             pid_path = Scheduler::DaemonWorking.normalize_pid(pid_path)
+
+             namespace :procrastinator do
+                task :start do
+                   scheduler.work.daemonized!(pid_path, &block)
+                end
+
+                task :status do
+                   if Scheduler::DaemonWorking.running?(pid_path)
+                      warn "Procrastinator instance running (pid #{ File.read(pid_path) })"
+                   else
+                      warn "No Procrastinator instance detected for #{ pid_path }"
+                   end
+                end
+
+                task :stop do
+                   Scheduler::DaemonWorking.halt!(pid_path)
+                end
+             end
+          end
+       end
+    end
+ end
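
For reference, a minimal Rakefile using the task builder above might look like the following sketch. The scheduler variable is assumed to be a Procrastinator::Scheduler built by your application's own setup code, and the pid path is illustrative:

   require 'procrastinator/rake/task'

   # defines rake procrastinator:start, procrastinator:status, and procrastinator:stop
   Procrastinator::Rake::DaemonTasks.define(scheduler: scheduler, pid_path: '/var/run')
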
@@ -0,0 +1,3 @@
+ # frozen_string_literal: true
+
+ require_relative 'daemon_tasks'
@@ -0,0 +1,393 @@
+ # frozen_string_literal: true
+
+ require 'stringio'
+
+ module Procrastinator
+    # A Scheduler object provides the API for client applications to manage delayed tasks.
+    #
+    # Use #delay to schedule new tasks, #reschedule to alter existing tasks, and #cancel to remove unwanted tasks.
+    #
+    # @author Robin Miller
+    class Scheduler
+       def initialize(config)
+          @config = config
+       end
+
+       # Records a new task to be executed at the given time.
+       #
+       # @param queue_name [Symbol] the symbol identifier for the queue to add a new task on
+       # @param run_at [Time, Integer] Optional time when this task should be executed. Defaults to the current time.
+       # @param data [Hash, Array, String, Integer] Optional simple data object to be provided to the task on execution.
+       # @param expire_at [Time, Integer] Optional time when the task should be abandoned
+       def delay(queue_name = nil, data: nil, run_at: Time.now, expire_at: nil)
+          raise ArgumentError, <<~ERR unless queue_name.nil? || queue_name.is_a?(Symbol)
+             must provide a queue name as the first argument. Received: #{ queue_name }
+          ERR
+
+          queue = @config.queue(name: queue_name)
+
+          queue.create(run_at: run_at, expire_at: expire_at, data: data)
+       end
+
+       # Alters an existing task to run at a new time, expire at a new time, or both.
+       #
+       # Call #to on the result and pass in the new :run_at and/or :expire_at.
+       #
+       # Example:
+       #
+       #    scheduler.reschedule(:alerts, data: {user_id: 5}).to(run_at: Time.now, expire_at: Time.now + 10)
+       #
+       # The identifier can include any data field stored in the task loader. Often this is the information in :data.
+       #
+       # @param queue [Symbol] the symbol identifier for the queue the task is on
+       # @param identifier [Hash] Some identifying information to find the appropriate task.
+       #
+       # @see TaskMetaData
+       def reschedule(queue, identifier)
+          UpdateProxy.new(@config, identifier: identifier.merge(queue: queue.to_s))
+       end
+
+       # Removes an existing task, as located by the given identifying information.
+       #
+       # The identifier can include any data field stored in the task loader. Often this is the information in :data.
+       #
+       # @param queue [Symbol] the symbol identifier for the queue the task is on
+       # @param identifier [Hash] Some identifying information to find the appropriate task.
+       #
+       # @see TaskMetaData
+       def cancel(queue, identifier)
+          queue = @config.queue(name: queue)
+
+          tasks = queue.read(identifier.merge(queue: queue.name.to_s))
+
+          raise "no task matches search: #{ identifier }" if tasks.empty?
+          raise "multiple tasks match search: #{ identifier }" if tasks.size > 1
+
+          queue.delete(tasks.first[:id])
+       end
+
+       # Spawns a new worker thread for each queue defined in the config
+       #
+       # @param queue_names [Array<String,Symbol>] Names of specific queues to act upon.
+       #    Omit or leave empty to act on all queues.
+       def work(*queue_names)
+          queue_names = @config.queues if queue_names.empty?
+
+          workers = queue_names.collect do |queue_name|
+             QueueWorker.new(queue: queue_name, config: @config)
+          end
+
+          WorkProxy.new(workers, @config)
+       end
+
+       # Provides a more natural syntax for rescheduling tasks
+       #
+       # @see Scheduler#reschedule
+       class UpdateProxy
+          def initialize(queue, identifier:)
+             @queue = queue
+             @identifier = identifier
+          end
+
+          def to(run_at: nil, expire_at: nil)
+             task = @queue.fetch_task(@identifier)
+
+             raise ArgumentError, 'you must provide at least :run_at or :expire_at' if run_at.nil? && expire_at.nil?
+
+             task.reschedule(expire_at: expire_at) if expire_at
+             task.reschedule(run_at: run_at) if run_at
+
+             new_data = task.to_h
+             new_data.delete(:queue)
+             new_data.delete(:data)
+             @queue.update(new_data.delete(:id), new_data)
+          end
+
+          alias at to
+       end
+
+       # Serial work style
+       #
+       # @see WorkProxy
+       module SerialWorking
+          # Work off the given number of tasks for each queue and return
+          # @param steps [integer] The number of tasks to complete.
+          def serially(steps: 1)
+             steps.times do
+                workers.each(&:work_one)
+             end
+          end
+       end
+
+       # Threaded work style
+       #
+       # @see WorkProxy
+       module ThreadedWorking
+          PROG_NAME = 'Procrastinator'
+
+          # Work off jobs per queue, each in its own thread.
+          #
+          # @param timeout Maximum number of seconds to run for. If nil, will run indefinitely.
+          def threaded(timeout: nil)
+             open_log
+             shutdown_on_interrupt
+
+             begin
+                @threads = spawn_threads
+
+                @logger.info "Procrastinator running. Process ID: #{ Process.pid }"
+                @threads.each do |thread|
+                   thread.join(timeout)
+                end
+             rescue StandardError => e
+                thread_crash(e)
+             ensure
+                @logger&.info 'Halting worker threads...'
+                shutdown!
+                @logger&.info 'Threads halted.'
+             end
+          end
+
+          private
+
+          def spawn_threads
+             @logger.info "Starting workers for queues: #{ @workers.collect(&:name).join(', ') }"
+
+             @workers.collect do |worker|
+                @logger.debug "Spawning thread: #{ worker.name }"
+                Thread.new(worker) do |w|
+                   Thread.current.abort_on_exception = true
+                   Thread.current.thread_variable_set(:name, w.name)
+
+                   begin
+                      worker.work!
+                   ensure
+                      worker.halt
+                   end
+                end
+             end
+          end
+
+          def thread_crash(error)
+             crashed_threads = (@threads || []).select { |t| t.status.nil? }.collect do |thread|
+                "Crashed thread: #{ thread.thread_variable_get(:name) }"
+             end
+
+             @logger.fatal <<~MSG
+                Crash detected in queue worker thread.
+                #{ crashed_threads.join("\n") }
+                #{ error.message }
+                #{ error.backtrace.join("\n\t") }
+             MSG
+          end
+
+          def shutdown_on_interrupt
+             Signal.trap('INT') do
+                warn "\n" # just to separate the shutdown log item
+                shutdown!
+             end
+          end
+
+          def shutdown!
+             (@threads || []).select(&:alive?).each(&:kill)
+          end
+
+          def open_log(quiet: false)
+             return if @logger
+
+             log_devs = []
+
+             log_devs << StringIO.new if quiet && !@config.log_level
+             log_devs << $stderr unless quiet
+             log_devs << log_path.open('a') if @config.log_level
+
+             multi = MultiIO.new(*log_devs)
+             multi.sync = true
+
+             @logger = Logger.new(multi,
+                                  progname: PROG_NAME.downcase,
+                                  level: @config.log_level || Logger::INFO,
+                                  formatter: Config::DEFAULT_LOG_FORMATTER)
+          end
+
+          def log_path
+             path = @config.log_dir / "#{ PROG_NAME.downcase }.log"
+             path.dirname.mkpath
+             # FileUtils.touch(log_path)
+             path
+          end
+
+          # IO Multiplexer that forwards calls to a list of IO streams.
+          class MultiIO
+             def initialize(*stream)
+                @streams = stream
+             end
+
+             (IO.methods << :path << :sync=).uniq.each do |method_name|
+                define_method(method_name) do |*args|
+                   able_streams(method_name).collect do |stream|
+                      stream.send(method_name, *args)
+                   end.last # forces consistent return result type for callers (but may lose some info)
+                end
+             end
+
+             private
+
+             def able_streams(method_name)
+                @streams.select { |stream| stream.respond_to?(method_name) }
+             end
+          end
+       end
+
+       # Daemonized work style
+       #
+       # @see WorkProxy
+       module DaemonWorking
+          PID_EXT = '.pid'
+          DEFAULT_PID_DIR = Pathname.new('/var/run/').freeze
+
+          # 15 chars is linux limit
+          MAX_PROC_LEN = 15
+
+          # Consumes the current process and turns it into a background daemon. A log will be started in the log
+          # directory defined in the configuration block.
+          #
+          # If pid_path ends with extension '.pid', the basename will be requested as process title (depending on OS
+          # support). An extensionless path is assumed to be a directory and a default filename (and proctitle) is used.
+          #
+          # @param pid_path [Pathname, File, String, nil] Path to where the process ID file is to be kept.
+          # @yield [void] Block to run after daemonization
+          def daemonized!(pid_path = nil, &block)
+             spawn_daemon(pid_path, &block)
+
+             threaded
+          end
+
+          # Normalizes the given pid path, including conversion to absolute path and defaults.
+          #
+          # @param pid_path [Pathname, String] path to normalize
+          def self.normalize_pid(pid_path)
+             pid_path = Pathname.new(pid_path || DEFAULT_PID_DIR)
+             pid_path /= "#{ PROG_NAME.downcase }#{ PID_EXT }" unless pid_path.extname == PID_EXT
+
+             pid_path.expand_path
+          end
+
+          # Stops the procrastinator process denoted by the provided pid file
+          def self.halt!(pid_path)
+             pid_path = normalize_pid pid_path
+
+             Process.kill('TERM', pid_path.read.to_i)
+          end
+
+          def self.running?(pid_path)
+             pid = normalize_pid(pid_path).read.to_i
+
+             # this raises Errno::ESRCH when no process found, therefore if found we should exit
+             Process.getpgid pid
+
+             true
+          rescue Errno::ESRCH
+             false
+          end
+
+          private
+
+          # "You, search from the spastic dentistry department down through disembowelment. You, cover children's dance
+          # recitals through holiday weekend IKEA. Go."
+          def spawn_daemon(pid_path, &block)
+             pid_path = DaemonWorking.normalize_pid pid_path
+
+             open_log quiet: true
+             @logger.info "Starting #{ PROG_NAME } daemon..."
+
+             print_debug_context
+
+             Process.daemon
+
+             manage_pid pid_path
+             rename_process pid_path
+
+             yield if block
+          rescue StandardError => e
+             @logger.fatal ([e.message] + e.backtrace).join("\n")
+             raise e
+          end
+
+          def manage_pid(pid_path)
+             ensure_unique(pid_path)
+
+             @logger.debug "Managing pid at path: #{ pid_path }"
+             pid_path.dirname.mkpath
+             pid_path.write Process.pid.to_s
+
+             at_exit do
+                if pid_path.exist?
+                   @logger.debug "Cleaning up pid file #{ pid_path }"
+                   pid_path.delete
+                end
+                @logger.info "Procrastinator (pid #{ Process.pid }) halted."
+             end
+          end
+
+          def ensure_unique(pid_path)
+             return unless pid_path.exist?
+
+             @logger.debug "Checking pid file #{ pid_path }"
+
+             if DaemonWorking.running? pid_path
+                hint = 'Either terminate that process or remove the pid file (if coincidental).'
+                msg = "Another process (pid #{ pid_path.read }) already exists for #{ pid_path }. #{ hint }"
+                @logger.fatal msg
+                raise ProcessExistsError, msg
+             else
+                @logger.warn "Replacing old pid file of defunct process (pid #{ pid_path.read }) at #{ pid_path }."
+             end
+          end
+
+          def print_debug_context
+             @logger.debug "Ruby Path: #{ ENV['RUBY_ROOT'] }"
+             @logger.debug "Bundler Path: #{ ENV['BUNDLE_BIN_PATH'] }"
+             # logname is the posix standard and is set by cron, so probably reliable.
+             @logger.debug "Runtime User: #{ ENV['LOGNAME'] || ENV['USERNAME'] }"
+          end
+
+          def rename_process(pid_path)
+             name = pid_path.basename(PID_EXT).to_s
+
+             if name.size > MAX_PROC_LEN
+                @logger.warn "Process name is longer than max length (#{ MAX_PROC_LEN }). Trimming to fit."
+                name = name[0, MAX_PROC_LEN]
+             end
+
+             if system('pidof', name, out: File::NULL)
+                @logger.warn "Another process is already named '#{ name }'. Consider the 'name:' keyword to distinguish."
+             end
+
+             @logger.debug "Renaming process to: #{ name }"
+             Process.setproctitle name
+          end
+
+          include ThreadedWorking
+       end
+
+       # DSL grammar object to enable chaining #work with the three work modes.
+       #
+       # @see Scheduler#work
+       class WorkProxy
+          include SerialWorking
+          include ThreadedWorking
+          include DaemonWorking
+
+          attr_reader :workers
+
+          def initialize(workers, config)
+             @workers = workers
+             @config = config
+          end
+       end
+    end
+
+    class ProcessExistsError < RuntimeError
+    end
+ end
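
As a rough usage sketch of the Scheduler API above: the :emails queue, the data values, and the pid path are illustrative, and `scheduler` is assumed to be a configured Procrastinator::Scheduler from your application's setup code:

   # queue a task for an hour from now, carrying simple data
   scheduler.delay(:emails, run_at: Time.now + 3600, data: { user_id: 5 })

   # move it earlier; the identifier matches on stored fields such as :data
   scheduler.reschedule(:emails, data: { user_id: 5 }).to(run_at: Time.now)

   # or remove it entirely
   scheduler.cancel(:emails, data: { user_id: 5 })

   # work off tasks in one of the three styles
   scheduler.work.serially(steps: 2)              # a fixed number of tasks per queue
   scheduler.work(:emails).threaded(timeout: 10)  # one thread per queue
   scheduler.work.daemonized!('/var/run/procrastinator.pid')  # background daemon
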
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ require 'forwardable'
+ require 'time'
+
+ module Procrastinator
+    # Wraps a task handler and task metadata
+    #
+    # @author Robin Miller
+    class Task
+       extend Forwardable
+
+       def_delegators :@metadata,
+                      :id, :run_at, :initial_run_at, :expire_at,
+                      :attempts, :last_fail_at, :last_error,
+                      :data, :to_h, :serialized_data,
+                      :queue, :reschedule
+
+       def initialize(metadata, handler)
+          @metadata = metadata
+          @handler = handler
+       end
+
+       def run
+          raise ExpiredError, "task is over its expiry time of #{ @metadata.expire_at.iso8601 }" if @metadata.expired?
+
+          @metadata.add_attempt
+          result = Timeout.timeout(queue.timeout) do
+             @handler.run
+          end
+          @metadata.clear_fails
+
+          try_hook(:success, result)
+       end
+
+       alias call run
+
+       # Records a failure in metadata and attempts to run the handler's #fail hook if present.
+       #
+       # @param error [StandardError] - the error that caused the failure
+       def fail(error)
+          hook = @metadata.failure(error)
+
+          try_hook(hook, error)
+          hook
+       end
+
+       def try_hook(method, *params)
+          @handler.send(method, *params) if @handler.respond_to? method
+       rescue StandardError => e
+          warn "#{ method.to_s.capitalize } hook error: #{ e.message }"
+       end
+
+       def to_s
+          "#{ @metadata.queue.name }##{ id } [#{ serialized_data }]"
+       end
+
+       class ExpiredError < RuntimeError
+       end
+
+       class AttemptsExhaustedError < RuntimeError
+       end
+    end
+ end
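
Task wraps a user-supplied handler object. Judging from the hooks invoked above (#run, plus the optional #success, #fail, and #final_fail reached through try_hook), a handler might be sketched roughly like this; the class name and method bodies are illustrative only:

   class SendWelcomeEmail
      def run
         # the actual work; raise to trigger the failure path
      end

      def success(result)
         # optional: called with #run's return value after a clean run
      end

      def fail(error)
         # optional: called with the error when the task will be retried
      end

      def final_fail(error)
         # optional: called when the task is expired or out of attempts
      end
   end
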
@@ -0,0 +1,172 @@
+ # frozen_string_literal: true
+
+ require 'time'
+
+ module Procrastinator
+    # TaskMetaData objects are State Patterns that record information about the work done on a particular task.
+    #
+    # It contains the specific information needed to run a task instance. Users define a task handler class, which
+    # describes the "how" of a task and TaskMetaData represents the "what" and "when".
+    #
+    # It contains task-specific data, timing information, and error records.
+    #
+    # All of its state is read-only.
+    #
+    # @author Robin Miller
+    #
+    # @!attribute [r] :id
+    #    @return [Integer] the unique identifier for this task
+    # @!attribute [r] :run_at
+    #    @return [Integer] Unix epoch timestamp of when to attempt this task next
+    # @!attribute [r] :initial_run_at
+    #    @return [Integer] Unix epoch timestamp of the original value for run_at
+    # @!attribute [r] :expire_at
+    #    @return [Integer] Unix epoch timestamp of when to consider this task obsolete
+    # @!attribute [r] :attempts
+    #    @return [Integer] The number of times this task has been attempted
+    # @!attribute [r] :last_error
+    #    @return [String] The message and stack trace of the error encountered on the most recent failed attempt
+    # @!attribute [r] :last_fail_at
+    #    @return [Integer] Unix epoch timestamp of when the last_error was recorded
+    # @!attribute [r] :data
+    #    @return [String] App-provided JSON data
+    class TaskMetaData
+       # These are the attributes expected to be in the persistence mechanism
+       EXPECTED_DATA = [:id, :run_at, :initial_run_at, :expire_at, :attempts, :last_error, :last_fail_at, :data].freeze
+
+       attr_reader(*EXPECTED_DATA, :queue)
+
+       def initialize(id: nil, queue: nil, data: nil,
+                      run_at: nil, initial_run_at: nil, expire_at: nil,
+                      attempts: 0, last_error: nil, last_fail_at: nil)
+          @id = id
+          @queue = queue || raise(ArgumentError, 'queue cannot be nil')
+          @run_at = get_time(run_at)
+          @initial_run_at = get_time(initial_run_at) || @run_at
+          @expire_at = get_time(expire_at)
+          @attempts = (attempts || 0).to_i
+          @last_error = last_error
+          @last_fail_at = get_time(last_fail_at)
+          @data = data ? JSON.parse(data, symbolize_names: true) : nil
+       end
+
+       def add_attempt
+          raise Task::AttemptsExhaustedError unless attempts_left?
+
+          @attempts += 1
+       end
+
+       # Records a failure on this task
+       #
+       # @param error [StandardError] The error to record
+       def failure(error)
+          @last_fail_at = Time.now
+          @last_error = %[Task failed: #{ error.message }\n#{ error.backtrace&.join("\n") }]
+
+          if retryable?
+             reschedule
+             :fail
+          else
+             @run_at = nil
+             :final_fail
+          end
+       end
+
+       def retryable?
+          attempts_left? && !expired?
+       end
+
+       def expired?
+          !@expire_at.nil? && @expire_at < Time.now
+       end
+
+       def attempts_left?
+          @queue.max_attempts.nil? || @attempts < @queue.max_attempts
+       end
+
+       def runnable?
+          !@run_at.nil? && @run_at <= Time.now
+       end
+
+       def successful?
+          raise 'you cannot check for success before running #work' if !expired? && @attempts <= 0
+
+          !expired? && @last_error.nil? && @last_fail_at.nil?
+       end
+
+       # Updates the run and/or expiry time. If neither is provided, will reschedule based on the rescheduling
+       # calculation algorithm.
+       #
+       # @param run_at - the new time to run this task
+       # @param expire_at - the new time to expire this task
+       def reschedule(run_at: nil, expire_at: nil)
+          validate_run_at(run_at, expire_at)
+
+          @expire_at = expire_at if expire_at
+
+          if run_at
+             @run_at = @initial_run_at = get_time(run_at)
+             clear_fails
+             @attempts = 0
+          end
+
+          return if run_at || expire_at
+
+          # (30 + n_attempts^4) seconds is chosen to rapidly expand
+          # but with the baseline of 30s to avoid hitting the disk too frequently.
+          @run_at += 30 + (@attempts ** 4) unless @run_at.nil?
+       end
+
+       def to_h
+          {id: @id,
+           queue: @queue.name.to_s,
+           run_at: @run_at,
+           initial_run_at: @initial_run_at,
+           expire_at: @expire_at,
+           attempts: @attempts,
+           last_fail_at: @last_fail_at,
+           last_error: @last_error,
+           data: serialized_data}
+       end
+
+       def serialized_data
+          JSON.dump(@data)
+       end
+
+       def clear_fails
+          @last_error = nil
+          @last_fail_at = nil
+       end
+
+       private
+
+       def get_time(data)
+          case data
+          when NilClass
+             nil
+          when Numeric
+             Time.at data
+          when String
+             Time.parse data
+          when Time
+             data
+          else
+             return data.to_time if data.respond_to? :to_time
+
+             raise ArgumentError, "Unknown data type: #{ data.class } (#{ data })"
+          end
+       end
+
+       def validate_run_at(run_at, expire_at)
+          return unless run_at
+
+          if expire_at && run_at > expire_at
+             raise ArgumentError, "new run_at (#{ run_at }) is later than new expire_at (#{ expire_at })"
+          end
+
+          return unless @expire_at && run_at > @expire_at
+
+          raise ArgumentError, "new run_at (#{ run_at }) is later than existing expire_at (#{ @expire_at })"
+       end
+    end
+ end
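
A note on the retry backoff in #reschedule above: when neither run_at nor expire_at is given, the extra delay added after a failed attempt grows as 30 + attempts^4 seconds. As a quick illustration:

   (1..5).map { |attempts| 30 + attempts ** 4 }
   # => [31, 46, 111, 286, 655]  (seconds added after attempts 1 through 5)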