procrastinator 0.9.0 → 1.0.0.pre.rc2

Sign up to get free protection for your applications and to get access to all the features.
data/RELEASE_NOTES.md ADDED
@@ -0,0 +1,44 @@
1
+ # Release Notes
2
+
3
+ ## 1.0.0 ( )
4
+
5
+ ### Major Changes
6
+
7
+ * Minimum supported Ruby is now 2.4
8
+ * Added generic `Procrastinator::Config#log_with`
9
+ * Removed `Procrastinator::Config#log_inside`
10
+ * Removed `Procrastinator::Config#log_at_level`
11
+ * falsey log level is now the control for whether logging occurs, instead of falsey log directory
12
+ * Queues are managed as threads rather than sub processes
13
+ * These unnecessary methods no longer exist:
14
+ * `Procrastinator.test_mode`
15
+ * `Procrastinator::Config#enable_test_mode`
16
+ * `Procrastinator::Config#test_mode?`
17
+ * `Procrastinator::Config#test_mode`
18
+ * `Procrastinator::Config#prefix`
19
+ * `Procrastinator::Config#pid_dir`
20
+ * `Procrastinator::Config#each_process`
21
+ * `Procrastinator::Config#run_process_block`
22
+ * Removed use of envvar `PROCRASTINATOR_STOP`
23
+ * `Procrastinator::QueueManager` is merged into `Procrastinator::Scheduler`
24
+ * Removed rake task to halt queue processes
25
+ * Renamed `Procrastinator::Config#provide_context` to `provide_container`
26
+ * You must now call `Scheduler#work` on the result of `Procrastinator.config`
27
+ * Use a dedicated process monitor (like `monit`) instead in production environments
28
+ * Supply a block to `daemonized!` to run code in the spawned process.
29
+ * `max_tasks` is removed as it only added concurrency complexity
30
+ * Data is now stored as JSON instead of YAML
31
+ * Added `with_store` that applies its settings to its block
32
+ * `load_with` has been removed
33
+ * Removed `task_attr` and `Procrastinator::Task` module. Tasks are now duck-type checked for accessors instead.
34
+
35
+ ### Minor Changes
36
+
37
+ * Started release notes file
38
+ * Updated development gems
39
+ * Logs now include the queue name in log lines
40
+ * Logs can now set the shift size or age (like Ruby's Logger)
41
+
42
+ ### Bugfixes
43
+
44
+ * none
@@ -1,45 +1,59 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'time'
4
+
3
5
  module Procrastinator
4
6
  # Configuration object (State Pattern) used to coordinate settings across
5
7
  # various components within Procrastinator.
6
8
  #
7
- # All of its state is read-only, set using the methods in the DSL module below.
9
+ # It is immutable after init; use the config DSL in the configuration block to set its state.
8
10
  #
9
11
  # @author Robin Miller
10
12
  #
11
- # @!attribute [r] :test_mode?
12
- # @return [Boolean] Whether test mode is enabled
13
13
  # @!attribute [r] :queues
14
14
  # @return [Array] List of defined queues
15
- # @!attribute [r] :context
16
- # @return [Object] Provided context object that will be forwarded to tasks
17
- # @!attribute [r] :loader
18
- # @return [Object] Provided persistence strategy object to use for task I/O
15
+ # @!attribute [r] :container
16
+ # @return [Object] Container object that will be forwarded to tasks
19
17
  # @!attribute [r] :log_dir
20
18
  # @return [Pathname] Directory to write log files in
21
19
  # @!attribute [r] :log_level
22
20
  # @return [Integer] Logging level to use
23
- # @!attribute [r] :prefix
24
- # @return [String] The prefix to prepend to process names
25
- # @!attribute [r] :pid_dir
26
- # @return [Pathname] Directory to write process ID records in
21
+ # @!attribute [r] :log_shift_age
22
+ # @return [Integer] Number of previous files to keep (see Ruby Logger for details)
23
+ # @!attribute [r] :log_shift_size
24
+ # @return [Integer] Filesize before rotating to a new logfile (see Ruby Logger for details)
27
25
  class Config
28
- attr_reader :queues, :log_dir, :log_level, :prefix, :test_mode, :context, :loader, :pid_dir
29
- alias test_mode? test_mode
30
-
31
- DEFAULT_LOG_DIRECTORY = 'log/'
32
- DEFAULT_PID_DIRECTORY = 'pid/'
26
+ attr_reader :queues, :log_dir, :log_level, :log_shift_age, :log_shift_size, :container
27
+
28
+ DEFAULT_LOG_DIRECTORY = Pathname.new('log').freeze
29
+ DEFAULT_LOG_SHIFT_AGE = 0
30
+ DEFAULT_LOG_SHIFT_SIZE = 2 ** 20 # 1 MB
31
+ DEFAULT_LOG_FORMATTER = proc do |severity, datetime, progname, msg|
32
+ [datetime.iso8601(8),
33
+ severity,
34
+ "#{ progname } (#{ Process.pid }):",
35
+ msg].join("\t") << "\n"
36
+ end
33
37
 
34
38
  def initialize
35
- @test_mode = false
36
- @queues = []
37
- @loader = nil
38
- @context = nil
39
- @subprocess_block = nil
40
- @log_dir = Pathname.new(DEFAULT_LOG_DIRECTORY)
41
- @log_level = Logger::INFO
42
- @pid_dir = Pathname.new(DEFAULT_PID_DIRECTORY)
39
+ @queues = []
40
+ @container = nil
41
+ @log_dir = DEFAULT_LOG_DIRECTORY
42
+ @log_level = Logger::INFO
43
+ @log_shift_age = DEFAULT_LOG_SHIFT_AGE
44
+ @log_shift_size = DEFAULT_LOG_SHIFT_SIZE
45
+
46
+ with_store(csv: TaskStore::SimpleCommaStore::DEFAULT_FILE) do
47
+ if block_given?
48
+ yield(self)
49
+ raise SetupError, SetupError::ERR_NO_QUEUE if @queues.empty?
50
+ end
51
+ end
52
+
53
+ @log_dir = @log_dir.expand_path
54
+
55
+ @queues.freeze
56
+ freeze
43
57
  end
44
58
 
45
59
  # Collection of all of the methods intended for use within Procrastinator.setup
@@ -47,139 +61,89 @@ module Procrastinator
47
61
  # @see Procrastinator
48
62
  module DSL
49
63
  # Assigns a task loader
50
- # It should be called in an each_process block as well so that they get
51
- # distinct resources (eg. DB connections) from the parent process.
52
- def load_with(loader)
53
- if loader.is_a? Hash
54
- unless loader.key? :location
55
- raise ArgumentError, 'Must pass keyword :location if specifying a location for CSV file'
56
- end
57
-
58
- loader = Loader::CSVLoader.new(loader[:location])
59
- end
60
-
61
- raise MalformedTaskLoaderError, 'task loader cannot be nil' if loader.nil?
64
+ def with_store(store)
65
+ raise(ArgumentError, 'with_store must be provided a block') unless block_given?
62
66
 
63
- [:read, :create, :update, :delete].each do |method|
64
- unless loader.respond_to? method
65
- raise MalformedTaskLoaderError, "task loader #{ loader.class } must respond to ##{ method }"
66
- end
67
- end
68
-
69
- @loader = loader
67
+ old_store = @default_store
68
+ @default_store = interpret_store(store)
69
+ yield
70
+ @default_store = old_store
70
71
  end
71
72
 
72
- def provide_context(context)
73
- @context = context
74
- end
75
-
76
- # Accepts a block that will be executed on the queue sub-processes. Use it to control resource allocations.
77
- def each_process(prefix: nil, pid_dir: DEFAULT_PID_DIRECTORY, &block)
78
- @prefix = prefix
79
- @subprocess_block = block
80
- @pid_dir = Pathname.new(pid_dir)
73
+ def provide_container(container)
74
+ @container = container
81
75
  end
82
76
 
83
77
  def define_queue(name, task_class, properties = {})
84
78
  raise ArgumentError, 'queue name cannot be nil' if name.nil?
85
79
  raise ArgumentError, 'queue task class cannot be nil' if task_class.nil?
86
80
 
87
- verify_task_class(task_class)
88
-
89
- @queues << Queue.new(properties.merge(name: name, task_class: task_class))
90
- end
91
-
92
- def enable_test_mode
93
- @test_mode = true
94
- end
81
+ properties[:store] = interpret_store(properties[:store]) if properties.key? :store
95
82
 
96
- def log_inside(path)
97
- @log_dir = path ? Pathname.new(path) : path
83
+ @queues << Queue.new(**{name: name, task_class: task_class, store: @default_store}.merge(properties))
98
84
  end
99
85
 
100
- def log_at_level(lvl)
101
- @log_level = lvl
86
+ # Sets details of logging behaviour
87
+ #
88
+ # @param directory [Pathname,String] the directory to save logs within.
89
+ # @param level [Logger::UNKNOWN,Logger::FATAL,Logger::ERROR,Logger::WARN,Logger::INFO,Logger::DEBUG,Integer,Boolean] the Ruby Logger level to use. If falsey, no logging is performed.
90
+ # @param shift_age [Integer] number of old log files to keep (see Ruby Logger for details)
91
+ # @param shift_size [Integer] filesize before log is rotated to a fresh file (see Ruby Logger for details)
92
+ def log_with(directory: @log_dir, level: @log_level, shift_age: @log_shift_age, shift_size: @log_shift_size)
93
+ @log_dir = directory ? Pathname.new(directory) : directory
94
+ @log_level = level
95
+ @log_shift_age = shift_age
96
+ @log_shift_size = shift_size
102
97
  end
103
98
  end
104
99
 
105
100
  include DSL
106
101
 
107
- def setup(test_mode = false)
108
- yield(self)
109
-
110
- enable_test_mode if test_mode
111
-
112
- load_with(Loader::CSVLoader.new) unless @loader
113
-
114
- raise 'setup block must call #define_queue on the environment' if @queues.empty?
115
-
116
- if @context && @queues.none? { |queue| queue.task_class.method_defined?(:context=) }
117
- raise <<~ERROR
118
- setup block called #provide_context, but no queue task classes import :context.
119
-
120
- Add this to your Task classes that expect to receive the context:
121
-
122
- include Procrastinator::Task
123
-
124
- task_attr :context
125
- ERROR
126
- end
127
-
128
- self
102
+ def queue(name: nil)
103
+ queue = if name
104
+ @queues.find do |q|
105
+ q.name == name
106
+ end
107
+ else
108
+ if name.nil? && @queues.length > 1
109
+ raise ArgumentError,
110
+ "queue must be specified when more than one is defined. #{ known_queues }"
111
+ end
112
+
113
+ @queues.first
114
+ end
115
+
116
+ raise ArgumentError, "there is no :#{ name } queue registered. #{ known_queues }" unless queue
117
+
118
+ queue
129
119
  end
130
120
 
131
- def queues_string
132
- # it drops the colon if you call #to_s on a symbol, so we need to add it back
133
- @queues.map { |queue| ":#{ queue.name }" }.join(', ')
134
- end
121
+ private
135
122
 
136
- def single_queue?
137
- @queues.size == 1
123
+ def known_queues
124
+ "Known queues are: #{ @queues.map { |queue| ":#{ queue.name }" }.join(', ') }"
138
125
  end
139
126
 
140
- def run_process_block
141
- @subprocess_block&.call
142
- end
127
+ def interpret_store(store)
128
+ raise(ArgumentError, 'task store cannot be nil') if store.nil?
143
129
 
144
- def queue(name: nil)
145
- if name
146
- @queues.find do |q|
147
- q.name == name
130
+ case store
131
+ when Hash
132
+ store_strategy = :csv
133
+ unless store.key? store_strategy
134
+ raise ArgumentError, "Must pass keyword :#{ store_strategy } if specifying a location for CSV file"
148
135
  end
136
+
137
+ TaskStore::SimpleCommaStore.new(store[store_strategy])
138
+ when String, Pathname
139
+ TaskStore::SimpleCommaStore.new(store)
149
140
  else
150
- @queues.first
141
+ store
151
142
  end
152
143
  end
153
144
 
154
- private
155
-
156
- def verify_task_class(task_class)
157
- unless task_class.method_defined? :run
158
- raise MalformedTaskError, "task #{ task_class } does not support #run method"
159
- end
160
-
161
- # We're checking the interface compliance on init because it's one of those extremely rare cases where
162
- # you'd want to know early because the sub-processes would crash async, which is harder to debug.
163
- # It's a bit belt-and suspenders, but UX is important for devs, too. - robinetmiller
164
- if task_class.method_defined?(:run) && task_class.instance_method(:run).arity.positive?
165
- err = "task #{ task_class } cannot require parameters to its #run method"
166
-
167
- raise MalformedTaskError, err
168
- end
169
-
170
- expected_arity = 1
171
-
172
- [:success, :fail, :final_fail].each do |method_name|
173
- next unless task_class.method_defined?(method_name)
174
- next if task_class.instance_method(method_name).arity == expected_arity
175
-
176
- err = "task #{ task_class } must accept #{ expected_arity } parameter to its ##{ method_name } method"
177
-
178
- raise MalformedTaskError, err
179
- end
145
+ class SetupError < RuntimeError
146
+ ERR_NO_QUEUE = 'setup block must call #define_queue on the environment'
180
147
  end
181
148
  end
182
-
183
- class MalformedTaskLoaderError < StandardError
184
- end
185
149
  end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'ostruct'
5
+ require 'timeout'
6
+ # require 'forwardable'
7
+ require 'delegate'
8
+ require_relative 'task'
9
+
10
+ module Procrastinator
11
+ # Task wrapper that adds logging to each step.
12
+ #
13
+ # @author Robin Miller
14
+ #
15
+ # @see Task
16
+ class LoggedTask < DelegateClass(Task)
17
+ # extend Forwardable
18
+ #
19
+ # def_delegators :@task, :id, :to_h
20
+
21
+ attr_reader :logger
22
+
23
+ alias task __getobj__
24
+
25
+ def initialize(task, logger: Logger.new(StringIO.new))
26
+ super task
27
+ @logger = logger || raise(ArgumentError, 'Logger cannot be nil')
28
+ end
29
+
30
+ def run
31
+ task.run
32
+
33
+ begin
34
+ @logger.info("Task completed: #{ task }")
35
+ rescue StandardError => e
36
+ warn "Task logging error: #{ e.message }"
37
+ end
38
+ end
39
+
40
+ def fail(error)
41
+ hook = task.fail(error)
42
+ begin
43
+ @logger.error("Task #{ hook }ed: #{ task }")
44
+ rescue StandardError => e
45
+ warn "Task logging error: #{ e.message }"
46
+ end
47
+ hook
48
+ end
49
+ end
50
+ end
@@ -14,37 +14,193 @@ module Procrastinator
14
14
  # @!attribute [r] :max_attempts
15
15
  # @return [Object] Maximum number of attempts for tasks in this queue.
16
16
  # @!attribute [r] :update_period
17
- # @return [Pathname] Delay (seconds) between reloads of tasks from the task loader.
18
- # @!attribute [r] :max_tasks
19
- # @return [Pathname] The maximum number of tasks to run concurrently within a queue worker process.
17
+ # @return [Pathname] Delay (seconds) between reloads of tasks from the task store.
20
18
  class Queue
19
+ extend Forwardable
20
+
21
21
  DEFAULT_TIMEOUT = 3600 # in seconds; one hour total
22
22
  DEFAULT_MAX_ATTEMPTS = 20
23
23
  DEFAULT_UPDATE_PERIOD = 10 # seconds
24
- DEFAULT_MAX_TASKS = 10
25
24
 
26
- attr_reader :name, :task_class, :max_attempts, :timeout, :update_period, :max_tasks
25
+ attr_reader :name, :max_attempts, :timeout, :update_period, :task_store, :task_class
26
+
27
+ alias store task_store
28
+ alias storage task_store
29
+
30
+ def_delegators :@task_store, :read, :update, :delete
27
31
 
28
32
  # Timeout is in seconds
29
- def initialize(name:,
30
- task_class:,
33
+ def initialize(name:, task_class:,
31
34
  max_attempts: DEFAULT_MAX_ATTEMPTS,
32
35
  timeout: DEFAULT_TIMEOUT,
33
36
  update_period: DEFAULT_UPDATE_PERIOD,
34
- max_tasks: DEFAULT_MAX_TASKS)
35
- raise ArgumentError, ':name may not be nil' unless name
36
- raise ArgumentError, ':task_class may not be nil' unless task_class
37
+ store: TaskStore::SimpleCommaStore.new)
38
+ raise ArgumentError, ':name cannot be nil' unless name
37
39
 
40
+ raise ArgumentError, ':task_class cannot be nil' unless task_class
38
41
  raise ArgumentError, 'Task class must be initializable' unless task_class.respond_to? :new
39
42
 
40
- raise ArgumentError, 'timeout cannot be negative' if timeout&.negative?
43
+ raise ArgumentError, ':timeout cannot be negative' if timeout&.negative?
41
44
 
42
45
  @name = name.to_s.strip.gsub(/[^A-Za-z0-9]+/, '_').to_sym
43
46
  @task_class = task_class
47
+ @task_store = store
44
48
  @max_attempts = max_attempts
45
49
  @timeout = timeout
46
50
  @update_period = update_period
47
- @max_tasks = max_tasks
51
+
52
+ validate!
53
+
54
+ freeze
55
+ end
56
+
57
+ def next_task(logger: Logger.new(StringIO.new), container: nil, scheduler: nil)
58
+ metadata = next_metas.find(&:runnable?)
59
+
60
+ return nil unless metadata
61
+
62
+ task = Task.new(metadata, task_handler(data: metadata.data,
63
+ container: container,
64
+ logger: logger,
65
+ scheduler: scheduler))
66
+
67
+ LoggedTask.new(task, logger: logger)
68
+ end
69
+
70
+ def fetch_task(identifier)
71
+ identifier[:data] = JSON.dump(identifier[:data]) if identifier[:data]
72
+
73
+ tasks = read(**identifier)
74
+
75
+ raise "no task found matching #{ identifier }" if tasks.nil? || tasks.empty?
76
+ raise "too many (#{ tasks.size }) tasks match #{ identifier }. Found: #{ tasks }" if tasks.size > 1
77
+
78
+ TaskMetaData.new(tasks.first.merge(queue: self))
79
+ end
80
+
81
+ def create(run_at:, expire_at:, data:)
82
+ if data.nil? && expects_data?
83
+ raise ArgumentError, "task #{ @task_class } expects to receive :data. Provide :data to #delay."
84
+ end
85
+
86
+ unless data.nil? || expects_data?
87
+ raise MalformedTaskError, <<~ERROR
88
+ found unexpected :data argument. Either do not provide :data when scheduling a task,
89
+ or add this in the #{ @task_class } class definition:
90
+ attr_accessor :data
91
+ ERROR
92
+ end
93
+
94
+ # TODO: shorten to using slice once updated to Ruby 2.5+
95
+ attrs = {queue: self, run_at: run_at, initial_run_at: run_at, expire_at: expire_at, data: JSON.dump(data)}
96
+
97
+ create_data = TaskMetaData.new(**attrs).to_h
98
+ create_data.delete(:id)
99
+ create_data.delete(:attempts)
100
+ create_data.delete(:last_fail_at)
101
+ create_data.delete(:last_error)
102
+ @task_store.create(**create_data)
103
+ end
104
+
105
+ def expects_data?
106
+ @task_class.method_defined?(:data=)
107
+ end
108
+
109
+ private
110
+
111
+ def task_handler(data: nil, container: nil, logger: nil, scheduler: nil)
112
+ handler = @task_class.new
113
+ handler.data = data if handler.respond_to?(:data=)
114
+ handler.container = container
115
+ handler.logger = logger
116
+ handler.scheduler = scheduler
117
+ handler
118
+ end
119
+
120
+ def next_metas
121
+ tasks = read(queue: @name).reject { |t| t[:run_at].nil? }.collect do |t|
122
+ t.to_h.delete_if { |key| !TaskMetaData::EXPECTED_DATA.include?(key) }.merge(queue: self)
123
+ end
124
+
125
+ sort_tasks(tasks.collect { |t| TaskMetaData.new(**t) })
126
+ end
127
+
128
+ def sort_tasks(tasks)
129
+ # TODO: improve this
130
+ # shuffling and re-sorting to avoid worst case O(n^2) when receiving already sorted data
131
+ # on quicksort (which is default ruby sort). It is not unreasonable that the persister could return sorted
132
+ # results
133
+ # Ideally, we'd use a better algo than qsort for this, but this will do for now
134
+ tasks.shuffle.sort_by(&:run_at)
135
+ end
136
+
137
+ # Internal queue validator
138
+ module QueueValidation
139
+ def validate!
140
+ verify_task_class!
141
+ verify_task_store!
142
+ end
143
+
144
+ def verify_task_class!
145
+ verify_run_method!
146
+ verify_accessors!
147
+ verify_hooks!
148
+ end
149
+
150
+ # The interface compliance is checked on init because it's one of those rare cases where you want to know early;
151
+ # otherwise, you wouldn't know until task execution and that could be far in the future.
152
+ # UX is important for devs, too.
153
+ # - R
154
+ def verify_run_method!
155
+ unless @task_class.method_defined? :run
156
+ raise MalformedTaskError, "task #{ @task_class } does not support #run method"
157
+ end
158
+
159
+ return unless @task_class.instance_method(:run).arity.positive?
160
+
161
+ raise MalformedTaskError, "task #{ @task_class } cannot require parameters to its #run method"
162
+ end
163
+
164
+ def verify_accessors!
165
+ [:logger, :container, :scheduler].each do |method_name|
166
+ next if @task_class.method_defined?(method_name) && @task_class.method_defined?("#{ method_name }=")
167
+
168
+ raise MalformedTaskError, <<~ERR
169
+ Task handler is missing a #{ method_name } accessor. Add this to the #{ @task_class } class definition:
170
+ attr_accessor :logger, :container, :scheduler
171
+ ERR
172
+ end
173
+ end
174
+
175
+ def verify_hooks!
176
+ expected_arity = 1
177
+
178
+ [:success, :fail, :final_fail].each do |method_name|
179
+ next unless @task_class.method_defined?(method_name)
180
+ next if @task_class.instance_method(method_name).arity == expected_arity
181
+
182
+ err = "task #{ @task_class } must accept #{ expected_arity } parameter to its ##{ method_name } method"
183
+
184
+ raise MalformedTaskError, err
185
+ end
186
+ end
187
+
188
+ def verify_task_store!
189
+ raise ArgumentError, ':store cannot be nil' if @task_store.nil?
190
+
191
+ [:read, :create, :update, :delete].each do |method|
192
+ unless @task_store.respond_to? method
193
+ raise MalformedTaskStoreError, "task store #{ @task_store.class } must respond to ##{ method }"
194
+ end
195
+ end
196
+ end
48
197
  end
198
+ include QueueValidation
199
+ end
200
+
201
+ class MalformedTaskError < StandardError
202
+ end
203
+
204
+ class MalformedTaskStoreError < RuntimeError
49
205
  end
50
206
  end