RubyGems - chore-core - Versions diffs - 1.5.2 - Mend

chore-core 1.5.2

Files changed (59) hide show

checksums.yaml +15 -0
data/LICENSE.txt +20 -0
data/README.md +260 -0
data/Rakefile +32 -0
data/bin/chore +34 -0
data/chore-core.gemspec +46 -0
data/lib/chore/cli.rb +232 -0
data/lib/chore/configuration.rb +13 -0
data/lib/chore/consumer.rb +52 -0
data/lib/chore/duplicate_detector.rb +56 -0
data/lib/chore/fetcher.rb +31 -0
data/lib/chore/hooks.rb +25 -0
data/lib/chore/job.rb +103 -0
data/lib/chore/json_encoder.rb +18 -0
data/lib/chore/manager.rb +47 -0
data/lib/chore/publisher.rb +29 -0
data/lib/chore/queues/filesystem/consumer.rb +128 -0
data/lib/chore/queues/filesystem/filesystem_queue.rb +49 -0
data/lib/chore/queues/filesystem/publisher.rb +45 -0
data/lib/chore/queues/sqs/consumer.rb +121 -0
data/lib/chore/queues/sqs/publisher.rb +55 -0
data/lib/chore/queues/sqs.rb +38 -0
data/lib/chore/railtie.rb +18 -0
data/lib/chore/signal.rb +175 -0
data/lib/chore/strategies/consumer/batcher.rb +76 -0
data/lib/chore/strategies/consumer/single_consumer_strategy.rb +34 -0
data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +81 -0
data/lib/chore/strategies/worker/forked_worker_strategy.rb +221 -0
data/lib/chore/strategies/worker/single_worker_strategy.rb +39 -0
data/lib/chore/tasks/queues.task +11 -0
data/lib/chore/unit_of_work.rb +17 -0
data/lib/chore/util.rb +18 -0
data/lib/chore/version.rb +9 -0
data/lib/chore/worker.rb +117 -0
data/lib/chore-core.rb +1 -0
data/lib/chore.rb +218 -0
data/spec/chore/cli_spec.rb +182 -0
data/spec/chore/consumer_spec.rb +36 -0
data/spec/chore/duplicate_detector_spec.rb +62 -0
data/spec/chore/fetcher_spec.rb +38 -0
data/spec/chore/hooks_spec.rb +44 -0
data/spec/chore/job_spec.rb +80 -0
data/spec/chore/json_encoder_spec.rb +11 -0
data/spec/chore/manager_spec.rb +39 -0
data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +71 -0
data/spec/chore/queues/sqs/consumer_spec.rb +136 -0
data/spec/chore/queues/sqs/publisher_spec.rb +74 -0
data/spec/chore/queues/sqs_spec.rb +37 -0
data/spec/chore/signal_spec.rb +244 -0
data/spec/chore/strategies/consumer/batcher_spec.rb +93 -0
data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +23 -0
data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +105 -0
data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +281 -0
data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +36 -0
data/spec/chore/worker_spec.rb +134 -0
data/spec/chore_spec.rb +108 -0
data/spec/spec_helper.rb +58 -0
data/spec/test_job.rb +7 -0
metadata +194 -0

data/lib/chore/consumer.rb ADDED Viewed

@@ -0,0 +1,52 @@
+module Chore
+  # Raised when Chore is booting up, but encounters a set of configuration that is impossible to boot from. Typically
+  # you'll find additional information around the cause of the exception by examining the logfiles
+  class TerribleMistake < Exception # NOTE: subclasses Exception directly, so a bare `rescue` (StandardError) will not catch it
+    # You can raise this exception if your queue is in a terrible state and must shut down
+  end
+  # Base class for a Chore Consumer. Provides the basic interface to adhere to for building custom
+  # Chore Consumers. Subclasses must implement #consume, #reject and #complete; the versions here raise NotImplementedError.
+  class Consumer
+    attr_accessor :queue_name
+    def initialize(queue_name, opts={}) # opts is not used by this base implementation
+      @queue_name = queue_name
+      @running = true # a consumer starts out running; #stop flips this to false
+    end
+    # Causes the underlying connection for all consumers of this class to be reset. Useful for the case where
+    # the consumer is being used across a fork. Should be overridden in consumers (but is not required); the base version is a no-op.
+    def self.reset_connection!
+    end
+    # Consume takes a block with an arity of two. The two params are
+    # |message_id,message_body| where message_id is any object that the
+    # consumer will need to be able to act on a message later (reject, complete, etc).
+    # NOTE(review): Queues::Filesystem::Consumer yields five arguments (id, queue name, timeout, body, previous attempts) - confirm the intended arity.
+    def consume(&block)
+      raise NotImplementedError
+    end
+    # Reject should put a message back on a queue to be processed again later. It takes
+    # a message_id as returned via consume.
+    def reject(message_id)
+      raise NotImplementedError
+    end
+    # Complete should mark a message as finished. It takes a message_id as returned via consume
+    def complete(message_id)
+      raise NotImplementedError
+    end
+    # Perform any shutdown behavior and stop consuming messages. The base version only clears the running flag.
+    def stop
+      @running = false
+    end
+    # Returns true if the Consumer is currently running (i.e. #stop has not been called)
+    def running?
+      @running
+    end
+  end
+end

data/lib/chore/duplicate_detector.rb ADDED Viewed

@@ -0,0 +1,56 @@
+module Chore
+  class DuplicateDetector #:nodoc:
+    def initialize(opts={}) # opts keys read below: :dupe_on_cache_failure, :timeout, :servers, :memcached_client
+      # Make it optional. Only required when we use it. A LoadError is logged and then re-raised.
+      begin
+        require 'dalli'
+      rescue LoadError => e
+        Chore.logger.error "Unable to load dalli gem. It is required if duplicate \
+  detection is enabled.  Install it with 'gem install dalli'."
+        raise e
+      end
+      memcached_options = { # passed to Dalli::Client.new below when no :memcached_client is supplied
+        :auto_eject_hosts    => false,
+        :cache_lookups       => false,
+        :tcp_nodelay         => true,
+        :socket_max_failures => 5,
+        :socket_timeout      => 2
+      }
+      @timeouts              = {} # per-queue timeout cache, keyed by queue.url (see #queue_timeout)
+      @dupe_on_cache_failure = opts.fetch(:dupe_on_cache_failure) { false } # answer given by #found_duplicate? when the cache call raises
+      @timeout               = opts.fetch(:timeout) { 0 } # fallback when a queue reports no visibility_timeout
+      @servers               = opts.fetch(:servers) { nil }
+      @memcached_client      = opts.fetch(:memcached_client) { Dalli::Client.new(@servers, memcached_options) } # block only runs when no client is injected
+    end
+    # Checks the message against the configured dedupe server to see if the message is unique or not
+    # Unique messages will return false
+    # Duplicated messages will return true. Messages that are nil or have no queue are always reported as unique.
+    def found_duplicate?(msg)
+      return false unless msg && msg.respond_to?(:queue) && msg.queue
+      timeout = self.queue_timeout(msg.queue)
+      begin
+        !@memcached_client.add(msg.id, "1",timeout) # presumably `add` is falsy when the key already exists - verify against the Dalli docs
+      rescue StandardError => e
+        if @dupe_on_cache_failure
+          Chore.logger.error "Error accessing duplicate cache server. Assuming message is a duplicate. #{e}\n#{e.backtrace * "\n"}"
+          true
+        else
+          Chore.logger.error "Error accessing duplicate cache server. Assuming message is not a duplicate. #{e}\n#{e.backtrace * "\n"}"
+          false
+        end
+      end
+    end
+    # Retrieves the timeout for the given queue. The timeout is the window of time in seconds that
+    # we would consider the message to be non-unique, before we consider it dead in the water
+    # After that timeout, we would consider the next copy of the message received to be unique, and process it.
+    def queue_timeout(queue)
+      @timeouts[queue.url] ||= queue.visibility_timeout || @timeout # memoized per queue url; falls back to the configured :timeout
+    end
+  end
+end

data/lib/chore/fetcher.rb ADDED Viewed

@@ -0,0 +1,31 @@
+module Chore
+  class Fetcher #:nodoc:
+    attr_reader :manager, :consumers # NOTE(review): @consumers is never assigned in this class, so #consumers returns nil here
+    def initialize(manager)
+      @stopping = false
+      @manager = manager
+      @strategy = Chore.config.consumer_strategy.new(self) # the configured strategy class is instantiated with this fetcher
+    end
+    # Starts the fetcher with the configured Consumer Strategy. This will begin consuming messages from your queue
+    def start
+      Chore.logger.info "Fetcher starting up"
+      @strategy.fetch
+    end
+    # Stops the fetcher, preventing any further messages from being pulled from the queue. Calling it again is a no-op.
+    def stop!
+      unless @stopping
+        Chore.logger.info "Fetcher shutting down"
+        @stopping = true
+        @strategy.stop!
+      end
+    end
+    # Determines if the fetcher is in the process of stopping
+    def stopping?
+      @stopping
+    end
+  end
+end

data/lib/chore/hooks.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module Chore
+  # Abstracts the notion of registering and running hooks during certain points in the lifecycle of chore
+  # processing work.
+  module Hooks
+    # Helper method to look up, and execute hooks based on an event name.
+    # Hooks are assumed to be methods defined on `self` that are of the pattern
+    # hook_name_identifier. ex: before_perform_log
+    def run_hooks_for(event,*args)
+      results = global_hooks_for(event).map { |prc| prc.call(*args) } || [true]
+      results << hooks_for(event).map { |method| send(method,*args) }
+      results = false if results.any? {|r| false == r }
+      results
+    end
+  private
+    def hooks_for(event)
+      (self.methods - Object.methods).grep(/^#{event}/).sort
+    end
+    def global_hooks_for(event)
+      Chore.hooks_for(event)
+    end
+  end
+end

data/lib/chore/job.rb ADDED Viewed

@@ -0,0 +1,103 @@
+require 'chore/hooks'
+module Chore
+  # <tt>Chore::Job</tt> is the module which gives your job classes the methods they need to be published
+  # and run within Chore. You cannot have a Job in Chore that does not include this module
+  module Job
+    # An exception to represent a job choosing to forcibly reject a given instance of itself.
+    # The reasoning behind rejecting the job and the message that spawned it are left to
+    # the developer to decide to use or not to use.
+    class RejectMessageException < Exception
+      # Throw a RejectMessageException from your job to signal that the message should be rejected.
+      # The semantics of +reject+ are queue implementation dependent.
+    end
+    def self.job_classes #:nodoc:
+      @classes || []
+    end
+    def self.included(base) #:nodoc:
+      @classes ||= []
+      @classes << base.name # records the class name (a String), not the class object
+      base.extend(ClassMethods)
+      base.extend(Hooks)
+    end
+    module ClassMethods
+      DEFAULT_OPTIONS = { }
+      # Pass a hash of options to queue_options the included class's use of Chore::Job
+      # +opts+ must end up providing every key listed by #required_options, either directly or via Chore.config.
+      # * +:name+: which should map to the name of the queue this job should be published to.
+      def queue_options(opts = {})
+        @chore_options = (@chore_options || DEFAULT_OPTIONS).merge(opts_from_cli).merge(opts) # explicit opts win over config values
+        required_options.each do |k|
+          raise ArgumentError.new("#{self.to_s} :#{k} is a required option for Chore::Job") unless @chore_options[k]
+        end
+      end
+      # This is a method so it can be overridden to create additional required
+      # queue_options params.  This also determines what options get pulled
+      # from the global Chore.config.
+      def required_options
+        [:name, :publisher, :max_attempts]
+      end
+      def options #:nodoc:#
+        @chore_options ||= queue_options
+      end
+      def opts_from_cli #:nodoc:#
+        @from_cli ||= (Chore.config.marshal_dump.select {|k,v| required_options.include? k } || {})
+      end
+      # Execute the current job. We create an instance of the job to do the perform
+      # as this allows the jobs themselves to do initialization that might require access
+      # to the parameters of the job.
+      def perform(*args)
+        job = self.new(args) # the constructor receives args as a single Array
+        job.perform(*args)
+      end
+      # Publish a job using an instance of job. Similar to perform we do this so that a job
+      # can perform initialization logic before the perform_async is begun. This, in addition, to
+      # hooks allows for rather complex jobs to be written simply.
+      def perform_async(*args)
+        job = self.new(args) # the constructor receives args as a single Array
+        job.perform_async(*args)
+      end
+      # Resque/Sidekiq compatible serialization. No reason to change what works
+      def job_hash(job_params)
+        {:class => self.to_s, :args => job_params}
+      end
+      # The name of the configured queue, combined with an optional prefix
+      def prefixed_queue_name
+        "#{Chore.config.queue_prefix}#{self.options[:name]}"
+      end
+    end #ClassMethods
+    # This is handy to override in an included job to be able to do job setup that requires
+    # access to a job's arguments to be able to perform any context specific initialization that may
+    # be required.
+    def initialize(args=nil)
+    end
+    # This needs to be overridden by the object that is including this module.
+    def perform(*args)
+      raise NotImplementedError
+    end
+    # Use the current configured publisher to send this job into a queue, running the before_publish and after_publish hooks around it.
+    def perform_async(*args)
+      self.class.run_hooks_for(:before_publish,*args)
+      @chore_publisher ||= self.class.options[:publisher] # memoized on this job instance
+      @chore_publisher.publish(self.class.prefixed_queue_name,self.class.job_hash(args))
+      self.class.run_hooks_for(:after_publish,*args)
+    end
+  end #Job
+end #Chore

data/lib/chore/json_encoder.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require 'json'
+module Chore
+  # Json encoding for serializing jobs. Both methods are defined on the module itself (JsonEncoder.encode / JsonEncoder.decode).
+  module JsonEncoder
+    class << self
+      # Encodes the +job+ into a JSON String using the standard ruby JSON library
+      def encode(job)
+        JSON.generate(job.to_hash) # job must respond to #to_hash
+      end
+      # Decodes the +job+ from JSON into a ruby Hash using the standard ruby JSON parsing library
+      def decode(job)
+        JSON.parse(job)
+      end
+    end
+  end
+end

data/lib/chore/manager.rb ADDED Viewed

@@ -0,0 +1,47 @@
+require 'json'
+require 'chore/worker'
+require 'chore/fetcher'
+module Chore
+  # Manages the interactions between fetching messages (Consumer Strategy), and working over them (Worker Strategy)
+  class Manager
+    def initialize()
+      Chore.logger.info "Booting Chore #{Chore::VERSION}"
+      Chore.logger.debug { Chore.config.inspect }
+      @started_at = nil
+      @worker_strategy = Chore.config.worker_strategy.new(self)
+      @fetcher = Chore.config.fetcher.new(self)
+      @processed = 0
+      @stopping = false
+    end
+    # Start the Manager. This calls both the #start method of the configured Worker Strategy, as well as Fetcher#start.
+    def start
+      @started_at = Time.now
+      @worker_strategy.start
+      @fetcher.start
+    end
+    # Shut down the Manager, the Worker Strategy, and the Fetcher. This calls the +:before_shutdown+ hook.
+    def shutdown!
+      unless @stopping
+        Chore.logger.info "Manager shutting down"
+        @stopping = true
+        Chore.run_hooks_for(:before_shutdown)
+        @fetcher.stop!
+        @worker_strategy.stop!
+      end
+    end
+    # Take in an amount of +work+ (either an Array of, or a single UnitOfWork), and pass it down for the
+    # worker strategy to process. <b>This method is blocking</b>. It will continue to attempt to assign the work via
+    # the worker strategy, until it accepts it. It is up to the strategy to determine what cases it is allowed to accept
+    # work. The blocking semantic of this method is to prevent the Fetcher from getting messages off of the queue faster
+    # than they can be consumed.
+    def assign(work)
+      Chore.logger.debug { "Manager#assign: No. of UnitsOfWork: #{work.length})" }
+      @worker_strategy.assign(work) unless @stopping
+    end
+  end
+end

data/lib/chore/publisher.rb ADDED Viewed

@@ -0,0 +1,29 @@
+module Chore
+  # Base class for Chore Publishers. Provides the bare interface one needs to adhere to when writing custom publishers
+  class Publisher
+    DEFAULT_OPTIONS = { :encoder => JsonEncoder } # the encoder must respond to .encode (see #encode_job)
+    attr_accessor :options
+    def initialize(opts={})
+      self.options = DEFAULT_OPTIONS.merge(opts) # caller-supplied opts override the defaults
+    end
+    # Publishes the provided +job+ to the queue identified by the +queue_name+. Not designed to be used directly, this
+    # method ferries to the publish method on a fresh instance (built with default options) of your configured Publisher.
+    def self.publish(queue_name,job)
+      self.new.publish(queue_name,job)
+    end
+    # Raises a NotImplementedError. This method should be overridden in your descendant, custom publisher class
+    def publish(queue_name,job)
+      raise NotImplementedError
+    end
+  protected
+    def encode_job(job) # serializes +job+ with the configured :encoder option
+      options[:encoder].encode(job)
+    end
+  end
+end

data/lib/chore/queues/filesystem/consumer.rb ADDED Viewed

@@ -0,0 +1,128 @@
+require 'fileutils'
+require 'chore/queues/filesystem/filesystem_queue'
+module Chore
+  module Queues
+    module Filesystem
+      # This is the consuming side of the file system queue. This class consumes jobs created by
+      # FilesystemPublisher#publish.  The root of the file system queue is configured in
+      # Chore.config.fs_queue_root. In there a directory will be created for each queue name.
+      # Each queue directory contains a directory called "new" and one called "inprogress".
+      # FilesystemPublisher#publish creates new job files in the "new" directory. This consumer
+      # polls that directory every 5 seconds for new jobs which are moved to "inprogress".
+      #
+      # Once complete job files are deleted.
+      # If rejected they are moved back into new and will be processed again.  This may not be the
+      # desired behavior long term and we may want to add configuration to this class to allow more
+      # creative failure handling and retrying.
+      class Consumer < Chore::Consumer
+        include FilesystemQueue
+        Chore::CLI.register_option 'fs_queue_root', '--fs-queue-root DIRECTORY', 'Root directory for fs based queue'
+        # One Mutex per queue name, shared by every consumer instance in this process
+        FILE_QUEUE_MUTEXES = {}
+        # The amount of time units of work can run before the queue considers
+        # them timed out.  For filesystem queues, this is the global default.
+        attr_reader :queue_timeout
+        # Prepares the "new" and "inprogress" directories for +queue_name+ and registers the queue's Mutex.
+        def initialize(queue_name, opts={})
+          super(queue_name, opts)
+          # Even though putting these Mutexes in this hash is, by itself, not particularly threadsafe
+          # as long as some Mutex ends up in the queue after all consumers are created we're good
+          # as they are pulled from the queue and synchronized for file operations below
+          FILE_QUEUE_MUTEXES[@queue_name] ||= Mutex.new
+          @in_progress_dir = in_progress_dir(queue_name)
+          @new_dir = new_dir(queue_name)
+          @queue_timeout = Chore.config.default_queue_timeout
+        end
+        # Polls the queue's "new" directory until #stop is called, handing every job found to +handler+.
+        # Errors raised while handling a batch are logged and polling continues after the 5 second pause.
+        def consume(&handler)
+          Chore.logger.info "Starting consuming file system queue #{@queue_name} in #{queue_dir(queue_name)}"
+          while running?
+            begin
+              #TODO move expired job files to new directory?
+              handle_jobs(&handler)
+            rescue => e
+              Chore.logger.error { "#{self.class}#consume: #{e} #{e.backtrace * "\n"}" }
+            ensure
+              sleep 5
+            end
+          end
+        end
+        # Moves the in-progress job file +id+ back to "new" with its attempt counter incremented
+        def reject(id)
+          Chore.logger.debug "Rejecting: #{id}"
+          make_new_again(id)
+        end
+        # Deletes the in-progress job file +id+
+        def complete(id)
+          Chore.logger.debug "Completing (deleting): #{id}"
+          FileUtils.rm(File.join(@in_progress_dir, id))
+        end
+        private
+        # Finds all new job files, moves each one to in progress and yields it to +block+ as
+        # (job file name, queue name, queue timeout, job JSON, previous attempts), then runs the :on_fetch hooks.
+        def handle_jobs(&block)
+          # all consumers on a single queue share a lock on handling files.
+          # Each consumer comes along, processes all present files and release the lock.
+          # This isn't particularly useful but is here to allow the configuration of
+          # ThreadedConsumerStrategy with multiple threads on a queue safely although you
+          # probably wouldn't want to do that.
+          FILE_QUEUE_MUTEXES[@queue_name].synchronize do
+            job_files.each do |job_file|
+              Chore.logger.debug "Found a new job #{job_file}"
+              job_json = File.read(make_in_progress(job_file))
+              # only the attempt count is needed here; the id part of the name is ignored
+              _basename, previous_attempts = file_info(job_file)
+              # job_file is just the name which is the job id
+              block.call(job_file, queue_name, queue_timeout, job_json, previous_attempts)
+              Chore.run_hooks_for(:on_fetch, job_file, job_json)
+            end
+          end
+        end
+        # Moves +job+ from "new" to "inprogress" and returns its new full path
+        def make_in_progress(job)
+          move_job(File.join(@new_dir, job), File.join(@in_progress_dir, job))
+        end
+        # Moves +job+ from "inprogress" back to "new", renaming it so the attempt counter is one higher
+        def make_new_again(job)
+          basename, previous_attempts = file_info(job)
+          move_job(File.join(@in_progress_dir, job), File.join(@new_dir, "#{basename}.#{previous_attempts + 1}.job"))
+        end
+        # Moves the job file at +from+ to +to+ while holding an exclusive lock on it, and returns +to+.
+        # The block form of File.open closes the handle afterwards; it was previously left open on every move.
+        def move_job(from, to)
+          File.open(from, "r") do |f|
+            # wait on the lock a publisher in another process might have.
+            # Once we get the lock the file is ours to move to mark it in progress
+            f.flock(File::LOCK_EX)
+            begin
+              FileUtils.mv(f.path, to)
+            ensure
+              f.flock(File::LOCK_UN) # yes we can unlock it after its been moved, I checked
+            end
+          end
+          to
+        end
+        # Names of the entries in the "new" directory, skipping anything that starts with "."
+        def job_files
+          Dir.entries(@new_dir).reject { |e| e.start_with?(".") }
+        end
+        # Grabs the unique identifier for the job filename and the number of times
+        # it's been attempted (also based on the filename)
+        def file_info(job_file)
+          id, previous_attempts = File.basename(job_file, '.job').split('.')
+          [id, previous_attempts.to_i]
+        end
+      end
+    end
+  end
+end

data/lib/chore/queues/filesystem/filesystem_queue.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# Common methods used by FilesystemConsumer and FilesystemPublisher for dealing with the
+# directories which implement the queue.
+module Chore::FilesystemQueue
+  # Local directory for new jobs to be placed
+  NEW_JOB_DIR = "new"
+  # Local directory for jobs currently in-process to be moved
+  IN_PROGRESS_DIR = "inprogress"
+  # Retrieves the directory for in-process messages to go. If the directory for the +queue_name+ doesn't exist,
+  # it will be created for you. If the directory cannot be created, an IOError will be raised
+  def in_progress_dir(queue_name)
+    validate_dir(queue_name, IN_PROGRESS_DIR)
+  end
+  # Retrieves the directory for newly recieved messages to go. If the directory for the +queue_name+ doesn't exist,
+  # it will be created for you. If the directory cannot be created, an IOError will be raised
+  def new_dir(queue_name)
+    validate_dir(queue_name, NEW_JOB_DIR)
+  end
+  # Returns the root directory where messages are placed
+  def root_dir
+    @root_dir ||= prepare_dir(File.expand_path(Chore.config.fs_queue_root))
+  end
+  # Returns the fully qualified path to the directory for +queue_name+
+  def queue_dir(queue_name)
+    prepare_dir(File.join(root_dir, queue_name))
+  end
+  private
+  # Returns the directory for the given +queue_name+ and +task_state+. If the directory doesn't exist, it will be
+  # created for you. If the directory cannot be created, an IOError will be raised
+  def validate_dir(queue_name, task_state)
+    prepare_dir(File.join(queue_dir(queue_name), task_state))
+  end
+  # Creates a directory if it does not exist. Returns the directory
+  def prepare_dir(dir)
+    unless Dir.exists?(dir)
+      FileUtils.mkdir_p(dir)
+    end
+    raise IOError.new("directory for file system queue does not have write permission: #{dir}") unless File.writable?(dir)
+    dir
+  end
+end

data/lib/chore/queues/filesystem/publisher.rb ADDED Viewed

@@ -0,0 +1,45 @@
+require 'chore/queues/filesystem/filesystem_queue'
+module Chore
+  module Queues
+    module Filesystem
+      # Publisher for writing jobs to the local filesystem. Useful for testing in offline environments or
+      # when queuing implementations are irrelevent to the task at hand, such as local development of new jobs.
+      class Publisher < Chore::Publisher
+        # See the top of FilesystemConsumer for comments on how this works
+        include FilesystemQueue
+        # Mutex for holding a lock over the files for this queue while they are in process
+        FILE_MUTEX = Mutex.new
+        # use of mutex and file locking should make this both threadsafe and safe for multiple
+        # processes to use the same queue directory simultaneously.
+        def publish(queue_name,job)
+          FILE_MUTEX.synchronize do
+            while true
+              # keep trying to get a file with nothing in it meaning we just created it
+              # as opposed to us getting someone else's file that hasn't been processed yet.
+              f = File.open(filename(queue_name, job[:class].to_s), "w")
+              if f.flock(File::LOCK_EX | File::LOCK_NB) && f.size == 0
+                begin
+                  f.write(job.to_json)
+                ensure
+                  f.flock(File::LOCK_UN)
+                  break
+                end
+              end
+            end
+          end
+        end
+        # create a unique filename for a job in a queue based on queue name, job name and date
+        def filename(queue_name, job_name)
+          now = Time.now.strftime "%Y%m%d-%H%M%S-%6N"
+          previous_attempts = 0
+          File.join(new_dir(queue_name), "#{queue_name}-#{job_name}-#{now}.#{previous_attempts}.job")
+        end
+      end
+    end
+  end
+end