RubyGems - chore-core - Versions diffs - 1.8.2 → 3.2.3 - Mend

chore-core 1.8.2 → 3.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

checksums.yaml +5 -5
data/README.md +6 -0
data/chore-core.gemspec +1 -0
data/lib/chore.rb +11 -5
data/lib/chore/cli.rb +21 -2
data/lib/chore/consumer.rb +15 -5
data/lib/chore/fetcher.rb +12 -7
data/lib/chore/hooks.rb +2 -1
data/lib/chore/job.rb +17 -0
data/lib/chore/manager.rb +18 -2
data/lib/chore/queues/filesystem/consumer.rb +116 -59
data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
data/lib/chore/queues/filesystem/publisher.rb +12 -18
data/lib/chore/queues/sqs/consumer.rb +6 -21
data/lib/chore/strategies/consumer/batcher.rb +8 -9
data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +3 -1
data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +121 -0
data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
data/lib/chore/strategies/worker/helpers/ipc.rb +88 -0
data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
data/lib/chore/unit_of_work.rb +8 -0
data/lib/chore/util.rb +5 -1
data/lib/chore/version.rb +3 -3
data/lib/chore/worker.rb +29 -0
data/spec/chore/cli_spec.rb +2 -2
data/spec/chore/consumer_spec.rb +0 -4
data/spec/chore/duplicate_detector_spec.rb +17 -5
data/spec/chore/fetcher_spec.rb +0 -11
data/spec/chore/manager_spec.rb +7 -0
data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +71 -11
data/spec/chore/queues/sqs/consumer_spec.rb +1 -3
data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +1 -0
data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +16 -1
data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
data/spec/chore/worker_spec.rb +69 -1
metadata +33 -5

data/lib/chore/queues/filesystem/filesystem_queue.rb CHANGED

@@ -6,6 +6,8 @@ module Chore::FilesystemQueue
   NEW_JOB_DIR = "new"
   # Local directory for jobs currently in-process to be moved
   IN_PROGRESS_DIR = "inprogress"
+  # Local directory for configuration info
+  CONFIG_DIR = "config"
   # Retrieves the directory for in-process messages to go. If the directory for the +queue_name+ doesn't exist,
   # it will be created for you. If the directory cannot be created, an IOError will be raised
@@ -29,6 +31,23 @@ module Chore::FilesystemQueue
     prepare_dir(File.join(root_dir, queue_name))
   end
+  # The configuration for the given queue
+  def config_dir(queue_name)
+    validate_dir(queue_name, CONFIG_DIR)
+  end
+  def config_value(queue_name, config_name)
+    config_file = File.join(config_dir(queue_name), config_name)
+    if File.exists?(config_file)
+      File.read(config_file).strip
+    end
+  end
+  # Returns the timeout for +queue_name+
+  def queue_timeout(queue_name)
+    (config_value(queue_name, 'timeout') || Chore.config.default_queue_timeout).to_i
+  end
   private
   # Returns the directory for the given +queue_name+ and +task_state+. If the directory doesn't exist, it will be
   # created for you. If the directory cannot be created, an IOError will be raised

data/lib/chore/queues/filesystem/publisher.rb CHANGED

@@ -10,27 +10,20 @@ module Chore
         # See the top of FilesystemConsumer for comments on how this works
         include FilesystemQueue
-        # Mutex for holding a lock over the files for this queue while they are in process
-        FILE_MUTEX = Mutex.new
         # use of mutex and file locking should make this both threadsafe and safe for multiple
         # processes to use the same queue directory simultaneously.
         def publish(queue_name,job)
-          FILE_MUTEX.synchronize do
-            while true
-              # keep trying to get a file with nothing in it meaning we just created it
-              # as opposed to us getting someone else's file that hasn't been processed yet.
-              f = File.open(filename(queue_name, job[:class].to_s), "w")
+          # First try encoding the job to avoid writing empty job files if this fails
+          encoded_job = encode_job(job)
+          published = false
+          while !published
+            # keep trying to get a file with nothing in it meaning we just created it
+            # as opposed to us getting someone else's file that hasn't been processed yet.
+            File.open(filename(queue_name, job[:class].to_s), "a") do |f|
               if f.flock(File::LOCK_EX | File::LOCK_NB) && f.size == 0
-                begin
-                  f.write(job.to_json)
-                rescue StandardError => e
-                  Chore.logger.error "#{e.class.name}: #{e.message}. Could not write #{job[:class]} job to '#{queue_name}' queue file."
-                  Chore.logger.error e.backtrace.join("\n")
-                ensure
-                  f.flock(File::LOCK_UN)
-                end
-                break
+                f.write(encoded_job)
+                published = true
               end
             end
           end
@@ -40,7 +33,8 @@ module Chore
         def filename(queue_name, job_name)
           now = Time.now.strftime "%Y%m%d-%H%M%S-%6N"
           previous_attempts = 0
-          File.join(new_dir(queue_name), "#{queue_name}-#{job_name}-#{now}.#{previous_attempts}.job")
+          pid = Process.pid
+          File.join(new_dir(queue_name), "#{queue_name}-#{job_name}-#{pid}-#{now}.#{previous_attempts}.job")
         end
       end
     end

data/lib/chore/queues/sqs/consumer.rb CHANGED

@@ -16,12 +16,11 @@ module Chore
         Chore::CLI.register_option 'aws_access_key', '--aws-access-key KEY', 'Valid AWS Access Key'
         Chore::CLI.register_option 'aws_secret_key', '--aws-secret-key KEY', 'Valid AWS Secret Key'
         Chore::CLI.register_option 'dedupe_servers', '--dedupe-servers SERVERS', 'List of mememcache compatible server(s) to use for storing SQS Message Dedupe cache'
-        Chore::CLI.register_option 'queue_polling_size', '--queue_polling_size NUM', Integer, 'Amount of messages to grab on each request' do |arg|
-          raise ArgumentError, "Cannot specify a queue polling size greater than 10" if arg > 10
-        end
         def initialize(queue_name, opts={})
           super(queue_name, opts)
+          raise Chore::TerribleMistake, "Cannot specify a queue polling size greater than 10" if sqs_polling_amount > 10
         end
         # Sets a flag that instructs the publisher to reset the connection the next time it's used
@@ -34,7 +33,7 @@ module Chore
           while running?
             begin
               messages = handle_messages(&handler)
-              sleep (Chore.config.consumer_sleep_interval || 1) if messages.empty?
+              sleep (Chore.config.consumer_sleep_interval) if messages.empty?
             rescue AWS::SQS::Errors::NonExistentQueue => e
               Chore.logger.error "You specified a queue '#{queue_name}' that does not exist. You must create the queue before starting Chore. Shutting down..."
               raise Chore::TerribleMistake
@@ -71,7 +70,7 @@ module Chore
           msg = queue.receive_messages(:limit => sqs_polling_amount, :attributes => [:receive_count])
           messages = *msg
           messages.each do |message|
-            unless duplicate_message?(message)
+            unless duplicate_message?(message.id, message.queue.url, queue_timeout)
               block.call(message.handle, queue_name, queue_timeout, message.body, message.receive_count - 1)
             end
             Chore.run_hooks_for(:on_fetch, message.handle, message.body)
@@ -79,18 +78,6 @@ module Chore
           messages
         end
-        # Checks if the given message has already been received within the timeout window for this queue
-        def duplicate_message?(message)
-          dupe_detector.found_duplicate?(:id=>message.id, :queue=>message.queue.url, :visibility_timeout=>message.queue.visibility_timeout)
-        end
-        # Returns the instance of the DuplicateDetector used to ensure unique messages.
-        # Will create one if one doesn't already exist
-        def dupe_detector
-          @dupes ||= DuplicateDetector.new({:servers => Chore.config.dedupe_servers,
-                                            :dupe_on_cache_failure => Chore.config.dupe_on_cache_failure})
-        end
         # Retrieves the SQS queue with the given +name+. The method will cache the results to prevent round trips on
         # subsequent calls. If <tt>reset_connection!</tt> has been called, this will result in the connection being
         # re-initialized, as well as clear any cached results from prior calls
@@ -116,13 +103,11 @@ module Chore
         def sqs
           @sqs ||= AWS::SQS.new(
             :access_key_id => Chore.config.aws_access_key,
-            :secret_access_key => Chore.config.aws_secret_key,
-            :logger => Chore.logger,
-            :log_level => :debug)
+            :secret_access_key => Chore.config.aws_secret_key)
         end
         def sqs_polling_amount
-          Chore.config.queue_polling_size || 10
+          Chore.config.queue_polling_size
         end
       end
     end

data/lib/chore/strategies/consumer/batcher.rb CHANGED

@@ -11,27 +11,27 @@ module Chore
         @size = size
         @batch = []
         @mutex = Mutex.new
-        @last_message = nil
         @callback = nil
         @running = true
       end
       # The main entry point of the Batcher, <tt>schedule</tt> begins a thread with the provided +batch_timeout+
       # as the only argument. While the Batcher is running, it will attempt to check if either the batch is full,
-      # or if the +batch_timeout+ has elapsed since the last batch was executed. If the batch is full, it will be executed.
-      # If the +batch_timeout+ has elapsed, as soon as the next message enters the batch, it will be executed.
+      # or if the +batch_timeout+ has elapsed since the oldest message was added. If either case is true, the
+      # items in the batch will be executed.
       #
       # Calling <tt>stop</tt> will cause the thread to finish its current check, and exit
-      def schedule(batch_timeout=20)
+      def schedule(batch_timeout)
         @thread = Thread.new(batch_timeout) do |timeout|
           Chore.logger.info "Batching timeout thread starting"
           while @running do
             begin
-              Chore.logger.debug "Last message added to batch: #{@last_message}: #{@batch.size}"
-              if @last_message && Time.now > (@last_message + timeout)
-                Chore.logger.debug "Batching timeout reached (#{@last_message + timeout}), current size: #{@batch.size}"
+              oldest_item = @batch.first
+              timestamp = oldest_item && oldest_item.created_at
+              Chore.logger.debug "Oldest message in batch: #{timestamp}, size: #{@batch.size}"
+              if timestamp && Time.now > (timestamp + timeout)
+                Chore.logger.debug "Batching timeout reached (#{timestamp + timeout}), current size: #{@batch.size}"
                 self.execute(true)
-                @last_message = nil
               end
               sleep(1)
             rescue => e
@@ -44,7 +44,6 @@ module Chore
       # Adds the +item+ to the current batch
       def add(item)
         @batch << item
-        @last_message = Time.now
         execute if ready?
       end

data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb CHANGED

@@ -5,13 +5,14 @@ module Chore
       attr_accessor :batcher
       Chore::CLI.register_option 'batch_size', '--batch-size SIZE', Integer, 'Number of items to collect for a single worker to process'
+      Chore::CLI.register_option 'batch_timeout', '--batch-timeout SIZE', Integer, 'Maximum number of seconds to wait until processing a message'
       Chore::CLI.register_option 'threads_per_queue', '--threads-per-queue NUM_THREADS', Integer, 'Number of threads to create for each named queue'
       def initialize(fetcher)
         @fetcher = fetcher
         @batcher = Batcher.new(Chore.config.batch_size)
         @batcher.callback = lambda { |batch| @fetcher.manager.assign(batch) }
-        @batcher.schedule
+        @batcher.schedule(Chore.config.batch_timeout)
         @running = true
       end
@@ -63,6 +64,7 @@ module Chore
               Chore.logger.debug { "Got message: #{id}"}
               work = UnitOfWork.new(id, queue_name, queue_timeout, body, previous_attempts, consumer)
+              Chore.run_hooks_for(:consumed_from_source, work)
               @batcher.add(work)
             end
           rescue Chore::TerribleMistake

data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb ADDED

@@ -0,0 +1,121 @@
+module Chore
+  module Strategy
+    class ThrottledConsumerStrategy #:nodoc:
+      def initialize(fetcher)
+        @fetcher = fetcher
+        @queue = SizedQueue.new(Chore.config.num_workers)
+        @return_queue = Queue.new
+        @max_queue_size = Chore.config.num_workers
+        @consumers_per_queue = Chore.config.threads_per_queue
+        @running = true
+        @consumers = []
+      end
+      # Begins fetching from queues by spinning up the configured
+      # +:threads_per_queue:+ count of threads for each
+      # queue you're consuming from.
+      # Once all threads are spun up and running, the threads are then joined.
+      def fetch
+        Chore.logger.info "TCS: Starting up: #{self.class.name}"
+        threads = []
+        Chore.config.queues.each do |consume_queue|
+          Chore.logger.info "TCS: Starting #{@consumers_per_queue} threads for Queue #{consume_queue}"
+          @consumers_per_queue.times do
+            next unless running?
+            threads << consume(consume_queue)
+          end
+        end
+        threads.each(&:join)
+      end
+      # If the ThreadedConsumerStrategy is currently running <tt>stop!</tt>
+      # will begin signalling it to stop. It will stop the batcher
+      # from forking more work,as well as set a flag which will disable
+      # it's own consuming threads once they finish with their current work.
+      def stop!
+        if running?
+          Chore.logger.info "TCS: Shutting down fetcher: #{self.class.name}"
+          @running = false
+          @consumers.each do |consumer|
+            Chore.logger.info "TCS: Stopping consumer: #{consumer.object_id}"
+            @queue.clear
+            @return_queue.clear
+            consumer.stop
+          end
+        end
+      end
+      # Returns whether or not the ThreadedConsumerStrategy is running or not
+      def running?
+        @running
+      end
+      # return upto number_of_free_workers work objects
+      def provide_work(no_free_workers)
+        work_units = []
+        free_workers = [no_free_workers, @queue.size + @return_queue.size].min
+        while free_workers > 0
+          # Drain from the return queue first, then the consumer thread queue
+          queue = @return_queue.empty? ? @queue : @return_queue
+          work_units << queue.pop
+          free_workers -= 1
+        end
+        work_units
+      end
+      # Gives work back to the queue in case it couldn't be assigned
+      #
+      # This will go into a separate queue so that it will be prioritized
+      # over other work that hasn't been attempted yet.  It also avoids
+      # a deadlock where @queue is full and the master is waiting to return
+      # work that it couldn't assign.
+      def return_work(work_units)
+        work_units.each do |work|
+          @return_queue.push(work)
+        end
+      end
+      private
+      def consume(consume_queue)
+        consumer = Chore.config.consumer.new(consume_queue)
+        @consumers << consumer
+        start_consumer_thread(consumer)
+      end
+      # Starts a consumer thread for polling the given +consume_queue+.
+      # If <tt>stop!<tt> is called, the threads will shut themsevles down.
+      def start_consumer_thread(consumer)
+        t = Thread.new(consumer) do |th|
+          begin
+            create_work_units(th)
+          rescue Chore::TerribleMistake => e
+            Chore.logger.error 'Terrible mistake, shutting down Chore'
+            Chore.logger.error "#{e.inspect} at #{e.backtrace}"
+            @fetcher.manager.shutdown!
+          end
+        end
+        t
+      end
+      def create_work_units(consumer)
+        consumer.consume do |id, queue, timeout, body, previous_attempts|
+          # Note: The unit of work object contains a consumer object that when
+          # used to consume from SQS, would have a mutex (that comes as a part
+          # of the AWS sdk); When sending these objects across from one process
+          # to another, we cannot send this across (becasue of the mutex). To
+          # work around this, we simply ignore the consumer object when creating
+          # the unit of work object, and when the worker recieves the work
+          # object, it assigns it a consumer object.
+          # (to allow for communication back to the queue it was consumed from)
+          work = UnitOfWork.new(id, queue, timeout, body,
+                                previous_attempts)
+          Chore.run_hooks_for(:consumed_from_source, work)
+          @queue.push(work) if running?
+          Chore.run_hooks_for(:added_to_queue, work)
+        end
+      end
+    end # ThrottledConsumerStrategyyeah
+  end
+end # Chore

data/lib/chore/strategies/worker/forked_worker_strategy.rb CHANGED

@@ -3,6 +3,7 @@ require 'chore/signal'
 module Chore
   module Strategy
     class ForkedWorkerStrategy #:nodoc:
+      include Util
       attr_accessor :workers
       def initialize(manager, opts={})
@@ -63,6 +64,9 @@ module Chore
           pid = nil
           Chore.run_hooks_for(:around_fork,w) do
             pid = fork do
+              work.each do | item |
+                Chore.run_hooks_for(:fetched_off_internal_q, item)
+              end
               after_fork(w)
               Chore.run_hooks_for(:within_fork,w) do
                 Chore.run_hooks_for(:after_fork,w)
@@ -132,7 +136,7 @@ module Chore
       def after_fork(worker)
         # Immediately swap out the process name so that it doesn't look like
         # the master process
-        procline("Started:#{Time.now}")
+        procline("#{Chore.config.worker_procline}:Started:#{Time.now}")
         clear_child_signals
         trap_child_signals(worker)
@@ -202,11 +206,6 @@ module Chore
         Kernel.fork(&block)
       end
-      def procline(str)
-        Chore.logger.info str
-        $0 = "chore-#{Chore::VERSION}:#{str}"
-      end
       def signal_children(sig, pids_to_signal = pids)
         pids_to_signal.each do |pid|
           begin

data/lib/chore/strategies/worker/helpers/ipc.rb ADDED

@@ -0,0 +1,88 @@
+require 'socket'
+module Chore
+  module Strategy
+    module Ipc #:nodoc:
+      BIG_ENDIAN = 'L>'.freeze
+      MSG_BYTES = 4
+      READY_MSG = 'R'
+      def create_master_socket
+        File.delete socket_file if File.exist? socket_file
+        UNIXServer.new(socket_file).tap do |socket|
+          socket_options(socket)
+        end
+      end
+      def child_connection(socket)
+        socket.accept
+      end
+      # Sending a message to a socket (must be a connected socket)
+      def send_msg(socket, msg)
+        raise 'send_msg cannot send empty messages' if msg.nil? || msg.size.zero?
+        message = Marshal.dump(msg)
+        encoded_size = [message.size].pack(BIG_ENDIAN)
+        encoded_message = "#{encoded_size}#{message}"
+        socket.send encoded_message, 0
+      end
+      # read a message from socket (must be a connected socket)
+      def read_msg(socket)
+        encoded_size = socket.recv(MSG_BYTES, Socket::MSG_PEEK)
+        return if encoded_size.nil? || encoded_size == ''
+        size = encoded_size.unpack(BIG_ENDIAN).first
+        encoded_message = socket.recv(MSG_BYTES + size)
+        Marshal.load(encoded_message[MSG_BYTES..-1])
+      rescue Errno::ECONNRESET => ex
+        Chore.logger.info "IPC: Connection was closed on socket #{socket}"
+        raise ex
+      end
+      def add_worker_socket
+        UNIXSocket.new(socket_file).tap do |socket|
+          socket_options(socket)
+        end
+      end
+      def clear_ready(socket)
+        _ = socket.gets
+      end
+      def signal_ready(socket)
+        socket.puts READY_MSG
+      rescue Errno::EPIPE => ex
+        Chore.logger.info 'IPC: Connection was shutdown by master'
+        raise ex
+      end
+      def select_sockets(sockets, self_pipe = nil, timeout = 0.5)
+        all_socks = [sockets, self_pipe].flatten.compact
+        IO.select(all_socks, nil, all_socks, timeout)
+      end
+      def delete_socket_file
+        File.unlink(socket_file)
+      rescue
+        nil
+      end
+      # Used for unit tests
+      def ipc_help
+        :available
+      end
+      private
+      # TODO: do we need this as a optional param
+      def socket_file
+        "./prefork_worker_sock-#{Process.pid}"
+      end
+      def socket_options(socket)
+        socket.setsockopt(:SOCKET, :REUSEADDR, true)
+      end
+    end
+  end
+end