chore-core 1.10.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/LICENSE.txt +1 -1
- data/README.md +172 -153
- data/chore-core.gemspec +3 -3
- data/lib/chore.rb +29 -5
- data/lib/chore/cli.rb +22 -4
- data/lib/chore/configuration.rb +1 -1
- data/lib/chore/consumer.rb +54 -12
- data/lib/chore/fetcher.rb +12 -7
- data/lib/chore/hooks.rb +2 -1
- data/lib/chore/job.rb +19 -0
- data/lib/chore/manager.rb +17 -2
- data/lib/chore/publisher.rb +18 -2
- data/lib/chore/queues/filesystem/consumer.rb +126 -64
- data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
- data/lib/chore/queues/filesystem/publisher.rb +10 -16
- data/lib/chore/queues/sqs.rb +22 -13
- data/lib/chore/queues/sqs/consumer.rb +64 -51
- data/lib/chore/queues/sqs/publisher.rb +26 -17
- data/lib/chore/strategies/consumer/batcher.rb +6 -6
- data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
- data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +7 -6
- data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
- data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
- data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
- data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
- data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
- data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
- data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
- data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
- data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
- data/lib/chore/unit_of_work.rb +2 -1
- data/lib/chore/util.rb +5 -1
- data/lib/chore/version.rb +2 -2
- data/lib/chore/worker.rb +30 -3
- data/spec/chore/cli_spec.rb +2 -2
- data/spec/chore/consumer_spec.rb +1 -5
- data/spec/chore/duplicate_detector_spec.rb +17 -5
- data/spec/chore/fetcher_spec.rb +0 -11
- data/spec/chore/manager_spec.rb +7 -0
- data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
- data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
- data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
- data/spec/chore/queues/sqs_spec.rb +32 -41
- data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
- data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +6 -6
- data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
- data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +6 -1
- data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
- data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
- data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
- data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
- data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
- data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
- data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
- data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +1 -1
- data/spec/chore/worker_spec.rb +70 -15
- data/spec/spec_helper.rb +1 -1
- data/spec/support/queues/sqs/fake_objects.rb +18 -0
- metadata +53 -29
data/lib/chore/queues/sqs/publisher.rb

@@ -3,21 +3,26 @@ require 'chore/publisher'
 module Chore
   module Queues
     module SQS
-
       # SQS Publisher, for writing messages to SQS from Chore
       class Publisher < Chore::Publisher
         @@reset_next = true
 
+        # @param [Hash] opts Publisher options
         def initialize(opts={})
           super
           @sqs_queues = {}
           @sqs_queue_urls = {}
         end
 
-        #
+        # Publishes a message to an SQS queue
+        #
+        # @param [String] queue_name Name of the SQS queue
+        # @param [Hash] job Job instance definition, will be encoded to JSON
+        #
+        # @return [struct Aws::SQS::Types::SendMessageResult]
         def publish(queue_name,job)
-          queue =
-          queue.send_message(encode_job(job))
+          queue = queue(queue_name)
+          queue.send_message(message_body: encode_job(job))
         end
 
         # Sets a flag that instructs the publisher to reset the connection the next time it's used
@@ -25,29 +30,33 @@ module Chore
           @@reset_next = true
         end
 
-
+        private
+
+        # SQS API client object
+        #
+        # @return [Aws::SQS::Client]
         def sqs
-          @sqs ||=
-            :access_key_id => Chore.config.aws_access_key,
-            :secret_access_key => Chore.config.aws_secret_key,
-            :logger => Chore.logger,
-            :log_level => :debug)
+          @sqs ||= Chore::Queues::SQS.sqs_client
         end
 
-        # Retrieves the SQS queue
+        # Retrieves the SQS queue object. The method will cache the results to prevent round trips on subsequent calls
+        #
         # If <tt>reset_connection!</tt> has been called, this will result in the connection being re-initialized,
         # as well as clear any cached results from prior calls
+        #
+        # @param [String] name Name of SQS queue
+        #
+        # @return [Aws::SQS::Queue]
         def queue(name)
-
-
-              p.empty!
-            end
+          if @@reset_next
+            Aws.empty_connection_pools!
             @sqs = nil
             @@reset_next = false
             @sqs_queues = {}
           end
-
-          @
+
+          @sqs_queue_urls[name] ||= sqs.get_queue_url(queue_name: name).queue_url
+          @sqs_queues[name] ||= Aws::SQS::Queue.new(url: @sqs_queue_urls[name], client: sqs)
         end
       end
     end
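The publisher now talks to SQS through the aws-sdk v3 resource API: queue URLs are looked up once per queue name and cached, and the body must be passed as the `message_body:` keyword. A minimal standalone sketch of the same call sequence (queue name and job payload are made up; region and credentials are assumed to come from the environment):

```ruby
require 'aws-sdk-sqs'
require 'json'

# Mirrors the new Publisher#queue / #publish flow outside of Chore:
# resolve the queue URL once, wrap it in a queue resource, send JSON.
client    = Aws::SQS::Client.new                 # region/credentials from the environment
queue_url = client.get_queue_url(queue_name: 'my-jobs-queue').queue_url
queue     = Aws::SQS::Queue.new(url: queue_url, client: client)

job    = { 'class' => 'MyJob', 'args' => [1, 2, 3] }        # hypothetical job payload
result = queue.send_message(message_body: JSON.dump(job))
puts result.message_id                           # SendMessageResult exposes the SQS message id
```

The `get_queue_url` round trip is exactly what the publisher caches per queue name.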
data/lib/chore/strategies/consumer/batcher.rb

@@ -15,17 +15,17 @@ module Chore
         @running = true
       end
 
-      # The main entry point of the Batcher, <tt>schedule</tt> begins a thread with the provided +batch_timeout+ 
-      # as the only argument. While the Batcher is running, it will attempt to check if either the batch is full, 
+      # The main entry point of the Batcher, <tt>schedule</tt> begins a thread with the provided +batch_timeout+
+      # as the only argument. While the Batcher is running, it will attempt to check if either the batch is full,
       # or if the +batch_timeout+ has elapsed since the oldest message was added. If either case is true, the
       # items in the batch will be executed.
-      # 
+      #
       # Calling <tt>stop</tt> will cause the thread to finish it's current check, and exit
       def schedule(batch_timeout)
         @thread = Thread.new(batch_timeout) do |timeout|
-          Chore.logger.info "Batching
+          Chore.logger.info "Batching thread starting with #{batch_timeout} second timeout"
           while @running do
-            begin 
+            begin
              oldest_item = @batch.first
              timestamp = oldest_item && oldest_item.created_at
              Chore.logger.debug "Oldest message in batch: #{timestamp}, size: #{@batch.size}"
@@ -33,7 +33,7 @@ module Chore
                Chore.logger.debug "Batching timeout reached (#{timestamp + timeout}), current size: #{@batch.size}"
                self.execute(true)
              end
-              sleep(1) 
+              sleep(1)
            rescue => e
              Chore.logger.error "Batcher#schedule raised an exception: #{e.inspect}"
            end
data/lib/chore/strategies/consumer/single_consumer_strategy.rb

@@ -10,16 +10,16 @@ module Chore
      end
 
      # Begins fetching from the configured queue by way of the configured Consumer. This can only be used if you have a
-      # single queue which can be kept up with at a relatively low volume. If you have more than a single queue 
-      # it will raise an exception.
+      # single queue which can be kept up with at a relatively low volume. If you have more than a single queue
+      # configured, it will raise an exception.
      def fetch
        Chore.logger.debug "Starting up consumer strategy: #{self.class.name}"
        queues = Chore.config.queues
        raise "When using SingleConsumerStrategy only one queue can be defined. Queues: #{queues}" unless queues.size == 1
-
+
        @consumer = Chore.config.consumer.new(queues.first)
-        @consumer.consume do |
-          work = UnitOfWork.new(
+        @consumer.consume do |message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts|
+          work = UnitOfWork.new(message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts, @consumer)
          @fetcher.manager.assign(work)
        end
      end
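Both the single and threaded strategies now expect `Consumer#consume` to yield six positional values — message id, receipt handle, queue name, queue timeout, raw body, and previous attempt count — and the resulting `UnitOfWork` also carries the consumer. A toy consumer sketching that expanded block contract (purely illustrative, not part of chore-core):

```ruby
require 'securerandom'

# Illustrative only: a minimal consumer that yields the six fields the
# strategies now destructure. Values are made up for demonstration.
class ToyConsumer
  def initialize(queue_name, messages)
    @queue_name = queue_name
    @messages   = messages
  end

  def consume
    @messages.each do |body|
      yield SecureRandom.uuid,  # message_id
            nil,                # message_receipt_handle (SQS-specific)
            @queue_name,        # queue_name
            300,                # queue_timeout in seconds
            body,               # raw message body
            0                   # previous_attempts
    end
  end
end

ToyConsumer.new('demo', ['{"job":"noop"}']).consume do |id, _handle, queue, timeout, body, attempts|
  puts "#{queue}: #{id} (#{attempts} prior attempts, #{timeout}s timeout) -> #{body}"
end
```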
data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb

@@ -23,7 +23,7 @@ module Chore
        Chore.logger.debug "Starting up consumer strategy: #{self.class.name}"
        threads = []
        Chore.config.queues.each do |queue|
-          Chore.config.threads_per_queue.times do 
+          Chore.config.threads_per_queue.times do
            if running?
              threads << start_consumer_thread(queue)
            end
@@ -32,7 +32,7 @@ module Chore
 
        threads.each(&:join)
      end
-
+
      # If the ThreadedConsumerStrategy is currently running <tt>stop!</tt> will begin signalling it to stop
      # It will stop the batcher from forking more work, as well as set a flag which will disable it's own consuming
      # threads once they finish with their current work.
@@ -49,21 +49,22 @@ module Chore
        @running
      end
 
-      private 
+      private
      # Starts a consumer thread for polling the given +queue+.
      # If <tt>stop!<tt> is called, the threads will shut themsevles down.
      def start_consumer_thread(queue)
        t = Thread.new(queue) do |tQueue|
          begin
            consumer = Chore.config.consumer.new(tQueue)
-            consumer.consume do |
+            consumer.consume do |message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts|
              # Quick hack to force this thread to end it's work
              # if we're shutting down. Could be delayed due to the
              # weird sometimes-blocking nature of SQS.
              consumer.stop if !running?
-              Chore.logger.debug { "Got message: #{
+              Chore.logger.debug { "Got message: #{message_id}"}
 
-              work = UnitOfWork.new(
+              work = UnitOfWork.new(message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts, consumer)
+              Chore.run_hooks_for(:consumed_from_source, work)
              @batcher.add(work)
            end
          rescue Chore::TerribleMistake
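The threaded strategy also starts running a `:consumed_from_source` hook for every `UnitOfWork` before it reaches the batcher. Assuming chore-core's global hook registration (`Chore.add_hook`), wiring one up could look roughly like the following; the logging body is only an example:

```ruby
# Assumes the global Chore.add_hook registration helper; the hook name comes
# from this diff, the body is illustrative.
Chore.add_hook(:consumed_from_source) do |work|
  Chore.logger.info "consumed #{work.id} from #{work.queue_name}"
end
```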
data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb

@@ -0,0 +1,120 @@
+module Chore
+  module Strategy
+    class ThrottledConsumerStrategy #:nodoc:
+      def initialize(fetcher)
+        @fetcher = fetcher
+        @queue = SizedQueue.new(Chore.config.num_workers)
+        @return_queue = Queue.new
+        @max_queue_size = Chore.config.num_workers
+        @consumers_per_queue = Chore.config.threads_per_queue
+        @running = true
+        @consumers = []
+      end
+
+      # Begins fetching from queues by spinning up the configured
+      # +:threads_per_queue:+ count of threads for each
+      # queue you're consuming from.
+      # Once all threads are spun up and running, the threads are then joined.
+
+      def fetch
+        Chore.logger.info "TCS: Starting up: #{self.class.name}"
+        threads = []
+        Chore.config.queues.each do |consume_queue|
+          Chore.logger.info "TCS: Starting #{@consumers_per_queue} threads for Queue #{consume_queue}"
+          @consumers_per_queue.times do
+            next unless running?
+            threads << consume(consume_queue)
+          end
+        end
+        threads.each(&:join)
+      end
+
+      # If the ThreadedConsumerStrategy is currently running <tt>stop!</tt>
+      # will begin signalling it to stop. It will stop the batcher
+      # from forking more work,as well as set a flag which will disable
+      # it's own consuming threads once they finish with their current work.
+      def stop!
+        if running?
+          Chore.logger.info "TCS: Shutting down fetcher: #{self.class.name}"
+          @running = false
+          @consumers.each do |consumer|
+            Chore.logger.info "TCS: Stopping consumer: #{consumer.object_id}"
+            @queue.clear
+            @return_queue.clear
+            consumer.stop
+          end
+        end
+      end
+
+      # Returns whether or not the ThreadedConsumerStrategy is running or not
+      def running?
+        @running
+      end
+
+      # return upto number_of_free_workers work objects
+      def provide_work(no_free_workers)
+        work_units = []
+        free_workers = [no_free_workers, @queue.size + @return_queue.size].min
+        while free_workers > 0
+          # Drain from the return queue first, then the consumer thread queue
+          queue = @return_queue.empty? ? @queue : @return_queue
+          work_units << queue.pop
+          free_workers -= 1
+        end
+        work_units
+      end
+
+      # Gives work back to the queue in case it couldn't be assigned
+      #
+      # This will go into a separate queue so that it will be prioritized
+      # over other work that hasn't been attempted yet. It also avoids
+      # a deadlock where @queue is full and the master is waiting to return
+      # work that it couldn't assign.
+      def return_work(work_units)
+        work_units.each do |work|
+          @return_queue.push(work)
+        end
+      end
+
+      private
+
+      def consume(consume_queue)
+        consumer = Chore.config.consumer.new(consume_queue)
+        @consumers << consumer
+        start_consumer_thread(consumer)
+      end
+
+      # Starts a consumer thread for polling the given +consume_queue+.
+      # If <tt>stop!<tt> is called, the threads will shut themsevles down.
+      def start_consumer_thread(consumer)
+        t = Thread.new(consumer) do |th|
+          begin
+            create_work_units(th)
+          rescue Chore::TerribleMistake => e
+            Chore.logger.error 'Terrible mistake, shutting down Chore'
+            Chore.logger.error "#{e.inspect} at #{e.backtrace}"
+            @fetcher.manager.shutdown!
+          end
+        end
+        t
+      end
+
+      def create_work_units(consumer)
+        consumer.consume do |message_id, message_receipt_handle, queue, timeout, body, previous_attempts|
+          # Note: The unit of work object contains a consumer object that when
+          # used to consume from SQS, would have a mutex (that comes as a part
+          # of the AWS sdk); When sending these objects across from one process
+          # to another, we cannot send this across (becasue of the mutex). To
+          # work around this, we simply ignore the consumer object when creating
+          # the unit of work object, and when the worker recieves the work
+          # object, it assigns it a consumer object.
+          # (to allow for communication back to the queue it was consumed from)
+          work = UnitOfWork.new(message_id, message_receipt_handle, queue, timeout, body, previous_attempts)
+          Chore.run_hooks_for(:consumed_from_source, work)
+          @queue.push(work) if running?
+          Chore.run_hooks_for(:added_to_queue, work)
+        end
+      end
+    end # ThrottledConsumerStrategy
+  end
+end # Chore
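The new ThrottledConsumerStrategy caps in-flight work with a `SizedQueue` sized to `num_workers`, and work the master could not assign goes onto a separate return queue that is always drained before fresh work. A standalone sketch of that drain order, with made-up sizes and payloads:

```ruby
# Standalone sketch of the provide_work/return_work drain priority: returned
# work is always handed out before freshly consumed work.
main_queue   = SizedQueue.new(4)   # bounded like Chore.config.num_workers
return_queue = Queue.new

3.times { |i| main_queue << "fresh-#{i}" }
return_queue << 'retried-0'

def provide_work(count, main_queue, return_queue)
  units = []
  available = [count, main_queue.size + return_queue.size].min
  while available > 0
    source = return_queue.empty? ? main_queue : return_queue
    units << source.pop
    available -= 1
  end
  units
end

p provide_work(2, main_queue, return_queue)  # => ["retried-0", "fresh-0"]
```

In the strategy itself, `provide_work` is what the master calls to hand batches to workers, and `return_work` puts back anything it could not distribute.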
data/lib/chore/strategies/worker/forked_worker_strategy.rb

@@ -3,6 +3,7 @@ require 'chore/signal'
 module Chore
   module Strategy
     class ForkedWorkerStrategy #:nodoc:
+      include Util
       attr_accessor :workers
 
       def initialize(manager, opts={})
@@ -63,6 +64,9 @@ module Chore
        pid = nil
        Chore.run_hooks_for(:around_fork,w) do
          pid = fork do
+            work.each do | item |
+              Chore.run_hooks_for(:fetched_off_internal_q, item)
+            end
            after_fork(w)
            Chore.run_hooks_for(:within_fork,w) do
              Chore.run_hooks_for(:after_fork,w)
@@ -132,7 +136,7 @@ module Chore
      def after_fork(worker)
        # Immediately swap out the process name so that it doesn't look like
        # the master process
-        procline("Started:#{Time.now}")
+        procline("#{Chore.config.worker_procline}:Started:#{Time.now}")
 
        clear_child_signals
        trap_child_signals(worker)
@@ -202,11 +206,6 @@ module Chore
        Kernel.fork(&block)
      end
 
-      def procline(str)
-        Chore.logger.info str
-        $0 = "chore-#{Chore::VERSION}:#{str}"
-      end
-
      def signal_children(sig, pids_to_signal = pids)
        pids_to_signal.each do |pid|
          begin
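The private `procline` helper is deleted here because the strategy now pulls it in via `include Util`, and worker process titles gain the configurable `Chore.config.worker_procline` prefix. Based on the removed lines, the helper does roughly the following (the actual `Chore::Util` implementation may differ slightly):

```ruby
# Sketch based on the removed private method; the shared helper now lives in
# Chore::Util. Rewrites the process title shown by `ps` / `top` and logs it.
def procline(str)
  Chore.logger.info str
  $0 = "chore-#{Chore::VERSION}:#{str}"
end

# e.g. called right after fork:
#   procline("#{Chore.config.worker_procline}:Started:#{Time.now}")
```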
data/lib/chore/strategies/worker/helpers/ipc.rb

@@ -0,0 +1,87 @@
+require 'socket'
+
+module Chore
+  module Strategy
+    module Ipc #:nodoc:
+      BIG_ENDIAN = 'L>'.freeze
+      MSG_BYTES = 4
+      READY_MSG = 'R'
+
+      def create_master_socket
+        File.delete socket_file if File.exist? socket_file
+        UNIXServer.new(socket_file).tap do |socket|
+          socket_options(socket)
+        end
+      end
+
+      def child_connection(socket)
+        socket.accept
+      end
+
+      # Sending a message to a socket (must be a connected socket)
+      def send_msg(socket, msg)
+        raise 'send_msg cannot send empty messages' if msg.nil? || msg.size.zero?
+        message = Marshal.dump(msg)
+        encoded_size = [message.size].pack(BIG_ENDIAN)
+        encoded_message = "#{encoded_size}#{message}"
+        socket.send encoded_message, 0
+      end
+
+      # read a message from socket (must be a connected socket)
+      def read_msg(socket)
+        encoded_size = socket.recv(MSG_BYTES, Socket::MSG_PEEK)
+        return if encoded_size.nil? || encoded_size == ''
+
+        size = encoded_size.unpack(BIG_ENDIAN).first
+        encoded_message = socket.recv(MSG_BYTES + size)
+        Marshal.load(encoded_message[MSG_BYTES..-1])
+      rescue Errno::ECONNRESET => ex
+        Chore.logger.info "IPC: Connection was closed on socket #{socket}"
+        raise ex
+      end
+
+      def add_worker_socket
+        UNIXSocket.new(socket_file).tap do |socket|
+          socket_options(socket)
+        end
+      end
+
+      def clear_ready(socket)
+        _ = socket.gets
+      end
+
+      def signal_ready(socket)
+        socket.puts READY_MSG
+      rescue Errno::EPIPE => ex
+        Chore.logger.info 'IPC: Connection was shutdown by master'
+        raise ex
+      end
+
+      def select_sockets(sockets, self_pipe = nil, timeout = 0.5)
+        all_socks = [sockets, self_pipe].flatten.compact
+        IO.select(all_socks, nil, all_socks, timeout)
+      end
+
+      def delete_socket_file
+        File.unlink(socket_file)
+      rescue
+        nil
+      end
+
+      # Used for unit tests
+      def ipc_help
+        :available
+      end
+
+      private
+
+      def socket_file
+        "./prefork_worker_sock-#{Process.pid}"
+      end
+
+      def socket_options(socket)
+        socket.setsockopt(:SOCKET, :REUSEADDR, true)
+      end
+    end
+  end
+end
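The Ipc helpers frame every message as a 4-byte big-endian length followed by a `Marshal` dump over a UNIX domain socket, with a newline-terminated `'R'` as the ready signal. A self-contained sketch of the same framing over a socketpair (the real strategy uses the per-process socket file shown above):

```ruby
require 'socket'

BIG_ENDIAN = 'L>'.freeze
MSG_BYTES  = 4

# Frame a Ruby object as <4-byte big-endian length><Marshal payload>,
# matching the Ipc helpers; demonstrated over a socketpair for simplicity.
def send_msg(socket, msg)
  payload = Marshal.dump(msg)
  socket.send([payload.size].pack(BIG_ENDIAN) + payload, 0)
end

def read_msg(socket)
  size = socket.recv(MSG_BYTES, Socket::MSG_PEEK).unpack1(BIG_ENDIAN)
  Marshal.load(socket.recv(MSG_BYTES + size)[MSG_BYTES..-1])
end

parent, child = UNIXSocket.pair
send_msg(parent, { queue: 'demo', body: '{"job":"noop"}' })
p read_msg(child)   # => {:queue=>"demo", :body=>"{\"job\":\"noop\"}"}
```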
data/lib/chore/strategies/worker/helpers/preforked_worker.rb

@@ -0,0 +1,163 @@
+require 'chore/signal'
+require 'socket'
+require 'timeout'
+require 'chore/strategies/worker/helpers/ipc'
+
+module Chore
+  module Strategy
+    class PreforkedWorker #:nodoc:
+      include Util
+      include Ipc
+
+      def initialize(_opts = {})
+        Chore.logger.info "PFW: #{Process.pid} initializing"
+        @manager_pid = Process.ppid
+        @consumer_cache = {}
+        @running = true
+        post_fork_setup
+      end
+
+      def start_worker(master_socket)
+        Chore.logger.info 'PFW: Worker starting'
+        raise 'PFW: Did not get master_socket' unless master_socket
+        connection = connect_to_master(master_socket)
+        worker(connection)
+      rescue => e
+        Chore.logger.error "PFW: Shutting down #{e.message} #{e.backtrace}"
+        raise e
+      end
+
+      private
+
+      def worker(connection)
+        worker_killer = WorkerKiller.new
+        while running?
+          # Select on the connection to the master and the self pipe
+          readables, _, ex = select_sockets(connection, nil, Chore.config.shutdown_timeout)
+
+          if readables.nil? # timeout
+            next
+          end
+
+          read_socket = readables.first
+
+          # Get the work from the connection to master
+          work = read_msg(read_socket)
+
+          # When the Master (manager process) dies, the sockets are set to
+          # readable, but there is no data in the socket. In this case we check
+          # to see if the manager is actually dead, and in that case, we exit.
+          if work.nil? && is_orphan?
+            Chore.logger.info "PFW: Manager no longer alive; Shutting down"
+            break
+          end
+
+          unless work.nil?
+            # Do the work
+            process_work(work)
+
+            worker_killer.check_requests
+            worker_killer.check_memory
+
+            # Alert master that worker is ready to receive more work
+            signal_ready(read_socket)
+          end
+        end
+      rescue Errno::ECONNRESET, Errno::EPIPE
+        Chore.logger.info "PFW: Worker-#{Process.pid} lost connection to master, shutting down"
+      ensure
+        Chore.logger.info "PFW: Worker process terminating"
+        exit(true)
+      end
+
+      # Method wrapper around @running makes it easier to write specs
+      def running?
+        @running
+      end
+
+      # Connects to the master socket, sends its PID, send a ready for work
+      # message, and returns the connection
+      def connect_to_master(master_socket)
+        Chore.logger.info 'PFW: connect protocol started'
+        child_connection(master_socket).tap do |conn|
+          send_msg(conn, Process.pid)
+          signal_ready(conn)
+          Chore.logger.info 'PFW: connect protocol completed'
+        end
+      end
+
+      def post_fork_setup
+        # Immediately swap out the process name so that it doesn't look like
+        # the master process
+        procline("#{Chore.config.worker_procline}:Started:#{Time.now}")
+
+        # We need to reset the logger after fork. This fixes a longstanding bug
+        # where workers would hang around and never die
+        Chore.logger = nil
+
+        config = Chore.config
+        # When we fork, the consumer's/publisher's need their connections reset.
+        # The specifics of this are queue dependent, and may result in a noop.
+        config.consumer.reset_connection!
+        # It is possible for this to be nil due to configuration woes with chore
+        config.publisher.reset_connection! if Chore.config.publisher
+
+        # Ensure that all signals are handled before we hand off a hook to the
+        # application.
+        trap_signals
+
+        Chore.run_hooks_for(:after_fork,self)
+      end
+
+      def process_work(work)
+        work = [work] unless work.is_a?(Array)
+        work.each do |item|
+          item.consumer = consumer(item.queue_name)
+          begin
+            Timeout.timeout( item.queue_timeout ) do
+              worker = Worker.new(item)
+              worker.start
+            end
+          rescue Timeout::Error => ex
+            Chore.logger.info "PFW: Worker #{Process.pid} timed out"
+            Chore.logger.info "PFW: Worker time out set at #{item.queue_timeout} seconds"
+            raise ex
+          end
+        end
+      end
+
+      # We need to resue Consumer objects because it takes 500ms to recreate
+      # each one.
+      def consumer(queue)
+        unless @consumer_cache.key?(queue)
+          raise Chore::TerribleMistake if @consumer_cache.size >= Chore.config.queues.size
+          @consumer_cache[queue] = Chore.config.consumer.new(queue)
+        end
+        @consumer_cache[queue]
+      end
+
+      def trap_signals
+        Signal.reset
+
+        [:INT, :QUIT, :TERM].each do |signal|
+          Signal.trap(signal) do
+            Chore.logger.info "PFW: received signal: #{signal}"
+            @running = false
+            sleep(Chore.config.shutdown_timeout)
+            Chore.logger.info "PFW: Worker process terminating"
+            exit(true)
+          end
+        end
+
+        Signal.trap(:USR1) do
+          Chore.reopen_logs
+          Chore.logger.info "PFW: Worker process reopened log"
+        end
+      end
+
+      def is_orphan?
+        Process.ppid != @manager_pid
+      end
+    end
+  end
+end
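PreforkedWorker is the per-child loop of the new preforked worker strategy: it performs the ready handshake with the master, blocks on the socket for marshalled `UnitOfWork` batches, re-attaches a cached consumer per queue, and runs each item under its queue timeout. Opting into the new strategies is a configuration choice; a hedged sketch follows, where `worker_strategy` and `consumer_strategy` are assumed option names (the keys referenced directly in this diff are `num_workers`, `threads_per_queue`, `queues`, and `consumer`) — check the 4.0.0 README for the authoritative list:

```ruby
# Hypothetical configuration sketch; option names follow chore-core's config
# keys as referenced in this diff plus assumed strategy settings.
Chore.configure do |c|
  c.consumer          = Chore::Queues::SQS::Consumer
  c.consumer_strategy = Chore::Strategy::ThrottledConsumerStrategy
  c.worker_strategy   = Chore::Strategy::PreforkedWorkerStrategy
  c.queues            = ['my-jobs-queue']   # illustrative queue name
  c.num_workers       = 8
  c.threads_per_queue = 1
end
```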