chore-core 1.8.2 → 4.0.0
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +173 -150
- data/chore-core.gemspec +3 -3
- data/lib/chore.rb +31 -5
- data/lib/chore/cli.rb +22 -4
- data/lib/chore/configuration.rb +1 -1
- data/lib/chore/consumer.rb +54 -12
- data/lib/chore/fetcher.rb +12 -7
- data/lib/chore/hooks.rb +2 -1
- data/lib/chore/job.rb +19 -0
- data/lib/chore/manager.rb +18 -2
- data/lib/chore/publisher.rb +18 -2
- data/lib/chore/queues/filesystem/consumer.rb +126 -64
- data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
- data/lib/chore/queues/filesystem/publisher.rb +13 -19
- data/lib/chore/queues/sqs.rb +22 -13
- data/lib/chore/queues/sqs/consumer.rb +64 -51
- data/lib/chore/queues/sqs/publisher.rb +26 -17
- data/lib/chore/strategies/consumer/batcher.rb +14 -15
- data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
- data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +9 -7
- data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
- data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
- data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
- data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
- data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
- data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
- data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
- data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
- data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
- data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
- data/lib/chore/unit_of_work.rb +10 -1
- data/lib/chore/util.rb +5 -1
- data/lib/chore/version.rb +3 -3
- data/lib/chore/worker.rb +32 -3
- data/spec/chore/cli_spec.rb +2 -2
- data/spec/chore/consumer_spec.rb +1 -5
- data/spec/chore/duplicate_detector_spec.rb +17 -5
- data/spec/chore/fetcher_spec.rb +0 -11
- data/spec/chore/manager_spec.rb +7 -0
- data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
- data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
- data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
- data/spec/chore/queues/sqs_spec.rb +32 -41
- data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
- data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
- data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +7 -6
- data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
- data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +17 -2
- data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
- data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
- data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
- data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
- data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
- data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
- data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
- data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
- data/spec/chore/worker_spec.rb +82 -14
- data/spec/spec_helper.rb +1 -1
- data/spec/support/queues/sqs/fake_objects.rb +18 -0
- metadata +39 -15
data/lib/chore/strategies/worker/helpers/preforked_worker.rb
@@ -0,0 +1,163 @@
+require 'chore/signal'
+require 'socket'
+require 'timeout'
+require 'chore/strategies/worker/helpers/ipc'
+
+module Chore
+  module Strategy
+    class PreforkedWorker #:nodoc:
+      include Util
+      include Ipc
+
+      def initialize(_opts = {})
+        Chore.logger.info "PFW: #{Process.pid} initializing"
+        @manager_pid = Process.ppid
+        @consumer_cache = {}
+        @running = true
+        post_fork_setup
+      end
+
+      def start_worker(master_socket)
+        Chore.logger.info 'PFW: Worker starting'
+        raise 'PFW: Did not get master_socket' unless master_socket
+        connection = connect_to_master(master_socket)
+        worker(connection)
+      rescue => e
+        Chore.logger.error "PFW: Shutting down #{e.message} #{e.backtrace}"
+        raise e
+      end
+
+      private
+
+      def worker(connection)
+        worker_killer = WorkerKiller.new
+        while running?
+          # Select on the connection to the master and the self pipe
+          readables, _, ex = select_sockets(connection, nil, Chore.config.shutdown_timeout)
+
+          if readables.nil? # timeout
+            next
+          end
+
+          read_socket = readables.first
+
+          # Get the work from the connection to the master
+          work = read_msg(read_socket)
+
+          # When the master (manager process) dies, the sockets are set to
+          # readable, but there is no data in the socket. In this case we check
+          # to see if the manager is actually dead, and in that case, we exit.
+          if work.nil? && is_orphan?
+            Chore.logger.info "PFW: Manager no longer alive; Shutting down"
+            break
+          end
+
+          unless work.nil?
+            # Do the work
+            process_work(work)
+
+            worker_killer.check_requests
+            worker_killer.check_memory
+
+            # Alert the master that this worker is ready to receive more work
+            signal_ready(read_socket)
+          end
+        end
+      rescue Errno::ECONNRESET, Errno::EPIPE
+        Chore.logger.info "PFW: Worker-#{Process.pid} lost connection to master, shutting down"
+      ensure
+        Chore.logger.info "PFW: Worker process terminating"
+        exit(true)
+      end
+
+      # Method wrapper around @running makes it easier to write specs
+      def running?
+        @running
+      end
+
+      # Connects to the master socket, sends its PID, sends a ready-for-work
+      # message, and returns the connection
+      def connect_to_master(master_socket)
+        Chore.logger.info 'PFW: connect protocol started'
+        child_connection(master_socket).tap do |conn|
+          send_msg(conn, Process.pid)
+          signal_ready(conn)
+          Chore.logger.info 'PFW: connect protocol completed'
+        end
+      end
+
+      def post_fork_setup
+        # Immediately swap out the process name so that it doesn't look like
+        # the master process
+        procline("#{Chore.config.worker_procline}:Started:#{Time.now}")
+
+        # We need to reset the logger after fork. This fixes a longstanding bug
+        # where workers would hang around and never die
+        Chore.logger = nil
+
+        config = Chore.config
+        # When we fork, the consumers/publishers need their connections reset.
+        # The specifics of this are queue dependent, and may result in a noop.
+        config.consumer.reset_connection!
+        # It is possible for this to be nil due to configuration woes with chore
+        config.publisher.reset_connection! if Chore.config.publisher
+
+        # Ensure that all signals are handled before we hand off a hook to the
+        # application.
+        trap_signals
+
+        Chore.run_hooks_for(:after_fork, self)
+      end
+
+      def process_work(work)
+        work = [work] unless work.is_a?(Array)
+        work.each do |item|
+          item.consumer = consumer(item.queue_name)
+          begin
+            Timeout.timeout(item.queue_timeout) do
+              worker = Worker.new(item)
+              worker.start
+            end
+          rescue Timeout::Error => ex
+            Chore.logger.info "PFW: Worker #{Process.pid} timed out"
+            Chore.logger.info "PFW: Worker time out set at #{item.queue_timeout} seconds"
+            raise ex
+          end
+        end
+      end
+
+      # We need to reuse Consumer objects because it takes 500ms to recreate
+      # each one.
+      def consumer(queue)
+        unless @consumer_cache.key?(queue)
+          raise Chore::TerribleMistake if @consumer_cache.size >= Chore.config.queues.size
+          @consumer_cache[queue] = Chore.config.consumer.new(queue)
+        end
+        @consumer_cache[queue]
+      end
+
+      def trap_signals
+        Signal.reset
+
+        [:INT, :QUIT, :TERM].each do |signal|
+          Signal.trap(signal) do
+            Chore.logger.info "PFW: received signal: #{signal}"
+            @running = false
+            sleep(Chore.config.shutdown_timeout)
+            Chore.logger.info "PFW: Worker process terminating"
+            exit(true)
+          end
+        end
+
+        Signal.trap(:USR1) do
+          Chore.reopen_logs
+          Chore.logger.info "PFW: Worker process reopened log"
+        end
+      end
+
+      def is_orphan?
+        Process.ppid != @manager_pid
+      end
+    end
+  end
+end
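The worker fires the :after_fork hook as the very last step of post_fork_setup, after connections are reset and signals are trapped. A minimal sketch of an application-side hook follows; Chore.add_hook as the registration API and the ActiveRecord reconnect are assumptions for illustration, not part of this diff.

# Hypothetical hook registration; `Chore.add_hook` and the ActiveRecord
# reconnect are assumptions. Because :after_fork runs last in
# post_fork_setup above, this is the place to rebuild any state that must
# not be shared across fork (database connections, random seeds, etc.).
Chore.add_hook(:after_fork) do |worker|
  ActiveRecord::Base.establish_connection if defined?(ActiveRecord)
  Chore.logger.info "app: rebuilt connections in worker #{Process.pid}"
end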
data/lib/chore/strategies/worker/helpers/work_distributor.rb
@@ -0,0 +1,65 @@
+require 'chore/strategies/worker/helpers/ipc'
+
+module Chore
+  module Strategy
+    class WorkDistributor #:nodoc:
+      class << self
+        include Ipc
+
+        def fetch_and_assign_jobs(workers, manager)
+          jobs = manager.fetch_work(workers.size)
+          raise "DW: jobs needs to be a list got #{jobs.class}" unless jobs.is_a?(Array)
+          if jobs.empty?
+            # This condition is due to the internal consumer queue being empty.
+            # Assuming that the consumer has to fetch from an external queue,
+            # if we returned here immediately, we would create a tight loop
+            # that uses up a lot of the CPU's time. In order to prevent that,
+            # we sleep to wait for the consumer queue to be populated.
+            sleep(0.1)
+            return
+          end
+          jobs_to_return = assign_jobs(jobs, workers)
+          manager.return_work(jobs_to_return)
+        end
+
+        private
+
+        def assign_jobs(jobs, workers)
+          raise 'DW: assign_jobs got 0 workers' if workers.empty?
+          jobs_to_return = []
+          jobs.each_with_index do |job, i|
+            raise 'DW: More Jobs than Sockets' if workers[i].nil?
+            unless push_job_to_worker(job, workers[i])
+              jobs_to_return << job
+            end
+          end
+
+          jobs_to_return
+        end
+
+        def push_job_to_worker(job, worker)
+          Chore.run_hooks_for(:before_send_to_worker, job)
+          clear_ready(worker.socket)
+          send_msg(worker.socket, job)
+          true
+        rescue => e
+          Chore.logger.error "DW: Could not assign job #{job.inspect} (worker: #{worker.pid})\nException #{e.message} #{e.backtrace * "\n"}"
+
+          # We generally shouldn't get into this situation since we've already
+          # tested that we can read/write to the Worker's socket. However,
+          # the Worker could still fail between that check and pushing the
+          # job, so we need to allow the work to be re-assigned to handle that
+          # edge case.
+          false
+        end
+
+        private
+
+        # Used for unit tests
+        def sleep(n)
+          Kernel.sleep(n)
+        end
+      end
+    end
+  end
+end
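fetch_and_assign_jobs captures the distributor's whole contract: manager.fetch_work(n) must return an Array of at most n jobs, and any job that cannot be pushed onto a worker's socket is handed back through manager.return_work for re-assignment. An illustrative driver loop follows; the manager and worker_manager collaborators and the loop shape are assumptions, not code from this diff.

# Illustrative only: `manager` and `worker_manager` are assumed
# collaborators. One pass hands at most one job to each ready worker;
# anything that fails to send is returned to the manager.
loop do
  ready = worker_manager.ready_workers(worker_manager.worker_sockets)
  if ready.empty?
    sleep(0.1)
    next
  end
  Chore::Strategy::WorkDistributor.fetch_and_assign_jobs(ready, manager)
end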
data/lib/chore/strategies/worker/helpers/worker_killer.rb
@@ -0,0 +1,40 @@
+require 'get_process_mem'
+
+module Chore
+  module Strategy
+    class WorkerKiller #:nodoc:
+      def initialize
+        @memory_limit = Chore.config.memory_limit_bytes
+        @request_limit = Chore.config.request_limit
+        @check_cycle = Chore.config.worker_check_cycle || 16
+        @check_count = 0
+        @current_requests = 0
+      end
+
+      def check_memory
+        return if @memory_limit.nil? || (@memory_limit == 0)
+        @check_count += 1
+
+        if @check_count == @check_cycle
+          rss = GetProcessMem.new.bytes.to_i
+          if rss > @memory_limit
+            Chore.logger.info "WK: (pid: #{Process.pid}) exceeded memory limit (#{rss} bytes > #{@memory_limit} bytes)"
+            Chore.run_hooks_for(:worker_mem_kill)
+            exit(true)
+          end
+          @check_count = 0
+        end
+      end
+
+      def check_requests
+        return if @request_limit.nil? || (@request_limit == 0)
+
+        if (@current_requests += 1) >= @request_limit
+          Chore.logger.info "WK: (pid: #{Process.pid}) exceeded max number of requests (limit: #{@request_limit})"
+          Chore.run_hooks_for(:worker_req_kill)
+          exit(true)
+        end
+      end
+    end
+  end
+end
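Both limits are opt-in: a nil or 0 value disables the corresponding check, and memory is only sampled once every worker_check_cycle calls (default 16) so the get_process_mem RSS lookup stays off the hot path. A minimal configuration sketch follows; the keys are the ones the class reads from Chore.config, while the Chore.configure block form is an assumption here.

# Sketch, assuming Chore.configure exposes these keys (the class above
# reads them from Chore.config; nil or 0 leaves a check disabled).
Chore.configure do |c|
  c.memory_limit_bytes = 512 * 1024 * 1024 # recycle a worker above 512 MiB RSS
  c.request_limit      = 10_000            # recycle a worker after 10k jobs
  c.worker_check_cycle = 16                # sample RSS on every 16th check
end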
data/lib/chore/strategies/worker/helpers/worker_manager.rb
@@ -0,0 +1,183 @@
+require 'chore/strategies/worker/helpers/ipc'
+
+module Chore
+  module Strategy
+    class WorkerManager #:nodoc:
+      include Ipc
+
+      def initialize(master_socket)
+        @master_socket = master_socket
+        @pid_to_worker = {}
+        @socket_to_worker = {}
+      end
+
+      # Create any missing workers and sockets, and attach them to the master
+      def create_and_attach_workers
+        create_workers do |num_workers|
+          attach_workers(num_workers)
+        end
+      end
+
+      # Reap dead workers and create new ones to replace them
+      def respawn_terminated_workers!
+        Chore.logger.info 'WM: Respawning terminated workers'
+        reap_workers
+        create_and_attach_workers
+      end
+
+      # Stop children with the given kill signal and wait for them to die
+      def stop_workers(sig)
+        @pid_to_worker.each do |pid, worker|
+          begin
+            Chore.logger.info { "WM: Sending #{sig} to: #{pid}" }
+            Process.kill(sig, pid)
+          rescue Errno::ESRCH => e
+            Chore.logger.error "WM: Signal to children error: #{e}"
+          end
+        end
+        # TODO: Sleep for the shutdown timeout and kill any remaining workers
+        reap_workers
+      end
+
+      # Return all the worker sockets
+      def worker_sockets
+        @socket_to_worker.keys
+      end
+
+      # Return the workers associated with a given array of sockets.
+      # +block+:: A block can be provided to perform tasks on the workers
+      # associated with the given sockets
+      def ready_workers(sockets = [], &block)
+        workers = @socket_to_worker.values_at(*sockets)
+        yield workers if block_given?
+        workers
+      end
+
+      private
+
+      # Creates worker processes until we have the number of workers defined
+      # by the configuration. Initializes and starts a worker instance in each
+      # of the new processes.
+      # +block+:: Block can be provided to run tasks on the number of newly
+      # created worker processes.
+      def create_workers(&block)
+        num_created_workers = 0
+
+        while @pid_to_worker.size < Chore.config.num_workers
+          pid = fork do
+            run_worker_instance
+          end
+
+          Chore.logger.info "WM: created_worker #{pid}"
+          # Keep track of the new worker process
+          @pid_to_worker[pid] = WorkerInfo.new(pid)
+          num_created_workers += 1
+        end
+
+        raise 'WM: Not enough workers' if inconsistent_worker_number
+        Chore.logger.info "WM: created #{num_created_workers} workers"
+        yield num_created_workers if block_given?
+        num_created_workers
+      end
+
+      # Check that the number of workers registered with the master matches
+      # the configuration
+      def inconsistent_worker_number
+        Chore.config.num_workers != @pid_to_worker.size
+      end
+
+      # Initialize and start a new worker instance
+      def run_worker_instance
+        PreforkedWorker.new.start_worker(@master_socket)
+      ensure
+        exit(true)
+      end
+
+      # Creates individual sockets for each worker to use and attaches them to
+      # the correct worker
+      def attach_workers(num)
+        Chore.logger.info "WM: Started attaching #{num} workers"
+
+        create_worker_sockets(num).each do |socket|
+          begin
+            readable, _, _ = select_sockets(socket, nil, 2)
+
+            if readable.nil?
+              Chore.logger.info "WM: #{socket} timeout waiting for a worker"
+              socket.close
+              next
+            end
+
+            r_socket = readable.first
+            reported_pid = read_msg(r_socket)
+
+            assigned_worker = @pid_to_worker[reported_pid]
+            assigned_worker.socket = socket
+            @socket_to_worker[socket] = assigned_worker
+
+            Chore.logger.info "WM: Connected #{reported_pid} with #{r_socket}"
+          rescue Errno::ECONNRESET
+            Chore.logger.info "WM: A worker failed to connect to #{socket}"
+            socket.close
+            next
+          end
+        end
+
+        # If the connection from a worker times out, we are unable to
+        # associate the process with a connection, so we kill the worker
+        # process
+        kill_unattached_workers
+        Chore.logger.info 'WM: Finished attaching workers'
+      end
+
+      # Create +num+ sockets that are available for worker connections
+      def create_worker_sockets(num)
+        Array.new(num) do
+          add_worker_socket
+        end
+      end
+
+      # Kill workers that failed to connect to the master
+      def kill_unattached_workers
+        @pid_to_worker.each do |pid, worker|
+          next unless worker.socket.nil?
+          Chore.logger.info "WM: kill_unattached_workers #{pid}"
+          Process.kill('KILL', pid)
+        end
+      end
+
+      # Wait for terminated workers to die and remove their references from
+      # the master
+      def reap_workers
+        Chore.logger.info "WM: reaping workers.."
+        dead_workers = @pid_to_worker.select do |pid, worker|
+          reap_process(pid)
+        end
+
+        dead_workers.each do |pid, worker|
+          dead_worker = @pid_to_worker.delete(pid)
+          dead_worker.socket.close
+          @socket_to_worker.delete(dead_worker.socket)
+          Chore.logger.info "WM: Removed preforked worker:#{worker.pid} - #{worker.socket}"
+        end
+      end
+
+      # Non-blocking wait for a process to die. Returns whether it stopped.
+      def reap_process(pid)
+        status = Process.wait(pid, Process::WNOHANG)
+        case status
+        when nil # Process is still running
+          return false
+        when pid # Collected status of this pid
+          return true
+        end
+      rescue Errno::ECHILD
+        # Child process has already terminated
+        true
+      end
+
+      def fork(&block)
+        Kernel.fork(&block)
+      end
+    end
+  end
+end
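Taken together, the attach handshake works like this: the master creates one socket per newly forked worker, waits up to 2 seconds for the child to report its PID over that socket, records the pid-to-socket mapping, and kills any worker that never completes the handshake. A sketch of the assumed master-side lifecycle follows; only the WorkerManager methods come from this diff, and master_socket is assumed to be the UNIX server socket set up by the Ipc helper.

# Illustrative lifecycle; only the WorkerManager API comes from the diff.
manager = Chore::Strategy::WorkerManager.new(master_socket)
manager.create_and_attach_workers      # fork workers, run the PID handshake

running = true
Signal.trap(:TERM) { running = false }

while running
  manager.respawn_terminated_workers!  # reap dead children, fork replacements
  sleep(1)
end

manager.stop_workers(:TERM)            # signal every child, then reap them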