sidekiq 3.5.4 → 4.0.0.pre1


@@ -1,4 +1,4 @@
-require 'sidekiq/actor'
+# encoding: utf-8
 require 'sidekiq/manager'
 require 'sidekiq/fetch'
 require 'sidekiq/scheduled'
@@ -9,65 +9,116 @@ module Sidekiq
   # If any of these actors die, the Sidekiq process exits
   # immediately.
   class Launcher
-    include Actor
     include Util
 
-    trap_exit :actor_died
-
-    attr_reader :manager, :poller, :fetcher
+    attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
-      @condvar = Celluloid::Condition.new
-      @manager = Sidekiq::Manager.new_link(@condvar, options)
-      @poller = Sidekiq::Scheduled::Poller.new_link
-      @fetcher = Sidekiq::Fetcher.new_link(@manager, options)
-      @manager.fetcher = @fetcher
+      @manager = Sidekiq::Manager.new(options)
+      @poller = Sidekiq::Scheduled::Poller.new
       @done = false
      @options = options
     end
 
-    def actor_died(actor, reason)
-      # https://github.com/mperham/sidekiq/issues/2057#issuecomment-66485477
-      return if @done || !reason
+    def run
+      @thread = safe_thread("heartbeat", &method(:start_heartbeat))
+      @poller.start
+      @manager.start
+    end
 
-      Sidekiq.logger.warn("Sidekiq died due to the following error, cannot recover, process exiting")
-      handle_exception(reason)
-      exit(1)
+    # Stops this instance from processing any more jobs,
+    #
+    def quiet
+      @done = true
+      @manager.quiet
+      @poller.terminate
     end
 
-    def run
-      watchdog('Launcher#run') do
-        manager.async.start
-        poller.async.poll(true)
+    # Shuts down the process. This method does not
+    # return until all work is complete and cleaned up.
+    # It can take up to the timeout to complete.
+    def stop
+      deadline = Time.now + @options[:timeout]
 
-        start_heartbeat
-      end
+      @done = true
+      @manager.quiet
+      @poller.terminate
+
+      @manager.stop(deadline)
+
+      # Requeue everything in case there was a worker who grabbed work while stopped
+      # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
+      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy.bulk_requeue([], @options)
+
+      clear_heartbeat
     end
 
-    def stop
-      watchdog('Launcher#stop') do
-        @done = true
-        Sidekiq::Fetcher.done!
-        fetcher.terminate if fetcher.alive?
-        poller.terminate if poller.alive?
-
-        manager.async.stop(:shutdown => true, :timeout => @options[:timeout])
-        fire_event(:shutdown, true)
-        @condvar.wait
-        manager.terminate
-
-        # Requeue everything in case there was a worker who grabbed work while stopped
-        # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-        Sidekiq::Fetcher.strategy.bulk_requeue([], @options)
-
-        stop_heartbeat
-      end
+    def stopping?
+      @done
     end
 
-    private
+    private unless $TESTING
+
+    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
+
+    def heartbeat(k, data, json)
+      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, data) }
+      results.compact!
+      $0 = results.join(' ')
+
+      ❤(k, json)
+    end
+
+    def ❤(key, json)
+      fails = procd = 0
+      begin
+        Processor::FAILURE.update {|curr| fails = curr; 0 }
+        Processor::PROCESSED.update {|curr| procd = curr; 0 }
+
+        workers_key = "#{key}:workers".freeze
+        nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed".freeze, procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.incrby("stat:failed".freeze, fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.del(workers_key)
+            Processor::WORKER_STATE.each_pair do |tid, hash|
+              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
+            end
+          end
+        end
+        fails = procd = 0
+
+        _, _, _, msg = Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.sadd('processes', key)
+            conn.hmset(key, 'info', json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f)
+            conn.expire(key, 60)
+            conn.rpop("#{key}-signals")
+          end
+        end
+
+        return unless msg
+
+        if JVM_RESERVED_SIGNALS.include?(msg)
+          Sidekiq::CLI.instance.handle_signal(msg)
+        else
+          ::Process.kill(msg, $$)
+        end
+      rescue => e
+        # ignore all redis/network issues
+        logger.error("heartbeat: #{e.message}")
+        # don't lose the counts if there was a network issue
+        PROCESSED.increment(procd)
+        FAILURE.increment(fails)
+      end
+    end
 
     def start_heartbeat
-      key = identity
+      k = identity
       data = {
         'hostname' => hostname,
         'started_at' => Time.now.to_f,
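
The ❤ heartbeat added above doubles as a remote-control channel: every beat RPOPs one entry from the "#{identity}-signals" list in Redis and either hands USR1/USR2 to Sidekiq::CLI or re-raises the name locally via Process.kill. A hedged sketch of how another process could feed that list; the identity value and the USR1 signal below are illustrative, not taken from this diff:

    # Hypothetical sketch: ask a running Sidekiq process to quiet itself by
    # pushing a signal name onto its "<identity>-signals" list in Redis.
    # The identity below is made up; the real value is whatever the process
    # registered in the 'processes' set via its heartbeat.
    require 'sidekiq'

    identity = 'app-01:4242:0123abcd'
    Sidekiq.redis do |conn|
      conn.lpush("#{identity}-signals", 'USR1') # next heartbeat delivers Process.kill('USR1', $$)
    end
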
@@ -75,16 +126,24 @@ module Sidekiq
         'tag' => @options[:tag] || '',
         'concurrency' => @options[:concurrency],
         'queues' => @options[:queues].uniq,
-        'labels' => Sidekiq.options[:labels],
-        'identity' => identity,
+        'labels' => @options[:labels],
+        'identity' => k,
       }
       # this data doesn't change so dump it to a string
       # now so we don't need to dump it every heartbeat.
       json = Sidekiq.dump_json(data)
-      manager.heartbeat(key, data, json)
+
+      while true
+        heartbeat(k, data, json)
+        sleep 5
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
     end
 
-    def stop_heartbeat
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
       Sidekiq.redis do |conn|
         conn.pipelined do
           conn.srem('processes', identity)
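
Net effect for the Launcher: the Celluloid actor wiring (new_link, trap_exit, watchdog) is gone and it becomes a plain object with a thread-based lifecycle. run spins up the heartbeat thread, the poller and the manager; quiet stops accepting new jobs; stop waits out the configured timeout, requeues leftovers via the fetch strategy and clears the heartbeat record. A rough usage sketch; the options hash is illustrative, in practice Sidekiq::CLI assembles it:

    # Sketch only: drive the new Launcher lifecycle by hand, assuming an
    # options hash similar to what Sidekiq::CLI would normally build.
    require 'sidekiq'
    require 'sidekiq/launcher'

    options  = Sidekiq.options.merge(concurrency: 5, queues: ['default'], timeout: 8)
    launcher = Sidekiq::Launcher.new(options)

    launcher.run    # heartbeat thread + poller + manager
    sleep 10        # ...process jobs (the real CLI blocks on signal handling here)
    launcher.quiet  # stop picking up new work
    launcher.stop   # wait up to options[:timeout], bulk_requeue, clear_heartbeat
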
@@ -1,156 +1,89 @@
 # encoding: utf-8
 require 'sidekiq/util'
-require 'sidekiq/actor'
 require 'sidekiq/processor'
 require 'sidekiq/fetch'
+require 'thread'
 
 module Sidekiq
 
   ##
-  # The main router in the system. This
-  # manages the processor state and accepts messages
-  # from Redis to be dispatched to an idle processor.
+  # The Manager is the central coordination point in Sidekiq, controlling
+  # the lifecycle of the Processors and feeding them jobs as necessary.
+  #
+  # Tasks:
+  #
+  # 1. start: Spin up Processors.
+  # 3. processor_died: Handle job failure, throw away Processor, create new one.
+  # 4. quiet: shutdown idle Processors.
+  # 5. stop: hard stop the Processors by deadline.
+  #
+  # Note that only the last task requires its own Thread since it has to monitor
+  # the shutdown process. The other tasks are performed by other threads.
   #
   class Manager
     include Util
-    include Actor
-    trap_exit :processor_died
 
-    attr_reader :ready
-    attr_reader :busy
-    attr_accessor :fetcher
+    attr_reader :workers
+    attr_reader :options
 
-    SPIN_TIME_FOR_GRACEFUL_SHUTDOWN = 1
-    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
-
-    def initialize(condvar, options={})
+    def initialize(options={})
       logger.debug { options.inspect }
       @options = options
       @count = options[:concurrency] || 25
       raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
-      @done_callback = nil
-      @finished = condvar
 
-      @in_progress = {}
-      @threads = {}
       @done = false
-      @busy = []
-      @ready = @count.times.map do
-        p = Processor.new_link(current_actor)
-        p.proxy_id = p.object_id
-        p
+      @workers = Set.new
+      @count.times do
+        @workers << Processor.new(self)
       end
+      @plock = Mutex.new
     end
 
-    def stop(options={})
-      watchdog('Manager#stop died') do
-        should_shutdown = options[:shutdown]
-        timeout = options[:timeout]
-
-        @done = true
-
-        logger.info { "Terminating #{@ready.size} quiet workers" }
-        @ready.each { |x| x.terminate if x.alive? }
-        @ready.clear
-
-        return if clean_up_for_graceful_shutdown
-
-        hard_shutdown_in timeout if should_shutdown
+    def start
+      @workers.each do |x|
+        x.start
       end
     end
 
-    def clean_up_for_graceful_shutdown
-      if @busy.empty?
-        shutdown
-        return true
-      end
+    def quiet
+      return if @done
+      @done = true
 
-      after(SPIN_TIME_FOR_GRACEFUL_SHUTDOWN) { clean_up_for_graceful_shutdown }
-      false
+      logger.info { "Terminating quiet workers" }
+      @workers.each { |x| x.terminate }
     end
 
-    def start
-      @ready.each { dispatch }
-    end
+    def stop(deadline)
+      quiet
+      return if @workers.empty?
 
-    def when_done(&blk)
-      @done_callback = blk
-    end
-
-    def processor_done(processor)
-      watchdog('Manager#processor_done died') do
-        @done_callback.call(processor) if @done_callback
-        @in_progress.delete(processor.object_id)
-        @threads.delete(processor.object_id)
-        @busy.delete(processor)
-        if stopped?
-          processor.terminate if processor.alive?
-          shutdown if @busy.empty?
-        else
-          @ready << processor if processor.alive?
-        end
-        dispatch
+      logger.info { "Pausing to allow workers to finish..." }
+      remaining = deadline - Time.now
+      while remaining > 0.5
+        return if @workers.empty?
+        sleep 0.5
+        remaining = deadline - Time.now
       end
-    end
+      return if @workers.empty?
 
-    def processor_died(processor, reason)
-      watchdog("Manager#processor_died died") do
-        @in_progress.delete(processor.object_id)
-        @threads.delete(processor.object_id)
-        @busy.delete(processor)
-
-        unless stopped?
-          p = Processor.new_link(current_actor)
-          p.proxy_id = p.object_id
-          @ready << p
-          dispatch
-        else
-          shutdown if @busy.empty?
-        end
-      end
+      hard_shutdown
     end
 
-    def assign(work)
-      watchdog("Manager#assign died") do
-        if stopped?
-          # Race condition between Manager#stop if Fetcher
-          # is blocked on redis and gets a message after
-          # all the ready Processors have been stopped.
-          # Push the message back to redis.
-          work.requeue
-        else
-          processor = @ready.pop
-          @in_progress[processor.object_id] = work
-          @busy << processor
-          processor.async.process(work)
-        end
+    def processor_stopped(processor)
+      @plock.synchronize do
+        @workers.delete(processor)
       end
     end
 
-    # A hack worthy of Rube Goldberg. We need to be able
-    # to hard stop a working thread. But there's no way for us to
-    # get handle to the underlying thread performing work for a processor
-    # so we have it call us and tell us.
-    def real_thread(proxy_id, thr)
-      @threads[proxy_id] = thr if thr.alive?
-    end
-
-    PROCTITLES = [
-      proc { 'sidekiq'.freeze },
-      proc { Sidekiq::VERSION },
-      proc { |mgr, data| data['tag'] },
-      proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
-      proc { |mgr, data| "stopping" if mgr.stopped? },
-    ]
-
-    def heartbeat(key, data, json)
-      results = PROCTITLES.map {|x| x.(self, data) }
-      results.compact!
-      $0 = results.join(' ')
-
-      ❤(key, json)
-      after(5) do
-        heartbeat(key, data, json)
+    def processor_died(processor, reason)
+      @plock.synchronize do
+        @workers.delete(processor)
+        unless @done
+          p = Processor.new(self)
+          @workers << p
+          p.start
+        end
       end
     end
 
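
The Manager follows the same pattern: a Set of Processors guarded by a Mutex replaces the actor registry, processor_died swaps in a fresh Processor under the lock, and stop polls the set against a wall-clock deadline instead of scheduling an after(...) timer. A minimal sketch of the new call sequence, mirroring what Launcher#run and Launcher#stop do above; the options and the 8-second deadline are illustrative:

    # Minimal sketch of the Manager lifecycle introduced above.
    require 'sidekiq'
    require 'sidekiq/manager'

    manager = Sidekiq::Manager.new(concurrency: 10, queues: ['default'])
    manager.start                # each Processor runs in its own thread and starts fetching

    # later, during shutdown:
    manager.quiet                # @done = true; processors finish their current job and exit
    manager.stop(Time.now + 8)   # poll until workers drain, then hard_shutdown the stragglers
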
@@ -160,77 +93,34 @@ module Sidekiq
 
     private
 
-    def ❤(key, json)
-      begin
-        _, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.hmset(key, 'info', json, 'busy', @busy.size, 'beat', Time.now.to_f)
-            conn.expire(key, 60)
-            conn.rpop("#{key}-signals")
-          end
-        end
-
-        return unless msg
-
-        if JVM_RESERVED_SIGNALS.include?(msg)
-          Sidekiq::CLI.instance.handle_signal(msg)
-        else
-          ::Process.kill(msg, $$)
-        end
-      rescue => e
-        # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+    def hard_shutdown
+      # We've reached the timeout and we still have busy workers.
+      # They must die but their jobs shall live on.
+      cleanup = nil
+      @plock.synchronize do
+        cleanup = @workers.dup
       end
-    end
 
-    def hard_shutdown_in(delay)
-      logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
+      if cleanup.size > 0
+        jobs = cleanup.map {|p| p.job }.compact
 
-      after(delay) do
-        watchdog("Manager#hard_shutdown_in died") do
-          # We've reached the timeout and we still have busy workers.
-          # They must die but their messages shall live on.
-          logger.warn { "Terminating #{@busy.size} busy worker threads" }
-          logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
+        logger.warn { "Terminating #{cleanup.size} busy worker threads" }
+        logger.warn { "Work still in progress #{jobs.inspect}" }
 
-          requeue
-
-          @busy.each do |processor|
-            if processor.alive? && t = @threads.delete(processor.object_id)
-              t.raise Shutdown
-            end
-          end
-
-          @finished.signal
-        end
+        # Re-enqueue unfinished jobs
+        # NOTE: You may notice that we may push a job back to redis before
+        # the worker thread is terminated. This is ok because Sidekiq's
+        # contract says that jobs are run AT LEAST once. Process termination
+        # is delayed until we're certain the jobs are back in Redis because
+        # it is worse to lose a job than to run it twice.
+        strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+        strategy.bulk_requeue(jobs, @options)
       end
-    end
-
-    def dispatch
-      return if stopped?
-      # This is a safety check to ensure we haven't leaked
-      # processors somehow.
-      raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
-      raise "No ready processor!?" if @ready.empty?
 
-      @fetcher.async.fetch
-    end
-
-    def shutdown
-      requeue
-      @finished.signal
+      cleanup.each do |processor|
+        processor.kill
+      end
     end
 
-    def requeue
-      # Re-enqueue terminated jobs
-      # NOTE: You may notice that we may push a job back to redis before
-      # the worker thread is terminated. This is ok because Sidekiq's
-      # contract says that jobs are run AT LEAST once. Process termination
-      # is delayed until we're certain the jobs are back in Redis because
-      # it is worse to lose a job than to run it twice.
-      Sidekiq::Fetcher.strategy.bulk_requeue(@in_progress.values, @options)
-      @in_progress.clear
-    end
   end
 end
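
Both Launcher#stop and Manager#hard_shutdown now look the requeue strategy up from options[:fetch], falling back to Sidekiq::BasicFetch, instead of the old Sidekiq::Fetcher.strategy global. A custom fetcher therefore only needs a bulk_requeue class method to take part in shutdown. A hedged sketch follows; LoggingFetch is invented for illustration, and a real strategy also has to implement the fetching side:

    # Invented example of the options[:fetch] hook used above; only the
    # bulk_requeue shutdown path is sketched.
    require 'sidekiq'
    require 'sidekiq/fetch'

    class LoggingFetch < Sidekiq::BasicFetch
      def self.bulk_requeue(in_progress, options)
        Sidekiq.logger.info { "Requeueing #{in_progress.size} unfinished job(s)" }
        super # BasicFetch pushes the jobs back onto their queues
      end
    end

    Sidekiq.options[:fetch] = LoggingFetch
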