sidekiq 4.2.2 → 6.3.1

Potentially problematic release.

Files changed (138)
  1. checksums.yaml +5 -5
  2. data/Changes.md +516 -0
  3. data/LICENSE +2 -2
  4. data/README.md +23 -36
  5. data/bin/sidekiq +26 -2
  6. data/bin/sidekiqload +28 -38
  7. data/bin/sidekiqmon +8 -0
  8. data/lib/generators/sidekiq/templates/worker_spec.rb.erb +1 -1
  9. data/lib/generators/sidekiq/templates/worker_test.rb.erb +2 -2
  10. data/lib/generators/sidekiq/worker_generator.rb +21 -13
  11. data/lib/sidekiq/api.rb +401 -243
  12. data/lib/sidekiq/cli.rb +228 -212
  13. data/lib/sidekiq/client.rb +76 -53
  14. data/lib/sidekiq/delay.rb +41 -0
  15. data/lib/sidekiq/exception_handler.rb +12 -16
  16. data/lib/sidekiq/extensions/action_mailer.rb +13 -22
  17. data/lib/sidekiq/extensions/active_record.rb +13 -10
  18. data/lib/sidekiq/extensions/class_methods.rb +14 -11
  19. data/lib/sidekiq/extensions/generic_proxy.rb +12 -4
  20. data/lib/sidekiq/fetch.rb +39 -31
  21. data/lib/sidekiq/job.rb +13 -0
  22. data/lib/sidekiq/job_logger.rb +63 -0
  23. data/lib/sidekiq/job_retry.rb +259 -0
  24. data/lib/sidekiq/launcher.rb +170 -71
  25. data/lib/sidekiq/logger.rb +166 -0
  26. data/lib/sidekiq/manager.rb +17 -20
  27. data/lib/sidekiq/middleware/chain.rb +20 -8
  28. data/lib/sidekiq/middleware/current_attributes.rb +52 -0
  29. data/lib/sidekiq/middleware/i18n.rb +5 -7
  30. data/lib/sidekiq/monitor.rb +133 -0
  31. data/lib/sidekiq/paginator.rb +18 -14
  32. data/lib/sidekiq/processor.rb +169 -78
  33. data/lib/sidekiq/rails.rb +41 -36
  34. data/lib/sidekiq/redis_connection.rb +65 -20
  35. data/lib/sidekiq/scheduled.rb +85 -34
  36. data/lib/sidekiq/sd_notify.rb +149 -0
  37. data/lib/sidekiq/systemd.rb +24 -0
  38. data/lib/sidekiq/testing/inline.rb +2 -1
  39. data/lib/sidekiq/testing.rb +52 -26
  40. data/lib/sidekiq/util.rb +48 -15
  41. data/lib/sidekiq/version.rb +2 -1
  42. data/lib/sidekiq/web/action.rb +15 -17
  43. data/lib/sidekiq/web/application.rb +114 -92
  44. data/lib/sidekiq/web/csrf_protection.rb +180 -0
  45. data/lib/sidekiq/web/helpers.rb +151 -83
  46. data/lib/sidekiq/web/router.rb +27 -19
  47. data/lib/sidekiq/web.rb +85 -76
  48. data/lib/sidekiq/worker.rb +233 -43
  49. data/lib/sidekiq.rb +88 -64
  50. data/sidekiq.gemspec +24 -22
  51. data/web/assets/images/apple-touch-icon.png +0 -0
  52. data/web/assets/javascripts/application.js +86 -59
  53. data/web/assets/javascripts/dashboard.js +81 -85
  54. data/web/assets/stylesheets/application-dark.css +147 -0
  55. data/web/assets/stylesheets/application-rtl.css +242 -0
  56. data/web/assets/stylesheets/application.css +319 -141
  57. data/web/assets/stylesheets/bootstrap-rtl.min.css +9 -0
  58. data/web/assets/stylesheets/bootstrap.css +2 -2
  59. data/web/locales/ar.yml +87 -0
  60. data/web/locales/de.yml +14 -2
  61. data/web/locales/en.yml +8 -1
  62. data/web/locales/es.yml +22 -5
  63. data/web/locales/fa.yml +80 -0
  64. data/web/locales/fr.yml +10 -3
  65. data/web/locales/he.yml +79 -0
  66. data/web/locales/ja.yml +12 -4
  67. data/web/locales/lt.yml +83 -0
  68. data/web/locales/pl.yml +4 -4
  69. data/web/locales/ru.yml +4 -0
  70. data/web/locales/ur.yml +80 -0
  71. data/web/locales/vi.yml +83 -0
  72. data/web/views/_footer.erb +5 -2
  73. data/web/views/_job_info.erb +4 -3
  74. data/web/views/_nav.erb +4 -18
  75. data/web/views/_paging.erb +1 -1
  76. data/web/views/_poll_link.erb +2 -5
  77. data/web/views/_summary.erb +7 -7
  78. data/web/views/busy.erb +60 -22
  79. data/web/views/dashboard.erb +23 -15
  80. data/web/views/dead.erb +3 -3
  81. data/web/views/layout.erb +14 -3
  82. data/web/views/morgue.erb +19 -12
  83. data/web/views/queue.erb +24 -14
  84. data/web/views/queues.erb +14 -4
  85. data/web/views/retries.erb +22 -13
  86. data/web/views/retry.erb +4 -4
  87. data/web/views/scheduled.erb +7 -4
  88. metadata +44 -194
  89. data/.github/contributing.md +0 -32
  90. data/.github/issue_template.md +0 -4
  91. data/.gitignore +0 -12
  92. data/.travis.yml +0 -12
  93. data/3.0-Upgrade.md +0 -70
  94. data/4.0-Upgrade.md +0 -53
  95. data/COMM-LICENSE +0 -95
  96. data/Ent-Changes.md +0 -146
  97. data/Gemfile +0 -29
  98. data/Pro-2.0-Upgrade.md +0 -138
  99. data/Pro-3.0-Upgrade.md +0 -44
  100. data/Pro-Changes.md +0 -570
  101. data/Rakefile +0 -9
  102. data/bin/sidekiqctl +0 -99
  103. data/code_of_conduct.md +0 -50
  104. data/lib/sidekiq/core_ext.rb +0 -106
  105. data/lib/sidekiq/logging.rb +0 -106
  106. data/lib/sidekiq/middleware/server/active_record.rb +0 -13
  107. data/lib/sidekiq/middleware/server/logging.rb +0 -40
  108. data/lib/sidekiq/middleware/server/retry_jobs.rb +0 -205
  109. data/test/config.yml +0 -9
  110. data/test/env_based_config.yml +0 -11
  111. data/test/fake_env.rb +0 -1
  112. data/test/fixtures/en.yml +0 -2
  113. data/test/helper.rb +0 -75
  114. data/test/test_actors.rb +0 -138
  115. data/test/test_api.rb +0 -528
  116. data/test/test_cli.rb +0 -418
  117. data/test/test_client.rb +0 -266
  118. data/test/test_exception_handler.rb +0 -56
  119. data/test/test_extensions.rb +0 -127
  120. data/test/test_fetch.rb +0 -50
  121. data/test/test_launcher.rb +0 -95
  122. data/test/test_logging.rb +0 -35
  123. data/test/test_manager.rb +0 -50
  124. data/test/test_middleware.rb +0 -158
  125. data/test/test_processor.rb +0 -201
  126. data/test/test_rails.rb +0 -22
  127. data/test/test_redis_connection.rb +0 -132
  128. data/test/test_retry.rb +0 -326
  129. data/test/test_retry_exhausted.rb +0 -149
  130. data/test/test_scheduled.rb +0 -115
  131. data/test/test_scheduling.rb +0 -50
  132. data/test/test_sidekiq.rb +0 -107
  133. data/test/test_testing.rb +0 -143
  134. data/test/test_testing_fake.rb +0 -357
  135. data/test/test_testing_inline.rb +0 -94
  136. data/test/test_util.rb +0 -13
  137. data/test/test_web.rb +0 -666
  138. data/test/test_web_helpers.rb +0 -54

data/lib/sidekiq/job.rb
@@ -0,0 +1,13 @@
+require "sidekiq/worker"
+
+module Sidekiq
+  # Sidekiq::Job is a new alias for Sidekiq::Worker as of Sidekiq 6.3.0.
+  # Use `include Sidekiq::Job` rather than `include Sidekiq::Worker`.
+  #
+  # The term "worker" is too generic and overly confusing, used in several
+  # different contexts meaning different things. Many people call a Sidekiq
+  # process a "worker". Some people call the thread that executes jobs a
+  # "worker". This change brings Sidekiq closer to ActiveJob where your job
+  # classes extend ApplicationJob.
+  Job = Worker
+end
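
The new alias changes nothing at runtime, since Sidekiq::Job and Sidekiq::Worker are the same module object. A minimal sketch of a job class written against the new name (the class, queue and arguments below are illustrative, not part of the gem):

    require "sidekiq"

    class HardJob
      # Same behavior as `include Sidekiq::Worker`; only the name is new.
      include Sidekiq::Job
      sidekiq_options queue: "default", retry: 5

      def perform(user_id)
        # do the slow work for user_id here
      end
    end

    HardJob.perform_async(123)    # enqueue now
    HardJob.perform_in(300, 123)  # enqueue to run in five minutes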

data/lib/sidekiq/job_logger.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Sidekiq
+  class JobLogger
+    def initialize(logger = Sidekiq.logger)
+      @logger = logger
+    end
+
+    def call(item, queue)
+      start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      @logger.info("start")
+
+      yield
+
+      with_elapsed_time_context(start) do
+        @logger.info("done")
+      end
+    rescue Exception
+      with_elapsed_time_context(start) do
+        @logger.info("fail")
+      end
+
+      raise
+    end
+
+    def prepare(job_hash, &block)
+      level = job_hash["log_level"]
+      if level
+        @logger.log_at(level) do
+          Sidekiq::Context.with(job_hash_context(job_hash), &block)
+        end
+      else
+        Sidekiq::Context.with(job_hash_context(job_hash), &block)
+      end
+    end
+
+    def job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      h = {
+        class: job_hash["display_class"] || job_hash["wrapped"] || job_hash["class"],
+        jid: job_hash["jid"]
+      }
+      h[:bid] = job_hash["bid"] if job_hash["bid"]
+      h[:tags] = job_hash["tags"] if job_hash["tags"]
+      h
+    end
+
+    def with_elapsed_time_context(start, &block)
+      Sidekiq::Context.with(elapsed_time_context(start), &block)
+    end
+
+    def elapsed_time_context(start)
+      {elapsed: elapsed(start).to_s}
+    end
+
+    private
+
+    def elapsed(start)
+      (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
+    end
+  end
+end
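
JobLogger wraps every job execution: it logs "start" before yielding to the job, then "done" or "fail" with elapsed time measured on a monotonic clock, while `prepare` pushes per-job context (class, jid, optional bid/tags) and an optional per-job log level. A minimal sketch of swapping in a custom subclass, assuming the `:job_logger` server option consulted by the Processor behaves this way in your Sidekiq version (the subclass name is hypothetical, and error handling is left to the default behavior):

    # VerboseJobLogger is a hypothetical subclass, not part of the gem.
    class VerboseJobLogger < Sidekiq::JobLogger
      def call(item, queue)
        start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
        @logger.info("Starting #{item["class"]} on queue #{queue}")
        yield
        elapsed = (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
        @logger.info("Finished #{item["class"]} in #{elapsed}s")
      end
    end

    Sidekiq.configure_server do |config|
      # Assumption: when set, options[:job_logger] is instantiated by the Processor.
      config.options[:job_logger] = VerboseJobLogger
    end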

data/lib/sidekiq/job_retry.rb
@@ -0,0 +1,259 @@
+# frozen_string_literal: true
+
+require "sidekiq/scheduled"
+require "sidekiq/api"
+
+require "zlib"
+require "base64"
+
+module Sidekiq
+  ##
+  # Automatically retry jobs that fail in Sidekiq.
+  # Sidekiq's retry support assumes a typical development lifecycle:
+  #
+  #   0. Push some code changes with a bug in it.
+  #   1. Bug causes job processing to fail, Sidekiq's middleware captures
+  #      the job and pushes it onto a retry queue.
+  #   2. Sidekiq retries jobs in the retry queue multiple times with
+  #      an exponential delay, the job continues to fail.
+  #   3. After a few days, a developer deploys a fix. The job is
+  #      reprocessed successfully.
+  #   4. Once retries are exhausted, Sidekiq will give up and move the
+  #      job to the Dead Job Queue (aka morgue) where it must be dealt with
+  #      manually in the Web UI.
+  #   5. After 6 months on the DJQ, Sidekiq will discard the job.
+  #
+  # A job looks like:
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => true }
+  #
+  # The 'retry' option also accepts a number (in place of 'true'):
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => 5 }
+  #
+  # The job will be retried this number of times before giving up. (If simply
+  # 'true', Sidekiq retries 25 times)
+  #
+  # We'll add a bit more data to the job to support retries:
+  #
+  #  * 'queue' - the queue to use
+  #  * 'retry_count' - number of times we've retried so far.
+  #  * 'error_message' - the message from the exception
+  #  * 'error_class' - the exception class
+  #  * 'failed_at' - the first time it failed
+  #  * 'retried_at' - the last time it was retried
+  #  * 'backtrace' - the number of lines of error backtrace to store
+  #
+  # We don't store the backtrace by default as that can add a lot of overhead
+  # to the job and everyone is using an error service, right?
+  #
+  # The default number of retries is 25 which works out to about 3 weeks
+  # You can change the default maximum number of retries in your initializer:
+  #
+  #     Sidekiq.options[:max_retries] = 7
+  #
+  # or limit the number of retries for a particular worker with:
+  #
+  #     class MyWorker
+  #       include Sidekiq::Worker
+  #       sidekiq_options :retry => 10
+  #     end
+  #
+  class JobRetry
+    class Handled < ::RuntimeError; end
+
+    class Skip < Handled; end
+
+    include Sidekiq::Util
+
+    DEFAULT_MAX_RETRY_ATTEMPTS = 25
+
+    def initialize(options = {})
+      @max_retries = Sidekiq.options.merge(options).fetch(:max_retries, DEFAULT_MAX_RETRY_ATTEMPTS)
+    end
+
+    # The global retry handler requires only the barest of data.
+    # We want to be able to retry as much as possible so we don't
+    # require the worker to be instantiated.
+    def global(jobstr, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"]
+        attempt_retry(nil, msg, queue, e)
+      else
+        Sidekiq.death_handlers.each do |handler|
+          handler.call(msg, e)
+        rescue => handler_ex
+          handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
+        end
+      end
+
+      raise Handled
+    end
+
+    # The local retry support means that any errors that occur within
+    # this block can be associated with the given worker instance.
+    # This is required to support the `sidekiq_retries_exhausted` block.
+    #
+    # Note that any exception from the block is wrapped in the Skip
+    # exception so the global block does not reprocess the error. The
+    # Skip exception is unwrapped within Sidekiq::Processor#process before
+    # calling the handle_exception handlers.
+    def local(worker, jobstr, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"].nil?
+        msg["retry"] = worker.class.get_sidekiq_options["retry"]
+      end
+
+      raise e unless msg["retry"]
+      attempt_retry(worker, msg, queue, e)
+      # We've handled this error associated with this job, don't
+      # need to handle it at the global level
+      raise Skip
+    end
+
+    private
+
+    # Note that +worker+ can be nil here if an error is raised before we can
+    # instantiate the worker instance. All access must be guarded and
+    # best effort.
+    def attempt_retry(worker, msg, queue, exception)
+      max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
+
+      msg["queue"] = (msg["retry_queue"] || queue)
+
+      m = exception_message(exception)
+      if m.respond_to?(:scrub!)
+        m.force_encoding("utf-8")
+        m.scrub!
+      end
+
+      msg["error_message"] = m
+      msg["error_class"] = exception.class.name
+      count = if msg["retry_count"]
+        msg["retried_at"] = Time.now.to_f
+        msg["retry_count"] += 1
+      else
+        msg["failed_at"] = Time.now.to_f
+        msg["retry_count"] = 0
+      end
+
+      if msg["backtrace"]
+        lines = if msg["backtrace"] == true
+          exception.backtrace
+        else
+          exception.backtrace[0...msg["backtrace"].to_i]
+        end
+
+        msg["error_backtrace"] = compress_backtrace(lines)
+      end
+
+      if count < max_retry_attempts
+        delay = delay_for(worker, count, exception)
+        # Logging here can break retries if the logging device raises ENOSPC #3979
+        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        retry_at = Time.now.to_f + delay
+        payload = Sidekiq.dump_json(msg)
+        Sidekiq.redis do |conn|
+          conn.zadd("retry", retry_at.to_s, payload)
+        end
+      else
+        # Goodbye dear message, you (re)tried your best I'm sure.
+        retries_exhausted(worker, msg, exception)
+      end
+    end
+
+    def retries_exhausted(worker, msg, exception)
+      begin
+        block = worker&.sidekiq_retries_exhausted_block
+        block&.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
+      end
+
+      send_to_morgue(msg) unless msg["dead"] == false
+
+      Sidekiq.death_handlers.each do |handler|
+        handler.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling death handler", job: msg})
+      end
+    end
+
+    def send_to_morgue(msg)
+      logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
+      payload = Sidekiq.dump_json(msg)
+      DeadSet.new.kill(payload, notify_failure: false)
+    end
+
+    def retry_attempts_from(msg_retry, default)
+      if msg_retry.is_a?(Integer)
+        msg_retry
+      else
+        default
+      end
+    end
+
+    def delay_for(worker, count, exception)
+      jitter = rand(10) * (count + 1)
+      if worker&.sidekiq_retry_in_block
+        custom_retry_in = retry_in(worker, count, exception).to_i
+        return custom_retry_in + jitter if custom_retry_in > 0
+      end
+      (count**4) + 15 + jitter
+    end
+
+    def retry_in(worker, count, exception)
+      worker.sidekiq_retry_in_block.call(count, exception)
+    rescue Exception => e
+      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
+      nil
+    end
+
+    def exception_caused_by_shutdown?(e, checked_causes = [])
+      return false unless e.cause
+
+      # Handle circular causes
+      checked_causes << e.object_id
+      return false if checked_causes.include?(e.cause.object_id)
+
+      e.cause.instance_of?(Sidekiq::Shutdown) ||
+        exception_caused_by_shutdown?(e.cause, checked_causes)
+    end
+
+    # Extract message from exception.
+    # Set a default if the message raises an error
+    def exception_message(exception)
+      # App code can stuff all sorts of crazy binary data into the error message
+      # that won't convert to JSON.
+      exception.message.to_s[0, 10_000]
+    rescue
+      +"!!! ERROR MESSAGE THREW AN ERROR !!!"
+    end
+
+    def compress_backtrace(backtrace)
+      serialized = Sidekiq.dump_json(backtrace)
+      compressed = Zlib::Deflate.deflate(serialized)
+      Base64.encode64(compressed)
+    end
+  end
+end
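
The retry schedule and the exhaustion hooks consulted above (`sidekiq_retry_in_block`, `sidekiq_retries_exhausted_block`) are defined per worker through the `sidekiq_retry_in` and `sidekiq_retries_exhausted` class macros; with the default `delay_for`, retry N waits roughly N**4 + 15 seconds plus a small random jitter. A minimal sketch (the worker name and bodies are illustrative, not part of the gem):

    class FlakyApiWorker
      include Sidekiq::Worker
      sidekiq_options retry: 10, backtrace: 20

      # Consulted by JobRetry#delay_for; return seconds, or nil to fall
      # back to the default (count**4) + 15 + jitter schedule.
      sidekiq_retry_in do |count, exception|
        30 * (count + 1)
      end

      # Called by JobRetry#retries_exhausted before the job goes to the Dead set.
      sidekiq_retries_exhausted do |msg, exception|
        Sidekiq.logger.warn("Giving up on #{msg["class"]} #{msg["jid"]}: #{exception.message}")
      end

      def perform(order_id)
        # call the flaky API for order_id here
      end
    end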

data/lib/sidekiq/launcher.rb
@@ -1,20 +1,28 @@
 # frozen_string_literal: true
-# encoding: utf-8
-require 'sidekiq/manager'
-require 'sidekiq/fetch'
-require 'sidekiq/scheduled'
+
+require "sidekiq/manager"
+require "sidekiq/fetch"
+require "sidekiq/scheduled"
 
 module Sidekiq
-  # The Launcher is a very simple Actor whose job is to
-  # start, monitor and stop the core Actors in Sidekiq.
-  # If any of these actors die, the Sidekiq process exits
-  # immediately.
+  # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
     include Util
 
+    STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
+
+    PROCTITLES = [
+      proc { "sidekiq" },
+      proc { Sidekiq::VERSION },
+      proc { |me, data| data["tag"] },
+      proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
+      proc { |me, data| "stopping" if me.stopping? }
+    ]
+
     attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
+      options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
       @poller = Sidekiq::Scheduled::Poller.new
       @done = false
@@ -39,7 +47,7 @@ module Sidekiq
     # return until all work is complete and cleaned up.
     # It can take up to the timeout to complete.
     def stop
-      deadline = Time.now + @options[:timeout]
+      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
 
       @done = true
       @manager.quiet
@@ -49,7 +57,7 @@ module Sidekiq
 
       # Requeue everything in case there was a worker who grabbed work while stopped
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy = @options[:fetch]
       strategy.bulk_requeue([], @options)
 
       clear_heartbeat
@@ -61,104 +69,195 @@ module Sidekiq
 
     private unless $TESTING
 
-    JVM_RESERVED_SIGNALS = ['USR1', 'USR2'] # Don't Process#kill if we get these signals via the API
+    BEAT_PAUSE = 5
+
+    def start_heartbeat
+      loop do
+        heartbeat
+        sleep BEAT_PAUSE
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem("processes", identity)
+          conn.unlink("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
+    def heartbeat
+      $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
+
+      ❤
+    end
+
+    def self.flush_stats
+      fails = Processor::FAILURE.reset
+      procd = Processor::PROCESSED.reset
+      return if fails + procd == 0
 
-    def heartbeat(k, data, json)
-      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, data) }
-      results.compact!
-      $0 = results.join(' ')
+      nowdate = Time.now.utc.strftime("%Y-%m-%d")
+      begin
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
 
-      ❤(k, json)
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+          end
+        end
+      rescue => ex
+        # we're exiting the process, things might be shut down so don't
+        # try to handle the exception
+        Sidekiq.logger.warn("Unable to flush stats: #{ex}")
+      end
     end
+    at_exit(&method(:flush_stats))
 
-    def ❤(key, json)
+    def ❤
+      key = identity
       fails = procd = 0
+
       begin
-        Processor::FAILURE.update {|curr| fails = curr; 0 }
-        Processor::PROCESSED.update {|curr| procd = curr; 0 }
+        fails = Processor::FAILURE.reset
+        procd = Processor::PROCESSED.reset
+        curstate = Processor::WORKER_STATE.dup
+
+        workers_key = "#{key}:workers"
+        nowdate = Time.now.utc.strftime("%Y-%m-%d")
 
-        workers_key = "#{key}:workers".freeze
-        nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
         Sidekiq.redis do |conn|
           conn.multi do
-            conn.incrby("stat:processed".freeze, procd)
+            conn.incrby("stat:processed", procd)
             conn.incrby("stat:processed:#{nowdate}", procd)
-            conn.incrby("stat:failed".freeze, fails)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            conn.incrby("stat:failed", fails)
             conn.incrby("stat:failed:#{nowdate}", fails)
-            conn.del(workers_key)
-            Processor::WORKER_STATE.each_pair do |tid, hash|
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+
+            conn.unlink(workers_key)
+            curstate.each_pair do |tid, hash|
              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
            end
            conn.expire(workers_key, 60)
          end
        end
+
+        rtt = check_rtt
+
        fails = procd = 0
+        kb = memory_usage(::Process.pid)
 
-        _, exists, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.exists(key)
-            conn.hmset(key, 'info', json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f, 'quiet', @done)
+        _, exists, _, _, msg = Sidekiq.redis { |conn|
+          conn.multi {
+            conn.sadd("processes", key)
+            conn.exists?(key)
+            conn.hmset(key, "info", to_json,
+              "busy", curstate.size,
+              "beat", Time.now.to_f,
+              "rtt_us", rtt,
+              "quiet", @done,
+              "rss", kb)
            conn.expire(key, 60)
            conn.rpop("#{key}-signals")
-          end
-        end
+          }
+        }
 
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
-        fire_event(:heartbeat) if !exists
+        fire_event(:heartbeat) unless exists
 
         return unless msg
 
-        if JVM_RESERVED_SIGNALS.include?(msg)
-          Sidekiq::CLI.instance.handle_signal(msg)
-        else
-          ::Process.kill(msg, $$)
-        end
+        ::Process.kill(msg, ::Process.pid)
       rescue => e
         # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+        logger.error("heartbeat: #{e}")
        # don't lose the counts if there was a network issue
-        Processor::PROCESSED.increment(procd)
-        Processor::FAILURE.increment(fails)
+        Processor::PROCESSED.incr(procd)
+        Processor::FAILURE.incr(fails)
       end
     end
 
-    def start_heartbeat
-      k = identity
-      data = {
-        'hostname' => hostname,
-        'started_at' => Time.now.to_f,
-        'pid' => $$,
-        'tag' => @options[:tag] || '',
-        'concurrency' => @options[:concurrency],
-        'queues' => @options[:queues].uniq,
-        'labels' => @options[:labels],
-        'identity' => k,
-      }
-      # this data doesn't change so dump it to a string
-      # now so we don't need to dump it every heartbeat.
-      json = Sidekiq.dump_json(data)
+    # We run the heartbeat every five seconds.
+    # Capture five samples of RTT, log a warning if each sample
+    # is above our warning threshold.
+    RTT_READINGS = RingBuffer.new(5)
+    RTT_WARNING_LEVEL = 50_000
 
-      while true
-        heartbeat(k, data, json)
-        sleep 5
+    def check_rtt
+      a = b = 0
+      Sidekiq.redis do |x|
+        a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
+        x.ping
+        b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
       end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      rtt = b - a
+      RTT_READINGS << rtt
+      # Ideal RTT for Redis is < 1000µs
+      # Workable is < 10,000µs
+      # Log a warning if it's a disaster.
+      if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
+        Sidekiq.logger.warn <<~EOM
+          Your Redis network connection is performing extremely poorly.
+          Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
+          Ensure Redis is running in the same AZ or datacenter as Sidekiq.
+          If these values are close to 100,000, that means your Sidekiq process may be
+          CPU overloaded; see https://github.com/mperham/sidekiq/discussions/5039
+        EOM
+        RTT_READINGS.reset
+      end
+      rtt
     end
 
-    def clear_heartbeat
-      # Remove record from Redis since we are shutting down.
-      # Note we don't stop the heartbeat thread; if the process
-      # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
-        conn.pipelined do
-          conn.srem('processes', identity)
-          conn.del("#{identity}:workers")
+    MEMORY_GRABBER = case RUBY_PLATFORM
+    when /linux/
+      ->(pid) {
+        IO.readlines("/proc/#{$$}/status").each do |line|
+          next unless line.start_with?("VmRSS:")
+          break line.split[1].to_i
         end
-      end
-    rescue
-      # best effort, ignore network errors
+      }
+    when /darwin|bsd/
+      ->(pid) {
+        `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
+      }
+    else
+      ->(pid) { 0 }
+    end
+
+    def memory_usage(pid)
+      MEMORY_GRABBER.call(pid)
+    end
+
+    def to_data
+      @data ||= {
+        "hostname" => hostname,
+        "started_at" => Time.now.to_f,
+        "pid" => ::Process.pid,
+        "tag" => @options[:tag] || "",
+        "concurrency" => @options[:concurrency],
+        "queues" => @options[:queues].uniq,
+        "labels" => @options[:labels],
+        "identity" => identity
+      }
     end
 
+    def to_json
+      # this data changes infrequently so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      @json ||= Sidekiq.dump_json(to_data)
+    end
   end
 end
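
Each heartbeat now publishes extra fields alongside busy/beat/quiet: rtt_us (the sampled Redis round-trip time in microseconds) and rss (resident memory in kilobytes, read from /proc on Linux or ps elsewhere). They land in the per-process hash in Redis, so they can be read back through the existing API. A minimal sketch, assuming the Sidekiq::ProcessSet fields shown here are exposed unchanged in your version:

    require "sidekiq/api"

    Sidekiq::ProcessSet.new.each do |process|
      # "identity", "busy", "rss" and "rtt_us" come from the heartbeat hash above.
      puts format("%s busy=%s rss=%sKB rtt=%sus",
        process["identity"], process["busy"], process["rss"], process["rtt_us"])
    end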