RubyGems - sidekiq - Versions diffs - 5.2.8 → 6.2.2 - Mend

sidekiq 5.2.8 → 6.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sidekiq might be problematic. Click here for more details.

Files changed (97)

checksums.yaml +4 -4
data/Changes.md +248 -0
data/LICENSE +1 -1
data/README.md +18 -34
data/bin/sidekiq +26 -2
data/bin/sidekiqload +32 -24
data/bin/sidekiqmon +8 -0
data/lib/generators/sidekiq/templates/worker_test.rb.erb +1 -1
data/lib/generators/sidekiq/worker_generator.rb +21 -13
data/lib/sidekiq/api.rb +310 -249
data/lib/sidekiq/cli.rb +144 -180
data/lib/sidekiq/client.rb +64 -48
data/lib/sidekiq/delay.rb +5 -6
data/lib/sidekiq/exception_handler.rb +10 -12
data/lib/sidekiq/extensions/action_mailer.rb +13 -22
data/lib/sidekiq/extensions/active_record.rb +13 -10
data/lib/sidekiq/extensions/class_methods.rb +14 -11
data/lib/sidekiq/extensions/generic_proxy.rb +6 -4
data/lib/sidekiq/fetch.rb +38 -31
data/lib/sidekiq/job.rb +8 -0
data/lib/sidekiq/job_logger.rb +45 -7
data/lib/sidekiq/job_retry.rb +64 -67
data/lib/sidekiq/launcher.rb +146 -60
data/lib/sidekiq/logger.rb +166 -0
data/lib/sidekiq/manager.rb +11 -13
data/lib/sidekiq/middleware/chain.rb +20 -8
data/lib/sidekiq/middleware/i18n.rb +5 -7
data/lib/sidekiq/monitor.rb +133 -0
data/lib/sidekiq/paginator.rb +18 -14
data/lib/sidekiq/processor.rb +71 -70
data/lib/sidekiq/rails.rb +29 -37
data/lib/sidekiq/redis_connection.rb +50 -48
data/lib/sidekiq/scheduled.rb +35 -30
data/lib/sidekiq/sd_notify.rb +149 -0
data/lib/sidekiq/systemd.rb +24 -0
data/lib/sidekiq/testing/inline.rb +2 -1
data/lib/sidekiq/testing.rb +36 -27
data/lib/sidekiq/util.rb +45 -16
data/lib/sidekiq/version.rb +2 -1
data/lib/sidekiq/web/action.rb +15 -11
data/lib/sidekiq/web/application.rb +86 -76
data/lib/sidekiq/web/csrf_protection.rb +180 -0
data/lib/sidekiq/web/helpers.rb +114 -86
data/lib/sidekiq/web/router.rb +23 -19
data/lib/sidekiq/web.rb +61 -105
data/lib/sidekiq/worker.rb +126 -102
data/lib/sidekiq.rb +69 -44
data/sidekiq.gemspec +23 -16
data/web/assets/images/apple-touch-icon.png +0 -0
data/web/assets/javascripts/application.js +25 -27
data/web/assets/javascripts/dashboard.js +4 -23
data/web/assets/stylesheets/application-dark.css +147 -0
data/web/assets/stylesheets/application.css +37 -128
data/web/locales/ar.yml +8 -2
data/web/locales/de.yml +14 -2
data/web/locales/en.yml +5 -0
data/web/locales/es.yml +18 -2
data/web/locales/fr.yml +10 -3
data/web/locales/ja.yml +7 -1
data/web/locales/lt.yml +83 -0
data/web/locales/pl.yml +4 -4
data/web/locales/ru.yml +4 -0
data/web/locales/vi.yml +83 -0
data/web/views/_job_info.erb +3 -2
data/web/views/busy.erb +54 -20
data/web/views/dashboard.erb +14 -6
data/web/views/dead.erb +3 -3
data/web/views/layout.erb +2 -0
data/web/views/morgue.erb +9 -6
data/web/views/queue.erb +11 -2
data/web/views/queues.erb +10 -2
data/web/views/retries.erb +11 -8
data/web/views/retry.erb +3 -3
data/web/views/scheduled.erb +5 -2
metadata +32 -64
data/.circleci/config.yml +0 -61
data/.github/contributing.md +0 -32
data/.github/issue_template.md +0 -11
data/.gitignore +0 -15
data/.travis.yml +0 -11
data/3.0-Upgrade.md +0 -70
data/4.0-Upgrade.md +0 -53
data/5.0-Upgrade.md +0 -56
data/COMM-LICENSE +0 -97
data/Ent-Changes.md +0 -238
data/Gemfile +0 -23
data/Pro-2.0-Upgrade.md +0 -138
data/Pro-3.0-Upgrade.md +0 -44
data/Pro-4.0-Upgrade.md +0 -35
data/Pro-Changes.md +0 -759
data/Rakefile +0 -9
data/bin/sidekiqctl +0 -20
data/code_of_conduct.md +0 -50
data/lib/sidekiq/core_ext.rb +0 -1
data/lib/sidekiq/ctl.rb +0 -221
data/lib/sidekiq/logging.rb +0 -122
data/lib/sidekiq/middleware/server/active_record.rb +0 -23

data/lib/sidekiq/fetch.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
-require 'sidekiq'
+require "sidekiq"
 module Sidekiq
   class BasicFetch
@@ -7,68 +8,60 @@ module Sidekiq
     # can check if the process is shutting down.
     TIMEOUT = 2
-    UnitOfWork = Struct.new(:queue, :job) do
+    UnitOfWork = Struct.new(:queue, :job) {
       def acknowledge
         # nothing to do
       end
       def queue_name
-        queue.sub(/.*queue:/, '')
+        queue.delete_prefix("queue:")
       end
       def requeue
         Sidekiq.redis do |conn|
-          conn.rpush("queue:#{queue_name}", job)
+          conn.rpush(queue, job)
         end
       end
-    end
+    }
     def initialize(options)
-      @strictly_ordered_queues = !!options[:strict]
-      @queues = options[:queues].map { |q| "queue:#{q}" }
+      raise ArgumentError, "missing queue list" unless options[:queues]
+      @options = options
+      @strictly_ordered_queues = !!@options[:strict]
+      @queues = @options[:queues].map { |q| "queue:#{q}" }
       if @strictly_ordered_queues
-        @queues = @queues.uniq
+        @queues.uniq!
         @queues << TIMEOUT
       end
     end
     def retrieve_work
-      work = Sidekiq.redis { |conn| conn.brpop(*queues_cmd) }
-      UnitOfWork.new(*work) if work
-    end
-    # Creating the Redis#brpop command takes into account any
-    # configured queue weights. By default Redis#brpop returns
-    # data from the first queue that has pending elements. We
-    # recreate the queue command each time we invoke Redis#brpop
-    # to honor weights and avoid queue starvation.
-    def queues_cmd
-      if @strictly_ordered_queues
-        @queues
-      else
-        queues = @queues.shuffle.uniq
-        queues << TIMEOUT
-        queues
+      qs = queues_cmd
+      # 4825 Sidekiq Pro with all queues paused will return an
+      # empty set of queues with a trailing TIMEOUT value.
+      if qs.size <= 1
+        sleep(TIMEOUT)
+        return nil
       end
-    end
+      work = Sidekiq.redis { |conn| conn.brpop(*qs) }
+      UnitOfWork.new(*work) if work
+    end
-    # By leaving this as a class method, it can be pluggable and used by the Manager actor. Making it
-    # an instance method will make it async to the Fetcher actor
-    def self.bulk_requeue(inprogress, options)
+    def bulk_requeue(inprogress, options)
       return if inprogress.empty?
       Sidekiq.logger.debug { "Re-queueing terminated jobs" }
       jobs_to_requeue = {}
       inprogress.each do |unit_of_work|
-        jobs_to_requeue[unit_of_work.queue_name] ||= []
-        jobs_to_requeue[unit_of_work.queue_name] << unit_of_work.job
+        jobs_to_requeue[unit_of_work.queue] ||= []
+        jobs_to_requeue[unit_of_work.queue] << unit_of_work.job
       end
       Sidekiq.redis do |conn|
         conn.pipelined do
           jobs_to_requeue.each do |queue, jobs|
-            conn.rpush("queue:#{queue}", jobs)
+            conn.rpush(queue, jobs)
           end
         end
       end
@@ -77,5 +70,19 @@ module Sidekiq
       Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
     end
+    # Creating the Redis#brpop command takes into account any
+    # configured queue weights. By default Redis#brpop returns
+    # data from the first queue that has pending elements. We
+    # recreate the queue command each time we invoke Redis#brpop
+    # to honor weights and avoid queue starvation.
+    def queues_cmd
+      if @strictly_ordered_queues
+        @queues
+      else
+        queues = @queues.shuffle!.uniq
+        queues << TIMEOUT
+        queues
+      end
+    end
   end
 end

data/lib/sidekiq/job.rb ADDED Viewed

@@ -0,0 +1,8 @@
+require "sidekiq/worker"
+module Sidekiq
+  # Sidekiq::Job is a new alias for Sidekiq::Worker, coming in 6.3.0.
+  # You can opt into this by requiring 'sidekiq/job' in your initializer
+  # and then using `include Sidekiq::Job` rather than `Sidekiq::Worker`.
+  Job = Worker
+end

data/lib/sidekiq/job_logger.rb CHANGED Viewed

@@ -1,25 +1,63 @@
 # frozen_string_literal: true
 module Sidekiq
   class JobLogger
+    def initialize(logger = Sidekiq.logger)
+      @logger = logger
+    end
     def call(item, queue)
       start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
-      logger.info("start")
+      @logger.info("start")
       yield
-      logger.info("done: #{elapsed(start)} sec")
+      with_elapsed_time_context(start) do
+        @logger.info("done")
+      end
     rescue Exception
-      logger.info("fail: #{elapsed(start)} sec")
+      with_elapsed_time_context(start) do
+        @logger.info("fail")
+      end
       raise
     end
+    def prepare(job_hash, &block)
+      level = job_hash["log_level"]
+      if level
+        @logger.log_at(level) do
+          Sidekiq::Context.with(job_hash_context(job_hash), &block)
+        end
+      else
+        Sidekiq::Context.with(job_hash_context(job_hash), &block)
+      end
+    end
+    def job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      h = {
+        class: job_hash["display_class"] || job_hash["wrapped"] || job_hash["class"],
+        jid: job_hash["jid"]
+      }
+      h[:bid] = job_hash["bid"] if job_hash["bid"]
+      h[:tags] = job_hash["tags"] if job_hash["tags"]
+      h
+    end
+    def with_elapsed_time_context(start, &block)
+      Sidekiq::Context.with(elapsed_time_context(start), &block)
+    end
+    def elapsed_time_context(start)
+      {elapsed: elapsed(start).to_s}
+    end
     private
     def elapsed(start)
       (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
     end
-    def logger
-      Sidekiq.logger
-    end
   end
 end

data/lib/sidekiq/job_retry.rb CHANGED Viewed

@@ -1,6 +1,10 @@
 # frozen_string_literal: true
-require 'sidekiq/scheduled'
-require 'sidekiq/api'
+require "sidekiq/scheduled"
+require "sidekiq/api"
+require "zlib"
+require "base64"
 module Sidekiq
   ##
@@ -57,6 +61,7 @@ module Sidekiq
   #
   class JobRetry
     class Handled < ::RuntimeError; end
     class Skip < Handled; end
     include Sidekiq::Util
@@ -70,7 +75,7 @@ module Sidekiq
     # The global retry handler requires only the barest of data.
     # We want to be able to retry as much as possible so we don't
     # require the worker to be instantiated.
-    def global(msg, queue)
+    def global(jobstr, queue)
       yield
     rescue Handled => ex
       raise ex
@@ -81,22 +86,20 @@ module Sidekiq
       # ignore, will be pushed back onto queue during hard_shutdown
       raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
-      if msg['retry']
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"]
         attempt_retry(nil, msg, queue, e)
       else
         Sidekiq.death_handlers.each do |handler|
-          begin
-            handler.call(msg, e)
-          rescue => handler_ex
-            handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
-          end
+          handler.call(msg, e)
+        rescue => handler_ex
+          handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
         end
       end
       raise Handled
     end
     # The local retry support means that any errors that occur within
     # this block can be associated with the given worker instance.
     # This is required to support the `sidekiq_retries_exhausted` block.
@@ -105,7 +108,7 @@ module Sidekiq
     # exception so the global block does not reprocess the error.  The
     # Skip exception is unwrapped within Sidekiq::Processor#process before
     # calling the handle_exception handlers.
-    def local(worker, msg, queue)
+    def local(worker, jobstr, queue)
       yield
     rescue Handled => ex
       raise ex
@@ -116,11 +119,12 @@ module Sidekiq
       # ignore, will be pushed back onto queue during hard_shutdown
       raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
-      if msg['retry'] == nil
-        msg['retry'] = worker.class.get_sidekiq_options['retry']
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"].nil?
+        msg["retry"] = worker.class.get_sidekiq_options["retry"]
       end
-      raise e unless msg['retry']
+      raise e unless msg["retry"]
       attempt_retry(worker, msg, queue, e)
       # We've handled this error associated with this job, don't
       # need to handle it at the global level
@@ -133,13 +137,9 @@ module Sidekiq
     # instantiate the worker instance.  All access must be guarded and
     # best effort.
     def attempt_retry(worker, msg, queue, exception)
-      max_retry_attempts = retry_attempts_from(msg['retry'], @max_retries)
+      max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
-      msg['queue'] = if msg['retry_queue']
-        msg['retry_queue']
-      else
-        queue
-      end
+      msg["queue"] = (msg["retry_queue"] || queue)
       m = exception_message(exception)
       if m.respond_to?(:scrub!)
@@ -147,32 +147,34 @@ module Sidekiq
         m.scrub!
       end
-      msg['error_message'] = m
-      msg['error_class'] = exception.class.name
-      count = if msg['retry_count']
-        msg['retried_at'] = Time.now.to_f
-        msg['retry_count'] += 1
+      msg["error_message"] = m
+      msg["error_class"] = exception.class.name
+      count = if msg["retry_count"]
+        msg["retried_at"] = Time.now.to_f
+        msg["retry_count"] += 1
       else
-        msg['failed_at'] = Time.now.to_f
-        msg['retry_count'] = 0
+        msg["failed_at"] = Time.now.to_f
+        msg["retry_count"] = 0
       end
-      if msg['backtrace'] == true
-        msg['error_backtrace'] = exception.backtrace
-      elsif !msg['backtrace']
-        # do nothing
-      elsif msg['backtrace'].to_i != 0
-        msg['error_backtrace'] = exception.backtrace[0...msg['backtrace'].to_i]
+      if msg["backtrace"]
+        lines = if msg["backtrace"] == true
+          exception.backtrace
+        else
+          exception.backtrace[0...msg["backtrace"].to_i]
+        end
+        msg["error_backtrace"] = compress_backtrace(lines)
       end
       if count < max_retry_attempts
         delay = delay_for(worker, count, exception)
         # Logging here can break retries if the logging device raises ENOSPC #3979
-        #logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
         retry_at = Time.now.to_f + delay
         payload = Sidekiq.dump_json(msg)
         Sidekiq.redis do |conn|
-          conn.zadd('retry', retry_at.to_s, payload)
+          conn.zadd("retry", retry_at.to_s, payload)
         end
       else
         # Goodbye dear message, you (re)tried your best I'm sure.
@@ -182,25 +184,23 @@ module Sidekiq
     def retries_exhausted(worker, msg, exception)
       begin
-        block = worker && worker.sidekiq_retries_exhausted_block
-        block.call(msg, exception) if block
+        block = worker&.sidekiq_retries_exhausted_block
+        block&.call(msg, exception)
       rescue => e
-        handle_exception(e, { context: "Error calling retries_exhausted", job: msg })
+        handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
       end
+      send_to_morgue(msg) unless msg["dead"] == false
       Sidekiq.death_handlers.each do |handler|
-        begin
-          handler.call(msg, exception)
-        rescue => e
-          handle_exception(e, { context: "Error calling death handler", job: msg })
-        end
+        handler.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling death handler", job: msg})
       end
-      send_to_morgue(msg) unless msg['dead'] == false
     end
     def send_to_morgue(msg)
-      logger.info { "Adding dead #{msg['class']} job #{msg['jid']}" }
+      logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
       payload = Sidekiq.dump_json(msg)
       DeadSet.new.kill(payload, notify_failure: false)
     end
@@ -214,25 +214,19 @@ module Sidekiq
     end
     def delay_for(worker, count, exception)
-      if worker && worker.sidekiq_retry_in_block
+      jitter = rand(10) * (count + 1)
+      if worker&.sidekiq_retry_in_block
         custom_retry_in = retry_in(worker, count, exception).to_i
-        return custom_retry_in if custom_retry_in > 0
+        return custom_retry_in + jitter if custom_retry_in > 0
       end
-      seconds_to_delay(count)
-    end
-    # delayed_job uses the same basic formula
-    def seconds_to_delay(count)
-      (count ** 4) + 15 + (rand(30)*(count+1))
+      (count**4) + 15 + jitter
     end
     def retry_in(worker, count, exception)
-      begin
-        worker.sidekiq_retry_in_block.call(count, exception)
-      rescue Exception => e
-        handle_exception(e, { context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default" })
-        nil
-      end
+      worker.sidekiq_retry_in_block.call(count, exception)
+    rescue Exception => e
+      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
+      nil
     end
     def exception_caused_by_shutdown?(e, checked_causes = [])
@@ -249,14 +243,17 @@ module Sidekiq
     # Extract message from exception.
     # Set a default if the message raises an error
     def exception_message(exception)
-      begin
-        # App code can stuff all sorts of crazy binary data into the error message
-        # that won't convert to JSON.
-        exception.message.to_s[0, 10_000]
-      rescue
-        "!!! ERROR MESSAGE THREW AN ERROR !!!".dup
-      end
+      # App code can stuff all sorts of crazy binary data into the error message
+      # that won't convert to JSON.
+      exception.message.to_s[0, 10_000]
+    rescue
+      +"!!! ERROR MESSAGE THREW AN ERROR !!!"
     end
+    def compress_backtrace(backtrace)
+      serialized = Sidekiq.dump_json(backtrace)
+      compressed = Zlib::Deflate.deflate(serialized)
+      Base64.encode64(compressed)
+    end
   end
 end

data/lib/sidekiq/launcher.rb CHANGED Viewed

@@ -1,21 +1,28 @@
 # frozen_string_literal: true
-require 'sidekiq/manager'
-require 'sidekiq/fetch'
-require 'sidekiq/scheduled'
+require "sidekiq/manager"
+require "sidekiq/fetch"
+require "sidekiq/scheduled"
 module Sidekiq
-  # The Launcher is a very simple Actor whose job is to
-  # start, monitor and stop the core Actors in Sidekiq.
-  # If any of these actors die, the Sidekiq process exits
-  # immediately.
+  # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
     include Util
-    attr_accessor :manager, :poller, :fetcher
+    STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
-    STATS_TTL = 5*365*24*60*60
+    PROCTITLES = [
+      proc { "sidekiq" },
+      proc { Sidekiq::VERSION },
+      proc { |me, data| data["tag"] },
+      proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
+      proc { |me, data| "stopping" if me.stopping? }
+    ]
+    attr_accessor :manager, :poller, :fetcher
     def initialize(options)
+      options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
       @poller = Sidekiq::Scheduled::Poller.new
       @done = false
@@ -50,7 +57,7 @@ module Sidekiq
       # Requeue everything in case there was a worker who grabbed work while stopped
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy = @options[:fetch]
       strategy.bulk_requeue([], @options)
       clear_heartbeat
@@ -62,17 +69,64 @@ module Sidekiq
     private unless $TESTING
+    def start_heartbeat
+      loop do
+        heartbeat
+        sleep 5
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem("processes", identity)
+          conn.unlink("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
     def heartbeat
-      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, to_data) }
-      results.compact!
-      $0 = results.join(' ')
+      $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
       ❤
     end
+    def self.flush_stats
+      fails = Processor::FAILURE.reset
+      procd = Processor::PROCESSED.reset
+      return if fails + procd == 0
+      nowdate = Time.now.utc.strftime("%Y-%m-%d")
+      begin
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+          end
+        end
+      rescue => ex
+        # we're exiting the process, things might be shut down so don't
+        # try to handle the exception
+        Sidekiq.logger.warn("Unable to flush stats: #{ex}")
+      end
+    end
+    at_exit(&method(:flush_stats))
     def ❤
       key = identity
       fails = procd = 0
       begin
         fails = Processor::FAILURE.reset
         procd = Processor::PROCESSED.reset
@@ -80,6 +134,7 @@ module Sidekiq
         workers_key = "#{key}:workers"
         nowdate = Time.now.utc.strftime("%Y-%m-%d")
         Sidekiq.redis do |conn|
           conn.multi do
             conn.incrby("stat:processed", procd)
@@ -90,84 +145,115 @@ module Sidekiq
             conn.incrby("stat:failed:#{nowdate}", fails)
             conn.expire("stat:failed:#{nowdate}", STATS_TTL)
-            conn.del(workers_key)
+            conn.unlink(workers_key)
             curstate.each_pair do |tid, hash|
               conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
             end
             conn.expire(workers_key, 60)
           end
         end
+        rtt = check_rtt
         fails = procd = 0
+        kb = memory_usage(::Process.pid)
-        _, exists, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.exists(key)
-            conn.hmset(key, 'info', to_json, 'busy', curstate.size, 'beat', Time.now.to_f, 'quiet', @done)
+        _, exists, _, _, msg = Sidekiq.redis { |conn|
+          conn.multi {
+            conn.sadd("processes", key)
+            conn.exists?(key)
+            conn.hmset(key, "info", to_json,
+              "busy", curstate.size,
+              "beat", Time.now.to_f,
+              "rtt_us", rtt,
+              "quiet", @done,
+              "rss", kb)
             conn.expire(key, 60)
             conn.rpop("#{key}-signals")
-          end
-        end
+          }
+        }
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
-        fire_event(:heartbeat) if !exists
+        fire_event(:heartbeat) unless exists
         return unless msg
-        ::Process.kill(msg, $$)
+        ::Process.kill(msg, ::Process.pid)
       rescue => e
         # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+        logger.error("heartbeat: #{e}")
         # don't lose the counts if there was a network issue
         Processor::PROCESSED.incr(procd)
         Processor::FAILURE.incr(fails)
       end
     end
-    def start_heartbeat
-      while true
-        heartbeat
-        sleep 5
+    # We run the heartbeat every five seconds.
+    # Capture five samples of RTT, log a warning if each sample
+    # is above our warning threshold.
+    RTT_READINGS = RingBuffer.new(5)
+    RTT_WARNING_LEVEL = 50_000
+    def check_rtt
+      a = b = 0
+      Sidekiq.redis do |x|
+        a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
+        x.ping
+        b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
       end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      rtt = b - a
+      RTT_READINGS << rtt
+      # Ideal RTT for Redis is < 1000µs
+      # Workable is < 10,000µs
+      # Log a warning if it's a disaster.
+      if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
+        Sidekiq.logger.warn <<~EOM
+          Your Redis network connection is performing extremely poorly.
+          Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
+          Ensure Redis is running in the same AZ or datacenter as Sidekiq.
+        EOM
+        RTT_READINGS.reset
+      end
+      rtt
     end
-    def to_data
-      @data ||= begin
-        {
-          'hostname' => hostname,
-          'started_at' => Time.now.to_f,
-          'pid' => $$,
-          'tag' => @options[:tag] || '',
-          'concurrency' => @options[:concurrency],
-          'queues' => @options[:queues].uniq,
-          'labels' => @options[:labels],
-          'identity' => identity,
-        }
-      end
+    MEMORY_GRABBER = case RUBY_PLATFORM
+    when /linux/
+      ->(pid) {
+        IO.readlines("/proc/#{$$}/status").each do |line|
+          next unless line.start_with?("VmRSS:")
+          break line.split[1].to_i
+        end
+      }
+    when /darwin|bsd/
+      ->(pid) {
+        `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
+      }
+    else
+      ->(pid) { 0 }
     end
-    def to_json
-      @json ||= begin
-        # this data changes infrequently so dump it to a string
-        # now so we don't need to dump it every heartbeat.
-        Sidekiq.dump_json(to_data)
-      end
+    def memory_usage(pid)
+      MEMORY_GRABBER.call(pid)
     end
-    def clear_heartbeat
-      # Remove record from Redis since we are shutting down.
-      # Note we don't stop the heartbeat thread; if the process
-      # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
-        conn.pipelined do
-          conn.srem('processes', identity)
-          conn.del("#{identity}:workers")
-        end
-      end
-    rescue
-      # best effort, ignore network errors
+    def to_data
+      @data ||= {
+        "hostname" => hostname,
+        "started_at" => Time.now.to_f,
+        "pid" => ::Process.pid,
+        "tag" => @options[:tag] || "",
+        "concurrency" => @options[:concurrency],
+        "queues" => @options[:queues].uniq,
+        "labels" => @options[:labels],
+        "identity" => identity
+      }
     end
+    def to_json
+      # this data changes infrequently so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      @json ||= Sidekiq.dump_json(to_data)
+    end
   end
 end