sidekiq 6.5.0 → 6.5.3

@@ -91,7 +91,7 @@ module Sidekiq
 
       msg = Sidekiq.load_json(jobstr)
       if msg["retry"]
-        attempt_retry(nil, msg, queue, e)
+        process_retry(nil, msg, queue, e)
       else
         Sidekiq.death_handlers.each do |handler|
           handler.call(msg, e)
@@ -128,7 +128,7 @@ module Sidekiq
       end
 
       raise e unless msg["retry"]
-      attempt_retry(jobinst, msg, queue, e)
+      process_retry(jobinst, msg, queue, e)
       # We've handled this error associated with this job, don't
       # need to handle it at the global level
       raise Skip
@@ -139,7 +139,7 @@ module Sidekiq
     # Note that +jobinst+ can be nil here if an error is raised before we can
     # instantiate the job instance. All access must be guarded and
     # best effort.
-    def attempt_retry(jobinst, msg, queue, exception)
+    def process_retry(jobinst, msg, queue, exception)
       max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
 
       msg["queue"] = (msg["retry_queue"] || queue)
@@ -170,19 +170,50 @@ module Sidekiq
         msg["error_backtrace"] = compress_backtrace(lines)
       end
 
-      if count < max_retry_attempts
-        delay = delay_for(jobinst, count, exception)
-        # Logging here can break retries if the logging device raises ENOSPC #3979
-        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
-        retry_at = Time.now.to_f + delay
-        payload = Sidekiq.dump_json(msg)
-        Sidekiq.redis do |conn|
-          conn.zadd("retry", retry_at.to_s, payload)
-        end
+      # Goodbye dear message, you (re)tried your best I'm sure.
+      return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts
+
+      strategy, delay = delay_for(jobinst, count, exception)
+      case strategy
+      when :discard
+        return # poof!
+      when :kill
+        return retries_exhausted(jobinst, msg, exception)
+      end
+
+      # Logging here can break retries if the logging device raises ENOSPC #3979
+      # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+      jitter = rand(10) * (count + 1)
+      retry_at = Time.now.to_f + delay + jitter
+      payload = Sidekiq.dump_json(msg)
+      redis do |conn|
+        conn.zadd("retry", retry_at.to_s, payload)
+      end
+    end
+
+    # returns (strategy, seconds)
+    def delay_for(jobinst, count, exception)
+      rv = begin
+        # sidekiq_retry_in can return two different things:
+        # 1. When to retry next, as an integer of seconds
+        # 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
+        jobinst&.sidekiq_retry_in_block&.call(count, exception)
+      rescue Exception => e
+        handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
+        nil
+      end
+
+      delay = if Integer === rv && rv > 0
+        rv
+      elsif rv == :discard
+        return [:discard, nil] # do nothing, job goes poof
+      elsif rv == :kill
+        return [:kill, nil]
       else
-        # Goodbye dear message, you (re)tried your best I'm sure.
-        retries_exhausted(jobinst, msg, exception)
+        (count**4) + 15
       end
+
+      [:default, delay]
     end
 
     def retries_exhausted(jobinst, msg, exception)
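With this change a job's `sidekiq_retry_in` block may return either a number of seconds or a routing symbol (`:discard`, `:kill`, or `nil`/`:default` to fall through). A minimal sketch of what that looks like in a job class; the job and exception classes here are hypothetical stand-ins, not part of Sidekiq:

class RateLimited < StandardError; end
class InvalidRecord < StandardError; end
class FatalConfigError < StandardError; end

class FlakyApiJob
  include Sidekiq::Job
  sidekiq_options retry: 5

  # An integer schedules the next retry that many seconds out; :discard drops
  # the job silently; :kill sends it straight to the Dead set; nil falls back
  # to the default delay of (count**4) + 15 seconds plus jitter.
  sidekiq_retry_in do |count, exception|
    case exception
    when RateLimited then 60 * (count + 1)
    when InvalidRecord then :discard
    when FatalConfigError then :kill
    end
  end

  def perform(record_id)
    # ...
  end
end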
@@ -195,7 +226,7 @@ module Sidekiq
 
       send_to_morgue(msg) unless msg["dead"] == false
 
-      Sidekiq.death_handlers.each do |handler|
+      config.death_handlers.each do |handler|
         handler.call(msg, exception)
       rescue => e
         handle_exception(e, {context: "Error calling death handler", job: msg})
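Death handlers are the globally registered callbacks that run once a job has exhausted its retries; they are now read from the server config object rather than the Sidekiq module directly. A minimal sketch of registering one, using only the logger so it stays self-contained:

Sidekiq.configure_server do |config|
  config.death_handlers << ->(job, exception) do
    # job is the payload Hash, exception is the error that exhausted its retries
    Sidekiq.logger.warn("Dead job #{job["class"]} jid=#{job["jid"]}: #{exception.message}")
  end
end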
@@ -216,22 +247,6 @@ module Sidekiq
       end
     end
 
-    def delay_for(jobinst, count, exception)
-      jitter = rand(10) * (count + 1)
-      if jobinst&.sidekiq_retry_in_block
-        custom_retry_in = retry_in(jobinst, count, exception).to_i
-        return custom_retry_in + jitter if custom_retry_in > 0
-      end
-      (count**4) + 15 + jitter
-    end
-
-    def retry_in(jobinst, count, exception)
-      jobinst.sidekiq_retry_in_block.call(count, exception)
-    rescue Exception => e
-      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
-      nil
-    end
-
     def exception_caused_by_shutdown?(e, checked_causes = [])
       return false unless e.cause
 
@@ -79,6 +79,8 @@ module Sidekiq
     end
 
     def clear_heartbeat
+      flush_stats
+
       # Remove record from Redis since we are shutting down.
       # Note we don't stop the heartbeat thread; if the process
       # doesn't actually exit, it'll reappear in the Web UI.
@@ -98,7 +100,7 @@ module Sidekiq
 
     end
 
-    def self.flush_stats
+    def flush_stats
       fails = Processor::FAILURE.reset
       procd = Processor::PROCESSED.reset
       return if fails + procd == 0
@@ -122,7 +124,6 @@ module Sidekiq
         Sidekiq.logger.warn("Unable to flush stats: #{ex}")
       end
     end
-    at_exit(&method(:flush_stats))
 
     def ❤
       key = identity
@@ -179,6 +180,7 @@ module Sidekiq
 
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
         fire_event(:heartbeat) unless exists
+        fire_event(:beat, oneshot: false)
 
         return unless msg
 
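Unlike `:heartbeat`, which only fires when the process record is first (re)established, the new `:beat` event fires on every heartbeat pass. A small sketch of hooking periodic work into it; swap the logger call for a push to your own monitoring system:

Sidekiq.configure_server do |config|
  config.on(:beat) do
    # Runs on every heartbeat pass, so keep this cheap and non-blocking.
    Sidekiq.logger.debug { "beat from pid #{Process.pid}" }
  end
end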
@@ -0,0 +1,47 @@
+require "sidekiq"
+require "date"
+
+# This file is designed to be required within the user's
+# deployment script; it should need a bare minimum of dependencies.
+#
+#   require "sidekiq/metrics/deploy"
+#   gitdesc = `git log -1 --format="%h %s"`.strip
+#   d = Sidekiq::Metrics::Deploy.new
+#   d.mark(label: gitdesc)
+#
+# Note that you cannot mark more than once per minute. This is a feature, not a bug.
+module Sidekiq
+  module Metrics
+    class Deploy
+      MARK_TTL = 90 * 24 * 60 * 60 # 90 days
+
+      def initialize(pool = Sidekiq.redis_pool)
+        @pool = pool
+      end
+
+      def mark(at: Time.now, label: "")
+        # we need to round the timestamp so that we gracefully
+        # handle an expected, common error in marking deploys:
+        # having every process mark its deploy, leading
+        # to N marks for each deploy. Instead we round the time
+        # to the minute so that multiple marks within that minute
+        # will all naturally rollup into one mark per minute.
+        whence = at.utc
+        floor = Time.utc(whence.year, whence.month, whence.mday, whence.hour, whence.min, 0)
+        datecode = floor.strftime("%Y%m%d")
+        key = "#{datecode}-marks"
+        @pool.with do |c|
+          c.pipelined do |pipe|
+            pipe.hsetnx(key, floor.rfc3339, label)
+            pipe.expire(key, MARK_TTL)
+          end
+        end
+      end
+
+      def fetch(date = Time.now.utc.to_date)
+        datecode = date.strftime("%Y%m%d")
+        @pool.with { |c| c.hgetall("#{datecode}-marks") }
+      end
+    end
+  end
+end
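A short usage sketch building on the comment at the top of the file: mark a deploy from the deploy script, then read the marks back later. The label is illustrative; `fetch` returns a Hash of minute timestamp to label for the given day.

require "sidekiq/metrics/deploy"

# In the deploy script:
deploy = Sidekiq::Metrics::Deploy.new
deploy.mark(label: `git log -1 --format="%h %s"`.strip)

# Later, e.g. from a console:
marks = Sidekiq::Metrics::Deploy.new.fetch(Date.today)
marks.each { |minute, label| puts "#{minute} #{label}" }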
@@ -0,0 +1,124 @@
+require "sidekiq"
+require "date"
+require "set"
+
+require "sidekiq/metrics/shared"
+
+module Sidekiq
+  module Metrics
+    # Allows caller to query for Sidekiq execution metrics within Redis.
+    # Caller sets a set of attributes to act as filters. {#fetch} will call
+    # Redis and return a Hash of results.
+    #
+    # NB: all metrics and times/dates are UTC only. We specifically do not
+    # support timezones.
+    class Query
+      # :hour, :day, :month
+      attr_accessor :period
+
+      # a specific job class, e.g. "App::OrderJob"
+      attr_accessor :klass
+
+      # the date specific to the period
+      # for :day or :hour, something like Date.today or Date.new(2022, 7, 13)
+      # for :month, Date.new(2022, 7, 1)
+      attr_accessor :date
+
+      # for period = :hour, the specific hour, integer e.g. 1 or 18
+      # note that hours and minutes do not have a leading zero so minute-specific
+      # keys will look like "j|20220718|7:3" for data at 07:03.
+      attr_accessor :hour
+
+      def initialize(pool: Sidekiq.redis_pool, now: Time.now)
+        @time = now.utc
+        @pool = pool
+        @klass = nil
+      end
+
+      # Get metric data from the last hour and roll it up
+      # into top processed count and execution time based on class.
+      def top_jobs
+        resultset = {}
+        resultset[:date] = @time.to_date
+        resultset[:period] = :hour
+        resultset[:ends_at] = @time
+        time = @time
+
+        results = @pool.with do |conn|
+          conn.pipelined do |pipe|
+            resultset[:size] = 60
+            60.times do |idx|
+              key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
+              pipe.hgetall key
+              time -= 60
+            end
+            resultset[:starts_at] = time
+          end
+        end
+
+        t = Hash.new(0)
+        klsset = Set.new
+        # merge the per-minute data into a totals hash for the hour
+        results.each do |hash|
+          hash.each { |k, v| t[k] = t[k] + v.to_i }
+          klsset.merge(hash.keys.map { |k| k.split("|")[0] })
+        end
+        resultset[:job_classes] = klsset.delete_if { |item| item.size < 3 }
+        resultset[:totals] = t
+        top = t.each_with_object({}) do |(k, v), memo|
+          (kls, metric) = k.split("|")
+          memo[metric] ||= Hash.new(0)
+          memo[metric][kls] = v
+        end
+
+        sorted = {}
+        top.each_pair do |metric, hash|
+          sorted[metric] = hash.sort_by { |k, v| v }.reverse.to_h
+        end
+        resultset[:top_classes] = sorted
+        resultset
+      end
+
+      def for_job(klass)
+        resultset = {}
+        resultset[:date] = @time.to_date
+        resultset[:period] = :hour
+        resultset[:ends_at] = @time
+        marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
+
+        time = @time
+        initial = @pool.with do |conn|
+          conn.pipelined do |pipe|
+            resultset[:size] = 60
+            60.times do |idx|
+              key = "j|#{time.strftime("%Y%m%d|%-H:%-M")}"
+              pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
+              time -= 60
+            end
+          end
+        end
+
+        time = @time
+        hist = Histogram.new(klass)
+        results = @pool.with do |conn|
+          initial.map do |(ms, p, f)|
+            tm = Time.utc(time.year, time.month, time.mday, time.hour, time.min, 0)
+            {
+              time: tm.iso8601,
+              epoch: tm.to_i,
+              ms: ms.to_i, p: p.to_i, f: f.to_i, hist: hist.fetch(conn, time)
+            }.tap { |x|
+              x[:mark] = marks[x[:time]] if marks[x[:time]]
+              time -= 60
+            }
+          end
+        end
+
+        resultset[:marks] = marks
+        resultset[:starts_at] = time
+        resultset[:data] = results
+        resultset
+      end
+    end
+  end
+end
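A hypothetical console session against the data this class reads; the job class name is a placeholder, and the keys follow the result hashes built above:

require "sidekiq/metrics/query"

q = Sidekiq::Metrics::Query.new
hourly = q.top_jobs
hourly[:top_classes]["p"]   # processed count per job class, highest first
hourly[:top_classes]["ms"]  # total execution milliseconds per job class

detail = q.for_job("App::OrderJob")
detail[:data].first         # newest minute: { time:, epoch:, ms:, p:, f:, hist: }
detail[:marks]              # deploy marks recorded that day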
@@ -0,0 +1,94 @@
+require "concurrent"
+
+module Sidekiq
+  module Metrics
+    # TODO Support apps without concurrent-ruby
+    Counter = ::Concurrent::AtomicFixnum
+
+    # Implements space-efficient but statistically useful histogram storage.
+    # A precise time histogram stores every time. Instead we break times into a set of
+    # known buckets and increment counts of the associated time bucket. Even if we call
+    # the histogram a million times, we'll still only store 26 buckets.
+    # NB: needs to be thread-safe or resilient to races.
+    #
+    # To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
+    # per bucket per klass per minute. It's unlikely that most people will be executing more
+    # than 1000 jobs/sec for a full minute of a specific type.
+    class Histogram
+      include Enumerable
+
+      # This number represents the maximum milliseconds for this bucket.
+      # 20 means all job executions up to 20ms, e.g. if a job takes
+      # 280ms, it'll increment bucket[7]. Note we can track job executions
+      # up to about 5.5 minutes. After that, it's assumed you're probably
+      # not too concerned with its performance.
+      BUCKET_INTERVALS = [
+        20, 30, 45, 65, 100,
+        150, 225, 335, 500, 750,
+        1100, 1700, 2500, 3800, 5750,
+        8500, 13000, 20000, 30000, 45000,
+        65000, 100000, 150000, 225000, 335000,
+        Float::INFINITY # the "maybe your job is too long" bucket
+      ]
+      LABELS = [
+        "20ms", "30ms", "45ms", "65ms", "100ms",
+        "150ms", "225ms", "335ms", "500ms", "750ms",
+        "1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
+        "8.5s", "13s", "20s", "30s", "45s",
+        "65s", "100s", "150s", "225s", "335s",
+        "Slow"
+      ]
+
+      FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
+        GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
+        GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
+        GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
+        GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
+        GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
+        GET u16 #24 GET u16 #25".split
+
+      def each
+        buckets.each { |counter| yield counter.value }
+      end
+
+      def label(idx)
+        LABELS[idx]
+      end
+
+      attr_reader :buckets
+      def initialize(klass)
+        @klass = klass
+        @buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
+      end
+
+      def record_time(ms)
+        index_to_use = BUCKET_INTERVALS.each_index do |idx|
+          break idx if ms < BUCKET_INTERVALS[idx]
+        end
+
+        @buckets[index_to_use].increment
+      end
+
+      def fetch(conn, now = Time.now)
+        window = now.utc.strftime("%d-%H:%-M")
+        key = "#{@klass}-#{window}"
+        conn.bitfield(key, *FETCH)
+      end
+
+      def persist(conn, now = Time.now)
+        buckets, @buckets = @buckets, []
+        window = now.utc.strftime("%d-%H:%-M")
+        key = "#{@klass}-#{window}"
+        cmd = [key, "OVERFLOW", "SAT"]
+        buckets.each_with_index do |counter, idx|
+          val = counter.value
+          cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
+        end
+
+        conn.bitfield(*cmd) if cmd.size > 3
+        conn.expire(key, 86400)
+        key
+      end
+    end
+  end
+end
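A small sketch of the histogram life cycle: bucket a few execution times in memory, then persist the counters to Redis and read them back. The job class name is a placeholder.

hist = Sidekiq::Metrics::Histogram.new("App::OrderJob")
hist.record_time(18)     # < 20ms, increments bucket[0]
hist.record_time(280)    # < 335ms, increments bucket[7]
hist.record_time(9_000)  # < 13s, increments bucket[16]

Sidekiq.redis do |conn|
  key = hist.persist(conn)  # BITFIELD INCRBY per non-zero bucket; key expires after a day
  hist.fetch(conn)          # => 26 integers, one count per bucket for this minute
end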
@@ -0,0 +1,134 @@
+require "time"
+require "sidekiq"
+require "sidekiq/metrics/shared"
+
+# This file contains the components which track execution metrics within Sidekiq.
+module Sidekiq
+  module Metrics
+    class ExecutionTracker
+      include Sidekiq::Component
+
+      def initialize(config)
+        @config = config
+        @jobs = Hash.new(0)
+        @totals = Hash.new(0)
+        @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
+        @lock = Mutex.new
+      end
+
+      def track(queue, klass)
+        start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
+        time_ms = 0
+        begin
+          begin
+            yield
+          ensure
+            finish = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
+            time_ms = finish - start
+          end
+          # We don't track time for failed jobs as they can have very unpredictable
+          # execution times. More important to know average time for successful jobs so we
+          # can better recognize when a perf regression is introduced.
+          @lock.synchronize {
+            @grams[klass].record_time(time_ms)
+            @jobs["#{klass}|ms"] += time_ms
+            @totals["ms"] += time_ms
+          }
+        rescue Exception
+          @lock.synchronize {
+            @jobs["#{klass}|f"] += 1
+            @totals["f"] += 1
+          }
+          raise
+        ensure
+          @lock.synchronize {
+            @jobs["#{klass}|p"] += 1
+            @totals["p"] += 1
+          }
+        end
+      end
+
+      LONG_TERM = 90 * 24 * 60 * 60
+      MID_TERM = 7 * 24 * 60 * 60
+      SHORT_TERM = 8 * 60 * 60
+
+      def flush(time = Time.now)
+        totals, jobs, grams = reset
+        procd = totals["p"]
+        fails = totals["f"]
+        return if procd == 0 && fails == 0
+
+        now = time.utc
+        nowdate = now.strftime("%Y%m%d")
+        nowhour = now.strftime("%Y%m%d|%-H")
+        nowmin = now.strftime("%Y%m%d|%-H:%-M")
+        count = 0
+
+        redis do |conn|
+          if grams.size > 0
+            conn.pipelined do |pipe|
+              grams.each do |_, gram|
+                gram.persist(pipe, now)
+              end
+            end
+          end
+
+          [
+            ["j", jobs, nowdate, LONG_TERM],
+            ["j", jobs, nowhour, MID_TERM],
+            ["j", jobs, nowmin, SHORT_TERM]
+          ].each do |prefix, data, bucket, ttl|
+            # Quietly seed the new 7.0 stats format so migration is painless.
+            conn.pipelined do |xa|
+              stats = "#{prefix}|#{bucket}"
+              # logger.debug "Flushing metrics #{stats}"
+              data.each_pair do |key, value|
+                xa.hincrby stats, key, value
+                count += 1
+              end
+              xa.expire(stats, ttl)
+            end
+          end
+          logger.info "Flushed #{count} metrics"
+          count
+        end
+      end
+
+      private
+
+      def reset
+        @lock.synchronize {
+          array = [@totals, @jobs, @grams]
+          @totals = Hash.new(0)
+          @jobs = Hash.new(0)
+          @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
+          array
+        }
+      end
+    end
+
+    class Middleware
+      include Sidekiq::ServerMiddleware
+
+      def initialize(options)
+        @exec = options
+      end
+
+      def call(_instance, hash, queue, &block)
+        @exec.track(queue, hash["wrapped"] || hash["class"], &block)
+      end
+    end
+  end
+end
+
+if ENV["SIDEKIQ_METRICS_BETA"] == "1"
+  Sidekiq.configure_server do |config|
+    exec = Sidekiq::Metrics::ExecutionTracker.new(config)
+    config.server_middleware do |chain|
+      chain.add Sidekiq::Metrics::Middleware, exec
+    end
+    config.on(:beat) do
+      exec.flush
+    end
+  end
+end
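As the guard at the bottom of the file shows, the tracker middleware and the :beat flush are only registered when the file is loaded with the env var set. A minimal sketch of opting in; the initializer path is illustrative:

# config/initializers/sidekiq.rb (or wherever Sidekiq is configured)
require "sidekiq/metrics/tracking" # registers the middleware + :beat flush only if the flag is set

# then start the worker process with the flag:
#   SIDEKIQ_METRICS_BETA=1 bundle exec sidekiq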