sidekiq 6.5.1 → 7.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/Changes.md +142 -12
  3. data/README.md +40 -32
  4. data/bin/sidekiq +3 -8
  5. data/bin/sidekiqload +186 -118
  6. data/bin/sidekiqmon +3 -0
  7. data/lib/sidekiq/api.rb +226 -139
  8. data/lib/sidekiq/capsule.rb +127 -0
  9. data/lib/sidekiq/cli.rb +55 -61
  10. data/lib/sidekiq/client.rb +31 -18
  11. data/lib/sidekiq/component.rb +5 -1
  12. data/lib/sidekiq/config.rb +270 -0
  13. data/lib/sidekiq/deploy.rb +62 -0
  14. data/lib/sidekiq/embedded.rb +61 -0
  15. data/lib/sidekiq/fetch.rb +11 -14
  16. data/lib/sidekiq/job.rb +375 -10
  17. data/lib/sidekiq/job_logger.rb +2 -2
  18. data/lib/sidekiq/job_retry.rb +62 -41
  19. data/lib/sidekiq/job_util.rb +48 -14
  20. data/lib/sidekiq/launcher.rb +71 -65
  21. data/lib/sidekiq/logger.rb +1 -26
  22. data/lib/sidekiq/manager.rb +9 -11
  23. data/lib/sidekiq/metrics/query.rb +153 -0
  24. data/lib/sidekiq/metrics/shared.rb +95 -0
  25. data/lib/sidekiq/metrics/tracking.rb +136 -0
  26. data/lib/sidekiq/middleware/chain.rb +84 -48
  27. data/lib/sidekiq/middleware/current_attributes.rb +12 -17
  28. data/lib/sidekiq/monitor.rb +17 -4
  29. data/lib/sidekiq/paginator.rb +9 -1
  30. data/lib/sidekiq/processor.rb +27 -27
  31. data/lib/sidekiq/rails.rb +4 -9
  32. data/lib/sidekiq/redis_client_adapter.rb +8 -47
  33. data/lib/sidekiq/redis_connection.rb +11 -113
  34. data/lib/sidekiq/scheduled.rb +60 -33
  35. data/lib/sidekiq/testing.rb +5 -33
  36. data/lib/sidekiq/transaction_aware_client.rb +4 -5
  37. data/lib/sidekiq/version.rb +2 -1
  38. data/lib/sidekiq/web/action.rb +3 -3
  39. data/lib/sidekiq/web/application.rb +40 -9
  40. data/lib/sidekiq/web/csrf_protection.rb +1 -1
  41. data/lib/sidekiq/web/helpers.rb +32 -18
  42. data/lib/sidekiq/web.rb +7 -14
  43. data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
  44. data/lib/sidekiq.rb +76 -266
  45. data/sidekiq.gemspec +21 -10
  46. data/web/assets/javascripts/application.js +19 -1
  47. data/web/assets/javascripts/base-charts.js +106 -0
  48. data/web/assets/javascripts/chart.min.js +13 -0
  49. data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
  50. data/web/assets/javascripts/dashboard-charts.js +166 -0
  51. data/web/assets/javascripts/dashboard.js +3 -240
  52. data/web/assets/javascripts/metrics.js +264 -0
  53. data/web/assets/stylesheets/application-dark.css +4 -0
  54. data/web/assets/stylesheets/application-rtl.css +2 -91
  55. data/web/assets/stylesheets/application.css +65 -297
  56. data/web/locales/ar.yml +70 -70
  57. data/web/locales/cs.yml +62 -62
  58. data/web/locales/da.yml +60 -53
  59. data/web/locales/de.yml +65 -65
  60. data/web/locales/el.yml +43 -24
  61. data/web/locales/en.yml +82 -69
  62. data/web/locales/es.yml +68 -68
  63. data/web/locales/fa.yml +65 -65
  64. data/web/locales/fr.yml +67 -67
  65. data/web/locales/gd.yml +99 -0
  66. data/web/locales/he.yml +65 -64
  67. data/web/locales/hi.yml +59 -59
  68. data/web/locales/it.yml +53 -53
  69. data/web/locales/ja.yml +73 -68
  70. data/web/locales/ko.yml +52 -52
  71. data/web/locales/lt.yml +66 -66
  72. data/web/locales/nb.yml +61 -61
  73. data/web/locales/nl.yml +52 -52
  74. data/web/locales/pl.yml +45 -45
  75. data/web/locales/pt-br.yml +59 -69
  76. data/web/locales/pt.yml +51 -51
  77. data/web/locales/ru.yml +67 -66
  78. data/web/locales/sv.yml +53 -53
  79. data/web/locales/ta.yml +60 -60
  80. data/web/locales/uk.yml +62 -61
  81. data/web/locales/ur.yml +64 -64
  82. data/web/locales/vi.yml +67 -67
  83. data/web/locales/zh-cn.yml +43 -16
  84. data/web/locales/zh-tw.yml +42 -8
  85. data/web/views/_footer.erb +5 -2
  86. data/web/views/_job_info.erb +18 -2
  87. data/web/views/_metrics_period_select.erb +12 -0
  88. data/web/views/_nav.erb +1 -1
  89. data/web/views/_paging.erb +2 -0
  90. data/web/views/_poll_link.erb +1 -1
  91. data/web/views/busy.erb +43 -27
  92. data/web/views/dashboard.erb +36 -4
  93. data/web/views/metrics.erb +82 -0
  94. data/web/views/metrics_for_job.erb +68 -0
  95. data/web/views/morgue.erb +5 -9
  96. data/web/views/queue.erb +15 -15
  97. data/web/views/queues.erb +3 -1
  98. data/web/views/retries.erb +5 -9
  99. data/web/views/scheduled.erb +12 -13
  100. metadata +60 -27
  101. data/lib/sidekiq/.DS_Store +0 -0
  102. data/lib/sidekiq/delay.rb +0 -43
  103. data/lib/sidekiq/extensions/action_mailer.rb +0 -48
  104. data/lib/sidekiq/extensions/active_record.rb +0 -43
  105. data/lib/sidekiq/extensions/class_methods.rb +0 -43
  106. data/lib/sidekiq/extensions/generic_proxy.rb +0 -33
  107. data/lib/sidekiq/worker.rb +0 -367
  108. /data/{LICENSE → LICENSE.txt} +0 -0
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sidekiq/manager"
4
- require "sidekiq/fetch"
4
+ require "sidekiq/capsule"
5
5
  require "sidekiq/scheduled"
6
6
  require "sidekiq/ring_buffer"
7
7
 
8
8
  module Sidekiq
9
- # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
9
+ # The Launcher starts the Capsule Managers, the Poller thread and provides the process heartbeat.
10
10
  class Launcher
11
11
  include Sidekiq::Component
12
12
 
@@ -16,48 +16,56 @@ module Sidekiq
16
16
  proc { "sidekiq" },
17
17
  proc { Sidekiq::VERSION },
18
18
  proc { |me, data| data["tag"] },
19
- proc { |me, data| "[#{Processor::WORK_STATE.size} of #{data["concurrency"]} busy]" },
19
+ proc { |me, data| "[#{Processor::WORK_STATE.size} of #{me.config.total_concurrency} busy]" },
20
20
  proc { |me, data| "stopping" if me.stopping? }
21
21
  ]
22
22
 
23
- attr_accessor :manager, :poller, :fetcher
23
+ attr_accessor :managers, :poller
24
24
 
25
- def initialize(options)
26
- @config = options
27
- options[:fetch] ||= BasicFetch.new(options)
28
- @manager = Sidekiq::Manager.new(options)
29
- @poller = Sidekiq::Scheduled::Poller.new(options)
25
+ def initialize(config, embedded: false)
26
+ @config = config
27
+ @embedded = embedded
28
+ @managers = config.capsules.values.map do |cap|
29
+ Sidekiq::Manager.new(cap)
30
+ end
31
+ @poller = Sidekiq::Scheduled::Poller.new(@config)
30
32
  @done = false
31
33
  end
32
34
 
33
- def run
34
- @thread = safe_thread("heartbeat", &method(:start_heartbeat))
35
+ # Start this Sidekiq instance. If an embedding process already
36
+ # has a heartbeat thread, caller can use `async_beat: false`
37
+ # and instead have thread call Launcher#heartbeat every N seconds.
38
+ def run(async_beat: true)
39
+ Sidekiq.freeze!
40
+ logger.debug { @config.merge!({}) }
41
+ @thread = safe_thread("heartbeat", &method(:start_heartbeat)) if async_beat
35
42
  @poller.start
36
- @manager.start
43
+ @managers.each(&:start)
37
44
  end
38
45
 
39
46
  # Stops this instance from processing any more jobs,
40
- #
41
47
  def quiet
48
+ return if @done
49
+
42
50
  @done = true
43
- @manager.quiet
51
+ @managers.each(&:quiet)
44
52
  @poller.terminate
53
+ fire_event(:quiet, reverse: true)
45
54
  end
46
55
 
47
56
  # Shuts down this Sidekiq instance. Waits up to the deadline for all jobs to complete.
48
57
  def stop
49
58
  deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @config[:timeout]
50
59
 
51
- @done = true
52
- @manager.quiet
53
- @poller.terminate
54
-
55
- @manager.stop(deadline)
60
+ quiet
61
+ stoppers = @managers.map do |mgr|
62
+ Thread.new do
63
+ mgr.stop(deadline)
64
+ end
65
+ end
56
66
 
57
- # Requeue everything in case there was a thread which fetched a job while the process was stopped.
58
- # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
59
- strategy = @config[:fetch]
60
- strategy.bulk_requeue([], @config)
67
+ fire_event(:shutdown, reverse: true)
68
+ stoppers.each(&:join)
61
69
 
62
70
  clear_heartbeat
63
71
  end
@@ -66,25 +74,39 @@ module Sidekiq
66
74
  @done
67
75
  end
68
76
 
77
+ # If embedding Sidekiq, you can have the process heartbeat
78
+ # call this method to regularly heartbeat rather than creating
79
+ # a separate thread.
80
+ def heartbeat
81
+
82
+ end
83
+
69
84
  private unless $TESTING
70
85
 
71
- BEAT_PAUSE = 5
86
+ BEAT_PAUSE = 10
72
87
 
73
88
  def start_heartbeat
74
89
  loop do
75
- heartbeat
90
+ beat
76
91
  sleep BEAT_PAUSE
77
92
  end
78
93
  logger.info("Heartbeat stopping...")
79
94
  end
80
95
 
96
+ def beat
97
+ $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ") unless @embedded
98
+
99
+ end
100
+
81
101
  def clear_heartbeat
102
+ flush_stats
103
+
82
104
  # Remove record from Redis since we are shutting down.
83
105
  # Note we don't stop the heartbeat thread; if the process
84
106
  # doesn't actually exit, it'll reappear in the Web UI.
85
107
  redis do |conn|
86
108
  conn.pipelined do |pipeline|
87
- pipeline.srem("processes", identity)
109
+ pipeline.srem("processes", [identity])
88
110
  pipeline.unlink("#{identity}:work")
89
111
  end
90
112
  end
@@ -92,20 +114,14 @@ module Sidekiq
92
114
  # best effort, ignore network errors
93
115
  end
94
116
 
95
- def heartbeat
96
- $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
97
-
98
-
99
- end
100
-
101
- def self.flush_stats
117
+ def flush_stats
102
118
  fails = Processor::FAILURE.reset
103
119
  procd = Processor::PROCESSED.reset
104
120
  return if fails + procd == 0
105
121
 
106
122
  nowdate = Time.now.utc.strftime("%Y-%m-%d")
107
123
  begin
108
- Sidekiq.redis do |conn|
124
+ redis do |conn|
109
125
  conn.pipelined do |pipeline|
110
126
  pipeline.incrby("stat:processed", procd)
111
127
  pipeline.incrby("stat:processed:#{nowdate}", procd)
@@ -117,35 +133,19 @@ module Sidekiq
117
133
  end
118
134
  end
119
135
  rescue => ex
120
- # we're exiting the process, things might be shut down so don't
121
- # try to handle the exception
122
- Sidekiq.logger.warn("Unable to flush stats: #{ex}")
136
+ logger.warn("Unable to flush stats: #{ex}")
123
137
  end
124
138
  end
125
- at_exit(&method(:flush_stats))
126
139
 
127
140
  def ❤
128
141
  key = identity
129
142
  fails = procd = 0
130
143
 
131
144
  begin
132
- fails = Processor::FAILURE.reset
133
- procd = Processor::PROCESSED.reset
134
- curstate = Processor::WORK_STATE.dup
135
-
136
- nowdate = Time.now.utc.strftime("%Y-%m-%d")
145
+ flush_stats
137
146
 
147
+ curstate = Processor::WORK_STATE.dup
138
148
  redis do |conn|
139
- conn.multi do |transaction|
140
- transaction.incrby("stat:processed", procd)
141
- transaction.incrby("stat:processed:#{nowdate}", procd)
142
- transaction.expire("stat:processed:#{nowdate}", STATS_TTL)
143
-
144
- transaction.incrby("stat:failed", fails)
145
- transaction.incrby("stat:failed:#{nowdate}", fails)
146
- transaction.expire("stat:failed:#{nowdate}", STATS_TTL)
147
- end
148
-
149
149
  # work is the current set of executing jobs
150
150
  work_key = "#{key}:work"
151
151
  conn.pipelined do |transaction|
@@ -162,10 +162,10 @@ module Sidekiq
162
162
  fails = procd = 0
163
163
  kb = memory_usage(::Process.pid)
164
164
 
165
- _, exists, _, _, msg = redis { |conn|
165
+ _, exists, _, _, signal = redis { |conn|
166
166
  conn.multi { |transaction|
167
- transaction.sadd("processes", key)
168
- transaction.exists?(key)
167
+ transaction.sadd("processes", [key])
168
+ transaction.exists(key)
169
169
  transaction.hmset(key, "info", to_json,
170
170
  "busy", curstate.size,
171
171
  "beat", Time.now.to_f,
@@ -178,11 +178,10 @@ module Sidekiq
178
178
  }
179
179
 
180
180
  # first heartbeat or recovering from an outage and need to reestablish our heartbeat
181
- fire_event(:heartbeat) unless exists
181
+ fire_event(:heartbeat) unless exists > 0
182
+ fire_event(:beat, oneshot: false)
182
183
 
183
- return unless msg
184
-
185
- ::Process.kill(msg, ::Process.pid)
184
+ ::Process.kill(signal, ::Process.pid) if signal && !@embedded
186
185
  rescue => e
187
186
  # ignore all redis/network issues
188
187
  logger.error("heartbeat: #{e}")
@@ -216,7 +215,7 @@ module Sidekiq
216
215
  Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
217
216
  Ensure Redis is running in the same AZ or datacenter as Sidekiq.
218
217
  If these values are close to 100,000, that means your Sidekiq process may be
219
- CPU-saturated; reduce your concurrency and/or see https://github.com/mperham/sidekiq/discussions/5039
218
+ CPU-saturated; reduce your concurrency and/or see https://github.com/sidekiq/sidekiq/discussions/5039
220
219
  EOM
221
220
  RTT_READINGS.reset
222
221
  end
@@ -249,13 +248,20 @@ module Sidekiq
249
248
  "started_at" => Time.now.to_f,
250
249
  "pid" => ::Process.pid,
251
250
  "tag" => @config[:tag] || "",
252
- "concurrency" => @config[:concurrency],
253
- "queues" => @config[:queues].uniq,
254
- "labels" => @config[:labels],
255
- "identity" => identity
251
+ "concurrency" => @config.total_concurrency,
252
+ "queues" => @config.capsules.values.flat_map { |cap| cap.queues }.uniq,
253
+ "weights" => to_weights,
254
+ "labels" => @config[:labels].to_a,
255
+ "identity" => identity,
256
+ "version" => Sidekiq::VERSION,
257
+ "embedded" => @embedded
256
258
  }
257
259
  end
258
260
 
261
+ def to_weights
262
+ @config.capsules.values.map(&:weights)
263
+ end
264
+
259
265
  def to_json
260
266
  # this data changes infrequently so dump it to a string
261
267
  # now so we don't need to dump it every heartbeat.
@@ -31,7 +31,7 @@ module Sidekiq
31
31
  "fatal" => 4
32
32
  }
33
33
  LEVELS.default_proc = proc do |_, level|
34
- Sidekiq.logger.warn("Invalid log level: #{level.inspect}")
34
+ puts("Invalid log level: #{level.inspect}")
35
35
  nil
36
36
  end
37
37
 
@@ -70,36 +70,11 @@ module Sidekiq
70
70
  ensure
71
71
  self.local_level = old_local_level
72
72
  end
73
-
74
- # Redefined to check severity against #level, and thus the thread-local level, rather than +@level+.
75
- # FIXME: Remove when the minimum Ruby version supports overriding Logger#level.
76
- def add(severity, message = nil, progname = nil, &block)
77
- severity ||= ::Logger::UNKNOWN
78
- progname ||= @progname
79
-
80
- return true if @logdev.nil? || severity < level
81
-
82
- if message.nil?
83
- if block
84
- message = yield
85
- else
86
- message = progname
87
- progname = @progname
88
- end
89
- end
90
-
91
- @logdev.write format_message(format_severity(severity), Time.now, progname, message)
92
- end
93
73
  end
94
74
 
95
75
  class Logger < ::Logger
96
76
  include LoggingUtils
97
77
 
98
- def initialize(*args, **kwargs)
99
- super
100
- self.formatter = Sidekiq.log_formatter
101
- end
102
-
103
78
  module Formatters
104
79
  class Base < ::Logger::Formatter
105
80
  def tid
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sidekiq/processor"
4
- require "sidekiq/fetch"
5
4
  require "set"
6
5
 
7
6
  module Sidekiq
@@ -23,19 +22,19 @@ module Sidekiq
23
22
  include Sidekiq::Component
24
23
 
25
24
  attr_reader :workers
25
+ attr_reader :capsule
26
26
 
27
- def initialize(options = {})
28
- @config = options
29
- logger.debug { options.inspect }
30
- @count = options[:concurrency] || 10
27
+ def initialize(capsule)
28
+ @config = @capsule = capsule
29
+ @count = capsule.concurrency
31
30
  raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
32
31
 
33
32
  @done = false
34
33
  @workers = Set.new
34
+ @plock = Mutex.new
35
35
  @count.times do
36
36
  @workers << Processor.new(@config, &method(:processor_result))
37
37
  end
38
- @plock = Mutex.new
39
38
  end
40
39
 
41
40
  def start
@@ -46,14 +45,12 @@ module Sidekiq
46
45
  return if @done
47
46
  @done = true
48
47
 
49
- logger.info { "Terminating quiet threads" }
48
+ logger.info { "Terminating quiet threads for #{capsule.name} capsule" }
50
49
  @workers.each(&:terminate)
51
- fire_event(:quiet, reverse: true)
52
50
  end
53
51
 
54
52
  def stop(deadline)
55
53
  quiet
56
- fire_event(:shutdown, reverse: true)
57
54
 
58
55
  # some of the shutdown events can be async,
59
56
  # we don't have any way to know when they're done but
@@ -66,6 +63,8 @@ module Sidekiq
66
63
  return if @workers.empty?
67
64
 
68
65
  hard_shutdown
66
+ ensure
67
+ capsule.stop
69
68
  end
70
69
 
71
70
  def processor_result(processor, reason = nil)
@@ -105,8 +104,7 @@ module Sidekiq
105
104
  # contract says that jobs are run AT LEAST once. Process termination
106
105
  # is delayed until we're certain the jobs are back in Redis because
107
106
  # it is worse to lose a job than to run it twice.
108
- strategy = @config[:fetch]
109
- strategy.bulk_requeue(jobs, @config)
107
+ capsule.fetcher.bulk_requeue(jobs)
110
108
  end
111
109
 
112
110
  cleanup.each do |processor|
@@ -0,0 +1,153 @@
1
+ require "sidekiq"
2
+ require "date"
3
+ require "set"
4
+
5
+ require "sidekiq/metrics/shared"
6
+
7
+ module Sidekiq
8
+ module Metrics
9
+ # Allows caller to query for Sidekiq execution metrics within Redis.
10
+ # Caller sets a set of attributes to act as filters. {#fetch} will call
11
+ # Redis and return a Hash of results.
12
+ #
13
+ # NB: all metrics and times/dates are UTC only. We specifically do not
14
+ # support timezones.
15
+ class Query
16
+ def initialize(pool: nil, now: Time.now)
17
+ @time = now.utc
18
+ @pool = pool || Sidekiq.default_configuration.redis_pool
19
+ @klass = nil
20
+ end
21
+
22
+ # Get metric data for all jobs from the last hour
23
+ def top_jobs(minutes: 60)
24
+ result = Result.new
25
+
26
+ time = @time
27
+ redis_results = @pool.with do |conn|
28
+ conn.pipelined do |pipe|
29
+ minutes.times do |idx|
30
+ key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
31
+ pipe.hgetall key
32
+ result.prepend_bucket time
33
+ time -= 60
34
+ end
35
+ end
36
+ end
37
+
38
+ time = @time
39
+ redis_results.each do |hash|
40
+ hash.each do |k, v|
41
+ kls, metric = k.split("|")
42
+ result.job_results[kls].add_metric metric, time, v.to_i
43
+ end
44
+ time -= 60
45
+ end
46
+
47
+ result.marks = fetch_marks(result.starts_at..result.ends_at)
48
+
49
+ result
50
+ end
51
+
52
+ def for_job(klass, minutes: 60)
53
+ result = Result.new
54
+
55
+ time = @time
56
+ redis_results = @pool.with do |conn|
57
+ conn.pipelined do |pipe|
58
+ minutes.times do |idx|
59
+ key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
60
+ pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
61
+ result.prepend_bucket time
62
+ time -= 60
63
+ end
64
+ end
65
+ end
66
+
67
+ time = @time
68
+ @pool.with do |conn|
69
+ redis_results.each do |(ms, p, f)|
70
+ result.job_results[klass].add_metric "ms", time, ms.to_i if ms
71
+ result.job_results[klass].add_metric "p", time, p.to_i if p
72
+ result.job_results[klass].add_metric "f", time, f.to_i if f
73
+ result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time).reverse
74
+ time -= 60
75
+ end
76
+ end
77
+
78
+ result.marks = fetch_marks(result.starts_at..result.ends_at)
79
+
80
+ result
81
+ end
82
+
83
+ class Result < Struct.new(:starts_at, :ends_at, :size, :buckets, :job_results, :marks)
84
+ def initialize
85
+ super
86
+ self.buckets = []
87
+ self.marks = []
88
+ self.job_results = Hash.new { |h, k| h[k] = JobResult.new }
89
+ end
90
+
91
+ def prepend_bucket(time)
92
+ buckets.unshift time.strftime("%H:%M")
93
+ self.ends_at ||= time
94
+ self.starts_at = time
95
+ end
96
+ end
97
+
98
+ class JobResult < Struct.new(:series, :hist, :totals)
99
+ def initialize
100
+ super
101
+ self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
102
+ self.hist = Hash.new { |h, k| h[k] = [] }
103
+ self.totals = Hash.new(0)
104
+ end
105
+
106
+ def add_metric(metric, time, value)
107
+ totals[metric] += value
108
+ series[metric][time.strftime("%H:%M")] += value
109
+
110
+ # Include timing measurements in seconds for convenience
111
+ add_metric("s", time, value / 1000.0) if metric == "ms"
112
+ end
113
+
114
+ def add_hist(time, hist_result)
115
+ hist[time.strftime("%H:%M")] = hist_result
116
+ end
117
+
118
+ def total_avg(metric = "ms")
119
+ completed = totals["p"] - totals["f"]
120
+ totals[metric].to_f / completed
121
+ end
122
+
123
+ def series_avg(metric = "ms")
124
+ series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
125
+ completed = series.dig("p", bucket) - series.dig("f", bucket)
126
+ result[bucket] = (completed == 0) ? 0 : value.to_f / completed
127
+ end
128
+ end
129
+ end
130
+
131
+ class MarkResult < Struct.new(:time, :label)
132
+ def bucket
133
+ time.strftime("%H:%M")
134
+ end
135
+ end
136
+
137
+ private
138
+
139
+ def fetch_marks(time_range)
140
+ [].tap do |result|
141
+ marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
142
+
143
+ marks.each do |timestamp, label|
144
+ time = Time.parse(timestamp)
145
+ if time_range.cover? time
146
+ result << MarkResult.new(time, label)
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,95 @@
1
+ require "concurrent"
2
+
3
+ module Sidekiq
4
+ module Metrics
5
+ # This is the only dependency on concurrent-ruby in Sidekiq but it's
6
+ # mandatory for thread-safety until MRI supports atomic operations on values.
7
+ Counter = ::Concurrent::AtomicFixnum
8
+
9
+ # Implements space-efficient but statistically useful histogram storage.
10
+ # A precise time histogram stores every time. Instead we break times into a set of
11
+ # known buckets and increment counts of the associated time bucket. Even if we call
12
+ # the histogram a million times, we'll still only store 26 buckets.
13
+ # NB: needs to be thread-safe or resiliant to races.
14
+ #
15
+ # To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
16
+ # per bucket per klass per minute. It's unlikely that most people will be executing more
17
+ # than 1000 job/sec for a full minute of a specific type.
18
+ class Histogram
19
+ include Enumerable
20
+
21
+ # This number represents the maximum milliseconds for this bucket.
22
+ # 20 means all job executions up to 20ms, e.g. if a job takes
23
+ # 280ms, it'll increment bucket[7]. Note we can track job executions
24
+ # up to about 5.5 minutes. After that, it's assumed you're probably
25
+ # not too concerned with its performance.
26
+ BUCKET_INTERVALS = [
27
+ 20, 30, 45, 65, 100,
28
+ 150, 225, 335, 500, 750,
29
+ 1100, 1700, 2500, 3800, 5750,
30
+ 8500, 13000, 20000, 30000, 45000,
31
+ 65000, 100000, 150000, 225000, 335000,
32
+ 1e20 # the "maybe your job is too long" bucket
33
+ ].freeze
34
+ LABELS = [
35
+ "20ms", "30ms", "45ms", "65ms", "100ms",
36
+ "150ms", "225ms", "335ms", "500ms", "750ms",
37
+ "1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
38
+ "8.5s", "13s", "20s", "30s", "45s",
39
+ "65s", "100s", "150s", "225s", "335s",
40
+ "Slow"
41
+ ].freeze
42
+ FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
43
+ GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
44
+ GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
45
+ GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
46
+ GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
47
+ GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
48
+ GET u16 #24 GET u16 #25".split
49
+ HISTOGRAM_TTL = 8 * 60 * 60
50
+
51
+ def each
52
+ buckets.each { |counter| yield counter.value }
53
+ end
54
+
55
+ def label(idx)
56
+ LABELS[idx]
57
+ end
58
+
59
+ attr_reader :buckets
60
+ def initialize(klass)
61
+ @klass = klass
62
+ @buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
63
+ end
64
+
65
+ def record_time(ms)
66
+ index_to_use = BUCKET_INTERVALS.each_index do |idx|
67
+ break idx if ms < BUCKET_INTERVALS[idx]
68
+ end
69
+
70
+ @buckets[index_to_use].increment
71
+ end
72
+
73
+ def fetch(conn, now = Time.now)
74
+ window = now.utc.strftime("%d-%H:%-M")
75
+ key = "#{@klass}-#{window}"
76
+ conn.bitfield(key, *FETCH)
77
+ end
78
+
79
+ def persist(conn, now = Time.now)
80
+ buckets, @buckets = @buckets, []
81
+ window = now.utc.strftime("%d-%H:%-M")
82
+ key = "#{@klass}-#{window}"
83
+ cmd = [key, "OVERFLOW", "SAT"]
84
+ buckets.each_with_index do |counter, idx|
85
+ val = counter.value
86
+ cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
87
+ end
88
+
89
+ conn.bitfield(*cmd) if cmd.size > 3
90
+ conn.expire(key, HISTOGRAM_TTL)
91
+ key
92
+ end
93
+ end
94
+ end
95
+ end