sidekiq 6.5.1 → 7.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changes.md +142 -12
- data/README.md +40 -32
- data/bin/sidekiq +3 -8
- data/bin/sidekiqload +186 -118
- data/bin/sidekiqmon +3 -0
- data/lib/sidekiq/api.rb +226 -139
- data/lib/sidekiq/capsule.rb +127 -0
- data/lib/sidekiq/cli.rb +55 -61
- data/lib/sidekiq/client.rb +31 -18
- data/lib/sidekiq/component.rb +5 -1
- data/lib/sidekiq/config.rb +270 -0
- data/lib/sidekiq/deploy.rb +62 -0
- data/lib/sidekiq/embedded.rb +61 -0
- data/lib/sidekiq/fetch.rb +11 -14
- data/lib/sidekiq/job.rb +375 -10
- data/lib/sidekiq/job_logger.rb +2 -2
- data/lib/sidekiq/job_retry.rb +62 -41
- data/lib/sidekiq/job_util.rb +48 -14
- data/lib/sidekiq/launcher.rb +71 -65
- data/lib/sidekiq/logger.rb +1 -26
- data/lib/sidekiq/manager.rb +9 -11
- data/lib/sidekiq/metrics/query.rb +153 -0
- data/lib/sidekiq/metrics/shared.rb +95 -0
- data/lib/sidekiq/metrics/tracking.rb +136 -0
- data/lib/sidekiq/middleware/chain.rb +84 -48
- data/lib/sidekiq/middleware/current_attributes.rb +12 -17
- data/lib/sidekiq/monitor.rb +17 -4
- data/lib/sidekiq/paginator.rb +9 -1
- data/lib/sidekiq/processor.rb +27 -27
- data/lib/sidekiq/rails.rb +4 -9
- data/lib/sidekiq/redis_client_adapter.rb +8 -47
- data/lib/sidekiq/redis_connection.rb +11 -113
- data/lib/sidekiq/scheduled.rb +60 -33
- data/lib/sidekiq/testing.rb +5 -33
- data/lib/sidekiq/transaction_aware_client.rb +4 -5
- data/lib/sidekiq/version.rb +2 -1
- data/lib/sidekiq/web/action.rb +3 -3
- data/lib/sidekiq/web/application.rb +40 -9
- data/lib/sidekiq/web/csrf_protection.rb +1 -1
- data/lib/sidekiq/web/helpers.rb +32 -18
- data/lib/sidekiq/web.rb +7 -14
- data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
- data/lib/sidekiq.rb +76 -266
- data/sidekiq.gemspec +21 -10
- data/web/assets/javascripts/application.js +19 -1
- data/web/assets/javascripts/base-charts.js +106 -0
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard-charts.js +166 -0
- data/web/assets/javascripts/dashboard.js +3 -240
- data/web/assets/javascripts/metrics.js +264 -0
- data/web/assets/stylesheets/application-dark.css +4 -0
- data/web/assets/stylesheets/application-rtl.css +2 -91
- data/web/assets/stylesheets/application.css +65 -297
- data/web/locales/ar.yml +70 -70
- data/web/locales/cs.yml +62 -62
- data/web/locales/da.yml +60 -53
- data/web/locales/de.yml +65 -65
- data/web/locales/el.yml +43 -24
- data/web/locales/en.yml +82 -69
- data/web/locales/es.yml +68 -68
- data/web/locales/fa.yml +65 -65
- data/web/locales/fr.yml +67 -67
- data/web/locales/gd.yml +99 -0
- data/web/locales/he.yml +65 -64
- data/web/locales/hi.yml +59 -59
- data/web/locales/it.yml +53 -53
- data/web/locales/ja.yml +73 -68
- data/web/locales/ko.yml +52 -52
- data/web/locales/lt.yml +66 -66
- data/web/locales/nb.yml +61 -61
- data/web/locales/nl.yml +52 -52
- data/web/locales/pl.yml +45 -45
- data/web/locales/pt-br.yml +59 -69
- data/web/locales/pt.yml +51 -51
- data/web/locales/ru.yml +67 -66
- data/web/locales/sv.yml +53 -53
- data/web/locales/ta.yml +60 -60
- data/web/locales/uk.yml +62 -61
- data/web/locales/ur.yml +64 -64
- data/web/locales/vi.yml +67 -67
- data/web/locales/zh-cn.yml +43 -16
- data/web/locales/zh-tw.yml +42 -8
- data/web/views/_footer.erb +5 -2
- data/web/views/_job_info.erb +18 -2
- data/web/views/_metrics_period_select.erb +12 -0
- data/web/views/_nav.erb +1 -1
- data/web/views/_paging.erb +2 -0
- data/web/views/_poll_link.erb +1 -1
- data/web/views/busy.erb +43 -27
- data/web/views/dashboard.erb +36 -4
- data/web/views/metrics.erb +82 -0
- data/web/views/metrics_for_job.erb +68 -0
- data/web/views/morgue.erb +5 -9
- data/web/views/queue.erb +15 -15
- data/web/views/queues.erb +3 -1
- data/web/views/retries.erb +5 -9
- data/web/views/scheduled.erb +12 -13
- metadata +60 -27
- data/lib/sidekiq/.DS_Store +0 -0
- data/lib/sidekiq/delay.rb +0 -43
- data/lib/sidekiq/extensions/action_mailer.rb +0 -48
- data/lib/sidekiq/extensions/active_record.rb +0 -43
- data/lib/sidekiq/extensions/class_methods.rb +0 -43
- data/lib/sidekiq/extensions/generic_proxy.rb +0 -33
- data/lib/sidekiq/worker.rb +0 -367
- /data/{LICENSE → LICENSE.txt} +0 -0
data/lib/sidekiq/launcher.rb
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "sidekiq/manager"
|
|
4
|
-
require "sidekiq/
|
|
4
|
+
require "sidekiq/capsule"
|
|
5
5
|
require "sidekiq/scheduled"
|
|
6
6
|
require "sidekiq/ring_buffer"
|
|
7
7
|
|
|
8
8
|
module Sidekiq
|
|
9
|
-
# The Launcher starts the
|
|
9
|
+
# The Launcher starts the Capsule Managers, the Poller thread and provides the process heartbeat.
|
|
10
10
|
class Launcher
|
|
11
11
|
include Sidekiq::Component
|
|
12
12
|
|
|
@@ -16,48 +16,56 @@ module Sidekiq
|
|
|
16
16
|
proc { "sidekiq" },
|
|
17
17
|
proc { Sidekiq::VERSION },
|
|
18
18
|
proc { |me, data| data["tag"] },
|
|
19
|
-
proc { |me, data| "[#{Processor::WORK_STATE.size} of #{
|
|
19
|
+
proc { |me, data| "[#{Processor::WORK_STATE.size} of #{me.config.total_concurrency} busy]" },
|
|
20
20
|
proc { |me, data| "stopping" if me.stopping? }
|
|
21
21
|
]
|
|
22
22
|
|
|
23
|
-
attr_accessor :
|
|
23
|
+
attr_accessor :managers, :poller
|
|
24
24
|
|
|
25
|
-
def initialize(
|
|
26
|
-
@config =
|
|
27
|
-
|
|
28
|
-
@
|
|
29
|
-
|
|
25
|
+
def initialize(config, embedded: false)
|
|
26
|
+
@config = config
|
|
27
|
+
@embedded = embedded
|
|
28
|
+
@managers = config.capsules.values.map do |cap|
|
|
29
|
+
Sidekiq::Manager.new(cap)
|
|
30
|
+
end
|
|
31
|
+
@poller = Sidekiq::Scheduled::Poller.new(@config)
|
|
30
32
|
@done = false
|
|
31
33
|
end
|
|
32
34
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
# Start this Sidekiq instance. If an embedding process already
|
|
36
|
+
# has a heartbeat thread, caller can use `async_beat: false`
|
|
37
|
+
# and instead have that thread call Launcher#heartbeat every N seconds.
|
|
38
|
+
def run(async_beat: true)
|
|
39
|
+
Sidekiq.freeze!
|
|
40
|
+
logger.debug { @config.merge!({}) }
|
|
41
|
+
@thread = safe_thread("heartbeat", &method(:start_heartbeat)) if async_beat
|
|
35
42
|
@poller.start
|
|
36
|
-
@
|
|
43
|
+
@managers.each(&:start)
|
|
37
44
|
end
|
|
38
45
|
|
|
39
46
|
# Stops this instance from processing any more jobs,
|
|
40
|
-
#
|
|
41
47
|
def quiet
|
|
48
|
+
return if @done
|
|
49
|
+
|
|
42
50
|
@done = true
|
|
43
|
-
@
|
|
51
|
+
@managers.each(&:quiet)
|
|
44
52
|
@poller.terminate
|
|
53
|
+
fire_event(:quiet, reverse: true)
|
|
45
54
|
end
|
|
46
55
|
|
|
47
56
|
# Shuts down this Sidekiq instance. Waits up to the deadline for all jobs to complete.
|
|
48
57
|
def stop
|
|
49
58
|
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @config[:timeout]
|
|
50
59
|
|
|
51
|
-
|
|
52
|
-
@
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
60
|
+
quiet
|
|
61
|
+
stoppers = @managers.map do |mgr|
|
|
62
|
+
Thread.new do
|
|
63
|
+
mgr.stop(deadline)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
56
66
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
strategy = @config[:fetch]
|
|
60
|
-
strategy.bulk_requeue([], @config)
|
|
67
|
+
fire_event(:shutdown, reverse: true)
|
|
68
|
+
stoppers.each(&:join)
|
|
61
69
|
|
|
62
70
|
clear_heartbeat
|
|
63
71
|
end
|
|
@@ -66,25 +74,39 @@ module Sidekiq
|
|
|
66
74
|
@done
|
|
67
75
|
end
|
|
68
76
|
|
|
77
|
+
# If embedding Sidekiq, you can have the process heartbeat
|
|
78
|
+
# call this method to regularly heartbeat rather than creating
|
|
79
|
+
# a separate thread.
|
|
80
|
+
def heartbeat
|
|
81
|
+
❤
|
|
82
|
+
end
|
|
83
|
+
|
|
69
84
|
private unless $TESTING
|
|
70
85
|
|
|
71
|
-
BEAT_PAUSE =
|
|
86
|
+
BEAT_PAUSE = 10
|
|
72
87
|
|
|
73
88
|
def start_heartbeat
|
|
74
89
|
loop do
|
|
75
|
-
|
|
90
|
+
beat
|
|
76
91
|
sleep BEAT_PAUSE
|
|
77
92
|
end
|
|
78
93
|
logger.info("Heartbeat stopping...")
|
|
79
94
|
end
|
|
80
95
|
|
|
96
|
+
def beat
|
|
97
|
+
$0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ") unless @embedded
|
|
98
|
+
❤
|
|
99
|
+
end
|
|
100
|
+
|
|
81
101
|
def clear_heartbeat
|
|
102
|
+
flush_stats
|
|
103
|
+
|
|
82
104
|
# Remove record from Redis since we are shutting down.
|
|
83
105
|
# Note we don't stop the heartbeat thread; if the process
|
|
84
106
|
# doesn't actually exit, it'll reappear in the Web UI.
|
|
85
107
|
redis do |conn|
|
|
86
108
|
conn.pipelined do |pipeline|
|
|
87
|
-
pipeline.srem("processes", identity)
|
|
109
|
+
pipeline.srem("processes", [identity])
|
|
88
110
|
pipeline.unlink("#{identity}:work")
|
|
89
111
|
end
|
|
90
112
|
end
|
|
@@ -92,20 +114,14 @@ module Sidekiq
|
|
|
92
114
|
# best effort, ignore network errors
|
|
93
115
|
end
|
|
94
116
|
|
|
95
|
-
def
|
|
96
|
-
$0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
|
|
97
|
-
|
|
98
|
-
❤
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
def self.flush_stats
|
|
117
|
+
def flush_stats
|
|
102
118
|
fails = Processor::FAILURE.reset
|
|
103
119
|
procd = Processor::PROCESSED.reset
|
|
104
120
|
return if fails + procd == 0
|
|
105
121
|
|
|
106
122
|
nowdate = Time.now.utc.strftime("%Y-%m-%d")
|
|
107
123
|
begin
|
|
108
|
-
|
|
124
|
+
redis do |conn|
|
|
109
125
|
conn.pipelined do |pipeline|
|
|
110
126
|
pipeline.incrby("stat:processed", procd)
|
|
111
127
|
pipeline.incrby("stat:processed:#{nowdate}", procd)
|
|
@@ -117,35 +133,19 @@ module Sidekiq
|
|
|
117
133
|
end
|
|
118
134
|
end
|
|
119
135
|
rescue => ex
|
|
120
|
-
|
|
121
|
-
# try to handle the exception
|
|
122
|
-
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
|
136
|
+
logger.warn("Unable to flush stats: #{ex}")
|
|
123
137
|
end
|
|
124
138
|
end
|
|
125
|
-
at_exit(&method(:flush_stats))
|
|
126
139
|
|
|
127
140
|
def ❤
|
|
128
141
|
key = identity
|
|
129
142
|
fails = procd = 0
|
|
130
143
|
|
|
131
144
|
begin
|
|
132
|
-
|
|
133
|
-
procd = Processor::PROCESSED.reset
|
|
134
|
-
curstate = Processor::WORK_STATE.dup
|
|
135
|
-
|
|
136
|
-
nowdate = Time.now.utc.strftime("%Y-%m-%d")
|
|
145
|
+
flush_stats
|
|
137
146
|
|
|
147
|
+
curstate = Processor::WORK_STATE.dup
|
|
138
148
|
redis do |conn|
|
|
139
|
-
conn.multi do |transaction|
|
|
140
|
-
transaction.incrby("stat:processed", procd)
|
|
141
|
-
transaction.incrby("stat:processed:#{nowdate}", procd)
|
|
142
|
-
transaction.expire("stat:processed:#{nowdate}", STATS_TTL)
|
|
143
|
-
|
|
144
|
-
transaction.incrby("stat:failed", fails)
|
|
145
|
-
transaction.incrby("stat:failed:#{nowdate}", fails)
|
|
146
|
-
transaction.expire("stat:failed:#{nowdate}", STATS_TTL)
|
|
147
|
-
end
|
|
148
|
-
|
|
149
149
|
# work is the current set of executing jobs
|
|
150
150
|
work_key = "#{key}:work"
|
|
151
151
|
conn.pipelined do |transaction|
|
|
@@ -162,10 +162,10 @@ module Sidekiq
|
|
|
162
162
|
fails = procd = 0
|
|
163
163
|
kb = memory_usage(::Process.pid)
|
|
164
164
|
|
|
165
|
-
_, exists, _, _,
|
|
165
|
+
_, exists, _, _, signal = redis { |conn|
|
|
166
166
|
conn.multi { |transaction|
|
|
167
|
-
transaction.sadd("processes", key)
|
|
168
|
-
transaction.exists
|
|
167
|
+
transaction.sadd("processes", [key])
|
|
168
|
+
transaction.exists(key)
|
|
169
169
|
transaction.hmset(key, "info", to_json,
|
|
170
170
|
"busy", curstate.size,
|
|
171
171
|
"beat", Time.now.to_f,
|
|
@@ -178,11 +178,10 @@ module Sidekiq
|
|
|
178
178
|
}
|
|
179
179
|
|
|
180
180
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
|
181
|
-
fire_event(:heartbeat) unless exists
|
|
181
|
+
fire_event(:heartbeat) unless exists > 0
|
|
182
|
+
fire_event(:beat, oneshot: false)
|
|
182
183
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
::Process.kill(msg, ::Process.pid)
|
|
184
|
+
::Process.kill(signal, ::Process.pid) if signal && !@embedded
|
|
186
185
|
rescue => e
|
|
187
186
|
# ignore all redis/network issues
|
|
188
187
|
logger.error("heartbeat: #{e}")
|
|
@@ -216,7 +215,7 @@ module Sidekiq
|
|
|
216
215
|
Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
|
|
217
216
|
Ensure Redis is running in the same AZ or datacenter as Sidekiq.
|
|
218
217
|
If these values are close to 100,000, that means your Sidekiq process may be
|
|
219
|
-
CPU-saturated; reduce your concurrency and/or see https://github.com/
|
|
218
|
+
CPU-saturated; reduce your concurrency and/or see https://github.com/sidekiq/sidekiq/discussions/5039
|
|
220
219
|
EOM
|
|
221
220
|
RTT_READINGS.reset
|
|
222
221
|
end
|
|
@@ -249,13 +248,20 @@ module Sidekiq
|
|
|
249
248
|
"started_at" => Time.now.to_f,
|
|
250
249
|
"pid" => ::Process.pid,
|
|
251
250
|
"tag" => @config[:tag] || "",
|
|
252
|
-
"concurrency" => @config
|
|
253
|
-
"queues" => @config
|
|
254
|
-
"
|
|
255
|
-
"
|
|
251
|
+
"concurrency" => @config.total_concurrency,
|
|
252
|
+
"queues" => @config.capsules.values.flat_map { |cap| cap.queues }.uniq,
|
|
253
|
+
"weights" => to_weights,
|
|
254
|
+
"labels" => @config[:labels].to_a,
|
|
255
|
+
"identity" => identity,
|
|
256
|
+
"version" => Sidekiq::VERSION,
|
|
257
|
+
"embedded" => @embedded
|
|
256
258
|
}
|
|
257
259
|
end
|
|
258
260
|
|
|
261
|
+
def to_weights
|
|
262
|
+
@config.capsules.values.map(&:weights)
|
|
263
|
+
end
|
|
264
|
+
|
|
259
265
|
def to_json
|
|
260
266
|
# this data changes infrequently so dump it to a string
|
|
261
267
|
# now so we don't need to dump it every heartbeat.
|
data/lib/sidekiq/logger.rb
CHANGED
|
@@ -31,7 +31,7 @@ module Sidekiq
|
|
|
31
31
|
"fatal" => 4
|
|
32
32
|
}
|
|
33
33
|
LEVELS.default_proc = proc do |_, level|
|
|
34
|
-
|
|
34
|
+
puts("Invalid log level: #{level.inspect}")
|
|
35
35
|
nil
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -70,36 +70,11 @@ module Sidekiq
|
|
|
70
70
|
ensure
|
|
71
71
|
self.local_level = old_local_level
|
|
72
72
|
end
|
|
73
|
-
|
|
74
|
-
# Redefined to check severity against #level, and thus the thread-local level, rather than +@level+.
|
|
75
|
-
# FIXME: Remove when the minimum Ruby version supports overriding Logger#level.
|
|
76
|
-
def add(severity, message = nil, progname = nil, &block)
|
|
77
|
-
severity ||= ::Logger::UNKNOWN
|
|
78
|
-
progname ||= @progname
|
|
79
|
-
|
|
80
|
-
return true if @logdev.nil? || severity < level
|
|
81
|
-
|
|
82
|
-
if message.nil?
|
|
83
|
-
if block
|
|
84
|
-
message = yield
|
|
85
|
-
else
|
|
86
|
-
message = progname
|
|
87
|
-
progname = @progname
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
@logdev.write format_message(format_severity(severity), Time.now, progname, message)
|
|
92
|
-
end
|
|
93
73
|
end
|
|
94
74
|
|
|
95
75
|
class Logger < ::Logger
|
|
96
76
|
include LoggingUtils
|
|
97
77
|
|
|
98
|
-
def initialize(*args, **kwargs)
|
|
99
|
-
super
|
|
100
|
-
self.formatter = Sidekiq.log_formatter
|
|
101
|
-
end
|
|
102
|
-
|
|
103
78
|
module Formatters
|
|
104
79
|
class Base < ::Logger::Formatter
|
|
105
80
|
def tid
|
data/lib/sidekiq/manager.rb
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "sidekiq/processor"
|
|
4
|
-
require "sidekiq/fetch"
|
|
5
4
|
require "set"
|
|
6
5
|
|
|
7
6
|
module Sidekiq
|
|
@@ -23,19 +22,19 @@ module Sidekiq
|
|
|
23
22
|
include Sidekiq::Component
|
|
24
23
|
|
|
25
24
|
attr_reader :workers
|
|
25
|
+
attr_reader :capsule
|
|
26
26
|
|
|
27
|
-
def initialize(
|
|
28
|
-
@config =
|
|
29
|
-
|
|
30
|
-
@count = options[:concurrency] || 10
|
|
27
|
+
def initialize(capsule)
|
|
28
|
+
@config = @capsule = capsule
|
|
29
|
+
@count = capsule.concurrency
|
|
31
30
|
raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
|
|
32
31
|
|
|
33
32
|
@done = false
|
|
34
33
|
@workers = Set.new
|
|
34
|
+
@plock = Mutex.new
|
|
35
35
|
@count.times do
|
|
36
36
|
@workers << Processor.new(@config, &method(:processor_result))
|
|
37
37
|
end
|
|
38
|
-
@plock = Mutex.new
|
|
39
38
|
end
|
|
40
39
|
|
|
41
40
|
def start
|
|
@@ -46,14 +45,12 @@ module Sidekiq
|
|
|
46
45
|
return if @done
|
|
47
46
|
@done = true
|
|
48
47
|
|
|
49
|
-
logger.info { "Terminating quiet threads" }
|
|
48
|
+
logger.info { "Terminating quiet threads for #{capsule.name} capsule" }
|
|
50
49
|
@workers.each(&:terminate)
|
|
51
|
-
fire_event(:quiet, reverse: true)
|
|
52
50
|
end
|
|
53
51
|
|
|
54
52
|
def stop(deadline)
|
|
55
53
|
quiet
|
|
56
|
-
fire_event(:shutdown, reverse: true)
|
|
57
54
|
|
|
58
55
|
# some of the shutdown events can be async,
|
|
59
56
|
# we don't have any way to know when they're done but
|
|
@@ -66,6 +63,8 @@ module Sidekiq
|
|
|
66
63
|
return if @workers.empty?
|
|
67
64
|
|
|
68
65
|
hard_shutdown
|
|
66
|
+
ensure
|
|
67
|
+
capsule.stop
|
|
69
68
|
end
|
|
70
69
|
|
|
71
70
|
def processor_result(processor, reason = nil)
|
|
@@ -105,8 +104,7 @@ module Sidekiq
|
|
|
105
104
|
# contract says that jobs are run AT LEAST once. Process termination
|
|
106
105
|
# is delayed until we're certain the jobs are back in Redis because
|
|
107
106
|
# it is worse to lose a job than to run it twice.
|
|
108
|
-
|
|
109
|
-
strategy.bulk_requeue(jobs, @config)
|
|
107
|
+
capsule.fetcher.bulk_requeue(jobs)
|
|
110
108
|
end
|
|
111
109
|
|
|
112
110
|
cleanup.each do |processor|
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
require "sidekiq"
|
|
2
|
+
require "date"
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
require "sidekiq/metrics/shared"
|
|
6
|
+
|
|
7
|
+
module Sidekiq
|
|
8
|
+
module Metrics
|
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
|
10
|
+
# Caller sets a set of attributes to act as filters. {#fetch} will call
|
|
11
|
+
# Redis and return a Hash of results.
|
|
12
|
+
#
|
|
13
|
+
# NB: all metrics and times/dates are UTC only. We specifically do not
|
|
14
|
+
# support timezones.
|
|
15
|
+
class Query
|
|
16
|
+
def initialize(pool: nil, now: Time.now)
|
|
17
|
+
@time = now.utc
|
|
18
|
+
@pool = pool || Sidekiq.default_configuration.redis_pool
|
|
19
|
+
@klass = nil
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# Get metric data for all jobs over the last `minutes` minutes (default: the last hour)
|
|
23
|
+
def top_jobs(minutes: 60)
|
|
24
|
+
result = Result.new
|
|
25
|
+
|
|
26
|
+
time = @time
|
|
27
|
+
redis_results = @pool.with do |conn|
|
|
28
|
+
conn.pipelined do |pipe|
|
|
29
|
+
minutes.times do |idx|
|
|
30
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
|
31
|
+
pipe.hgetall key
|
|
32
|
+
result.prepend_bucket time
|
|
33
|
+
time -= 60
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
time = @time
|
|
39
|
+
redis_results.each do |hash|
|
|
40
|
+
hash.each do |k, v|
|
|
41
|
+
kls, metric = k.split("|")
|
|
42
|
+
result.job_results[kls].add_metric metric, time, v.to_i
|
|
43
|
+
end
|
|
44
|
+
time -= 60
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
result.marks = fetch_marks(result.starts_at..result.ends_at)
|
|
48
|
+
|
|
49
|
+
result
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def for_job(klass, minutes: 60)
|
|
53
|
+
result = Result.new
|
|
54
|
+
|
|
55
|
+
time = @time
|
|
56
|
+
redis_results = @pool.with do |conn|
|
|
57
|
+
conn.pipelined do |pipe|
|
|
58
|
+
minutes.times do |idx|
|
|
59
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
|
60
|
+
pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
|
|
61
|
+
result.prepend_bucket time
|
|
62
|
+
time -= 60
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
time = @time
|
|
68
|
+
@pool.with do |conn|
|
|
69
|
+
redis_results.each do |(ms, p, f)|
|
|
70
|
+
result.job_results[klass].add_metric "ms", time, ms.to_i if ms
|
|
71
|
+
result.job_results[klass].add_metric "p", time, p.to_i if p
|
|
72
|
+
result.job_results[klass].add_metric "f", time, f.to_i if f
|
|
73
|
+
result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time).reverse
|
|
74
|
+
time -= 60
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
result.marks = fetch_marks(result.starts_at..result.ends_at)
|
|
79
|
+
|
|
80
|
+
result
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
class Result < Struct.new(:starts_at, :ends_at, :size, :buckets, :job_results, :marks)
|
|
84
|
+
def initialize
|
|
85
|
+
super
|
|
86
|
+
self.buckets = []
|
|
87
|
+
self.marks = []
|
|
88
|
+
self.job_results = Hash.new { |h, k| h[k] = JobResult.new }
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def prepend_bucket(time)
|
|
92
|
+
buckets.unshift time.strftime("%H:%M")
|
|
93
|
+
self.ends_at ||= time
|
|
94
|
+
self.starts_at = time
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
class JobResult < Struct.new(:series, :hist, :totals)
|
|
99
|
+
def initialize
|
|
100
|
+
super
|
|
101
|
+
self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
|
|
102
|
+
self.hist = Hash.new { |h, k| h[k] = [] }
|
|
103
|
+
self.totals = Hash.new(0)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def add_metric(metric, time, value)
|
|
107
|
+
totals[metric] += value
|
|
108
|
+
series[metric][time.strftime("%H:%M")] += value
|
|
109
|
+
|
|
110
|
+
# Include timing measurements in seconds for convenience
|
|
111
|
+
add_metric("s", time, value / 1000.0) if metric == "ms"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def add_hist(time, hist_result)
|
|
115
|
+
hist[time.strftime("%H:%M")] = hist_result
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def total_avg(metric = "ms")
|
|
119
|
+
completed = totals["p"] - totals["f"]
|
|
120
|
+
totals[metric].to_f / completed
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def series_avg(metric = "ms")
|
|
124
|
+
series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
|
|
125
|
+
completed = series.dig("p", bucket) - series.dig("f", bucket)
|
|
126
|
+
result[bucket] = (completed == 0) ? 0 : value.to_f / completed
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
class MarkResult < Struct.new(:time, :label)
|
|
132
|
+
def bucket
|
|
133
|
+
time.strftime("%H:%M")
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
private
|
|
138
|
+
|
|
139
|
+
def fetch_marks(time_range)
|
|
140
|
+
[].tap do |result|
|
|
141
|
+
marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
|
|
142
|
+
|
|
143
|
+
marks.each do |timestamp, label|
|
|
144
|
+
time = Time.parse(timestamp)
|
|
145
|
+
if time_range.cover? time
|
|
146
|
+
result << MarkResult.new(time, label)
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
require "concurrent"
|
|
2
|
+
|
|
3
|
+
module Sidekiq
|
|
4
|
+
module Metrics
|
|
5
|
+
# This is the only dependency on concurrent-ruby in Sidekiq but it's
|
|
6
|
+
# mandatory for thread-safety until MRI supports atomic operations on values.
|
|
7
|
+
Counter = ::Concurrent::AtomicFixnum
|
|
8
|
+
|
|
9
|
+
# Implements space-efficient but statistically useful histogram storage.
|
|
10
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
|
11
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
|
12
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
|
13
|
+
# NB: needs to be thread-safe or resilient to races.
|
|
14
|
+
#
|
|
15
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
|
16
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
|
17
|
+
# than 1000 jobs/sec of a specific type for a full minute.
|
|
18
|
+
class Histogram
|
|
19
|
+
include Enumerable
|
|
20
|
+
|
|
21
|
+
# This number represents the maximum milliseconds for this bucket.
|
|
22
|
+
# 20 means all job executions up to 20ms, e.g. if a job takes
|
|
23
|
+
# 280ms, it'll increment bucket[7]. Note we can track job executions
|
|
24
|
+
# up to about 5.5 minutes. After that, it's assumed you're probably
|
|
25
|
+
# not too concerned with its performance.
|
|
26
|
+
BUCKET_INTERVALS = [
|
|
27
|
+
20, 30, 45, 65, 100,
|
|
28
|
+
150, 225, 335, 500, 750,
|
|
29
|
+
1100, 1700, 2500, 3800, 5750,
|
|
30
|
+
8500, 13000, 20000, 30000, 45000,
|
|
31
|
+
65000, 100000, 150000, 225000, 335000,
|
|
32
|
+
1e20 # the "maybe your job is too long" bucket
|
|
33
|
+
].freeze
|
|
34
|
+
LABELS = [
|
|
35
|
+
"20ms", "30ms", "45ms", "65ms", "100ms",
|
|
36
|
+
"150ms", "225ms", "335ms", "500ms", "750ms",
|
|
37
|
+
"1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
|
|
38
|
+
"8.5s", "13s", "20s", "30s", "45s",
|
|
39
|
+
"65s", "100s", "150s", "225s", "335s",
|
|
40
|
+
"Slow"
|
|
41
|
+
].freeze
|
|
42
|
+
FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
|
|
43
|
+
GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
|
|
44
|
+
GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
|
|
45
|
+
GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
|
|
46
|
+
GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
|
|
47
|
+
GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
|
|
48
|
+
GET u16 #24 GET u16 #25".split
|
|
49
|
+
HISTOGRAM_TTL = 8 * 60 * 60
|
|
50
|
+
|
|
51
|
+
def each
|
|
52
|
+
buckets.each { |counter| yield counter.value }
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def label(idx)
|
|
56
|
+
LABELS[idx]
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
attr_reader :buckets
|
|
60
|
+
def initialize(klass)
|
|
61
|
+
@klass = klass
|
|
62
|
+
@buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def record_time(ms)
|
|
66
|
+
index_to_use = BUCKET_INTERVALS.each_index do |idx|
|
|
67
|
+
break idx if ms < BUCKET_INTERVALS[idx]
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
@buckets[index_to_use].increment
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def fetch(conn, now = Time.now)
|
|
74
|
+
window = now.utc.strftime("%d-%H:%-M")
|
|
75
|
+
key = "#{@klass}-#{window}"
|
|
76
|
+
conn.bitfield(key, *FETCH)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def persist(conn, now = Time.now)
|
|
80
|
+
buckets, @buckets = @buckets, []
|
|
81
|
+
window = now.utc.strftime("%d-%H:%-M")
|
|
82
|
+
key = "#{@klass}-#{window}"
|
|
83
|
+
cmd = [key, "OVERFLOW", "SAT"]
|
|
84
|
+
buckets.each_with_index do |counter, idx|
|
|
85
|
+
val = counter.value
|
|
86
|
+
cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
conn.bitfield(*cmd) if cmd.size > 3
|
|
90
|
+
conn.expire(key, HISTOGRAM_TTL)
|
|
91
|
+
key
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|