sidekiq 6.4.2 → 6.5.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sidekiq might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Changes.md +35 -0
- data/bin/sidekiqload +15 -3
- data/lib/sidekiq/api.rb +160 -32
- data/lib/sidekiq/cli.rb +34 -32
- data/lib/sidekiq/client.rb +4 -4
- data/lib/sidekiq/component.rb +65 -0
- data/lib/sidekiq/delay.rb +1 -1
- data/lib/sidekiq/fetch.rb +16 -14
- data/lib/sidekiq/job_retry.rb +50 -34
- data/lib/sidekiq/job_util.rb +7 -3
- data/lib/sidekiq/launcher.rb +22 -19
- data/lib/sidekiq/logger.rb +1 -1
- data/lib/sidekiq/manager.rb +23 -20
- data/lib/sidekiq/metrics/deploy.rb +47 -0
- data/lib/sidekiq/metrics/query.rb +124 -0
- data/lib/sidekiq/metrics/shared.rb +94 -0
- data/lib/sidekiq/metrics/tracking.rb +134 -0
- data/lib/sidekiq/middleware/chain.rb +82 -38
- data/lib/sidekiq/middleware/current_attributes.rb +10 -4
- data/lib/sidekiq/middleware/i18n.rb +2 -0
- data/lib/sidekiq/middleware/modules.rb +21 -0
- data/lib/sidekiq/paginator.rb +2 -2
- data/lib/sidekiq/processor.rb +13 -13
- data/lib/sidekiq/rails.rb +5 -5
- data/lib/sidekiq/redis_client_adapter.rb +154 -0
- data/lib/sidekiq/redis_connection.rb +80 -47
- data/lib/sidekiq/ring_buffer.rb +29 -0
- data/lib/sidekiq/scheduled.rb +11 -10
- data/lib/sidekiq/testing.rb +1 -1
- data/lib/sidekiq/transaction_aware_client.rb +45 -0
- data/lib/sidekiq/version.rb +1 -1
- data/lib/sidekiq/web/application.rb +13 -0
- data/lib/sidekiq/web/helpers.rb +25 -2
- data/lib/sidekiq/web.rb +4 -0
- data/lib/sidekiq/worker.rb +2 -1
- data/lib/sidekiq.rb +87 -18
- data/sidekiq.gemspec +1 -1
- data/web/assets/javascripts/application.js +1 -1
- data/web/assets/javascripts/dashboard.js +0 -17
- data/web/assets/javascripts/graph.js +16 -0
- data/web/locales/en.yml +4 -0
- data/web/locales/pt-br.yml +27 -9
- data/web/views/_nav.erb +1 -1
- data/web/views/busy.erb +1 -1
- data/web/views/dashboard.erb +1 -0
- data/web/views/metrics.erb +59 -0
- data/web/views/metrics_for_job.erb +92 -0
- data/web/views/queue.erb +5 -1
- metadata +16 -6
- data/lib/sidekiq/exception_handler.rb +0 -27
- data/lib/sidekiq/util.rb +0 -108
data/lib/sidekiq/launcher.rb
CHANGED
@@ -3,11 +3,12 @@
|
|
3
3
|
require "sidekiq/manager"
|
4
4
|
require "sidekiq/fetch"
|
5
5
|
require "sidekiq/scheduled"
|
6
|
+
require "sidekiq/ring_buffer"
|
6
7
|
|
7
8
|
module Sidekiq
|
8
9
|
# The Launcher starts the Manager and Poller threads and provides the process heartbeat.
|
9
10
|
class Launcher
|
10
|
-
include Util
|
11
|
+
include Sidekiq::Component
|
11
12
|
|
12
13
|
STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
|
13
14
|
|
@@ -22,11 +23,11 @@ module Sidekiq
|
|
22
23
|
attr_accessor :manager, :poller, :fetcher
|
23
24
|
|
24
25
|
def initialize(options)
|
26
|
+
@config = options
|
25
27
|
options[:fetch] ||= BasicFetch.new(options)
|
26
28
|
@manager = Sidekiq::Manager.new(options)
|
27
|
-
@poller = Sidekiq::Scheduled::Poller.new
|
29
|
+
@poller = Sidekiq::Scheduled::Poller.new(options)
|
28
30
|
@done = false
|
29
|
-
@options = options
|
30
31
|
end
|
31
32
|
|
32
33
|
def run
|
@@ -45,7 +46,7 @@ module Sidekiq
|
|
45
46
|
|
46
47
|
# Shuts down this Sidekiq instance. Waits up to the deadline for all jobs to complete.
|
47
48
|
def stop
|
48
|
-
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
|
49
|
+
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @config[:timeout]
|
49
50
|
|
50
51
|
@done = true
|
51
52
|
@manager.quiet
|
@@ -55,8 +56,8 @@ module Sidekiq
|
|
55
56
|
|
56
57
|
# Requeue everything in case there was a thread which fetched a job while the process was stopped.
|
57
58
|
# This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
|
58
|
-
strategy = @options[:fetch]
|
59
|
-
strategy.bulk_requeue([], @options)
|
59
|
+
strategy = @config[:fetch]
|
60
|
+
strategy.bulk_requeue([], @config)
|
60
61
|
|
61
62
|
clear_heartbeat
|
62
63
|
end
|
@@ -74,14 +75,16 @@ module Sidekiq
|
|
74
75
|
heartbeat
|
75
76
|
sleep BEAT_PAUSE
|
76
77
|
end
|
77
|
-
|
78
|
+
logger.info("Heartbeat stopping...")
|
78
79
|
end
|
79
80
|
|
80
81
|
def clear_heartbeat
|
82
|
+
flush_stats
|
83
|
+
|
81
84
|
# Remove record from Redis since we are shutting down.
|
82
85
|
# Note we don't stop the heartbeat thread; if the process
|
83
86
|
# doesn't actually exit, it'll reappear in the Web UI.
|
84
|
-
|
87
|
+
redis do |conn|
|
85
88
|
conn.pipelined do |pipeline|
|
86
89
|
pipeline.srem("processes", identity)
|
87
90
|
pipeline.unlink("#{identity}:work")
|
@@ -97,7 +100,7 @@ module Sidekiq
|
|
97
100
|
❤
|
98
101
|
end
|
99
102
|
|
100
|
-
def self.flush_stats
|
103
|
+
def flush_stats
|
101
104
|
fails = Processor::FAILURE.reset
|
102
105
|
procd = Processor::PROCESSED.reset
|
103
106
|
return if fails + procd == 0
|
@@ -121,7 +124,6 @@ module Sidekiq
|
|
121
124
|
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
122
125
|
end
|
123
126
|
end
|
124
|
-
at_exit(&method(:flush_stats))
|
125
127
|
|
126
128
|
def ❤
|
127
129
|
key = identity
|
@@ -134,7 +136,7 @@ module Sidekiq
|
|
134
136
|
|
135
137
|
nowdate = Time.now.utc.strftime("%Y-%m-%d")
|
136
138
|
|
137
|
-
|
139
|
+
redis do |conn|
|
138
140
|
conn.multi do |transaction|
|
139
141
|
transaction.incrby("stat:processed", procd)
|
140
142
|
transaction.incrby("stat:processed:#{nowdate}", procd)
|
@@ -161,7 +163,7 @@ module Sidekiq
|
|
161
163
|
fails = procd = 0
|
162
164
|
kb = memory_usage(::Process.pid)
|
163
165
|
|
164
|
-
_, exists, _, _, msg =
|
166
|
+
_, exists, _, _, msg = redis { |conn|
|
165
167
|
conn.multi { |transaction|
|
166
168
|
transaction.sadd("processes", key)
|
167
169
|
transaction.exists?(key)
|
@@ -169,7 +171,7 @@ module Sidekiq
|
|
169
171
|
"busy", curstate.size,
|
170
172
|
"beat", Time.now.to_f,
|
171
173
|
"rtt_us", rtt,
|
172
|
-
"quiet", @done,
|
174
|
+
"quiet", @done.to_s,
|
173
175
|
"rss", kb)
|
174
176
|
transaction.expire(key, 60)
|
175
177
|
transaction.rpop("#{key}-signals")
|
@@ -178,6 +180,7 @@ module Sidekiq
|
|
178
180
|
|
179
181
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
180
182
|
fire_event(:heartbeat) unless exists
|
183
|
+
fire_event(:beat, oneshot: false)
|
181
184
|
|
182
185
|
return unless msg
|
183
186
|
|
@@ -199,7 +202,7 @@ module Sidekiq
|
|
199
202
|
|
200
203
|
def check_rtt
|
201
204
|
a = b = 0
|
202
|
-
|
205
|
+
redis do |x|
|
203
206
|
a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
|
204
207
|
x.ping
|
205
208
|
b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
|
@@ -210,7 +213,7 @@ module Sidekiq
|
|
210
213
|
# Workable is < 10,000µs
|
211
214
|
# Log a warning if it's a disaster.
|
212
215
|
if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
|
213
|
-
|
216
|
+
logger.warn <<~EOM
|
214
217
|
Your Redis network connection is performing extremely poorly.
|
215
218
|
Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
|
216
219
|
Ensure Redis is running in the same AZ or datacenter as Sidekiq.
|
@@ -247,10 +250,10 @@ module Sidekiq
|
|
247
250
|
"hostname" => hostname,
|
248
251
|
"started_at" => Time.now.to_f,
|
249
252
|
"pid" => ::Process.pid,
|
250
|
-
"tag" => @
|
251
|
-
"concurrency" => @
|
252
|
-
"queues" => @
|
253
|
-
"labels" => @
|
253
|
+
"tag" => @config[:tag] || "",
|
254
|
+
"concurrency" => @config[:concurrency],
|
255
|
+
"queues" => @config[:queues].uniq,
|
256
|
+
"labels" => @config[:labels],
|
254
257
|
"identity" => identity
|
255
258
|
}
|
256
259
|
end
|
data/lib/sidekiq/logger.rb
CHANGED
data/lib/sidekiq/manager.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sidekiq/util"
|
4
3
|
require "sidekiq/processor"
|
5
4
|
require "sidekiq/fetch"
|
6
5
|
require "set"
|
@@ -21,29 +20,26 @@ module Sidekiq
|
|
21
20
|
# the shutdown process. The other tasks are performed by other threads.
|
22
21
|
#
|
23
22
|
class Manager
|
24
|
-
include Util
|
23
|
+
include Sidekiq::Component
|
25
24
|
|
26
25
|
attr_reader :workers
|
27
|
-
attr_reader :options
|
28
26
|
|
29
27
|
def initialize(options = {})
|
28
|
+
@config = options
|
30
29
|
logger.debug { options.inspect }
|
31
|
-
@options = options
|
32
30
|
@count = options[:concurrency] || 10
|
33
31
|
raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
|
34
32
|
|
35
33
|
@done = false
|
36
34
|
@workers = Set.new
|
37
35
|
@count.times do
|
38
|
-
@workers << Processor.new(
|
36
|
+
@workers << Processor.new(@config, &method(:processor_result))
|
39
37
|
end
|
40
38
|
@plock = Mutex.new
|
41
39
|
end
|
42
40
|
|
43
41
|
def start
|
44
|
-
@workers.each do |x|
|
45
|
-
x.start
|
46
|
-
end
|
42
|
+
@workers.each(&:start)
|
47
43
|
end
|
48
44
|
|
49
45
|
def quiet
|
@@ -51,7 +47,7 @@ module Sidekiq
|
|
51
47
|
@done = true
|
52
48
|
|
53
49
|
logger.info { "Terminating quiet threads" }
|
54
|
-
@workers.each
|
50
|
+
@workers.each(&:terminate)
|
55
51
|
fire_event(:quiet, reverse: true)
|
56
52
|
end
|
57
53
|
|
@@ -72,17 +68,11 @@ module Sidekiq
|
|
72
68
|
hard_shutdown
|
73
69
|
end
|
74
70
|
|
75
|
-
def
|
76
|
-
@plock.synchronize do
|
77
|
-
@workers.delete(processor)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def processor_died(processor, reason)
|
71
|
+
def processor_result(processor, reason = nil)
|
82
72
|
@plock.synchronize do
|
83
73
|
@workers.delete(processor)
|
84
74
|
unless @done
|
85
|
-
p = Processor.new(
|
75
|
+
p = Processor.new(@config, &method(:processor_result))
|
86
76
|
@workers << p
|
87
77
|
p.start
|
88
78
|
end
|
@@ -107,7 +97,7 @@ module Sidekiq
|
|
107
97
|
jobs = cleanup.map { |p| p.job }.compact
|
108
98
|
|
109
99
|
logger.warn { "Terminating #{cleanup.size} busy threads" }
|
110
|
-
logger.
|
100
|
+
logger.debug { "Jobs still in progress #{jobs.inspect}" }
|
111
101
|
|
112
102
|
# Re-enqueue unfinished jobs
|
113
103
|
# NOTE: You may notice that we may push a job back to redis before
|
@@ -115,8 +105,8 @@ module Sidekiq
|
|
115
105
|
# contract says that jobs are run AT LEAST once. Process termination
|
116
106
|
# is delayed until we're certain the jobs are back in Redis because
|
117
107
|
# it is worse to lose a job than to run it twice.
|
118
|
-
strategy = @options[:fetch]
|
119
|
-
strategy.bulk_requeue(jobs, @options)
|
108
|
+
strategy = @config[:fetch]
|
109
|
+
strategy.bulk_requeue(jobs, @config)
|
120
110
|
end
|
121
111
|
|
122
112
|
cleanup.each do |processor|
|
@@ -129,5 +119,18 @@ module Sidekiq
|
|
129
119
|
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 3
|
130
120
|
wait_for(deadline) { @workers.empty? }
|
131
121
|
end
|
122
|
+
|
123
|
+
# hack for quicker development / testing environment #2774
|
124
|
+
PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
|
125
|
+
|
126
|
+
# Wait for the block to return true or for the deadline to pass.
|
127
|
+
def wait_for(deadline, &condblock)
|
128
|
+
remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
129
|
+
while remaining > PAUSE_TIME
|
130
|
+
return if condblock.call
|
131
|
+
sleep PAUSE_TIME
|
132
|
+
remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
133
|
+
end
|
134
|
+
end
|
132
135
|
end
|
133
136
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
# This file is designed to be required within the user's
|
5
|
+
# deployment script; it should need a bare minimum of dependencies.
|
6
|
+
#
|
7
|
+
# require "sidekiq/metrics/deploy"
|
8
|
+
# gitdesc = `git log -1 --format="%h %s"`.strip
|
9
|
+
# d = Sidekiq::Metrics::Deploy.new
|
10
|
+
# d.mark(label: gitdesc)
|
11
|
+
#
|
12
|
+
# Note that you cannot mark more than once per minute. This is a feature, not a bug.
|
13
|
+
module Sidekiq
|
14
|
+
module Metrics
|
15
|
+
class Deploy
|
16
|
+
MARK_TTL = 90 * 24 * 60 * 60 # 90 days
|
17
|
+
|
18
|
+
def initialize(pool = Sidekiq.redis_pool)
|
19
|
+
@pool = pool
|
20
|
+
end
|
21
|
+
|
22
|
+
def mark(at: Time.now, label: "")
|
23
|
+
# we need to round the timestamp so that we gracefully
|
24
|
+
# handle an expected common error in marking deploys:
|
25
|
+
# having every process mark its deploy, leading
|
26
|
+
# to N marks for each deploy. Instead we round the time
|
27
|
+
# to the minute so that multiple marks within that minute
|
28
|
+
# will all naturally rollup into one mark per minute.
|
29
|
+
whence = at.utc
|
30
|
+
floor = Time.utc(whence.year, whence.month, whence.mday, whence.hour, whence.min, 0)
|
31
|
+
datecode = floor.strftime("%Y%m%d")
|
32
|
+
key = "#{datecode}-marks"
|
33
|
+
@pool.with do |c|
|
34
|
+
c.pipelined do |pipe|
|
35
|
+
pipe.hsetnx(key, floor.rfc3339, label)
|
36
|
+
pipe.expire(key, MARK_TTL)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def fetch(date = Time.now.utc.to_date)
|
42
|
+
datecode = date.strftime("%Y%m%d")
|
43
|
+
@pool.with { |c| c.hgetall("#{datecode}-marks") }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "date"
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
require "sidekiq/metrics/shared"
|
6
|
+
|
7
|
+
module Sidekiq
|
8
|
+
module Metrics
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
10
|
+
# Caller sets a set of attributes to act as filters. {#fetch} will call
|
11
|
+
# Redis and return a Hash of results.
|
12
|
+
#
|
13
|
+
# NB: all metrics and times/dates are UTC only. We specifically do not
|
14
|
+
# support timezones.
|
15
|
+
class Query
|
16
|
+
# :hour, :day, :month
|
17
|
+
attr_accessor :period
|
18
|
+
|
19
|
+
# a specific job class, e.g. "App::OrderJob"
|
20
|
+
attr_accessor :klass
|
21
|
+
|
22
|
+
# the date specific to the period
|
23
|
+
# for :day or :hour, something like Date.today or Date.new(2022, 7, 13)
|
24
|
+
# for :month, Date.new(2022, 7, 1)
|
25
|
+
attr_accessor :date
|
26
|
+
|
27
|
+
# for period = :hour, the specific hour, integer e.g. 1 or 18
|
28
|
+
# note that hours and minutes do not have a leading zero so minute-specific
|
29
|
+
# keys will look like "j|20220718|7:3" for data at 07:03.
|
30
|
+
attr_accessor :hour
|
31
|
+
|
32
|
+
def initialize(pool: Sidekiq.redis_pool, now: Time.now)
|
33
|
+
@time = now.utc
|
34
|
+
@pool = pool
|
35
|
+
@klass = nil
|
36
|
+
end
|
37
|
+
|
38
|
+
# Get metric data from the last hour and roll it up
|
39
|
+
# into top processed count and execution time based on class.
|
40
|
+
def top_jobs
|
41
|
+
resultset = {}
|
42
|
+
resultset[:date] = @time.to_date
|
43
|
+
resultset[:period] = :hour
|
44
|
+
resultset[:ends_at] = @time
|
45
|
+
time = @time
|
46
|
+
|
47
|
+
results = @pool.with do |conn|
|
48
|
+
conn.pipelined do |pipe|
|
49
|
+
resultset[:size] = 60
|
50
|
+
60.times do |idx|
|
51
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
52
|
+
pipe.hgetall key
|
53
|
+
time -= 60
|
54
|
+
end
|
55
|
+
resultset[:starts_at] = time
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
t = Hash.new(0)
|
60
|
+
klsset = Set.new
|
61
|
+
# merge the per-minute data into a totals hash for the hour
|
62
|
+
results.each do |hash|
|
63
|
+
hash.each { |k, v| t[k] = t[k] + v.to_i }
|
64
|
+
klsset.merge(hash.keys.map { |k| k.split("|")[0] })
|
65
|
+
end
|
66
|
+
resultset[:job_classes] = klsset.delete_if { |item| item.size < 3 }
|
67
|
+
resultset[:totals] = t
|
68
|
+
top = t.each_with_object({}) do |(k, v), memo|
|
69
|
+
(kls, metric) = k.split("|")
|
70
|
+
memo[metric] ||= Hash.new(0)
|
71
|
+
memo[metric][kls] = v
|
72
|
+
end
|
73
|
+
|
74
|
+
sorted = {}
|
75
|
+
top.each_pair do |metric, hash|
|
76
|
+
sorted[metric] = hash.sort_by { |k, v| v }.reverse.to_h
|
77
|
+
end
|
78
|
+
resultset[:top_classes] = sorted
|
79
|
+
resultset
|
80
|
+
end
|
81
|
+
|
82
|
+
def for_job(klass)
|
83
|
+
resultset = {}
|
84
|
+
resultset[:date] = @time.to_date
|
85
|
+
resultset[:period] = :hour
|
86
|
+
resultset[:ends_at] = @time
|
87
|
+
marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
|
88
|
+
|
89
|
+
time = @time
|
90
|
+
initial = @pool.with do |conn|
|
91
|
+
conn.pipelined do |pipe|
|
92
|
+
resultset[:size] = 60
|
93
|
+
60.times do |idx|
|
94
|
+
key = "j|#{time.strftime("%Y%m%d|%-H:%-M")}"
|
95
|
+
pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
|
96
|
+
time -= 60
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
time = @time
|
102
|
+
hist = Histogram.new(klass)
|
103
|
+
results = @pool.with do |conn|
|
104
|
+
initial.map do |(ms, p, f)|
|
105
|
+
tm = Time.utc(time.year, time.month, time.mday, time.hour, time.min, 0)
|
106
|
+
{
|
107
|
+
time: tm.iso8601,
|
108
|
+
epoch: tm.to_i,
|
109
|
+
ms: ms.to_i, p: p.to_i, f: f.to_i, hist: hist.fetch(conn, time)
|
110
|
+
}.tap { |x|
|
111
|
+
x[:mark] = marks[x[:time]] if marks[x[:time]]
|
112
|
+
time -= 60
|
113
|
+
}
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
resultset[:marks] = marks
|
118
|
+
resultset[:starts_at] = time
|
119
|
+
resultset[:data] = results
|
120
|
+
resultset
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "concurrent"
|
2
|
+
|
3
|
+
module Sidekiq
|
4
|
+
module Metrics
|
5
|
+
# TODO Support apps without concurrent-ruby
|
6
|
+
Counter = ::Concurrent::AtomicFixnum
|
7
|
+
|
8
|
+
# Implements space-efficient but statistically useful histogram storage.
|
9
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
10
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
11
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
12
|
+
# NB: needs to be thread-safe or resilient to races.
|
13
|
+
#
|
14
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
15
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
16
|
+
# than 1000 jobs/sec of a specific type for a full minute.
|
17
|
+
class Histogram
|
18
|
+
include Enumerable
|
19
|
+
|
20
|
+
# This number represents the maximum milliseconds for this bucket.
|
21
|
+
# 20 means all job executions up to 20ms, e.g. if a job takes
|
22
|
+
# 280ms, it'll increment bucket[7]. Note we can track job executions
|
23
|
+
# up to about 5.5 minutes. After that, it's assumed you're probably
|
24
|
+
# not too concerned with its performance.
|
25
|
+
BUCKET_INTERVALS = [
|
26
|
+
20, 30, 45, 65, 100,
|
27
|
+
150, 225, 335, 500, 750,
|
28
|
+
1100, 1700, 2500, 3800, 5750,
|
29
|
+
8500, 13000, 20000, 30000, 45000,
|
30
|
+
65000, 100000, 150000, 225000, 335000,
|
31
|
+
Float::INFINITY # the "maybe your job is too long" bucket
|
32
|
+
]
|
33
|
+
LABELS = [
|
34
|
+
"20ms", "30ms", "45ms", "65ms", "100ms",
|
35
|
+
"150ms", "225ms", "335ms", "500ms", "750ms",
|
36
|
+
"1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
|
37
|
+
"8.5s", "13s", "20s", "30s", "45s",
|
38
|
+
"65s", "100s", "150s", "225s", "335s",
|
39
|
+
"Slow"
|
40
|
+
]
|
41
|
+
|
42
|
+
FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
|
43
|
+
GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
|
44
|
+
GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
|
45
|
+
GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
|
46
|
+
GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
|
47
|
+
GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
|
48
|
+
GET u16 #24 GET u16 #25".split
|
49
|
+
|
50
|
+
def each
|
51
|
+
buckets.each { |counter| yield counter.value }
|
52
|
+
end
|
53
|
+
|
54
|
+
def label(idx)
|
55
|
+
LABELS[idx]
|
56
|
+
end
|
57
|
+
|
58
|
+
attr_reader :buckets
|
59
|
+
def initialize(klass)
|
60
|
+
@klass = klass
|
61
|
+
@buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
|
62
|
+
end
|
63
|
+
|
64
|
+
def record_time(ms)
|
65
|
+
index_to_use = BUCKET_INTERVALS.each_index do |idx|
|
66
|
+
break idx if ms < BUCKET_INTERVALS[idx]
|
67
|
+
end
|
68
|
+
|
69
|
+
@buckets[index_to_use].increment
|
70
|
+
end
|
71
|
+
|
72
|
+
def fetch(conn, now = Time.now)
|
73
|
+
window = now.utc.strftime("%d-%H:%-M")
|
74
|
+
key = "#{@klass}-#{window}"
|
75
|
+
conn.bitfield(key, *FETCH)
|
76
|
+
end
|
77
|
+
|
78
|
+
def persist(conn, now = Time.now)
|
79
|
+
buckets, @buckets = @buckets, []
|
80
|
+
window = now.utc.strftime("%d-%H:%-M")
|
81
|
+
key = "#{@klass}-#{window}"
|
82
|
+
cmd = [key, "OVERFLOW", "SAT"]
|
83
|
+
buckets.each_with_index do |counter, idx|
|
84
|
+
val = counter.value
|
85
|
+
cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
|
86
|
+
end
|
87
|
+
|
88
|
+
conn.bitfield(*cmd) if cmd.size > 3
|
89
|
+
conn.expire(key, 86400)
|
90
|
+
key
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require "time"
|
2
|
+
require "sidekiq"
|
3
|
+
require "sidekiq/metrics/shared"
|
4
|
+
|
5
|
+
# This file contains the components which track execution metrics within Sidekiq.
|
6
|
+
module Sidekiq
|
7
|
+
module Metrics
|
8
|
+
class ExecutionTracker
|
9
|
+
include Sidekiq::Component
|
10
|
+
|
11
|
+
def initialize(config)
|
12
|
+
@config = config
|
13
|
+
@jobs = Hash.new(0)
|
14
|
+
@totals = Hash.new(0)
|
15
|
+
@grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
|
16
|
+
@lock = Mutex.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def track(queue, klass)
|
20
|
+
start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
21
|
+
time_ms = 0
|
22
|
+
begin
|
23
|
+
begin
|
24
|
+
yield
|
25
|
+
ensure
|
26
|
+
finish = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
27
|
+
time_ms = finish - start
|
28
|
+
end
|
29
|
+
# We don't track time for failed jobs as they can have very unpredictable
|
30
|
+
# execution times. It is more important to know the average time for successful jobs so we
|
31
|
+
# can better recognize when a perf regression is introduced.
|
32
|
+
@lock.synchronize {
|
33
|
+
@grams[klass].record_time(time_ms)
|
34
|
+
@jobs["#{klass}|ms"] += time_ms
|
35
|
+
@totals["ms"] += time_ms
|
36
|
+
}
|
37
|
+
rescue Exception
|
38
|
+
@lock.synchronize {
|
39
|
+
@jobs["#{klass}|f"] += 1
|
40
|
+
@totals["f"] += 1
|
41
|
+
}
|
42
|
+
raise
|
43
|
+
ensure
|
44
|
+
@lock.synchronize {
|
45
|
+
@jobs["#{klass}|p"] += 1
|
46
|
+
@totals["p"] += 1
|
47
|
+
}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
LONG_TERM = 90 * 24 * 60 * 60
|
52
|
+
MID_TERM = 7 * 24 * 60 * 60
|
53
|
+
SHORT_TERM = 8 * 60 * 60
|
54
|
+
|
55
|
+
def flush(time = Time.now)
|
56
|
+
totals, jobs, grams = reset
|
57
|
+
procd = totals["p"]
|
58
|
+
fails = totals["f"]
|
59
|
+
return if procd == 0 && fails == 0
|
60
|
+
|
61
|
+
now = time.utc
|
62
|
+
nowdate = now.strftime("%Y%m%d")
|
63
|
+
nowhour = now.strftime("%Y%m%d|%-H")
|
64
|
+
nowmin = now.strftime("%Y%m%d|%-H:%-M")
|
65
|
+
count = 0
|
66
|
+
|
67
|
+
redis do |conn|
|
68
|
+
if grams.size > 0
|
69
|
+
conn.pipelined do |pipe|
|
70
|
+
grams.each do |_, gram|
|
71
|
+
gram.persist(pipe, now)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
[
|
77
|
+
["j", jobs, nowdate, LONG_TERM],
|
78
|
+
["j", jobs, nowhour, MID_TERM],
|
79
|
+
["j", jobs, nowmin, SHORT_TERM]
|
80
|
+
].each do |prefix, data, bucket, ttl|
|
81
|
+
# Quietly seed the new 7.0 stats format so migration is painless.
|
82
|
+
conn.pipelined do |xa|
|
83
|
+
stats = "#{prefix}|#{bucket}"
|
84
|
+
# logger.debug "Flushing metrics #{stats}"
|
85
|
+
data.each_pair do |key, value|
|
86
|
+
xa.hincrby stats, key, value
|
87
|
+
count += 1
|
88
|
+
end
|
89
|
+
xa.expire(stats, ttl)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
logger.info "Flushed #{count} metrics"
|
93
|
+
count
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
def reset
|
100
|
+
@lock.synchronize {
|
101
|
+
array = [@totals, @jobs, @grams]
|
102
|
+
@totals = Hash.new(0)
|
103
|
+
@jobs = Hash.new(0)
|
104
|
+
@grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
|
105
|
+
array
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
class Middleware
|
111
|
+
include Sidekiq::ServerMiddleware
|
112
|
+
|
113
|
+
def initialize(options)
|
114
|
+
@exec = options
|
115
|
+
end
|
116
|
+
|
117
|
+
def call(_instance, hash, queue, &block)
|
118
|
+
@exec.track(queue, hash["wrapped"] || hash["class"], &block)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
if ENV["SIDEKIQ_METRICS_BETA"] == "1"
|
125
|
+
Sidekiq.configure_server do |config|
|
126
|
+
exec = Sidekiq::Metrics::ExecutionTracker.new(config)
|
127
|
+
config.server_middleware do |chain|
|
128
|
+
chain.add Sidekiq::Metrics::Middleware, exec
|
129
|
+
end
|
130
|
+
config.on(:beat) do
|
131
|
+
exec.flush
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|