sidekiq 6.5.0 → 6.5.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sidekiq might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Changes.md +22 -2
- data/lib/sidekiq/api.rb +146 -27
- data/lib/sidekiq/cli.rb +2 -1
- data/lib/sidekiq/client.rb +1 -1
- data/lib/sidekiq/component.rb +3 -2
- data/lib/sidekiq/delay.rb +1 -1
- data/lib/sidekiq/fetch.rb +1 -1
- data/lib/sidekiq/job_retry.rb +46 -31
- data/lib/sidekiq/launcher.rb +4 -2
- data/lib/sidekiq/metrics/deploy.rb +47 -0
- data/lib/sidekiq/metrics/query.rb +124 -0
- data/lib/sidekiq/metrics/shared.rb +94 -0
- data/lib/sidekiq/metrics/tracking.rb +134 -0
- data/lib/sidekiq/middleware/chain.rb +70 -35
- data/lib/sidekiq/middleware/current_attributes.rb +6 -4
- data/lib/sidekiq/middleware/modules.rb +2 -0
- data/lib/sidekiq/processor.rb +1 -1
- data/lib/sidekiq/version.rb +1 -1
- data/lib/sidekiq/web/application.rb +13 -0
- data/lib/sidekiq/web/helpers.rb +24 -1
- data/lib/sidekiq/web.rb +4 -0
- data/lib/sidekiq.rb +9 -1
- data/sidekiq.gemspec +1 -1
- data/web/assets/javascripts/application.js +1 -1
- data/web/assets/javascripts/dashboard.js +0 -17
- data/web/assets/javascripts/graph.js +16 -0
- data/web/locales/en.yml +4 -0
- data/web/views/_nav.erb +1 -1
- data/web/views/busy.erb +1 -1
- data/web/views/dashboard.erb +1 -0
- data/web/views/metrics.erb +59 -0
- data/web/views/metrics_for_job.erb +92 -0
- data/web/views/queue.erb +5 -1
- metadata +11 -4
data/lib/sidekiq/job_retry.rb
CHANGED
@@ -91,7 +91,7 @@ module Sidekiq
|
|
91
91
|
|
92
92
|
msg = Sidekiq.load_json(jobstr)
|
93
93
|
if msg["retry"]
|
94
|
-
|
94
|
+
process_retry(nil, msg, queue, e)
|
95
95
|
else
|
96
96
|
Sidekiq.death_handlers.each do |handler|
|
97
97
|
handler.call(msg, e)
|
@@ -128,7 +128,7 @@ module Sidekiq
|
|
128
128
|
end
|
129
129
|
|
130
130
|
raise e unless msg["retry"]
|
131
|
-
|
131
|
+
process_retry(jobinst, msg, queue, e)
|
132
132
|
# We've handled this error associated with this job, don't
|
133
133
|
# need to handle it at the global level
|
134
134
|
raise Skip
|
@@ -139,7 +139,7 @@ module Sidekiq
|
|
139
139
|
# Note that +jobinst+ can be nil here if an error is raised before we can
|
140
140
|
# instantiate the job instance. All access must be guarded and
|
141
141
|
# best effort.
|
142
|
-
def
|
142
|
+
def process_retry(jobinst, msg, queue, exception)
|
143
143
|
max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
|
144
144
|
|
145
145
|
msg["queue"] = (msg["retry_queue"] || queue)
|
@@ -170,19 +170,50 @@ module Sidekiq
|
|
170
170
|
msg["error_backtrace"] = compress_backtrace(lines)
|
171
171
|
end
|
172
172
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
173
|
+
# Goodbye dear message, you (re)tried your best I'm sure.
|
174
|
+
return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts
|
175
|
+
|
176
|
+
strategy, delay = delay_for(jobinst, count, exception)
|
177
|
+
case strategy
|
178
|
+
when :discard
|
179
|
+
return # poof!
|
180
|
+
when :kill
|
181
|
+
return retries_exhausted(jobinst, msg, exception)
|
182
|
+
end
|
183
|
+
|
184
|
+
# Logging here can break retries if the logging device raises ENOSPC #3979
|
185
|
+
# logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
|
186
|
+
jitter = rand(10) * (count + 1)
|
187
|
+
retry_at = Time.now.to_f + delay + jitter
|
188
|
+
payload = Sidekiq.dump_json(msg)
|
189
|
+
redis do |conn|
|
190
|
+
conn.zadd("retry", retry_at.to_s, payload)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
# returns (strategy, seconds)
|
195
|
+
def delay_for(jobinst, count, exception)
|
196
|
+
rv = begin
|
197
|
+
# sidekiq_retry_in can return two different things:
|
198
|
+
# 1. When to retry next, as an integer of seconds
|
199
|
+
# 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
|
200
|
+
jobinst&.sidekiq_retry_in_block&.call(count, exception)
|
201
|
+
rescue Exception => e
|
202
|
+
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
|
203
|
+
nil
|
204
|
+
end
|
205
|
+
|
206
|
+
delay = if Integer === rv && rv > 0
|
207
|
+
rv
|
208
|
+
elsif rv == :discard
|
209
|
+
return [:discard, nil] # do nothing, job goes poof
|
210
|
+
elsif rv == :kill
|
211
|
+
return [:kill, nil]
|
182
212
|
else
|
183
|
-
|
184
|
-
retries_exhausted(jobinst, msg, exception)
|
213
|
+
(count**4) + 15
|
185
214
|
end
|
215
|
+
|
216
|
+
[:default, delay]
|
186
217
|
end
|
187
218
|
|
188
219
|
def retries_exhausted(jobinst, msg, exception)
|
@@ -195,7 +226,7 @@ module Sidekiq
|
|
195
226
|
|
196
227
|
send_to_morgue(msg) unless msg["dead"] == false
|
197
228
|
|
198
|
-
|
229
|
+
config.death_handlers.each do |handler|
|
199
230
|
handler.call(msg, exception)
|
200
231
|
rescue => e
|
201
232
|
handle_exception(e, {context: "Error calling death handler", job: msg})
|
@@ -216,22 +247,6 @@ module Sidekiq
|
|
216
247
|
end
|
217
248
|
end
|
218
249
|
|
219
|
-
def delay_for(jobinst, count, exception)
|
220
|
-
jitter = rand(10) * (count + 1)
|
221
|
-
if jobinst&.sidekiq_retry_in_block
|
222
|
-
custom_retry_in = retry_in(jobinst, count, exception).to_i
|
223
|
-
return custom_retry_in + jitter if custom_retry_in > 0
|
224
|
-
end
|
225
|
-
(count**4) + 15 + jitter
|
226
|
-
end
|
227
|
-
|
228
|
-
def retry_in(jobinst, count, exception)
|
229
|
-
jobinst.sidekiq_retry_in_block.call(count, exception)
|
230
|
-
rescue Exception => e
|
231
|
-
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
|
232
|
-
nil
|
233
|
-
end
|
234
|
-
|
235
250
|
def exception_caused_by_shutdown?(e, checked_causes = [])
|
236
251
|
return false unless e.cause
|
237
252
|
|
data/lib/sidekiq/launcher.rb
CHANGED
@@ -79,6 +79,8 @@ module Sidekiq
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def clear_heartbeat
|
82
|
+
flush_stats
|
83
|
+
|
82
84
|
# Remove record from Redis since we are shutting down.
|
83
85
|
# Note we don't stop the heartbeat thread; if the process
|
84
86
|
# doesn't actually exit, it'll reappear in the Web UI.
|
@@ -98,7 +100,7 @@ module Sidekiq
|
|
98
100
|
❤
|
99
101
|
end
|
100
102
|
|
101
|
-
def
|
103
|
+
def flush_stats
|
102
104
|
fails = Processor::FAILURE.reset
|
103
105
|
procd = Processor::PROCESSED.reset
|
104
106
|
return if fails + procd == 0
|
@@ -122,7 +124,6 @@ module Sidekiq
|
|
122
124
|
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
123
125
|
end
|
124
126
|
end
|
125
|
-
at_exit(&method(:flush_stats))
|
126
127
|
|
127
128
|
def ❤
|
128
129
|
key = identity
|
@@ -179,6 +180,7 @@ module Sidekiq
|
|
179
180
|
|
180
181
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
181
182
|
fire_event(:heartbeat) unless exists
|
183
|
+
fire_event(:beat, oneshot: false)
|
182
184
|
|
183
185
|
return unless msg
|
184
186
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
# This file is designed to be required within the user's
|
5
|
+
# deployment script; it should need a bare minimum of dependencies.
|
6
|
+
#
|
7
|
+
# require "sidekiq/metrics/deploy"
|
8
|
+
# gitdesc = `git log -1 --format="%h %s"`.strip
|
9
|
+
# d = Sidekiq::Metrics::Deploy.new
|
10
|
+
# d.mark(label: gitdesc)
|
11
|
+
#
|
12
|
+
# Note that you cannot mark more than once per minute. This is a feature, not a bug.
|
13
|
+
module Sidekiq
|
14
|
+
module Metrics
|
15
|
+
class Deploy
|
16
|
+
MARK_TTL = 90 * 24 * 60 * 60 # 90 days
|
17
|
+
|
18
|
+
def initialize(pool = Sidekiq.redis_pool)
|
19
|
+
@pool = pool
|
20
|
+
end
|
21
|
+
|
22
|
+
def mark(at: Time.now, label: "")
|
23
|
+
# we need to round the timestamp so that we gracefully
|
24
|
+
# handle an expected common error in marking deploys:
|
25
|
+
# having every process mark its deploy, leading
|
26
|
+
# to N marks for each deploy. Instead we round the time
|
27
|
+
# to the minute so that multiple marks within that minute
|
28
|
+
# will all naturally rollup into one mark per minute.
|
29
|
+
whence = at.utc
|
30
|
+
floor = Time.utc(whence.year, whence.month, whence.mday, whence.hour, whence.min, 0)
|
31
|
+
datecode = floor.strftime("%Y%m%d")
|
32
|
+
key = "#{datecode}-marks"
|
33
|
+
@pool.with do |c|
|
34
|
+
c.pipelined do |pipe|
|
35
|
+
pipe.hsetnx(key, floor.rfc3339, label)
|
36
|
+
pipe.expire(key, MARK_TTL)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def fetch(date = Time.now.utc.to_date)
|
42
|
+
datecode = date.strftime("%Y%m%d")
|
43
|
+
@pool.with { |c| c.hgetall("#{datecode}-marks") }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "date"
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
require "sidekiq/metrics/shared"
|
6
|
+
|
7
|
+
module Sidekiq
|
8
|
+
module Metrics
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
10
|
+
# Caller sets a set of attributes to act as filters. {#fetch} will call
|
11
|
+
# Redis and return a Hash of results.
|
12
|
+
#
|
13
|
+
# NB: all metrics and times/dates are UTC only. We specifically do not
|
14
|
+
# support timezones.
|
15
|
+
class Query
|
16
|
+
# :hour, :day, :month
|
17
|
+
attr_accessor :period
|
18
|
+
|
19
|
+
# a specific job class, e.g. "App::OrderJob"
|
20
|
+
attr_accessor :klass
|
21
|
+
|
22
|
+
# the date specific to the period
|
23
|
+
# for :day or :hour, something like Date.today or Date.new(2022, 7, 13)
|
24
|
+
# for :month, Date.new(2022, 7, 1)
|
25
|
+
attr_accessor :date
|
26
|
+
|
27
|
+
# for period = :hour, the specific hour, integer e.g. 1 or 18
|
28
|
+
# note that hours and minutes do not have a leading zero so minute-specific
|
29
|
+
# keys will look like "j|20220718|7:3" for data at 07:03.
|
30
|
+
attr_accessor :hour
|
31
|
+
|
32
|
+
def initialize(pool: Sidekiq.redis_pool, now: Time.now)
|
33
|
+
@time = now.utc
|
34
|
+
@pool = pool
|
35
|
+
@klass = nil
|
36
|
+
end
|
37
|
+
|
38
|
+
# Get metric data from the last hour and roll it up
|
39
|
+
# into top processed count and execution time based on class.
|
40
|
+
def top_jobs
|
41
|
+
resultset = {}
|
42
|
+
resultset[:date] = @time.to_date
|
43
|
+
resultset[:period] = :hour
|
44
|
+
resultset[:ends_at] = @time
|
45
|
+
time = @time
|
46
|
+
|
47
|
+
results = @pool.with do |conn|
|
48
|
+
conn.pipelined do |pipe|
|
49
|
+
resultset[:size] = 60
|
50
|
+
60.times do |idx|
|
51
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
52
|
+
pipe.hgetall key
|
53
|
+
time -= 60
|
54
|
+
end
|
55
|
+
resultset[:starts_at] = time
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
t = Hash.new(0)
|
60
|
+
klsset = Set.new
|
61
|
+
# merge the per-minute data into a totals hash for the hour
|
62
|
+
results.each do |hash|
|
63
|
+
hash.each { |k, v| t[k] = t[k] + v.to_i }
|
64
|
+
klsset.merge(hash.keys.map { |k| k.split("|")[0] })
|
65
|
+
end
|
66
|
+
resultset[:job_classes] = klsset.delete_if { |item| item.size < 3 }
|
67
|
+
resultset[:totals] = t
|
68
|
+
top = t.each_with_object({}) do |(k, v), memo|
|
69
|
+
(kls, metric) = k.split("|")
|
70
|
+
memo[metric] ||= Hash.new(0)
|
71
|
+
memo[metric][kls] = v
|
72
|
+
end
|
73
|
+
|
74
|
+
sorted = {}
|
75
|
+
top.each_pair do |metric, hash|
|
76
|
+
sorted[metric] = hash.sort_by { |k, v| v }.reverse.to_h
|
77
|
+
end
|
78
|
+
resultset[:top_classes] = sorted
|
79
|
+
resultset
|
80
|
+
end
|
81
|
+
|
82
|
+
def for_job(klass)
|
83
|
+
resultset = {}
|
84
|
+
resultset[:date] = @time.to_date
|
85
|
+
resultset[:period] = :hour
|
86
|
+
resultset[:ends_at] = @time
|
87
|
+
marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
|
88
|
+
|
89
|
+
time = @time
|
90
|
+
initial = @pool.with do |conn|
|
91
|
+
conn.pipelined do |pipe|
|
92
|
+
resultset[:size] = 60
|
93
|
+
60.times do |idx|
|
94
|
+
key = "j|#{time.strftime("%Y%m%d|%-H:%-M")}"
|
95
|
+
pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
|
96
|
+
time -= 60
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
time = @time
|
102
|
+
hist = Histogram.new(klass)
|
103
|
+
results = @pool.with do |conn|
|
104
|
+
initial.map do |(ms, p, f)|
|
105
|
+
tm = Time.utc(time.year, time.month, time.mday, time.hour, time.min, 0)
|
106
|
+
{
|
107
|
+
time: tm.iso8601,
|
108
|
+
epoch: tm.to_i,
|
109
|
+
ms: ms.to_i, p: p.to_i, f: f.to_i, hist: hist.fetch(conn, time)
|
110
|
+
}.tap { |x|
|
111
|
+
x[:mark] = marks[x[:time]] if marks[x[:time]]
|
112
|
+
time -= 60
|
113
|
+
}
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
resultset[:marks] = marks
|
118
|
+
resultset[:starts_at] = time
|
119
|
+
resultset[:data] = results
|
120
|
+
resultset
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "concurrent"
|
2
|
+
|
3
|
+
module Sidekiq
|
4
|
+
module Metrics
|
5
|
+
# TODO Support apps without concurrent-ruby
|
6
|
+
Counter = ::Concurrent::AtomicFixnum
|
7
|
+
|
8
|
+
# Implements space-efficient but statistically useful histogram storage.
|
9
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
10
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
11
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
12
|
+
# NB: needs to be thread-safe or resilient to races.
|
13
|
+
#
|
14
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
15
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
16
|
+
# than 1000 jobs/sec for a full minute of a specific type.
|
17
|
+
class Histogram
|
18
|
+
include Enumerable
|
19
|
+
|
20
|
+
# This number represents the maximum milliseconds for this bucket.
|
21
|
+
# 20 means all job executions up to 20ms, e.g. if a job takes
|
22
|
+
# 280ms, it'll increment bucket[7]. Note we can track job executions
|
23
|
+
# up to about 5.5 minutes. After that, it's assumed you're probably
|
24
|
+
# not too concerned with its performance.
|
25
|
+
BUCKET_INTERVALS = [
|
26
|
+
20, 30, 45, 65, 100,
|
27
|
+
150, 225, 335, 500, 750,
|
28
|
+
1100, 1700, 2500, 3800, 5750,
|
29
|
+
8500, 13000, 20000, 30000, 45000,
|
30
|
+
65000, 100000, 150000, 225000, 335000,
|
31
|
+
Float::INFINITY # the "maybe your job is too long" bucket
|
32
|
+
]
|
33
|
+
LABELS = [
|
34
|
+
"20ms", "30ms", "45ms", "65ms", "100ms",
|
35
|
+
"150ms", "225ms", "335ms", "500ms", "750ms",
|
36
|
+
"1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
|
37
|
+
"8.5s", "13s", "20s", "30s", "45s",
|
38
|
+
"65s", "100s", "150s", "225s", "335s",
|
39
|
+
"Slow"
|
40
|
+
]
|
41
|
+
|
42
|
+
FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
|
43
|
+
GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
|
44
|
+
GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
|
45
|
+
GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
|
46
|
+
GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
|
47
|
+
GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
|
48
|
+
GET u16 #24 GET u16 #25".split
|
49
|
+
|
50
|
+
def each
|
51
|
+
buckets.each { |counter| yield counter.value }
|
52
|
+
end
|
53
|
+
|
54
|
+
def label(idx)
|
55
|
+
LABELS[idx]
|
56
|
+
end
|
57
|
+
|
58
|
+
attr_reader :buckets
|
59
|
+
def initialize(klass)
|
60
|
+
@klass = klass
|
61
|
+
@buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
|
62
|
+
end
|
63
|
+
|
64
|
+
def record_time(ms)
|
65
|
+
index_to_use = BUCKET_INTERVALS.each_index do |idx|
|
66
|
+
break idx if ms < BUCKET_INTERVALS[idx]
|
67
|
+
end
|
68
|
+
|
69
|
+
@buckets[index_to_use].increment
|
70
|
+
end
|
71
|
+
|
72
|
+
def fetch(conn, now = Time.now)
|
73
|
+
window = now.utc.strftime("%d-%H:%-M")
|
74
|
+
key = "#{@klass}-#{window}"
|
75
|
+
conn.bitfield(key, *FETCH)
|
76
|
+
end
|
77
|
+
|
78
|
+
def persist(conn, now = Time.now)
|
79
|
+
buckets, @buckets = @buckets, []
|
80
|
+
window = now.utc.strftime("%d-%H:%-M")
|
81
|
+
key = "#{@klass}-#{window}"
|
82
|
+
cmd = [key, "OVERFLOW", "SAT"]
|
83
|
+
buckets.each_with_index do |counter, idx|
|
84
|
+
val = counter.value
|
85
|
+
cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
|
86
|
+
end
|
87
|
+
|
88
|
+
conn.bitfield(*cmd) if cmd.size > 3
|
89
|
+
conn.expire(key, 86400)
|
90
|
+
key
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require "time"
|
2
|
+
require "sidekiq"
|
3
|
+
require "sidekiq/metrics/shared"
|
4
|
+
|
5
|
+
# This file contains the components which track execution metrics within Sidekiq.
|
6
|
+
module Sidekiq
|
7
|
+
module Metrics
|
8
|
+
class ExecutionTracker
|
9
|
+
include Sidekiq::Component
|
10
|
+
|
11
|
+
def initialize(config)
|
12
|
+
@config = config
|
13
|
+
@jobs = Hash.new(0)
|
14
|
+
@totals = Hash.new(0)
|
15
|
+
@grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
|
16
|
+
@lock = Mutex.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def track(queue, klass)
|
20
|
+
start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
21
|
+
time_ms = 0
|
22
|
+
begin
|
23
|
+
begin
|
24
|
+
yield
|
25
|
+
ensure
|
26
|
+
finish = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
27
|
+
time_ms = finish - start
|
28
|
+
end
|
29
|
+
# We don't track time for failed jobs as they can have very unpredictable
|
30
|
+
# execution times. It's more important to know average time for successful jobs so we
|
31
|
+
# can better recognize when a perf regression is introduced.
|
32
|
+
@lock.synchronize {
|
33
|
+
@grams[klass].record_time(time_ms)
|
34
|
+
@jobs["#{klass}|ms"] += time_ms
|
35
|
+
@totals["ms"] += time_ms
|
36
|
+
}
|
37
|
+
rescue Exception
|
38
|
+
@lock.synchronize {
|
39
|
+
@jobs["#{klass}|f"] += 1
|
40
|
+
@totals["f"] += 1
|
41
|
+
}
|
42
|
+
raise
|
43
|
+
ensure
|
44
|
+
@lock.synchronize {
|
45
|
+
@jobs["#{klass}|p"] += 1
|
46
|
+
@totals["p"] += 1
|
47
|
+
}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
LONG_TERM = 90 * 24 * 60 * 60
|
52
|
+
MID_TERM = 7 * 24 * 60 * 60
|
53
|
+
SHORT_TERM = 8 * 60 * 60
|
54
|
+
|
55
|
+
def flush(time = Time.now)
|
56
|
+
totals, jobs, grams = reset
|
57
|
+
procd = totals["p"]
|
58
|
+
fails = totals["f"]
|
59
|
+
return if procd == 0 && fails == 0
|
60
|
+
|
61
|
+
now = time.utc
|
62
|
+
nowdate = now.strftime("%Y%m%d")
|
63
|
+
nowhour = now.strftime("%Y%m%d|%-H")
|
64
|
+
nowmin = now.strftime("%Y%m%d|%-H:%-M")
|
65
|
+
count = 0
|
66
|
+
|
67
|
+
redis do |conn|
|
68
|
+
if grams.size > 0
|
69
|
+
conn.pipelined do |pipe|
|
70
|
+
grams.each do |_, gram|
|
71
|
+
gram.persist(pipe, now)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
[
|
77
|
+
["j", jobs, nowdate, LONG_TERM],
|
78
|
+
["j", jobs, nowhour, MID_TERM],
|
79
|
+
["j", jobs, nowmin, SHORT_TERM]
|
80
|
+
].each do |prefix, data, bucket, ttl|
|
81
|
+
# Quietly seed the new 7.0 stats format so migration is painless.
|
82
|
+
conn.pipelined do |xa|
|
83
|
+
stats = "#{prefix}|#{bucket}"
|
84
|
+
# logger.debug "Flushing metrics #{stats}"
|
85
|
+
data.each_pair do |key, value|
|
86
|
+
xa.hincrby stats, key, value
|
87
|
+
count += 1
|
88
|
+
end
|
89
|
+
xa.expire(stats, ttl)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
logger.info "Flushed #{count} metrics"
|
93
|
+
count
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
def reset
|
100
|
+
@lock.synchronize {
|
101
|
+
array = [@totals, @jobs, @grams]
|
102
|
+
@totals = Hash.new(0)
|
103
|
+
@jobs = Hash.new(0)
|
104
|
+
@grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
|
105
|
+
array
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
class Middleware
|
111
|
+
include Sidekiq::ServerMiddleware
|
112
|
+
|
113
|
+
def initialize(options)
|
114
|
+
@exec = options
|
115
|
+
end
|
116
|
+
|
117
|
+
def call(_instance, hash, queue, &block)
|
118
|
+
@exec.track(queue, hash["wrapped"] || hash["class"], &block)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
if ENV["SIDEKIQ_METRICS_BETA"] == "1"
|
125
|
+
Sidekiq.configure_server do |config|
|
126
|
+
exec = Sidekiq::Metrics::ExecutionTracker.new(config)
|
127
|
+
config.server_middleware do |chain|
|
128
|
+
chain.add Sidekiq::Metrics::Middleware, exec
|
129
|
+
end
|
130
|
+
config.on(:beat) do
|
131
|
+
exec.flush
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|