sidekiq 6.5.1 → 6.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changes.md +65 -0
- data/bin/sidekiqload +2 -2
- data/lib/sidekiq/api.rb +161 -37
- data/lib/sidekiq/cli.rb +13 -0
- data/lib/sidekiq/client.rb +2 -2
- data/lib/sidekiq/component.rb +2 -1
- data/lib/sidekiq/fetch.rb +2 -2
- data/lib/sidekiq/job_retry.rb +55 -35
- data/lib/sidekiq/launcher.rb +6 -4
- data/lib/sidekiq/metrics/deploy.rb +47 -0
- data/lib/sidekiq/metrics/query.rb +153 -0
- data/lib/sidekiq/metrics/shared.rb +94 -0
- data/lib/sidekiq/metrics/tracking.rb +134 -0
- data/lib/sidekiq/middleware/chain.rb +70 -35
- data/lib/sidekiq/middleware/current_attributes.rb +14 -12
- data/lib/sidekiq/monitor.rb +1 -1
- data/lib/sidekiq/paginator.rb +9 -1
- data/lib/sidekiq/processor.rb +9 -3
- data/lib/sidekiq/rails.rb +10 -11
- data/lib/sidekiq/redis_connection.rb +0 -2
- data/lib/sidekiq/scheduled.rb +43 -15
- data/lib/sidekiq/version.rb +1 -1
- data/lib/sidekiq/web/action.rb +3 -3
- data/lib/sidekiq/web/application.rb +21 -5
- data/lib/sidekiq/web/helpers.rb +17 -4
- data/lib/sidekiq/web.rb +5 -1
- data/lib/sidekiq/worker.rb +6 -3
- data/lib/sidekiq.rb +9 -1
- data/sidekiq.gemspec +2 -2
- data/web/assets/javascripts/application.js +2 -1
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard.js +0 -17
- data/web/assets/javascripts/graph.js +16 -0
- data/web/assets/javascripts/metrics.js +262 -0
- data/web/assets/stylesheets/application.css +44 -1
- data/web/locales/el.yml +43 -19
- data/web/locales/en.yml +7 -0
- data/web/locales/ja.yml +7 -0
- data/web/locales/zh-cn.yml +36 -11
- data/web/locales/zh-tw.yml +32 -7
- data/web/views/_nav.erb +1 -1
- data/web/views/busy.erb +7 -2
- data/web/views/dashboard.erb +1 -0
- data/web/views/metrics.erb +69 -0
- data/web/views/metrics_for_job.erb +87 -0
- data/web/views/queue.erb +5 -1
- metadata +29 -8
- data/lib/sidekiq/.DS_Store +0 -0
data/lib/sidekiq/fetch.rb
CHANGED
@@ -33,7 +33,7 @@ module Sidekiq # :nodoc:
|
|
33
33
|
@queues = @config[:queues].map { |q| "queue:#{q}" }
|
34
34
|
if @strictly_ordered_queues
|
35
35
|
@queues.uniq!
|
36
|
-
@queues << TIMEOUT
|
36
|
+
@queues << {timeout: TIMEOUT}
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
@@ -83,7 +83,7 @@ module Sidekiq # :nodoc:
|
|
83
83
|
else
|
84
84
|
permute = @queues.shuffle
|
85
85
|
permute.uniq!
|
86
|
-
permute << TIMEOUT
|
86
|
+
permute << {timeout: TIMEOUT}
|
87
87
|
permute
|
88
88
|
end
|
89
89
|
end
|
data/lib/sidekiq/job_retry.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sidekiq/scheduled"
|
4
|
-
require "sidekiq/api"
|
5
|
-
|
6
3
|
require "zlib"
|
7
4
|
require "base64"
|
5
|
+
require "sidekiq/component"
|
8
6
|
|
9
7
|
module Sidekiq
|
10
8
|
##
|
@@ -91,7 +89,7 @@ module Sidekiq
|
|
91
89
|
|
92
90
|
msg = Sidekiq.load_json(jobstr)
|
93
91
|
if msg["retry"]
|
94
|
-
|
92
|
+
process_retry(nil, msg, queue, e)
|
95
93
|
else
|
96
94
|
Sidekiq.death_handlers.each do |handler|
|
97
95
|
handler.call(msg, e)
|
@@ -128,7 +126,7 @@ module Sidekiq
|
|
128
126
|
end
|
129
127
|
|
130
128
|
raise e unless msg["retry"]
|
131
|
-
|
129
|
+
process_retry(jobinst, msg, queue, e)
|
132
130
|
# We've handled this error associated with this job, don't
|
133
131
|
# need to handle it at the global level
|
134
132
|
raise Skip
|
@@ -139,7 +137,7 @@ module Sidekiq
|
|
139
137
|
# Note that +jobinst+ can be nil here if an error is raised before we can
|
140
138
|
# instantiate the job instance. All access must be guarded and
|
141
139
|
# best effort.
|
142
|
-
def
|
140
|
+
def process_retry(jobinst, msg, queue, exception)
|
143
141
|
max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
|
144
142
|
|
145
143
|
msg["queue"] = (msg["retry_queue"] || queue)
|
@@ -170,19 +168,49 @@ module Sidekiq
|
|
170
168
|
msg["error_backtrace"] = compress_backtrace(lines)
|
171
169
|
end
|
172
170
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
171
|
+
# Goodbye dear message, you (re)tried your best I'm sure.
|
172
|
+
return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts
|
173
|
+
|
174
|
+
strategy, delay = delay_for(jobinst, count, exception)
|
175
|
+
case strategy
|
176
|
+
when :discard
|
177
|
+
return # poof!
|
178
|
+
when :kill
|
179
|
+
return retries_exhausted(jobinst, msg, exception)
|
180
|
+
end
|
181
|
+
|
182
|
+
# Logging here can break retries if the logging device raises ENOSPC #3979
|
183
|
+
# logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
|
184
|
+
jitter = rand(10) * (count + 1)
|
185
|
+
retry_at = Time.now.to_f + delay + jitter
|
186
|
+
payload = Sidekiq.dump_json(msg)
|
187
|
+
redis do |conn|
|
188
|
+
conn.zadd("retry", retry_at.to_s, payload)
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
# Decides how a failed job should be handled before its next attempt.
#
# When +jobinst+ is present and defines a +sidekiq_retry_in+ block, the block
# is called with +count+ and +exception+. Its result is interpreted as:
#   * :discard or :kill   -> the job is re-routed, no delay is computed
#   * a positive number   -> that many seconds (fractions are truncated)
#   * anything else       -> the default backoff of (count**4) + 15 seconds
#
# If the block raises, the error is reported through +handle_exception+ and
# the default backoff is used.
#
# Note that +jobinst+ can be nil; access is guarded with safe navigation.
#
# returns (strategy, seconds): [:default, Integer], [:discard, nil] or [:kill, nil]
def delay_for(jobinst, count, exception)
  rv = begin
    # sidekiq_retry_in can return two different things:
    # 1. When to retry next, as a number of seconds
    # 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
    result = jobinst&.sidekiq_retry_in_block&.call(count, exception)

    # Coerce numeric-like results (e.g. a Float) to whole seconds so they are
    # honored instead of silently ignored. Symbols do not respond to to_i and
    # pass through untouched; nil becomes 0 and falls back to the default.
    # The coercion lives inside the begin block so a failure to convert
    # (e.g. an infinite Float) is reported and falls back like any other error.
    result.respond_to?(:to_i) ? result.to_i : result
  rescue Exception => e
    handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
    nil
  end

  # Re-routing strategies carry no delay.
  return [rv, nil] if rv == :discard || rv == :kill

  delay = (Integer === rv && rv > 0) ? rv : (count**4) + 15
  [:default, delay]
end
|
187
215
|
|
188
216
|
def retries_exhausted(jobinst, msg, exception)
|
@@ -205,7 +233,15 @@ module Sidekiq
|
|
205
233
|
# Records +msg+ in the "dead" sorted set, scored by the current time, and
# prunes that set in the same MULTI: entries older than
# config[:dead_timeout_in_seconds] are dropped by score, then the set is
# trimmed by rank using config[:dead_max_jobs].
def send_to_morgue(msg)
  logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
  serialized = Sidekiq.dump_json(msg)
  died_at = Time.now.to_f

  config.redis do |conn|
    conn.multi do |xa|
      xa.zadd("dead", died_at.to_s, serialized)
      oldest_allowed = died_at - config[:dead_timeout_in_seconds]
      xa.zremrangebyscore("dead", "-inf", oldest_allowed)
      last_rank_to_drop = -config[:dead_max_jobs]
      xa.zremrangebyrank("dead", 0, last_rank_to_drop)
    end
  end
end
|
210
246
|
|
211
247
|
def retry_attempts_from(msg_retry, default)
|
@@ -216,22 +252,6 @@ module Sidekiq
|
|
216
252
|
end
|
217
253
|
end
|
218
254
|
|
219
|
-
def delay_for(jobinst, count, exception)
|
220
|
-
jitter = rand(10) * (count + 1)
|
221
|
-
if jobinst&.sidekiq_retry_in_block
|
222
|
-
custom_retry_in = retry_in(jobinst, count, exception).to_i
|
223
|
-
return custom_retry_in + jitter if custom_retry_in > 0
|
224
|
-
end
|
225
|
-
(count**4) + 15 + jitter
|
226
|
-
end
|
227
|
-
|
228
|
-
def retry_in(jobinst, count, exception)
|
229
|
-
jobinst.sidekiq_retry_in_block.call(count, exception)
|
230
|
-
rescue Exception => e
|
231
|
-
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
|
232
|
-
nil
|
233
|
-
end
|
234
|
-
|
235
255
|
def exception_caused_by_shutdown?(e, checked_causes = [])
|
236
256
|
return false unless e.cause
|
237
257
|
|
data/lib/sidekiq/launcher.rb
CHANGED
@@ -79,12 +79,14 @@ module Sidekiq
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def clear_heartbeat
|
82
|
+
flush_stats
|
83
|
+
|
82
84
|
# Remove record from Redis since we are shutting down.
|
83
85
|
# Note we don't stop the heartbeat thread; if the process
|
84
86
|
# doesn't actually exit, it'll reappear in the Web UI.
|
85
87
|
redis do |conn|
|
86
88
|
conn.pipelined do |pipeline|
|
87
|
-
pipeline.srem("processes", identity)
|
89
|
+
pipeline.srem("processes", [identity])
|
88
90
|
pipeline.unlink("#{identity}:work")
|
89
91
|
end
|
90
92
|
end
|
@@ -98,7 +100,7 @@ module Sidekiq
|
|
98
100
|
❤
|
99
101
|
end
|
100
102
|
|
101
|
-
def
|
103
|
+
def flush_stats
|
102
104
|
fails = Processor::FAILURE.reset
|
103
105
|
procd = Processor::PROCESSED.reset
|
104
106
|
return if fails + procd == 0
|
@@ -122,7 +124,6 @@ module Sidekiq
|
|
122
124
|
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
123
125
|
end
|
124
126
|
end
|
125
|
-
at_exit(&method(:flush_stats))
|
126
127
|
|
127
128
|
def ❤
|
128
129
|
key = identity
|
@@ -164,7 +165,7 @@ module Sidekiq
|
|
164
165
|
|
165
166
|
_, exists, _, _, msg = redis { |conn|
|
166
167
|
conn.multi { |transaction|
|
167
|
-
transaction.sadd("processes", key)
|
168
|
+
transaction.sadd("processes", [key])
|
168
169
|
transaction.exists?(key)
|
169
170
|
transaction.hmset(key, "info", to_json,
|
170
171
|
"busy", curstate.size,
|
@@ -179,6 +180,7 @@ module Sidekiq
|
|
179
180
|
|
180
181
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
181
182
|
fire_event(:heartbeat) unless exists
|
183
|
+
fire_event(:beat, oneshot: false)
|
182
184
|
|
183
185
|
return unless msg
|
184
186
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
# This file is designed to be required within the user's
|
5
|
+
# deployment script; it should need a bare minimum of dependencies.
|
6
|
+
#
|
7
|
+
# require "sidekiq/metrics/deploy"
|
8
|
+
# gitdesc = `git log -1 --format="%h %s"`.strip
|
9
|
+
# d = Sidekiq::Metrics::Deploy.new
|
10
|
+
# d.mark(label: gitdesc)
|
11
|
+
#
|
12
|
+
# Note that you cannot mark more than once per minute. This is a feature, not a bug.
|
13
|
+
module Sidekiq
|
14
|
+
module Metrics
|
15
|
+
# Records and reads "deploy marks": labelled timestamps stored in a per-day
# Redis hash named "<YYYYMMDD>-marks" (UTC date), keyed by the ISO8601 time
# of the mark rounded down to the minute.
class Deploy
  MARK_TTL = 90 * 24 * 60 * 60 # 90 days

  # pool - object responding to #with and yielding a Redis connection;
  #        defaults to Sidekiq.redis_pool.
  def initialize(pool = Sidekiq.redis_pool)
    @pool = pool
  end

  # Stores a mark for the minute containing +at+.
  #
  # at    - Time of the deploy (any zone); it is not modified.
  # label - String describing the deploy.
  #
  # HSETNX is used, so an existing mark for the same minute is kept.
  def mark(at: Time.now, label: "")
    # we need to round the timestamp so that we gracefully
    # handle an expected common error in marking deploys:
    # having every process mark its deploy, leading
    # to N marks for each deploy. Instead we round the time
    # to the minute so that multiple marks within that minute
    # will all naturally rollup into one mark per minute.
    #
    # getutc returns a converted copy; Time#utc would switch the caller's
    # object to UTC in place.
    whence = at.getutc
    floor = Time.utc(whence.year, whence.month, whence.mday, whence.hour, whence.min, 0)
    datecode = floor.strftime("%Y%m%d")
    key = "#{datecode}-marks"
    @pool.with do |c|
      c.pipelined do |pipe|
        pipe.hsetnx(key, floor.iso8601, label)
        # refresh the TTL of the day's hash on every mark
        pipe.expire(key, MARK_TTL)
      end
    end
  end

  # Returns whatever HGETALL yields for the marks hash of +date+
  # (defaults to today's UTC date).
  def fetch(date = Time.now.utc.to_date)
    datecode = date.strftime("%Y%m%d")
    @pool.with { |c| c.hgetall("#{datecode}-marks") }
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "date"
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
require "sidekiq/metrics/shared"
|
6
|
+
|
7
|
+
module Sidekiq
|
8
|
+
module Metrics
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
10
|
+
# Caller sets a set of attributes to act as filters. {#fetch} will call
|
11
|
+
# Redis and return a Hash of results.
|
12
|
+
#
|
13
|
+
# NB: all metrics and times/dates are UTC only. We specifically do not
|
14
|
+
# support timezones.
|
15
|
+
# Reads per-minute job execution metrics out of Redis.
#
# Metrics are read from hashes named "j|<YYYYMMDD>|<H>:<M>" (UTC, hour and
# minute not zero padded) whose fields are "<JobClass>|<metric>", where the
# metrics queried here are "ms", "p" and "f".
class Query
  # pool - object responding to #with and yielding a Redis connection.
  # now  - Time the query window ends at (any zone); it is not modified.
  def initialize(pool: Sidekiq.redis_pool, now: Time.now)
    # getutc returns a converted copy; Time#utc would switch the caller's
    # object to UTC in place.
    @time = now.getutc
    @pool = pool
    @klass = nil
  end

  # Get metric data for all jobs over the last +minutes+ minutes (default: one hour).
  #
  # Returns a Result whose job_results are keyed by job class name.
  def top_jobs(minutes: 60)
    result = Result.new

    # Queue one HGETALL per minute, walking backwards from @time.
    time = @time
    redis_results = @pool.with do |conn|
      conn.pipelined do |pipe|
        minutes.times do
          pipe.hgetall minute_key(time)
          result.prepend_bucket time
          time -= 60
        end
      end
    end

    # Replies come back in request order, so replay the same walk.
    time = @time
    redis_results.each do |hash|
      hash.each do |k, v|
        kls, metric = k.split("|")
        result.job_results[kls].add_metric metric, time, v.to_i
      end
      time -= 60
    end

    result.marks = fetch_marks(result.starts_at..result.ends_at)

    result
  end

  # Get metric and histogram data for the single job class +klass+ over the
  # last +minutes+ minutes.
  #
  # Returns a Result whose job_results contains an entry for +klass+.
  def for_job(klass, minutes: 60)
    result = Result.new

    time = @time
    redis_results = @pool.with do |conn|
      conn.pipelined do |pipe|
        minutes.times do
          pipe.hmget minute_key(time), "#{klass}|ms", "#{klass}|p", "#{klass}|f"
          result.prepend_bucket time
          time -= 60
        end
      end
    end

    time = @time
    @pool.with do |conn|
      redis_results.each do |(ms, p, f)|
        # HMGET yields nil for fields that were never written
        result.job_results[klass].add_metric "ms", time, ms.to_i if ms
        result.job_results[klass].add_metric "p", time, p.to_i if p
        result.job_results[klass].add_metric "f", time, f.to_i if f
        result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time)
        time -= 60
      end
    end

    result.marks = fetch_marks(result.starts_at..result.ends_at)

    result
  end

  # Container for a query's output.
  #
  # starts_at/ends_at - Times of the oldest and newest bucket.
  # buckets           - "HH:MM" labels, oldest first.
  # job_results       - Hash of job class name => JobResult (created on demand).
  # marks             - Array of MarkResult.
  # size              - not assigned by this class.
  class Result < Struct.new(:starts_at, :ends_at, :size, :buckets, :job_results, :marks)
    def initialize
      super
      self.buckets = []
      self.marks = []
      self.job_results = Hash.new { |h, k| h[k] = JobResult.new }
    end

    # Buckets are added newest first, so the first call fixes ends_at and
    # every call moves starts_at further back.
    def prepend_bucket(time)
      buckets.unshift time.strftime("%H:%M")
      self.ends_at ||= time
      self.starts_at = time
    end
  end

  # Metrics for a single job class.
  #
  # series - Hash of metric => { "HH:MM" bucket => value }.
  # hist   - Hash of "HH:MM" bucket => histogram data.
  # totals - Hash of metric => summed value.
  class JobResult < Struct.new(:series, :hist, :totals)
    def initialize
      super
      self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
      self.hist = Hash.new { |h, k| h[k] = [] }
      self.totals = Hash.new(0)
    end

    def add_metric(metric, time, value)
      totals[metric] += value
      series[metric][time.strftime("%H:%M")] += value

      # Include timing measurements in seconds for convenience
      add_metric("s", time, value / 1000.0) if metric == "ms"
    end

    def add_hist(time, hist_result)
      hist[time.strftime("%H:%M")] = hist_result
    end

    # Average of +metric+ per completed job ("p" minus "f") over the whole
    # window. Returns 0 when nothing completed, mirroring #series_avg,
    # instead of dividing by zero and yielding NaN/Infinity.
    def total_avg(metric = "ms")
      completed = totals["p"] - totals["f"]
      return 0 if completed.zero?

      totals[metric].to_f / completed
    end

    # Per-bucket average of +metric+ per completed job; 0 for buckets in
    # which nothing completed.
    def series_avg(metric = "ms")
      series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
        completed = series.dig("p", bucket) - series.dig("f", bucket)
        result[bucket] = (completed == 0) ? 0 : value.to_f / completed
      end
    end
  end

  # A deploy mark: the Time it was recorded for and its label.
  class MarkResult < Struct.new(:time, :label)
    def bucket
      time.strftime("%H:%M")
    end
  end

  private

  # Name of the Redis hash holding all job metrics for the minute of +time+.
  def minute_key(time)
    "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
  end

  # Returns the MarkResults whose time falls inside +time_range+.
  #
  # Marks are stored in one hash per UTC day, so every day touched by the
  # range is read; reading only the current day would drop marks whenever
  # the window crosses midnight. A missing range bound falls back to @time.
  def fetch_marks(time_range)
    first_day = (time_range.begin || @time).getutc.to_date
    last_day = (time_range.end || @time).getutc.to_date
    keys = (first_day..last_day).map { |day| "#{day.strftime("%Y%m%d")}-marks" }

    per_day = @pool.with { |c| keys.map { |key| c.hgetall(key) } }

    per_day.each_with_object([]) do |marks, result|
      marks.each do |timestamp, label|
        time = Time.parse(timestamp)
        result << MarkResult.new(time, label) if time_range.cover?(time)
      end
    end
  end
end
|
152
|
+
end
|
153
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "concurrent"
|
2
|
+
|
3
|
+
module Sidekiq
|
4
|
+
module Metrics
|
5
|
+
# TODO Support apps without concurrent-ruby
|
6
|
+
Counter = ::Concurrent::AtomicFixnum
|
7
|
+
|
8
|
+
# Implements space-efficient but statistically useful histogram storage.
|
9
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
10
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
11
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
12
|
+
# NB: needs to be thread-safe or resilient to races.
|
13
|
+
#
|
14
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
15
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
16
|
+
# than 1000 job/sec for a full minute of a specific type.
|
17
|
+
# In-memory execution-time histogram for one job class, persisted to Redis
# with BITFIELD as one unsigned 16-bit counter per bucket.
#
# The Redis key is "<klass>-<dd>-<HH>:<M>" (UTC day of month, zero-padded
# hour, unpadded minute).
class Histogram
  include Enumerable

  # This number represents the upper bound, in milliseconds, of each bucket.
  # A time lands in the first bucket whose bound is strictly greater than it,
  # e.g. a job taking 280ms increments bucket[7]. Note we can track job
  # executions up to about 5.5 minutes. After that, it's assumed you're
  # probably not too concerned with its performance.
  BUCKET_INTERVALS = [
    20, 30, 45, 65, 100,
    150, 225, 335, 500, 750,
    1100, 1700, 2500, 3800, 5750,
    8500, 13000, 20000, 30000, 45000,
    65000, 100000, 150000, 225000, 335000,
    Float::INFINITY # the "maybe your job is too long" bucket
  ]
  LABELS = [
    "20ms", "30ms", "45ms", "65ms", "100ms",
    "150ms", "225ms", "335ms", "500ms", "750ms",
    "1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
    "8.5s", "13s", "20s", "30s", "45s",
    "65s", "100s", "150s", "225s", "335s",
    "Slow"
  ]

  # BITFIELD arguments that read every bucket ("GET", "u16", "#0", "GET",
  # "u16", "#1", ...). Derived from the bucket list so the two cannot drift.
  FETCH = BUCKET_INTERVALS.each_index.flat_map { |idx| ["GET", "u16", "##{idx}"] }

  # Yields the current in-memory count of every bucket, in bucket order.
  def each
    buckets.each { |counter| yield counter.value }
  end

  # Human readable label of bucket +idx+.
  def label(idx)
    LABELS[idx]
  end

  attr_reader :buckets

  def initialize(klass)
    @klass = klass
    @buckets = fresh_buckets
  end

  # Increments the bucket matching an execution time of +ms+ milliseconds.
  # A value that compares below no bound (NaN) is counted in the last bucket
  # rather than raising.
  def record_time(ms)
    index_to_use = BUCKET_INTERVALS.index { |limit| ms < limit } || (BUCKET_INTERVALS.size - 1)

    @buckets[index_to_use].increment
  end

  # Reads the persisted counters for the minute of +now+ through +conn+ and
  # returns whatever BITFIELD yields. +now+ is not modified.
  def fetch(conn, now = Time.now)
    conn.bitfield(key_for(now), *FETCH)
  end

  # Adds the in-memory counts to the Redis key for the minute of +now+
  # (saturating on overflow), sets a 24 hour expiry on the key and returns
  # the key. The in-memory counters start again from zero afterwards, so the
  # histogram stays usable. +now+ is not modified.
  def persist(conn, now = Time.now)
    buckets, @buckets = @buckets, fresh_buckets
    key = key_for(now)
    cmd = [key, "OVERFLOW", "SAT"]
    buckets.each_with_index do |counter, idx|
      val = counter.value
      cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
    end

    # Only issue BITFIELD when at least one bucket has a count to add.
    conn.bitfield(*cmd) if cmd.size > 3
    conn.expire(key, 86400)
    key
  end

  private

  # One zeroed counter per bucket.
  def fresh_buckets
    Array.new(BUCKET_INTERVALS.size) { Counter.new }
  end

  # Redis key for the minute containing +now+. getutc converts a copy;
  # Time#utc would switch the caller's object to UTC in place.
  def key_for(now)
    "#{@klass}-#{now.getutc.strftime("%d-%H:%-M")}"
  end
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require "time"
|
2
|
+
require "sidekiq"
|
3
|
+
require "sidekiq/metrics/shared"
|
4
|
+
|
5
|
+
# This file contains the components which track execution metrics within Sidekiq.
|
6
|
+
module Sidekiq
|
7
|
+
module Metrics
|
8
|
+
# Accumulates per-job-class execution metrics in memory and periodically
# flushes them to Redis.
#
# Counters kept per job class: "<klass>|ms" (execution time of successful
# runs), "<klass>|p" (every run) and "<klass>|f" (runs that raised), plus a
# Histogram of execution times per class. All state is guarded by a Mutex.
class ExecutionTracker
  include Sidekiq::Component

  def initialize(config)
    @config = config
    @jobs = Hash.new(0)
    @totals = Hash.new(0)
    @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
    @lock = Mutex.new
  end

  # Runs the given block and records its outcome for +klass+.
  # +queue+ is accepted but not used here. Any exception raised by the block
  # is counted as a failure and re-raised.
  def track(queue, klass)
    start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
    time_ms = 0
    begin
      begin
        yield
      ensure
        finish = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
        time_ms = finish - start
      end
      # We don't track time for failed jobs as they can have very unpredictable
      # execution times. more important to know average time for successful jobs so we
      # can better recognize when a perf regression is introduced.
      @lock.synchronize {
        @grams[klass].record_time(time_ms)
        @jobs["#{klass}|ms"] += time_ms
        @totals["ms"] += time_ms
      }
    rescue Exception
      @lock.synchronize {
        @jobs["#{klass}|f"] += 1
        @totals["f"] += 1
      }
      raise
    ensure
      # counted for successes and failures alike
      @lock.synchronize {
        @jobs["#{klass}|p"] += 1
        @totals["p"] += 1
      }
    end
  end

  # TTLs, in seconds, for the daily, hourly and per-minute metric hashes.
  LONG_TERM = 90 * 24 * 60 * 60
  MID_TERM = 7 * 24 * 60 * 60
  SHORT_TERM = 8 * 60 * 60

  # Writes everything accumulated since the previous flush to Redis and
  # starts accumulating afresh. +time+ selects the buckets written to and is
  # not modified. Does nothing (returns nil) when no job ran.
  #
  # The per-class counters are added to three hashes, "j|<YYYYMMDD>",
  # "j|<YYYYMMDD>|<H>" and "j|<YYYYMMDD>|<H>:<M>", each with its own TTL;
  # histograms are persisted first. Returns the number of HINCRBY calls
  # issued (assuming +redis+ returns the value of its block).
  def flush(time = Time.now)
    totals, jobs, grams = reset
    procd = totals["p"]
    fails = totals["f"]
    return if procd == 0 && fails == 0

    # getutc converts a copy; Time#utc would switch the caller's object to
    # UTC in place.
    now = time.getutc
    nowdate = now.strftime("%Y%m%d")
    nowhour = now.strftime("%Y%m%d|%-H")
    nowmin = now.strftime("%Y%m%d|%-H:%-M")
    count = 0

    redis do |conn|
      if grams.size > 0
        conn.pipelined do |pipe|
          grams.each do |_, gram|
            gram.persist(pipe, now)
          end
        end
      end

      [
        ["j", jobs, nowdate, LONG_TERM],
        ["j", jobs, nowhour, MID_TERM],
        ["j", jobs, nowmin, SHORT_TERM]
      ].each do |prefix, data, bucket, ttl|
        # Quietly seed the new 7.0 stats format so migration is painless.
        conn.pipelined do |xa|
          stats = "#{prefix}|#{bucket}"
          # logger.debug "Flushing metrics #{stats}"
          data.each_pair do |key, value|
            xa.hincrby stats, key, value
            count += 1
          end
          xa.expire(stats, ttl)
        end
      end
      logger.info "Flushed #{count} metrics"
      count
    end
  end

  private

  # Swaps in empty accumulators under the lock and returns the previous
  # [totals, jobs, grams].
  def reset
    @lock.synchronize {
      array = [@totals, @jobs, @grams]
      @totals = Hash.new(0)
      @jobs = Hash.new(0)
      @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
      array
    }
  end
end
|
109
|
+
|
110
|
+
# Server middleware that hands every job execution to a tracker object.
class Middleware
  include Sidekiq::ServerMiddleware

  # options - the object whose #track method wraps each job
  #           (the wiring in this file passes an ExecutionTracker).
  def initialize(options)
    @exec = options
  end

  # Tracks the job under its "wrapped" class name when the payload has one,
  # otherwise under its "class", and runs the rest of the chain inside the
  # tracker.
  def call(_instance, hash, queue, &block)
    job_class = hash["wrapped"] || hash["class"]
    @exec.track(queue, job_class, &block)
  end
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
# Opt-in wiring: only when SIDEKIQ_METRICS_BETA=1 is set, install the metrics
# middleware on the server chain and flush the tracker on every :beat event.
if ENV["SIDEKIQ_METRICS_BETA"] == "1"
  Sidekiq.configure_server do |config|
    tracker = Sidekiq::Metrics::ExecutionTracker.new(config)

    config.server_middleware { |chain| chain.add Sidekiq::Metrics::Middleware, tracker }
    config.on(:beat) { tracker.flush }
  end
end
|