sidekiq 6.5.1 → 6.5.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Changes.md +65 -0
- data/bin/sidekiqload +2 -2
- data/lib/sidekiq/api.rb +161 -37
- data/lib/sidekiq/cli.rb +13 -0
- data/lib/sidekiq/client.rb +2 -2
- data/lib/sidekiq/component.rb +2 -1
- data/lib/sidekiq/fetch.rb +2 -2
- data/lib/sidekiq/job_retry.rb +55 -35
- data/lib/sidekiq/launcher.rb +6 -4
- data/lib/sidekiq/metrics/deploy.rb +47 -0
- data/lib/sidekiq/metrics/query.rb +153 -0
- data/lib/sidekiq/metrics/shared.rb +94 -0
- data/lib/sidekiq/metrics/tracking.rb +134 -0
- data/lib/sidekiq/middleware/chain.rb +70 -35
- data/lib/sidekiq/middleware/current_attributes.rb +14 -12
- data/lib/sidekiq/monitor.rb +1 -1
- data/lib/sidekiq/paginator.rb +9 -1
- data/lib/sidekiq/processor.rb +9 -3
- data/lib/sidekiq/rails.rb +10 -11
- data/lib/sidekiq/redis_connection.rb +0 -2
- data/lib/sidekiq/scheduled.rb +43 -15
- data/lib/sidekiq/version.rb +1 -1
- data/lib/sidekiq/web/action.rb +3 -3
- data/lib/sidekiq/web/application.rb +21 -5
- data/lib/sidekiq/web/helpers.rb +17 -4
- data/lib/sidekiq/web.rb +5 -1
- data/lib/sidekiq/worker.rb +6 -3
- data/lib/sidekiq.rb +9 -1
- data/sidekiq.gemspec +2 -2
- data/web/assets/javascripts/application.js +2 -1
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard.js +0 -17
- data/web/assets/javascripts/graph.js +16 -0
- data/web/assets/javascripts/metrics.js +262 -0
- data/web/assets/stylesheets/application.css +44 -1
- data/web/locales/el.yml +43 -19
- data/web/locales/en.yml +7 -0
- data/web/locales/ja.yml +7 -0
- data/web/locales/zh-cn.yml +36 -11
- data/web/locales/zh-tw.yml +32 -7
- data/web/views/_nav.erb +1 -1
- data/web/views/busy.erb +7 -2
- data/web/views/dashboard.erb +1 -0
- data/web/views/metrics.erb +69 -0
- data/web/views/metrics_for_job.erb +87 -0
- data/web/views/queue.erb +5 -1
- metadata +29 -8
- data/lib/sidekiq/.DS_Store +0 -0
data/lib/sidekiq/fetch.rb
CHANGED
@@ -33,7 +33,7 @@ module Sidekiq # :nodoc:
|
|
33
33
|
@queues = @config[:queues].map { |q| "queue:#{q}" }
|
34
34
|
if @strictly_ordered_queues
|
35
35
|
@queues.uniq!
|
36
|
-
@queues << TIMEOUT
|
36
|
+
@queues << {timeout: TIMEOUT}
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
@@ -83,7 +83,7 @@ module Sidekiq # :nodoc:
|
|
83
83
|
else
|
84
84
|
permute = @queues.shuffle
|
85
85
|
permute.uniq!
|
86
|
-
permute << TIMEOUT
|
86
|
+
permute << {timeout: TIMEOUT}
|
87
87
|
permute
|
88
88
|
end
|
89
89
|
end
|
data/lib/sidekiq/job_retry.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sidekiq/scheduled"
|
4
|
-
require "sidekiq/api"
|
5
|
-
|
6
3
|
require "zlib"
|
7
4
|
require "base64"
|
5
|
+
require "sidekiq/component"
|
8
6
|
|
9
7
|
module Sidekiq
|
10
8
|
##
|
@@ -91,7 +89,7 @@ module Sidekiq
|
|
91
89
|
|
92
90
|
msg = Sidekiq.load_json(jobstr)
|
93
91
|
if msg["retry"]
|
94
|
-
|
92
|
+
process_retry(nil, msg, queue, e)
|
95
93
|
else
|
96
94
|
Sidekiq.death_handlers.each do |handler|
|
97
95
|
handler.call(msg, e)
|
@@ -128,7 +126,7 @@ module Sidekiq
|
|
128
126
|
end
|
129
127
|
|
130
128
|
raise e unless msg["retry"]
|
131
|
-
|
129
|
+
process_retry(jobinst, msg, queue, e)
|
132
130
|
# We've handled this error associated with this job, don't
|
133
131
|
# need to handle it at the global level
|
134
132
|
raise Skip
|
@@ -139,7 +137,7 @@ module Sidekiq
|
|
139
137
|
# Note that +jobinst+ can be nil here if an error is raised before we can
|
140
138
|
# instantiate the job instance. All access must be guarded and
|
141
139
|
# best effort.
|
142
|
-
def
|
140
|
+
def process_retry(jobinst, msg, queue, exception)
|
143
141
|
max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
|
144
142
|
|
145
143
|
msg["queue"] = (msg["retry_queue"] || queue)
|
@@ -170,19 +168,49 @@ module Sidekiq
|
|
170
168
|
msg["error_backtrace"] = compress_backtrace(lines)
|
171
169
|
end
|
172
170
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
171
|
+
# Goodbye dear message, you (re)tried your best I'm sure.
|
172
|
+
return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts
|
173
|
+
|
174
|
+
strategy, delay = delay_for(jobinst, count, exception)
|
175
|
+
case strategy
|
176
|
+
when :discard
|
177
|
+
return # poof!
|
178
|
+
when :kill
|
179
|
+
return retries_exhausted(jobinst, msg, exception)
|
180
|
+
end
|
181
|
+
|
182
|
+
# Logging here can break retries if the logging device raises ENOSPC #3979
|
183
|
+
# logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
|
184
|
+
jitter = rand(10) * (count + 1)
|
185
|
+
retry_at = Time.now.to_f + delay + jitter
|
186
|
+
payload = Sidekiq.dump_json(msg)
|
187
|
+
redis do |conn|
|
188
|
+
conn.zadd("retry", retry_at.to_s, payload)
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
# returns (strategy, seconds)
|
193
|
+
def delay_for(jobinst, count, exception)
|
194
|
+
rv = begin
|
195
|
+
# sidekiq_retry_in can return two different things:
|
196
|
+
# 1. When to retry next, as an integer of seconds
|
197
|
+
# 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
|
198
|
+
jobinst&.sidekiq_retry_in_block&.call(count, exception)
|
199
|
+
rescue Exception => e
|
200
|
+
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
|
201
|
+
nil
|
202
|
+
end
|
203
|
+
|
204
|
+
delay = (count**4) + 15
|
205
|
+
if Integer === rv && rv > 0
|
206
|
+
delay = rv
|
207
|
+
elsif rv == :discard
|
208
|
+
return [:discard, nil] # do nothing, job goes poof
|
209
|
+
elsif rv == :kill
|
210
|
+
return [:kill, nil]
|
185
211
|
end
|
212
|
+
|
213
|
+
[:default, delay]
|
186
214
|
end
|
187
215
|
|
188
216
|
def retries_exhausted(jobinst, msg, exception)
|
@@ -205,7 +233,15 @@ module Sidekiq
|
|
205
233
|
def send_to_morgue(msg)
|
206
234
|
logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
|
207
235
|
payload = Sidekiq.dump_json(msg)
|
208
|
-
|
236
|
+
now = Time.now.to_f
|
237
|
+
|
238
|
+
config.redis do |conn|
|
239
|
+
conn.multi do |xa|
|
240
|
+
xa.zadd("dead", now.to_s, payload)
|
241
|
+
xa.zremrangebyscore("dead", "-inf", now - config[:dead_timeout_in_seconds])
|
242
|
+
xa.zremrangebyrank("dead", 0, - config[:dead_max_jobs])
|
243
|
+
end
|
244
|
+
end
|
209
245
|
end
|
210
246
|
|
211
247
|
def retry_attempts_from(msg_retry, default)
|
@@ -216,22 +252,6 @@ module Sidekiq
|
|
216
252
|
end
|
217
253
|
end
|
218
254
|
|
219
|
-
def delay_for(jobinst, count, exception)
|
220
|
-
jitter = rand(10) * (count + 1)
|
221
|
-
if jobinst&.sidekiq_retry_in_block
|
222
|
-
custom_retry_in = retry_in(jobinst, count, exception).to_i
|
223
|
-
return custom_retry_in + jitter if custom_retry_in > 0
|
224
|
-
end
|
225
|
-
(count**4) + 15 + jitter
|
226
|
-
end
|
227
|
-
|
228
|
-
def retry_in(jobinst, count, exception)
|
229
|
-
jobinst.sidekiq_retry_in_block.call(count, exception)
|
230
|
-
rescue Exception => e
|
231
|
-
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
|
232
|
-
nil
|
233
|
-
end
|
234
|
-
|
235
255
|
def exception_caused_by_shutdown?(e, checked_causes = [])
|
236
256
|
return false unless e.cause
|
237
257
|
|
data/lib/sidekiq/launcher.rb
CHANGED
@@ -79,12 +79,14 @@ module Sidekiq
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def clear_heartbeat
|
82
|
+
flush_stats
|
83
|
+
|
82
84
|
# Remove record from Redis since we are shutting down.
|
83
85
|
# Note we don't stop the heartbeat thread; if the process
|
84
86
|
# doesn't actually exit, it'll reappear in the Web UI.
|
85
87
|
redis do |conn|
|
86
88
|
conn.pipelined do |pipeline|
|
87
|
-
pipeline.srem("processes", identity)
|
89
|
+
pipeline.srem("processes", [identity])
|
88
90
|
pipeline.unlink("#{identity}:work")
|
89
91
|
end
|
90
92
|
end
|
@@ -98,7 +100,7 @@ module Sidekiq
|
|
98
100
|
❤
|
99
101
|
end
|
100
102
|
|
101
|
-
def
|
103
|
+
def flush_stats
|
102
104
|
fails = Processor::FAILURE.reset
|
103
105
|
procd = Processor::PROCESSED.reset
|
104
106
|
return if fails + procd == 0
|
@@ -122,7 +124,6 @@ module Sidekiq
|
|
122
124
|
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
123
125
|
end
|
124
126
|
end
|
125
|
-
at_exit(&method(:flush_stats))
|
126
127
|
|
127
128
|
def ❤
|
128
129
|
key = identity
|
@@ -164,7 +165,7 @@ module Sidekiq
|
|
164
165
|
|
165
166
|
_, exists, _, _, msg = redis { |conn|
|
166
167
|
conn.multi { |transaction|
|
167
|
-
transaction.sadd("processes", key)
|
168
|
+
transaction.sadd("processes", [key])
|
168
169
|
transaction.exists?(key)
|
169
170
|
transaction.hmset(key, "info", to_json,
|
170
171
|
"busy", curstate.size,
|
@@ -179,6 +180,7 @@ module Sidekiq
|
|
179
180
|
|
180
181
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
181
182
|
fire_event(:heartbeat) unless exists
|
183
|
+
fire_event(:beat, oneshot: false)
|
182
184
|
|
183
185
|
return unless msg
|
184
186
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
# This file is designed to be required within the user's
|
5
|
+
# deployment script; it should need a bare minimum of dependencies.
|
6
|
+
#
|
7
|
+
# require "sidekiq/metrics/deploy"
|
8
|
+
# gitdesc = `git log -1 --format="%h %s"`.strip
|
9
|
+
# d = Sidekiq::Metrics::Deploy.new
|
10
|
+
# d.mark(label: gitdesc)
|
11
|
+
#
|
12
|
+
# Note that you cannot mark more than once per minute. This is a feature, not a bug.
|
13
|
+
module Sidekiq
|
14
|
+
module Metrics
|
15
|
+
class Deploy
|
16
|
+
MARK_TTL = 90 * 24 * 60 * 60 # 90 days
|
17
|
+
|
18
|
+
def initialize(pool = Sidekiq.redis_pool)
|
19
|
+
@pool = pool
|
20
|
+
end
|
21
|
+
|
22
|
+
def mark(at: Time.now, label: "")
|
23
|
+
# we need to round the timestamp so that we gracefully
|
24
|
+
# handle an expected common error in marking deploys:
|
25
|
+
# having every process mark its deploy, leading
|
26
|
+
# to N marks for each deploy. Instead we round the time
|
27
|
+
# to the minute so that multiple marks within that minute
|
28
|
+
# will all naturally rollup into one mark per minute.
|
29
|
+
whence = at.utc
|
30
|
+
floor = Time.utc(whence.year, whence.month, whence.mday, whence.hour, whence.min, 0)
|
31
|
+
datecode = floor.strftime("%Y%m%d")
|
32
|
+
key = "#{datecode}-marks"
|
33
|
+
@pool.with do |c|
|
34
|
+
c.pipelined do |pipe|
|
35
|
+
pipe.hsetnx(key, floor.iso8601, label)
|
36
|
+
pipe.expire(key, MARK_TTL)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def fetch(date = Time.now.utc.to_date)
|
42
|
+
datecode = date.strftime("%Y%m%d")
|
43
|
+
@pool.with { |c| c.hgetall("#{datecode}-marks") }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require "sidekiq"
|
2
|
+
require "date"
|
3
|
+
require "set"
|
4
|
+
|
5
|
+
require "sidekiq/metrics/shared"
|
6
|
+
|
7
|
+
module Sidekiq
|
8
|
+
module Metrics
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
10
|
+
# Caller sets a set of attributes to act as filters. {#fetch} will call
|
11
|
+
# Redis and return a Hash of results.
|
12
|
+
#
|
13
|
+
# NB: all metrics and times/dates are UTC only. We specifically do not
|
14
|
+
# support timezones.
|
15
|
+
class Query
|
16
|
+
def initialize(pool: Sidekiq.redis_pool, now: Time.now)
|
17
|
+
@time = now.utc
|
18
|
+
@pool = pool
|
19
|
+
@klass = nil
|
20
|
+
end
|
21
|
+
|
22
|
+
# Get metric data for all jobs from the last hour
|
23
|
+
def top_jobs(minutes: 60)
|
24
|
+
result = Result.new
|
25
|
+
|
26
|
+
time = @time
|
27
|
+
redis_results = @pool.with do |conn|
|
28
|
+
conn.pipelined do |pipe|
|
29
|
+
minutes.times do |idx|
|
30
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
31
|
+
pipe.hgetall key
|
32
|
+
result.prepend_bucket time
|
33
|
+
time -= 60
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
time = @time
|
39
|
+
redis_results.each do |hash|
|
40
|
+
hash.each do |k, v|
|
41
|
+
kls, metric = k.split("|")
|
42
|
+
result.job_results[kls].add_metric metric, time, v.to_i
|
43
|
+
end
|
44
|
+
time -= 60
|
45
|
+
end
|
46
|
+
|
47
|
+
result.marks = fetch_marks(result.starts_at..result.ends_at)
|
48
|
+
|
49
|
+
result
|
50
|
+
end
|
51
|
+
|
52
|
+
def for_job(klass, minutes: 60)
|
53
|
+
result = Result.new
|
54
|
+
|
55
|
+
time = @time
|
56
|
+
redis_results = @pool.with do |conn|
|
57
|
+
conn.pipelined do |pipe|
|
58
|
+
minutes.times do |idx|
|
59
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
60
|
+
pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
|
61
|
+
result.prepend_bucket time
|
62
|
+
time -= 60
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
time = @time
|
68
|
+
@pool.with do |conn|
|
69
|
+
redis_results.each do |(ms, p, f)|
|
70
|
+
result.job_results[klass].add_metric "ms", time, ms.to_i if ms
|
71
|
+
result.job_results[klass].add_metric "p", time, p.to_i if p
|
72
|
+
result.job_results[klass].add_metric "f", time, f.to_i if f
|
73
|
+
result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time)
|
74
|
+
time -= 60
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
result.marks = fetch_marks(result.starts_at..result.ends_at)
|
79
|
+
|
80
|
+
result
|
81
|
+
end
|
82
|
+
|
83
|
+
class Result < Struct.new(:starts_at, :ends_at, :size, :buckets, :job_results, :marks)
|
84
|
+
def initialize
|
85
|
+
super
|
86
|
+
self.buckets = []
|
87
|
+
self.marks = []
|
88
|
+
self.job_results = Hash.new { |h, k| h[k] = JobResult.new }
|
89
|
+
end
|
90
|
+
|
91
|
+
def prepend_bucket(time)
|
92
|
+
buckets.unshift time.strftime("%H:%M")
|
93
|
+
self.ends_at ||= time
|
94
|
+
self.starts_at = time
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class JobResult < Struct.new(:series, :hist, :totals)
|
99
|
+
def initialize
|
100
|
+
super
|
101
|
+
self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
|
102
|
+
self.hist = Hash.new { |h, k| h[k] = [] }
|
103
|
+
self.totals = Hash.new(0)
|
104
|
+
end
|
105
|
+
|
106
|
+
def add_metric(metric, time, value)
|
107
|
+
totals[metric] += value
|
108
|
+
series[metric][time.strftime("%H:%M")] += value
|
109
|
+
|
110
|
+
# Include timing measurements in seconds for convenience
|
111
|
+
add_metric("s", time, value / 1000.0) if metric == "ms"
|
112
|
+
end
|
113
|
+
|
114
|
+
def add_hist(time, hist_result)
|
115
|
+
hist[time.strftime("%H:%M")] = hist_result
|
116
|
+
end
|
117
|
+
|
118
|
+
def total_avg(metric = "ms")
|
119
|
+
completed = totals["p"] - totals["f"]
|
120
|
+
totals[metric].to_f / completed
|
121
|
+
end
|
122
|
+
|
123
|
+
def series_avg(metric = "ms")
|
124
|
+
series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
|
125
|
+
completed = series.dig("p", bucket) - series.dig("f", bucket)
|
126
|
+
result[bucket] = (completed == 0) ? 0 : value.to_f / completed
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
class MarkResult < Struct.new(:time, :label)
|
132
|
+
def bucket
|
133
|
+
time.strftime("%H:%M")
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
private
|
138
|
+
|
139
|
+
def fetch_marks(time_range)
|
140
|
+
[].tap do |result|
|
141
|
+
marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
|
142
|
+
|
143
|
+
marks.each do |timestamp, label|
|
144
|
+
time = Time.parse(timestamp)
|
145
|
+
if time_range.cover? time
|
146
|
+
result << MarkResult.new(time, label)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "concurrent"
|
2
|
+
|
3
|
+
module Sidekiq
|
4
|
+
module Metrics
|
5
|
+
# TODO Support apps without concurrent-ruby
|
6
|
+
Counter = ::Concurrent::AtomicFixnum
|
7
|
+
|
8
|
+
# Implements space-efficient but statistically useful histogram storage.
|
9
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
10
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
11
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
12
|
+
# NB: needs to be thread-safe or resilient to races.
|
13
|
+
#
|
14
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
15
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
16
|
+
# than 1000 jobs/sec for a full minute of a specific type.
|
17
|
+
class Histogram
|
18
|
+
include Enumerable
|
19
|
+
|
20
|
+
# This number represents the maximum milliseconds for this bucket.
|
21
|
+
# 20 means all job executions up to 20ms, e.g. if a job takes
|
22
|
+
# 280ms, it'll increment bucket[7]. Note we can track job executions
|
23
|
+
# up to about 5.5 minutes. After that, it's assumed you're probably
|
24
|
+
# not too concerned with its performance.
|
25
|
+
BUCKET_INTERVALS = [
|
26
|
+
20, 30, 45, 65, 100,
|
27
|
+
150, 225, 335, 500, 750,
|
28
|
+
1100, 1700, 2500, 3800, 5750,
|
29
|
+
8500, 13000, 20000, 30000, 45000,
|
30
|
+
65000, 100000, 150000, 225000, 335000,
|
31
|
+
Float::INFINITY # the "maybe your job is too long" bucket
|
32
|
+
]
|
33
|
+
LABELS = [
|
34
|
+
"20ms", "30ms", "45ms", "65ms", "100ms",
|
35
|
+
"150ms", "225ms", "335ms", "500ms", "750ms",
|
36
|
+
"1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
|
37
|
+
"8.5s", "13s", "20s", "30s", "45s",
|
38
|
+
"65s", "100s", "150s", "225s", "335s",
|
39
|
+
"Slow"
|
40
|
+
]
|
41
|
+
|
42
|
+
FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
|
43
|
+
GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
|
44
|
+
GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
|
45
|
+
GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
|
46
|
+
GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
|
47
|
+
GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
|
48
|
+
GET u16 #24 GET u16 #25".split
|
49
|
+
|
50
|
+
def each
|
51
|
+
buckets.each { |counter| yield counter.value }
|
52
|
+
end
|
53
|
+
|
54
|
+
def label(idx)
|
55
|
+
LABELS[idx]
|
56
|
+
end
|
57
|
+
|
58
|
+
attr_reader :buckets
|
59
|
+
def initialize(klass)
|
60
|
+
@klass = klass
|
61
|
+
@buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
|
62
|
+
end
|
63
|
+
|
64
|
+
def record_time(ms)
|
65
|
+
index_to_use = BUCKET_INTERVALS.each_index do |idx|
|
66
|
+
break idx if ms < BUCKET_INTERVALS[idx]
|
67
|
+
end
|
68
|
+
|
69
|
+
@buckets[index_to_use].increment
|
70
|
+
end
|
71
|
+
|
72
|
+
def fetch(conn, now = Time.now)
|
73
|
+
window = now.utc.strftime("%d-%H:%-M")
|
74
|
+
key = "#{@klass}-#{window}"
|
75
|
+
conn.bitfield(key, *FETCH)
|
76
|
+
end
|
77
|
+
|
78
|
+
def persist(conn, now = Time.now)
|
79
|
+
buckets, @buckets = @buckets, []
|
80
|
+
window = now.utc.strftime("%d-%H:%-M")
|
81
|
+
key = "#{@klass}-#{window}"
|
82
|
+
cmd = [key, "OVERFLOW", "SAT"]
|
83
|
+
buckets.each_with_index do |counter, idx|
|
84
|
+
val = counter.value
|
85
|
+
cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
|
86
|
+
end
|
87
|
+
|
88
|
+
conn.bitfield(*cmd) if cmd.size > 3
|
89
|
+
conn.expire(key, 86400)
|
90
|
+
key
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require "time"
|
2
|
+
require "sidekiq"
|
3
|
+
require "sidekiq/metrics/shared"
|
4
|
+
|
5
|
+
# This file contains the components which track execution metrics within Sidekiq.
|
6
|
+
module Sidekiq
|
7
|
+
module Metrics
|
8
|
+
class ExecutionTracker
|
9
|
+
include Sidekiq::Component
|
10
|
+
|
11
|
+
def initialize(config)
|
12
|
+
@config = config
|
13
|
+
@jobs = Hash.new(0)
|
14
|
+
@totals = Hash.new(0)
|
15
|
+
@grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
|
16
|
+
@lock = Mutex.new
|
17
|
+
end
|
18
|
+
|
19
|
+
def track(queue, klass)
|
20
|
+
start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
21
|
+
time_ms = 0
|
22
|
+
begin
|
23
|
+
begin
|
24
|
+
yield
|
25
|
+
ensure
|
26
|
+
finish = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
27
|
+
time_ms = finish - start
|
28
|
+
end
|
29
|
+
# We don't track time for failed jobs as they can have very unpredictable
|
30
|
+
# execution times. It is more important to know average time for successful jobs so we
|
31
|
+
# can better recognize when a perf regression is introduced.
|
32
|
+
@lock.synchronize {
|
33
|
+
@grams[klass].record_time(time_ms)
|
34
|
+
@jobs["#{klass}|ms"] += time_ms
|
35
|
+
@totals["ms"] += time_ms
|
36
|
+
}
|
37
|
+
rescue Exception
|
38
|
+
@lock.synchronize {
|
39
|
+
@jobs["#{klass}|f"] += 1
|
40
|
+
@totals["f"] += 1
|
41
|
+
}
|
42
|
+
raise
|
43
|
+
ensure
|
44
|
+
@lock.synchronize {
|
45
|
+
@jobs["#{klass}|p"] += 1
|
46
|
+
@totals["p"] += 1
|
47
|
+
}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
LONG_TERM = 90 * 24 * 60 * 60
|
52
|
+
MID_TERM = 7 * 24 * 60 * 60
|
53
|
+
SHORT_TERM = 8 * 60 * 60
|
54
|
+
|
55
|
+
def flush(time = Time.now)
|
56
|
+
totals, jobs, grams = reset
|
57
|
+
procd = totals["p"]
|
58
|
+
fails = totals["f"]
|
59
|
+
return if procd == 0 && fails == 0
|
60
|
+
|
61
|
+
now = time.utc
|
62
|
+
nowdate = now.strftime("%Y%m%d")
|
63
|
+
nowhour = now.strftime("%Y%m%d|%-H")
|
64
|
+
nowmin = now.strftime("%Y%m%d|%-H:%-M")
|
65
|
+
count = 0
|
66
|
+
|
67
|
+
redis do |conn|
|
68
|
+
if grams.size > 0
|
69
|
+
conn.pipelined do |pipe|
|
70
|
+
grams.each do |_, gram|
|
71
|
+
gram.persist(pipe, now)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
[
|
77
|
+
["j", jobs, nowdate, LONG_TERM],
|
78
|
+
["j", jobs, nowhour, MID_TERM],
|
79
|
+
["j", jobs, nowmin, SHORT_TERM]
|
80
|
+
].each do |prefix, data, bucket, ttl|
|
81
|
+
# Quietly seed the new 7.0 stats format so migration is painless.
|
82
|
+
conn.pipelined do |xa|
|
83
|
+
stats = "#{prefix}|#{bucket}"
|
84
|
+
# logger.debug "Flushing metrics #{stats}"
|
85
|
+
data.each_pair do |key, value|
|
86
|
+
xa.hincrby stats, key, value
|
87
|
+
count += 1
|
88
|
+
end
|
89
|
+
xa.expire(stats, ttl)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
logger.info "Flushed #{count} metrics"
|
93
|
+
count
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
def reset
|
100
|
+
@lock.synchronize {
|
101
|
+
array = [@totals, @jobs, @grams]
|
102
|
+
@totals = Hash.new(0)
|
103
|
+
@jobs = Hash.new(0)
|
104
|
+
@grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
|
105
|
+
array
|
106
|
+
}
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
class Middleware
|
111
|
+
include Sidekiq::ServerMiddleware
|
112
|
+
|
113
|
+
def initialize(options)
|
114
|
+
@exec = options
|
115
|
+
end
|
116
|
+
|
117
|
+
def call(_instance, hash, queue, &block)
|
118
|
+
@exec.track(queue, hash["wrapped"] || hash["class"], &block)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
if ENV["SIDEKIQ_METRICS_BETA"] == "1"
|
125
|
+
Sidekiq.configure_server do |config|
|
126
|
+
exec = Sidekiq::Metrics::ExecutionTracker.new(config)
|
127
|
+
config.server_middleware do |chain|
|
128
|
+
chain.add Sidekiq::Metrics::Middleware, exec
|
129
|
+
end
|
130
|
+
config.on(:beat) do
|
131
|
+
exec.flush
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|