sidekiq 6.4.2 → 6.5.12
- checksums.yaml +4 -4
- data/Changes.md +89 -0
- data/bin/sidekiqload +17 -5
- data/lib/sidekiq/api.rb +196 -45
- data/lib/sidekiq/cli.rb +46 -32
- data/lib/sidekiq/client.rb +6 -6
- data/lib/sidekiq/component.rb +65 -0
- data/lib/sidekiq/delay.rb +1 -1
- data/lib/sidekiq/fetch.rb +18 -16
- data/lib/sidekiq/job_retry.rb +60 -39
- data/lib/sidekiq/job_util.rb +7 -3
- data/lib/sidekiq/launcher.rb +24 -21
- data/lib/sidekiq/logger.rb +1 -1
- data/lib/sidekiq/manager.rb +23 -20
- data/lib/sidekiq/metrics/deploy.rb +47 -0
- data/lib/sidekiq/metrics/query.rb +153 -0
- data/lib/sidekiq/metrics/shared.rb +94 -0
- data/lib/sidekiq/metrics/tracking.rb +134 -0
- data/lib/sidekiq/middleware/chain.rb +82 -38
- data/lib/sidekiq/middleware/current_attributes.rb +18 -12
- data/lib/sidekiq/middleware/i18n.rb +2 -0
- data/lib/sidekiq/middleware/modules.rb +21 -0
- data/lib/sidekiq/monitor.rb +1 -1
- data/lib/sidekiq/paginator.rb +11 -3
- data/lib/sidekiq/processor.rb +21 -15
- data/lib/sidekiq/rails.rb +12 -13
- data/lib/sidekiq/redis_client_adapter.rb +154 -0
- data/lib/sidekiq/redis_connection.rb +78 -47
- data/lib/sidekiq/ring_buffer.rb +29 -0
- data/lib/sidekiq/scheduled.rb +53 -24
- data/lib/sidekiq/testing.rb +1 -1
- data/lib/sidekiq/transaction_aware_client.rb +45 -0
- data/lib/sidekiq/version.rb +1 -1
- data/lib/sidekiq/web/action.rb +3 -3
- data/lib/sidekiq/web/application.rb +21 -5
- data/lib/sidekiq/web/helpers.rb +18 -5
- data/lib/sidekiq/web.rb +5 -1
- data/lib/sidekiq/worker.rb +8 -4
- data/lib/sidekiq.rb +87 -18
- data/sidekiq.gemspec +2 -2
- data/web/assets/javascripts/application.js +2 -1
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard.js +0 -17
- data/web/assets/javascripts/graph.js +16 -0
- data/web/assets/javascripts/metrics.js +262 -0
- data/web/assets/stylesheets/application.css +44 -1
- data/web/locales/el.yml +43 -19
- data/web/locales/en.yml +7 -0
- data/web/locales/ja.yml +7 -0
- data/web/locales/pt-br.yml +27 -9
- data/web/locales/zh-cn.yml +36 -11
- data/web/locales/zh-tw.yml +32 -7
- data/web/views/_nav.erb +1 -1
- data/web/views/busy.erb +7 -2
- data/web/views/dashboard.erb +1 -0
- data/web/views/metrics.erb +69 -0
- data/web/views/metrics_for_job.erb +87 -0
- data/web/views/queue.erb +5 -1
- metadata +34 -9
- data/lib/sidekiq/exception_handler.rb +0 -27
- data/lib/sidekiq/util.rb +0 -108
data/lib/sidekiq/launcher.rb
CHANGED
@@ -3,11 +3,12 @@
 require "sidekiq/manager"
 require "sidekiq/fetch"
 require "sidekiq/scheduled"
+require "sidekiq/ring_buffer"

 module Sidekiq
   # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
-    include Util
+    include Sidekiq::Component

     STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years

@@ -22,11 +23,11 @@ module Sidekiq
     attr_accessor :manager, :poller, :fetcher

     def initialize(options)
+      @config = options
       options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
-      @poller = Sidekiq::Scheduled::Poller.new
+      @poller = Sidekiq::Scheduled::Poller.new(options)
       @done = false
-      @options = options
     end

     def run
@@ -45,7 +46,7 @@ module Sidekiq

     # Shuts down this Sidekiq instance. Waits up to the deadline for all jobs to complete.
     def stop
-      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
+      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @config[:timeout]

       @done = true
       @manager.quiet
@@ -55,8 +56,8 @@ module Sidekiq

       # Requeue everything in case there was a thread which fetched a job while the process was stopped.
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = @options[:fetch]
-      strategy.bulk_requeue([], @options)
+      strategy = @config[:fetch]
+      strategy.bulk_requeue([], @config)

       clear_heartbeat
     end
@@ -74,16 +75,18 @@ module Sidekiq
         heartbeat
         sleep BEAT_PAUSE
       end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      logger.info("Heartbeat stopping...")
     end

     def clear_heartbeat
+      flush_stats
+
       # Remove record from Redis since we are shutting down.
       # Note we don't stop the heartbeat thread; if the process
       # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
+      redis do |conn|
         conn.pipelined do |pipeline|
-          pipeline.srem("processes", identity)
+          pipeline.srem("processes", [identity])
           pipeline.unlink("#{identity}:work")
         end
       end
@@ -97,7 +100,7 @@ module Sidekiq
       ❤
     end

-    def self.flush_stats
+    def flush_stats
       fails = Processor::FAILURE.reset
       procd = Processor::PROCESSED.reset
       return if fails + procd == 0
@@ -121,7 +124,6 @@ module Sidekiq
         Sidekiq.logger.warn("Unable to flush stats: #{ex}")
       end
     end
-    at_exit(&method(:flush_stats))

     def ❤
       key = identity
@@ -134,7 +136,7 @@ module Sidekiq

       nowdate = Time.now.utc.strftime("%Y-%m-%d")

-      Sidekiq.redis do |conn|
+      redis do |conn|
         conn.multi do |transaction|
           transaction.incrby("stat:processed", procd)
           transaction.incrby("stat:processed:#{nowdate}", procd)
@@ -161,15 +163,15 @@ module Sidekiq
       fails = procd = 0
       kb = memory_usage(::Process.pid)

-      _, exists, _, _, msg = Sidekiq.redis { |conn|
+      _, exists, _, _, msg = redis { |conn|
         conn.multi { |transaction|
-          transaction.sadd("processes", key)
+          transaction.sadd("processes", [key])
           transaction.exists?(key)
           transaction.hmset(key, "info", to_json,
             "busy", curstate.size,
             "beat", Time.now.to_f,
             "rtt_us", rtt,
-            "quiet", @done,
+            "quiet", @done.to_s,
             "rss", kb)
           transaction.expire(key, 60)
           transaction.rpop("#{key}-signals")
@@ -178,6 +180,7 @@ module Sidekiq

       # first heartbeat or recovering from an outage and need to reestablish our heartbeat
       fire_event(:heartbeat) unless exists
+      fire_event(:beat, oneshot: false)

       return unless msg

@@ -199,7 +202,7 @@ module Sidekiq

     def check_rtt
       a = b = 0
-      Sidekiq.redis do |x|
+      redis do |x|
         a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
         x.ping
         b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
@@ -210,7 +213,7 @@ module Sidekiq
       # Workable is < 10,000µs
       # Log a warning if it's a disaster.
       if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
-        Sidekiq.logger.warn <<~EOM
+        logger.warn <<~EOM
           Your Redis network connection is performing extremely poorly.
           Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
           Ensure Redis is running in the same AZ or datacenter as Sidekiq.
@@ -247,10 +250,10 @@ module Sidekiq
         "hostname" => hostname,
         "started_at" => Time.now.to_f,
         "pid" => ::Process.pid,
-        "tag" => @options[:tag] || "",
-        "concurrency" => @options[:concurrency],
-        "queues" => @options[:queues].uniq,
-        "labels" => @options[:labels],
+        "tag" => @config[:tag] || "",
+        "concurrency" => @config[:concurrency],
+        "queues" => @config[:queues].uniq,
+        "labels" => @config[:labels],
         "identity" => identity
       }
     end
data/lib/sidekiq/logger.rb
CHANGED
data/lib/sidekiq/manager.rb
CHANGED
@@ -1,6 +1,5 @@
 # frozen_string_literal: true

-require "sidekiq/util"
 require "sidekiq/processor"
 require "sidekiq/fetch"
 require "set"
@@ -21,29 +20,26 @@ module Sidekiq
   # the shutdown process. The other tasks are performed by other threads.
   #
   class Manager
-    include Util
+    include Sidekiq::Component

     attr_reader :workers
-    attr_reader :options

     def initialize(options = {})
+      @config = options
       logger.debug { options.inspect }
-      @options = options
       @count = options[:concurrency] || 10
       raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1

       @done = false
       @workers = Set.new
       @count.times do
-        @workers << Processor.new(self, options)
+        @workers << Processor.new(@config, &method(:processor_result))
       end
       @plock = Mutex.new
     end

     def start
-      @workers.each do |x|
-        x.start
-      end
+      @workers.each(&:start)
     end

     def quiet
@@ -51,7 +47,7 @@ module Sidekiq
       @done = true

       logger.info { "Terminating quiet threads" }
-      @workers.each { |x| x.terminate }
+      @workers.each(&:terminate)
       fire_event(:quiet, reverse: true)
     end

@@ -72,17 +68,11 @@ module Sidekiq
       hard_shutdown
     end

-    def processor_stopped(processor)
-      @plock.synchronize do
-        @workers.delete(processor)
-      end
-    end
-
-    def processor_died(processor, reason)
+    def processor_result(processor, reason = nil)
       @plock.synchronize do
         @workers.delete(processor)
         unless @done
-          p = Processor.new(self, options)
+          p = Processor.new(@config, &method(:processor_result))
           @workers << p
           p.start
         end
@@ -107,7 +97,7 @@ module Sidekiq
       jobs = cleanup.map { |p| p.job }.compact

       logger.warn { "Terminating #{cleanup.size} busy threads" }
-      logger.warn { "Jobs still in progress #{jobs.inspect}" }
+      logger.debug { "Jobs still in progress #{jobs.inspect}" }

       # Re-enqueue unfinished jobs
       # NOTE: You may notice that we may push a job back to redis before
@@ -115,8 +105,8 @@ module Sidekiq
       # contract says that jobs are run AT LEAST once. Process termination
       # is delayed until we're certain the jobs are back in Redis because
       # it is worse to lose a job than to run it twice.
-      strategy = @options[:fetch]
-      strategy.bulk_requeue(jobs, @options)
+      strategy = @config[:fetch]
+      strategy.bulk_requeue(jobs, @config)
     end

     cleanup.each do |processor|
@@ -129,5 +119,18 @@ module Sidekiq
       deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 3
       wait_for(deadline) { @workers.empty? }
     end
+
+    # hack for quicker development / testing environment #2774
+    PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
+
+    # Wait for the orblock to be true or the deadline passed.
+    def wait_for(deadline, &condblock)
+      remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      while remaining > PAUSE_TIME
+        return if condblock.call
+        sleep PAUSE_TIME
+        remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      end
+    end
   end
 end
data/lib/sidekiq/metrics/deploy.rb
ADDED
@@ -0,0 +1,47 @@
+require "sidekiq"
+require "time"
+
+# This file is designed to be required within the user's
+# deployment script; it should need a bare minimum of dependencies.
+#
+#   require "sidekiq/metrics/deploy"
+#   gitdesc = `git log -1 --format="%h %s"`.strip
+#   d = Sidekiq::Metrics::Deploy.new
+#   d.mark(label: gitdesc)
+#
+# Note that you cannot mark more than once per minute. This is a feature, not a bug.
+module Sidekiq
+  module Metrics
+    class Deploy
+      MARK_TTL = 90 * 24 * 60 * 60 # 90 days
+
+      def initialize(pool = Sidekiq.redis_pool)
+        @pool = pool
+      end
+
+      def mark(at: Time.now, label: "")
+        # we need to round the timestamp so that we gracefully
+        # handle an expected common error in marking deploys:
+        # having every process mark its deploy, leading
+        # to N marks for each deploy. Instead we round the time
+        # to the minute so that multiple marks within that minute
+        # will all naturally rollup into one mark per minute.
+        whence = at.utc
+        floor = Time.utc(whence.year, whence.month, whence.mday, whence.hour, whence.min, 0)
+        datecode = floor.strftime("%Y%m%d")
+        key = "#{datecode}-marks"
+        @pool.with do |c|
+          c.pipelined do |pipe|
+            pipe.hsetnx(key, floor.iso8601, label)
+            pipe.expire(key, MARK_TTL)
+          end
+        end
+      end
+
+      def fetch(date = Time.now.utc.to_date)
+        datecode = date.strftime("%Y%m%d")
+        @pool.with { |c| c.hgetall("#{datecode}-marks") }
+      end
+    end
+  end
+end
data/lib/sidekiq/metrics/query.rb
ADDED
@@ -0,0 +1,153 @@
+require "sidekiq"
+require "date"
+require "set"
+
+require "sidekiq/metrics/shared"
+
+module Sidekiq
+  module Metrics
+    # Allows caller to query for Sidekiq execution metrics within Redis.
+    # Caller sets a set of attributes to act as filters. {#fetch} will call
+    # Redis and return a Hash of results.
+    #
+    # NB: all metrics and times/dates are UTC only. We specifically do not
+    # support timezones.
+    class Query
+      def initialize(pool: Sidekiq.redis_pool, now: Time.now)
+        @time = now.utc
+        @pool = pool
+        @klass = nil
+      end
+
+      # Get metric data for all jobs from the last hour
+      def top_jobs(minutes: 60)
+        result = Result.new
+
+        time = @time
+        redis_results = @pool.with do |conn|
+          conn.pipelined do |pipe|
+            minutes.times do |idx|
+              key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
+              pipe.hgetall key
+              result.prepend_bucket time
+              time -= 60
+            end
+          end
+        end
+
+        time = @time
+        redis_results.each do |hash|
+          hash.each do |k, v|
+            kls, metric = k.split("|")
+            result.job_results[kls].add_metric metric, time, v.to_i
+          end
+          time -= 60
+        end
+
+        result.marks = fetch_marks(result.starts_at..result.ends_at)
+
+        result
+      end
+
+      def for_job(klass, minutes: 60)
+        result = Result.new
+
+        time = @time
+        redis_results = @pool.with do |conn|
+          conn.pipelined do |pipe|
+            minutes.times do |idx|
+              key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
+              pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
+              result.prepend_bucket time
+              time -= 60
+            end
+          end
+        end
+
+        time = @time
+        @pool.with do |conn|
+          redis_results.each do |(ms, p, f)|
+            result.job_results[klass].add_metric "ms", time, ms.to_i if ms
+            result.job_results[klass].add_metric "p", time, p.to_i if p
+            result.job_results[klass].add_metric "f", time, f.to_i if f
+            result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time)
+            time -= 60
+          end
+        end
+
+        result.marks = fetch_marks(result.starts_at..result.ends_at)
+
+        result
+      end
+
+      class Result < Struct.new(:starts_at, :ends_at, :size, :buckets, :job_results, :marks)
+        def initialize
+          super
+          self.buckets = []
+          self.marks = []
+          self.job_results = Hash.new { |h, k| h[k] = JobResult.new }
+        end
+
+        def prepend_bucket(time)
+          buckets.unshift time.strftime("%H:%M")
+          self.ends_at ||= time
+          self.starts_at = time
+        end
+      end
+
+      class JobResult < Struct.new(:series, :hist, :totals)
+        def initialize
+          super
+          self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
+          self.hist = Hash.new { |h, k| h[k] = [] }
+          self.totals = Hash.new(0)
+        end
+
+        def add_metric(metric, time, value)
+          totals[metric] += value
+          series[metric][time.strftime("%H:%M")] += value
+
+          # Include timing measurements in seconds for convenience
+          add_metric("s", time, value / 1000.0) if metric == "ms"
+        end
+
+        def add_hist(time, hist_result)
+          hist[time.strftime("%H:%M")] = hist_result
+        end
+
+        def total_avg(metric = "ms")
+          completed = totals["p"] - totals["f"]
+          totals[metric].to_f / completed
+        end
+
+        def series_avg(metric = "ms")
+          series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
+            completed = series.dig("p", bucket) - series.dig("f", bucket)
+            result[bucket] = (completed == 0) ? 0 : value.to_f / completed
+          end
+        end
+      end
+
+      class MarkResult < Struct.new(:time, :label)
+        def bucket
+          time.strftime("%H:%M")
+        end
+      end
+
+      private
+
+      def fetch_marks(time_range)
+        [].tap do |result|
+          marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
+
+          marks.each do |timestamp, label|
+            time = Time.parse(timestamp)
+            if time_range.cover? time
+              result << MarkResult.new(time, label)
+            end
+          end
+        end
+      end
+    end
+  end
+end
data/lib/sidekiq/metrics/shared.rb
ADDED
@@ -0,0 +1,94 @@
+require "concurrent"
+
+module Sidekiq
+  module Metrics
+    # TODO Support apps without concurrent-ruby
+    Counter = ::Concurrent::AtomicFixnum
+
+    # Implements space-efficient but statistically useful histogram storage.
+    # A precise time histogram stores every time. Instead we break times into a set of
+    # known buckets and increment counts of the associated time bucket. Even if we call
+    # the histogram a million times, we'll still only store 26 buckets.
+    # NB: needs to be thread-safe or resilient to races.
+    #
+    # To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
+    # per bucket per klass per minute. It's unlikely that most people will be executing more
+    # than 1000 jobs/sec for a full minute of a specific type.
+    class Histogram
+      include Enumerable
+
+      # This number represents the maximum milliseconds for this bucket.
+      # 20 means all job executions up to 20ms, e.g. if a job takes
+      # 280ms, it'll increment bucket[7]. Note we can track job executions
+      # up to about 5.5 minutes. After that, it's assumed you're probably
+      # not too concerned with its performance.
+      BUCKET_INTERVALS = [
+        20, 30, 45, 65, 100,
+        150, 225, 335, 500, 750,
+        1100, 1700, 2500, 3800, 5750,
+        8500, 13000, 20000, 30000, 45000,
+        65000, 100000, 150000, 225000, 335000,
+        Float::INFINITY # the "maybe your job is too long" bucket
+      ]
+      LABELS = [
+        "20ms", "30ms", "45ms", "65ms", "100ms",
+        "150ms", "225ms", "335ms", "500ms", "750ms",
+        "1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
+        "8.5s", "13s", "20s", "30s", "45s",
+        "65s", "100s", "150s", "225s", "335s",
+        "Slow"
+      ]
+
+      FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
+        GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
+        GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
+        GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
+        GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
+        GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
+        GET u16 #24 GET u16 #25".split
+
+      def each
+        buckets.each { |counter| yield counter.value }
+      end
+
+      def label(idx)
+        LABELS[idx]
+      end
+
+      attr_reader :buckets
+      def initialize(klass)
+        @klass = klass
+        @buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
+      end
+
+      def record_time(ms)
+        index_to_use = BUCKET_INTERVALS.each_index do |idx|
+          break idx if ms < BUCKET_INTERVALS[idx]
+        end
+
+        @buckets[index_to_use].increment
+      end
+
+      def fetch(conn, now = Time.now)
+        window = now.utc.strftime("%d-%H:%-M")
+        key = "#{@klass}-#{window}"
+        conn.bitfield(key, *FETCH)
+      end
+
+      def persist(conn, now = Time.now)
+        buckets, @buckets = @buckets, []
+        window = now.utc.strftime("%d-%H:%-M")
+        key = "#{@klass}-#{window}"
+        cmd = [key, "OVERFLOW", "SAT"]
+        buckets.each_with_index do |counter, idx|
+          val = counter.value
+          cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
+        end
+
+        conn.bitfield(*cmd) if cmd.size > 3
+        conn.expire(key, 86400)
+        key
+      end
+    end
+  end
+end
data/lib/sidekiq/metrics/tracking.rb
ADDED
@@ -0,0 +1,134 @@
+require "time"
+require "sidekiq"
+require "sidekiq/metrics/shared"
+
+# This file contains the components which track execution metrics within Sidekiq.
+module Sidekiq
+  module Metrics
+    class ExecutionTracker
+      include Sidekiq::Component
+
+      def initialize(config)
+        @config = config
+        @jobs = Hash.new(0)
+        @totals = Hash.new(0)
+        @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
+        @lock = Mutex.new
+      end
+
+      def track(queue, klass)
+        start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
+        time_ms = 0
+        begin
+          begin
+            yield
+          ensure
+            finish = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
+            time_ms = finish - start
+          end
+          # We don't track time for failed jobs as they can have very unpredictable
+          # execution times. It's more important to know the average time for successful
+          # jobs so we can better recognize when a perf regression is introduced.
+          @lock.synchronize {
+            @grams[klass].record_time(time_ms)
+            @jobs["#{klass}|ms"] += time_ms
+            @totals["ms"] += time_ms
+          }
+        rescue Exception
+          @lock.synchronize {
+            @jobs["#{klass}|f"] += 1
+            @totals["f"] += 1
+          }
+          raise
+        ensure
+          @lock.synchronize {
+            @jobs["#{klass}|p"] += 1
+            @totals["p"] += 1
+          }
+        end
+      end
+
+      LONG_TERM = 90 * 24 * 60 * 60
+      MID_TERM = 7 * 24 * 60 * 60
+      SHORT_TERM = 8 * 60 * 60
+
+      def flush(time = Time.now)
+        totals, jobs, grams = reset
+        procd = totals["p"]
+        fails = totals["f"]
+        return if procd == 0 && fails == 0
+
+        now = time.utc
+        nowdate = now.strftime("%Y%m%d")
+        nowhour = now.strftime("%Y%m%d|%-H")
+        nowmin = now.strftime("%Y%m%d|%-H:%-M")
+        count = 0
+
+        redis do |conn|
+          if grams.size > 0
+            conn.pipelined do |pipe|
+              grams.each do |_, gram|
+                gram.persist(pipe, now)
+              end
+            end
+          end
+
+          [
+            ["j", jobs, nowdate, LONG_TERM],
+            ["j", jobs, nowhour, MID_TERM],
+            ["j", jobs, nowmin, SHORT_TERM]
+          ].each do |prefix, data, bucket, ttl|
+            # Quietly seed the new 7.0 stats format so migration is painless.
+            conn.pipelined do |xa|
+              stats = "#{prefix}|#{bucket}"
+              # logger.debug "Flushing metrics #{stats}"
+              data.each_pair do |key, value|
+                xa.hincrby stats, key, value
+                count += 1
+              end
+              xa.expire(stats, ttl)
+            end
+          end
+          logger.info "Flushed #{count} metrics"
+          count
+        end
+      end
+
+      private
+
+      def reset
+        @lock.synchronize {
+          array = [@totals, @jobs, @grams]
+          @totals = Hash.new(0)
+          @jobs = Hash.new(0)
+          @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
+          array
+        }
+      end
+    end
+
+    class Middleware
+      include Sidekiq::ServerMiddleware
+
+      def initialize(options)
+        @exec = options
+      end
+
+      def call(_instance, hash, queue, &block)
+        @exec.track(queue, hash["wrapped"] || hash["class"], &block)
+      end
+    end
+  end
+end
+
+if ENV["SIDEKIQ_METRICS_BETA"] == "1"
+  Sidekiq.configure_server do |config|
+    exec = Sidekiq::Metrics::ExecutionTracker.new(config)
+    config.server_middleware do |chain|
+      chain.add Sidekiq::Metrics::Middleware, exec
+    end
+    config.on(:beat) do
+      exec.flush
+    end
+  end
+end