sidekiq 6.2.2 → 8.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changes.md +726 -11
- data/LICENSE.txt +9 -0
- data/README.md +70 -39
- data/bin/kiq +17 -0
- data/bin/lint-herb +13 -0
- data/bin/multi_queue_bench +271 -0
- data/bin/sidekiq +4 -9
- data/bin/sidekiqload +214 -115
- data/bin/sidekiqmon +4 -1
- data/bin/webload +69 -0
- data/lib/active_job/queue_adapters/sidekiq_adapter.rb +124 -0
- data/lib/generators/sidekiq/job_generator.rb +71 -0
- data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +3 -3
- data/lib/generators/sidekiq/templates/{worker_spec.rb.erb → job_spec.rb.erb} +1 -1
- data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
- data/lib/sidekiq/api.rb +729 -264
- data/lib/sidekiq/capsule.rb +135 -0
- data/lib/sidekiq/cli.rb +124 -100
- data/lib/sidekiq/client.rb +153 -106
- data/lib/sidekiq/component.rb +132 -0
- data/lib/sidekiq/config.rb +320 -0
- data/lib/sidekiq/deploy.rb +64 -0
- data/lib/sidekiq/embedded.rb +64 -0
- data/lib/sidekiq/fetch.rb +27 -26
- data/lib/sidekiq/iterable_job.rb +56 -0
- data/lib/sidekiq/job/interrupt_handler.rb +24 -0
- data/lib/sidekiq/job/iterable/active_record_enumerator.rb +53 -0
- data/lib/sidekiq/job/iterable/csv_enumerator.rb +47 -0
- data/lib/sidekiq/job/iterable/enumerators.rb +135 -0
- data/lib/sidekiq/job/iterable.rb +322 -0
- data/lib/sidekiq/job.rb +397 -5
- data/lib/sidekiq/job_logger.rb +23 -32
- data/lib/sidekiq/job_retry.rb +141 -68
- data/lib/sidekiq/job_util.rb +113 -0
- data/lib/sidekiq/launcher.rb +122 -98
- data/lib/sidekiq/loader.rb +57 -0
- data/lib/sidekiq/logger.rb +27 -106
- data/lib/sidekiq/manager.rb +41 -43
- data/lib/sidekiq/metrics/query.rb +184 -0
- data/lib/sidekiq/metrics/shared.rb +109 -0
- data/lib/sidekiq/metrics/tracking.rb +153 -0
- data/lib/sidekiq/middleware/chain.rb +96 -51
- data/lib/sidekiq/middleware/current_attributes.rb +120 -0
- data/lib/sidekiq/middleware/i18n.rb +8 -4
- data/lib/sidekiq/middleware/modules.rb +23 -0
- data/lib/sidekiq/monitor.rb +16 -6
- data/lib/sidekiq/paginator.rb +37 -10
- data/lib/sidekiq/processor.rb +105 -87
- data/lib/sidekiq/profiler.rb +73 -0
- data/lib/sidekiq/rails.rb +49 -36
- data/lib/sidekiq/redis_client_adapter.rb +117 -0
- data/lib/sidekiq/redis_connection.rb +55 -86
- data/lib/sidekiq/ring_buffer.rb +32 -0
- data/lib/sidekiq/scheduled.rb +106 -50
- data/lib/sidekiq/systemd.rb +2 -0
- data/lib/sidekiq/test_api.rb +331 -0
- data/lib/sidekiq/testing/inline.rb +2 -30
- data/lib/sidekiq/testing.rb +2 -342
- data/lib/sidekiq/transaction_aware_client.rb +59 -0
- data/lib/sidekiq/tui/controls.rb +53 -0
- data/lib/sidekiq/tui/filtering.rb +53 -0
- data/lib/sidekiq/tui/tabs/base_tab.rb +204 -0
- data/lib/sidekiq/tui/tabs/busy.rb +118 -0
- data/lib/sidekiq/tui/tabs/dead.rb +19 -0
- data/lib/sidekiq/tui/tabs/home.rb +144 -0
- data/lib/sidekiq/tui/tabs/metrics.rb +131 -0
- data/lib/sidekiq/tui/tabs/queues.rb +95 -0
- data/lib/sidekiq/tui/tabs/retries.rb +19 -0
- data/lib/sidekiq/tui/tabs/scheduled.rb +19 -0
- data/lib/sidekiq/tui/tabs/set_tab.rb +96 -0
- data/lib/sidekiq/tui/tabs.rb +15 -0
- data/lib/sidekiq/tui.rb +382 -0
- data/lib/sidekiq/version.rb +6 -1
- data/lib/sidekiq/web/action.rb +149 -64
- data/lib/sidekiq/web/application.rb +376 -268
- data/lib/sidekiq/web/config.rb +117 -0
- data/lib/sidekiq/web/helpers.rb +213 -87
- data/lib/sidekiq/web/router.rb +61 -74
- data/lib/sidekiq/web.rb +71 -100
- data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
- data/lib/sidekiq.rb +95 -196
- data/sidekiq.gemspec +14 -11
- data/web/assets/images/logo.png +0 -0
- data/web/assets/images/status.png +0 -0
- data/web/assets/javascripts/application.js +171 -57
- data/web/assets/javascripts/base-charts.js +120 -0
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-adapter-date-fns.min.js +7 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard-charts.js +194 -0
- data/web/assets/javascripts/dashboard.js +41 -274
- data/web/assets/javascripts/metrics.js +280 -0
- data/web/assets/stylesheets/style.css +776 -0
- data/web/locales/ar.yml +72 -70
- data/web/locales/cs.yml +64 -62
- data/web/locales/da.yml +62 -53
- data/web/locales/de.yml +67 -65
- data/web/locales/el.yml +45 -24
- data/web/locales/en.yml +93 -69
- data/web/locales/es.yml +91 -68
- data/web/locales/fa.yml +67 -65
- data/web/locales/fr.yml +82 -67
- data/web/locales/gd.yml +110 -0
- data/web/locales/he.yml +67 -64
- data/web/locales/hi.yml +61 -59
- data/web/locales/it.yml +94 -54
- data/web/locales/ja.yml +74 -68
- data/web/locales/ko.yml +54 -52
- data/web/locales/lt.yml +68 -66
- data/web/locales/nb.yml +63 -61
- data/web/locales/nl.yml +54 -52
- data/web/locales/pl.yml +47 -45
- data/web/locales/{pt-br.yml → pt-BR.yml} +85 -56
- data/web/locales/pt.yml +53 -51
- data/web/locales/ru.yml +69 -66
- data/web/locales/sv.yml +55 -53
- data/web/locales/ta.yml +62 -60
- data/web/locales/tr.yml +102 -0
- data/web/locales/uk.yml +87 -61
- data/web/locales/ur.yml +66 -64
- data/web/locales/vi.yml +69 -67
- data/web/locales/zh-CN.yml +107 -0
- data/web/locales/{zh-tw.yml → zh-TW.yml} +44 -9
- data/web/views/_footer.html.erb +32 -0
- data/web/views/_job_info.html.erb +115 -0
- data/web/views/_metrics_period_select.html.erb +15 -0
- data/web/views/_nav.html.erb +45 -0
- data/web/views/_paging.html.erb +26 -0
- data/web/views/_poll_link.html.erb +4 -0
- data/web/views/_summary.html.erb +40 -0
- data/web/views/busy.html.erb +151 -0
- data/web/views/dashboard.html.erb +104 -0
- data/web/views/dead.html.erb +38 -0
- data/web/views/filtering.html.erb +6 -0
- data/web/views/layout.html.erb +26 -0
- data/web/views/metrics.html.erb +85 -0
- data/web/views/metrics_for_job.html.erb +58 -0
- data/web/views/morgue.html.erb +69 -0
- data/web/views/profiles.html.erb +43 -0
- data/web/views/queue.html.erb +57 -0
- data/web/views/queues.html.erb +46 -0
- data/web/views/retries.html.erb +77 -0
- data/web/views/retry.html.erb +39 -0
- data/web/views/scheduled.html.erb +64 -0
- data/web/views/{scheduled_job_info.erb → scheduled_job_info.html.erb} +3 -3
- metadata +130 -61
- data/LICENSE +0 -9
- data/lib/generators/sidekiq/worker_generator.rb +0 -57
- data/lib/sidekiq/delay.rb +0 -41
- data/lib/sidekiq/exception_handler.rb +0 -27
- data/lib/sidekiq/extensions/action_mailer.rb +0 -48
- data/lib/sidekiq/extensions/active_record.rb +0 -43
- data/lib/sidekiq/extensions/class_methods.rb +0 -43
- data/lib/sidekiq/extensions/generic_proxy.rb +0 -33
- data/lib/sidekiq/util.rb +0 -95
- data/lib/sidekiq/web/csrf_protection.rb +0 -180
- data/lib/sidekiq/worker.rb +0 -244
- data/web/assets/stylesheets/application-dark.css +0 -147
- data/web/assets/stylesheets/application-rtl.css +0 -246
- data/web/assets/stylesheets/application.css +0 -1053
- data/web/assets/stylesheets/bootstrap-rtl.min.css +0 -9
- data/web/assets/stylesheets/bootstrap.css +0 -5
- data/web/locales/zh-cn.yml +0 -68
- data/web/views/_footer.erb +0 -20
- data/web/views/_job_info.erb +0 -89
- data/web/views/_nav.erb +0 -52
- data/web/views/_paging.erb +0 -23
- data/web/views/_poll_link.erb +0 -7
- data/web/views/_status.erb +0 -4
- data/web/views/_summary.erb +0 -40
- data/web/views/busy.erb +0 -132
- data/web/views/dashboard.erb +0 -83
- data/web/views/dead.erb +0 -34
- data/web/views/layout.erb +0 -42
- data/web/views/morgue.erb +0 -78
- data/web/views/queue.erb +0 -55
- data/web/views/queues.erb +0 -38
- data/web/views/retries.erb +0 -83
- data/web/views/retry.erb +0 -34
- data/web/views/scheduled.erb +0 -57
data/lib/sidekiq/manager.rb
CHANGED
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "sidekiq/util"
|
|
4
3
|
require "sidekiq/processor"
|
|
5
|
-
require "sidekiq/fetch"
|
|
6
|
-
require "set"
|
|
7
4
|
|
|
8
5
|
module Sidekiq
|
|
9
6
|
##
|
|
@@ -21,46 +18,38 @@ module Sidekiq
|
|
|
21
18
|
# the shutdown process. The other tasks are performed by other threads.
|
|
22
19
|
#
|
|
23
20
|
class Manager
|
|
24
|
-
include
|
|
21
|
+
include Sidekiq::Component
|
|
25
22
|
|
|
26
23
|
attr_reader :workers
|
|
27
|
-
attr_reader :
|
|
24
|
+
attr_reader :capsule
|
|
28
25
|
|
|
29
|
-
def initialize(
|
|
30
|
-
|
|
31
|
-
@
|
|
32
|
-
@count = options[:concurrency] || 10
|
|
26
|
+
def initialize(capsule)
|
|
27
|
+
@config = @capsule = capsule
|
|
28
|
+
@count = capsule.concurrency
|
|
33
29
|
raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
|
|
34
30
|
|
|
35
31
|
@done = false
|
|
36
32
|
@workers = Set.new
|
|
33
|
+
@plock = Mutex.new
|
|
37
34
|
@count.times do
|
|
38
|
-
@workers << Processor.new(
|
|
35
|
+
@workers << Processor.new(@config, &method(:processor_result))
|
|
39
36
|
end
|
|
40
|
-
@plock = Mutex.new
|
|
41
37
|
end
|
|
42
38
|
|
|
43
39
|
def start
|
|
44
|
-
@workers.each
|
|
45
|
-
x.start
|
|
46
|
-
end
|
|
40
|
+
@workers.each(&:start)
|
|
47
41
|
end
|
|
48
42
|
|
|
49
43
|
def quiet
|
|
50
44
|
return if @done
|
|
51
45
|
@done = true
|
|
52
46
|
|
|
53
|
-
logger.info { "Terminating quiet
|
|
54
|
-
@workers.each
|
|
55
|
-
fire_event(:quiet, reverse: true)
|
|
47
|
+
logger.info { "Terminating quiet threads for #{capsule.name} capsule" }
|
|
48
|
+
@workers.each(&:terminate)
|
|
56
49
|
end
|
|
57
50
|
|
|
58
|
-
# hack for quicker development / testing environment #2774
|
|
59
|
-
PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
|
|
60
|
-
|
|
61
51
|
def stop(deadline)
|
|
62
52
|
quiet
|
|
63
|
-
fire_event(:shutdown, reverse: true)
|
|
64
53
|
|
|
65
54
|
# some of the shutdown events can be async,
|
|
66
55
|
# we don't have any way to know when they're done but
|
|
@@ -68,29 +57,20 @@ module Sidekiq
|
|
|
68
57
|
sleep PAUSE_TIME
|
|
69
58
|
return if @workers.empty?
|
|
70
59
|
|
|
71
|
-
logger.info { "Pausing to allow
|
|
72
|
-
|
|
73
|
-
while remaining > PAUSE_TIME
|
|
74
|
-
return if @workers.empty?
|
|
75
|
-
sleep PAUSE_TIME
|
|
76
|
-
remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
77
|
-
end
|
|
60
|
+
logger.info { "Pausing to allow jobs to finish..." }
|
|
61
|
+
wait_for(deadline) { @workers.empty? }
|
|
78
62
|
return if @workers.empty?
|
|
79
63
|
|
|
80
64
|
hard_shutdown
|
|
65
|
+
ensure
|
|
66
|
+
capsule.stop
|
|
81
67
|
end
|
|
82
68
|
|
|
83
|
-
def
|
|
69
|
+
def processor_result(processor, reason = nil)
|
|
84
70
|
@plock.synchronize do
|
|
85
71
|
@workers.delete(processor)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def processor_died(processor, reason)
|
|
90
|
-
@plock.synchronize do
|
|
91
|
-
@workers.delete(processor)
|
|
92
|
-
unless @done
|
|
93
|
-
p = Processor.new(self, options)
|
|
72
|
+
if !@done && @count > @workers.size
|
|
73
|
+
p = Processor.new(@config, &method(:processor_result))
|
|
94
74
|
@workers << p
|
|
95
75
|
p.start
|
|
96
76
|
end
|
|
@@ -104,7 +84,7 @@ module Sidekiq
|
|
|
104
84
|
private
|
|
105
85
|
|
|
106
86
|
def hard_shutdown
|
|
107
|
-
# We've reached the timeout and we still have busy
|
|
87
|
+
# We've reached the timeout and we still have busy threads.
|
|
108
88
|
# They must die but their jobs shall live on.
|
|
109
89
|
cleanup = nil
|
|
110
90
|
@plock.synchronize do
|
|
@@ -114,22 +94,40 @@ module Sidekiq
|
|
|
114
94
|
if cleanup.size > 0
|
|
115
95
|
jobs = cleanup.map { |p| p.job }.compact
|
|
116
96
|
|
|
117
|
-
logger.warn { "Terminating #{cleanup.size} busy
|
|
118
|
-
logger.
|
|
97
|
+
logger.warn { "Terminating #{cleanup.size} busy threads" }
|
|
98
|
+
logger.debug { "Jobs still in progress #{jobs.inspect}" }
|
|
119
99
|
|
|
120
100
|
# Re-enqueue unfinished jobs
|
|
121
101
|
# NOTE: You may notice that we may push a job back to redis before
|
|
122
|
-
# the
|
|
102
|
+
# the thread is terminated. This is ok because Sidekiq's
|
|
123
103
|
# contract says that jobs are run AT LEAST once. Process termination
|
|
124
104
|
# is delayed until we're certain the jobs are back in Redis because
|
|
125
105
|
# it is worse to lose a job than to run it twice.
|
|
126
|
-
|
|
127
|
-
strategy.bulk_requeue(jobs, @options)
|
|
106
|
+
capsule.fetcher.bulk_requeue(jobs)
|
|
128
107
|
end
|
|
129
108
|
|
|
130
109
|
cleanup.each do |processor|
|
|
131
110
|
processor.kill
|
|
132
111
|
end
|
|
112
|
+
|
|
113
|
+
# when this method returns, we immediately call `exit` which may not give
|
|
114
|
+
# the remaining threads time to run `ensure` blocks, etc. We pause here up
|
|
115
|
+
# to 3 seconds to give threads a minimal amount of time to run `ensure` blocks.
|
|
116
|
+
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 3
|
|
117
|
+
wait_for(deadline) { @workers.empty? }
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# hack for quicker development / testing environment #2774
|
|
121
|
+
PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
|
|
122
|
+
|
|
123
|
+
# Wait for the condblock to return true or for the deadline to pass.
|
|
124
|
+
def wait_for(deadline, &condblock)
  # `deadline` is compared against the monotonic clock, so it must be a
  # CLOCK_MONOTONIC reading rather than a wall-clock time.
  monotonic_now = -> { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) }

  # Poll every PAUSE_TIME seconds. The condition is not evaluated at all once
  # PAUSE_TIME or less remains before the deadline.
  until deadline - monotonic_now.call <= PAUSE_TIME
    return if condblock.call

    sleep PAUSE_TIME
  end
end
|
|
134
132
|
end
|
|
135
133
|
end
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
require "sidekiq"
|
|
5
|
+
require "sidekiq/metrics/shared"
|
|
6
|
+
|
|
7
|
+
module Sidekiq
|
|
8
|
+
module Metrics
|
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
|
10
|
+
# Caller sets a set of attributes to act as filters. {#fetch} will call
|
|
11
|
+
# Redis and return a Hash of results.
|
|
12
|
+
#
|
|
13
|
+
# NB: all metrics and times/dates are UTC only. We explicitly do not
|
|
14
|
+
# support timezones.
|
|
15
|
+
class Query
  # @param pool [#with, nil] Redis connection pool; when nil the pool of
  #   Sidekiq.default_configuration is used.
  # @param now [Time] reference time; buckets are walked backwards from it.
  def initialize(pool: nil, now: Time.now)
    # getutc returns a new Time; `utc` would switch the caller's object to
    # UTC in place (and raise on a frozen Time).
    @time = now.getutc
    @pool = pool || Sidekiq.default_configuration.redis_pool
    @klass = nil
  end

  # granularity => [seconds between buckets, proc building the Redis key for a Time]
  ROLLUPS = {
    # minutely aggregates per minute
    minutely: [60, ->(time) { time.strftime("j|%y%m%d|%-H:%M") }],
    # hourly aggregates every 10 minutes so we'll have six data points per hour
    hourly: [600, ->(time) {
      m = time.min
      mins = (m < 10) ? "0" : m.to_s[0]
      time.strftime("j|%y%m%d|%-H:#{mins}")
    }]
  }

  # Get metric data for all jobs, by default from the last hour.
  # +class_filter+: return only results for classes matching filter
  # +minutes+: the number of fine-grained minute buckets to retrieve
  # +hours+: the number of coarser-grained 10-minute buckets to retrieve, in hours
  #
  # Returns a Result whose job_results are keyed by job class name.
  def top_jobs(class_filter: nil, minutes: nil, hours: nil)
    result, _minutes, count, stride, keyproc = prepare_window(minutes, hours)

    time = @time
    redis_results = @pool.with do |conn|
      conn.pipelined do |pipe|
        count.times do
          pipe.hgetall keyproc.call(time)
          time -= stride
        end
      end
    end

    result.starts_at = time
    # Replay the same walk so each reply is attributed to the bucket it was read from.
    time = @time
    redis_results.each do |hash|
      hash.each do |k, v|
        kls, metric = k.split("|")
        next if class_filter && !class_filter.match?(kls)
        result.job_results[kls].add_metric metric, time, v.to_i
      end
      time -= stride
    end

    result.marks = fetch_marks(result.starts_at..result.ends_at, result.granularity)
    result
  end

  # Get metric data for a single job class.
  # +klass+: the job class name used in the stored "<klass>|<metric>" fields
  # +minutes+ / +hours+: same meaning as in #top_jobs
  #
  # Histogram data is attached to each bucket whenever +minutes+ is in effect.
  def for_job(klass, minutes: nil, hours: nil)
    result, minutes, count, stride, keyproc = prepare_window(minutes, hours)

    time = @time
    redis_results = @pool.with do |conn|
      conn.pipelined do |pipe|
        count.times do
          pipe.hmget keyproc.call(time), "#{klass}|ms", "#{klass}|p", "#{klass}|f"
          time -= stride
        end
      end
    end

    result.starts_at = time
    time = @time
    @pool.with do |conn|
      redis_results.each do |(ms, p, f)|
        # job_results[klass] is looked up lazily so an all-empty window
        # leaves job_results empty.
        result.job_results[klass].add_metric "ms", time, ms.to_i if ms
        result.job_results[klass].add_metric "p", time, p.to_i if p
        result.job_results[klass].add_metric "f", time, f.to_i if f
        result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time).reverse if minutes
        time -= stride
      end
    end

    result.marks = fetch_marks(result.starts_at..result.ends_at, result.granularity)
    result
  end

  # Container for one query: the window bounds, per-job results and marks.
  class Result < Struct.new(:granularity, :starts_at, :ends_at, :size, :job_results, :marks)
    def initialize(granularity = :minutely)
      super
      self.granularity = granularity
      self.marks = []
      self.job_results = Hash.new { |h, k| h[k] = JobResult.new(granularity) }
    end
  end

  # Accumulated metrics for one job class: per-bucket series, histograms and totals.
  class JobResult < Struct.new(:granularity, :series, :hist, :totals)
    def initialize(granularity = :minutely)
      super
      self.granularity = granularity
      self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
      self.hist = Hash.new { |h, k| h[k] = [] }
      self.totals = Hash.new(0)
    end

    # Adds +value+ to both the running total and the time bucket of +metric+.
    def add_metric(metric, time, value)
      totals[metric] += value
      series[metric][Query.bkt_time_s(time, granularity)] += value

      # Include timing measurements in seconds for convenience
      add_metric("s", time, value / 1000.0) if metric == "ms"
    end

    # Stores the histogram counts for the bucket containing +time+.
    def add_hist(time, hist_result)
      hist[Query.bkt_time_s(time, granularity)] = hist_result
    end

    # Average of +metric+ per completed job (processed minus failed); 0 when none completed.
    def total_avg(metric = "ms")
      completed = totals["p"] - totals["f"]
      return 0 if completed.zero?
      totals[metric].to_f / completed
    end

    # Per-bucket version of #total_avg; returns a Hash of bucket => average.
    def series_avg(metric = "ms")
      series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
        completed = series.dig("p", bucket) - series.dig("f", bucket)
        result[bucket] = (completed == 0) ? 0 : value.to_f / completed
      end
    end
  end

  MarkResult = Struct.new(:time, :label, :bucket)

  # Returns the ISO 8601 UTC string naming the bucket that contains +time+.
  def self.bkt_time_s(time, granularity)
    # truncate time to ten minutes ("8:40", not "8:43") or one minute
    truncation = (granularity == :hourly) ? 600 : 60
    Time.at(time.to_i - time.to_i % truncation).utc.iso8601
  end

  private

  # Applies the defaults and limits shared by #top_jobs and #for_job and
  # builds the empty Result for the window.
  #
  # Returns [result, minutes, count, stride, keyproc] where +minutes+ is the
  # normalized value, +count+ the number of buckets to read and
  # +stride+/+keyproc+ the ROLLUPS entry for the chosen granularity.
  def prepare_window(minutes, hours)
    minutes = 60 unless minutes || hours

    # DoS protection, sanity check
    minutes = 60 if minutes && minutes > 480
    hours = 72 if hours && hours > 72

    granularity = hours ? :hourly : :minutely
    result = Result.new(granularity)
    result.ends_at = @time
    count = hours ? hours * 6 : minutes
    stride, keyproc = ROLLUPS[granularity]
    [result, minutes, count, stride, keyproc]
  end

  # Reads the "<YYYYMMDD>-marks" hash for the query's day and returns a
  # MarkResult for every entry whose timestamp lies inside +time_range+.
  def fetch_marks(time_range, granularity)
    [].tap do |result|
      marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }

      marks.each do |timestamp, label|
        time = Time.parse(timestamp)
        if time_range.cover? time
          result << MarkResult.new(time, label, Query.bkt_time_s(time, granularity))
        end
      end
    end
  end
end
|
|
183
|
+
end
|
|
184
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sidekiq
|
|
4
|
+
module Metrics
|
|
5
|
+
# Integer counter whose reads and writes are serialized through a Mutex.
class Counter
  def initialize
    @lock = Mutex.new
    @value = 0
  end

  # Adds one to the counter and returns the new count.
  def increment
    @lock.synchronize do
      @value += 1
    end
  end

  # Returns the current count.
  def value
    @lock.synchronize do
      @value
    end
  end
end
|
|
19
|
+
|
|
20
|
+
# Implements space-efficient but statistically useful histogram storage.
|
|
21
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
|
22
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
|
23
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
|
24
|
+
# NB: needs to be thread-safe or resilient to races.
|
|
25
|
+
#
|
|
26
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
|
27
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
|
28
|
+
# than 1000 job/sec for a full minute of a specific type (i.e. overflow 65,536).
|
|
29
|
+
#
|
|
30
|
+
# Histograms are only stored at the fine-grained level, they are not rolled up
|
|
31
|
+
# for longer-term buckets.
|
|
32
|
+
class Histogram
  include Enumerable

  # This number represents the maximum milliseconds for this bucket.
  # 20 means all job executions up to 20ms, e.g. if a job takes
  # 280ms, it'll increment bucket[7]. Note we can track job executions
  # up to about 5.5 minutes. After that, it's assumed you're probably
  # not too concerned with its performance.
  BUCKET_INTERVALS = [
    20, 30, 45, 65, 100,
    150, 225, 335, 500, 750,
    1100, 1700, 2500, 3800, 5750,
    8500, 13000, 20000, 30000, 45000,
    65000, 100000, 150000, 225000, 335000,
    1e20 # the "maybe your job is too long" bucket
  ].freeze
  LABELS = [
    "20ms", "30ms", "45ms", "65ms", "100ms",
    "150ms", "225ms", "335ms", "500ms", "750ms",
    "1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
    "8.5s", "13s", "20s", "30s", "45s",
    "65s", "100s", "150s", "225s", "335s",
    "Slow"
  ].freeze
  # BITFIELD arguments reading one u16 counter per bucket ("GET u16 #0" ...
  # "GET u16 #25"). Derived from BUCKET_INTERVALS so the two cannot drift apart.
  FETCH = BUCKET_INTERVALS.each_index.flat_map { |idx| ["GET", "u16", "##{idx}"] }.freeze
  HISTOGRAM_TTL = 8 * 60 * 60

  # Yields the current count of every bucket, in bucket order.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    buckets.each { |counter| yield counter.value }
  end

  # Human-readable label for the bucket at +idx+.
  def label(idx)
    LABELS[idx]
  end

  attr_reader :buckets

  # @param klass [String] job class name, used in the Redis key
  def initialize(klass)
    @klass = klass
    @buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
  end

  # Increments the first bucket whose upper bound exceeds +ms+. Values that
  # exceed every bound (or do not compare, e.g. NaN) land in the last bucket.
  def record_time(ms)
    index_to_use = BUCKET_INTERVALS.index { |limit| ms < limit } || (BUCKET_INTERVALS.size - 1)

    @buckets[index_to_use].increment
  end

  # Reads the stored bucket counts for the minute containing +now+.
  # Returns whatever conn.bitfield_ro returns.
  def fetch(conn, now = Time.now)
    conn.bitfield_ro(key_for(now), *FETCH)
  end

  # Writes all non-zero in-memory counts to Redis with saturating increments
  # and refreshes the key's TTL. The in-memory buckets are emptied, so the
  # instance is not meant to record further times afterwards.
  # Returns the Redis key used.
  def persist(conn, now = Time.now)
    buckets, @buckets = @buckets, []
    key = key_for(now)
    cmd = [key, "OVERFLOW", "SAT"]
    buckets.each_with_index do |counter, idx|
      val = counter.value
      cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
    end

    # cmd.size == 3 means no bucket had a count, so there is nothing to write
    conn.bitfield(*cmd) if cmd.size > 3
    conn.expire(key, HISTOGRAM_TTL)
    key
  end

  private

  # Redis key for this class and the UTC minute containing +now+.
  # getutc is used so the caller's Time object is not switched to UTC in place.
  def key_for(now)
    window = now.getutc.strftime("%-d-%-H:%-M")
    "h|#{@klass}-#{window}"
  end
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
require "sidekiq"
|
|
5
|
+
require "sidekiq/metrics/shared"
|
|
6
|
+
|
|
7
|
+
# This file contains the components which track execution metrics within Sidekiq.
|
|
8
|
+
module Sidekiq
|
|
9
|
+
module Metrics
|
|
10
|
+
# Collects per-job-class execution counts, timings and histograms in memory
# and writes them to Redis in batches via #flush.
class ExecutionTracker
  # NOTE(review): mono_ms, redis and logger are not defined in this class;
  # presumably they come from Sidekiq::Component. Confirm against that module.
  include Sidekiq::Component

  def initialize(config)
    @config = config
    @jobs = Hash.new(0)
    @totals = Hash.new(0)
    @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
    @lock = Mutex.new
  end

  # Runs the given block and records its outcome for +klass+.
  # Every call counts as processed ("p"). Any exception other than
  # JobRetry::Skip also counts as failed ("f"). Exceptions are always re-raised.
  # +queue+ is accepted but not used here.
  def track(queue, klass)
    start = mono_ms
    time_ms = 0
    begin
      begin
        yield
      ensure
        # measured even when the block raises, so the Skip branch below can use it
        finish = mono_ms
        time_ms = finish - start
      end
      # We don't track time for failed jobs as they can have very unpredictable
      # execution times. more important to know average time for successful jobs so we
      # can better recognize when a perf regression is introduced.
      track_time(klass, time_ms)
    rescue JobRetry::Skip
      # This is raised when iterable job is interrupted.
      track_time(klass, time_ms)
      raise
    rescue Exception
      # Deliberately broad: the failure is only counted, then re-raised untouched.
      @lock.synchronize {
        @jobs["#{klass}|f"] += 1
        @totals["f"] += 1
      }
      raise
    ensure
      @lock.synchronize {
        @jobs["#{klass}|p"] += 1
        @totals["p"] += 1
      }
    end
  end

  # LONG_TERM = 90 * 24 * 60 * 60
  MID_TERM = 3 * 24 * 60 * 60
  SHORT_TERM = 8 * 60 * 60

  # Writes everything collected since the previous flush to Redis and clears
  # the in-memory state. +time+ selects the buckets to write into.
  # Returns nil when nothing was processed or failed; otherwise the value of
  # the redis block, whose last expression is the number of HINCRBY commands issued.
  def flush(time = Time.now)
    totals, jobs, grams = reset
    procd = totals["p"]
    fails = totals["f"]
    return if procd == 0 && fails == 0

    # getutc returns a new Time; `utc` would switch the caller's object to UTC in place
    now = time.getutc
    # nowdate = now.strftime("%Y%m%d")
    # "250214|8:4" is the 10 minute bucket for Feb 14 2025, 08:43
    nowmid = now.strftime("%y%m%d|%-H:%M")[0..-2]
    # "250214|8:43" is the 1 minute bucket for Feb 14 2025, 08:43
    nowshort = now.strftime("%y%m%d|%-H:%M")
    count = 0

    redis do |conn|
      # persist fine-grained histogram data
      if grams.size > 0
        conn.pipelined do |pipe|
          grams.each do |_, gram|
            gram.persist(pipe, now)
          end
        end
      end

      # persist coarse grained execution count + execution millis.
      # note as of today we don't use or do anything with the
      # daily or hourly rollups.
      [
        # ["j", jobs, nowdate, LONG_TERM],
        ["j", jobs, nowmid, MID_TERM],
        ["j", jobs, nowshort, SHORT_TERM]
      ].each do |prefix, data, bucket, ttl|
        conn.pipelined do |xa|
          stats = "#{prefix}|#{bucket}"
          data.each_pair do |key, value|
            xa.hincrby stats, key, value
            count += 1
          end
          xa.expire(stats, ttl)
        end
      end
      # block form so the message is only built when debug logging is enabled
      logger.debug { "Flushed #{count} metrics" }
      count
    end
  end

  private

  # Records one execution time for +klass+ in its histogram and in the ms totals.
  def track_time(klass, time_ms)
    @lock.synchronize {
      @grams[klass].record_time(time_ms)
      @jobs["#{klass}|ms"] += time_ms
      @totals["ms"] += time_ms
    }
  end

  # Swaps in fresh collections under the lock and returns the previous
  # [totals, jobs, grams].
  def reset
    @lock.synchronize {
      array = [@totals, @jobs, @grams]
      reset_instance_variables
      array
    }
  end

  def reset_instance_variables
    @totals = Hash.new(0)
    @jobs = Hash.new(0)
    @grams = Hash.new { |hash, key| hash[key] = Histogram.new(key) }
  end
end
|
|
127
|
+
|
|
128
|
+
# Server middleware that passes each job execution to a tracker object.
class Middleware
  include Sidekiq::ServerMiddleware

  # +options+ is the tracker; it must respond to track(queue, klass) with a block.
  def initialize(options)
    @exec = options
  end

  # Tracks the job under its "wrapped" class name when present, otherwise
  # under its "class" name.
  def call(_instance, hash, queue, &block)
    job_class = hash["wrapped"] || hash["class"]
    @exec.track(queue, job_class, &block)
  end
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Install metrics tracking in server processes: one shared tracker is added
# to the server middleware chain and flushed on every :beat and on :exit.
Sidekiq.configure_server do |config|
  exec = Sidekiq::Metrics::ExecutionTracker.new(config)

  config.server_middleware { |chain| chain.add Sidekiq::Metrics::Middleware, exec }

  %i[beat exit].each do |event|
    config.on(event) { exec.flush }
  end
end
|