sidekiq 3.5.4 → 7.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Changes.md +992 -6
- data/LICENSE.txt +9 -0
- data/README.md +52 -43
- data/bin/sidekiq +22 -4
- data/bin/sidekiqload +209 -115
- data/bin/sidekiqmon +11 -0
- data/lib/generators/sidekiq/job_generator.rb +57 -0
- data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +2 -2
- data/lib/generators/sidekiq/templates/job_spec.rb.erb +6 -0
- data/lib/generators/sidekiq/templates/job_test.rb.erb +8 -0
- data/lib/sidekiq/api.rb +633 -295
- data/lib/sidekiq/capsule.rb +127 -0
- data/lib/sidekiq/cli.rb +270 -248
- data/lib/sidekiq/client.rb +139 -108
- data/lib/sidekiq/component.rb +68 -0
- data/lib/sidekiq/config.rb +287 -0
- data/lib/sidekiq/deploy.rb +62 -0
- data/lib/sidekiq/embedded.rb +61 -0
- data/lib/sidekiq/fetch.rb +53 -121
- data/lib/sidekiq/job.rb +374 -0
- data/lib/sidekiq/job_logger.rb +51 -0
- data/lib/sidekiq/job_retry.rb +301 -0
- data/lib/sidekiq/job_util.rb +107 -0
- data/lib/sidekiq/launcher.rb +241 -69
- data/lib/sidekiq/logger.rb +131 -0
- data/lib/sidekiq/manager.rb +88 -190
- data/lib/sidekiq/metrics/query.rb +155 -0
- data/lib/sidekiq/metrics/shared.rb +95 -0
- data/lib/sidekiq/metrics/tracking.rb +136 -0
- data/lib/sidekiq/middleware/chain.rb +114 -56
- data/lib/sidekiq/middleware/current_attributes.rb +95 -0
- data/lib/sidekiq/middleware/i18n.rb +8 -7
- data/lib/sidekiq/middleware/modules.rb +21 -0
- data/lib/sidekiq/monitor.rb +146 -0
- data/lib/sidekiq/paginator.rb +29 -16
- data/lib/sidekiq/processor.rb +238 -118
- data/lib/sidekiq/rails.rb +57 -27
- data/lib/sidekiq/redis_client_adapter.rb +111 -0
- data/lib/sidekiq/redis_connection.rb +49 -50
- data/lib/sidekiq/ring_buffer.rb +29 -0
- data/lib/sidekiq/scheduled.rb +173 -52
- data/lib/sidekiq/sd_notify.rb +149 -0
- data/lib/sidekiq/systemd.rb +24 -0
- data/lib/sidekiq/testing/inline.rb +7 -5
- data/lib/sidekiq/testing.rb +197 -65
- data/lib/sidekiq/transaction_aware_client.rb +44 -0
- data/lib/sidekiq/version.rb +4 -1
- data/lib/sidekiq/web/action.rb +93 -0
- data/lib/sidekiq/web/application.rb +463 -0
- data/lib/sidekiq/web/csrf_protection.rb +180 -0
- data/lib/sidekiq/web/helpers.rb +364 -0
- data/lib/sidekiq/web/router.rb +104 -0
- data/lib/sidekiq/web.rb +113 -216
- data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
- data/lib/sidekiq.rb +99 -142
- data/sidekiq.gemspec +26 -23
- data/web/assets/images/apple-touch-icon.png +0 -0
- data/web/assets/javascripts/application.js +163 -74
- data/web/assets/javascripts/base-charts.js +106 -0
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard-charts.js +182 -0
- data/web/assets/javascripts/dashboard.js +37 -280
- data/web/assets/javascripts/metrics.js +298 -0
- data/web/assets/stylesheets/application-dark.css +147 -0
- data/web/assets/stylesheets/application-rtl.css +153 -0
- data/web/assets/stylesheets/application.css +181 -198
- data/web/assets/stylesheets/bootstrap-rtl.min.css +9 -0
- data/web/assets/stylesheets/bootstrap.css +4 -8
- data/web/locales/ar.yml +87 -0
- data/web/locales/cs.yml +62 -52
- data/web/locales/da.yml +60 -53
- data/web/locales/de.yml +65 -53
- data/web/locales/el.yml +43 -24
- data/web/locales/en.yml +86 -62
- data/web/locales/es.yml +70 -53
- data/web/locales/fa.yml +80 -0
- data/web/locales/fr.yml +86 -56
- data/web/locales/gd.yml +99 -0
- data/web/locales/he.yml +80 -0
- data/web/locales/hi.yml +59 -59
- data/web/locales/it.yml +53 -53
- data/web/locales/ja.yml +78 -56
- data/web/locales/ko.yml +52 -52
- data/web/locales/lt.yml +83 -0
- data/web/locales/nb.yml +61 -61
- data/web/locales/nl.yml +52 -52
- data/web/locales/pl.yml +45 -45
- data/web/locales/pt-br.yml +83 -55
- data/web/locales/pt.yml +51 -51
- data/web/locales/ru.yml +68 -60
- data/web/locales/sv.yml +53 -53
- data/web/locales/ta.yml +60 -60
- data/web/locales/uk.yml +62 -61
- data/web/locales/ur.yml +80 -0
- data/web/locales/vi.yml +83 -0
- data/web/locales/zh-cn.yml +43 -16
- data/web/locales/zh-tw.yml +42 -8
- data/web/views/_footer.erb +10 -9
- data/web/views/_job_info.erb +26 -5
- data/web/views/_metrics_period_select.erb +12 -0
- data/web/views/_nav.erb +6 -20
- data/web/views/_paging.erb +3 -1
- data/web/views/_poll_link.erb +3 -6
- data/web/views/_summary.erb +7 -7
- data/web/views/busy.erb +87 -28
- data/web/views/dashboard.erb +51 -21
- data/web/views/dead.erb +4 -4
- data/web/views/filtering.erb +7 -0
- data/web/views/layout.erb +15 -5
- data/web/views/metrics.erb +91 -0
- data/web/views/metrics_for_job.erb +59 -0
- data/web/views/morgue.erb +25 -22
- data/web/views/queue.erb +35 -25
- data/web/views/queues.erb +23 -7
- data/web/views/retries.erb +28 -23
- data/web/views/retry.erb +5 -5
- data/web/views/scheduled.erb +19 -17
- data/web/views/scheduled_job_info.erb +1 -1
- metadata +86 -268
- data/.gitignore +0 -12
- data/.travis.yml +0 -16
- data/3.0-Upgrade.md +0 -70
- data/COMM-LICENSE +0 -95
- data/Contributing.md +0 -32
- data/Ent-Changes.md +0 -39
- data/Gemfile +0 -27
- data/LICENSE +0 -9
- data/Pro-2.0-Upgrade.md +0 -138
- data/Pro-Changes.md +0 -454
- data/Rakefile +0 -9
- data/bin/sidekiqctl +0 -93
- data/lib/generators/sidekiq/templates/worker_spec.rb.erb +0 -6
- data/lib/generators/sidekiq/templates/worker_test.rb.erb +0 -8
- data/lib/generators/sidekiq/worker_generator.rb +0 -49
- data/lib/sidekiq/actor.rb +0 -39
- data/lib/sidekiq/core_ext.rb +0 -105
- data/lib/sidekiq/exception_handler.rb +0 -30
- data/lib/sidekiq/extensions/action_mailer.rb +0 -56
- data/lib/sidekiq/extensions/active_record.rb +0 -39
- data/lib/sidekiq/extensions/class_methods.rb +0 -39
- data/lib/sidekiq/extensions/generic_proxy.rb +0 -24
- data/lib/sidekiq/logging.rb +0 -104
- data/lib/sidekiq/middleware/server/active_record.rb +0 -13
- data/lib/sidekiq/middleware/server/logging.rb +0 -40
- data/lib/sidekiq/middleware/server/retry_jobs.rb +0 -206
- data/lib/sidekiq/util.rb +0 -68
- data/lib/sidekiq/web_helpers.rb +0 -249
- data/lib/sidekiq/worker.rb +0 -103
- data/test/config.yml +0 -9
- data/test/env_based_config.yml +0 -11
- data/test/fake_env.rb +0 -0
- data/test/fixtures/en.yml +0 -2
- data/test/helper.rb +0 -49
- data/test/test_api.rb +0 -493
- data/test/test_cli.rb +0 -335
- data/test/test_client.rb +0 -194
- data/test/test_exception_handler.rb +0 -55
- data/test/test_extensions.rb +0 -126
- data/test/test_fetch.rb +0 -104
- data/test/test_logging.rb +0 -34
- data/test/test_manager.rb +0 -168
- data/test/test_middleware.rb +0 -159
- data/test/test_processor.rb +0 -237
- data/test/test_rails.rb +0 -21
- data/test/test_redis_connection.rb +0 -126
- data/test/test_retry.rb +0 -325
- data/test/test_scheduled.rb +0 -114
- data/test/test_scheduling.rb +0 -49
- data/test/test_sidekiq.rb +0 -99
- data/test/test_testing.rb +0 -142
- data/test/test_testing_fake.rb +0 -268
- data/test/test_testing_inline.rb +0 -93
- data/test/test_util.rb +0 -16
- data/test/test_web.rb +0 -608
- data/test/test_web_helpers.rb +0 -53
- data/web/assets/images/bootstrap/glyphicons-halflings-white.png +0 -0
- data/web/assets/images/bootstrap/glyphicons-halflings.png +0 -0
- data/web/assets/images/status/active.png +0 -0
- data/web/assets/images/status/idle.png +0 -0
- data/web/assets/javascripts/locales/README.md +0 -27
- data/web/assets/javascripts/locales/jquery.timeago.ar.js +0 -96
- data/web/assets/javascripts/locales/jquery.timeago.bg.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.bs.js +0 -49
- data/web/assets/javascripts/locales/jquery.timeago.ca.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.cs.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.cy.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.da.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.de.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.el.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.en-short.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.en.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.es.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.et.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.fa.js +0 -22
- data/web/assets/javascripts/locales/jquery.timeago.fi.js +0 -28
- data/web/assets/javascripts/locales/jquery.timeago.fr-short.js +0 -16
- data/web/assets/javascripts/locales/jquery.timeago.fr.js +0 -17
- data/web/assets/javascripts/locales/jquery.timeago.he.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.hr.js +0 -49
- data/web/assets/javascripts/locales/jquery.timeago.hu.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.hy.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.id.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.it.js +0 -16
- data/web/assets/javascripts/locales/jquery.timeago.ja.js +0 -19
- data/web/assets/javascripts/locales/jquery.timeago.ko.js +0 -17
- data/web/assets/javascripts/locales/jquery.timeago.lt.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.mk.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.nl.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.no.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.pl.js +0 -31
- data/web/assets/javascripts/locales/jquery.timeago.pt-br.js +0 -16
- data/web/assets/javascripts/locales/jquery.timeago.pt.js +0 -16
- data/web/assets/javascripts/locales/jquery.timeago.ro.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.rs.js +0 -49
- data/web/assets/javascripts/locales/jquery.timeago.ru.js +0 -34
- data/web/assets/javascripts/locales/jquery.timeago.sk.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.sl.js +0 -44
- data/web/assets/javascripts/locales/jquery.timeago.sv.js +0 -18
- data/web/assets/javascripts/locales/jquery.timeago.th.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.tr.js +0 -16
- data/web/assets/javascripts/locales/jquery.timeago.uk.js +0 -34
- data/web/assets/javascripts/locales/jquery.timeago.uz.js +0 -19
- data/web/assets/javascripts/locales/jquery.timeago.zh-cn.js +0 -20
- data/web/assets/javascripts/locales/jquery.timeago.zh-tw.js +0 -20
- data/web/views/_poll_js.erb +0 -5
- /data/web/assets/images/{status-sd8051fd480.png → status.png} +0 -0
data/lib/sidekiq/manager.rb
CHANGED
|
@@ -1,236 +1,134 @@
|
|
|
1
|
-
#
|
|
2
|
-
require 'sidekiq/util'
|
|
3
|
-
require 'sidekiq/actor'
|
|
4
|
-
require 'sidekiq/processor'
|
|
5
|
-
require 'sidekiq/fetch'
|
|
1
|
+
# frozen_string_literal: true
|
|
6
2
|
|
|
7
|
-
|
|
3
|
+
require "sidekiq/processor"
|
|
4
|
+
require "set"
|
|
8
5
|
|
|
6
|
+
module Sidekiq
|
|
9
7
|
##
|
|
10
|
-
# The
|
|
11
|
-
#
|
|
12
|
-
#
|
|
8
|
+
# The Manager is the central coordination point in Sidekiq, controlling
|
|
9
|
+
# the lifecycle of the Processors.
|
|
10
|
+
#
|
|
11
|
+
# Tasks:
|
|
12
|
+
#
|
|
13
|
+
# 1. start: Spin up Processors.
|
|
14
|
+
# 2. processor_died: Handle job failure, throw away Processor, create new one.
|
|
15
|
+
# 3. quiet: shutdown idle Processors.
|
|
16
|
+
# 4. stop: hard stop the Processors by deadline.
|
|
17
|
+
#
|
|
18
|
+
# Note that only the last task requires its own Thread since it has to monitor
|
|
19
|
+
# the shutdown process. The other tasks are performed by other threads.
|
|
13
20
|
#
|
|
14
21
|
class Manager
|
|
15
|
-
include
|
|
16
|
-
include Actor
|
|
17
|
-
trap_exit :processor_died
|
|
22
|
+
include Sidekiq::Component
|
|
18
23
|
|
|
19
|
-
attr_reader :
|
|
20
|
-
attr_reader :
|
|
21
|
-
attr_accessor :fetcher
|
|
24
|
+
attr_reader :workers
|
|
25
|
+
attr_reader :capsule
|
|
22
26
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def initialize(condvar, options={})
|
|
27
|
-
logger.debug { options.inspect }
|
|
28
|
-
@options = options
|
|
29
|
-
@count = options[:concurrency] || 25
|
|
27
|
+
def initialize(capsule)
|
|
28
|
+
@config = @capsule = capsule
|
|
29
|
+
@count = capsule.concurrency
|
|
30
30
|
raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
|
|
31
|
-
@done_callback = nil
|
|
32
|
-
@finished = condvar
|
|
33
31
|
|
|
34
|
-
@in_progress = {}
|
|
35
|
-
@threads = {}
|
|
36
32
|
@done = false
|
|
37
|
-
@
|
|
38
|
-
@
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
p
|
|
33
|
+
@workers = Set.new
|
|
34
|
+
@plock = Mutex.new
|
|
35
|
+
@count.times do
|
|
36
|
+
@workers << Processor.new(@config, &method(:processor_result))
|
|
42
37
|
end
|
|
43
38
|
end
|
|
44
39
|
|
|
45
|
-
def
|
|
46
|
-
|
|
47
|
-
should_shutdown = options[:shutdown]
|
|
48
|
-
timeout = options[:timeout]
|
|
49
|
-
|
|
50
|
-
@done = true
|
|
51
|
-
|
|
52
|
-
logger.info { "Terminating #{@ready.size} quiet workers" }
|
|
53
|
-
@ready.each { |x| x.terminate if x.alive? }
|
|
54
|
-
@ready.clear
|
|
55
|
-
|
|
56
|
-
return if clean_up_for_graceful_shutdown
|
|
57
|
-
|
|
58
|
-
hard_shutdown_in timeout if should_shutdown
|
|
59
|
-
end
|
|
40
|
+
def start
|
|
41
|
+
@workers.each(&:start)
|
|
60
42
|
end
|
|
61
43
|
|
|
62
|
-
def
|
|
63
|
-
if @
|
|
64
|
-
|
|
65
|
-
return true
|
|
66
|
-
end
|
|
44
|
+
def quiet
|
|
45
|
+
return if @done
|
|
46
|
+
@done = true
|
|
67
47
|
|
|
68
|
-
|
|
69
|
-
|
|
48
|
+
logger.info { "Terminating quiet threads for #{capsule.name} capsule" }
|
|
49
|
+
@workers.each(&:terminate)
|
|
70
50
|
end
|
|
71
51
|
|
|
72
|
-
def
|
|
73
|
-
|
|
74
|
-
end
|
|
52
|
+
def stop(deadline)
|
|
53
|
+
quiet
|
|
75
54
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
55
|
+
# some of the shutdown events can be async,
|
|
56
|
+
# we don't have any way to know when they're done but
|
|
57
|
+
# give them a little time to take effect
|
|
58
|
+
sleep PAUSE_TIME
|
|
59
|
+
return if @workers.empty?
|
|
79
60
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
@in_progress.delete(processor.object_id)
|
|
84
|
-
@threads.delete(processor.object_id)
|
|
85
|
-
@busy.delete(processor)
|
|
86
|
-
if stopped?
|
|
87
|
-
processor.terminate if processor.alive?
|
|
88
|
-
shutdown if @busy.empty?
|
|
89
|
-
else
|
|
90
|
-
@ready << processor if processor.alive?
|
|
91
|
-
end
|
|
92
|
-
dispatch
|
|
93
|
-
end
|
|
94
|
-
end
|
|
61
|
+
logger.info { "Pausing to allow jobs to finish..." }
|
|
62
|
+
wait_for(deadline) { @workers.empty? }
|
|
63
|
+
return if @workers.empty?
|
|
95
64
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
@threads.delete(processor.object_id)
|
|
100
|
-
@busy.delete(processor)
|
|
101
|
-
|
|
102
|
-
unless stopped?
|
|
103
|
-
p = Processor.new_link(current_actor)
|
|
104
|
-
p.proxy_id = p.object_id
|
|
105
|
-
@ready << p
|
|
106
|
-
dispatch
|
|
107
|
-
else
|
|
108
|
-
shutdown if @busy.empty?
|
|
109
|
-
end
|
|
110
|
-
end
|
|
65
|
+
hard_shutdown
|
|
66
|
+
ensure
|
|
67
|
+
capsule.stop
|
|
111
68
|
end
|
|
112
69
|
|
|
113
|
-
def
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
work.requeue
|
|
121
|
-
else
|
|
122
|
-
processor = @ready.pop
|
|
123
|
-
@in_progress[processor.object_id] = work
|
|
124
|
-
@busy << processor
|
|
125
|
-
processor.async.process(work)
|
|
70
|
+
def processor_result(processor, reason = nil)
|
|
71
|
+
@plock.synchronize do
|
|
72
|
+
@workers.delete(processor)
|
|
73
|
+
unless @done
|
|
74
|
+
p = Processor.new(@config, &method(:processor_result))
|
|
75
|
+
@workers << p
|
|
76
|
+
p.start
|
|
126
77
|
end
|
|
127
78
|
end
|
|
128
79
|
end
|
|
129
80
|
|
|
130
|
-
# A hack worthy of Rube Goldberg. We need to be able
|
|
131
|
-
# to hard stop a working thread. But there's no way for us to
|
|
132
|
-
# get handle to the underlying thread performing work for a processor
|
|
133
|
-
# so we have it call us and tell us.
|
|
134
|
-
def real_thread(proxy_id, thr)
|
|
135
|
-
@threads[proxy_id] = thr if thr.alive?
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
PROCTITLES = [
|
|
139
|
-
proc { 'sidekiq'.freeze },
|
|
140
|
-
proc { Sidekiq::VERSION },
|
|
141
|
-
proc { |mgr, data| data['tag'] },
|
|
142
|
-
proc { |mgr, data| "[#{mgr.busy.size} of #{data['concurrency']} busy]" },
|
|
143
|
-
proc { |mgr, data| "stopping" if mgr.stopped? },
|
|
144
|
-
]
|
|
145
|
-
|
|
146
|
-
def heartbeat(key, data, json)
|
|
147
|
-
results = PROCTITLES.map {|x| x.(self, data) }
|
|
148
|
-
results.compact!
|
|
149
|
-
$0 = results.join(' ')
|
|
150
|
-
|
|
151
|
-
❤(key, json)
|
|
152
|
-
after(5) do
|
|
153
|
-
heartbeat(key, data, json)
|
|
154
|
-
end
|
|
155
|
-
end
|
|
156
|
-
|
|
157
81
|
def stopped?
|
|
158
82
|
@done
|
|
159
83
|
end
|
|
160
84
|
|
|
161
85
|
private
|
|
162
86
|
|
|
163
|
-
def
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
conn.expire(key, 60)
|
|
170
|
-
conn.rpop("#{key}-signals")
|
|
171
|
-
end
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
return unless msg
|
|
175
|
-
|
|
176
|
-
if JVM_RESERVED_SIGNALS.include?(msg)
|
|
177
|
-
Sidekiq::CLI.instance.handle_signal(msg)
|
|
178
|
-
else
|
|
179
|
-
::Process.kill(msg, $$)
|
|
180
|
-
end
|
|
181
|
-
rescue => e
|
|
182
|
-
# ignore all redis/network issues
|
|
183
|
-
logger.error("heartbeat: #{e.message}")
|
|
87
|
+
def hard_shutdown
|
|
88
|
+
# We've reached the timeout and we still have busy threads.
|
|
89
|
+
# They must die but their jobs shall live on.
|
|
90
|
+
cleanup = nil
|
|
91
|
+
@plock.synchronize do
|
|
92
|
+
cleanup = @workers.dup
|
|
184
93
|
end
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
def hard_shutdown_in(delay)
|
|
188
|
-
logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }
|
|
189
94
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
# We've reached the timeout and we still have busy workers.
|
|
193
|
-
# They must die but their messages shall live on.
|
|
194
|
-
logger.warn { "Terminating #{@busy.size} busy worker threads" }
|
|
195
|
-
logger.warn { "Work still in progress #{@in_progress.values.inspect}" }
|
|
95
|
+
if cleanup.size > 0
|
|
96
|
+
jobs = cleanup.map { |p| p.job }.compact
|
|
196
97
|
|
|
197
|
-
|
|
98
|
+
logger.warn { "Terminating #{cleanup.size} busy threads" }
|
|
99
|
+
logger.debug { "Jobs still in progress #{jobs.inspect}" }
|
|
198
100
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
end
|
|
101
|
+
# Re-enqueue unfinished jobs
|
|
102
|
+
# NOTE: You may notice that we may push a job back to redis before
|
|
103
|
+
# the thread is terminated. This is ok because Sidekiq's
|
|
104
|
+
# contract says that jobs are run AT LEAST once. Process termination
|
|
105
|
+
# is delayed until we're certain the jobs are back in Redis because
|
|
106
|
+
# it is worse to lose a job than to run it twice.
|
|
107
|
+
capsule.fetcher.bulk_requeue(jobs)
|
|
207
108
|
end
|
|
208
|
-
end
|
|
209
109
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
# processors somehow.
|
|
214
|
-
raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
|
|
215
|
-
raise "No ready processor!?" if @ready.empty?
|
|
110
|
+
cleanup.each do |processor|
|
|
111
|
+
processor.kill
|
|
112
|
+
end
|
|
216
113
|
|
|
217
|
-
|
|
114
|
+
# when this method returns, we immediately call `exit` which may not give
|
|
115
|
+
# the remaining threads time to run `ensure` blocks, etc. We pause here up
|
|
116
|
+
# to 3 seconds to give threads a minimal amount of time to run `ensure` blocks.
|
|
117
|
+
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 3
|
|
118
|
+
wait_for(deadline) { @workers.empty? }
|
|
218
119
|
end
|
|
219
120
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
@finished.signal
|
|
223
|
-
end
|
|
121
|
+
# hack for quicker development / testing environment #2774
|
|
122
|
+
PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
|
|
224
123
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
@in_progress.clear
|
|
124
|
+
# Wait for the condblock to return true or for the deadline to pass.
|
|
125
|
+
def wait_for(deadline, &condblock)
|
|
126
|
+
remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
127
|
+
while remaining > PAUSE_TIME
|
|
128
|
+
return if condblock.call
|
|
129
|
+
sleep PAUSE_TIME
|
|
130
|
+
remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
131
|
+
end
|
|
234
132
|
end
|
|
235
133
|
end
|
|
236
134
|
end
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
require "sidekiq"
|
|
2
|
+
require "date"
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
require "sidekiq/metrics/shared"
|
|
6
|
+
|
|
7
|
+
module Sidekiq
|
|
8
|
+
module Metrics
|
|
9
|
+
# Allows caller to query for Sidekiq execution metrics within Redis.
|
|
10
|
+
# Caller passes filters as keyword arguments. {#top_jobs} and {#for_job} call
|
|
11
|
+
# Redis and return a {Result}.
|
|
12
|
+
#
|
|
13
|
+
# NB: all metrics and times/dates are UTC only. We specifically do not
|
|
14
|
+
# support timezones.
|
|
15
|
+
class Query
|
|
16
|
+
def initialize(pool: nil, now: Time.now)
|
|
17
|
+
@time = now.utc
|
|
18
|
+
@pool = pool || Sidekiq.default_configuration.redis_pool
|
|
19
|
+
@klass = nil
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# Get metric data for all jobs over the last +minutes+ minutes (default: the last hour)
|
|
23
|
+
# +class_filter+: return only results for classes matching filter
|
|
24
|
+
def top_jobs(class_filter: nil, minutes: 60)
|
|
25
|
+
result = Result.new
|
|
26
|
+
|
|
27
|
+
time = @time
|
|
28
|
+
redis_results = @pool.with do |conn|
|
|
29
|
+
conn.pipelined do |pipe|
|
|
30
|
+
minutes.times do |idx|
|
|
31
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
|
32
|
+
pipe.hgetall key
|
|
33
|
+
result.prepend_bucket time
|
|
34
|
+
time -= 60
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
time = @time
|
|
40
|
+
redis_results.each do |hash|
|
|
41
|
+
hash.each do |k, v|
|
|
42
|
+
kls, metric = k.split("|")
|
|
43
|
+
next if class_filter && !class_filter.match?(kls)
|
|
44
|
+
result.job_results[kls].add_metric metric, time, v.to_i
|
|
45
|
+
end
|
|
46
|
+
time -= 60
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
result.marks = fetch_marks(result.starts_at..result.ends_at)
|
|
50
|
+
|
|
51
|
+
result
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def for_job(klass, minutes: 60)
|
|
55
|
+
result = Result.new
|
|
56
|
+
|
|
57
|
+
time = @time
|
|
58
|
+
redis_results = @pool.with do |conn|
|
|
59
|
+
conn.pipelined do |pipe|
|
|
60
|
+
minutes.times do |idx|
|
|
61
|
+
key = "j|#{time.strftime("%Y%m%d")}|#{time.hour}:#{time.min}"
|
|
62
|
+
pipe.hmget key, "#{klass}|ms", "#{klass}|p", "#{klass}|f"
|
|
63
|
+
result.prepend_bucket time
|
|
64
|
+
time -= 60
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
time = @time
|
|
70
|
+
@pool.with do |conn|
|
|
71
|
+
redis_results.each do |(ms, p, f)|
|
|
72
|
+
result.job_results[klass].add_metric "ms", time, ms.to_i if ms
|
|
73
|
+
result.job_results[klass].add_metric "p", time, p.to_i if p
|
|
74
|
+
result.job_results[klass].add_metric "f", time, f.to_i if f
|
|
75
|
+
result.job_results[klass].add_hist time, Histogram.new(klass).fetch(conn, time).reverse
|
|
76
|
+
time -= 60
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
result.marks = fetch_marks(result.starts_at..result.ends_at)
|
|
81
|
+
|
|
82
|
+
result
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
class Result < Struct.new(:starts_at, :ends_at, :size, :buckets, :job_results, :marks)
|
|
86
|
+
def initialize
|
|
87
|
+
super
|
|
88
|
+
self.buckets = []
|
|
89
|
+
self.marks = []
|
|
90
|
+
self.job_results = Hash.new { |h, k| h[k] = JobResult.new }
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def prepend_bucket(time)
|
|
94
|
+
buckets.unshift time.strftime("%H:%M")
|
|
95
|
+
self.ends_at ||= time
|
|
96
|
+
self.starts_at = time
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
class JobResult < Struct.new(:series, :hist, :totals)
|
|
101
|
+
def initialize
|
|
102
|
+
super
|
|
103
|
+
self.series = Hash.new { |h, k| h[k] = Hash.new(0) }
|
|
104
|
+
self.hist = Hash.new { |h, k| h[k] = [] }
|
|
105
|
+
self.totals = Hash.new(0)
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def add_metric(metric, time, value)
|
|
109
|
+
totals[metric] += value
|
|
110
|
+
series[metric][time.strftime("%H:%M")] += value
|
|
111
|
+
|
|
112
|
+
# Include timing measurements in seconds for convenience
|
|
113
|
+
add_metric("s", time, value / 1000.0) if metric == "ms"
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def add_hist(time, hist_result)
|
|
117
|
+
hist[time.strftime("%H:%M")] = hist_result
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def total_avg(metric = "ms")
|
|
121
|
+
completed = totals["p"] - totals["f"]
|
|
122
|
+
totals[metric].to_f / completed
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def series_avg(metric = "ms")
|
|
126
|
+
series[metric].each_with_object(Hash.new(0)) do |(bucket, value), result|
|
|
127
|
+
completed = series.dig("p", bucket) - series.dig("f", bucket)
|
|
128
|
+
result[bucket] = (completed == 0) ? 0 : value.to_f / completed
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
class MarkResult < Struct.new(:time, :label)
|
|
134
|
+
def bucket
|
|
135
|
+
time.strftime("%H:%M")
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
private
|
|
140
|
+
|
|
141
|
+
def fetch_marks(time_range)
|
|
142
|
+
[].tap do |result|
|
|
143
|
+
marks = @pool.with { |c| c.hgetall("#{@time.strftime("%Y%m%d")}-marks") }
|
|
144
|
+
|
|
145
|
+
marks.each do |timestamp, label|
|
|
146
|
+
time = Time.parse(timestamp)
|
|
147
|
+
if time_range.cover? time
|
|
148
|
+
result << MarkResult.new(time, label)
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
require "concurrent"
|
|
2
|
+
|
|
3
|
+
module Sidekiq
|
|
4
|
+
module Metrics
|
|
5
|
+
# This is the only dependency on concurrent-ruby in Sidekiq but it's
|
|
6
|
+
# mandatory for thread-safety until MRI supports atomic operations on values.
|
|
7
|
+
Counter = ::Concurrent::AtomicFixnum
|
|
8
|
+
|
|
9
|
+
# Implements space-efficient but statistically useful histogram storage.
|
|
10
|
+
# A precise time histogram stores every time. Instead we break times into a set of
|
|
11
|
+
# known buckets and increment counts of the associated time bucket. Even if we call
|
|
12
|
+
# the histogram a million times, we'll still only store 26 buckets.
|
|
13
|
+
# NB: needs to be thread-safe or resilient to races.
|
|
14
|
+
#
|
|
15
|
+
# To store this data, we use Redis' BITFIELD command to store unsigned 16-bit counters
|
|
16
|
+
# per bucket per klass per minute. It's unlikely that most people will be executing more
|
|
17
|
+
# than 1000 job/sec for a full minute of a specific type.
|
|
18
|
+
class Histogram
|
|
19
|
+
include Enumerable
|
|
20
|
+
|
|
21
|
+
# This number represents the maximum milliseconds for this bucket.
|
|
22
|
+
# 20 means all job executions up to 20ms, e.g. if a job takes
|
|
23
|
+
# 280ms, it'll increment bucket[7]. Note we can track job executions
|
|
24
|
+
# up to about 5.5 minutes. After that, it's assumed you're probably
|
|
25
|
+
# not too concerned with its performance.
|
|
26
|
+
BUCKET_INTERVALS = [
|
|
27
|
+
20, 30, 45, 65, 100,
|
|
28
|
+
150, 225, 335, 500, 750,
|
|
29
|
+
1100, 1700, 2500, 3800, 5750,
|
|
30
|
+
8500, 13000, 20000, 30000, 45000,
|
|
31
|
+
65000, 100000, 150000, 225000, 335000,
|
|
32
|
+
1e20 # the "maybe your job is too long" bucket
|
|
33
|
+
].freeze
|
|
34
|
+
LABELS = [
|
|
35
|
+
"20ms", "30ms", "45ms", "65ms", "100ms",
|
|
36
|
+
"150ms", "225ms", "335ms", "500ms", "750ms",
|
|
37
|
+
"1.1s", "1.7s", "2.5s", "3.8s", "5.75s",
|
|
38
|
+
"8.5s", "13s", "20s", "30s", "45s",
|
|
39
|
+
"65s", "100s", "150s", "225s", "335s",
|
|
40
|
+
"Slow"
|
|
41
|
+
].freeze
|
|
42
|
+
FETCH = "GET u16 #0 GET u16 #1 GET u16 #2 GET u16 #3 \
|
|
43
|
+
GET u16 #4 GET u16 #5 GET u16 #6 GET u16 #7 \
|
|
44
|
+
GET u16 #8 GET u16 #9 GET u16 #10 GET u16 #11 \
|
|
45
|
+
GET u16 #12 GET u16 #13 GET u16 #14 GET u16 #15 \
|
|
46
|
+
GET u16 #16 GET u16 #17 GET u16 #18 GET u16 #19 \
|
|
47
|
+
GET u16 #20 GET u16 #21 GET u16 #22 GET u16 #23 \
|
|
48
|
+
GET u16 #24 GET u16 #25".split
|
|
49
|
+
HISTOGRAM_TTL = 8 * 60 * 60
|
|
50
|
+
|
|
51
|
+
def each
|
|
52
|
+
buckets.each { |counter| yield counter.value }
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def label(idx)
|
|
56
|
+
LABELS[idx]
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
attr_reader :buckets
|
|
60
|
+
def initialize(klass)
|
|
61
|
+
@klass = klass
|
|
62
|
+
@buckets = Array.new(BUCKET_INTERVALS.size) { Counter.new }
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def record_time(ms)
|
|
66
|
+
index_to_use = BUCKET_INTERVALS.each_index do |idx|
|
|
67
|
+
break idx if ms < BUCKET_INTERVALS[idx]
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
@buckets[index_to_use].increment
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def fetch(conn, now = Time.now)
|
|
74
|
+
window = now.utc.strftime("%d-%H:%-M")
|
|
75
|
+
key = "#{@klass}-#{window}"
|
|
76
|
+
conn.bitfield_ro(key, *FETCH)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def persist(conn, now = Time.now)
|
|
80
|
+
buckets, @buckets = @buckets, []
|
|
81
|
+
window = now.utc.strftime("%d-%H:%-M")
|
|
82
|
+
key = "#{@klass}-#{window}"
|
|
83
|
+
cmd = [key, "OVERFLOW", "SAT"]
|
|
84
|
+
buckets.each_with_index do |counter, idx|
|
|
85
|
+
val = counter.value
|
|
86
|
+
cmd << "INCRBY" << "u16" << "##{idx}" << val.to_s if val > 0
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
conn.bitfield(*cmd) if cmd.size > 3
|
|
90
|
+
conn.expire(key, HISTOGRAM_TTL)
|
|
91
|
+
key
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|