sidekiq 6.3.1 → 7.0.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sidekiq might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Changes.md +205 -11
- data/LICENSE.txt +9 -0
- data/README.md +45 -32
- data/bin/sidekiq +4 -9
- data/bin/sidekiqload +189 -117
- data/bin/sidekiqmon +4 -1
- data/lib/generators/sidekiq/job_generator.rb +57 -0
- data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +2 -2
- data/lib/generators/sidekiq/templates/{worker_spec.rb.erb → job_spec.rb.erb} +1 -1
- data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
- data/lib/sidekiq/api.rb +308 -188
- data/lib/sidekiq/capsule.rb +127 -0
- data/lib/sidekiq/cli.rb +85 -80
- data/lib/sidekiq/client.rb +74 -81
- data/lib/sidekiq/{util.rb → component.rb} +13 -40
- data/lib/sidekiq/config.rb +270 -0
- data/lib/sidekiq/deploy.rb +62 -0
- data/lib/sidekiq/embedded.rb +61 -0
- data/lib/sidekiq/fetch.rb +23 -24
- data/lib/sidekiq/job.rb +375 -10
- data/lib/sidekiq/job_logger.rb +16 -28
- data/lib/sidekiq/job_retry.rb +81 -57
- data/lib/sidekiq/job_util.rb +105 -0
- data/lib/sidekiq/launcher.rb +103 -95
- data/lib/sidekiq/logger.rb +9 -44
- data/lib/sidekiq/manager.rb +40 -41
- data/lib/sidekiq/metrics/query.rb +153 -0
- data/lib/sidekiq/metrics/shared.rb +95 -0
- data/lib/sidekiq/metrics/tracking.rb +136 -0
- data/lib/sidekiq/middleware/chain.rb +96 -51
- data/lib/sidekiq/middleware/current_attributes.rb +17 -13
- data/lib/sidekiq/middleware/i18n.rb +6 -4
- data/lib/sidekiq/middleware/modules.rb +21 -0
- data/lib/sidekiq/monitor.rb +17 -4
- data/lib/sidekiq/paginator.rb +17 -9
- data/lib/sidekiq/processor.rb +60 -60
- data/lib/sidekiq/rails.rb +12 -10
- data/lib/sidekiq/redis_client_adapter.rb +115 -0
- data/lib/sidekiq/redis_connection.rb +13 -82
- data/lib/sidekiq/ring_buffer.rb +29 -0
- data/lib/sidekiq/scheduled.rb +75 -37
- data/lib/sidekiq/testing/inline.rb +4 -4
- data/lib/sidekiq/testing.rb +41 -68
- data/lib/sidekiq/transaction_aware_client.rb +44 -0
- data/lib/sidekiq/version.rb +2 -1
- data/lib/sidekiq/web/action.rb +3 -3
- data/lib/sidekiq/web/application.rb +45 -11
- data/lib/sidekiq/web/csrf_protection.rb +3 -3
- data/lib/sidekiq/web/helpers.rb +35 -21
- data/lib/sidekiq/web.rb +10 -17
- data/lib/sidekiq/worker_compatibility_alias.rb +13 -0
- data/lib/sidekiq.rb +85 -202
- data/sidekiq.gemspec +20 -10
- data/web/assets/javascripts/application.js +76 -26
- data/web/assets/javascripts/base-charts.js +106 -0
- data/web/assets/javascripts/chart.min.js +13 -0
- data/web/assets/javascripts/chartjs-plugin-annotation.min.js +7 -0
- data/web/assets/javascripts/dashboard-charts.js +166 -0
- data/web/assets/javascripts/dashboard.js +3 -240
- data/web/assets/javascripts/metrics.js +264 -0
- data/web/assets/stylesheets/application-dark.css +17 -17
- data/web/assets/stylesheets/application-rtl.css +2 -91
- data/web/assets/stylesheets/application.css +69 -302
- data/web/locales/ar.yml +70 -70
- data/web/locales/cs.yml +62 -62
- data/web/locales/da.yml +60 -53
- data/web/locales/de.yml +65 -65
- data/web/locales/el.yml +43 -24
- data/web/locales/en.yml +82 -69
- data/web/locales/es.yml +68 -68
- data/web/locales/fa.yml +65 -65
- data/web/locales/fr.yml +67 -67
- data/web/locales/he.yml +65 -64
- data/web/locales/hi.yml +59 -59
- data/web/locales/it.yml +53 -53
- data/web/locales/ja.yml +73 -68
- data/web/locales/ko.yml +52 -52
- data/web/locales/lt.yml +66 -66
- data/web/locales/nb.yml +61 -61
- data/web/locales/nl.yml +52 -52
- data/web/locales/pl.yml +45 -45
- data/web/locales/pt-br.yml +63 -55
- data/web/locales/pt.yml +51 -51
- data/web/locales/ru.yml +67 -66
- data/web/locales/sv.yml +53 -53
- data/web/locales/ta.yml +60 -60
- data/web/locales/uk.yml +62 -61
- data/web/locales/ur.yml +64 -64
- data/web/locales/vi.yml +67 -67
- data/web/locales/zh-cn.yml +43 -16
- data/web/locales/zh-tw.yml +42 -8
- data/web/views/_footer.erb +5 -2
- data/web/views/_job_info.erb +18 -2
- data/web/views/_metrics_period_select.erb +12 -0
- data/web/views/_nav.erb +1 -1
- data/web/views/_paging.erb +2 -0
- data/web/views/_poll_link.erb +1 -1
- data/web/views/_summary.erb +1 -1
- data/web/views/busy.erb +42 -26
- data/web/views/dashboard.erb +36 -4
- data/web/views/metrics.erb +82 -0
- data/web/views/metrics_for_job.erb +71 -0
- data/web/views/morgue.erb +5 -9
- data/web/views/queue.erb +15 -15
- data/web/views/queues.erb +3 -1
- data/web/views/retries.erb +5 -9
- data/web/views/scheduled.erb +12 -13
- metadata +68 -32
- data/LICENSE +0 -9
- data/lib/generators/sidekiq/worker_generator.rb +0 -57
- data/lib/sidekiq/delay.rb +0 -41
- data/lib/sidekiq/exception_handler.rb +0 -27
- data/lib/sidekiq/extensions/action_mailer.rb +0 -48
- data/lib/sidekiq/extensions/active_record.rb +0 -43
- data/lib/sidekiq/extensions/class_methods.rb +0 -43
- data/lib/sidekiq/extensions/generic_proxy.rb +0 -33
- data/lib/sidekiq/worker.rb +0 -311
data/lib/sidekiq/job_retry.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sidekiq/scheduled"
|
4
|
-
require "sidekiq/api"
|
5
|
-
|
6
3
|
require "zlib"
|
7
4
|
require "base64"
|
5
|
+
require "sidekiq/component"
|
8
6
|
|
9
7
|
module Sidekiq
|
10
8
|
##
|
@@ -25,18 +23,19 @@ module Sidekiq
|
|
25
23
|
#
|
26
24
|
# A job looks like:
|
27
25
|
#
|
28
|
-
# { 'class' => '
|
26
|
+
# { 'class' => 'HardJob', 'args' => [1, 2, 'foo'], 'retry' => true }
|
29
27
|
#
|
30
28
|
# The 'retry' option also accepts a number (in place of 'true'):
|
31
29
|
#
|
32
|
-
# { 'class' => '
|
30
|
+
# { 'class' => 'HardJob', 'args' => [1, 2, 'foo'], 'retry' => 5 }
|
33
31
|
#
|
34
32
|
# The job will be retried this number of times before giving up. (If simply
|
35
33
|
# 'true', Sidekiq retries 25 times)
|
36
34
|
#
|
37
|
-
#
|
35
|
+
# Relevant options for job retries:
|
38
36
|
#
|
39
|
-
# * 'queue' - the queue
|
37
|
+
# * 'queue' - the queue for the initial job
|
38
|
+
# * 'retry_queue' - if job retries should be pushed to a different (e.g. lower priority) queue
|
40
39
|
# * 'retry_count' - number of times we've retried so far.
|
41
40
|
# * 'error_message' - the message from the exception
|
42
41
|
# * 'error_class' - the exception class
|
@@ -50,13 +49,14 @@ module Sidekiq
|
|
50
49
|
# The default number of retries is 25 which works out to about 3 weeks
|
51
50
|
# You can change the default maximum number of retries in your initializer:
|
52
51
|
#
|
53
|
-
# Sidekiq.
|
52
|
+
# Sidekiq.default_configuration[:max_retries] = 7
|
54
53
|
#
|
55
|
-
# or limit the number of retries for a particular
|
54
|
+
# or limit the number of retries for a particular job and send retries to
|
55
|
+
# a low priority queue with:
|
56
56
|
#
|
57
|
-
# class
|
58
|
-
# include Sidekiq::
|
59
|
-
# sidekiq_options :
|
57
|
+
# class MyJob
|
58
|
+
# include Sidekiq::Job
|
59
|
+
# sidekiq_options retry: 10, retry_queue: 'low'
|
60
60
|
# end
|
61
61
|
#
|
62
62
|
class JobRetry
|
@@ -64,17 +64,18 @@ module Sidekiq
|
|
64
64
|
|
65
65
|
class Skip < Handled; end
|
66
66
|
|
67
|
-
include Sidekiq::
|
67
|
+
include Sidekiq::Component
|
68
68
|
|
69
69
|
DEFAULT_MAX_RETRY_ATTEMPTS = 25
|
70
70
|
|
71
|
-
def initialize(
|
72
|
-
@
|
71
|
+
def initialize(capsule)
|
72
|
+
@config = @capsule = capsule
|
73
|
+
@max_retries = Sidekiq.default_configuration[:max_retries] || DEFAULT_MAX_RETRY_ATTEMPTS
|
73
74
|
end
|
74
75
|
|
75
76
|
# The global retry handler requires only the barest of data.
|
76
77
|
# We want to be able to retry as much as possible so we don't
|
77
|
-
# require the
|
78
|
+
# require the job to be instantiated.
|
78
79
|
def global(jobstr, queue)
|
79
80
|
yield
|
80
81
|
rescue Handled => ex
|
@@ -88,9 +89,9 @@ module Sidekiq
|
|
88
89
|
|
89
90
|
msg = Sidekiq.load_json(jobstr)
|
90
91
|
if msg["retry"]
|
91
|
-
|
92
|
+
process_retry(nil, msg, queue, e)
|
92
93
|
else
|
93
|
-
|
94
|
+
@capsule.config.death_handlers.each do |handler|
|
94
95
|
handler.call(msg, e)
|
95
96
|
rescue => handler_ex
|
96
97
|
handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
|
@@ -101,14 +102,14 @@ module Sidekiq
|
|
101
102
|
end
|
102
103
|
|
103
104
|
# The local retry support means that any errors that occur within
|
104
|
-
# this block can be associated with the given
|
105
|
+
# this block can be associated with the given job instance.
|
105
106
|
# This is required to support the `sidekiq_retries_exhausted` block.
|
106
107
|
#
|
107
108
|
# Note that any exception from the block is wrapped in the Skip
|
108
109
|
# exception so the global block does not reprocess the error. The
|
109
110
|
# Skip exception is unwrapped within Sidekiq::Processor#process before
|
110
111
|
# calling the handle_exception handlers.
|
111
|
-
def local(
|
112
|
+
def local(jobinst, jobstr, queue)
|
112
113
|
yield
|
113
114
|
rescue Handled => ex
|
114
115
|
raise ex
|
@@ -121,11 +122,11 @@ module Sidekiq
|
|
121
122
|
|
122
123
|
msg = Sidekiq.load_json(jobstr)
|
123
124
|
if msg["retry"].nil?
|
124
|
-
msg["retry"] =
|
125
|
+
msg["retry"] = jobinst.class.get_sidekiq_options["retry"]
|
125
126
|
end
|
126
127
|
|
127
128
|
raise e unless msg["retry"]
|
128
|
-
|
129
|
+
process_retry(jobinst, msg, queue, e)
|
129
130
|
# We've handled this error associated with this job, don't
|
130
131
|
# need to handle it at the global level
|
131
132
|
raise Skip
|
@@ -133,10 +134,10 @@ module Sidekiq
|
|
133
134
|
|
134
135
|
private
|
135
136
|
|
136
|
-
# Note that +
|
137
|
-
# instantiate the
|
137
|
+
# Note that +jobinst+ can be nil here if an error is raised before we can
|
138
|
+
# instantiate the job instance. All access must be guarded and
|
138
139
|
# best effort.
|
139
|
-
def
|
140
|
+
def process_retry(jobinst, msg, queue, exception)
|
140
141
|
max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
|
141
142
|
|
142
143
|
msg["queue"] = (msg["retry_queue"] || queue)
|
@@ -167,24 +168,55 @@ module Sidekiq
|
|
167
168
|
msg["error_backtrace"] = compress_backtrace(lines)
|
168
169
|
end
|
169
170
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
171
|
+
# Goodbye dear message, you (re)tried your best I'm sure.
|
172
|
+
return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts
|
173
|
+
|
174
|
+
strategy, delay = delay_for(jobinst, count, exception)
|
175
|
+
case strategy
|
176
|
+
when :discard
|
177
|
+
return # poof!
|
178
|
+
when :kill
|
179
|
+
return retries_exhausted(jobinst, msg, exception)
|
180
|
+
end
|
181
|
+
|
182
|
+
# Logging here can break retries if the logging device raises ENOSPC #3979
|
183
|
+
# logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
|
184
|
+
jitter = rand(10) * (count + 1)
|
185
|
+
retry_at = Time.now.to_f + delay + jitter
|
186
|
+
payload = Sidekiq.dump_json(msg)
|
187
|
+
redis do |conn|
|
188
|
+
conn.zadd("retry", retry_at.to_s, payload)
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
# returns (strategy, seconds)
|
193
|
+
def delay_for(jobinst, count, exception)
|
194
|
+
rv = begin
|
195
|
+
# sidekiq_retry_in can return two different things:
|
196
|
+
# 1. When to retry next, as an integer of seconds
|
197
|
+
# 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
|
198
|
+
jobinst&.sidekiq_retry_in_block&.call(count, exception)
|
199
|
+
rescue Exception => e
|
200
|
+
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
|
201
|
+
nil
|
202
|
+
end
|
203
|
+
|
204
|
+
rv = rv.to_i if rv.respond_to?(:to_i)
|
205
|
+
delay = (count**4) + 15
|
206
|
+
if Integer === rv && rv > 0
|
207
|
+
delay = rv
|
208
|
+
elsif rv == :discard
|
209
|
+
return [:discard, nil] # do nothing, job goes poof
|
210
|
+
elsif rv == :kill
|
211
|
+
return [:kill, nil]
|
182
212
|
end
|
213
|
+
|
214
|
+
[:default, delay]
|
183
215
|
end
|
184
216
|
|
185
|
-
def retries_exhausted(
|
217
|
+
def retries_exhausted(jobinst, msg, exception)
|
186
218
|
begin
|
187
|
-
block =
|
219
|
+
block = jobinst&.sidekiq_retries_exhausted_block
|
188
220
|
block&.call(msg, exception)
|
189
221
|
rescue => e
|
190
222
|
handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
|
@@ -192,7 +224,7 @@ module Sidekiq
|
|
192
224
|
|
193
225
|
send_to_morgue(msg) unless msg["dead"] == false
|
194
226
|
|
195
|
-
|
227
|
+
@capsule.config.death_handlers.each do |handler|
|
196
228
|
handler.call(msg, exception)
|
197
229
|
rescue => e
|
198
230
|
handle_exception(e, {context: "Error calling death handler", job: msg})
|
@@ -202,7 +234,15 @@ module Sidekiq
|
|
202
234
|
def send_to_morgue(msg)
|
203
235
|
logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
|
204
236
|
payload = Sidekiq.dump_json(msg)
|
205
|
-
|
237
|
+
now = Time.now.to_f
|
238
|
+
|
239
|
+
redis do |conn|
|
240
|
+
conn.multi do |xa|
|
241
|
+
xa.zadd("dead", now.to_s, payload)
|
242
|
+
xa.zremrangebyscore("dead", "-inf", now - @capsule.config[:dead_timeout_in_seconds])
|
243
|
+
xa.zremrangebyrank("dead", 0, - @capsule.config[:dead_max_jobs])
|
244
|
+
end
|
245
|
+
end
|
206
246
|
end
|
207
247
|
|
208
248
|
def retry_attempts_from(msg_retry, default)
|
@@ -213,22 +253,6 @@ module Sidekiq
|
|
213
253
|
end
|
214
254
|
end
|
215
255
|
|
216
|
-
def delay_for(worker, count, exception)
|
217
|
-
jitter = rand(10) * (count + 1)
|
218
|
-
if worker&.sidekiq_retry_in_block
|
219
|
-
custom_retry_in = retry_in(worker, count, exception).to_i
|
220
|
-
return custom_retry_in + jitter if custom_retry_in > 0
|
221
|
-
end
|
222
|
-
(count**4) + 15 + jitter
|
223
|
-
end
|
224
|
-
|
225
|
-
def retry_in(worker, count, exception)
|
226
|
-
worker.sidekiq_retry_in_block.call(count, exception)
|
227
|
-
rescue Exception => e
|
228
|
-
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
|
229
|
-
nil
|
230
|
-
end
|
231
|
-
|
232
256
|
def exception_caused_by_shutdown?(e, checked_causes = [])
|
233
257
|
return false unless e.cause
|
234
258
|
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require "securerandom"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
module Sidekiq
|
5
|
+
module JobUtil
|
6
|
+
# These functions encapsulate various job utilities.
|
7
|
+
|
8
|
+
TRANSIENT_ATTRIBUTES = %w[]
|
9
|
+
|
10
|
+
def validate(item)
|
11
|
+
raise(ArgumentError, "Job must be a Hash with 'class' and 'args' keys: `#{item}`") unless item.is_a?(Hash) && item.key?("class") && item.key?("args")
|
12
|
+
raise(ArgumentError, "Job args must be an Array: `#{item}`") unless item["args"].is_a?(Array)
|
13
|
+
raise(ArgumentError, "Job class must be either a Class or String representation of the class name: `#{item}`") unless item["class"].is_a?(Class) || item["class"].is_a?(String)
|
14
|
+
raise(ArgumentError, "Job 'at' must be a Numeric timestamp: `#{item}`") if item.key?("at") && !item["at"].is_a?(Numeric)
|
15
|
+
raise(ArgumentError, "Job tags must be an Array: `#{item}`") if item["tags"] && !item["tags"].is_a?(Array)
|
16
|
+
end
|
17
|
+
|
18
|
+
def verify_json(item)
|
19
|
+
job_class = item["wrapped"] || item["class"]
|
20
|
+
args = item["args"]
|
21
|
+
mode = Sidekiq::Config::DEFAULTS[:on_complex_arguments]
|
22
|
+
|
23
|
+
if mode == :raise || mode == :warn
|
24
|
+
if (unsafe_item = json_unsafe?(args))
|
25
|
+
msg = <<~EOM
|
26
|
+
Job arguments to #{job_class} must be native JSON types, but #{unsafe_item.inspect} is a #{unsafe_item.class}.
|
27
|
+
See https://github.com/sidekiq/sidekiq/wiki/Best-Practices.
|
28
|
+
To disable this error, add `Sidekiq.strict_args!(false)` to your initializer.
|
29
|
+
EOM
|
30
|
+
|
31
|
+
if mode == :raise
|
32
|
+
raise(ArgumentError, msg)
|
33
|
+
else
|
34
|
+
warn(msg)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def normalize_item(item)
|
41
|
+
validate(item)
|
42
|
+
|
43
|
+
# merge in the default sidekiq_options for the item's class and/or wrapped element
|
44
|
+
# this allows ActiveJobs to control sidekiq_options too.
|
45
|
+
defaults = normalized_hash(item["class"])
|
46
|
+
defaults = defaults.merge(item["wrapped"].get_sidekiq_options) if item["wrapped"].respond_to?(:get_sidekiq_options)
|
47
|
+
item = defaults.merge(item)
|
48
|
+
|
49
|
+
raise(ArgumentError, "Job must include a valid queue name") if item["queue"].nil? || item["queue"] == ""
|
50
|
+
|
51
|
+
# remove job attributes which aren't necessary to persist into Redis
|
52
|
+
TRANSIENT_ATTRIBUTES.each { |key| item.delete(key) }
|
53
|
+
|
54
|
+
item["jid"] ||= SecureRandom.hex(12)
|
55
|
+
item["class"] = item["class"].to_s
|
56
|
+
item["queue"] = item["queue"].to_s
|
57
|
+
item["created_at"] ||= Time.now.to_f
|
58
|
+
item
|
59
|
+
end
|
60
|
+
|
61
|
+
def normalized_hash(item_class)
|
62
|
+
if item_class.is_a?(Class)
|
63
|
+
raise(ArgumentError, "Message must include a Sidekiq::Job class, not class name: #{item_class.ancestors.inspect}") unless item_class.respond_to?(:get_sidekiq_options)
|
64
|
+
item_class.get_sidekiq_options
|
65
|
+
else
|
66
|
+
Sidekiq.default_job_options
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
RECURSIVE_JSON_UNSAFE = {
|
73
|
+
Integer => ->(val) {},
|
74
|
+
Float => ->(val) {},
|
75
|
+
TrueClass => ->(val) {},
|
76
|
+
FalseClass => ->(val) {},
|
77
|
+
NilClass => ->(val) {},
|
78
|
+
String => ->(val) {},
|
79
|
+
Array => ->(val) {
|
80
|
+
val.each do |e|
|
81
|
+
unsafe_item = RECURSIVE_JSON_UNSAFE[e.class].call(e)
|
82
|
+
return unsafe_item unless unsafe_item.nil?
|
83
|
+
end
|
84
|
+
nil
|
85
|
+
},
|
86
|
+
Hash => ->(val) {
|
87
|
+
val.each do |k, v|
|
88
|
+
return k unless String === k
|
89
|
+
|
90
|
+
unsafe_item = RECURSIVE_JSON_UNSAFE[v.class].call(v)
|
91
|
+
return unsafe_item unless unsafe_item.nil?
|
92
|
+
end
|
93
|
+
nil
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
RECURSIVE_JSON_UNSAFE.default = ->(val) { val }
|
98
|
+
RECURSIVE_JSON_UNSAFE.compare_by_identity
|
99
|
+
private_constant :RECURSIVE_JSON_UNSAFE
|
100
|
+
|
101
|
+
def json_unsafe?(item)
|
102
|
+
RECURSIVE_JSON_UNSAFE[item.class].call(item)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
data/lib/sidekiq/launcher.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "sidekiq/manager"
|
4
|
-
require "sidekiq/
|
4
|
+
require "sidekiq/capsule"
|
5
5
|
require "sidekiq/scheduled"
|
6
|
+
require "sidekiq/ring_buffer"
|
6
7
|
|
7
8
|
module Sidekiq
|
8
|
-
# The Launcher starts the
|
9
|
+
# The Launcher starts the Capsule Managers, the Poller thread and provides the process heartbeat.
|
9
10
|
class Launcher
|
10
|
-
include
|
11
|
+
include Sidekiq::Component
|
11
12
|
|
12
13
|
STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
|
13
14
|
|
@@ -15,50 +16,56 @@ module Sidekiq
|
|
15
16
|
proc { "sidekiq" },
|
16
17
|
proc { Sidekiq::VERSION },
|
17
18
|
proc { |me, data| data["tag"] },
|
18
|
-
proc { |me, data| "[#{Processor::
|
19
|
+
proc { |me, data| "[#{Processor::WORK_STATE.size} of #{me.config.total_concurrency} busy]" },
|
19
20
|
proc { |me, data| "stopping" if me.stopping? }
|
20
21
|
]
|
21
22
|
|
22
|
-
attr_accessor :
|
23
|
+
attr_accessor :managers, :poller
|
23
24
|
|
24
|
-
def initialize(
|
25
|
-
|
26
|
-
@
|
27
|
-
@
|
25
|
+
def initialize(config, embedded: false)
|
26
|
+
@config = config
|
27
|
+
@embedded = embedded
|
28
|
+
@managers = config.capsules.values.map do |cap|
|
29
|
+
Sidekiq::Manager.new(cap)
|
30
|
+
end
|
31
|
+
@poller = Sidekiq::Scheduled::Poller.new(@config)
|
28
32
|
@done = false
|
29
|
-
@options = options
|
30
33
|
end
|
31
34
|
|
32
|
-
|
33
|
-
|
35
|
+
# Start this Sidekiq instance. If an embedding process already
|
36
|
+
# has a heartbeat thread, caller can use `async_beat: false`
|
37
|
+
# and instead have thread call Launcher#heartbeat every N seconds.
|
38
|
+
def run(async_beat: true)
|
39
|
+
Sidekiq.freeze!
|
40
|
+
logger.debug { @config.merge!({}) }
|
41
|
+
@thread = safe_thread("heartbeat", &method(:start_heartbeat)) if async_beat
|
34
42
|
@poller.start
|
35
|
-
@
|
43
|
+
@managers.each(&:start)
|
36
44
|
end
|
37
45
|
|
38
46
|
# Stops this instance from processing any more jobs,
|
39
|
-
#
|
40
47
|
def quiet
|
48
|
+
return if @done
|
49
|
+
|
41
50
|
@done = true
|
42
|
-
@
|
51
|
+
@managers.each(&:quiet)
|
43
52
|
@poller.terminate
|
53
|
+
fire_event(:quiet, reverse: true)
|
44
54
|
end
|
45
55
|
|
46
|
-
# Shuts down
|
47
|
-
# return until all work is complete and cleaned up.
|
48
|
-
# It can take up to the timeout to complete.
|
56
|
+
# Shuts down this Sidekiq instance. Waits up to the deadline for all jobs to complete.
|
49
57
|
def stop
|
50
|
-
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @
|
58
|
+
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @config[:timeout]
|
51
59
|
|
52
|
-
|
53
|
-
@
|
54
|
-
|
55
|
-
|
56
|
-
|
60
|
+
quiet
|
61
|
+
stoppers = @managers.map do |mgr|
|
62
|
+
Thread.new do
|
63
|
+
mgr.stop(deadline)
|
64
|
+
end
|
65
|
+
end
|
57
66
|
|
58
|
-
|
59
|
-
|
60
|
-
strategy = @options[:fetch]
|
61
|
-
strategy.bulk_requeue([], @options)
|
67
|
+
fire_event(:shutdown, reverse: true)
|
68
|
+
stoppers.each(&:join)
|
62
69
|
|
63
70
|
clear_heartbeat
|
64
71
|
end
|
@@ -67,91 +74,86 @@ module Sidekiq
|
|
67
74
|
@done
|
68
75
|
end
|
69
76
|
|
77
|
+
# If embedding Sidekiq, you can have the process heartbeat
|
78
|
+
# call this method to regularly heartbeat rather than creating
|
79
|
+
# a separate thread.
|
80
|
+
def heartbeat
|
81
|
+
❤
|
82
|
+
end
|
83
|
+
|
70
84
|
private unless $TESTING
|
71
85
|
|
72
|
-
BEAT_PAUSE =
|
86
|
+
BEAT_PAUSE = 10
|
73
87
|
|
74
88
|
def start_heartbeat
|
75
89
|
loop do
|
76
|
-
|
90
|
+
beat
|
77
91
|
sleep BEAT_PAUSE
|
78
92
|
end
|
79
|
-
|
93
|
+
logger.info("Heartbeat stopping...")
|
94
|
+
end
|
95
|
+
|
96
|
+
def beat
|
97
|
+
$0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ") unless @embedded
|
98
|
+
❤
|
80
99
|
end
|
81
100
|
|
82
101
|
def clear_heartbeat
|
102
|
+
flush_stats
|
103
|
+
|
83
104
|
# Remove record from Redis since we are shutting down.
|
84
105
|
# Note we don't stop the heartbeat thread; if the process
|
85
106
|
# doesn't actually exit, it'll reappear in the Web UI.
|
86
|
-
|
87
|
-
conn.pipelined do
|
88
|
-
|
89
|
-
|
107
|
+
redis do |conn|
|
108
|
+
conn.pipelined do |pipeline|
|
109
|
+
pipeline.srem("processes", [identity])
|
110
|
+
pipeline.unlink("#{identity}:work")
|
90
111
|
end
|
91
112
|
end
|
92
113
|
rescue
|
93
114
|
# best effort, ignore network errors
|
94
115
|
end
|
95
116
|
|
96
|
-
def
|
97
|
-
$0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
|
98
|
-
|
99
|
-
❤
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.flush_stats
|
117
|
+
def flush_stats
|
103
118
|
fails = Processor::FAILURE.reset
|
104
119
|
procd = Processor::PROCESSED.reset
|
105
120
|
return if fails + procd == 0
|
106
121
|
|
107
122
|
nowdate = Time.now.utc.strftime("%Y-%m-%d")
|
108
123
|
begin
|
109
|
-
|
110
|
-
conn.pipelined do
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
124
|
+
redis do |conn|
|
125
|
+
conn.pipelined do |pipeline|
|
126
|
+
pipeline.incrby("stat:processed", procd)
|
127
|
+
pipeline.incrby("stat:processed:#{nowdate}", procd)
|
128
|
+
pipeline.expire("stat:processed:#{nowdate}", STATS_TTL)
|
129
|
+
|
130
|
+
pipeline.incrby("stat:failed", fails)
|
131
|
+
pipeline.incrby("stat:failed:#{nowdate}", fails)
|
132
|
+
pipeline.expire("stat:failed:#{nowdate}", STATS_TTL)
|
118
133
|
end
|
119
134
|
end
|
120
135
|
rescue => ex
|
121
|
-
|
122
|
-
# try to handle the exception
|
123
|
-
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
136
|
+
logger.warn("Unable to flush stats: #{ex}")
|
124
137
|
end
|
125
138
|
end
|
126
|
-
at_exit(&method(:flush_stats))
|
127
139
|
|
128
140
|
def ❤
|
129
141
|
key = identity
|
130
142
|
fails = procd = 0
|
131
143
|
|
132
144
|
begin
|
133
|
-
|
134
|
-
|
135
|
-
curstate = Processor::
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
conn.multi do
|
142
|
-
conn.incrby("stat:processed", procd)
|
143
|
-
conn.incrby("stat:processed:#{nowdate}", procd)
|
144
|
-
conn.expire("stat:processed:#{nowdate}", STATS_TTL)
|
145
|
-
|
146
|
-
conn.incrby("stat:failed", fails)
|
147
|
-
conn.incrby("stat:failed:#{nowdate}", fails)
|
148
|
-
conn.expire("stat:failed:#{nowdate}", STATS_TTL)
|
149
|
-
|
150
|
-
conn.unlink(workers_key)
|
145
|
+
flush_stats
|
146
|
+
|
147
|
+
curstate = Processor::WORK_STATE.dup
|
148
|
+
redis do |conn|
|
149
|
+
# work is the current set of executing jobs
|
150
|
+
work_key = "#{key}:work"
|
151
|
+
conn.pipelined do |transaction|
|
152
|
+
transaction.unlink(work_key)
|
151
153
|
curstate.each_pair do |tid, hash|
|
152
|
-
|
154
|
+
transaction.hset(work_key, tid, Sidekiq.dump_json(hash))
|
153
155
|
end
|
154
|
-
|
156
|
+
transaction.expire(work_key, 60)
|
155
157
|
end
|
156
158
|
end
|
157
159
|
|
@@ -160,27 +162,26 @@ module Sidekiq
|
|
160
162
|
fails = procd = 0
|
161
163
|
kb = memory_usage(::Process.pid)
|
162
164
|
|
163
|
-
_, exists, _, _,
|
164
|
-
conn.multi {
|
165
|
-
|
166
|
-
|
167
|
-
|
165
|
+
_, exists, _, _, signal = redis { |conn|
|
166
|
+
conn.multi { |transaction|
|
167
|
+
transaction.sadd("processes", [key])
|
168
|
+
transaction.exists(key)
|
169
|
+
transaction.hmset(key, "info", to_json,
|
168
170
|
"busy", curstate.size,
|
169
171
|
"beat", Time.now.to_f,
|
170
172
|
"rtt_us", rtt,
|
171
|
-
"quiet", @done,
|
173
|
+
"quiet", @done.to_s,
|
172
174
|
"rss", kb)
|
173
|
-
|
174
|
-
|
175
|
+
transaction.expire(key, 60)
|
176
|
+
transaction.rpop("#{key}-signals")
|
175
177
|
}
|
176
178
|
}
|
177
179
|
|
178
180
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
179
|
-
fire_event(:heartbeat) unless exists
|
181
|
+
fire_event(:heartbeat) unless exists > 0
|
182
|
+
fire_event(:beat, oneshot: false)
|
180
183
|
|
181
|
-
|
182
|
-
|
183
|
-
::Process.kill(msg, ::Process.pid)
|
184
|
+
::Process.kill(signal, ::Process.pid) if signal && !@embedded
|
184
185
|
rescue => e
|
185
186
|
# ignore all redis/network issues
|
186
187
|
logger.error("heartbeat: #{e}")
|
@@ -198,7 +199,7 @@ module Sidekiq
|
|
198
199
|
|
199
200
|
def check_rtt
|
200
201
|
a = b = 0
|
201
|
-
|
202
|
+
redis do |x|
|
202
203
|
a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
|
203
204
|
x.ping
|
204
205
|
b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
|
@@ -209,12 +210,12 @@ module Sidekiq
|
|
209
210
|
# Workable is < 10,000µs
|
210
211
|
# Log a warning if it's a disaster.
|
211
212
|
if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
|
212
|
-
|
213
|
+
logger.warn <<~EOM
|
213
214
|
Your Redis network connection is performing extremely poorly.
|
214
215
|
Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
|
215
216
|
Ensure Redis is running in the same AZ or datacenter as Sidekiq.
|
216
217
|
If these values are close to 100,000, that means your Sidekiq process may be
|
217
|
-
CPU
|
218
|
+
CPU-saturated; reduce your concurrency and/or see https://github.com/sidekiq/sidekiq/discussions/5039
|
218
219
|
EOM
|
219
220
|
RTT_READINGS.reset
|
220
221
|
end
|
@@ -246,14 +247,21 @@ module Sidekiq
|
|
246
247
|
"hostname" => hostname,
|
247
248
|
"started_at" => Time.now.to_f,
|
248
249
|
"pid" => ::Process.pid,
|
249
|
-
"tag" => @
|
250
|
-
"concurrency" => @
|
251
|
-
"queues" => @
|
252
|
-
"
|
253
|
-
"
|
250
|
+
"tag" => @config[:tag] || "",
|
251
|
+
"concurrency" => @config.total_concurrency,
|
252
|
+
"queues" => @config.capsules.values.flat_map { |cap| cap.queues }.uniq,
|
253
|
+
"weights" => to_weights,
|
254
|
+
"labels" => @config[:labels].to_a,
|
255
|
+
"identity" => identity,
|
256
|
+
"version" => Sidekiq::VERSION,
|
257
|
+
"embedded" => @embedded
|
254
258
|
}
|
255
259
|
end
|
256
260
|
|
261
|
+
def to_weights
|
262
|
+
@config.capsules.values.map(&:weights)
|
263
|
+
end
|
264
|
+
|
257
265
|
def to_json
|
258
266
|
# this data changes infrequently so dump it to a string
|
259
267
|
# now so we don't need to dump it every heartbeat.
|