sidekiq 5.2.9 → 6.4.1
Note: this version of sidekiq was flagged as potentially problematic.
- checksums.yaml +4 -4
- data/Changes.md +318 -1
- data/LICENSE +3 -3
- data/README.md +23 -34
- data/bin/sidekiq +27 -3
- data/bin/sidekiqload +67 -61
- data/bin/sidekiqmon +8 -0
- data/lib/generators/sidekiq/job_generator.rb +57 -0
- data/lib/generators/sidekiq/templates/{worker.rb.erb → job.rb.erb} +2 -2
- data/lib/generators/sidekiq/templates/{worker_spec.rb.erb → job_spec.rb.erb} +1 -1
- data/lib/generators/sidekiq/templates/{worker_test.rb.erb → job_test.rb.erb} +1 -1
- data/lib/sidekiq/api.rb +335 -267
- data/lib/sidekiq/cli.rb +164 -182
- data/lib/sidekiq/client.rb +58 -61
- data/lib/sidekiq/delay.rb +7 -6
- data/lib/sidekiq/exception_handler.rb +10 -12
- data/lib/sidekiq/extensions/action_mailer.rb +13 -22
- data/lib/sidekiq/extensions/active_record.rb +13 -10
- data/lib/sidekiq/extensions/class_methods.rb +14 -11
- data/lib/sidekiq/extensions/generic_proxy.rb +6 -4
- data/lib/sidekiq/fetch.rb +40 -32
- data/lib/sidekiq/job.rb +13 -0
- data/lib/sidekiq/job_logger.rb +33 -7
- data/lib/sidekiq/job_retry.rb +70 -71
- data/lib/sidekiq/job_util.rb +65 -0
- data/lib/sidekiq/launcher.rb +161 -71
- data/lib/sidekiq/logger.rb +170 -0
- data/lib/sidekiq/manager.rb +17 -21
- data/lib/sidekiq/middleware/chain.rb +20 -8
- data/lib/sidekiq/middleware/current_attributes.rb +57 -0
- data/lib/sidekiq/middleware/i18n.rb +5 -7
- data/lib/sidekiq/monitor.rb +133 -0
- data/lib/sidekiq/paginator.rb +20 -16
- data/lib/sidekiq/processor.rb +71 -70
- data/lib/sidekiq/rails.rb +40 -37
- data/lib/sidekiq/redis_connection.rb +48 -48
- data/lib/sidekiq/scheduled.rb +62 -28
- data/lib/sidekiq/sd_notify.rb +149 -0
- data/lib/sidekiq/systemd.rb +24 -0
- data/lib/sidekiq/testing/inline.rb +2 -1
- data/lib/sidekiq/testing.rb +36 -27
- data/lib/sidekiq/util.rb +57 -15
- data/lib/sidekiq/version.rb +2 -1
- data/lib/sidekiq/web/action.rb +15 -11
- data/lib/sidekiq/web/application.rb +88 -75
- data/lib/sidekiq/web/csrf_protection.rb +180 -0
- data/lib/sidekiq/web/helpers.rb +109 -92
- data/lib/sidekiq/web/router.rb +23 -19
- data/lib/sidekiq/web.rb +61 -105
- data/lib/sidekiq/worker.rb +247 -105
- data/lib/sidekiq.rb +77 -44
- data/sidekiq.gemspec +23 -16
- data/web/assets/images/apple-touch-icon.png +0 -0
- data/web/assets/javascripts/application.js +83 -64
- data/web/assets/javascripts/dashboard.js +54 -73
- data/web/assets/stylesheets/application-dark.css +143 -0
- data/web/assets/stylesheets/application-rtl.css +0 -4
- data/web/assets/stylesheets/application.css +45 -232
- data/web/locales/ar.yml +8 -2
- data/web/locales/de.yml +14 -2
- data/web/locales/en.yml +6 -1
- data/web/locales/es.yml +18 -2
- data/web/locales/fr.yml +10 -3
- data/web/locales/ja.yml +7 -1
- data/web/locales/lt.yml +83 -0
- data/web/locales/pl.yml +4 -4
- data/web/locales/ru.yml +4 -0
- data/web/locales/vi.yml +83 -0
- data/web/views/_footer.erb +1 -1
- data/web/views/_job_info.erb +3 -2
- data/web/views/_poll_link.erb +2 -5
- data/web/views/_summary.erb +7 -7
- data/web/views/busy.erb +54 -20
- data/web/views/dashboard.erb +22 -14
- data/web/views/dead.erb +3 -3
- data/web/views/layout.erb +3 -1
- data/web/views/morgue.erb +9 -6
- data/web/views/queue.erb +19 -10
- data/web/views/queues.erb +10 -2
- data/web/views/retries.erb +11 -8
- data/web/views/retry.erb +3 -3
- data/web/views/scheduled.erb +5 -2
- metadata +34 -64
- data/.circleci/config.yml +0 -61
- data/.github/contributing.md +0 -32
- data/.github/issue_template.md +0 -11
- data/.gitignore +0 -15
- data/.travis.yml +0 -11
- data/3.0-Upgrade.md +0 -70
- data/4.0-Upgrade.md +0 -53
- data/5.0-Upgrade.md +0 -56
- data/COMM-LICENSE +0 -97
- data/Ent-Changes.md +0 -238
- data/Gemfile +0 -23
- data/Pro-2.0-Upgrade.md +0 -138
- data/Pro-3.0-Upgrade.md +0 -44
- data/Pro-4.0-Upgrade.md +0 -35
- data/Pro-Changes.md +0 -759
- data/Rakefile +0 -9
- data/bin/sidekiqctl +0 -20
- data/code_of_conduct.md +0 -50
- data/lib/generators/sidekiq/worker_generator.rb +0 -49
- data/lib/sidekiq/core_ext.rb +0 -1
- data/lib/sidekiq/ctl.rb +0 -221
- data/lib/sidekiq/logging.rb +0 -122
- data/lib/sidekiq/middleware/server/active_record.rb +0 -23
data/lib/sidekiq/job_retry.rb
CHANGED
@@ -1,6 +1,10 @@
 # frozen_string_literal: true
-require 'sidekiq/scheduled'
-require 'sidekiq/api'
+
+require "sidekiq/scheduled"
+require "sidekiq/api"
+
+require "zlib"
+require "base64"
 
 module Sidekiq
   ##
@@ -30,9 +34,10 @@ module Sidekiq
   # The job will be retried this number of times before giving up. (If simply
   # 'true', Sidekiq retries 25 times)
   #
-  # We'll add a bit more data to the job to support retries:
+  # Relevant options for job retries:
   #
-  # * 'queue' - the queue to use
+  # * 'queue' - the queue for the initial job
+  # * 'retry_queue' - if job retries should be pushed to a different (e.g. lower priority) queue
   # * 'retry_count' - number of times we've retried so far.
   # * 'error_message' - the message from the exception
   # * 'error_class' - the exception class
@@ -48,15 +53,17 @@ module Sidekiq
   #
   #   Sidekiq.options[:max_retries] = 7
   #
-  # or limit the number of retries for a particular worker with:
+  # or limit the number of retries for a particular worker and send retries to
+  # a low priority queue with:
   #
   #   class MyWorker
   #     include Sidekiq::Worker
-  #     sidekiq_options :retry => 10
+  #     sidekiq_options retry: 10, retry_queue: 'low'
   #   end
   #
   class JobRetry
     class Handled < ::RuntimeError; end
+
     class Skip < Handled; end
 
     include Sidekiq::Util
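For illustration, a worker combining the retry and retry_queue options documented in the hunk above (the class name and body are hypothetical):

    class ImageUploadWorker
      include Sidekiq::Worker
      # first run uses the worker's normal queue; retries go to 'low'
      sidekiq_options retry: 10, retry_queue: "low"

      def perform(image_id)
        # ...
      end
    end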
@@ -70,7 +77,7 @@ module Sidekiq
     # The global retry handler requires only the barest of data.
     # We want to be able to retry as much as possible so we don't
     # require the worker to be instantiated.
-    def global(msg, queue)
+    def global(jobstr, queue)
       yield
     rescue Handled => ex
       raise ex
@@ -81,22 +88,20 @@ module Sidekiq
       # ignore, will be pushed back onto queue during hard_shutdown
       raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
 
-      if msg['retry']
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"]
         attempt_retry(nil, msg, queue, e)
       else
         Sidekiq.death_handlers.each do |handler|
-          begin
-            handler.call(msg, e)
-          rescue => handler_ex
-            handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
-          end
+          handler.call(msg, e)
+        rescue => handler_ex
+          handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
         end
       end
 
       raise Handled
     end
 
-
     # The local retry support means that any errors that occur within
     # this block can be associated with the given worker instance.
     # This is required to support the `sidekiq_retries_exhausted` block.
@@ -105,7 +110,7 @@ module Sidekiq
     # exception so the global block does not reprocess the error. The
     # Skip exception is unwrapped within Sidekiq::Processor#process before
     # calling the handle_exception handlers.
-    def local(worker, msg, queue)
+    def local(worker, jobstr, queue)
       yield
     rescue Handled => ex
       raise ex
@@ -116,11 +121,12 @@ module Sidekiq
       # ignore, will be pushed back onto queue during hard_shutdown
       raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
 
-      if msg['retry'].nil?
-        msg['retry'] = worker.class.get_sidekiq_options['retry']
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"].nil?
+        msg["retry"] = worker.class.get_sidekiq_options["retry"]
       end
 
-      raise e unless msg['retry']
+      raise e unless msg["retry"]
       attempt_retry(worker, msg, queue, e)
       # We've handled this error associated with this job, don't
       # need to handle it at the global level
@@ -133,13 +139,9 @@ module Sidekiq
     # instantiate the worker instance. All access must be guarded and
     # best effort.
     def attempt_retry(worker, msg, queue, exception)
-      max_retry_attempts = retry_attempts_from(msg['retry'], @max_retries)
+      max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
 
-      msg['queue'] = if msg['retry_queue']
-        msg['retry_queue']
-      else
-        queue
-      end
+      msg["queue"] = (msg["retry_queue"] || queue)
 
       m = exception_message(exception)
       if m.respond_to?(:scrub!)
@@ -147,32 +149,34 @@ module Sidekiq
         m.scrub!
       end
 
-      msg['error_message'] = m
-      msg['error_class'] = exception.class.name
-      count = if msg['retry_count']
-        msg['retried_at'] = Time.now.to_f
-        msg['retry_count'] += 1
+      msg["error_message"] = m
+      msg["error_class"] = exception.class.name
+      count = if msg["retry_count"]
+        msg["retried_at"] = Time.now.to_f
+        msg["retry_count"] += 1
       else
-        msg['failed_at'] = Time.now.to_f
-        msg['retry_count'] = 0
+        msg["failed_at"] = Time.now.to_f
+        msg["retry_count"] = 0
       end
 
-      if msg['backtrace'] == true
-        msg['error_backtrace'] = exception.backtrace
-      elsif !msg['backtrace']
-        # do nothing
-      elsif msg['backtrace'].to_i != 0
-        msg['error_backtrace'] = exception.backtrace[0...msg['backtrace'].to_i]
+      if msg["backtrace"]
+        lines = if msg["backtrace"] == true
+          exception.backtrace
+        else
+          exception.backtrace[0...msg["backtrace"].to_i]
+        end
+
+        msg["error_backtrace"] = compress_backtrace(lines)
       end
 
       if count < max_retry_attempts
         delay = delay_for(worker, count, exception)
         # Logging here can break retries if the logging device raises ENOSPC #3979
-        #logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
         retry_at = Time.now.to_f + delay
         payload = Sidekiq.dump_json(msg)
         Sidekiq.redis do |conn|
-          conn.zadd('retry', retry_at.to_s, payload)
+          conn.zadd("retry", retry_at.to_s, payload)
         end
       else
         # Goodbye dear message, you (re)tried your best I'm sure.
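The backtrace branch above is driven by the worker-level backtrace option; a minimal sketch (class name hypothetical):

    class ReportWorker
      include Sidekiq::Worker
      # true stores the full backtrace (compressed as of 6.x);
      # an Integer stores only that many leading lines of the trace.
      sidekiq_options backtrace: 10
    end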
@@ -182,25 +186,23 @@ module Sidekiq
 
     def retries_exhausted(worker, msg, exception)
       begin
-        block = worker && worker.sidekiq_retries_exhausted_block
-        block.call(msg, exception) if block
+        block = worker&.sidekiq_retries_exhausted_block
+        block&.call(msg, exception)
       rescue => e
-        handle_exception(e, { context: "Error calling retries_exhausted", job: msg })
+        handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
       end
 
+      send_to_morgue(msg) unless msg["dead"] == false
+
       Sidekiq.death_handlers.each do |handler|
-        begin
-          handler.call(msg, exception)
-        rescue => e
-          handle_exception(e, { context: "Error calling death handler", job: msg })
-        end
+        handler.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling death handler", job: msg})
       end
-
-      send_to_morgue(msg) unless msg['dead'] == false
     end
 
     def send_to_morgue(msg)
-      logger.info { "Adding dead #{msg['class']} job #{msg['jid']}" }
+      logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
       payload = Sidekiq.dump_json(msg)
       DeadSet.new.kill(payload, notify_failure: false)
     end
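Each death handler receives the job hash and the exception, as the loop above shows. A minimal sketch of registering one in an initializer (the logging body is illustrative):

    Sidekiq.death_handlers << ->(job, ex) do
      Sidekiq.logger.error("#{job["class"]} #{job["jid"]} died: #{ex.message}")
    end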
@@ -214,25 +216,19 @@ module Sidekiq
     end
 
     def delay_for(worker, count, exception)
-      if worker && worker.sidekiq_retry_in_block
+      jitter = rand(10) * (count + 1)
+      if worker&.sidekiq_retry_in_block
         custom_retry_in = retry_in(worker, count, exception).to_i
-        return custom_retry_in if custom_retry_in > 0
+        return custom_retry_in + jitter if custom_retry_in > 0
       end
-      seconds_to_delay(count)
-    end
-
-    # delayed_job uses the same basic formula
-    def seconds_to_delay(count)
-      (count ** 4) + 15 + (rand(30)*(count+1))
+      (count**4) + 15 + jitter
     end
 
     def retry_in(worker, count, exception)
-      begin
-        worker.sidekiq_retry_in_block.call(count, exception)
-      rescue Exception => e
-        handle_exception(e, { context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default" })
-        nil
-      end
+      worker.sidekiq_retry_in_block.call(count, exception)
+    rescue Exception => e
+      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
+      nil
     end
 
     def exception_caused_by_shutdown?(e, checked_causes = [])
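With the delay_for change above, a custom sidekiq_retry_in result now gets rand(10) * (count + 1) jitter added on top, and a nil or zero return falls back to the default (count**4) + 15 + jitter curve. A sketch of such a block (worker name hypothetical):

    class FlakyApiWorker
      include Sidekiq::Worker

      # Return the base delay in seconds for the Nth retry.
      sidekiq_retry_in do |count, exception|
        30 * (count + 1)
      end
    end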
@@ -249,14 +245,17 @@ module Sidekiq
     # Extract message from exception.
     # Set a default if the message raises an error
     def exception_message(exception)
-      begin
-        # App code can stuff all sorts of crazy binary data into the error message
-        # that won't convert to JSON.
-        exception.message.to_s[0, 10_000]
-      rescue
-        "!!! ERROR MESSAGE THREW AN ERROR !!!".dup
-      end
+      # App code can stuff all sorts of crazy binary data into the error message
+      # that won't convert to JSON.
+      exception.message.to_s[0, 10_000]
+    rescue
+      +"!!! ERROR MESSAGE THREW AN ERROR !!!"
     end
 
+    def compress_backtrace(backtrace)
+      serialized = Sidekiq.dump_json(backtrace)
+      compressed = Zlib::Deflate.deflate(serialized)
+      Base64.encode64(compressed)
+    end
   end
 end
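Reversing compress_backtrace by hand is straightforward; a minimal sketch for inspecting the "error_backtrace" field of a retry or dead job payload (the helper name is hypothetical):

    require "base64"
    require "json"
    require "zlib"

    # Inverts Base64.encode64(Zlib::Deflate.deflate(Sidekiq.dump_json(lines)))
    def decode_backtrace(encoded)
      JSON.parse(Zlib::Inflate.inflate(Base64.decode64(encoded)))
    end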
data/lib/sidekiq/job_util.rb
ADDED
@@ -0,0 +1,65 @@
+require "securerandom"
+require "time"
+
+module Sidekiq
+  module JobUtil
+    # These functions encapsulate various job utilities.
+    # They must be simple and free from side effects.
+
+    def validate(item)
+      raise(ArgumentError, "Job must be a Hash with 'class' and 'args' keys: `#{item}`") unless item.is_a?(Hash) && item.key?("class") && item.key?("args")
+      raise(ArgumentError, "Job args must be an Array: `#{item}`") unless item["args"].is_a?(Array)
+      raise(ArgumentError, "Job class must be either a Class or String representation of the class name: `#{item}`") unless item["class"].is_a?(Class) || item["class"].is_a?(String)
+      raise(ArgumentError, "Job 'at' must be a Numeric timestamp: `#{item}`") if item.key?("at") && !item["at"].is_a?(Numeric)
+      raise(ArgumentError, "Job tags must be an Array: `#{item}`") if item["tags"] && !item["tags"].is_a?(Array)
+
+      if Sidekiq.options[:on_complex_arguments] == :raise
+        msg = <<~EOM
+          Job arguments to #{item["class"]} must be native JSON types, see https://github.com/mperham/sidekiq/wiki/Best-Practices.
+          To disable this error, remove `Sidekiq.strict_args!` from your initializer.
+        EOM
+        raise(ArgumentError, msg) unless json_safe?(item)
+      elsif Sidekiq.options[:on_complex_arguments] == :warn
+        Sidekiq.logger.warn <<~EOM unless json_safe?(item)
+          Job arguments to #{item["class"]} do not serialize to JSON safely. This will raise an error in
+          Sidekiq 7.0. See https://github.com/mperham/sidekiq/wiki/Best-Practices or raise an error today
+          by calling `Sidekiq.strict_args!` during Sidekiq initialization.
+        EOM
+      end
+    end
+
+    def normalize_item(item)
+      validate(item)
+
+      # merge in the default sidekiq_options for the item's class and/or wrapped element
+      # this allows ActiveJobs to control sidekiq_options too.
+      defaults = normalized_hash(item["class"])
+      defaults = defaults.merge(item["wrapped"].get_sidekiq_options) if item["wrapped"].respond_to?(:get_sidekiq_options)
+      item = defaults.merge(item)
+
+      raise(ArgumentError, "Job must include a valid queue name") if item["queue"].nil? || item["queue"] == ""
+
+      item["class"] = item["class"].to_s
+      item["queue"] = item["queue"].to_s
+      item["jid"] ||= SecureRandom.hex(12)
+      item["created_at"] ||= Time.now.to_f
+
+      item
+    end
+
+    def normalized_hash(item_class)
+      if item_class.is_a?(Class)
+        raise(ArgumentError, "Message must include a Sidekiq::Worker class, not class name: #{item_class.ancestors.inspect}") unless item_class.respond_to?(:get_sidekiq_options)
+        item_class.get_sidekiq_options
+      else
+        Sidekiq.default_worker_options
+      end
+    end
+
+    private
+
+    def json_safe?(item)
+      JSON.parse(JSON.dump(item["args"])) == item["args"]
+    end
+  end
+end
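The on_complex_arguments option read by validate above is set via Sidekiq.strict_args!. A sketch of opting in from an initializer, with modes matching the :raise/:warn checks above (the worker call is hypothetical):

    Sidekiq.strict_args!          # on_complex_arguments = :raise
    Sidekiq.strict_args!(:warn)   # log a warning instead of raising
    Sidekiq.strict_args!(false)   # disable the check entirely

    SomeWorker.perform_async(Time.now)  # Time is not JSON-native: raises in :raise mode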
data/lib/sidekiq/launcher.rb
CHANGED
@@ -1,21 +1,28 @@
 # frozen_string_literal: true
-require 'sidekiq/manager'
-require 'sidekiq/fetch'
-require 'sidekiq/scheduled'
+
+require "sidekiq/manager"
+require "sidekiq/fetch"
+require "sidekiq/scheduled"
 
 module Sidekiq
-  # The Launcher is a very simple Actor whose job is to
-  # start, monitor and stop the core Actors in Sidekiq.
-  # If any of these actors die, the Sidekiq process exits
-  # immediately.
+  # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
     include Util
 
-    attr_accessor :manager, :poller, :fetcher
+    STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
 
-    STATS_TTL = 5*365*24*60*60
+    PROCTITLES = [
+      proc { "sidekiq" },
+      proc { Sidekiq::VERSION },
+      proc { |me, data| data["tag"] },
+      proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
+      proc { |me, data| "stopping" if me.stopping? }
+    ]
+
+    attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
+      options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
       @poller = Sidekiq::Scheduled::Poller.new
       @done = false
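With the PROCTITLES procs above, the heartbeat renders a process title along these lines (tag and counts illustrative):

    sidekiq 6.4.1 myapp [2 of 10 busy]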
@@ -50,7 +57,7 @@ module Sidekiq
 
       # Requeue everything in case there was a worker who grabbed work while stopped
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy = @options[:fetch]
       strategy.bulk_requeue([], @options)
 
       clear_heartbeat
@@ -62,17 +69,66 @@
 
     private unless $TESTING
 
+    BEAT_PAUSE = 5
+
+    def start_heartbeat
+      loop do
+        heartbeat
+        sleep BEAT_PAUSE
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do |pipeline|
+          pipeline.srem("processes", identity)
+          pipeline.unlink("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
     def heartbeat
-      results = Sidekiq::CLI::PROCTITLES.map { |x| x.(self, to_data) }
-      results.compact!
-      $0 = results.join(' ')
+      $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
 
       ❤
     end
 
+    def self.flush_stats
+      fails = Processor::FAILURE.reset
+      procd = Processor::PROCESSED.reset
+      return if fails + procd == 0
+
+      nowdate = Time.now.utc.strftime("%Y-%m-%d")
+      begin
+        Sidekiq.redis do |conn|
+          conn.pipelined do |pipeline|
+            pipeline.incrby("stat:processed", procd)
+            pipeline.incrby("stat:processed:#{nowdate}", procd)
+            pipeline.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            pipeline.incrby("stat:failed", fails)
+            pipeline.incrby("stat:failed:#{nowdate}", fails)
+            pipeline.expire("stat:failed:#{nowdate}", STATS_TTL)
+          end
+        end
+      rescue => ex
+        # we're exiting the process, things might be shut down so don't
+        # try to handle the exception
+        Sidekiq.logger.warn("Unable to flush stats: #{ex}")
+      end
+    end
+    at_exit(&method(:flush_stats))
+
     def ❤
       key = identity
       fails = procd = 0
+
       begin
         fails = Processor::FAILURE.reset
         procd = Processor::PROCESSED.reset
@@ -80,94 +136,128 @@ module Sidekiq
 
         workers_key = "#{key}:workers"
         nowdate = Time.now.utc.strftime("%Y-%m-%d")
+
         Sidekiq.redis do |conn|
-          conn.multi do
-            conn.incrby("stat:processed", procd)
-            conn.incrby("stat:processed:#{nowdate}", procd)
-            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+          conn.multi do |transaction|
+            transaction.incrby("stat:processed", procd)
+            transaction.incrby("stat:processed:#{nowdate}", procd)
+            transaction.expire("stat:processed:#{nowdate}", STATS_TTL)
 
-            conn.incrby("stat:failed", fails)
-            conn.incrby("stat:failed:#{nowdate}", fails)
-            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+            transaction.incrby("stat:failed", fails)
+            transaction.incrby("stat:failed:#{nowdate}", fails)
+            transaction.expire("stat:failed:#{nowdate}", STATS_TTL)
 
-            conn.del(workers_key)
+            transaction.unlink(workers_key)
             curstate.each_pair do |tid, hash|
-              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
+              transaction.hset(workers_key, tid, Sidekiq.dump_json(hash))
             end
-            conn.expire(workers_key, 60)
+            transaction.expire(workers_key, 60)
           end
         end
+
+        rtt = check_rtt
+
         fails = procd = 0
+        kb = memory_usage(::Process.pid)
 
-        _, exists, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd("processes", key)
-            conn.exists(key)
-            conn.hmset(key, "info", to_json, "busy", curstate.size, "beat", Time.now.to_f, "quiet", @done)
-            conn.expire(key, 60)
-            conn.rpop("#{key}-signals")
-          end
-        end
+        _, exists, _, _, msg = Sidekiq.redis { |conn|
+          conn.multi { |transaction|
+            transaction.sadd("processes", key)
+            transaction.exists?(key)
+            transaction.hmset(key, "info", to_json,
+              "busy", curstate.size,
+              "beat", Time.now.to_f,
+              "rtt_us", rtt,
+              "quiet", @done,
+              "rss", kb)
+            transaction.expire(key, 60)
+            transaction.rpop("#{key}-signals")
+          }
+        }
 
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
-        fire_event(:heartbeat) if !exists
+        fire_event(:heartbeat) unless exists
 
         return unless msg
 
-        ::Process.kill(msg, $$)
+        ::Process.kill(msg, ::Process.pid)
       rescue => e
         # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+        logger.error("heartbeat: #{e}")
         # don't lose the counts if there was a network issue
         Processor::PROCESSED.incr(procd)
         Processor::FAILURE.incr(fails)
       end
     end
 
-    def start_heartbeat
-      while true
-        heartbeat
-        sleep 5
+    # We run the heartbeat every five seconds.
+    # Capture five samples of RTT, log a warning if each sample
+    # is above our warning threshold.
+    RTT_READINGS = RingBuffer.new(5)
+    RTT_WARNING_LEVEL = 50_000
+
+    def check_rtt
+      a = b = 0
+      Sidekiq.redis do |x|
+        a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
+        x.ping
+        b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
       end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      rtt = b - a
+      RTT_READINGS << rtt
+      # Ideal RTT for Redis is < 1000µs
+      # Workable is < 10,000µs
+      # Log a warning if it's a disaster.
+      if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
+        Sidekiq.logger.warn <<~EOM
+          Your Redis network connection is performing extremely poorly.
+          Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
+          Ensure Redis is running in the same AZ or datacenter as Sidekiq.
+          If these values are close to 100,000, that means your Sidekiq process may be
+          CPU overloaded; see https://github.com/mperham/sidekiq/discussions/5039
+        EOM
+        RTT_READINGS.reset
+      end
+      rtt
     end
 
-    def to_data
-      @data ||= begin
-        {
-          'hostname' => hostname,
-          'started_at' => Time.now.to_f,
-          'pid' => $$,
-          'tag' => @options[:tag] || '',
-          'concurrency' => @options[:concurrency],
-          'queues' => @options[:queues].uniq,
-          'labels' => @options[:labels],
-          'identity' => identity,
-        }
-      end
+    MEMORY_GRABBER = case RUBY_PLATFORM
+    when /linux/
+      ->(pid) {
+        IO.readlines("/proc/#{$$}/status").each do |line|
+          next unless line.start_with?("VmRSS:")
+          break line.split[1].to_i
+        end
+      }
+    when /darwin|bsd/
+      ->(pid) {
+        `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
+      }
+    else
+      ->(pid) { 0 }
     end
 
-    def to_json
-      @json ||= begin
-        # this data changes infrequently so dump it to a string
-        # now so we don't need to dump it every heartbeat.
-        Sidekiq.dump_json(to_data)
-      end
+    def memory_usage(pid)
+      MEMORY_GRABBER.call(pid)
     end
 
-    def clear_heartbeat
-      # Remove record from Redis since we are shutting down.
-      # Note we don't stop the heartbeat thread, if the process
-      # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
-        conn.pipelined do
-          conn.srem('processes', identity)
-          conn.del("#{identity}:workers")
-        end
-      end
-    rescue
-      # best effort, ignore network errors
+    def to_data
+      @data ||= {
+        "hostname" => hostname,
+        "started_at" => Time.now.to_f,
+        "pid" => ::Process.pid,
+        "tag" => @options[:tag] || "",
+        "concurrency" => @options[:concurrency],
+        "queues" => @options[:queues].uniq,
+        "labels" => @options[:labels],
+        "identity" => identity
+      }
     end
 
+    def to_json
+      # this data changes infrequently so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      @json ||= Sidekiq.dump_json(to_data)
+    end
   end
 end
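The RTT probe in check_rtt above can be reproduced standalone; a minimal sketch using the same monotonic clock:

    require "sidekiq"

    # Measure one Redis round-trip in microseconds, mirroring Launcher#check_rtt.
    a = b = 0
    Sidekiq.redis do |conn|
      a = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
      conn.ping
      b = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
    end
    puts "Redis RTT: #{b - a}us (ideal < 1000, warning threshold 50_000)"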