sidekiq 4.2.2 → 6.3.1
Potentially problematic release.
- checksums.yaml +5 -5
- data/Changes.md +516 -0
- data/LICENSE +2 -2
- data/README.md +23 -36
- data/bin/sidekiq +26 -2
- data/bin/sidekiqload +28 -38
- data/bin/sidekiqmon +8 -0
- data/lib/generators/sidekiq/templates/worker_spec.rb.erb +1 -1
- data/lib/generators/sidekiq/templates/worker_test.rb.erb +2 -2
- data/lib/generators/sidekiq/worker_generator.rb +21 -13
- data/lib/sidekiq/api.rb +401 -243
- data/lib/sidekiq/cli.rb +228 -212
- data/lib/sidekiq/client.rb +76 -53
- data/lib/sidekiq/delay.rb +41 -0
- data/lib/sidekiq/exception_handler.rb +12 -16
- data/lib/sidekiq/extensions/action_mailer.rb +13 -22
- data/lib/sidekiq/extensions/active_record.rb +13 -10
- data/lib/sidekiq/extensions/class_methods.rb +14 -11
- data/lib/sidekiq/extensions/generic_proxy.rb +12 -4
- data/lib/sidekiq/fetch.rb +39 -31
- data/lib/sidekiq/job.rb +13 -0
- data/lib/sidekiq/job_logger.rb +63 -0
- data/lib/sidekiq/job_retry.rb +259 -0
- data/lib/sidekiq/launcher.rb +170 -71
- data/lib/sidekiq/logger.rb +166 -0
- data/lib/sidekiq/manager.rb +17 -20
- data/lib/sidekiq/middleware/chain.rb +20 -8
- data/lib/sidekiq/middleware/current_attributes.rb +52 -0
- data/lib/sidekiq/middleware/i18n.rb +5 -7
- data/lib/sidekiq/monitor.rb +133 -0
- data/lib/sidekiq/paginator.rb +18 -14
- data/lib/sidekiq/processor.rb +169 -78
- data/lib/sidekiq/rails.rb +41 -36
- data/lib/sidekiq/redis_connection.rb +65 -20
- data/lib/sidekiq/scheduled.rb +85 -34
- data/lib/sidekiq/sd_notify.rb +149 -0
- data/lib/sidekiq/systemd.rb +24 -0
- data/lib/sidekiq/testing/inline.rb +2 -1
- data/lib/sidekiq/testing.rb +52 -26
- data/lib/sidekiq/util.rb +48 -15
- data/lib/sidekiq/version.rb +2 -1
- data/lib/sidekiq/web/action.rb +15 -17
- data/lib/sidekiq/web/application.rb +114 -92
- data/lib/sidekiq/web/csrf_protection.rb +180 -0
- data/lib/sidekiq/web/helpers.rb +151 -83
- data/lib/sidekiq/web/router.rb +27 -19
- data/lib/sidekiq/web.rb +85 -76
- data/lib/sidekiq/worker.rb +233 -43
- data/lib/sidekiq.rb +88 -64
- data/sidekiq.gemspec +24 -22
- data/web/assets/images/apple-touch-icon.png +0 -0
- data/web/assets/javascripts/application.js +86 -59
- data/web/assets/javascripts/dashboard.js +81 -85
- data/web/assets/stylesheets/application-dark.css +147 -0
- data/web/assets/stylesheets/application-rtl.css +242 -0
- data/web/assets/stylesheets/application.css +319 -141
- data/web/assets/stylesheets/bootstrap-rtl.min.css +9 -0
- data/web/assets/stylesheets/bootstrap.css +2 -2
- data/web/locales/ar.yml +87 -0
- data/web/locales/de.yml +14 -2
- data/web/locales/en.yml +8 -1
- data/web/locales/es.yml +22 -5
- data/web/locales/fa.yml +80 -0
- data/web/locales/fr.yml +10 -3
- data/web/locales/he.yml +79 -0
- data/web/locales/ja.yml +12 -4
- data/web/locales/lt.yml +83 -0
- data/web/locales/pl.yml +4 -4
- data/web/locales/ru.yml +4 -0
- data/web/locales/ur.yml +80 -0
- data/web/locales/vi.yml +83 -0
- data/web/views/_footer.erb +5 -2
- data/web/views/_job_info.erb +4 -3
- data/web/views/_nav.erb +4 -18
- data/web/views/_paging.erb +1 -1
- data/web/views/_poll_link.erb +2 -5
- data/web/views/_summary.erb +7 -7
- data/web/views/busy.erb +60 -22
- data/web/views/dashboard.erb +23 -15
- data/web/views/dead.erb +3 -3
- data/web/views/layout.erb +14 -3
- data/web/views/morgue.erb +19 -12
- data/web/views/queue.erb +24 -14
- data/web/views/queues.erb +14 -4
- data/web/views/retries.erb +22 -13
- data/web/views/retry.erb +4 -4
- data/web/views/scheduled.erb +7 -4
- metadata +44 -194
- data/.github/contributing.md +0 -32
- data/.github/issue_template.md +0 -4
- data/.gitignore +0 -12
- data/.travis.yml +0 -12
- data/3.0-Upgrade.md +0 -70
- data/4.0-Upgrade.md +0 -53
- data/COMM-LICENSE +0 -95
- data/Ent-Changes.md +0 -146
- data/Gemfile +0 -29
- data/Pro-2.0-Upgrade.md +0 -138
- data/Pro-3.0-Upgrade.md +0 -44
- data/Pro-Changes.md +0 -570
- data/Rakefile +0 -9
- data/bin/sidekiqctl +0 -99
- data/code_of_conduct.md +0 -50
- data/lib/sidekiq/core_ext.rb +0 -106
- data/lib/sidekiq/logging.rb +0 -106
- data/lib/sidekiq/middleware/server/active_record.rb +0 -13
- data/lib/sidekiq/middleware/server/logging.rb +0 -40
- data/lib/sidekiq/middleware/server/retry_jobs.rb +0 -205
- data/test/config.yml +0 -9
- data/test/env_based_config.yml +0 -11
- data/test/fake_env.rb +0 -1
- data/test/fixtures/en.yml +0 -2
- data/test/helper.rb +0 -75
- data/test/test_actors.rb +0 -138
- data/test/test_api.rb +0 -528
- data/test/test_cli.rb +0 -418
- data/test/test_client.rb +0 -266
- data/test/test_exception_handler.rb +0 -56
- data/test/test_extensions.rb +0 -127
- data/test/test_fetch.rb +0 -50
- data/test/test_launcher.rb +0 -95
- data/test/test_logging.rb +0 -35
- data/test/test_manager.rb +0 -50
- data/test/test_middleware.rb +0 -158
- data/test/test_processor.rb +0 -201
- data/test/test_rails.rb +0 -22
- data/test/test_redis_connection.rb +0 -132
- data/test/test_retry.rb +0 -326
- data/test/test_retry_exhausted.rb +0 -149
- data/test/test_scheduled.rb +0 -115
- data/test/test_scheduling.rb +0 -50
- data/test/test_sidekiq.rb +0 -107
- data/test/test_testing.rb +0 -143
- data/test/test_testing_fake.rb +0 -357
- data/test/test_testing_inline.rb +0 -94
- data/test/test_util.rb +0 -13
- data/test/test_web.rb +0 -666
- data/test/test_web_helpers.rb +0 -54
data/lib/sidekiq/job.rb
ADDED
@@ -0,0 +1,13 @@
+require "sidekiq/worker"
+
+module Sidekiq
+  # Sidekiq::Job is a new alias for Sidekiq::Worker as of Sidekiq 6.3.0.
+  # Use `include Sidekiq::Job` rather than `include Sidekiq::Worker`.
+  #
+  # The term "worker" is too generic and overly confusing, used in several
+  # different contexts meaning different things. Many people call a Sidekiq
+  # process a "worker". Some people call the thread that executes jobs a
+  # "worker". This change brings Sidekiq closer to ActiveJob where your job
+  # classes extend ApplicationJob.
+  Job = Worker
+end
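Since `Job = Worker` is a plain constant assignment, the alias is a drop-in replacement. A minimal sketch of a job class written against it (HardJob is a hypothetical class name, not part of the gem):

    # Behaves identically to `include Sidekiq::Worker`.
    class HardJob
      include Sidekiq::Job

      def perform(name, count)
        # do the work here
      end
    end

    HardJob.perform_async("bob", 5) # enqueue as usual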
data/lib/sidekiq/job_logger.rb
ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Sidekiq
+  class JobLogger
+    def initialize(logger = Sidekiq.logger)
+      @logger = logger
+    end
+
+    def call(item, queue)
+      start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      @logger.info("start")
+
+      yield
+
+      with_elapsed_time_context(start) do
+        @logger.info("done")
+      end
+    rescue Exception
+      with_elapsed_time_context(start) do
+        @logger.info("fail")
+      end
+
+      raise
+    end
+
+    def prepare(job_hash, &block)
+      level = job_hash["log_level"]
+      if level
+        @logger.log_at(level) do
+          Sidekiq::Context.with(job_hash_context(job_hash), &block)
+        end
+      else
+        Sidekiq::Context.with(job_hash_context(job_hash), &block)
+      end
+    end
+
+    def job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      h = {
+        class: job_hash["display_class"] || job_hash["wrapped"] || job_hash["class"],
+        jid: job_hash["jid"]
+      }
+      h[:bid] = job_hash["bid"] if job_hash["bid"]
+      h[:tags] = job_hash["tags"] if job_hash["tags"]
+      h
+    end
+
+    def with_elapsed_time_context(start, &block)
+      Sidekiq::Context.with(elapsed_time_context(start), &block)
+    end
+
+    def elapsed_time_context(start)
+      {elapsed: elapsed(start).to_s}
+    end
+
+    private
+
+    def elapsed(start)
+      (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
+    end
+  end
+end
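JobLogger times jobs with the monotonic clock rather than wall time, so clock adjustments cannot skew the `elapsed` value. A standalone sketch of the same pattern, using nothing beyond the Ruby standard library:

    # Time a block the way JobLogger#call does.
    start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
    sleep 0.25 # stand-in for the job's perform method
    elapsed = (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
    puts "elapsed: #{elapsed}" # => roughly 0.25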
data/lib/sidekiq/job_retry.rb
ADDED
@@ -0,0 +1,259 @@
+# frozen_string_literal: true
+
+require "sidekiq/scheduled"
+require "sidekiq/api"
+
+require "zlib"
+require "base64"
+
+module Sidekiq
+  ##
+  # Automatically retry jobs that fail in Sidekiq.
+  # Sidekiq's retry support assumes a typical development lifecycle:
+  #
+  #   0. Push some code changes with a bug in it.
+  #   1. Bug causes job processing to fail, Sidekiq's middleware captures
+  #      the job and pushes it onto a retry queue.
+  #   2. Sidekiq retries jobs in the retry queue multiple times with
+  #      an exponential delay, the job continues to fail.
+  #   3. After a few days, a developer deploys a fix. The job is
+  #      reprocessed successfully.
+  #   4. Once retries are exhausted, Sidekiq will give up and move the
+  #      job to the Dead Job Queue (aka morgue) where it must be dealt with
+  #      manually in the Web UI.
+  #   5. After 6 months on the DJQ, Sidekiq will discard the job.
+  #
+  # A job looks like:
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => true }
+  #
+  # The 'retry' option also accepts a number (in place of 'true'):
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => 5 }
+  #
+  # The job will be retried this number of times before giving up. (If simply
+  # 'true', Sidekiq retries 25 times)
+  #
+  # We'll add a bit more data to the job to support retries:
+  #
+  # * 'queue' - the queue to use
+  # * 'retry_count' - number of times we've retried so far.
+  # * 'error_message' - the message from the exception
+  # * 'error_class' - the exception class
+  # * 'failed_at' - the first time it failed
+  # * 'retried_at' - the last time it was retried
+  # * 'backtrace' - the number of lines of error backtrace to store
+  #
+  # We don't store the backtrace by default as that can add a lot of overhead
+  # to the job and everyone is using an error service, right?
+  #
+  # The default number of retries is 25 which works out to about 3 weeks.
+  # You can change the default maximum number of retries in your initializer:
+  #
+  #     Sidekiq.options[:max_retries] = 7
+  #
+  # or limit the number of retries for a particular worker with:
+  #
+  #     class MyWorker
+  #       include Sidekiq::Worker
+  #       sidekiq_options :retry => 10
+  #     end
+  #
+  class JobRetry
+    class Handled < ::RuntimeError; end
+
+    class Skip < Handled; end
+
+    include Sidekiq::Util
+
+    DEFAULT_MAX_RETRY_ATTEMPTS = 25
+
+    def initialize(options = {})
+      @max_retries = Sidekiq.options.merge(options).fetch(:max_retries, DEFAULT_MAX_RETRY_ATTEMPTS)
+    end
+
+    # The global retry handler requires only the barest of data.
+    # We want to be able to retry as much as possible so we don't
+    # require the worker to be instantiated.
+    def global(jobstr, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"]
+        attempt_retry(nil, msg, queue, e)
+      else
+        Sidekiq.death_handlers.each do |handler|
+          handler.call(msg, e)
+        rescue => handler_ex
+          handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
+        end
+      end
+
+      raise Handled
+    end
+
+    # The local retry support means that any errors that occur within
+    # this block can be associated with the given worker instance.
+    # This is required to support the `sidekiq_retries_exhausted` block.
+    #
+    # Note that any exception from the block is wrapped in the Skip
+    # exception so the global block does not reprocess the error. The
+    # Skip exception is unwrapped within Sidekiq::Processor#process before
+    # calling the handle_exception handlers.
+    def local(worker, jobstr, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      msg = Sidekiq.load_json(jobstr)
+      if msg["retry"].nil?
+        msg["retry"] = worker.class.get_sidekiq_options["retry"]
+      end
+
+      raise e unless msg["retry"]
+      attempt_retry(worker, msg, queue, e)
+      # We've handled this error associated with this job, don't
+      # need to handle it at the global level
+      raise Skip
+    end
+
+    private
+
+    # Note that +worker+ can be nil here if an error is raised before we can
+    # instantiate the worker instance. All access must be guarded and
+    # best effort.
+    def attempt_retry(worker, msg, queue, exception)
+      max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
+
+      msg["queue"] = (msg["retry_queue"] || queue)
+
+      m = exception_message(exception)
+      if m.respond_to?(:scrub!)
+        m.force_encoding("utf-8")
+        m.scrub!
+      end
+
+      msg["error_message"] = m
+      msg["error_class"] = exception.class.name
+      count = if msg["retry_count"]
+        msg["retried_at"] = Time.now.to_f
+        msg["retry_count"] += 1
+      else
+        msg["failed_at"] = Time.now.to_f
+        msg["retry_count"] = 0
+      end
+
+      if msg["backtrace"]
+        lines = if msg["backtrace"] == true
+          exception.backtrace
+        else
+          exception.backtrace[0...msg["backtrace"].to_i]
+        end
+
+        msg["error_backtrace"] = compress_backtrace(lines)
+      end
+
+      if count < max_retry_attempts
+        delay = delay_for(worker, count, exception)
+        # Logging here can break retries if the logging device raises ENOSPC #3979
+        # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        retry_at = Time.now.to_f + delay
+        payload = Sidekiq.dump_json(msg)
+        Sidekiq.redis do |conn|
+          conn.zadd("retry", retry_at.to_s, payload)
+        end
+      else
+        # Goodbye dear message, you (re)tried your best I'm sure.
+        retries_exhausted(worker, msg, exception)
+      end
+    end
+
+    def retries_exhausted(worker, msg, exception)
+      begin
+        block = worker&.sidekiq_retries_exhausted_block
+        block&.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
+      end
+
+      send_to_morgue(msg) unless msg["dead"] == false
+
+      Sidekiq.death_handlers.each do |handler|
+        handler.call(msg, exception)
+      rescue => e
+        handle_exception(e, {context: "Error calling death handler", job: msg})
+      end
+    end
+
+    def send_to_morgue(msg)
+      logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
+      payload = Sidekiq.dump_json(msg)
+      DeadSet.new.kill(payload, notify_failure: false)
+    end
+
+    def retry_attempts_from(msg_retry, default)
+      if msg_retry.is_a?(Integer)
+        msg_retry
+      else
+        default
+      end
+    end
+
+    def delay_for(worker, count, exception)
+      jitter = rand(10) * (count + 1)
+      if worker&.sidekiq_retry_in_block
+        custom_retry_in = retry_in(worker, count, exception).to_i
+        return custom_retry_in + jitter if custom_retry_in > 0
+      end
+      (count**4) + 15 + jitter
+    end
+
+    def retry_in(worker, count, exception)
+      worker.sidekiq_retry_in_block.call(count, exception)
+    rescue Exception => e
+      handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
+      nil
+    end
+
+    def exception_caused_by_shutdown?(e, checked_causes = [])
+      return false unless e.cause
+
+      # Handle circular causes
+      checked_causes << e.object_id
+      return false if checked_causes.include?(e.cause.object_id)
+
+      e.cause.instance_of?(Sidekiq::Shutdown) ||
+        exception_caused_by_shutdown?(e.cause, checked_causes)
+    end
+
+    # Extract message from exception.
+    # Set a default if the message raises an error
+    def exception_message(exception)
+      # App code can stuff all sorts of crazy binary data into the error message
+      # that won't convert to JSON.
+      exception.message.to_s[0, 10_000]
+    rescue
+      +"!!! ERROR MESSAGE THREW AN ERROR !!!"
+    end
+
+    def compress_backtrace(backtrace)
+      serialized = Sidekiq.dump_json(backtrace)
+      compressed = Zlib::Deflate.deflate(serialized)
+      Base64.encode64(compressed)
+    end
+  end
+end
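The default backoff in delay_for is (count**4) + 15 seconds plus a random jitter of rand(10) * (count + 1), which is where the "about 3 weeks" figure for 25 retries comes from. A sketch that prints the approximate schedule, plus a worker using the overrides documented above (MyWorker is a hypothetical class):

    # Approximate default retry schedule (jitter omitted).
    (0..24).each do |count|
      puts "retry ##{count + 1} after ~#{(count**4) + 15}s"
    end
    # retry #1 after ~15s ... retry #25 after ~331791s (~3.8 days);
    # the waits sum to roughly 20 days, i.e. about 3 weeks.

    class MyWorker
      include Sidekiq::Worker
      sidekiq_options retry: 10

      # Custom backoff: per the code above, the block receives the retry
      # count and the exception; returning nil falls back to the default.
      sidekiq_retry_in do |count, _exception|
        10 * (count + 1)
      end

      def perform(*); end
    end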
data/lib/sidekiq/launcher.rb
CHANGED
@@ -1,20 +1,28 @@
 # frozen_string_literal: true
-
-require 'sidekiq/manager'
-require 'sidekiq/fetch'
-require 'sidekiq/scheduled'
+
+require "sidekiq/manager"
+require "sidekiq/fetch"
+require "sidekiq/scheduled"
 
 module Sidekiq
-  # The Launcher is a very simple Actor whose job is to
-  # start, monitor and stop the core Actors in Sidekiq.
-  # If any of these actors die, the Sidekiq process exits
-  # immediately.
+  # The Launcher starts the Manager and Poller threads and provides the process heartbeat.
   class Launcher
     include Util
 
+    STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
+
+    PROCTITLES = [
+      proc { "sidekiq" },
+      proc { Sidekiq::VERSION },
+      proc { |me, data| data["tag"] },
+      proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
+      proc { |me, data| "stopping" if me.stopping? }
+    ]
+
     attr_accessor :manager, :poller, :fetcher
 
     def initialize(options)
+      options[:fetch] ||= BasicFetch.new(options)
       @manager = Sidekiq::Manager.new(options)
       @poller = Sidekiq::Scheduled::Poller.new
       @done = false
@@ -39,7 +47,7 @@ module Sidekiq
     # return until all work is complete and cleaned up.
     # It can take up to the timeout to complete.
     def stop
-      deadline = Time.now + @options[:timeout]
+      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
 
       @done = true
       @manager.quiet
@@ -49,7 +57,7 @@ module Sidekiq
 
       # Requeue everything in case there was a worker who grabbed work while stopped
       # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
-      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy = @options[:fetch]
       strategy.bulk_requeue([], @options)
 
       clear_heartbeat
@@ -61,104 +69,195 @@ module Sidekiq
 
     private unless $TESTING
 
-    JVM_RESERVED_SIGNALS = ['USR1', 'USR2']
+    BEAT_PAUSE = 5
+
+    def start_heartbeat
+      loop do
+        heartbeat
+        sleep BEAT_PAUSE
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem("processes", identity)
+          conn.unlink("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
+    def heartbeat
+      $0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
+
+      ❤
+    end
+
+    def self.flush_stats
+      fails = Processor::FAILURE.reset
+      procd = Processor::PROCESSED.reset
+      return if fails + procd == 0
 
-    def heartbeat(k, data, json)
-      results = Sidekiq::CLI::PROCTITLES.map { |x| x.(self, data) }
-      results.compact!
-      $0 = results.join(' ')
+      nowdate = Time.now.utc.strftime("%Y-%m-%d")
+      begin
+        Sidekiq.redis do |conn|
+          conn.pipelined do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
 
-      ❤(k, json)
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+          end
+        end
+      rescue => ex
+        # we're exiting the process, things might be shut down so don't
+        # try to handle the exception
+        Sidekiq.logger.warn("Unable to flush stats: #{ex}")
+      end
     end
+    at_exit(&method(:flush_stats))
 
-    def ❤(key, json)
+    def ❤
+      key = identity
       fails = procd = 0
+
      begin
-        Processor::FAILURE.update { |curr| fails = curr; 0 }
-        Processor::PROCESSED.update { |curr| procd = curr; 0 }
+        fails = Processor::FAILURE.reset
+        procd = Processor::PROCESSED.reset
+        curstate = Processor::WORKER_STATE.dup
+
+        workers_key = "#{key}:workers"
+        nowdate = Time.now.utc.strftime("%Y-%m-%d")
 
-        workers_key = "#{key}:workers".freeze
-        nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
         Sidekiq.redis do |conn|
           conn.multi do
-            conn.incrby("stat:processed".freeze, procd)
+            conn.incrby("stat:processed", procd)
             conn.incrby("stat:processed:#{nowdate}", procd)
-            conn.incrby("stat:failed".freeze, fails)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            conn.incrby("stat:failed", fails)
             conn.incrby("stat:failed:#{nowdate}", fails)
-            conn.del(workers_key)
-            Processor::WORKER_STATE.each_pair do |tid, hash|
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+
+            conn.unlink(workers_key)
+            curstate.each_pair do |tid, hash|
              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
            end
            conn.expire(workers_key, 60)
          end
        end
+
+        rtt = check_rtt
+
        fails = procd = 0
+        kb = memory_usage(::Process.pid)
 
-        _, exists, _, _, msg = Sidekiq.redis do |conn|
-          conn.multi do
-            conn.sadd('processes', key)
-            conn.exists(key)
-            conn.hmset(key, 'info', json, 'busy', Processor::WORKER_STATE.size, 'beat', Time.now.to_f, 'quiet', @done)
+        _, exists, _, _, msg = Sidekiq.redis { |conn|
+          conn.multi {
+            conn.sadd("processes", key)
+            conn.exists?(key)
+            conn.hmset(key, "info", to_json,
+              "busy", curstate.size,
+              "beat", Time.now.to_f,
+              "rtt_us", rtt,
+              "quiet", @done,
+              "rss", kb)
             conn.expire(key, 60)
             conn.rpop("#{key}-signals")
-          end
-        end
+          }
+        }
 
         # first heartbeat or recovering from an outage and need to reestablish our heartbeat
-        fire_event(:heartbeat) if !exists
+        fire_event(:heartbeat) unless exists
 
         return unless msg
 
-        if JVM_RESERVED_SIGNALS.include?(msg)
-          Sidekiq::CLI.instance.handle_signal(msg)
-        else
-          ::Process.kill(msg, $$)
-        end
+        ::Process.kill(msg, ::Process.pid)
       rescue => e
         # ignore all redis/network issues
-        logger.error("heartbeat: #{e.message}")
+        logger.error("heartbeat: #{e}")
        # don't lose the counts if there was a network issue
-        Processor::PROCESSED.increment(procd)
-        Processor::FAILURE.increment(fails)
+        Processor::PROCESSED.incr(procd)
+        Processor::FAILURE.incr(fails)
      end
    end
 
-    def start_heartbeat
-      k = identity
-      data = {
-        'hostname' => hostname,
-        'started_at' => Time.now.to_f,
-        'pid' => $$,
-        'tag' => @options[:tag] || '',
-        'concurrency' => @options[:concurrency],
-        'queues' => @options[:queues].uniq,
-        'labels' => @options[:labels],
-        'identity' => k,
-      }
-      # this data doesn't change so dump it to a string
-      # now so we don't need to dump it every heartbeat.
-      json = Sidekiq.dump_json(data)
+    # We run the heartbeat every five seconds.
+    # Capture five samples of RTT, log a warning if each sample
+    # is above our warning threshold.
+    RTT_READINGS = RingBuffer.new(5)
+    RTT_WARNING_LEVEL = 50_000
 
-      while true
-        heartbeat(k, data, json)
-        sleep 5
+    def check_rtt
+      a = b = 0
+      Sidekiq.redis do |x|
+        a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
+        x.ping
+        b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
      end
-      Sidekiq.logger.info("Heartbeat stopping...")
+      rtt = b - a
+      RTT_READINGS << rtt
+      # Ideal RTT for Redis is < 1000µs
+      # Workable is < 10,000µs
+      # Log a warning if it's a disaster.
+      if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
+        Sidekiq.logger.warn <<~EOM
+          Your Redis network connection is performing extremely poorly.
+          Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
+          Ensure Redis is running in the same AZ or datacenter as Sidekiq.
+          If these values are close to 100,000, that means your Sidekiq process may be
+          CPU overloaded; see https://github.com/mperham/sidekiq/discussions/5039
+        EOM
+        RTT_READINGS.reset
+      end
+      rtt
    end
 
-    def clear_heartbeat
-      # Remove record from Redis since we are shutting down.
-      # Note we don't stop the heartbeat thread; if the process
-      # doesn't actually exit, it'll reappear in the Web UI.
-      Sidekiq.redis do |conn|
-        conn.pipelined do
-          conn.srem('processes', identity)
-          conn.del("#{identity}:workers")
+    MEMORY_GRABBER = case RUBY_PLATFORM
+    when /linux/
+      ->(pid) {
+        IO.readlines("/proc/#{$$}/status").each do |line|
+          next unless line.start_with?("VmRSS:")
+          break line.split[1].to_i
        end
-      end
-    rescue
-      # best effort, ignore network errors
+      }
+    when /darwin|bsd/
+      ->(pid) {
+        `ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
+      }
+    else
+      ->(pid) { 0 }
+    end
+
+    def memory_usage(pid)
+      MEMORY_GRABBER.call(pid)
+    end
+
+    def to_data
+      @data ||= {
+        "hostname" => hostname,
+        "started_at" => Time.now.to_f,
+        "pid" => ::Process.pid,
+        "tag" => @options[:tag] || "",
+        "concurrency" => @options[:concurrency],
+        "queues" => @options[:queues].uniq,
+        "labels" => @options[:labels],
+        "identity" => identity
+      }
    end
 
+    def to_json
+      # this data changes infrequently so dump it to a string
+      # now so we don't need to dump it every heartbeat.
+      @json ||= Sidekiq.dump_json(to_data)
+    end
  end
 end
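check_rtt is just two monotonic timestamps around a PING on the existing connection pool. The same measurement can be taken standalone with the redis gem, which Sidekiq itself uses (the URL below is an assumption for a local server):

    require "redis"

    redis = Redis.new(url: "redis://localhost:6379/0") # hypothetical local instance
    t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
    redis.ping
    t1 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
    puts "redis rtt: #{t1 - t0}us" # ideally < 1000, workable < 10,000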