sidekiq 5.2.8 → 6.2.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sidekiq might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Changes.md +248 -0
- data/LICENSE +1 -1
- data/README.md +18 -34
- data/bin/sidekiq +26 -2
- data/bin/sidekiqload +32 -24
- data/bin/sidekiqmon +8 -0
- data/lib/generators/sidekiq/templates/worker_test.rb.erb +1 -1
- data/lib/generators/sidekiq/worker_generator.rb +21 -13
- data/lib/sidekiq/api.rb +310 -249
- data/lib/sidekiq/cli.rb +144 -180
- data/lib/sidekiq/client.rb +64 -48
- data/lib/sidekiq/delay.rb +5 -6
- data/lib/sidekiq/exception_handler.rb +10 -12
- data/lib/sidekiq/extensions/action_mailer.rb +13 -22
- data/lib/sidekiq/extensions/active_record.rb +13 -10
- data/lib/sidekiq/extensions/class_methods.rb +14 -11
- data/lib/sidekiq/extensions/generic_proxy.rb +6 -4
- data/lib/sidekiq/fetch.rb +38 -31
- data/lib/sidekiq/job.rb +8 -0
- data/lib/sidekiq/job_logger.rb +45 -7
- data/lib/sidekiq/job_retry.rb +64 -67
- data/lib/sidekiq/launcher.rb +146 -60
- data/lib/sidekiq/logger.rb +166 -0
- data/lib/sidekiq/manager.rb +11 -13
- data/lib/sidekiq/middleware/chain.rb +20 -8
- data/lib/sidekiq/middleware/i18n.rb +5 -7
- data/lib/sidekiq/monitor.rb +133 -0
- data/lib/sidekiq/paginator.rb +18 -14
- data/lib/sidekiq/processor.rb +71 -70
- data/lib/sidekiq/rails.rb +29 -37
- data/lib/sidekiq/redis_connection.rb +50 -48
- data/lib/sidekiq/scheduled.rb +35 -30
- data/lib/sidekiq/sd_notify.rb +149 -0
- data/lib/sidekiq/systemd.rb +24 -0
- data/lib/sidekiq/testing/inline.rb +2 -1
- data/lib/sidekiq/testing.rb +36 -27
- data/lib/sidekiq/util.rb +45 -16
- data/lib/sidekiq/version.rb +2 -1
- data/lib/sidekiq/web/action.rb +15 -11
- data/lib/sidekiq/web/application.rb +86 -76
- data/lib/sidekiq/web/csrf_protection.rb +180 -0
- data/lib/sidekiq/web/helpers.rb +114 -86
- data/lib/sidekiq/web/router.rb +23 -19
- data/lib/sidekiq/web.rb +61 -105
- data/lib/sidekiq/worker.rb +126 -102
- data/lib/sidekiq.rb +69 -44
- data/sidekiq.gemspec +23 -16
- data/web/assets/images/apple-touch-icon.png +0 -0
- data/web/assets/javascripts/application.js +25 -27
- data/web/assets/javascripts/dashboard.js +4 -23
- data/web/assets/stylesheets/application-dark.css +147 -0
- data/web/assets/stylesheets/application.css +37 -128
- data/web/locales/ar.yml +8 -2
- data/web/locales/de.yml +14 -2
- data/web/locales/en.yml +5 -0
- data/web/locales/es.yml +18 -2
- data/web/locales/fr.yml +10 -3
- data/web/locales/ja.yml +7 -1
- data/web/locales/lt.yml +83 -0
- data/web/locales/pl.yml +4 -4
- data/web/locales/ru.yml +4 -0
- data/web/locales/vi.yml +83 -0
- data/web/views/_job_info.erb +3 -2
- data/web/views/busy.erb +54 -20
- data/web/views/dashboard.erb +14 -6
- data/web/views/dead.erb +3 -3
- data/web/views/layout.erb +2 -0
- data/web/views/morgue.erb +9 -6
- data/web/views/queue.erb +11 -2
- data/web/views/queues.erb +10 -2
- data/web/views/retries.erb +11 -8
- data/web/views/retry.erb +3 -3
- data/web/views/scheduled.erb +5 -2
- metadata +32 -64
- data/.circleci/config.yml +0 -61
- data/.github/contributing.md +0 -32
- data/.github/issue_template.md +0 -11
- data/.gitignore +0 -15
- data/.travis.yml +0 -11
- data/3.0-Upgrade.md +0 -70
- data/4.0-Upgrade.md +0 -53
- data/5.0-Upgrade.md +0 -56
- data/COMM-LICENSE +0 -97
- data/Ent-Changes.md +0 -238
- data/Gemfile +0 -23
- data/Pro-2.0-Upgrade.md +0 -138
- data/Pro-3.0-Upgrade.md +0 -44
- data/Pro-4.0-Upgrade.md +0 -35
- data/Pro-Changes.md +0 -759
- data/Rakefile +0 -9
- data/bin/sidekiqctl +0 -20
- data/code_of_conduct.md +0 -50
- data/lib/sidekiq/core_ext.rb +0 -1
- data/lib/sidekiq/ctl.rb +0 -221
- data/lib/sidekiq/logging.rb +0 -122
- data/lib/sidekiq/middleware/server/active_record.rb +0 -23
data/lib/sidekiq/fetch.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
|
3
|
+
require "sidekiq"
|
3
4
|
|
4
5
|
module Sidekiq
|
5
6
|
class BasicFetch
|
@@ -7,68 +8,60 @@ module Sidekiq
|
|
7
8
|
# can check if the process is shutting down.
|
8
9
|
TIMEOUT = 2
|
9
10
|
|
10
|
-
UnitOfWork = Struct.new(:queue, :job)
|
11
|
+
UnitOfWork = Struct.new(:queue, :job) {
|
11
12
|
def acknowledge
|
12
13
|
# nothing to do
|
13
14
|
end
|
14
15
|
|
15
16
|
def queue_name
|
16
|
-
queue.
|
17
|
+
queue.delete_prefix("queue:")
|
17
18
|
end
|
18
19
|
|
19
20
|
def requeue
|
20
21
|
Sidekiq.redis do |conn|
|
21
|
-
conn.rpush(
|
22
|
+
conn.rpush(queue, job)
|
22
23
|
end
|
23
24
|
end
|
24
|
-
|
25
|
+
}
|
25
26
|
|
26
27
|
def initialize(options)
|
27
|
-
|
28
|
-
@
|
28
|
+
raise ArgumentError, "missing queue list" unless options[:queues]
|
29
|
+
@options = options
|
30
|
+
@strictly_ordered_queues = !!@options[:strict]
|
31
|
+
@queues = @options[:queues].map { |q| "queue:#{q}" }
|
29
32
|
if @strictly_ordered_queues
|
30
|
-
@queues
|
33
|
+
@queues.uniq!
|
31
34
|
@queues << TIMEOUT
|
32
35
|
end
|
33
36
|
end
|
34
37
|
|
35
38
|
def retrieve_work
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
# data from the first queue that has pending elements. We
|
43
|
-
# recreate the queue command each time we invoke Redis#brpop
|
44
|
-
# to honor weights and avoid queue starvation.
|
45
|
-
def queues_cmd
|
46
|
-
if @strictly_ordered_queues
|
47
|
-
@queues
|
48
|
-
else
|
49
|
-
queues = @queues.shuffle.uniq
|
50
|
-
queues << TIMEOUT
|
51
|
-
queues
|
39
|
+
qs = queues_cmd
|
40
|
+
# 4825 Sidekiq Pro with all queues paused will return an
|
41
|
+
# empty set of queues with a trailing TIMEOUT value.
|
42
|
+
if qs.size <= 1
|
43
|
+
sleep(TIMEOUT)
|
44
|
+
return nil
|
52
45
|
end
|
53
|
-
end
|
54
46
|
|
47
|
+
work = Sidekiq.redis { |conn| conn.brpop(*qs) }
|
48
|
+
UnitOfWork.new(*work) if work
|
49
|
+
end
|
55
50
|
|
56
|
-
|
57
|
-
# an instance method will make it async to the Fetcher actor
|
58
|
-
def self.bulk_requeue(inprogress, options)
|
51
|
+
def bulk_requeue(inprogress, options)
|
59
52
|
return if inprogress.empty?
|
60
53
|
|
61
54
|
Sidekiq.logger.debug { "Re-queueing terminated jobs" }
|
62
55
|
jobs_to_requeue = {}
|
63
56
|
inprogress.each do |unit_of_work|
|
64
|
-
jobs_to_requeue[unit_of_work.
|
65
|
-
jobs_to_requeue[unit_of_work.
|
57
|
+
jobs_to_requeue[unit_of_work.queue] ||= []
|
58
|
+
jobs_to_requeue[unit_of_work.queue] << unit_of_work.job
|
66
59
|
end
|
67
60
|
|
68
61
|
Sidekiq.redis do |conn|
|
69
62
|
conn.pipelined do
|
70
63
|
jobs_to_requeue.each do |queue, jobs|
|
71
|
-
conn.rpush(
|
64
|
+
conn.rpush(queue, jobs)
|
72
65
|
end
|
73
66
|
end
|
74
67
|
end
|
@@ -77,5 +70,19 @@ module Sidekiq
|
|
77
70
|
Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{ex.message}")
|
78
71
|
end
|
79
72
|
|
73
|
+
# Creating the Redis#brpop command takes into account any
|
74
|
+
# configured queue weights. By default Redis#brpop returns
|
75
|
+
# data from the first queue that has pending elements. We
|
76
|
+
# recreate the queue command each time we invoke Redis#brpop
|
77
|
+
# to honor weights and avoid queue starvation.
|
78
|
+
def queues_cmd
|
79
|
+
if @strictly_ordered_queues
|
80
|
+
@queues
|
81
|
+
else
|
82
|
+
queues = @queues.shuffle!.uniq
|
83
|
+
queues << TIMEOUT
|
84
|
+
queues
|
85
|
+
end
|
86
|
+
end
|
80
87
|
end
|
81
88
|
end
|
data/lib/sidekiq/job.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
require "sidekiq/worker"
|
2
|
+
|
3
|
+
module Sidekiq
|
4
|
+
# Sidekiq::Job is a new alias for Sidekiq::Worker, coming in 6.3.0.
|
5
|
+
# You can opt into this by requiring 'sidekiq/job' in your initializer
|
6
|
+
# and then using `include Sidekiq::Job` rather than `Sidekiq::Worker`.
|
7
|
+
Job = Worker
|
8
|
+
end
|
data/lib/sidekiq/job_logger.rb
CHANGED
@@ -1,25 +1,63 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Sidekiq
|
3
4
|
class JobLogger
|
5
|
+
def initialize(logger = Sidekiq.logger)
|
6
|
+
@logger = logger
|
7
|
+
end
|
4
8
|
|
5
9
|
def call(item, queue)
|
6
10
|
start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
7
|
-
logger.info("start")
|
11
|
+
@logger.info("start")
|
12
|
+
|
8
13
|
yield
|
9
|
-
|
14
|
+
|
15
|
+
with_elapsed_time_context(start) do
|
16
|
+
@logger.info("done")
|
17
|
+
end
|
10
18
|
rescue Exception
|
11
|
-
|
19
|
+
with_elapsed_time_context(start) do
|
20
|
+
@logger.info("fail")
|
21
|
+
end
|
22
|
+
|
12
23
|
raise
|
13
24
|
end
|
14
25
|
|
26
|
+
def prepare(job_hash, &block)
|
27
|
+
level = job_hash["log_level"]
|
28
|
+
if level
|
29
|
+
@logger.log_at(level) do
|
30
|
+
Sidekiq::Context.with(job_hash_context(job_hash), &block)
|
31
|
+
end
|
32
|
+
else
|
33
|
+
Sidekiq::Context.with(job_hash_context(job_hash), &block)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def job_hash_context(job_hash)
|
38
|
+
# If we're using a wrapper class, like ActiveJob, use the "wrapped"
|
39
|
+
# attribute to expose the underlying thing.
|
40
|
+
h = {
|
41
|
+
class: job_hash["display_class"] || job_hash["wrapped"] || job_hash["class"],
|
42
|
+
jid: job_hash["jid"]
|
43
|
+
}
|
44
|
+
h[:bid] = job_hash["bid"] if job_hash["bid"]
|
45
|
+
h[:tags] = job_hash["tags"] if job_hash["tags"]
|
46
|
+
h
|
47
|
+
end
|
48
|
+
|
49
|
+
def with_elapsed_time_context(start, &block)
|
50
|
+
Sidekiq::Context.with(elapsed_time_context(start), &block)
|
51
|
+
end
|
52
|
+
|
53
|
+
def elapsed_time_context(start)
|
54
|
+
{elapsed: elapsed(start).to_s}
|
55
|
+
end
|
56
|
+
|
15
57
|
private
|
16
58
|
|
17
59
|
def elapsed(start)
|
18
60
|
(::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start).round(3)
|
19
61
|
end
|
20
|
-
|
21
|
-
def logger
|
22
|
-
Sidekiq.logger
|
23
|
-
end
|
24
62
|
end
|
25
63
|
end
|
data/lib/sidekiq/job_retry.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require
|
2
|
+
|
3
|
+
require "sidekiq/scheduled"
|
4
|
+
require "sidekiq/api"
|
5
|
+
|
6
|
+
require "zlib"
|
7
|
+
require "base64"
|
4
8
|
|
5
9
|
module Sidekiq
|
6
10
|
##
|
@@ -57,6 +61,7 @@ module Sidekiq
|
|
57
61
|
#
|
58
62
|
class JobRetry
|
59
63
|
class Handled < ::RuntimeError; end
|
64
|
+
|
60
65
|
class Skip < Handled; end
|
61
66
|
|
62
67
|
include Sidekiq::Util
|
@@ -70,7 +75,7 @@ module Sidekiq
|
|
70
75
|
# The global retry handler requires only the barest of data.
|
71
76
|
# We want to be able to retry as much as possible so we don't
|
72
77
|
# require the worker to be instantiated.
|
73
|
-
def global(
|
78
|
+
def global(jobstr, queue)
|
74
79
|
yield
|
75
80
|
rescue Handled => ex
|
76
81
|
raise ex
|
@@ -81,22 +86,20 @@ module Sidekiq
|
|
81
86
|
# ignore, will be pushed back onto queue during hard_shutdown
|
82
87
|
raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
|
83
88
|
|
84
|
-
|
89
|
+
msg = Sidekiq.load_json(jobstr)
|
90
|
+
if msg["retry"]
|
85
91
|
attempt_retry(nil, msg, queue, e)
|
86
92
|
else
|
87
93
|
Sidekiq.death_handlers.each do |handler|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
|
92
|
-
end
|
94
|
+
handler.call(msg, e)
|
95
|
+
rescue => handler_ex
|
96
|
+
handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
|
93
97
|
end
|
94
98
|
end
|
95
99
|
|
96
100
|
raise Handled
|
97
101
|
end
|
98
102
|
|
99
|
-
|
100
103
|
# The local retry support means that any errors that occur within
|
101
104
|
# this block can be associated with the given worker instance.
|
102
105
|
# This is required to support the `sidekiq_retries_exhausted` block.
|
@@ -105,7 +108,7 @@ module Sidekiq
|
|
105
108
|
# exception so the global block does not reprocess the error. The
|
106
109
|
# Skip exception is unwrapped within Sidekiq::Processor#process before
|
107
110
|
# calling the handle_exception handlers.
|
108
|
-
def local(worker,
|
111
|
+
def local(worker, jobstr, queue)
|
109
112
|
yield
|
110
113
|
rescue Handled => ex
|
111
114
|
raise ex
|
@@ -116,11 +119,12 @@ module Sidekiq
|
|
116
119
|
# ignore, will be pushed back onto queue during hard_shutdown
|
117
120
|
raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
|
118
121
|
|
119
|
-
|
120
|
-
|
122
|
+
msg = Sidekiq.load_json(jobstr)
|
123
|
+
if msg["retry"].nil?
|
124
|
+
msg["retry"] = worker.class.get_sidekiq_options["retry"]
|
121
125
|
end
|
122
126
|
|
123
|
-
raise e unless msg[
|
127
|
+
raise e unless msg["retry"]
|
124
128
|
attempt_retry(worker, msg, queue, e)
|
125
129
|
# We've handled this error associated with this job, don't
|
126
130
|
# need to handle it at the global level
|
@@ -133,13 +137,9 @@ module Sidekiq
|
|
133
137
|
# instantiate the worker instance. All access must be guarded and
|
134
138
|
# best effort.
|
135
139
|
def attempt_retry(worker, msg, queue, exception)
|
136
|
-
max_retry_attempts = retry_attempts_from(msg[
|
140
|
+
max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)
|
137
141
|
|
138
|
-
msg[
|
139
|
-
msg['retry_queue']
|
140
|
-
else
|
141
|
-
queue
|
142
|
-
end
|
142
|
+
msg["queue"] = (msg["retry_queue"] || queue)
|
143
143
|
|
144
144
|
m = exception_message(exception)
|
145
145
|
if m.respond_to?(:scrub!)
|
@@ -147,32 +147,34 @@ module Sidekiq
|
|
147
147
|
m.scrub!
|
148
148
|
end
|
149
149
|
|
150
|
-
msg[
|
151
|
-
msg[
|
152
|
-
count = if msg[
|
153
|
-
msg[
|
154
|
-
msg[
|
150
|
+
msg["error_message"] = m
|
151
|
+
msg["error_class"] = exception.class.name
|
152
|
+
count = if msg["retry_count"]
|
153
|
+
msg["retried_at"] = Time.now.to_f
|
154
|
+
msg["retry_count"] += 1
|
155
155
|
else
|
156
|
-
msg[
|
157
|
-
msg[
|
156
|
+
msg["failed_at"] = Time.now.to_f
|
157
|
+
msg["retry_count"] = 0
|
158
158
|
end
|
159
159
|
|
160
|
-
if msg[
|
161
|
-
msg[
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
160
|
+
if msg["backtrace"]
|
161
|
+
lines = if msg["backtrace"] == true
|
162
|
+
exception.backtrace
|
163
|
+
else
|
164
|
+
exception.backtrace[0...msg["backtrace"].to_i]
|
165
|
+
end
|
166
|
+
|
167
|
+
msg["error_backtrace"] = compress_backtrace(lines)
|
166
168
|
end
|
167
169
|
|
168
170
|
if count < max_retry_attempts
|
169
171
|
delay = delay_for(worker, count, exception)
|
170
172
|
# Logging here can break retries if the logging device raises ENOSPC #3979
|
171
|
-
#logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
|
173
|
+
# logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
|
172
174
|
retry_at = Time.now.to_f + delay
|
173
175
|
payload = Sidekiq.dump_json(msg)
|
174
176
|
Sidekiq.redis do |conn|
|
175
|
-
conn.zadd(
|
177
|
+
conn.zadd("retry", retry_at.to_s, payload)
|
176
178
|
end
|
177
179
|
else
|
178
180
|
# Goodbye dear message, you (re)tried your best I'm sure.
|
@@ -182,25 +184,23 @@ module Sidekiq
|
|
182
184
|
|
183
185
|
def retries_exhausted(worker, msg, exception)
|
184
186
|
begin
|
185
|
-
block = worker
|
186
|
-
block
|
187
|
+
block = worker&.sidekiq_retries_exhausted_block
|
188
|
+
block&.call(msg, exception)
|
187
189
|
rescue => e
|
188
|
-
handle_exception(e, {
|
190
|
+
handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
|
189
191
|
end
|
190
192
|
|
193
|
+
send_to_morgue(msg) unless msg["dead"] == false
|
194
|
+
|
191
195
|
Sidekiq.death_handlers.each do |handler|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
handle_exception(e, { context: "Error calling death handler", job: msg })
|
196
|
-
end
|
196
|
+
handler.call(msg, exception)
|
197
|
+
rescue => e
|
198
|
+
handle_exception(e, {context: "Error calling death handler", job: msg})
|
197
199
|
end
|
198
|
-
|
199
|
-
send_to_morgue(msg) unless msg['dead'] == false
|
200
200
|
end
|
201
201
|
|
202
202
|
def send_to_morgue(msg)
|
203
|
-
logger.info { "Adding dead #{msg[
|
203
|
+
logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
|
204
204
|
payload = Sidekiq.dump_json(msg)
|
205
205
|
DeadSet.new.kill(payload, notify_failure: false)
|
206
206
|
end
|
@@ -214,25 +214,19 @@ module Sidekiq
|
|
214
214
|
end
|
215
215
|
|
216
216
|
def delay_for(worker, count, exception)
|
217
|
-
|
217
|
+
jitter = rand(10) * (count + 1)
|
218
|
+
if worker&.sidekiq_retry_in_block
|
218
219
|
custom_retry_in = retry_in(worker, count, exception).to_i
|
219
|
-
return custom_retry_in if custom_retry_in > 0
|
220
|
+
return custom_retry_in + jitter if custom_retry_in > 0
|
220
221
|
end
|
221
|
-
|
222
|
-
end
|
223
|
-
|
224
|
-
# delayed_job uses the same basic formula
|
225
|
-
def seconds_to_delay(count)
|
226
|
-
(count ** 4) + 15 + (rand(30)*(count+1))
|
222
|
+
(count**4) + 15 + jitter
|
227
223
|
end
|
228
224
|
|
229
225
|
def retry_in(worker, count, exception)
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
nil
|
235
|
-
end
|
226
|
+
worker.sidekiq_retry_in_block.call(count, exception)
|
227
|
+
rescue Exception => e
|
228
|
+
handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default"})
|
229
|
+
nil
|
236
230
|
end
|
237
231
|
|
238
232
|
def exception_caused_by_shutdown?(e, checked_causes = [])
|
@@ -249,14 +243,17 @@ module Sidekiq
|
|
249
243
|
# Extract message from exception.
|
250
244
|
# Set a default if the message raises an error
|
251
245
|
def exception_message(exception)
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
"!!! ERROR MESSAGE THREW AN ERROR !!!".dup
|
258
|
-
end
|
246
|
+
# App code can stuff all sorts of crazy binary data into the error message
|
247
|
+
# that won't convert to JSON.
|
248
|
+
exception.message.to_s[0, 10_000]
|
249
|
+
rescue
|
250
|
+
+"!!! ERROR MESSAGE THREW AN ERROR !!!"
|
259
251
|
end
|
260
252
|
|
253
|
+
def compress_backtrace(backtrace)
|
254
|
+
serialized = Sidekiq.dump_json(backtrace)
|
255
|
+
compressed = Zlib::Deflate.deflate(serialized)
|
256
|
+
Base64.encode64(compressed)
|
257
|
+
end
|
261
258
|
end
|
262
259
|
end
|
data/lib/sidekiq/launcher.rb
CHANGED
@@ -1,21 +1,28 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require
|
4
|
-
require
|
2
|
+
|
3
|
+
require "sidekiq/manager"
|
4
|
+
require "sidekiq/fetch"
|
5
|
+
require "sidekiq/scheduled"
|
5
6
|
|
6
7
|
module Sidekiq
|
7
|
-
# The Launcher
|
8
|
-
# start, monitor and stop the core Actors in Sidekiq.
|
9
|
-
# If any of these actors die, the Sidekiq process exits
|
10
|
-
# immediately.
|
8
|
+
# The Launcher starts the Manager and Poller threads and provides the process heartbeat.
|
11
9
|
class Launcher
|
12
10
|
include Util
|
13
11
|
|
14
|
-
|
12
|
+
STATS_TTL = 5 * 365 * 24 * 60 * 60 # 5 years
|
15
13
|
|
16
|
-
|
14
|
+
PROCTITLES = [
|
15
|
+
proc { "sidekiq" },
|
16
|
+
proc { Sidekiq::VERSION },
|
17
|
+
proc { |me, data| data["tag"] },
|
18
|
+
proc { |me, data| "[#{Processor::WORKER_STATE.size} of #{data["concurrency"]} busy]" },
|
19
|
+
proc { |me, data| "stopping" if me.stopping? }
|
20
|
+
]
|
21
|
+
|
22
|
+
attr_accessor :manager, :poller, :fetcher
|
17
23
|
|
18
24
|
def initialize(options)
|
25
|
+
options[:fetch] ||= BasicFetch.new(options)
|
19
26
|
@manager = Sidekiq::Manager.new(options)
|
20
27
|
@poller = Sidekiq::Scheduled::Poller.new
|
21
28
|
@done = false
|
@@ -50,7 +57,7 @@ module Sidekiq
|
|
50
57
|
|
51
58
|
# Requeue everything in case there was a worker who grabbed work while stopped
|
52
59
|
# This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
|
53
|
-
strategy =
|
60
|
+
strategy = @options[:fetch]
|
54
61
|
strategy.bulk_requeue([], @options)
|
55
62
|
|
56
63
|
clear_heartbeat
|
@@ -62,17 +69,64 @@ module Sidekiq
|
|
62
69
|
|
63
70
|
private unless $TESTING
|
64
71
|
|
72
|
+
def start_heartbeat
|
73
|
+
loop do
|
74
|
+
heartbeat
|
75
|
+
sleep 5
|
76
|
+
end
|
77
|
+
Sidekiq.logger.info("Heartbeat stopping...")
|
78
|
+
end
|
79
|
+
|
80
|
+
def clear_heartbeat
|
81
|
+
# Remove record from Redis since we are shutting down.
|
82
|
+
# Note we don't stop the heartbeat thread; if the process
|
83
|
+
# doesn't actually exit, it'll reappear in the Web UI.
|
84
|
+
Sidekiq.redis do |conn|
|
85
|
+
conn.pipelined do
|
86
|
+
conn.srem("processes", identity)
|
87
|
+
conn.unlink("#{identity}:workers")
|
88
|
+
end
|
89
|
+
end
|
90
|
+
rescue
|
91
|
+
# best effort, ignore network errors
|
92
|
+
end
|
93
|
+
|
65
94
|
def heartbeat
|
66
|
-
|
67
|
-
results.compact!
|
68
|
-
$0 = results.join(' ')
|
95
|
+
$0 = PROCTITLES.map { |proc| proc.call(self, to_data) }.compact.join(" ")
|
69
96
|
|
70
97
|
❤
|
71
98
|
end
|
72
99
|
|
100
|
+
def self.flush_stats
|
101
|
+
fails = Processor::FAILURE.reset
|
102
|
+
procd = Processor::PROCESSED.reset
|
103
|
+
return if fails + procd == 0
|
104
|
+
|
105
|
+
nowdate = Time.now.utc.strftime("%Y-%m-%d")
|
106
|
+
begin
|
107
|
+
Sidekiq.redis do |conn|
|
108
|
+
conn.pipelined do
|
109
|
+
conn.incrby("stat:processed", procd)
|
110
|
+
conn.incrby("stat:processed:#{nowdate}", procd)
|
111
|
+
conn.expire("stat:processed:#{nowdate}", STATS_TTL)
|
112
|
+
|
113
|
+
conn.incrby("stat:failed", fails)
|
114
|
+
conn.incrby("stat:failed:#{nowdate}", fails)
|
115
|
+
conn.expire("stat:failed:#{nowdate}", STATS_TTL)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
rescue => ex
|
119
|
+
# we're exiting the process, things might be shut down so don't
|
120
|
+
# try to handle the exception
|
121
|
+
Sidekiq.logger.warn("Unable to flush stats: #{ex}")
|
122
|
+
end
|
123
|
+
end
|
124
|
+
at_exit(&method(:flush_stats))
|
125
|
+
|
73
126
|
def ❤
|
74
127
|
key = identity
|
75
128
|
fails = procd = 0
|
129
|
+
|
76
130
|
begin
|
77
131
|
fails = Processor::FAILURE.reset
|
78
132
|
procd = Processor::PROCESSED.reset
|
@@ -80,6 +134,7 @@ module Sidekiq
|
|
80
134
|
|
81
135
|
workers_key = "#{key}:workers"
|
82
136
|
nowdate = Time.now.utc.strftime("%Y-%m-%d")
|
137
|
+
|
83
138
|
Sidekiq.redis do |conn|
|
84
139
|
conn.multi do
|
85
140
|
conn.incrby("stat:processed", procd)
|
@@ -90,84 +145,115 @@ module Sidekiq
|
|
90
145
|
conn.incrby("stat:failed:#{nowdate}", fails)
|
91
146
|
conn.expire("stat:failed:#{nowdate}", STATS_TTL)
|
92
147
|
|
93
|
-
conn.
|
148
|
+
conn.unlink(workers_key)
|
94
149
|
curstate.each_pair do |tid, hash|
|
95
150
|
conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
|
96
151
|
end
|
97
152
|
conn.expire(workers_key, 60)
|
98
153
|
end
|
99
154
|
end
|
155
|
+
|
156
|
+
rtt = check_rtt
|
157
|
+
|
100
158
|
fails = procd = 0
|
159
|
+
kb = memory_usage(::Process.pid)
|
101
160
|
|
102
|
-
_, exists, _, _, msg = Sidekiq.redis
|
103
|
-
conn.multi
|
104
|
-
conn.sadd(
|
105
|
-
conn.exists(key)
|
106
|
-
conn.hmset(key,
|
161
|
+
_, exists, _, _, msg = Sidekiq.redis { |conn|
|
162
|
+
conn.multi {
|
163
|
+
conn.sadd("processes", key)
|
164
|
+
conn.exists?(key)
|
165
|
+
conn.hmset(key, "info", to_json,
|
166
|
+
"busy", curstate.size,
|
167
|
+
"beat", Time.now.to_f,
|
168
|
+
"rtt_us", rtt,
|
169
|
+
"quiet", @done,
|
170
|
+
"rss", kb)
|
107
171
|
conn.expire(key, 60)
|
108
172
|
conn.rpop("#{key}-signals")
|
109
|
-
|
110
|
-
|
173
|
+
}
|
174
|
+
}
|
111
175
|
|
112
176
|
# first heartbeat or recovering from an outage and need to reestablish our heartbeat
|
113
|
-
fire_event(:heartbeat)
|
177
|
+
fire_event(:heartbeat) unless exists
|
114
178
|
|
115
179
|
return unless msg
|
116
180
|
|
117
|
-
::Process.kill(msg,
|
181
|
+
::Process.kill(msg, ::Process.pid)
|
118
182
|
rescue => e
|
119
183
|
# ignore all redis/network issues
|
120
|
-
logger.error("heartbeat: #{e
|
184
|
+
logger.error("heartbeat: #{e}")
|
121
185
|
# don't lose the counts if there was a network issue
|
122
186
|
Processor::PROCESSED.incr(procd)
|
123
187
|
Processor::FAILURE.incr(fails)
|
124
188
|
end
|
125
189
|
end
|
126
190
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
191
|
+
# We run the heartbeat every five seconds.
|
192
|
+
# Capture five samples of RTT, log a warning if each sample
|
193
|
+
# is above our warning threshold.
|
194
|
+
RTT_READINGS = RingBuffer.new(5)
|
195
|
+
RTT_WARNING_LEVEL = 50_000
|
196
|
+
|
197
|
+
def check_rtt
|
198
|
+
a = b = 0
|
199
|
+
Sidekiq.redis do |x|
|
200
|
+
a = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
|
201
|
+
x.ping
|
202
|
+
b = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :microsecond)
|
131
203
|
end
|
132
|
-
|
204
|
+
rtt = b - a
|
205
|
+
RTT_READINGS << rtt
|
206
|
+
# Ideal RTT for Redis is < 1000µs
|
207
|
+
# Workable is < 10,000µs
|
208
|
+
# Log a warning if it's a disaster.
|
209
|
+
if RTT_READINGS.all? { |x| x > RTT_WARNING_LEVEL }
|
210
|
+
Sidekiq.logger.warn <<~EOM
|
211
|
+
Your Redis network connection is performing extremely poorly.
|
212
|
+
Last RTT readings were #{RTT_READINGS.buffer.inspect}, ideally these should be < 1000.
|
213
|
+
Ensure Redis is running in the same AZ or datacenter as Sidekiq.
|
214
|
+
EOM
|
215
|
+
RTT_READINGS.reset
|
216
|
+
end
|
217
|
+
rtt
|
133
218
|
end
|
134
219
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
220
|
+
MEMORY_GRABBER = case RUBY_PLATFORM
|
221
|
+
when /linux/
|
222
|
+
->(pid) {
|
223
|
+
IO.readlines("/proc/#{$$}/status").each do |line|
|
224
|
+
next unless line.start_with?("VmRSS:")
|
225
|
+
break line.split[1].to_i
|
226
|
+
end
|
227
|
+
}
|
228
|
+
when /darwin|bsd/
|
229
|
+
->(pid) {
|
230
|
+
`ps -o pid,rss -p #{pid}`.lines.last.split.last.to_i
|
231
|
+
}
|
232
|
+
else
|
233
|
+
->(pid) { 0 }
|
148
234
|
end
|
149
235
|
|
150
|
-
def
|
151
|
-
|
152
|
-
# this data changes infrequently so dump it to a string
|
153
|
-
# now so we don't need to dump it every heartbeat.
|
154
|
-
Sidekiq.dump_json(to_data)
|
155
|
-
end
|
236
|
+
def memory_usage(pid)
|
237
|
+
MEMORY_GRABBER.call(pid)
|
156
238
|
end
|
157
239
|
|
158
|
-
def
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
# best effort, ignore network errors
|
240
|
+
def to_data
|
241
|
+
@data ||= {
|
242
|
+
"hostname" => hostname,
|
243
|
+
"started_at" => Time.now.to_f,
|
244
|
+
"pid" => ::Process.pid,
|
245
|
+
"tag" => @options[:tag] || "",
|
246
|
+
"concurrency" => @options[:concurrency],
|
247
|
+
"queues" => @options[:queues].uniq,
|
248
|
+
"labels" => @options[:labels],
|
249
|
+
"identity" => identity
|
250
|
+
}
|
170
251
|
end
|
171
252
|
|
253
|
+
def to_json
|
254
|
+
# this data changes infrequently so dump it to a string
|
255
|
+
# now so we don't need to dump it every heartbeat.
|
256
|
+
@json ||= Sidekiq.dump_json(to_data)
|
257
|
+
end
|
172
258
|
end
|
173
259
|
end
|