sidekiq_cleaner 5.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +61 -0
- data/.github/contributing.md +32 -0
- data/.github/issue_template.md +11 -0
- data/.gitignore +15 -0
- data/.travis.yml +11 -0
- data/3.0-Upgrade.md +70 -0
- data/4.0-Upgrade.md +53 -0
- data/5.0-Upgrade.md +56 -0
- data/COMM-LICENSE +97 -0
- data/Changes.md +1536 -0
- data/Ent-Changes.md +238 -0
- data/Gemfile +23 -0
- data/LICENSE +9 -0
- data/Pro-2.0-Upgrade.md +138 -0
- data/Pro-3.0-Upgrade.md +44 -0
- data/Pro-4.0-Upgrade.md +35 -0
- data/Pro-Changes.md +759 -0
- data/README.md +55 -0
- data/Rakefile +9 -0
- data/bin/sidekiq +18 -0
- data/bin/sidekiqctl +20 -0
- data/bin/sidekiqload +149 -0
- data/cleaner/assets/images/favicon.ico +0 -0
- data/cleaner/assets/images/logo.png +0 -0
- data/cleaner/assets/images/status.png +0 -0
- data/cleaner/assets/javascripts/application.js +172 -0
- data/cleaner/assets/javascripts/dashboard.js +315 -0
- data/cleaner/assets/stylesheets/application-rtl.css +246 -0
- data/cleaner/assets/stylesheets/application.css +1144 -0
- data/cleaner/assets/stylesheets/bootstrap-rtl.min.css +9 -0
- data/cleaner/assets/stylesheets/bootstrap.css +5 -0
- data/cleaner/locales/ar.yml +81 -0
- data/cleaner/locales/cs.yml +78 -0
- data/cleaner/locales/da.yml +68 -0
- data/cleaner/locales/de.yml +69 -0
- data/cleaner/locales/el.yml +68 -0
- data/cleaner/locales/en.yml +81 -0
- data/cleaner/locales/es.yml +70 -0
- data/cleaner/locales/fa.yml +80 -0
- data/cleaner/locales/fr.yml +78 -0
- data/cleaner/locales/he.yml +79 -0
- data/cleaner/locales/hi.yml +75 -0
- data/cleaner/locales/it.yml +69 -0
- data/cleaner/locales/ja.yml +80 -0
- data/cleaner/locales/ko.yml +68 -0
- data/cleaner/locales/nb.yml +77 -0
- data/cleaner/locales/nl.yml +68 -0
- data/cleaner/locales/pl.yml +59 -0
- data/cleaner/locales/pt-br.yml +68 -0
- data/cleaner/locales/pt.yml +67 -0
- data/cleaner/locales/ru.yml +78 -0
- data/cleaner/locales/sv.yml +68 -0
- data/cleaner/locales/ta.yml +75 -0
- data/cleaner/locales/uk.yml +76 -0
- data/cleaner/locales/ur.yml +80 -0
- data/cleaner/locales/zh-cn.yml +68 -0
- data/cleaner/locales/zh-tw.yml +68 -0
- data/cleaner/views/_footer.erb +20 -0
- data/cleaner/views/_job_info.erb +88 -0
- data/cleaner/views/_nav.erb +52 -0
- data/cleaner/views/_paging.erb +23 -0
- data/cleaner/views/_poll_link.erb +7 -0
- data/cleaner/views/_status.erb +4 -0
- data/cleaner/views/_summary.erb +40 -0
- data/cleaner/views/busy.erb +98 -0
- data/cleaner/views/dashboard.erb +75 -0
- data/cleaner/views/dead.erb +34 -0
- data/cleaner/views/errors.erb +84 -0
- data/cleaner/views/layout.erb +40 -0
- data/cleaner/views/morgue.erb +75 -0
- data/cleaner/views/queue.erb +46 -0
- data/cleaner/views/queues.erb +30 -0
- data/cleaner/views/retries.erb +80 -0
- data/cleaner/views/retry.erb +34 -0
- data/cleaner/views/scheduled.erb +54 -0
- data/cleaner/views/scheduled_job_info.erb +8 -0
- data/cleaner-stats.png +0 -0
- data/cleaner.png +0 -0
- data/code_of_conduct.md +50 -0
- data/lib/generators/sidekiq/templates/worker.rb.erb +9 -0
- data/lib/generators/sidekiq/templates/worker_spec.rb.erb +6 -0
- data/lib/generators/sidekiq/templates/worker_test.rb.erb +8 -0
- data/lib/generators/sidekiq/worker_generator.rb +49 -0
- data/lib/sidekiq/api.rb +940 -0
- data/lib/sidekiq/cleaner/action.rb +89 -0
- data/lib/sidekiq/cleaner/application.rb +385 -0
- data/lib/sidekiq/cleaner/helpers.rb +325 -0
- data/lib/sidekiq/cleaner/router.rb +100 -0
- data/lib/sidekiq/cleaner.rb +214 -0
- data/lib/sidekiq/cli.rb +445 -0
- data/lib/sidekiq/client.rb +243 -0
- data/lib/sidekiq/core_ext.rb +1 -0
- data/lib/sidekiq/ctl.rb +221 -0
- data/lib/sidekiq/delay.rb +42 -0
- data/lib/sidekiq/exception_handler.rb +29 -0
- data/lib/sidekiq/extensions/action_mailer.rb +57 -0
- data/lib/sidekiq/extensions/active_record.rb +40 -0
- data/lib/sidekiq/extensions/class_methods.rb +40 -0
- data/lib/sidekiq/extensions/generic_proxy.rb +31 -0
- data/lib/sidekiq/fetch.rb +81 -0
- data/lib/sidekiq/job_logger.rb +25 -0
- data/lib/sidekiq/job_retry.rb +262 -0
- data/lib/sidekiq/launcher.rb +173 -0
- data/lib/sidekiq/logging.rb +122 -0
- data/lib/sidekiq/manager.rb +137 -0
- data/lib/sidekiq/middleware/chain.rb +150 -0
- data/lib/sidekiq/middleware/i18n.rb +42 -0
- data/lib/sidekiq/middleware/server/active_record.rb +23 -0
- data/lib/sidekiq/paginator.rb +43 -0
- data/lib/sidekiq/processor.rb +279 -0
- data/lib/sidekiq/rails.rb +58 -0
- data/lib/sidekiq/redis_connection.rb +144 -0
- data/lib/sidekiq/scheduled.rb +174 -0
- data/lib/sidekiq/testing/inline.rb +29 -0
- data/lib/sidekiq/testing.rb +333 -0
- data/lib/sidekiq/util.rb +66 -0
- data/lib/sidekiq/version.rb +4 -0
- data/lib/sidekiq/worker.rb +220 -0
- data/lib/sidekiq.rb +237 -0
- data/sidekiq_cleaner.gemspec +21 -0
- metadata +235 -0
data/lib/sidekiq/job_retry.rb
@@ -0,0 +1,262 @@
+# frozen_string_literal: true
+require 'sidekiq/scheduled'
+require 'sidekiq/api'
+
+module Sidekiq
+  ##
+  # Automatically retry jobs that fail in Sidekiq.
+  # Sidekiq's retry support assumes a typical development lifecycle:
+  #
+  #   0. Push some code changes with a bug in it.
+  #   1. Bug causes job processing to fail, Sidekiq's middleware captures
+  #      the job and pushes it onto a retry queue.
+  #   2. Sidekiq retries jobs in the retry queue multiple times with
+  #      an exponential delay, the job continues to fail.
+  #   3. After a few days, a developer deploys a fix. The job is
+  #      reprocessed successfully.
+  #   4. Once retries are exhausted, Sidekiq will give up and move the
+  #      job to the Dead Job Queue (aka morgue) where it must be dealt with
+  #      manually in the Web UI.
+  #   5. After 6 months on the DJQ, Sidekiq will discard the job.
+  #
+  # A job looks like:
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => true }
+  #
+  # The 'retry' option also accepts a number (in place of 'true'):
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => 5 }
+  #
+  # The job will be retried this number of times before giving up. (If simply
+  # 'true', Sidekiq retries 25 times)
+  #
+  # We'll add a bit more data to the job to support retries:
+  #
+  # * 'queue' - the queue to use
+  # * 'retry_count' - number of times we've retried so far.
+  # * 'error_message' - the message from the exception
+  # * 'error_class' - the exception class
+  # * 'failed_at' - the first time it failed
+  # * 'retried_at' - the last time it was retried
+  # * 'backtrace' - the number of lines of error backtrace to store
+  #
+  # We don't store the backtrace by default as that can add a lot of overhead
+  # to the job and everyone is using an error service, right?
+  #
+  # The default number of retries is 25 which works out to about 3 weeks
+  # You can change the default maximum number of retries in your initializer:
+  #
+  #   Sidekiq.options[:max_retries] = 7
+  #
+  # or limit the number of retries for a particular worker with:
+  #
+  #   class MyWorker
+  #     include Sidekiq::Worker
+  #     sidekiq_options :retry => 10
+  #   end
+  #
+  class JobRetry
+    class Handled < ::RuntimeError; end
+    class Skip < Handled; end
+
+    include Sidekiq::Util
+
+    DEFAULT_MAX_RETRY_ATTEMPTS = 25
+
+    def initialize(options = {})
+      @max_retries = Sidekiq.options.merge(options).fetch(:max_retries, DEFAULT_MAX_RETRY_ATTEMPTS)
+    end
+
+    # The global retry handler requires only the barest of data.
+    # We want to be able to retry as much as possible so we don't
+    # require the worker to be instantiated.
+    def global(msg, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      if msg['retry']
+        attempt_retry(nil, msg, queue, e)
+      else
+        Sidekiq.death_handlers.each do |handler|
+          begin
+            handler.call(msg, e)
+          rescue => handler_ex
+            handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
+          end
+        end
+      end
+
+      raise Handled
+    end
+
+
+    # The local retry support means that any errors that occur within
+    # this block can be associated with the given worker instance.
+    # This is required to support the `sidekiq_retries_exhausted` block.
+    #
+    # Note that any exception from the block is wrapped in the Skip
+    # exception so the global block does not reprocess the error. The
+    # Skip exception is unwrapped within Sidekiq::Processor#process before
+    # calling the handle_exception handlers.
+    def local(worker, msg, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      if msg['retry'] == nil
+        msg['retry'] = worker.class.get_sidekiq_options['retry']
+      end
+
+      raise e unless msg['retry']
+      attempt_retry(worker, msg, queue, e)
+      # We've handled this error associated with this job, don't
+      # need to handle it at the global level
+      raise Skip
+    end
+
+    private
+
+    # Note that +worker+ can be nil here if an error is raised before we can
+    # instantiate the worker instance. All access must be guarded and
+    # best effort.
+    def attempt_retry(worker, msg, queue, exception)
+      max_retry_attempts = retry_attempts_from(msg['retry'], @max_retries)
+
+      msg['queue'] = if msg['retry_queue']
+        msg['retry_queue']
+      else
+        queue
+      end
+
+      m = exception_message(exception)
+      if m.respond_to?(:scrub!)
+        m.force_encoding("utf-8")
+        m.scrub!
+      end
+
+      msg['error_message'] = m
+      msg['error_class'] = exception.class.name
+      count = if msg['retry_count']
+        msg['retried_at'] = Time.now.to_f
+        msg['retry_count'] += 1
+      else
+        msg['failed_at'] = Time.now.to_f
+        msg['retry_count'] = 0
+      end
+
+      if msg['backtrace'] == true
+        msg['error_backtrace'] = exception.backtrace
+      elsif !msg['backtrace']
+        # do nothing
+      elsif msg['backtrace'].to_i != 0
+        msg['error_backtrace'] = exception.backtrace[0...msg['backtrace'].to_i]
+      end
+
+      if count < max_retry_attempts
+        delay = delay_for(worker, count, exception)
+        # Logging here can break retries if the logging device raises ENOSPC #3979
+        #logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        retry_at = Time.now.to_f + delay
+        payload = Sidekiq.dump_json(msg)
+        Sidekiq.redis do |conn|
+          conn.zadd('retry', retry_at.to_s, payload)
+        end
+      else
+        # Goodbye dear message, you (re)tried your best I'm sure.
+        retries_exhausted(worker, msg, exception)
+      end
+    end
+
+    def retries_exhausted(worker, msg, exception)
+      begin
+        block = worker && worker.sidekiq_retries_exhausted_block
+        block.call(msg, exception) if block
+      rescue => e
+        handle_exception(e, { context: "Error calling retries_exhausted", job: msg })
+      end
+
+      Sidekiq.death_handlers.each do |handler|
+        begin
+          handler.call(msg, exception)
+        rescue => e
+          handle_exception(e, { context: "Error calling death handler", job: msg })
+        end
+      end
+
+      send_to_morgue(msg) unless msg['dead'] == false
+    end
+
+    def send_to_morgue(msg)
+      logger.info { "Adding dead #{msg['class']} job #{msg['jid']}" }
+      payload = Sidekiq.dump_json(msg)
+      DeadSet.new.kill(payload, notify_failure: false)
+    end
+
+    def retry_attempts_from(msg_retry, default)
+      if msg_retry.is_a?(Integer)
+        msg_retry
+      else
+        default
+      end
+    end
+
+    def delay_for(worker, count, exception)
+      if worker && worker.sidekiq_retry_in_block
+        custom_retry_in = retry_in(worker, count, exception).to_i
+        return custom_retry_in if custom_retry_in > 0
+      end
+      seconds_to_delay(count)
+    end
+
+    # delayed_job uses the same basic formula
+    def seconds_to_delay(count)
+      (count ** 4) + 15 + (rand(30)*(count+1))
+    end
+
+    def retry_in(worker, count, exception)
+      begin
+        worker.sidekiq_retry_in_block.call(count, exception)
+      rescue Exception => e
+        handle_exception(e, { context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default" })
+        nil
+      end
+    end
+
+    def exception_caused_by_shutdown?(e, checked_causes = [])
+      return false unless e.cause
+
+      # Handle circular causes
+      checked_causes << e.object_id
+      return false if checked_causes.include?(e.cause.object_id)
+
+      e.cause.instance_of?(Sidekiq::Shutdown) ||
+        exception_caused_by_shutdown?(e.cause, checked_causes)
+    end
+
+    # Extract message from exception.
+    # Set a default if the message raises an error
+    def exception_message(exception)
+      begin
+        # App code can stuff all sorts of crazy binary data into the error message
+        # that won't convert to JSON.
+        exception.message.to_s[0, 10_000]
+      rescue
+        "!!! ERROR MESSAGE THREW AN ERROR !!!".dup
+      end
+    end
+
+  end
+end
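The hooks this retry machinery calls -- the 'retry' job option, worker.sidekiq_retry_in_block in delay_for, and worker.sidekiq_retries_exhausted_block in retries_exhausted -- are declared on the worker class. A minimal sketch of a worker exercising them (ReportWorker and its body are made-up names for illustration, not part of this gem):

    class ReportWorker
      include Sidekiq::Worker
      # retry 5 times, retry on the 'low' queue, keep 20 backtrace lines
      sidekiq_options retry: 5, retry_queue: 'low', backtrace: 20

      # Consulted by JobRetry#delay_for; returning 0 or nil falls back
      # to the default seconds_to_delay formula.
      sidekiq_retry_in do |count, exception|
        30 * (count + 1)
      end

      # Invoked by JobRetry#retries_exhausted before the job is killed.
      sidekiq_retries_exhausted do |msg, exception|
        Sidekiq.logger.warn "Giving up on #{msg['class']} #{msg['jid']}: #{msg['error_message']}"
      end

      def perform(report_id)
        # an exception raised here flows through JobRetry#local
      end
    end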
data/lib/sidekiq/launcher.rb
@@ -0,0 +1,173 @@
+# frozen_string_literal: true
+require 'sidekiq/manager'
+require 'sidekiq/fetch'
+require 'sidekiq/scheduled'
+
+module Sidekiq
+  # The Launcher is a very simple Actor whose job is to
+  # start, monitor and stop the core Actors in Sidekiq.
+  # If any of these actors die, the Sidekiq process exits
+  # immediately.
+  class Launcher
+    include Util
+
+    attr_accessor :manager, :poller, :fetcher
+
+    STATS_TTL = 5*365*24*60*60
+
+    def initialize(options)
+      @manager = Sidekiq::Manager.new(options)
+      @poller = Sidekiq::Scheduled::Poller.new
+      @done = false
+      @options = options
+    end
+
+    def run
+      @thread = safe_thread("heartbeat", &method(:start_heartbeat))
+      @poller.start
+      @manager.start
+    end
+
+    # Stops this instance from processing any more jobs,
+    #
+    def quiet
+      @done = true
+      @manager.quiet
+      @poller.terminate
+    end
+
+    # Shuts down the process. This method does not
+    # return until all work is complete and cleaned up.
+    # It can take up to the timeout to complete.
+    def stop
+      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
+
+      @done = true
+      @manager.quiet
+      @poller.terminate
+
+      @manager.stop(deadline)
+
+      # Requeue everything in case there was a worker who grabbed work while stopped
+      # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
+      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy.bulk_requeue([], @options)
+
+      clear_heartbeat
+    end
+
+    def stopping?
+      @done
+    end
+
+    private unless $TESTING
+
+    def heartbeat
+      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, to_data) }
+      results.compact!
+      $0 = results.join(' ')
+
+      ❤
+    end
+
+    def ❤
+      key = identity
+      fails = procd = 0
+      begin
+        fails = Processor::FAILURE.reset
+        procd = Processor::PROCESSED.reset
+        curstate = Processor::WORKER_STATE.dup
+
+        workers_key = "#{key}:workers"
+        nowdate = Time.now.utc.strftime("%Y-%m-%d")
+        Sidekiq.redis do |conn|
+          conn.multi do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+
+            conn.del(workers_key)
+            curstate.each_pair do |tid, hash|
+              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
+            end
+            conn.expire(workers_key, 60)
+          end
+        end
+        fails = procd = 0
+
+        _, exists, _, _, msg = Sidekiq.redis do |conn|
+          conn.multi do
+            conn.sadd('processes', key)
+            conn.exists(key)
+            conn.hmset(key, 'info', to_json, 'busy', curstate.size, 'beat', Time.now.to_f, 'quiet', @done)
+            conn.expire(key, 60)
+            conn.rpop("#{key}-signals")
+          end
+        end
+
+        # first heartbeat or recovering from an outage and need to reestablish our heartbeat
+        fire_event(:heartbeat) if !exists
+
+        return unless msg
+
+        ::Process.kill(msg, $$)
+      rescue => e
+        # ignore all redis/network issues
+        logger.error("heartbeat: #{e.message}")
+        # don't lose the counts if there was a network issue
+        Processor::PROCESSED.incr(procd)
+        Processor::FAILURE.incr(fails)
+      end
+    end
+
+    def start_heartbeat
+      while true
+        heartbeat
+        sleep 5
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def to_data
+      @data ||= begin
+        {
+          'hostname' => hostname,
+          'started_at' => Time.now.to_f,
+          'pid' => $$,
+          'tag' => @options[:tag] || '',
+          'concurrency' => @options[:concurrency],
+          'queues' => @options[:queues].uniq,
+          'labels' => @options[:labels],
+          'identity' => identity,
+        }
+      end
+    end
+
+    def to_json
+      @json ||= begin
+        # this data changes infrequently so dump it to a string
+        # now so we don't need to dump it every heartbeat.
+        Sidekiq.dump_json(to_data)
+      end
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem('processes', identity)
+          conn.del("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
+  end
+end
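For orientation, the ❤ heartbeat above is what the Busy page consumes: every 5 seconds each process adds its identity to the 'processes' set and refreshes a short-lived hash of info/busy/beat/quiet fields. A rough, read-only sketch of inspecting that state, using the key names straight from the ❤ method (this reader is illustrative, not shipped in the gem):

    Sidekiq.redis do |conn|
      conn.smembers('processes').each do |key|
        info, busy, beat = conn.hmget(key, 'info', 'busy', 'beat')
        next unless info # 60s TTL expired: process presumed dead
        data = Sidekiq.load_json(info)
        puts "#{data['identity']}: #{busy} busy, last beat at #{Time.at(beat.to_f)}"
      end
    end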
data/lib/sidekiq/logging.rb
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+require 'time'
+require 'logger'
+require 'fcntl'
+
+module Sidekiq
+  module Logging
+
+    class Pretty < Logger::Formatter
+      SPACE = " "
+
+      # Provide a call() method that returns the formatted message.
+      def call(severity, time, program_name, message)
+        "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Sidekiq::Logging.tid}#{context} #{severity}: #{message}\n"
+      end
+
+      def context
+        c = Thread.current[:sidekiq_context]
+        " #{c.join(SPACE)}" if c && c.any?
+      end
+    end
+
+    class WithoutTimestamp < Pretty
+      def call(severity, time, program_name, message)
+        "#{::Process.pid} TID-#{Sidekiq::Logging.tid}#{context} #{severity}: #{message}\n"
+      end
+    end
+
+    def self.tid
+      Thread.current['sidekiq_tid'] ||= (Thread.current.object_id ^ ::Process.pid).to_s(36)
+    end
+
+    def self.job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      klass = job_hash['wrapped'] || job_hash["class"]
+      bid = job_hash['bid']
+      "#{klass} JID-#{job_hash['jid']}#{" BID-#{bid}" if bid}"
+    end
+
+    def self.with_job_hash_context(job_hash, &block)
+      with_context(job_hash_context(job_hash), &block)
+    end
+
+    def self.with_context(msg)
+      Thread.current[:sidekiq_context] ||= []
+      Thread.current[:sidekiq_context] << msg
+      yield
+    ensure
+      Thread.current[:sidekiq_context].pop
+    end
+
+    def self.initialize_logger(log_target = STDOUT)
+      oldlogger = defined?(@logger) ? @logger : nil
+      @logger = Logger.new(log_target)
+      @logger.level = Logger::INFO
+      @logger.formatter = ENV['DYNO'] ? WithoutTimestamp.new : Pretty.new
+      oldlogger.close if oldlogger && !$TESTING # don't want to close testing's STDOUT logging
+      @logger
+    end
+
+    def self.logger
+      defined?(@logger) ? @logger : initialize_logger
+    end
+
+    def self.logger=(log)
+      @logger = (log ? log : Logger.new(File::NULL))
+    end
+
+    # This reopens ALL logfiles in the process that have been rotated
+    # using logrotate(8) (without copytruncate) or similar tools.
+    # A +File+ object is considered for reopening if it is:
+    #   1) opened with the O_APPEND and O_WRONLY flags
+    #   2) the current open file handle does not match its original open path
+    #   3) unbuffered (as far as userspace buffering goes, not O_SYNC)
+    # Returns the number of files reopened
+    def self.reopen_logs
+      to_reopen = []
+      append_flags = File::WRONLY | File::APPEND
+
+      ObjectSpace.each_object(File) do |fp|
+        begin
+          if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
+            to_reopen << fp
+          end
+        rescue IOError, Errno::EBADF
+        end
+      end
+
+      nr = 0
+      to_reopen.each do |fp|
+        orig_st = begin
+          fp.stat
+        rescue IOError, Errno::EBADF
+          next
+        end
+
+        begin
+          b = File.stat(fp.path)
+          next if orig_st.ino == b.ino && orig_st.dev == b.dev
+        rescue Errno::ENOENT
+        end
+
+        begin
+          File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
+          fp.sync = true
+          nr += 1
+        rescue IOError, Errno::EBADF
+          # not much we can do...
+        end
+      end
+      nr
+    rescue RuntimeError => ex
+      # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
+      puts "Unable to reopen logs: #{ex.message}"
+    end
+
+    def logger
+      Sidekiq::Logging.logger
+    end
+  end
+end
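The context machinery above is simple: with_context pushes a string onto a thread-local stack, and Pretty#context joins that stack into every line logged inside the block. A tiny usage sketch (the tag string and the sample output line are illustrative):

    Sidekiq::Logging.with_context("IMPORT-42") do
      Sidekiq.logger.info "starting"
      # => "2018-01-01T00:00:00.000Z 123 TID-abc123 IMPORT-42 INFO: starting"
    end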
data/lib/sidekiq/manager.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+require 'sidekiq/util'
+require 'sidekiq/processor'
+require 'sidekiq/fetch'
+require 'thread'
+require 'set'
+
+module Sidekiq
+
+  ##
+  # The Manager is the central coordination point in Sidekiq, controlling
+  # the lifecycle of the Processors.
+  #
+  # Tasks:
+  #
+  # 1. start: Spin up Processors.
+  # 3. processor_died: Handle job failure, throw away Processor, create new one.
+  # 4. quiet: shutdown idle Processors.
+  # 5. stop: hard stop the Processors by deadline.
+  #
+  # Note that only the last task requires its own Thread since it has to monitor
+  # the shutdown process. The other tasks are performed by other threads.
+  #
+  class Manager
+    include Util
+
+    attr_reader :workers
+    attr_reader :options
+
+    def initialize(options={})
+      logger.debug { options.inspect }
+      @options = options
+      @count = options[:concurrency] || 10
+      raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
+
+      @done = false
+      @workers = Set.new
+      @count.times do
+        @workers << Processor.new(self)
+      end
+      @plock = Mutex.new
+    end
+
+    def start
+      @workers.each do |x|
+        x.start
+      end
+    end
+
+    def quiet
+      return if @done
+      @done = true
+
+      logger.info { "Terminating quiet workers" }
+      @workers.each { |x| x.terminate }
+      fire_event(:quiet, reverse: true)
+    end
+
+    # hack for quicker development / testing environment #2774
+    PAUSE_TIME = STDOUT.tty? ? 0.1 : 0.5
+
+    def stop(deadline)
+      quiet
+      fire_event(:shutdown, reverse: true)
+
+      # some of the shutdown events can be async,
+      # we don't have any way to know when they're done but
+      # give them a little time to take effect
+      sleep PAUSE_TIME
+      return if @workers.empty?
+
+      logger.info { "Pausing to allow workers to finish..." }
+      remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      while remaining > PAUSE_TIME
+        return if @workers.empty?
+        sleep PAUSE_TIME
+        remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      end
+      return if @workers.empty?
+
+      hard_shutdown
+    end
+
+    def processor_stopped(processor)
+      @plock.synchronize do
+        @workers.delete(processor)
+      end
+    end
+
+    def processor_died(processor, reason)
+      @plock.synchronize do
+        @workers.delete(processor)
+        unless @done
+          p = Processor.new(self)
+          @workers << p
+          p.start
+        end
+      end
+    end
+
+    def stopped?
+      @done
+    end
+
+    private
+
+    def hard_shutdown
+      # We've reached the timeout and we still have busy workers.
+      # They must die but their jobs shall live on.
+      cleanup = nil
+      @plock.synchronize do
+        cleanup = @workers.dup
+      end
+
+      if cleanup.size > 0
+        jobs = cleanup.map {|p| p.job }.compact
+
+        logger.warn { "Terminating #{cleanup.size} busy worker threads" }
+        logger.warn { "Work still in progress #{jobs.inspect}" }
+
+        # Re-enqueue unfinished jobs
+        # NOTE: You may notice that we may push a job back to redis before
+        # the worker thread is terminated. This is ok because Sidekiq's
+        # contract says that jobs are run AT LEAST once. Process termination
+        # is delayed until we're certain the jobs are back in Redis because
+        # it is worse to lose a job than to run it twice.
+        strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+        strategy.bulk_requeue(jobs, @options)
+      end
+
+      cleanup.each do |processor|
+        processor.kill
+      end
+    end
+
+  end
+end
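The fire_event(:quiet, reverse: true) and fire_event(:shutdown, reverse: true) calls above drain user-registered lifecycle hooks. Assuming this gem keeps stock Sidekiq's lifecycle API (Sidekiq.on, usually reached through configure_server), registering such hooks would look like:

    Sidekiq.configure_server do |config|
      config.on(:quiet)    { Sidekiq.logger.info "TSTP received, no longer accepting new work" }
      config.on(:shutdown) { Sidekiq.logger.info "TERM received, shutting down" }
    end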