sidekiq_cleaner 5.3.6
- checksums.yaml +7 -0
- data/.circleci/config.yml +61 -0
- data/.github/contributing.md +32 -0
- data/.github/issue_template.md +11 -0
- data/.gitignore +15 -0
- data/.travis.yml +11 -0
- data/3.0-Upgrade.md +70 -0
- data/4.0-Upgrade.md +53 -0
- data/5.0-Upgrade.md +56 -0
- data/COMM-LICENSE +97 -0
- data/Changes.md +1536 -0
- data/Ent-Changes.md +238 -0
- data/Gemfile +23 -0
- data/LICENSE +9 -0
- data/Pro-2.0-Upgrade.md +138 -0
- data/Pro-3.0-Upgrade.md +44 -0
- data/Pro-4.0-Upgrade.md +35 -0
- data/Pro-Changes.md +759 -0
- data/README.md +55 -0
- data/Rakefile +9 -0
- data/bin/sidekiq +18 -0
- data/bin/sidekiqctl +20 -0
- data/bin/sidekiqload +149 -0
- data/cleaner/assets/images/favicon.ico +0 -0
- data/cleaner/assets/images/logo.png +0 -0
- data/cleaner/assets/images/status.png +0 -0
- data/cleaner/assets/javascripts/application.js +172 -0
- data/cleaner/assets/javascripts/dashboard.js +315 -0
- data/cleaner/assets/stylesheets/application-rtl.css +246 -0
- data/cleaner/assets/stylesheets/application.css +1144 -0
- data/cleaner/assets/stylesheets/bootstrap-rtl.min.css +9 -0
- data/cleaner/assets/stylesheets/bootstrap.css +5 -0
- data/cleaner/locales/ar.yml +81 -0
- data/cleaner/locales/cs.yml +78 -0
- data/cleaner/locales/da.yml +68 -0
- data/cleaner/locales/de.yml +69 -0
- data/cleaner/locales/el.yml +68 -0
- data/cleaner/locales/en.yml +81 -0
- data/cleaner/locales/es.yml +70 -0
- data/cleaner/locales/fa.yml +80 -0
- data/cleaner/locales/fr.yml +78 -0
- data/cleaner/locales/he.yml +79 -0
- data/cleaner/locales/hi.yml +75 -0
- data/cleaner/locales/it.yml +69 -0
- data/cleaner/locales/ja.yml +80 -0
- data/cleaner/locales/ko.yml +68 -0
- data/cleaner/locales/nb.yml +77 -0
- data/cleaner/locales/nl.yml +68 -0
- data/cleaner/locales/pl.yml +59 -0
- data/cleaner/locales/pt-br.yml +68 -0
- data/cleaner/locales/pt.yml +67 -0
- data/cleaner/locales/ru.yml +78 -0
- data/cleaner/locales/sv.yml +68 -0
- data/cleaner/locales/ta.yml +75 -0
- data/cleaner/locales/uk.yml +76 -0
- data/cleaner/locales/ur.yml +80 -0
- data/cleaner/locales/zh-cn.yml +68 -0
- data/cleaner/locales/zh-tw.yml +68 -0
- data/cleaner/views/_footer.erb +20 -0
- data/cleaner/views/_job_info.erb +88 -0
- data/cleaner/views/_nav.erb +52 -0
- data/cleaner/views/_paging.erb +23 -0
- data/cleaner/views/_poll_link.erb +7 -0
- data/cleaner/views/_status.erb +4 -0
- data/cleaner/views/_summary.erb +40 -0
- data/cleaner/views/busy.erb +98 -0
- data/cleaner/views/dashboard.erb +75 -0
- data/cleaner/views/dead.erb +34 -0
- data/cleaner/views/errors.erb +84 -0
- data/cleaner/views/layout.erb +40 -0
- data/cleaner/views/morgue.erb +75 -0
- data/cleaner/views/queue.erb +46 -0
- data/cleaner/views/queues.erb +30 -0
- data/cleaner/views/retries.erb +80 -0
- data/cleaner/views/retry.erb +34 -0
- data/cleaner/views/scheduled.erb +54 -0
- data/cleaner/views/scheduled_job_info.erb +8 -0
- data/cleaner-stats.png +0 -0
- data/cleaner.png +0 -0
- data/code_of_conduct.md +50 -0
- data/lib/generators/sidekiq/templates/worker.rb.erb +9 -0
- data/lib/generators/sidekiq/templates/worker_spec.rb.erb +6 -0
- data/lib/generators/sidekiq/templates/worker_test.rb.erb +8 -0
- data/lib/generators/sidekiq/worker_generator.rb +49 -0
- data/lib/sidekiq/api.rb +940 -0
- data/lib/sidekiq/cleaner/action.rb +89 -0
- data/lib/sidekiq/cleaner/application.rb +385 -0
- data/lib/sidekiq/cleaner/helpers.rb +325 -0
- data/lib/sidekiq/cleaner/router.rb +100 -0
- data/lib/sidekiq/cleaner.rb +214 -0
- data/lib/sidekiq/cli.rb +445 -0
- data/lib/sidekiq/client.rb +243 -0
- data/lib/sidekiq/core_ext.rb +1 -0
- data/lib/sidekiq/ctl.rb +221 -0
- data/lib/sidekiq/delay.rb +42 -0
- data/lib/sidekiq/exception_handler.rb +29 -0
- data/lib/sidekiq/extensions/action_mailer.rb +57 -0
- data/lib/sidekiq/extensions/active_record.rb +40 -0
- data/lib/sidekiq/extensions/class_methods.rb +40 -0
- data/lib/sidekiq/extensions/generic_proxy.rb +31 -0
- data/lib/sidekiq/fetch.rb +81 -0
- data/lib/sidekiq/job_logger.rb +25 -0
- data/lib/sidekiq/job_retry.rb +262 -0
- data/lib/sidekiq/launcher.rb +173 -0
- data/lib/sidekiq/logging.rb +122 -0
- data/lib/sidekiq/manager.rb +137 -0
- data/lib/sidekiq/middleware/chain.rb +150 -0
- data/lib/sidekiq/middleware/i18n.rb +42 -0
- data/lib/sidekiq/middleware/server/active_record.rb +23 -0
- data/lib/sidekiq/paginator.rb +43 -0
- data/lib/sidekiq/processor.rb +279 -0
- data/lib/sidekiq/rails.rb +58 -0
- data/lib/sidekiq/redis_connection.rb +144 -0
- data/lib/sidekiq/scheduled.rb +174 -0
- data/lib/sidekiq/testing/inline.rb +29 -0
- data/lib/sidekiq/testing.rb +333 -0
- data/lib/sidekiq/util.rb +66 -0
- data/lib/sidekiq/version.rb +4 -0
- data/lib/sidekiq/worker.rb +220 -0
- data/lib/sidekiq.rb +237 -0
- data/sidekiq_cleaner.gemspec +21 -0
- metadata +235 -0
data/lib/sidekiq/job_retry.rb
@@ -0,0 +1,262 @@
+# frozen_string_literal: true
+require 'sidekiq/scheduled'
+require 'sidekiq/api'
+
+module Sidekiq
+  ##
+  # Automatically retry jobs that fail in Sidekiq.
+  # Sidekiq's retry support assumes a typical development lifecycle:
+  #
+  #   0. Push some code changes with a bug in it.
+  #   1. Bug causes job processing to fail, Sidekiq's middleware captures
+  #      the job and pushes it onto a retry queue.
+  #   2. Sidekiq retries jobs in the retry queue multiple times with
+  #      an exponential delay, the job continues to fail.
+  #   3. After a few days, a developer deploys a fix. The job is
+  #      reprocessed successfully.
+  #   4. Once retries are exhausted, Sidekiq will give up and move the
+  #      job to the Dead Job Queue (aka morgue) where it must be dealt with
+  #      manually in the Web UI.
+  #   5. After 6 months on the DJQ, Sidekiq will discard the job.
+  #
+  # A job looks like:
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => true }
+  #
+  # The 'retry' option also accepts a number (in place of 'true'):
+  #
+  #     { 'class' => 'HardWorker', 'args' => [1, 2, 'foo'], 'retry' => 5 }
+  #
+  # The job will be retried this number of times before giving up. (If simply
+  # 'true', Sidekiq retries 25 times)
+  #
+  # We'll add a bit more data to the job to support retries:
+  #
+  # * 'queue' - the queue to use
+  # * 'retry_count' - number of times we've retried so far.
+  # * 'error_message' - the message from the exception
+  # * 'error_class' - the exception class
+  # * 'failed_at' - the first time it failed
+  # * 'retried_at' - the last time it was retried
+  # * 'backtrace' - the number of lines of error backtrace to store
+  #
+  # We don't store the backtrace by default as that can add a lot of overhead
+  # to the job and everyone is using an error service, right?
+  #
+  # The default number of retries is 25 which works out to about 3 weeks.
+  # You can change the default maximum number of retries in your initializer:
+  #
+  #     Sidekiq.options[:max_retries] = 7
+  #
+  # or limit the number of retries for a particular worker with:
+  #
+  #     class MyWorker
+  #       include Sidekiq::Worker
+  #       sidekiq_options :retry => 10
+  #     end
+  #
+  class JobRetry
+    class Handled < ::RuntimeError; end
+    class Skip < Handled; end
+
+    include Sidekiq::Util
+
+    DEFAULT_MAX_RETRY_ATTEMPTS = 25
+
+    def initialize(options = {})
+      @max_retries = Sidekiq.options.merge(options).fetch(:max_retries, DEFAULT_MAX_RETRY_ATTEMPTS)
+    end
+
+    # The global retry handler requires only the barest of data.
+    # We want to be able to retry as much as possible so we don't
+    # require the worker to be instantiated.
+    def global(msg, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      if msg['retry']
+        attempt_retry(nil, msg, queue, e)
+      else
+        Sidekiq.death_handlers.each do |handler|
+          begin
+            handler.call(msg, e)
+          rescue => handler_ex
+            handle_exception(handler_ex, { context: "Error calling death handler", job: msg })
+          end
+        end
+      end
+
+      raise Handled
+    end
+
+
+    # The local retry support means that any errors that occur within
+    # this block can be associated with the given worker instance.
+    # This is required to support the `sidekiq_retries_exhausted` block.
+    #
+    # Note that any exception from the block is wrapped in the Skip
+    # exception so the global block does not reprocess the error. The
+    # Skip exception is unwrapped within Sidekiq::Processor#process before
+    # calling the handle_exception handlers.
+    def local(worker, msg, queue)
+      yield
+    rescue Handled => ex
+      raise ex
+    rescue Sidekiq::Shutdown => ey
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise ey
+    rescue Exception => e
+      # ignore, will be pushed back onto queue during hard_shutdown
+      raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)
+
+      if msg['retry'] == nil
+        msg['retry'] = worker.class.get_sidekiq_options['retry']
+      end
+
+      raise e unless msg['retry']
+      attempt_retry(worker, msg, queue, e)
+      # We've handled this error associated with this job, don't
+      # need to handle it at the global level
+      raise Skip
+    end
+
+    private
+
+    # Note that +worker+ can be nil here if an error is raised before we can
+    # instantiate the worker instance. All access must be guarded and
+    # best effort.
+    def attempt_retry(worker, msg, queue, exception)
+      max_retry_attempts = retry_attempts_from(msg['retry'], @max_retries)
+
+      msg['queue'] = if msg['retry_queue']
+        msg['retry_queue']
+      else
+        queue
+      end
+
+      m = exception_message(exception)
+      if m.respond_to?(:scrub!)
+        m.force_encoding("utf-8")
+        m.scrub!
+      end
+
+      msg['error_message'] = m
+      msg['error_class'] = exception.class.name
+      count = if msg['retry_count']
+        msg['retried_at'] = Time.now.to_f
+        msg['retry_count'] += 1
+      else
+        msg['failed_at'] = Time.now.to_f
+        msg['retry_count'] = 0
+      end
+
+      if msg['backtrace'] == true
+        msg['error_backtrace'] = exception.backtrace
+      elsif !msg['backtrace']
+        # do nothing
+      elsif msg['backtrace'].to_i != 0
+        msg['error_backtrace'] = exception.backtrace[0...msg['backtrace'].to_i]
+      end
+
+      if count < max_retry_attempts
+        delay = delay_for(worker, count, exception)
+        # Logging here can break retries if the logging device raises ENOSPC #3979
+        #logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
+        retry_at = Time.now.to_f + delay
+        payload = Sidekiq.dump_json(msg)
+        Sidekiq.redis do |conn|
+          conn.zadd('retry', retry_at.to_s, payload)
+        end
+      else
+        # Goodbye dear message, you (re)tried your best I'm sure.
+        retries_exhausted(worker, msg, exception)
+      end
+    end
+
+    def retries_exhausted(worker, msg, exception)
+      begin
+        block = worker && worker.sidekiq_retries_exhausted_block
+        block.call(msg, exception) if block
+      rescue => e
+        handle_exception(e, { context: "Error calling retries_exhausted", job: msg })
+      end
+
+      Sidekiq.death_handlers.each do |handler|
+        begin
+          handler.call(msg, exception)
+        rescue => e
+          handle_exception(e, { context: "Error calling death handler", job: msg })
+        end
+      end
+
+      send_to_morgue(msg) unless msg['dead'] == false
+    end
+
+    def send_to_morgue(msg)
+      logger.info { "Adding dead #{msg['class']} job #{msg['jid']}" }
+      payload = Sidekiq.dump_json(msg)
+      DeadSet.new.kill(payload, notify_failure: false)
+    end
+
+    def retry_attempts_from(msg_retry, default)
+      if msg_retry.is_a?(Integer)
+        msg_retry
+      else
+        default
+      end
+    end
+
+    def delay_for(worker, count, exception)
+      if worker && worker.sidekiq_retry_in_block
+        custom_retry_in = retry_in(worker, count, exception).to_i
+        return custom_retry_in if custom_retry_in > 0
+      end
+      seconds_to_delay(count)
+    end
+
+    # delayed_job uses the same basic formula
+    def seconds_to_delay(count)
+      (count ** 4) + 15 + (rand(30) * (count + 1))
+    end
+
+    def retry_in(worker, count, exception)
+      begin
+        worker.sidekiq_retry_in_block.call(count, exception)
+      rescue Exception => e
+        handle_exception(e, { context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{worker.class.name}, falling back to default" })
+        nil
+      end
+    end
+
+    def exception_caused_by_shutdown?(e, checked_causes = [])
+      return false unless e.cause
+
+      # Handle circular causes
+      checked_causes << e.object_id
+      return false if checked_causes.include?(e.cause.object_id)
+
+      e.cause.instance_of?(Sidekiq::Shutdown) ||
+        exception_caused_by_shutdown?(e.cause, checked_causes)
+    end
+
+    # Extract message from exception.
+    # Set a default if the message raises an error
+    def exception_message(exception)
+      begin
+        # App code can stuff all sorts of crazy binary data into the error message
+        # that won't convert to JSON.
+        exception.message.to_s[0, 10_000]
+      rescue
+        "!!! ERROR MESSAGE THREW AN ERROR !!!".dup
+      end
+    end
+
+  end
+end
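The knobs documented in the comment above are normally set through the worker DSL rather than on the raw job hash. A minimal sketch, assuming the stock Sidekiq 5.x worker API that this gem repackages (`ErrorTracker` is a hypothetical error-reporting stand-in):

    class HardWorker
      include Sidekiq::Worker
      # Try 10 times, keep the first 20 backtrace lines, and push
      # retries onto the 'low' queue instead of the original one.
      sidekiq_options retry: 10, backtrace: 20, retry_queue: 'low'

      # Overrides the default delay of (count ** 4) + 15 + rand(30) * (count + 1)
      # seconds, which across the default 25 attempts adds up to roughly 3 weeks.
      sidekiq_retry_in { |count, _exception| 10 * (count + 1) }

      # Runs once, right before the job is moved to the Dead Job Queue.
      sidekiq_retries_exhausted do |msg, exception|
        Sidekiq.logger.warn "Giving up on #{msg['class']} #{msg['jid']}: #{exception.message}"
      end

      def perform(id)
        # ...
      end
    end

    # Death handlers run for every dead job, not just this worker's:
    Sidekiq.death_handlers << ->(job, ex) { ErrorTracker.notify(ex, job) }

`attempt_retry` reads `retry_queue` and `backtrace` straight off the job hash, and `delay_for`/`retries_exhausted` pick up the two block registrations via `sidekiq_retry_in_block` and `sidekiq_retries_exhausted_block`.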
data/lib/sidekiq/launcher.rb
@@ -0,0 +1,173 @@
+# frozen_string_literal: true
+require 'sidekiq/manager'
+require 'sidekiq/fetch'
+require 'sidekiq/scheduled'
+
+module Sidekiq
+  # The Launcher is a very simple Actor whose job is to
+  # start, monitor and stop the core Actors in Sidekiq.
+  # If any of these actors die, the Sidekiq process exits
+  # immediately.
+  class Launcher
+    include Util
+
+    attr_accessor :manager, :poller, :fetcher
+
+    STATS_TTL = 5*365*24*60*60
+
+    def initialize(options)
+      @manager = Sidekiq::Manager.new(options)
+      @poller = Sidekiq::Scheduled::Poller.new
+      @done = false
+      @options = options
+    end
+
+    def run
+      @thread = safe_thread("heartbeat", &method(:start_heartbeat))
+      @poller.start
+      @manager.start
+    end
+
+    # Stops this instance from processing any more jobs.
+    #
+    def quiet
+      @done = true
+      @manager.quiet
+      @poller.terminate
+    end
+
+    # Shuts down the process. This method does not
+    # return until all work is complete and cleaned up.
+    # It can take up to the timeout to complete.
+    def stop
+      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + @options[:timeout]
+
+      @done = true
+      @manager.quiet
+      @poller.terminate
+
+      @manager.stop(deadline)
+
+      # Requeue everything in case there was a worker who grabbed work while stopped
+      # This call is a no-op in Sidekiq but necessary for Sidekiq Pro.
+      strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+      strategy.bulk_requeue([], @options)
+
+      clear_heartbeat
+    end
+
+    def stopping?
+      @done
+    end
+
+    private unless $TESTING
+
+    def heartbeat
+      results = Sidekiq::CLI::PROCTITLES.map {|x| x.(self, to_data) }
+      results.compact!
+      $0 = results.join(' ')
+
+      ❤
+    end
+
+    def ❤
+      key = identity
+      fails = procd = 0
+      begin
+        fails = Processor::FAILURE.reset
+        procd = Processor::PROCESSED.reset
+        curstate = Processor::WORKER_STATE.dup
+
+        workers_key = "#{key}:workers"
+        nowdate = Time.now.utc.strftime("%Y-%m-%d")
+        Sidekiq.redis do |conn|
+          conn.multi do
+            conn.incrby("stat:processed", procd)
+            conn.incrby("stat:processed:#{nowdate}", procd)
+            conn.expire("stat:processed:#{nowdate}", STATS_TTL)
+
+            conn.incrby("stat:failed", fails)
+            conn.incrby("stat:failed:#{nowdate}", fails)
+            conn.expire("stat:failed:#{nowdate}", STATS_TTL)
+
+            conn.del(workers_key)
+            curstate.each_pair do |tid, hash|
+              conn.hset(workers_key, tid, Sidekiq.dump_json(hash))
+            end
+            conn.expire(workers_key, 60)
+          end
+        end
+        fails = procd = 0
+
+        _, exists, _, _, msg = Sidekiq.redis do |conn|
+          conn.multi do
+            conn.sadd('processes', key)
+            conn.exists(key)
+            conn.hmset(key, 'info', to_json, 'busy', curstate.size, 'beat', Time.now.to_f, 'quiet', @done)
+            conn.expire(key, 60)
+            conn.rpop("#{key}-signals")
+          end
+        end
+
+        # first heartbeat or recovering from an outage and need to reestablish our heartbeat
+        fire_event(:heartbeat) if !exists
+
+        return unless msg
+
+        ::Process.kill(msg, $$)
+      rescue => e
+        # ignore all redis/network issues
+        logger.error("heartbeat: #{e.message}")
+        # don't lose the counts if there was a network issue
+        Processor::PROCESSED.incr(procd)
+        Processor::FAILURE.incr(fails)
+      end
+    end
+
+    def start_heartbeat
+      while true
+        heartbeat
+        sleep 5
+      end
+      Sidekiq.logger.info("Heartbeat stopping...")
+    end
+
+    def to_data
+      @data ||= begin
+        {
+          'hostname' => hostname,
+          'started_at' => Time.now.to_f,
+          'pid' => $$,
+          'tag' => @options[:tag] || '',
+          'concurrency' => @options[:concurrency],
+          'queues' => @options[:queues].uniq,
+          'labels' => @options[:labels],
+          'identity' => identity,
+        }
+      end
+    end
+
+    def to_json
+      @json ||= begin
+        # this data changes infrequently so dump it to a string
+        # now so we don't need to dump it every heartbeat.
+        Sidekiq.dump_json(to_data)
+      end
+    end
+
+    def clear_heartbeat
+      # Remove record from Redis since we are shutting down.
+      # Note we don't stop the heartbeat thread; if the process
+      # doesn't actually exit, it'll reappear in the Web UI.
+      Sidekiq.redis do |conn|
+        conn.pipelined do
+          conn.srem('processes', identity)
+          conn.del("#{identity}:workers")
+        end
+      end
+    rescue
+      # best effort, ignore network errors
+    end
+
+  end
+end
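Everything the heartbeat writes is what the Web UI and the API classes shipped in data/lib/sidekiq/api.rb read back. A rough sketch of consuming it, assuming the upstream `Sidekiq::ProcessSet` and `Sidekiq::Stats` wrappers:

    require 'sidekiq/api'

    # Each live process re-saves its hash under `identity` with a 60s TTL,
    # so a crashed process simply ages out of the 'processes' set.
    Sidekiq::ProcessSet.new.each do |process|
      puts "#{process['identity']} busy=#{process['busy']} quiet=#{process['quiet']}"
    end

    # Backed by the stat:processed / stat:failed counters incremented above:
    stats = Sidekiq::Stats.new
    puts "processed=#{stats.processed} failed=#{stats.failed}"

The `rpop` on the `-signals` list at the end of the MULTI is the other half of remote control: `Sidekiq::Process#quiet!` and `#stop!` push 'TSTP'/'TERM' onto it, and the next beat delivers the signal to the local process via `Process.kill`.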
data/lib/sidekiq/logging.rb
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+require 'time'
+require 'logger'
+require 'fcntl'
+
+module Sidekiq
+  module Logging
+
+    class Pretty < Logger::Formatter
+      SPACE = " "
+
+      # Provide a call() method that returns the formatted message.
+      def call(severity, time, program_name, message)
+        "#{time.utc.iso8601(3)} #{::Process.pid} TID-#{Sidekiq::Logging.tid}#{context} #{severity}: #{message}\n"
+      end
+
+      def context
+        c = Thread.current[:sidekiq_context]
+        " #{c.join(SPACE)}" if c && c.any?
+      end
+    end
+
+    class WithoutTimestamp < Pretty
+      def call(severity, time, program_name, message)
+        "#{::Process.pid} TID-#{Sidekiq::Logging.tid}#{context} #{severity}: #{message}\n"
+      end
+    end
+
+    def self.tid
+      Thread.current['sidekiq_tid'] ||= (Thread.current.object_id ^ ::Process.pid).to_s(36)
+    end
+
+    def self.job_hash_context(job_hash)
+      # If we're using a wrapper class, like ActiveJob, use the "wrapped"
+      # attribute to expose the underlying thing.
+      klass = job_hash['wrapped'] || job_hash["class"]
+      bid = job_hash['bid']
+      "#{klass} JID-#{job_hash['jid']}#{" BID-#{bid}" if bid}"
+    end
+
+    def self.with_job_hash_context(job_hash, &block)
+      with_context(job_hash_context(job_hash), &block)
+    end
+
+    def self.with_context(msg)
+      Thread.current[:sidekiq_context] ||= []
+      Thread.current[:sidekiq_context] << msg
+      yield
+    ensure
+      Thread.current[:sidekiq_context].pop
+    end
+
+    def self.initialize_logger(log_target = STDOUT)
+      oldlogger = defined?(@logger) ? @logger : nil
+      @logger = Logger.new(log_target)
+      @logger.level = Logger::INFO
+      @logger.formatter = ENV['DYNO'] ? WithoutTimestamp.new : Pretty.new
+      oldlogger.close if oldlogger && !$TESTING # don't want to close testing's STDOUT logging
+      @logger
+    end
+
+    def self.logger
+      defined?(@logger) ? @logger : initialize_logger
+    end
+
+    def self.logger=(log)
+      @logger = (log ? log : Logger.new(File::NULL))
+    end
+
+    # This reopens ALL logfiles in the process that have been rotated
+    # using logrotate(8) (without copytruncate) or similar tools.
+    # A +File+ object is considered for reopening if it is:
+    #   1) opened with the O_APPEND and O_WRONLY flags
+    #   2) the current open file handle does not match its original open path
+    #   3) unbuffered (as far as userspace buffering goes, not O_SYNC)
+    # Returns the number of files reopened
+    def self.reopen_logs
+      to_reopen = []
+      append_flags = File::WRONLY | File::APPEND
+
+      ObjectSpace.each_object(File) do |fp|
+        begin
+          if !fp.closed? && fp.stat.file? && fp.sync && (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags
+            to_reopen << fp
+          end
+        rescue IOError, Errno::EBADF
+        end
+      end
+
+      nr = 0
+      to_reopen.each do |fp|
+        orig_st = begin
+          fp.stat
+        rescue IOError, Errno::EBADF
+          next
+        end
+
+        begin
+          b = File.stat(fp.path)
+          next if orig_st.ino == b.ino && orig_st.dev == b.dev
+        rescue Errno::ENOENT
+        end
+
+        begin
+          File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) }
+          fp.sync = true
+          nr += 1
+        rescue IOError, Errno::EBADF
+          # not much we can do...
+        end
+      end
+      nr
+    rescue RuntimeError => ex
+      # RuntimeError: ObjectSpace is disabled; each_object will only work with Class, pass -X+O to enable
+      puts "Unable to reopen logs: #{ex.message}"
+    end
+
+    def logger
+      Sidekiq::Logging.logger
+    end
+  end
+end
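The formatter plus the thread-local context stack produce the familiar single-line log format. A small sketch; the output line is illustrative only, and the CLI traditionally wires the USR2 signal to `reopen_logs` when a logfile is configured:

    Sidekiq::Logging.with_context("BID-b-abc123") do
      Sidekiq.logger.info "starting batch"
    end
    # => 2018-06-01T12:00:00.000Z 4242 TID-oxn1e BID-b-abc123 INFO: starting batch
    # (WithoutTimestamp drops the timestamp when ENV['DYNO'] is set, since
    # Heroku's log router already prepends one.)

    # Point logging at a file; after logrotate moves the file aside,
    # reopen_logs makes every appending, unbuffered handle follow the rotation.
    Sidekiq::Logging.initialize_logger("/var/log/sidekiq.log")
    Sidekiq::Logging.reopen_logs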
data/lib/sidekiq/manager.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+require 'sidekiq/util'
+require 'sidekiq/processor'
+require 'sidekiq/fetch'
+require 'thread'
+require 'set'
+
+module Sidekiq
+
+  ##
+  # The Manager is the central coordination point in Sidekiq, controlling
+  # the lifecycle of the Processors.
+  #
+  # Tasks:
+  #
+  # 1. start: Spin up Processors.
+  # 3. processor_died: Handle job failure, throw away Processor, create new one.
+  # 4. quiet: shutdown idle Processors.
+  # 5. stop: hard stop the Processors by deadline.
+  #
+  # Note that only the last task requires its own Thread since it has to monitor
+  # the shutdown process. The other tasks are performed by other threads.
+  #
+  class Manager
+    include Util
+
+    attr_reader :workers
+    attr_reader :options
+
+    def initialize(options={})
+      logger.debug { options.inspect }
+      @options = options
+      @count = options[:concurrency] || 10
+      raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1
+
+      @done = false
+      @workers = Set.new
+      @count.times do
+        @workers << Processor.new(self)
+      end
+      @plock = Mutex.new
+    end
+
+    def start
+      @workers.each do |x|
+        x.start
+      end
+    end
+
+    def quiet
+      return if @done
+      @done = true
+
+      logger.info { "Terminating quiet workers" }
+      @workers.each { |x| x.terminate }
+      fire_event(:quiet, reverse: true)
+    end
+
+    # hack for quicker development / testing environment #2774
+    PAUSE_TIME = STDOUT.tty? ? 0.1 : 0.5
+
+    def stop(deadline)
+      quiet
+      fire_event(:shutdown, reverse: true)
+
+      # some of the shutdown events can be async,
+      # we don't have any way to know when they're done but
+      # give them a little time to take effect
+      sleep PAUSE_TIME
+      return if @workers.empty?
+
+      logger.info { "Pausing to allow workers to finish..." }
+      remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      while remaining > PAUSE_TIME
+        return if @workers.empty?
+        sleep PAUSE_TIME
+        remaining = deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+      end
+      return if @workers.empty?
+
+      hard_shutdown
+    end
+
+    def processor_stopped(processor)
+      @plock.synchronize do
+        @workers.delete(processor)
+      end
+    end
+
+    def processor_died(processor, reason)
+      @plock.synchronize do
+        @workers.delete(processor)
+        unless @done
+          p = Processor.new(self)
+          @workers << p
+          p.start
+        end
+      end
+    end
+
+    def stopped?
+      @done
+    end
+
+    private
+
+    def hard_shutdown
+      # We've reached the timeout and we still have busy workers.
+      # They must die but their jobs shall live on.
+      cleanup = nil
+      @plock.synchronize do
+        cleanup = @workers.dup
+      end
+
+      if cleanup.size > 0
+        jobs = cleanup.map {|p| p.job }.compact
+
+        logger.warn { "Terminating #{cleanup.size} busy worker threads" }
+        logger.warn { "Work still in progress #{jobs.inspect}" }
+
+        # Re-enqueue unfinished jobs
+        # NOTE: You may notice that we may push a job back to redis before
+        # the worker thread is terminated. This is ok because Sidekiq's
+        # contract says that jobs are run AT LEAST once. Process termination
+        # is delayed until we're certain the jobs are back in Redis because
+        # it is worse to lose a job than to run it twice.
+        strategy = (@options[:fetch] || Sidekiq::BasicFetch)
+        strategy.bulk_requeue(jobs, @options)
+      end
+
+      cleanup.each do |processor|
+        processor.kill
+      end
+    end
+
+  end
+end
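The `fire_event(:quiet, reverse: true)` and `fire_event(:shutdown, reverse: true)` calls above are the application-level hook points for this lifecycle. A minimal sketch, assuming the standard server-configuration API from the core gem (`ConnectionReaper` is a hypothetical stand-in):

    Sidekiq.configure_server do |config|
      config.options[:concurrency] = 10  # how many Processors the Manager builds
      config.options[:timeout] = 25      # seconds between quiet and hard_shutdown

      config.on(:quiet)    { Sidekiq.logger.info "processors told to stop fetching" }
      config.on(:shutdown) { ConnectionReaper.drain! } # hypothetical cleanup
    end

`:quiet` fires once the Processors have been told to stop fetching; `:shutdown` fires at the start of `stop`, before the deadline wait and any `hard_shutdown`.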