resque 1.23.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.md +271 -0
- data/README.markdown +454 -484
- data/Rakefile +4 -17
- data/bin/resque-web +10 -22
- data/lib/resque/data_store.rb +335 -0
- data/lib/resque/errors.rb +15 -1
- data/lib/resque/failure/airbrake.rb +32 -4
- data/lib/resque/failure/base.rb +16 -7
- data/lib/resque/failure/multiple.rb +26 -8
- data/lib/resque/failure/redis.rb +92 -15
- data/lib/resque/failure/redis_multi_queue.rb +104 -0
- data/lib/resque/failure.rb +62 -32
- data/lib/resque/helpers.rb +11 -57
- data/lib/resque/job.rb +79 -12
- data/lib/resque/log_formatters/quiet_formatter.rb +7 -0
- data/lib/resque/log_formatters/verbose_formatter.rb +7 -0
- data/lib/resque/log_formatters/very_verbose_formatter.rb +8 -0
- data/lib/resque/logging.rb +18 -0
- data/lib/resque/plugin.rb +22 -10
- data/lib/resque/railtie.rb +10 -0
- data/lib/resque/server/public/jquery-3.6.0.min.js +2 -0
- data/lib/resque/server/public/jquery.relatize_date.js +4 -4
- data/lib/resque/server/public/main.js +3 -0
- data/lib/resque/server/public/ranger.js +16 -8
- data/lib/resque/server/public/style.css +13 -8
- data/lib/resque/server/views/error.erb +1 -1
- data/lib/resque/server/views/failed.erb +27 -59
- data/lib/resque/server/views/failed_job.erb +50 -0
- data/lib/resque/server/views/failed_queues_overview.erb +24 -0
- data/lib/resque/server/views/job_class.erb +8 -0
- data/lib/resque/server/views/key_sets.erb +2 -4
- data/lib/resque/server/views/key_string.erb +1 -1
- data/lib/resque/server/views/layout.erb +7 -6
- data/lib/resque/server/views/next_more.erb +22 -10
- data/lib/resque/server/views/processing.erb +2 -0
- data/lib/resque/server/views/queues.erb +22 -13
- data/lib/resque/server/views/stats.erb +5 -5
- data/lib/resque/server/views/workers.erb +4 -4
- data/lib/resque/server/views/working.erb +10 -11
- data/lib/resque/server.rb +51 -108
- data/lib/resque/server_helper.rb +185 -0
- data/lib/resque/stat.rb +19 -7
- data/lib/resque/tasks.rb +26 -25
- data/lib/resque/thread_signal.rb +24 -0
- data/lib/resque/vendor/utf8_util.rb +2 -8
- data/lib/resque/version.rb +1 -1
- data/lib/resque/web_runner.rb +374 -0
- data/lib/resque/worker.rb +487 -163
- data/lib/resque.rb +332 -52
- data/lib/tasks/redis.rake +11 -11
- metadata +169 -149
- data/lib/resque/failure/hoptoad.rb +0 -33
- data/lib/resque/failure/thoughtbot.rb +0 -33
- data/lib/resque/server/public/jquery-1.3.2.min.js +0 -19
- data/lib/resque/server/test_helper.rb +0 -19
- data/lib/resque/vendor/utf8_util/utf8_util_18.rb +0 -91
- data/lib/resque/vendor/utf8_util/utf8_util_19.rb +0 -5
- data/test/airbrake_test.rb +0 -27
- data/test/hoptoad_test.rb +0 -26
- data/test/job_hooks_test.rb +0 -464
- data/test/job_plugins_test.rb +0 -230
- data/test/plugin_test.rb +0 -116
- data/test/redis-test-cluster.conf +0 -115
- data/test/redis-test.conf +0 -115
- data/test/resque-web_test.rb +0 -59
- data/test/resque_failure_redis_test.rb +0 -19
- data/test/resque_test.rb +0 -278
- data/test/test_helper.rb +0 -178
- data/test/worker_test.rb +0 -657
data/lib/resque/worker.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'set'
|
3
|
+
require 'redis/distributed'
|
4
|
+
|
1
5
|
module Resque
|
2
6
|
# A Resque Worker processes jobs. On platforms that support fork(2),
|
3
7
|
# the worker will fork off a child to process each job. This ensures
|
@@ -9,27 +13,63 @@ module Resque
|
|
9
13
|
class Worker
|
10
14
|
include Resque::Helpers
|
11
15
|
extend Resque::Helpers
|
16
|
+
include Resque::Logging
|
17
|
+
|
18
|
+
@@all_heartbeat_threads = []
|
19
|
+
def self.kill_all_heartbeat_threads
|
20
|
+
@@all_heartbeat_threads.each(&:kill).each(&:join)
|
21
|
+
@@all_heartbeat_threads = []
|
22
|
+
end
|
23
|
+
|
24
|
+
def redis
|
25
|
+
Resque.redis
|
26
|
+
end
|
27
|
+
alias :data_store :redis
|
28
|
+
|
29
|
+
def self.redis
|
30
|
+
Resque.redis
|
31
|
+
end
|
12
32
|
|
13
|
-
|
14
|
-
|
33
|
+
def self.data_store
|
34
|
+
self.redis
|
35
|
+
end
|
15
36
|
|
16
|
-
#
|
17
|
-
|
37
|
+
# Given a Ruby object, returns a string suitable for storage in a
|
38
|
+
# queue.
|
39
|
+
def encode(object)
|
40
|
+
Resque.encode(object)
|
41
|
+
end
|
18
42
|
|
19
|
-
#
|
20
|
-
|
21
|
-
|
43
|
+
# Given a string, returns a Ruby object.
|
44
|
+
def decode(object)
|
45
|
+
Resque.decode(object)
|
46
|
+
end
|
22
47
|
|
23
48
|
attr_accessor :term_timeout
|
24
49
|
|
50
|
+
attr_accessor :pre_shutdown_timeout
|
51
|
+
|
52
|
+
attr_accessor :term_child_signal
|
53
|
+
|
25
54
|
# decide whether to use new_kill_child logic
|
26
55
|
attr_accessor :term_child
|
27
56
|
|
57
|
+
# should term kill workers gracefully (vs. immediately)
|
58
|
+
# Makes SIGTERM work like SIGQUIT
|
59
|
+
attr_accessor :graceful_term
|
60
|
+
|
61
|
+
# When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
|
62
|
+
# registered in the application. Otherwise, forked workers exit with `exit!`
|
63
|
+
attr_accessor :run_at_exit_hooks
|
64
|
+
|
65
|
+
attr_writer :fork_per_job
|
66
|
+
attr_writer :hostname
|
28
67
|
attr_writer :to_s
|
68
|
+
attr_writer :pid
|
29
69
|
|
30
70
|
# Returns an array of all worker objects.
|
31
71
|
def self.all
|
32
|
-
|
72
|
+
data_store.worker_ids.map { |id| find(id, :skip_exists => true) }.compact
|
33
73
|
end
|
34
74
|
|
35
75
|
# Returns an array of all worker objects currently processing
|
@@ -38,32 +78,37 @@ module Resque
|
|
38
78
|
names = all
|
39
79
|
return [] unless names.any?
|
40
80
|
|
41
|
-
names.map! { |name| "worker:#{name}" }
|
42
|
-
|
43
81
|
reportedly_working = {}
|
44
82
|
|
45
83
|
begin
|
46
|
-
reportedly_working =
|
84
|
+
reportedly_working = data_store.workers_map(names).reject do |key, value|
|
47
85
|
value.nil? || value.empty?
|
48
86
|
end
|
49
87
|
rescue Redis::Distributed::CannotDistribute
|
50
88
|
names.each do |name|
|
51
|
-
value =
|
89
|
+
value = data_store.get_worker_payload(name)
|
52
90
|
reportedly_working[name] = value unless value.nil? || value.empty?
|
53
91
|
end
|
54
92
|
end
|
55
93
|
|
56
94
|
reportedly_working.keys.map do |key|
|
57
|
-
find
|
95
|
+
worker = find(key.sub("worker:", ''), :skip_exists => true)
|
96
|
+
worker.job = worker.decode(reportedly_working[key])
|
97
|
+
worker
|
58
98
|
end.compact
|
59
99
|
end
|
60
100
|
|
61
101
|
# Returns a single worker object. Accepts a string id.
|
62
|
-
def self.find(worker_id)
|
63
|
-
|
64
|
-
|
102
|
+
def self.find(worker_id, options = {})
|
103
|
+
skip_exists = options[:skip_exists]
|
104
|
+
|
105
|
+
if skip_exists || exists?(worker_id)
|
106
|
+
host, pid, queues_raw = worker_id.split(':', 3)
|
107
|
+
queues = queues_raw.split(',')
|
65
108
|
worker = new(*queues)
|
109
|
+
worker.hostname = host
|
66
110
|
worker.to_s = worker_id
|
111
|
+
worker.pid = pid.to_i
|
67
112
|
worker
|
68
113
|
else
|
69
114
|
nil
|
@@ -78,7 +123,7 @@ module Resque
|
|
78
123
|
# Given a string worker id, return a boolean indicating whether the
|
79
124
|
# worker exists
|
80
125
|
def self.exists?(worker_id)
|
81
|
-
|
126
|
+
data_store.worker_exists?(worker_id)
|
82
127
|
end
|
83
128
|
|
84
129
|
# Workers should be initialized with an array of string queue
|
@@ -92,10 +137,60 @@ module Resque
|
|
92
137
|
# If passed a single "*", this Worker will operate on all queues
|
93
138
|
# in alphabetical order. Queues can be dynamically added or
|
94
139
|
# removed without needing to restart workers using this method.
|
140
|
+
#
|
141
|
+
# Workers should have `#prepare` called after they are initialized
|
142
|
+
# if you are running work on the worker.
|
95
143
|
def initialize(*queues)
|
96
|
-
@queues = queues.map { |queue| queue.to_s.strip }
|
97
144
|
@shutdown = nil
|
98
145
|
@paused = nil
|
146
|
+
@before_first_fork_hook_ran = false
|
147
|
+
|
148
|
+
@heartbeat_thread = nil
|
149
|
+
@heartbeat_thread_signal = nil
|
150
|
+
|
151
|
+
@last_state = :idle
|
152
|
+
|
153
|
+
verbose_value = ENV['LOGGING'] || ENV['VERBOSE']
|
154
|
+
self.verbose = verbose_value if verbose_value
|
155
|
+
self.very_verbose = ENV['VVERBOSE'] if ENV['VVERBOSE']
|
156
|
+
self.pre_shutdown_timeout = (ENV['RESQUE_PRE_SHUTDOWN_TIMEOUT'] || 0.0).to_f
|
157
|
+
self.term_timeout = (ENV['RESQUE_TERM_TIMEOUT'] || 4.0).to_f
|
158
|
+
self.term_child = ENV['TERM_CHILD']
|
159
|
+
self.graceful_term = ENV['GRACEFUL_TERM']
|
160
|
+
self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']
|
161
|
+
|
162
|
+
self.queues = queues
|
163
|
+
end
|
164
|
+
|
165
|
+
# Daemonizes the worker if ENV['BACKGROUND'] is set and writes
|
166
|
+
# the process id to ENV['PIDFILE'] if set. Should only be called
|
167
|
+
# once per worker.
|
168
|
+
def prepare
|
169
|
+
if ENV['BACKGROUND']
|
170
|
+
Process.daemon(true)
|
171
|
+
end
|
172
|
+
|
173
|
+
if ENV['PIDFILE']
|
174
|
+
File.open(ENV['PIDFILE'], 'w') { |f| f << pid }
|
175
|
+
end
|
176
|
+
|
177
|
+
self.reconnect if ENV['BACKGROUND']
|
178
|
+
end
|
179
|
+
|
180
|
+
WILDCARDS = ['*', '?', '{', '}', '[', ']'].freeze
|
181
|
+
|
182
|
+
def queues=(queues)
|
183
|
+
queues = (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') if queues.empty?
|
184
|
+
queues = queues.map { |queue| queue.to_s.strip }
|
185
|
+
|
186
|
+
@skip_queues, @queues = queues.partition { |queue| queue.start_with?('!') }
|
187
|
+
@skip_queues.map! { |queue| queue[1..-1] }
|
188
|
+
|
189
|
+
# The behavior of `queues` is dependent on the value of `@has_dynamic_queues`: if it's true, the method returns the result of filtering @queues with `glob_match`;
|
190
|
+
# if it's false, the method returns @queues directly. Since `glob_match` will cause skipped queues to be filtered out, we want to make sure it's called if we have @skip_queues.any?
|
191
|
+
@has_dynamic_queues =
|
192
|
+
@skip_queues.any? || WILDCARDS.any? { |char| @queues.join.include?(char) }
|
193
|
+
|
99
194
|
validate_queues
|
100
195
|
end
|
101
196
|
|
@@ -109,6 +204,25 @@ module Resque
|
|
109
204
|
end
|
110
205
|
end
|
111
206
|
|
207
|
+
# Returns a list of queues to use when searching for a job.
|
208
|
+
# A splat ("*") means you want every queue (in alpha order) - this
|
209
|
+
# can be useful for dynamically adding new queues.
|
210
|
+
def queues
|
211
|
+
if @has_dynamic_queues
|
212
|
+
current_queues = Resque.queues
|
213
|
+
@queues.map { |queue| glob_match(current_queues, queue) }.flatten.uniq
|
214
|
+
else
|
215
|
+
@queues
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
def glob_match(list, pattern)
|
220
|
+
list.select do |queue|
|
221
|
+
File.fnmatch?(pattern, queue) &&
|
222
|
+
@skip_queues.none? { |skip_pattern| File.fnmatch?(skip_pattern, queue) }
|
223
|
+
end.sort
|
224
|
+
end
|
225
|
+
|
112
226
|
# This is the main workhorse method. Called on a Worker instance,
|
113
227
|
# it begins the worker life cycle.
|
114
228
|
#
|
@@ -127,46 +241,47 @@ module Resque
|
|
127
241
|
# has completed processing. Useful for testing.
|
128
242
|
def work(interval = 5.0, &block)
|
129
243
|
interval = Float(interval)
|
130
|
-
$0 = "resque: Starting"
|
131
244
|
startup
|
132
245
|
|
133
246
|
loop do
|
134
247
|
break if shutdown?
|
135
248
|
|
136
|
-
|
137
|
-
|
138
|
-
job.worker = self
|
139
|
-
working_on job
|
140
|
-
|
141
|
-
if @child = fork(job)
|
142
|
-
srand # Reseeding
|
143
|
-
procline "Forked #{@child} at #{Time.now.to_i}"
|
144
|
-
begin
|
145
|
-
Process.waitpid(@child)
|
146
|
-
rescue SystemCallError
|
147
|
-
nil
|
148
|
-
end
|
149
|
-
else
|
150
|
-
unregister_signal_handlers if will_fork? && term_child
|
151
|
-
procline "Processing #{job.queue} since #{Time.now.to_i}"
|
152
|
-
reconnect
|
153
|
-
perform(job, &block)
|
154
|
-
exit! if will_fork?
|
155
|
-
end
|
156
|
-
|
157
|
-
done_working
|
158
|
-
@child = nil
|
159
|
-
else
|
249
|
+
unless work_one_job(&block)
|
250
|
+
state_change
|
160
251
|
break if interval.zero?
|
161
|
-
|
162
|
-
procline paused? ? "Paused" : "Waiting for #{
|
252
|
+
log_with_severity :debug, "Sleeping for #{interval} seconds"
|
253
|
+
procline paused? ? "Paused" : "Waiting for #{queues.join(',')}"
|
163
254
|
sleep interval
|
164
255
|
end
|
165
256
|
end
|
166
257
|
|
167
258
|
unregister_worker
|
259
|
+
run_hook :worker_exit
|
168
260
|
rescue Exception => exception
|
261
|
+
return if exception.class == SystemExit && !@child && run_at_exit_hooks
|
262
|
+
log_with_severity :error, "Failed to start worker : #{exception.inspect}"
|
169
263
|
unregister_worker(exception)
|
264
|
+
run_hook :worker_exit
|
265
|
+
end
|
266
|
+
|
267
|
+
def work_one_job(job = nil, &block)
|
268
|
+
return false if paused?
|
269
|
+
return false unless job ||= reserve
|
270
|
+
|
271
|
+
working_on job
|
272
|
+
procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"
|
273
|
+
|
274
|
+
log_with_severity :info, "got: #{job.inspect}"
|
275
|
+
job.worker = self
|
276
|
+
|
277
|
+
if fork_per_job?
|
278
|
+
perform_with_fork(job, &block)
|
279
|
+
else
|
280
|
+
perform(job, &block)
|
281
|
+
end
|
282
|
+
|
283
|
+
done_working
|
284
|
+
true
|
170
285
|
end
|
171
286
|
|
172
287
|
# DEPRECATED. Processes a single job. If none is given, it will
|
@@ -181,21 +296,34 @@ module Resque
|
|
181
296
|
done_working
|
182
297
|
end
|
183
298
|
|
299
|
+
# Reports the exception and marks the job as failed
|
300
|
+
def report_failed_job(job,exception)
|
301
|
+
log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"
|
302
|
+
begin
|
303
|
+
job.fail(exception)
|
304
|
+
rescue Object => exception
|
305
|
+
log_with_severity :error, "Received exception when reporting failure: #{exception.inspect}"
|
306
|
+
end
|
307
|
+
begin
|
308
|
+
failed!
|
309
|
+
rescue Object => exception
|
310
|
+
log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{exception.inspect}"
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
314
|
+
|
184
315
|
# Processes a given job in the child.
|
185
316
|
def perform(job)
|
186
317
|
begin
|
187
|
-
|
318
|
+
if fork_per_job?
|
319
|
+
reconnect
|
320
|
+
run_hook :after_fork, job
|
321
|
+
end
|
188
322
|
job.perform
|
189
323
|
rescue Object => e
|
190
|
-
|
191
|
-
begin
|
192
|
-
job.fail(e)
|
193
|
-
rescue Object => e
|
194
|
-
log "Received exception when reporting failure: #{e.inspect}"
|
195
|
-
end
|
196
|
-
failed!
|
324
|
+
report_failed_job(job,e)
|
197
325
|
else
|
198
|
-
|
326
|
+
log_with_severity :info, "done: #{job.inspect}"
|
199
327
|
ensure
|
200
328
|
yield job if block_given?
|
201
329
|
end
|
@@ -205,17 +333,17 @@ module Resque
|
|
205
333
|
# nil if no job can be found.
|
206
334
|
def reserve
|
207
335
|
queues.each do |queue|
|
208
|
-
|
336
|
+
log_with_severity :debug, "Checking #{queue}"
|
209
337
|
if job = Resque.reserve(queue)
|
210
|
-
|
338
|
+
log_with_severity :debug, "Found job on #{queue}"
|
211
339
|
return job
|
212
340
|
end
|
213
341
|
end
|
214
342
|
|
215
343
|
nil
|
216
344
|
rescue Exception => e
|
217
|
-
|
218
|
-
|
345
|
+
log_with_severity :error, "Error reserving job: #{e.inspect}"
|
346
|
+
log_with_severity :error, e.backtrace.join("\n")
|
219
347
|
raise e
|
220
348
|
end
|
221
349
|
|
@@ -224,53 +352,26 @@ module Resque
|
|
224
352
|
def reconnect
|
225
353
|
tries = 0
|
226
354
|
begin
|
227
|
-
|
355
|
+
data_store.reconnect
|
228
356
|
rescue Redis::BaseConnectionError
|
229
357
|
if (tries += 1) <= 3
|
230
|
-
|
358
|
+
log_with_severity :error, "Error reconnecting to Redis; retrying"
|
231
359
|
sleep(tries)
|
232
360
|
retry
|
233
361
|
else
|
234
|
-
|
362
|
+
log_with_severity :error, "Error reconnecting to Redis; quitting"
|
235
363
|
raise
|
236
364
|
end
|
237
365
|
end
|
238
366
|
end
|
239
367
|
|
240
|
-
# Returns a list of queues to use when searching for a job.
|
241
|
-
# A splat ("*") means you want every queue (in alpha order) - this
|
242
|
-
# can be useful for dynamically adding new queues.
|
243
|
-
def queues
|
244
|
-
@queues.map {|queue| queue == "*" ? Resque.queues.sort : queue }.flatten.uniq
|
245
|
-
end
|
246
|
-
|
247
|
-
# Not every platform supports fork. Here we do our magic to
|
248
|
-
# determine if yours does.
|
249
|
-
def fork(job)
|
250
|
-
return if @cant_fork
|
251
|
-
|
252
|
-
# Only run before_fork hooks if we're actually going to fork
|
253
|
-
# (after checking @cant_fork)
|
254
|
-
run_hook :before_fork, job
|
255
|
-
|
256
|
-
begin
|
257
|
-
# IronRuby doesn't support `Kernel.fork` yet
|
258
|
-
if Kernel.respond_to?(:fork)
|
259
|
-
Kernel.fork if will_fork?
|
260
|
-
else
|
261
|
-
raise NotImplementedError
|
262
|
-
end
|
263
|
-
rescue NotImplementedError
|
264
|
-
@cant_fork = true
|
265
|
-
nil
|
266
|
-
end
|
267
|
-
end
|
268
|
-
|
269
368
|
# Runs all the methods needed when a worker begins its lifecycle.
|
270
369
|
def startup
|
271
|
-
|
370
|
+
$0 = "resque: Starting"
|
371
|
+
|
272
372
|
enable_gc_optimizations
|
273
373
|
register_signal_handlers
|
374
|
+
start_heartbeat
|
274
375
|
prune_dead_workers
|
275
376
|
run_hook :before_first_fork
|
276
377
|
register_worker
|
@@ -297,7 +398,7 @@ module Resque
|
|
297
398
|
# USR2: Don't process any new jobs
|
298
399
|
# CONT: Start processing jobs again after a USR2
|
299
400
|
def register_signal_handlers
|
300
|
-
trap('TERM') { shutdown! }
|
401
|
+
trap('TERM') { graceful_term ? shutdown : shutdown! }
|
301
402
|
trap('INT') { shutdown! }
|
302
403
|
|
303
404
|
begin
|
@@ -310,14 +411,21 @@ module Resque
|
|
310
411
|
trap('USR2') { pause_processing }
|
311
412
|
trap('CONT') { unpause_processing }
|
312
413
|
rescue ArgumentError
|
313
|
-
warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
|
414
|
+
log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
|
314
415
|
end
|
315
416
|
|
316
|
-
|
417
|
+
log_with_severity :debug, "Registered signals"
|
317
418
|
end
|
318
419
|
|
319
420
|
def unregister_signal_handlers
|
320
|
-
trap('TERM')
|
421
|
+
trap('TERM') do
|
422
|
+
trap('TERM') do
|
423
|
+
# Ignore subsequent term signals
|
424
|
+
end
|
425
|
+
|
426
|
+
raise TermException.new("SIGTERM")
|
427
|
+
end
|
428
|
+
|
321
429
|
trap('INT', 'DEFAULT')
|
322
430
|
|
323
431
|
begin
|
@@ -331,15 +439,24 @@ module Resque
|
|
331
439
|
# Schedule this worker for shutdown. Will finish processing the
|
332
440
|
# current job.
|
333
441
|
def shutdown
|
334
|
-
|
442
|
+
log_with_severity :info, 'Exiting...'
|
335
443
|
@shutdown = true
|
336
444
|
end
|
337
445
|
|
338
446
|
# Kill the child and shutdown immediately.
|
447
|
+
# If not forking, abort this process.
|
339
448
|
def shutdown!
|
340
449
|
shutdown
|
341
450
|
if term_child
|
342
|
-
|
451
|
+
if fork_per_job?
|
452
|
+
new_kill_child
|
453
|
+
else
|
454
|
+
# Raise TermException in the same process
|
455
|
+
trap('TERM') do
|
456
|
+
# ignore subsequent terms
|
457
|
+
end
|
458
|
+
raise TermException.new("SIGTERM")
|
459
|
+
end
|
343
460
|
else
|
344
461
|
kill_child
|
345
462
|
end
|
@@ -354,54 +471,130 @@ module Resque
|
|
354
471
|
# is processing will not be completed.
|
355
472
|
def kill_child
|
356
473
|
if @child
|
357
|
-
|
358
|
-
if
|
474
|
+
log_with_severity :debug, "Killing child at #{@child}"
|
475
|
+
if `ps -o pid,state -p #{@child}`
|
359
476
|
Process.kill("KILL", @child) rescue nil
|
360
477
|
else
|
361
|
-
|
478
|
+
log_with_severity :debug, "Child #{@child} not found, restarting."
|
362
479
|
shutdown
|
363
480
|
end
|
364
481
|
end
|
365
482
|
end
|
366
483
|
|
484
|
+
def heartbeat
|
485
|
+
data_store.heartbeat(self)
|
486
|
+
end
|
487
|
+
|
488
|
+
def remove_heartbeat
|
489
|
+
data_store.remove_heartbeat(self)
|
490
|
+
end
|
491
|
+
|
492
|
+
def heartbeat!(time = data_store.server_time)
|
493
|
+
data_store.heartbeat!(self, time)
|
494
|
+
end
|
495
|
+
|
496
|
+
def self.all_heartbeats
|
497
|
+
data_store.all_heartbeats
|
498
|
+
end
|
499
|
+
|
500
|
+
# Returns a list of workers that have sent a heartbeat in the past, but which
|
501
|
+
# already expired (does NOT include workers that have never sent a heartbeat at all).
|
502
|
+
def self.all_workers_with_expired_heartbeats
|
503
|
+
# Use `Worker.all_heartbeats` instead of `Worker.all`
|
504
|
+
# to prune workers which haven't been registered but have set a heartbeat.
|
505
|
+
# https://github.com/resque/resque/pull/1751
|
506
|
+
heartbeats = Worker.all_heartbeats
|
507
|
+
now = data_store.server_time
|
508
|
+
|
509
|
+
heartbeats.select do |id, heartbeat|
|
510
|
+
if heartbeat
|
511
|
+
seconds_since_heartbeat = (now - Time.parse(heartbeat)).to_i
|
512
|
+
seconds_since_heartbeat > Resque.prune_interval
|
513
|
+
else
|
514
|
+
false
|
515
|
+
end
|
516
|
+
end.each_key.map do |id|
|
517
|
+
# skip_exists must be true to include not registered workers
|
518
|
+
find(id, :skip_exists => true)
|
519
|
+
end
|
520
|
+
end
|
521
|
+
|
522
|
+
def start_heartbeat
|
523
|
+
remove_heartbeat
|
524
|
+
|
525
|
+
@heartbeat_thread_signal = Resque::ThreadSignal.new
|
526
|
+
|
527
|
+
@heartbeat_thread = Thread.new do
|
528
|
+
loop do
|
529
|
+
heartbeat!
|
530
|
+
signaled = @heartbeat_thread_signal.wait_for_signal(Resque.heartbeat_interval)
|
531
|
+
break if signaled
|
532
|
+
end
|
533
|
+
end
|
534
|
+
|
535
|
+
@@all_heartbeat_threads << @heartbeat_thread
|
536
|
+
end
|
537
|
+
|
367
538
|
# Kills the forked child immediately with minimal remorse. The job it
|
368
539
|
# is processing will not be completed. Send the child a TERM signal,
|
369
|
-
# wait
|
540
|
+
# wait <term_timeout> seconds, and then send a KILL signal if it has not quit.
|
541
|
+
# If pre_shutdown_timeout has been set to a positive number, it will allow
|
542
|
+
# the child that many seconds before sending the aforementioned TERM and KILL.
|
370
543
|
def new_kill_child
|
371
544
|
if @child
|
372
|
-
unless
|
373
|
-
|
545
|
+
unless child_already_exited?
|
546
|
+
if pre_shutdown_timeout && pre_shutdown_timeout > 0.0
|
547
|
+
log_with_severity :debug, "Waiting #{pre_shutdown_timeout.to_f}s for child process to exit"
|
548
|
+
return if wait_for_child_exit(pre_shutdown_timeout)
|
549
|
+
end
|
550
|
+
|
551
|
+
log_with_severity :debug, "Sending TERM signal to child #{@child}"
|
374
552
|
Process.kill("TERM", @child)
|
375
|
-
|
376
|
-
|
377
|
-
return
|
553
|
+
|
554
|
+
if wait_for_child_exit(term_timeout)
|
555
|
+
return
|
556
|
+
else
|
557
|
+
log_with_severity :debug, "Sending KILL signal to child #{@child}"
|
558
|
+
Process.kill("KILL", @child)
|
378
559
|
end
|
379
|
-
log! "Sending KILL signal to child #{@child}"
|
380
|
-
Process.kill("KILL", @child)
|
381
560
|
else
|
382
|
-
|
561
|
+
log_with_severity :debug, "Child #{@child} already quit."
|
383
562
|
end
|
384
563
|
end
|
385
564
|
rescue SystemCallError
|
386
|
-
|
565
|
+
log_with_severity :error, "Child #{@child} already quit and reaped."
|
566
|
+
end
|
567
|
+
|
568
|
+
def child_already_exited?
|
569
|
+
Process.waitpid(@child, Process::WNOHANG)
|
570
|
+
end
|
571
|
+
|
572
|
+
def wait_for_child_exit(timeout)
|
573
|
+
(timeout * 10).round.times do |i|
|
574
|
+
sleep(0.1)
|
575
|
+
return true if child_already_exited?
|
576
|
+
end
|
577
|
+
false
|
387
578
|
end
|
388
579
|
|
389
580
|
# are we paused?
|
390
581
|
def paused?
|
391
|
-
@paused
|
582
|
+
@paused || redis.get('pause-all-workers').to_s.strip.downcase == 'true'
|
392
583
|
end
|
393
584
|
|
394
585
|
# Stop processing jobs after the current one has completed (if we're
|
395
586
|
# currently running one).
|
396
587
|
def pause_processing
|
397
|
-
|
588
|
+
log_with_severity :info, "USR2 received; pausing job processing"
|
589
|
+
run_hook :before_pause, self
|
398
590
|
@paused = true
|
399
591
|
end
|
400
592
|
|
401
593
|
# Start processing jobs again after a pause
|
402
594
|
def unpause_processing
|
403
|
-
|
595
|
+
log_with_severity :info, "CONT received; resuming job processing"
|
404
596
|
@paused = false
|
597
|
+
run_hook :after_pause, self
|
405
598
|
end
|
406
599
|
|
407
600
|
# Looks for any workers which should be running on this server
|
@@ -415,13 +608,45 @@ module Resque
|
|
415
608
|
# By checking the current Redis state against the actual
|
416
609
|
# environment, we can determine if Redis is old and clean it up a bit.
|
417
610
|
def prune_dead_workers
|
611
|
+
return unless data_store.acquire_pruning_dead_worker_lock(self, Resque.heartbeat_interval)
|
612
|
+
|
418
613
|
all_workers = Worker.all
|
419
|
-
|
614
|
+
|
615
|
+
known_workers = worker_pids
|
616
|
+
all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
|
617
|
+
all_workers_with_expired_heartbeats.each do |worker|
|
618
|
+
# If the worker's last heartbeat has expired, remove it from the registry.
|
619
|
+
#
|
620
|
+
# If the worker hasn't ever sent a heartbeat, we won't remove it since
|
621
|
+
# the first heartbeat is sent before the worker is registered; it means
|
622
|
+
# that this is a worker that doesn't support heartbeats, e.g., another
|
623
|
+
# client library or an older version of Resque. We won't touch these.
|
624
|
+
log_with_severity :info, "Pruning dead worker: #{worker}"
|
625
|
+
|
626
|
+
job_class = worker.job(false)['payload']['class'] rescue nil
|
627
|
+
worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s, job_class))
|
628
|
+
end
|
629
|
+
|
420
630
|
all_workers.each do |worker|
|
421
|
-
|
631
|
+
if all_workers_with_expired_heartbeats.include?(worker)
|
632
|
+
next
|
633
|
+
end
|
634
|
+
|
635
|
+
host, pid, worker_queues_raw = worker.id.split(':')
|
636
|
+
worker_queues = worker_queues_raw.split(",")
|
637
|
+
unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
|
638
|
+
# If the worker we are trying to prune does not belong to the queues
|
639
|
+
# we are listening to, we should not touch it.
|
640
|
+
# Attempt to prune a worker from different queues may easily result in
|
641
|
+
# an unknown class exception, since that worker could easily be even
|
642
|
+
# written in different language.
|
643
|
+
next
|
644
|
+
end
|
645
|
+
|
422
646
|
next unless host == hostname
|
423
647
|
next if known_workers.include?(pid)
|
424
|
-
|
648
|
+
|
649
|
+
log_with_severity :debug, "Pruning dead worker: #{worker}"
|
425
650
|
worker.unregister_worker
|
426
651
|
end
|
427
652
|
end
|
@@ -429,18 +654,29 @@ module Resque
|
|
429
654
|
# Registers ourself as a worker. Useful when entering the worker
|
430
655
|
# lifecycle on startup.
|
431
656
|
def register_worker
|
432
|
-
|
433
|
-
started!
|
657
|
+
data_store.register_worker(self)
|
434
658
|
end
|
435
659
|
|
436
660
|
# Runs a named hook, passing along any arguments.
|
437
661
|
def run_hook(name, *args)
|
438
|
-
|
439
|
-
|
662
|
+
hooks = Resque.send(name)
|
663
|
+
return if hooks.empty?
|
664
|
+
return if name == :before_first_fork && @before_first_fork_hook_ran
|
665
|
+
msg = "Running #{name} hooks"
|
440
666
|
msg << " with #{args.inspect}" if args.any?
|
441
|
-
|
667
|
+
log_with_severity :info, msg
|
668
|
+
|
669
|
+
hooks.each do |hook|
|
670
|
+
args.any? ? hook.call(*args) : hook.call
|
671
|
+
@before_first_fork_hook_ran = true if name == :before_first_fork
|
672
|
+
end
|
673
|
+
end
|
442
674
|
|
443
|
-
|
675
|
+
def kill_background_threads
|
676
|
+
if @heartbeat_thread
|
677
|
+
@heartbeat_thread_signal.signal
|
678
|
+
@heartbeat_thread.join
|
679
|
+
end
|
444
680
|
end
|
445
681
|
|
446
682
|
# Unregisters ourself as a worker. Useful when shutting down.
|
@@ -452,15 +688,28 @@ module Resque
|
|
452
688
|
# Ensure the proper worker is attached to this job, even if
|
453
689
|
# it's not the precise instance that died.
|
454
690
|
job.worker = self
|
455
|
-
|
691
|
+
begin
|
692
|
+
job.fail(exception || DirtyExit.new("Job still being processed"))
|
693
|
+
rescue RuntimeError => e
|
694
|
+
log_with_severity :error, e.message
|
695
|
+
end
|
456
696
|
end
|
457
697
|
|
458
|
-
|
459
|
-
redis.del("worker:#{self}")
|
460
|
-
redis.del("worker:#{self}:started")
|
698
|
+
kill_background_threads
|
461
699
|
|
462
|
-
|
463
|
-
|
700
|
+
data_store.unregister_worker(self) do |**opts|
|
701
|
+
Stat.clear("processed:#{self}", **opts)
|
702
|
+
Stat.clear("failed:#{self}", **opts)
|
703
|
+
end
|
704
|
+
rescue Exception => exception_while_unregistering
|
705
|
+
message = exception_while_unregistering.message
|
706
|
+
if exception
|
707
|
+
message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
|
708
|
+
message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
|
709
|
+
end
|
710
|
+
fail(exception_while_unregistering.class,
|
711
|
+
message,
|
712
|
+
exception_while_unregistering.backtrace)
|
464
713
|
end
|
465
714
|
|
466
715
|
# Given a job, tells Redis we're working on it. Useful for seeing
|
@@ -468,16 +717,26 @@ module Resque
|
|
468
717
|
def working_on(job)
|
469
718
|
data = encode \
|
470
719
|
:queue => job.queue,
|
471
|
-
:run_at => Time.now.
|
720
|
+
:run_at => Time.now.utc.iso8601,
|
472
721
|
:payload => job.payload
|
473
|
-
|
722
|
+
data_store.set_worker_payload(self,data)
|
723
|
+
state_change
|
474
724
|
end
|
475
725
|
|
476
726
|
# Called when we are done working - clears our `working_on` state
|
477
727
|
# and tells Redis we processed a job.
|
478
728
|
def done_working
|
479
|
-
|
480
|
-
|
729
|
+
data_store.worker_done_working(self) do |**opts|
|
730
|
+
processed!(**opts)
|
731
|
+
end
|
732
|
+
end
|
733
|
+
|
734
|
+
def state_change
|
735
|
+
current_state = state
|
736
|
+
if current_state != @last_state
|
737
|
+
run_hook :queue_empty if current_state == :idle
|
738
|
+
@last_state = current_state
|
739
|
+
end
|
481
740
|
end
|
482
741
|
|
483
742
|
# How many jobs has this worker processed? Returns an int.
|
@@ -486,9 +745,9 @@ module Resque
|
|
486
745
|
end
|
487
746
|
|
488
747
|
# Tell Redis we've processed a job.
|
489
|
-
def processed!
|
490
|
-
Stat
|
491
|
-
Stat
|
748
|
+
def processed!(**opts)
|
749
|
+
Stat.incr("processed", 1, **opts)
|
750
|
+
Stat.incr("processed:#{self}", 1, **opts)
|
492
751
|
end
|
493
752
|
|
494
753
|
# How many failed jobs has this worker seen? Returns an int.
|
@@ -504,18 +763,20 @@ module Resque
|
|
504
763
|
|
505
764
|
# What time did this worker start? Returns an instance of `Time`
|
506
765
|
def started
|
507
|
-
|
766
|
+
data_store.worker_start_time(self)
|
508
767
|
end
|
509
768
|
|
510
769
|
# Tell Redis we've started
|
511
770
|
def started!
|
512
|
-
|
771
|
+
data_store.worker_started(self)
|
513
772
|
end
|
514
773
|
|
515
774
|
# Returns a hash explaining the Job we're currently processing, if any.
|
516
|
-
def job
|
517
|
-
|
775
|
+
def job(reload = true)
|
776
|
+
@job = nil if reload
|
777
|
+
@job ||= decode(data_store.get_worker_payload(self)) || {}
|
518
778
|
end
|
779
|
+
attr_writer :job
|
519
780
|
alias_method :processing, :job
|
520
781
|
|
521
782
|
# Boolean - true if working, false if not
|
@@ -527,15 +788,16 @@ module Resque
|
|
527
788
|
def idle?
|
528
789
|
state == :idle
|
529
790
|
end
|
530
|
-
|
531
|
-
def
|
532
|
-
|
791
|
+
|
792
|
+
def fork_per_job?
|
793
|
+
return @fork_per_job if defined?(@fork_per_job)
|
794
|
+
@fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
|
533
795
|
end
|
534
796
|
|
535
797
|
# Returns a symbol representing the current worker state,
|
536
798
|
# which can be either :working or :idle
|
537
799
|
def state
|
538
|
-
|
800
|
+
data_store.get_worker_payload(self) ? :working : :idle
|
539
801
|
end
|
540
802
|
|
541
803
|
# Is this worker the same as another worker?
|
@@ -550,18 +812,18 @@ module Resque
|
|
550
812
|
# The string representation is the same as the id for this worker
|
551
813
|
# instance. Can be used with `Worker.find`.
|
552
814
|
def to_s
|
553
|
-
@to_s ||= "#{hostname}:#{
|
815
|
+
@to_s ||= "#{hostname}:#{pid}:#{@queues.join(',')}"
|
554
816
|
end
|
555
817
|
alias_method :id, :to_s
|
556
818
|
|
557
|
-
# chomp'd hostname of this machine
|
819
|
+
# chomp'd hostname of this worker's machine
|
558
820
|
def hostname
|
559
|
-
@hostname ||=
|
821
|
+
@hostname ||= Socket.gethostname
|
560
822
|
end
|
561
823
|
|
562
824
|
# Returns Integer PID of running worker
|
563
825
|
def pid
|
564
|
-
Process.pid
|
826
|
+
@pid ||= Process.pid
|
565
827
|
end
|
566
828
|
|
567
829
|
# Returns an Array of string pids of all the other workers on this
|
@@ -569,17 +831,24 @@ module Resque
|
|
569
831
|
def worker_pids
|
570
832
|
if RUBY_PLATFORM =~ /solaris/
|
571
833
|
solaris_worker_pids
|
834
|
+
elsif RUBY_PLATFORM =~ /mingw32/
|
835
|
+
windows_worker_pids
|
572
836
|
else
|
573
837
|
linux_worker_pids
|
574
838
|
end
|
575
839
|
end
|
576
840
|
|
577
|
-
# Find Resque worker pids on Linux and OS X.
|
578
|
-
#
|
579
841
|
# Returns an Array of string pids of all the other workers on this
|
580
842
|
# machine. Useful when pruning dead workers on startup.
|
843
|
+
def windows_worker_pids
|
844
|
+
tasklist_output = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`.encode("UTF-8", Encoding.locale_charmap)
|
845
|
+
tasklist_output.split($/).select { |line| line =~ /^PID:/ }.collect { |line| line.gsub(/PID:\s+/, '') }
|
846
|
+
end
|
847
|
+
|
848
|
+
# Find Resque worker pids on Linux and OS X.
|
849
|
+
#
|
581
850
|
def linux_worker_pids
|
582
|
-
`ps -A -o pid,command | grep "[r]esque" | grep -v "resque-web"`.split("\n").map do |line|
|
851
|
+
`ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`.split("\n").map do |line|
|
583
852
|
line.split(' ')[0]
|
584
853
|
end
|
585
854
|
end
|
@@ -592,7 +861,7 @@ module Resque
|
|
592
861
|
`ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n").map do |line|
|
593
862
|
real_pid = line.split(' ')[0]
|
594
863
|
pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
|
595
|
-
if pargs_command.split(':')[1] == " resque-#{Resque::
|
864
|
+
if pargs_command.split(':')[1] == " resque-#{Resque::VERSION}"
|
596
865
|
real_pid
|
597
866
|
end
|
598
867
|
end.compact
|
@@ -600,25 +869,80 @@ module Resque
|
|
600
869
|
|
601
870
|
# Given a string, sets the procline ($0) and logs.
|
602
871
|
# Procline is always in the format of:
|
603
|
-
#
|
872
|
+
# RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
|
604
873
|
def procline(string)
|
605
|
-
$0 = "resque-#{Resque::
|
606
|
-
|
874
|
+
$0 = "#{ENV['RESQUE_PROCLINE_PREFIX']}resque-#{Resque::VERSION}: #{string}"
|
875
|
+
log_with_severity :debug, $0
|
607
876
|
end
|
608
877
|
|
609
|
-
# Log a message to STDOUT if we are verbose or very_verbose.
|
610
878
|
def log(message)
|
611
|
-
|
612
|
-
puts "*** #{message}"
|
613
|
-
elsif very_verbose
|
614
|
-
time = Time.now.strftime('%H:%M:%S %Y-%m-%d')
|
615
|
-
puts "** [#{time}] #$$: #{message}"
|
616
|
-
end
|
879
|
+
info(message)
|
617
880
|
end
|
618
881
|
|
619
|
-
# Logs a very verbose message to STDOUT.
|
620
882
|
def log!(message)
|
621
|
-
|
883
|
+
debug(message)
|
884
|
+
end
|
885
|
+
|
886
|
+
|
887
|
+
attr_reader :verbose, :very_verbose
|
888
|
+
|
889
|
+
def verbose=(value);
|
890
|
+
if value && !very_verbose
|
891
|
+
Resque.logger.formatter = VerboseFormatter.new
|
892
|
+
Resque.logger.level = Logger::INFO
|
893
|
+
elsif !value
|
894
|
+
Resque.logger.formatter = QuietFormatter.new
|
895
|
+
end
|
896
|
+
|
897
|
+
@verbose = value
|
898
|
+
end
|
899
|
+
|
900
|
+
def very_verbose=(value)
|
901
|
+
if value
|
902
|
+
Resque.logger.formatter = VeryVerboseFormatter.new
|
903
|
+
Resque.logger.level = Logger::DEBUG
|
904
|
+
elsif !value && verbose
|
905
|
+
Resque.logger.formatter = VerboseFormatter.new
|
906
|
+
Resque.logger.level = Logger::INFO
|
907
|
+
else
|
908
|
+
Resque.logger.formatter = QuietFormatter.new
|
909
|
+
end
|
910
|
+
|
911
|
+
@very_verbose = value
|
912
|
+
end
|
913
|
+
|
914
|
+
private
|
915
|
+
|
916
|
+
def perform_with_fork(job, &block)
|
917
|
+
run_hook :before_fork, job
|
918
|
+
|
919
|
+
begin
|
920
|
+
@child = fork do
|
921
|
+
unregister_signal_handlers if term_child
|
922
|
+
perform(job, &block)
|
923
|
+
exit! unless run_at_exit_hooks
|
924
|
+
end
|
925
|
+
rescue NotImplementedError
|
926
|
+
@fork_per_job = false
|
927
|
+
perform(job, &block)
|
928
|
+
return
|
929
|
+
end
|
930
|
+
|
931
|
+
srand # Reseeding
|
932
|
+
procline "Forked #{@child} at #{Time.now.to_i}"
|
933
|
+
|
934
|
+
begin
|
935
|
+
Process.waitpid(@child)
|
936
|
+
rescue SystemCallError
|
937
|
+
nil
|
938
|
+
end
|
939
|
+
|
940
|
+
job.fail(DirtyExit.new("Child process received unhandled signal #{$?}", $?)) if $?.signaled?
|
941
|
+
@child = nil
|
942
|
+
end
|
943
|
+
|
944
|
+
def log_with_severity(severity, message)
|
945
|
+
Logging.log(severity, message)
|
622
946
|
end
|
623
947
|
end
|
624
948
|
end
|