resque 1.23.0 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/HISTORY.md +271 -0
- data/README.markdown +454 -484
- data/Rakefile +4 -17
- data/bin/resque-web +10 -22
- data/lib/resque/data_store.rb +335 -0
- data/lib/resque/errors.rb +15 -1
- data/lib/resque/failure/airbrake.rb +32 -4
- data/lib/resque/failure/base.rb +16 -7
- data/lib/resque/failure/multiple.rb +26 -8
- data/lib/resque/failure/redis.rb +92 -15
- data/lib/resque/failure/redis_multi_queue.rb +104 -0
- data/lib/resque/failure.rb +62 -32
- data/lib/resque/helpers.rb +11 -57
- data/lib/resque/job.rb +79 -12
- data/lib/resque/log_formatters/quiet_formatter.rb +7 -0
- data/lib/resque/log_formatters/verbose_formatter.rb +7 -0
- data/lib/resque/log_formatters/very_verbose_formatter.rb +8 -0
- data/lib/resque/logging.rb +18 -0
- data/lib/resque/plugin.rb +22 -10
- data/lib/resque/railtie.rb +10 -0
- data/lib/resque/server/public/jquery-3.6.0.min.js +2 -0
- data/lib/resque/server/public/jquery.relatize_date.js +4 -4
- data/lib/resque/server/public/main.js +3 -0
- data/lib/resque/server/public/ranger.js +16 -8
- data/lib/resque/server/public/style.css +13 -8
- data/lib/resque/server/views/error.erb +1 -1
- data/lib/resque/server/views/failed.erb +27 -59
- data/lib/resque/server/views/failed_job.erb +50 -0
- data/lib/resque/server/views/failed_queues_overview.erb +24 -0
- data/lib/resque/server/views/job_class.erb +8 -0
- data/lib/resque/server/views/key_sets.erb +2 -4
- data/lib/resque/server/views/key_string.erb +1 -1
- data/lib/resque/server/views/layout.erb +7 -6
- data/lib/resque/server/views/next_more.erb +22 -10
- data/lib/resque/server/views/processing.erb +2 -0
- data/lib/resque/server/views/queues.erb +22 -13
- data/lib/resque/server/views/stats.erb +5 -5
- data/lib/resque/server/views/workers.erb +4 -4
- data/lib/resque/server/views/working.erb +10 -11
- data/lib/resque/server.rb +51 -108
- data/lib/resque/server_helper.rb +185 -0
- data/lib/resque/stat.rb +19 -7
- data/lib/resque/tasks.rb +26 -25
- data/lib/resque/thread_signal.rb +24 -0
- data/lib/resque/vendor/utf8_util.rb +2 -8
- data/lib/resque/version.rb +1 -1
- data/lib/resque/web_runner.rb +374 -0
- data/lib/resque/worker.rb +487 -163
- data/lib/resque.rb +332 -52
- data/lib/tasks/redis.rake +11 -11
- metadata +169 -149
- data/lib/resque/failure/hoptoad.rb +0 -33
- data/lib/resque/failure/thoughtbot.rb +0 -33
- data/lib/resque/server/public/jquery-1.3.2.min.js +0 -19
- data/lib/resque/server/test_helper.rb +0 -19
- data/lib/resque/vendor/utf8_util/utf8_util_18.rb +0 -91
- data/lib/resque/vendor/utf8_util/utf8_util_19.rb +0 -5
- data/test/airbrake_test.rb +0 -27
- data/test/hoptoad_test.rb +0 -26
- data/test/job_hooks_test.rb +0 -464
- data/test/job_plugins_test.rb +0 -230
- data/test/plugin_test.rb +0 -116
- data/test/redis-test-cluster.conf +0 -115
- data/test/redis-test.conf +0 -115
- data/test/resque-web_test.rb +0 -59
- data/test/resque_failure_redis_test.rb +0 -19
- data/test/resque_test.rb +0 -278
- data/test/test_helper.rb +0 -178
- data/test/worker_test.rb +0 -657
data/lib/resque/worker.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'set'
|
3
|
+
require 'redis/distributed'
|
4
|
+
|
1
5
|
module Resque
|
2
6
|
# A Resque Worker processes jobs. On platforms that support fork(2),
|
3
7
|
# the worker will fork off a child to process each job. This ensures
|
@@ -9,27 +13,63 @@ module Resque
|
|
9
13
|
class Worker
|
10
14
|
include Resque::Helpers
|
11
15
|
extend Resque::Helpers
|
16
|
+
include Resque::Logging
|
17
|
+
|
18
|
+
@@all_heartbeat_threads = []
|
19
|
+
def self.kill_all_heartbeat_threads
|
20
|
+
@@all_heartbeat_threads.each(&:kill).each(&:join)
|
21
|
+
@@all_heartbeat_threads = []
|
22
|
+
end
|
23
|
+
|
24
|
+
def redis
|
25
|
+
Resque.redis
|
26
|
+
end
|
27
|
+
alias :data_store :redis
|
28
|
+
|
29
|
+
def self.redis
|
30
|
+
Resque.redis
|
31
|
+
end
|
12
32
|
|
13
|
-
|
14
|
-
|
33
|
+
def self.data_store
|
34
|
+
self.redis
|
35
|
+
end
|
15
36
|
|
16
|
-
#
|
17
|
-
|
37
|
+
# Given a Ruby object, returns a string suitable for storage in a
|
38
|
+
# queue.
|
39
|
+
def encode(object)
|
40
|
+
Resque.encode(object)
|
41
|
+
end
|
18
42
|
|
19
|
-
#
|
20
|
-
|
21
|
-
|
43
|
+
# Given a string, returns a Ruby object.
|
44
|
+
def decode(object)
|
45
|
+
Resque.decode(object)
|
46
|
+
end
|
22
47
|
|
23
48
|
attr_accessor :term_timeout
|
24
49
|
|
50
|
+
attr_accessor :pre_shutdown_timeout
|
51
|
+
|
52
|
+
attr_accessor :term_child_signal
|
53
|
+
|
25
54
|
# decide whether to use new_kill_child logic
|
26
55
|
attr_accessor :term_child
|
27
56
|
|
57
|
+
# should term kill workers gracefully (vs. immediately)
|
58
|
+
# Makes SIGTERM work like SIGQUIT
|
59
|
+
attr_accessor :graceful_term
|
60
|
+
|
61
|
+
# When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
|
62
|
+
# registered in the application. Otherwise, forked workers exit with `exit!`
|
63
|
+
attr_accessor :run_at_exit_hooks
|
64
|
+
|
65
|
+
attr_writer :fork_per_job
|
66
|
+
attr_writer :hostname
|
28
67
|
attr_writer :to_s
|
68
|
+
attr_writer :pid
|
29
69
|
|
30
70
|
# Returns an array of all worker objects.
|
31
71
|
def self.all
|
32
|
-
|
72
|
+
data_store.worker_ids.map { |id| find(id, :skip_exists => true) }.compact
|
33
73
|
end
|
34
74
|
|
35
75
|
# Returns an array of all worker objects currently processing
|
@@ -38,32 +78,37 @@ module Resque
|
|
38
78
|
names = all
|
39
79
|
return [] unless names.any?
|
40
80
|
|
41
|
-
names.map! { |name| "worker:#{name}" }
|
42
|
-
|
43
81
|
reportedly_working = {}
|
44
82
|
|
45
83
|
begin
|
46
|
-
reportedly_working =
|
84
|
+
reportedly_working = data_store.workers_map(names).reject do |key, value|
|
47
85
|
value.nil? || value.empty?
|
48
86
|
end
|
49
87
|
rescue Redis::Distributed::CannotDistribute
|
50
88
|
names.each do |name|
|
51
|
-
value =
|
89
|
+
value = data_store.get_worker_payload(name)
|
52
90
|
reportedly_working[name] = value unless value.nil? || value.empty?
|
53
91
|
end
|
54
92
|
end
|
55
93
|
|
56
94
|
reportedly_working.keys.map do |key|
|
57
|
-
find
|
95
|
+
worker = find(key.sub("worker:", ''), :skip_exists => true)
|
96
|
+
worker.job = worker.decode(reportedly_working[key])
|
97
|
+
worker
|
58
98
|
end.compact
|
59
99
|
end
|
60
100
|
|
61
101
|
# Returns a single worker object. Accepts a string id.
|
62
|
-
def self.find(worker_id)
|
63
|
-
|
64
|
-
|
102
|
+
def self.find(worker_id, options = {})
|
103
|
+
skip_exists = options[:skip_exists]
|
104
|
+
|
105
|
+
if skip_exists || exists?(worker_id)
|
106
|
+
host, pid, queues_raw = worker_id.split(':', 3)
|
107
|
+
queues = queues_raw.split(',')
|
65
108
|
worker = new(*queues)
|
109
|
+
worker.hostname = host
|
66
110
|
worker.to_s = worker_id
|
111
|
+
worker.pid = pid.to_i
|
67
112
|
worker
|
68
113
|
else
|
69
114
|
nil
|
@@ -78,7 +123,7 @@ module Resque
|
|
78
123
|
# Given a string worker id, return a boolean indicating whether the
|
79
124
|
# worker exists
|
80
125
|
def self.exists?(worker_id)
|
81
|
-
|
126
|
+
data_store.worker_exists?(worker_id)
|
82
127
|
end
|
83
128
|
|
84
129
|
# Workers should be initialized with an array of string queue
|
@@ -92,10 +137,60 @@ module Resque
|
|
92
137
|
# If passed a single "*", this Worker will operate on all queues
|
93
138
|
# in alphabetical order. Queues can be dynamically added or
|
94
139
|
# removed without needing to restart workers using this method.
|
140
|
+
#
|
141
|
+
# Workers should have `#prepare` called after they are initialized
|
142
|
+
# if you are running work on the worker.
|
95
143
|
def initialize(*queues)
|
96
|
-
@queues = queues.map { |queue| queue.to_s.strip }
|
97
144
|
@shutdown = nil
|
98
145
|
@paused = nil
|
146
|
+
@before_first_fork_hook_ran = false
|
147
|
+
|
148
|
+
@heartbeat_thread = nil
|
149
|
+
@heartbeat_thread_signal = nil
|
150
|
+
|
151
|
+
@last_state = :idle
|
152
|
+
|
153
|
+
verbose_value = ENV['LOGGING'] || ENV['VERBOSE']
|
154
|
+
self.verbose = verbose_value if verbose_value
|
155
|
+
self.very_verbose = ENV['VVERBOSE'] if ENV['VVERBOSE']
|
156
|
+
self.pre_shutdown_timeout = (ENV['RESQUE_PRE_SHUTDOWN_TIMEOUT'] || 0.0).to_f
|
157
|
+
self.term_timeout = (ENV['RESQUE_TERM_TIMEOUT'] || 4.0).to_f
|
158
|
+
self.term_child = ENV['TERM_CHILD']
|
159
|
+
self.graceful_term = ENV['GRACEFUL_TERM']
|
160
|
+
self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']
|
161
|
+
|
162
|
+
self.queues = queues
|
163
|
+
end
|
164
|
+
|
165
|
+
# Daemonizes the worker if ENV['BACKGROUND'] is set and writes
|
166
|
+
# the process id to ENV['PIDFILE'] if set. Should only be called
|
167
|
+
# once per worker.
|
168
|
+
def prepare
|
169
|
+
if ENV['BACKGROUND']
|
170
|
+
Process.daemon(true)
|
171
|
+
end
|
172
|
+
|
173
|
+
if ENV['PIDFILE']
|
174
|
+
File.open(ENV['PIDFILE'], 'w') { |f| f << pid }
|
175
|
+
end
|
176
|
+
|
177
|
+
self.reconnect if ENV['BACKGROUND']
|
178
|
+
end
|
179
|
+
|
180
|
+
WILDCARDS = ['*', '?', '{', '}', '[', ']'].freeze
|
181
|
+
|
182
|
+
def queues=(queues)
|
183
|
+
queues = (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') if queues.empty?
|
184
|
+
queues = queues.map { |queue| queue.to_s.strip }
|
185
|
+
|
186
|
+
@skip_queues, @queues = queues.partition { |queue| queue.start_with?('!') }
|
187
|
+
@skip_queues.map! { |queue| queue[1..-1] }
|
188
|
+
|
189
|
+
# The behavior of `queues` is dependent on the value of `@has_dynamic_queues`: if it's true, the method returns the result of filtering @queues with `glob_match`;
|
190
|
+
# if it's false, the method returns @queues directly. Since `glob_match` will cause skipped queues to be filtered out, we want to make sure it's called if we have @skip_queues.any?
|
191
|
+
@has_dynamic_queues =
|
192
|
+
@skip_queues.any? || WILDCARDS.any? { |char| @queues.join.include?(char) }
|
193
|
+
|
99
194
|
validate_queues
|
100
195
|
end
|
101
196
|
|
@@ -109,6 +204,25 @@ module Resque
|
|
109
204
|
end
|
110
205
|
end
|
111
206
|
|
207
|
+
# Returns a list of queues to use when searching for a job.
|
208
|
+
# A splat ("*") means you want every queue (in alpha order) - this
|
209
|
+
# can be useful for dynamically adding new queues.
|
210
|
+
def queues
|
211
|
+
if @has_dynamic_queues
|
212
|
+
current_queues = Resque.queues
|
213
|
+
@queues.map { |queue| glob_match(current_queues, queue) }.flatten.uniq
|
214
|
+
else
|
215
|
+
@queues
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
def glob_match(list, pattern)
|
220
|
+
list.select do |queue|
|
221
|
+
File.fnmatch?(pattern, queue) &&
|
222
|
+
@skip_queues.none? { |skip_pattern| File.fnmatch?(skip_pattern, queue) }
|
223
|
+
end.sort
|
224
|
+
end
|
225
|
+
|
112
226
|
# This is the main workhorse method. Called on a Worker instance,
|
113
227
|
# it begins the worker life cycle.
|
114
228
|
#
|
@@ -127,46 +241,47 @@ module Resque
|
|
127
241
|
# has completed processing. Useful for testing.
|
128
242
|
def work(interval = 5.0, &block)
|
129
243
|
interval = Float(interval)
|
130
|
-
$0 = "resque: Starting"
|
131
244
|
startup
|
132
245
|
|
133
246
|
loop do
|
134
247
|
break if shutdown?
|
135
248
|
|
136
|
-
|
137
|
-
|
138
|
-
job.worker = self
|
139
|
-
working_on job
|
140
|
-
|
141
|
-
if @child = fork(job)
|
142
|
-
srand # Reseeding
|
143
|
-
procline "Forked #{@child} at #{Time.now.to_i}"
|
144
|
-
begin
|
145
|
-
Process.waitpid(@child)
|
146
|
-
rescue SystemCallError
|
147
|
-
nil
|
148
|
-
end
|
149
|
-
else
|
150
|
-
unregister_signal_handlers if will_fork? && term_child
|
151
|
-
procline "Processing #{job.queue} since #{Time.now.to_i}"
|
152
|
-
reconnect
|
153
|
-
perform(job, &block)
|
154
|
-
exit! if will_fork?
|
155
|
-
end
|
156
|
-
|
157
|
-
done_working
|
158
|
-
@child = nil
|
159
|
-
else
|
249
|
+
unless work_one_job(&block)
|
250
|
+
state_change
|
160
251
|
break if interval.zero?
|
161
|
-
|
162
|
-
procline paused? ? "Paused" : "Waiting for #{
|
252
|
+
log_with_severity :debug, "Sleeping for #{interval} seconds"
|
253
|
+
procline paused? ? "Paused" : "Waiting for #{queues.join(',')}"
|
163
254
|
sleep interval
|
164
255
|
end
|
165
256
|
end
|
166
257
|
|
167
258
|
unregister_worker
|
259
|
+
run_hook :worker_exit
|
168
260
|
rescue Exception => exception
|
261
|
+
return if exception.class == SystemExit && !@child && run_at_exit_hooks
|
262
|
+
log_with_severity :error, "Failed to start worker : #{exception.inspect}"
|
169
263
|
unregister_worker(exception)
|
264
|
+
run_hook :worker_exit
|
265
|
+
end
|
266
|
+
|
267
|
+
def work_one_job(job = nil, &block)
|
268
|
+
return false if paused?
|
269
|
+
return false unless job ||= reserve
|
270
|
+
|
271
|
+
working_on job
|
272
|
+
procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"
|
273
|
+
|
274
|
+
log_with_severity :info, "got: #{job.inspect}"
|
275
|
+
job.worker = self
|
276
|
+
|
277
|
+
if fork_per_job?
|
278
|
+
perform_with_fork(job, &block)
|
279
|
+
else
|
280
|
+
perform(job, &block)
|
281
|
+
end
|
282
|
+
|
283
|
+
done_working
|
284
|
+
true
|
170
285
|
end
|
171
286
|
|
172
287
|
# DEPRECATED. Processes a single job. If none is given, it will
|
@@ -181,21 +296,34 @@ module Resque
|
|
181
296
|
done_working
|
182
297
|
end
|
183
298
|
|
299
|
+
# Reports the exception and marks the job as failed
|
300
|
+
def report_failed_job(job,exception)
|
301
|
+
log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"
|
302
|
+
begin
|
303
|
+
job.fail(exception)
|
304
|
+
rescue Object => exception
|
305
|
+
log_with_severity :error, "Received exception when reporting failure: #{exception.inspect}"
|
306
|
+
end
|
307
|
+
begin
|
308
|
+
failed!
|
309
|
+
rescue Object => exception
|
310
|
+
log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{exception.inspect}"
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
314
|
+
|
184
315
|
# Processes a given job in the child.
|
185
316
|
def perform(job)
|
186
317
|
begin
|
187
|
-
|
318
|
+
if fork_per_job?
|
319
|
+
reconnect
|
320
|
+
run_hook :after_fork, job
|
321
|
+
end
|
188
322
|
job.perform
|
189
323
|
rescue Object => e
|
190
|
-
|
191
|
-
begin
|
192
|
-
job.fail(e)
|
193
|
-
rescue Object => e
|
194
|
-
log "Received exception when reporting failure: #{e.inspect}"
|
195
|
-
end
|
196
|
-
failed!
|
324
|
+
report_failed_job(job,e)
|
197
325
|
else
|
198
|
-
|
326
|
+
log_with_severity :info, "done: #{job.inspect}"
|
199
327
|
ensure
|
200
328
|
yield job if block_given?
|
201
329
|
end
|
@@ -205,17 +333,17 @@ module Resque
|
|
205
333
|
# nil if no job can be found.
|
206
334
|
def reserve
|
207
335
|
queues.each do |queue|
|
208
|
-
|
336
|
+
log_with_severity :debug, "Checking #{queue}"
|
209
337
|
if job = Resque.reserve(queue)
|
210
|
-
|
338
|
+
log_with_severity :debug, "Found job on #{queue}"
|
211
339
|
return job
|
212
340
|
end
|
213
341
|
end
|
214
342
|
|
215
343
|
nil
|
216
344
|
rescue Exception => e
|
217
|
-
|
218
|
-
|
345
|
+
log_with_severity :error, "Error reserving job: #{e.inspect}"
|
346
|
+
log_with_severity :error, e.backtrace.join("\n")
|
219
347
|
raise e
|
220
348
|
end
|
221
349
|
|
@@ -224,53 +352,26 @@ module Resque
|
|
224
352
|
def reconnect
|
225
353
|
tries = 0
|
226
354
|
begin
|
227
|
-
|
355
|
+
data_store.reconnect
|
228
356
|
rescue Redis::BaseConnectionError
|
229
357
|
if (tries += 1) <= 3
|
230
|
-
|
358
|
+
log_with_severity :error, "Error reconnecting to Redis; retrying"
|
231
359
|
sleep(tries)
|
232
360
|
retry
|
233
361
|
else
|
234
|
-
|
362
|
+
log_with_severity :error, "Error reconnecting to Redis; quitting"
|
235
363
|
raise
|
236
364
|
end
|
237
365
|
end
|
238
366
|
end
|
239
367
|
|
240
|
-
# Returns a list of queues to use when searching for a job.
|
241
|
-
# A splat ("*") means you want every queue (in alpha order) - this
|
242
|
-
# can be useful for dynamically adding new queues.
|
243
|
-
def queues
|
244
|
-
@queues.map {|queue| queue == "*" ? Resque.queues.sort : queue }.flatten.uniq
|
245
|
-
end
|
246
|
-
|
247
|
-
# Not every platform supports fork. Here we do our magic to
|
248
|
-
# determine if yours does.
|
249
|
-
def fork(job)
|
250
|
-
return if @cant_fork
|
251
|
-
|
252
|
-
# Only run before_fork hooks if we're actually going to fork
|
253
|
-
# (after checking @cant_fork)
|
254
|
-
run_hook :before_fork, job
|
255
|
-
|
256
|
-
begin
|
257
|
-
# IronRuby doesn't support `Kernel.fork` yet
|
258
|
-
if Kernel.respond_to?(:fork)
|
259
|
-
Kernel.fork if will_fork?
|
260
|
-
else
|
261
|
-
raise NotImplementedError
|
262
|
-
end
|
263
|
-
rescue NotImplementedError
|
264
|
-
@cant_fork = true
|
265
|
-
nil
|
266
|
-
end
|
267
|
-
end
|
268
|
-
|
269
368
|
# Runs all the methods needed when a worker begins its lifecycle.
|
270
369
|
def startup
|
271
|
-
|
370
|
+
$0 = "resque: Starting"
|
371
|
+
|
272
372
|
enable_gc_optimizations
|
273
373
|
register_signal_handlers
|
374
|
+
start_heartbeat
|
274
375
|
prune_dead_workers
|
275
376
|
run_hook :before_first_fork
|
276
377
|
register_worker
|
@@ -297,7 +398,7 @@ module Resque
|
|
297
398
|
# USR2: Don't process any new jobs
|
298
399
|
# CONT: Start processing jobs again after a USR2
|
299
400
|
def register_signal_handlers
|
300
|
-
trap('TERM') { shutdown! }
|
401
|
+
trap('TERM') { graceful_term ? shutdown : shutdown! }
|
301
402
|
trap('INT') { shutdown! }
|
302
403
|
|
303
404
|
begin
|
@@ -310,14 +411,21 @@ module Resque
|
|
310
411
|
trap('USR2') { pause_processing }
|
311
412
|
trap('CONT') { unpause_processing }
|
312
413
|
rescue ArgumentError
|
313
|
-
warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
|
414
|
+
log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
|
314
415
|
end
|
315
416
|
|
316
|
-
|
417
|
+
log_with_severity :debug, "Registered signals"
|
317
418
|
end
|
318
419
|
|
319
420
|
def unregister_signal_handlers
|
320
|
-
trap('TERM')
|
421
|
+
trap('TERM') do
|
422
|
+
trap('TERM') do
|
423
|
+
# Ignore subsequent term signals
|
424
|
+
end
|
425
|
+
|
426
|
+
raise TermException.new("SIGTERM")
|
427
|
+
end
|
428
|
+
|
321
429
|
trap('INT', 'DEFAULT')
|
322
430
|
|
323
431
|
begin
|
@@ -331,15 +439,24 @@ module Resque
|
|
331
439
|
# Schedule this worker for shutdown. Will finish processing the
|
332
440
|
# current job.
|
333
441
|
def shutdown
|
334
|
-
|
442
|
+
log_with_severity :info, 'Exiting...'
|
335
443
|
@shutdown = true
|
336
444
|
end
|
337
445
|
|
338
446
|
# Kill the child and shutdown immediately.
|
447
|
+
# If not forking, abort this process.
|
339
448
|
def shutdown!
|
340
449
|
shutdown
|
341
450
|
if term_child
|
342
|
-
|
451
|
+
if fork_per_job?
|
452
|
+
new_kill_child
|
453
|
+
else
|
454
|
+
# Raise TermException in the same process
|
455
|
+
trap('TERM') do
|
456
|
+
# ignore subsequent terms
|
457
|
+
end
|
458
|
+
raise TermException.new("SIGTERM")
|
459
|
+
end
|
343
460
|
else
|
344
461
|
kill_child
|
345
462
|
end
|
@@ -354,54 +471,130 @@ module Resque
|
|
354
471
|
# is processing will not be completed.
|
355
472
|
def kill_child
|
356
473
|
if @child
|
357
|
-
|
358
|
-
if
|
474
|
+
log_with_severity :debug, "Killing child at #{@child}"
|
475
|
+
if `ps -o pid,state -p #{@child}`
|
359
476
|
Process.kill("KILL", @child) rescue nil
|
360
477
|
else
|
361
|
-
|
478
|
+
log_with_severity :debug, "Child #{@child} not found, restarting."
|
362
479
|
shutdown
|
363
480
|
end
|
364
481
|
end
|
365
482
|
end
|
366
483
|
|
484
|
+
def heartbeat
|
485
|
+
data_store.heartbeat(self)
|
486
|
+
end
|
487
|
+
|
488
|
+
def remove_heartbeat
|
489
|
+
data_store.remove_heartbeat(self)
|
490
|
+
end
|
491
|
+
|
492
|
+
def heartbeat!(time = data_store.server_time)
|
493
|
+
data_store.heartbeat!(self, time)
|
494
|
+
end
|
495
|
+
|
496
|
+
def self.all_heartbeats
|
497
|
+
data_store.all_heartbeats
|
498
|
+
end
|
499
|
+
|
500
|
+
# Returns a list of workers that have sent a heartbeat in the past, but which
|
501
|
+
# already expired (does NOT include workers that have never sent a heartbeat at all).
|
502
|
+
def self.all_workers_with_expired_heartbeats
|
503
|
+
# Use `Worker.all_heartbeats` instead of `Worker.all`
|
504
|
+
# to prune workers which haven't been registered but have set a heartbeat.
|
505
|
+
# https://github.com/resque/resque/pull/1751
|
506
|
+
heartbeats = Worker.all_heartbeats
|
507
|
+
now = data_store.server_time
|
508
|
+
|
509
|
+
heartbeats.select do |id, heartbeat|
|
510
|
+
if heartbeat
|
511
|
+
seconds_since_heartbeat = (now - Time.parse(heartbeat)).to_i
|
512
|
+
seconds_since_heartbeat > Resque.prune_interval
|
513
|
+
else
|
514
|
+
false
|
515
|
+
end
|
516
|
+
end.each_key.map do |id|
|
517
|
+
# skip_exists must be true to include not registered workers
|
518
|
+
find(id, :skip_exists => true)
|
519
|
+
end
|
520
|
+
end
|
521
|
+
|
522
|
+
def start_heartbeat
|
523
|
+
remove_heartbeat
|
524
|
+
|
525
|
+
@heartbeat_thread_signal = Resque::ThreadSignal.new
|
526
|
+
|
527
|
+
@heartbeat_thread = Thread.new do
|
528
|
+
loop do
|
529
|
+
heartbeat!
|
530
|
+
signaled = @heartbeat_thread_signal.wait_for_signal(Resque.heartbeat_interval)
|
531
|
+
break if signaled
|
532
|
+
end
|
533
|
+
end
|
534
|
+
|
535
|
+
@@all_heartbeat_threads << @heartbeat_thread
|
536
|
+
end
|
537
|
+
|
367
538
|
# Kills the forked child immediately with minimal remorse. The job it
|
368
539
|
# is processing will not be completed. Send the child a TERM signal,
|
369
|
-
# wait
|
540
|
+
# wait <term_timeout> seconds, and then a KILL signal if it has not quit
|
541
|
+
# If pre_shutdown_timeout has been set to a positive number, it will allow
|
542
|
+
# the child that many seconds before sending the aforementioned TERM and KILL.
|
370
543
|
def new_kill_child
|
371
544
|
if @child
|
372
|
-
unless
|
373
|
-
|
545
|
+
unless child_already_exited?
|
546
|
+
if pre_shutdown_timeout && pre_shutdown_timeout > 0.0
|
547
|
+
log_with_severity :debug, "Waiting #{pre_shutdown_timeout.to_f}s for child process to exit"
|
548
|
+
return if wait_for_child_exit(pre_shutdown_timeout)
|
549
|
+
end
|
550
|
+
|
551
|
+
log_with_severity :debug, "Sending TERM signal to child #{@child}"
|
374
552
|
Process.kill("TERM", @child)
|
375
|
-
|
376
|
-
|
377
|
-
return
|
553
|
+
|
554
|
+
if wait_for_child_exit(term_timeout)
|
555
|
+
return
|
556
|
+
else
|
557
|
+
log_with_severity :debug, "Sending KILL signal to child #{@child}"
|
558
|
+
Process.kill("KILL", @child)
|
378
559
|
end
|
379
|
-
log! "Sending KILL signal to child #{@child}"
|
380
|
-
Process.kill("KILL", @child)
|
381
560
|
else
|
382
|
-
|
561
|
+
log_with_severity :debug, "Child #{@child} already quit."
|
383
562
|
end
|
384
563
|
end
|
385
564
|
rescue SystemCallError
|
386
|
-
|
565
|
+
log_with_severity :error, "Child #{@child} already quit and reaped."
|
566
|
+
end
|
567
|
+
|
568
|
+
def child_already_exited?
|
569
|
+
Process.waitpid(@child, Process::WNOHANG)
|
570
|
+
end
|
571
|
+
|
572
|
+
def wait_for_child_exit(timeout)
|
573
|
+
(timeout * 10).round.times do |i|
|
574
|
+
sleep(0.1)
|
575
|
+
return true if child_already_exited?
|
576
|
+
end
|
577
|
+
false
|
387
578
|
end
|
388
579
|
|
389
580
|
# are we paused?
|
390
581
|
def paused?
|
391
|
-
@paused
|
582
|
+
@paused || redis.get('pause-all-workers').to_s.strip.downcase == 'true'
|
392
583
|
end
|
393
584
|
|
394
585
|
# Stop processing jobs after the current one has completed (if we're
|
395
586
|
# currently running one).
|
396
587
|
def pause_processing
|
397
|
-
|
588
|
+
log_with_severity :info, "USR2 received; pausing job processing"
|
589
|
+
run_hook :before_pause, self
|
398
590
|
@paused = true
|
399
591
|
end
|
400
592
|
|
401
593
|
# Start processing jobs again after a pause
|
402
594
|
def unpause_processing
|
403
|
-
|
595
|
+
log_with_severity :info, "CONT received; resuming job processing"
|
404
596
|
@paused = false
|
597
|
+
run_hook :after_pause, self
|
405
598
|
end
|
406
599
|
|
407
600
|
# Looks for any workers which should be running on this server
|
@@ -415,13 +608,45 @@ module Resque
|
|
415
608
|
# By checking the current Redis state against the actual
|
416
609
|
# environment, we can determine if Redis is old and clean it up a bit.
|
417
610
|
def prune_dead_workers
|
611
|
+
return unless data_store.acquire_pruning_dead_worker_lock(self, Resque.heartbeat_interval)
|
612
|
+
|
418
613
|
all_workers = Worker.all
|
419
|
-
|
614
|
+
|
615
|
+
known_workers = worker_pids
|
616
|
+
all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
|
617
|
+
all_workers_with_expired_heartbeats.each do |worker|
|
618
|
+
# If the worker hasn't sent a heartbeat, remove it from the registry.
|
619
|
+
#
|
620
|
+
# If the worker hasn't ever sent a heartbeat, we won't remove it since
|
621
|
+
# the first heartbeat is sent before the worker is registered, it means
|
622
|
+
# that this is a worker that doesn't support heartbeats, e.g., another
|
623
|
+
# client library or an older version of Resque. We won't touch these.
|
624
|
+
log_with_severity :info, "Pruning dead worker: #{worker}"
|
625
|
+
|
626
|
+
job_class = worker.job(false)['payload']['class'] rescue nil
|
627
|
+
worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s, job_class))
|
628
|
+
end
|
629
|
+
|
420
630
|
all_workers.each do |worker|
|
421
|
-
|
631
|
+
if all_workers_with_expired_heartbeats.include?(worker)
|
632
|
+
next
|
633
|
+
end
|
634
|
+
|
635
|
+
host, pid, worker_queues_raw = worker.id.split(':')
|
636
|
+
worker_queues = worker_queues_raw.split(",")
|
637
|
+
unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
|
638
|
+
# If the worker we are trying to prune does not belong to the queues
|
639
|
+
# we are listening to, we should not touch it.
|
640
|
+
# Attempting to prune a worker from different queues may easily result in
|
641
|
+
# an unknown class exception, since that worker could easily be even
|
642
|
+
# written in a different language.
|
643
|
+
next
|
644
|
+
end
|
645
|
+
|
422
646
|
next unless host == hostname
|
423
647
|
next if known_workers.include?(pid)
|
424
|
-
|
648
|
+
|
649
|
+
log_with_severity :debug, "Pruning dead worker: #{worker}"
|
425
650
|
worker.unregister_worker
|
426
651
|
end
|
427
652
|
end
|
@@ -429,18 +654,29 @@ module Resque
|
|
429
654
|
# Registers ourself as a worker. Useful when entering the worker
|
430
655
|
# lifecycle on startup.
|
431
656
|
def register_worker
|
432
|
-
|
433
|
-
started!
|
657
|
+
data_store.register_worker(self)
|
434
658
|
end
|
435
659
|
|
436
660
|
# Runs a named hook, passing along any arguments.
|
437
661
|
def run_hook(name, *args)
|
438
|
-
|
439
|
-
|
662
|
+
hooks = Resque.send(name)
|
663
|
+
return if hooks.empty?
|
664
|
+
return if name == :before_first_fork && @before_first_fork_hook_ran
|
665
|
+
msg = "Running #{name} hooks"
|
440
666
|
msg << " with #{args.inspect}" if args.any?
|
441
|
-
|
667
|
+
log_with_severity :info, msg
|
668
|
+
|
669
|
+
hooks.each do |hook|
|
670
|
+
args.any? ? hook.call(*args) : hook.call
|
671
|
+
@before_first_fork_hook_ran = true if name == :before_first_fork
|
672
|
+
end
|
673
|
+
end
|
442
674
|
|
443
|
-
|
675
|
+
def kill_background_threads
|
676
|
+
if @heartbeat_thread
|
677
|
+
@heartbeat_thread_signal.signal
|
678
|
+
@heartbeat_thread.join
|
679
|
+
end
|
444
680
|
end
|
445
681
|
|
446
682
|
# Unregisters ourself as a worker. Useful when shutting down.
|
@@ -452,15 +688,28 @@ module Resque
|
|
452
688
|
# Ensure the proper worker is attached to this job, even if
|
453
689
|
# it's not the precise instance that died.
|
454
690
|
job.worker = self
|
455
|
-
|
691
|
+
begin
|
692
|
+
job.fail(exception || DirtyExit.new("Job still being processed"))
|
693
|
+
rescue RuntimeError => e
|
694
|
+
log_with_severity :error, e.message
|
695
|
+
end
|
456
696
|
end
|
457
697
|
|
458
|
-
|
459
|
-
redis.del("worker:#{self}")
|
460
|
-
redis.del("worker:#{self}:started")
|
698
|
+
kill_background_threads
|
461
699
|
|
462
|
-
|
463
|
-
|
700
|
+
data_store.unregister_worker(self) do |**opts|
|
701
|
+
Stat.clear("processed:#{self}", **opts)
|
702
|
+
Stat.clear("failed:#{self}", **opts)
|
703
|
+
end
|
704
|
+
rescue Exception => exception_while_unregistering
|
705
|
+
message = exception_while_unregistering.message
|
706
|
+
if exception
|
707
|
+
message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
|
708
|
+
message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
|
709
|
+
end
|
710
|
+
fail(exception_while_unregistering.class,
|
711
|
+
message,
|
712
|
+
exception_while_unregistering.backtrace)
|
464
713
|
end
|
465
714
|
|
466
715
|
# Given a job, tells Redis we're working on it. Useful for seeing
|
@@ -468,16 +717,26 @@ module Resque
|
|
468
717
|
def working_on(job)
|
469
718
|
data = encode \
|
470
719
|
:queue => job.queue,
|
471
|
-
:run_at => Time.now.
|
720
|
+
:run_at => Time.now.utc.iso8601,
|
472
721
|
:payload => job.payload
|
473
|
-
|
722
|
+
data_store.set_worker_payload(self,data)
|
723
|
+
state_change
|
474
724
|
end
|
475
725
|
|
476
726
|
# Called when we are done working - clears our `working_on` state
|
477
727
|
# and tells Redis we processed a job.
|
478
728
|
def done_working
|
479
|
-
|
480
|
-
|
729
|
+
data_store.worker_done_working(self) do |**opts|
|
730
|
+
processed!(**opts)
|
731
|
+
end
|
732
|
+
end
|
733
|
+
|
734
|
+
def state_change
|
735
|
+
current_state = state
|
736
|
+
if current_state != @last_state
|
737
|
+
run_hook :queue_empty if current_state == :idle
|
738
|
+
@last_state = current_state
|
739
|
+
end
|
481
740
|
end
|
482
741
|
|
483
742
|
# How many jobs has this worker processed? Returns an int.
|
@@ -486,9 +745,9 @@ module Resque
|
|
486
745
|
end
|
487
746
|
|
488
747
|
# Tell Redis we've processed a job.
|
489
|
-
def processed!
|
490
|
-
Stat
|
491
|
-
Stat
|
748
|
+
def processed!(**opts)
|
749
|
+
Stat.incr("processed", 1, **opts)
|
750
|
+
Stat.incr("processed:#{self}", 1, **opts)
|
492
751
|
end
|
493
752
|
|
494
753
|
# How many failed jobs has this worker seen? Returns an int.
|
@@ -504,18 +763,20 @@ module Resque
|
|
504
763
|
|
505
764
|
# What time did this worker start? Returns an instance of `Time`
|
506
765
|
def started
|
507
|
-
|
766
|
+
data_store.worker_start_time(self)
|
508
767
|
end
|
509
768
|
|
510
769
|
# Tell Redis we've started
|
511
770
|
def started!
|
512
|
-
|
771
|
+
data_store.worker_started(self)
|
513
772
|
end
|
514
773
|
|
515
774
|
# Returns a hash explaining the Job we're currently processing, if any.
|
516
|
-
def job
|
517
|
-
|
775
|
+
def job(reload = true)
|
776
|
+
@job = nil if reload
|
777
|
+
@job ||= decode(data_store.get_worker_payload(self)) || {}
|
518
778
|
end
|
779
|
+
attr_writer :job
|
519
780
|
alias_method :processing, :job
|
520
781
|
|
521
782
|
# Boolean - true if working, false if not
|
@@ -527,15 +788,16 @@ module Resque
|
|
527
788
|
def idle?
|
528
789
|
state == :idle
|
529
790
|
end
|
530
|
-
|
531
|
-
def
|
532
|
-
|
791
|
+
|
792
|
+
def fork_per_job?
|
793
|
+
return @fork_per_job if defined?(@fork_per_job)
|
794
|
+
@fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
|
533
795
|
end
|
534
796
|
|
535
797
|
# Returns a symbol representing the current worker state,
|
536
798
|
# which can be either :working or :idle
|
537
799
|
def state
|
538
|
-
|
800
|
+
data_store.get_worker_payload(self) ? :working : :idle
|
539
801
|
end
|
540
802
|
|
541
803
|
# Is this worker the same as another worker?
|
@@ -550,18 +812,18 @@ module Resque
|
|
550
812
|
# The string representation is the same as the id for this worker
|
551
813
|
# instance. Can be used with `Worker.find`.
|
552
814
|
def to_s
|
553
|
-
@to_s ||= "#{hostname}:#{
|
815
|
+
@to_s ||= "#{hostname}:#{pid}:#{@queues.join(',')}"
|
554
816
|
end
|
555
817
|
alias_method :id, :to_s
|
556
818
|
|
557
|
-
# chomp'd hostname of this machine
|
819
|
+
# chomp'd hostname of this worker's machine
|
558
820
|
def hostname
|
559
|
-
@hostname ||=
|
821
|
+
@hostname ||= Socket.gethostname
|
560
822
|
end
|
561
823
|
|
562
824
|
# Returns Integer PID of running worker
|
563
825
|
def pid
|
564
|
-
Process.pid
|
826
|
+
@pid ||= Process.pid
|
565
827
|
end
|
566
828
|
|
567
829
|
# Returns an Array of string pids of all the other workers on this
|
@@ -569,17 +831,24 @@ module Resque
|
|
569
831
|
def worker_pids
|
570
832
|
if RUBY_PLATFORM =~ /solaris/
|
571
833
|
solaris_worker_pids
|
834
|
+
elsif RUBY_PLATFORM =~ /mingw32/
|
835
|
+
windows_worker_pids
|
572
836
|
else
|
573
837
|
linux_worker_pids
|
574
838
|
end
|
575
839
|
end
|
576
840
|
|
577
|
-
# Find Resque worker pids on Linux and OS X.
|
578
|
-
#
|
579
841
|
# Returns an Array of string pids of all the other workers on this
|
580
842
|
# machine. Useful when pruning dead workers on startup.
|
843
|
+
def windows_worker_pids
|
844
|
+
tasklist_output = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`.encode("UTF-8", Encoding.locale_charmap)
|
845
|
+
tasklist_output.split($/).select { |line| line =~ /^PID:/ }.collect { |line| line.gsub(/PID:\s+/, '') }
|
846
|
+
end
|
847
|
+
|
848
|
+
# Find Resque worker pids on Linux and OS X.
|
849
|
+
#
|
581
850
|
def linux_worker_pids
|
582
|
-
`ps -A -o pid,command | grep "[r]esque" | grep -v "resque-web"`.split("\n").map do |line|
|
851
|
+
`ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`.split("\n").map do |line|
|
583
852
|
line.split(' ')[0]
|
584
853
|
end
|
585
854
|
end
|
@@ -592,7 +861,7 @@ module Resque
|
|
592
861
|
`ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n").map do |line|
|
593
862
|
real_pid = line.split(' ')[0]
|
594
863
|
pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
|
595
|
-
if pargs_command.split(':')[1] == " resque-#{Resque::
|
864
|
+
if pargs_command.split(':')[1] == " resque-#{Resque::VERSION}"
|
596
865
|
real_pid
|
597
866
|
end
|
598
867
|
end.compact
|
@@ -600,25 +869,80 @@ module Resque
|
|
600
869
|
|
601
870
|
# Given a string, sets the procline ($0) and logs.
|
602
871
|
# Procline is always in the format of:
|
603
|
-
#
|
872
|
+
# RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
|
604
873
|
def procline(string)
|
605
|
-
$0 = "resque-#{Resque::
|
606
|
-
|
874
|
+
$0 = "#{ENV['RESQUE_PROCLINE_PREFIX']}resque-#{Resque::VERSION}: #{string}"
|
875
|
+
log_with_severity :debug, $0
|
607
876
|
end
|
608
877
|
|
609
|
-
# Log a message to STDOUT if we are verbose or very_verbose.
|
610
878
|
def log(message)
|
611
|
-
|
612
|
-
puts "*** #{message}"
|
613
|
-
elsif very_verbose
|
614
|
-
time = Time.now.strftime('%H:%M:%S %Y-%m-%d')
|
615
|
-
puts "** [#{time}] #$$: #{message}"
|
616
|
-
end
|
879
|
+
info(message)
|
617
880
|
end
|
618
881
|
|
619
|
-
# Logs a very verbose message to STDOUT.
|
620
882
|
def log!(message)
|
621
|
-
|
883
|
+
debug(message)
|
884
|
+
end
|
885
|
+
|
886
|
+
|
887
|
+
attr_reader :verbose, :very_verbose
|
888
|
+
|
889
|
+
def verbose=(value);
|
890
|
+
if value && !very_verbose
|
891
|
+
Resque.logger.formatter = VerboseFormatter.new
|
892
|
+
Resque.logger.level = Logger::INFO
|
893
|
+
elsif !value
|
894
|
+
Resque.logger.formatter = QuietFormatter.new
|
895
|
+
end
|
896
|
+
|
897
|
+
@verbose = value
|
898
|
+
end
|
899
|
+
|
900
|
+
def very_verbose=(value)
|
901
|
+
if value
|
902
|
+
Resque.logger.formatter = VeryVerboseFormatter.new
|
903
|
+
Resque.logger.level = Logger::DEBUG
|
904
|
+
elsif !value && verbose
|
905
|
+
Resque.logger.formatter = VerboseFormatter.new
|
906
|
+
Resque.logger.level = Logger::INFO
|
907
|
+
else
|
908
|
+
Resque.logger.formatter = QuietFormatter.new
|
909
|
+
end
|
910
|
+
|
911
|
+
@very_verbose = value
|
912
|
+
end
|
913
|
+
|
914
|
+
private
|
915
|
+
|
916
|
+
def perform_with_fork(job, &block)
|
917
|
+
run_hook :before_fork, job
|
918
|
+
|
919
|
+
begin
|
920
|
+
@child = fork do
|
921
|
+
unregister_signal_handlers if term_child
|
922
|
+
perform(job, &block)
|
923
|
+
exit! unless run_at_exit_hooks
|
924
|
+
end
|
925
|
+
rescue NotImplementedError
|
926
|
+
@fork_per_job = false
|
927
|
+
perform(job, &block)
|
928
|
+
return
|
929
|
+
end
|
930
|
+
|
931
|
+
srand # Reseeding
|
932
|
+
procline "Forked #{@child} at #{Time.now.to_i}"
|
933
|
+
|
934
|
+
begin
|
935
|
+
Process.waitpid(@child)
|
936
|
+
rescue SystemCallError
|
937
|
+
nil
|
938
|
+
end
|
939
|
+
|
940
|
+
job.fail(DirtyExit.new("Child process received unhandled signal #{$?}", $?)) if $?.signaled?
|
941
|
+
@child = nil
|
942
|
+
end
|
943
|
+
|
944
|
+
def log_with_severity(severity, message)
|
945
|
+
Logging.log(severity, message)
|
622
946
|
end
|
623
947
|
end
|
624
948
|
end
|