resque 1.23.0 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.md +271 -0
  3. data/README.markdown +454 -484
  4. data/Rakefile +4 -17
  5. data/bin/resque-web +10 -22
  6. data/lib/resque/data_store.rb +335 -0
  7. data/lib/resque/errors.rb +15 -1
  8. data/lib/resque/failure/airbrake.rb +32 -4
  9. data/lib/resque/failure/base.rb +16 -7
  10. data/lib/resque/failure/multiple.rb +26 -8
  11. data/lib/resque/failure/redis.rb +92 -15
  12. data/lib/resque/failure/redis_multi_queue.rb +104 -0
  13. data/lib/resque/failure.rb +62 -32
  14. data/lib/resque/helpers.rb +11 -57
  15. data/lib/resque/job.rb +79 -12
  16. data/lib/resque/log_formatters/quiet_formatter.rb +7 -0
  17. data/lib/resque/log_formatters/verbose_formatter.rb +7 -0
  18. data/lib/resque/log_formatters/very_verbose_formatter.rb +8 -0
  19. data/lib/resque/logging.rb +18 -0
  20. data/lib/resque/plugin.rb +22 -10
  21. data/lib/resque/railtie.rb +10 -0
  22. data/lib/resque/server/public/jquery-3.6.0.min.js +2 -0
  23. data/lib/resque/server/public/jquery.relatize_date.js +4 -4
  24. data/lib/resque/server/public/main.js +3 -0
  25. data/lib/resque/server/public/ranger.js +16 -8
  26. data/lib/resque/server/public/style.css +13 -8
  27. data/lib/resque/server/views/error.erb +1 -1
  28. data/lib/resque/server/views/failed.erb +27 -59
  29. data/lib/resque/server/views/failed_job.erb +50 -0
  30. data/lib/resque/server/views/failed_queues_overview.erb +24 -0
  31. data/lib/resque/server/views/job_class.erb +8 -0
  32. data/lib/resque/server/views/key_sets.erb +2 -4
  33. data/lib/resque/server/views/key_string.erb +1 -1
  34. data/lib/resque/server/views/layout.erb +7 -6
  35. data/lib/resque/server/views/next_more.erb +22 -10
  36. data/lib/resque/server/views/processing.erb +2 -0
  37. data/lib/resque/server/views/queues.erb +22 -13
  38. data/lib/resque/server/views/stats.erb +5 -5
  39. data/lib/resque/server/views/workers.erb +4 -4
  40. data/lib/resque/server/views/working.erb +10 -11
  41. data/lib/resque/server.rb +51 -108
  42. data/lib/resque/server_helper.rb +185 -0
  43. data/lib/resque/stat.rb +19 -7
  44. data/lib/resque/tasks.rb +26 -25
  45. data/lib/resque/thread_signal.rb +24 -0
  46. data/lib/resque/vendor/utf8_util.rb +2 -8
  47. data/lib/resque/version.rb +1 -1
  48. data/lib/resque/web_runner.rb +374 -0
  49. data/lib/resque/worker.rb +487 -163
  50. data/lib/resque.rb +332 -52
  51. data/lib/tasks/redis.rake +11 -11
  52. metadata +169 -149
  53. data/lib/resque/failure/hoptoad.rb +0 -33
  54. data/lib/resque/failure/thoughtbot.rb +0 -33
  55. data/lib/resque/server/public/jquery-1.3.2.min.js +0 -19
  56. data/lib/resque/server/test_helper.rb +0 -19
  57. data/lib/resque/vendor/utf8_util/utf8_util_18.rb +0 -91
  58. data/lib/resque/vendor/utf8_util/utf8_util_19.rb +0 -5
  59. data/test/airbrake_test.rb +0 -27
  60. data/test/hoptoad_test.rb +0 -26
  61. data/test/job_hooks_test.rb +0 -464
  62. data/test/job_plugins_test.rb +0 -230
  63. data/test/plugin_test.rb +0 -116
  64. data/test/redis-test-cluster.conf +0 -115
  65. data/test/redis-test.conf +0 -115
  66. data/test/resque-web_test.rb +0 -59
  67. data/test/resque_failure_redis_test.rb +0 -19
  68. data/test/resque_test.rb +0 -278
  69. data/test/test_helper.rb +0 -178
  70. data/test/worker_test.rb +0 -657
data/lib/resque/worker.rb CHANGED
@@ -1,3 +1,7 @@
1
+ require 'time'
2
+ require 'set'
3
+ require 'redis/distributed'
4
+
1
5
  module Resque
2
6
  # A Resque Worker processes jobs. On platforms that support fork(2),
3
7
  # the worker will fork off a child to process each job. This ensures
@@ -9,27 +13,63 @@ module Resque
9
13
  class Worker
10
14
  include Resque::Helpers
11
15
  extend Resque::Helpers
16
+ include Resque::Logging
17
+
18
+ @@all_heartbeat_threads = []
19
+ def self.kill_all_heartbeat_threads
20
+ @@all_heartbeat_threads.each(&:kill).each(&:join)
21
+ @@all_heartbeat_threads = []
22
+ end
23
+
24
+ def redis
25
+ Resque.redis
26
+ end
27
+ alias :data_store :redis
28
+
29
+ def self.redis
30
+ Resque.redis
31
+ end
12
32
 
13
- # Whether the worker should log basic info to STDOUT
14
- attr_accessor :verbose
33
+ def self.data_store
34
+ self.redis
35
+ end
15
36
 
16
- # Whether the worker should log lots of info to STDOUT
17
- attr_accessor :very_verbose
37
+ # Given a Ruby object, returns a string suitable for storage in a
38
+ # queue.
39
+ def encode(object)
40
+ Resque.encode(object)
41
+ end
18
42
 
19
- # Boolean indicating whether this worker can or can not fork.
20
- # Automatically set if a fork(2) fails.
21
- attr_accessor :cant_fork
43
+ # Given a string, returns a Ruby object.
44
+ def decode(object)
45
+ Resque.decode(object)
46
+ end
22
47
 
23
48
  attr_accessor :term_timeout
24
49
 
50
+ attr_accessor :pre_shutdown_timeout
51
+
52
+ attr_accessor :term_child_signal
53
+
25
54
  # decide whether to use new_kill_child logic
26
55
  attr_accessor :term_child
27
56
 
57
+ # should term kill workers gracefully (vs. immediately)
58
+ # Makes SIGTERM work like SIGQUIT
59
+ attr_accessor :graceful_term
60
+
61
+ # When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
62
+ # registered in the application. Otherwise, forked workers exit with `exit!`
63
+ attr_accessor :run_at_exit_hooks
64
+
65
+ attr_writer :fork_per_job
66
+ attr_writer :hostname
28
67
  attr_writer :to_s
68
+ attr_writer :pid
29
69
 
30
70
  # Returns an array of all worker objects.
31
71
  def self.all
32
- Array(redis.smembers(:workers)).map { |id| find(id) }.compact
72
+ data_store.worker_ids.map { |id| find(id, :skip_exists => true) }.compact
33
73
  end
34
74
 
35
75
  # Returns an array of all worker objects currently processing
@@ -38,32 +78,37 @@ module Resque
38
78
  names = all
39
79
  return [] unless names.any?
40
80
 
41
- names.map! { |name| "worker:#{name}" }
42
-
43
81
  reportedly_working = {}
44
82
 
45
83
  begin
46
- reportedly_working = redis.mapped_mget(*names).reject do |key, value|
84
+ reportedly_working = data_store.workers_map(names).reject do |key, value|
47
85
  value.nil? || value.empty?
48
86
  end
49
87
  rescue Redis::Distributed::CannotDistribute
50
88
  names.each do |name|
51
- value = redis.get name
89
+ value = data_store.get_worker_payload(name)
52
90
  reportedly_working[name] = value unless value.nil? || value.empty?
53
91
  end
54
92
  end
55
93
 
56
94
  reportedly_working.keys.map do |key|
57
- find key.sub("worker:", '')
95
+ worker = find(key.sub("worker:", ''), :skip_exists => true)
96
+ worker.job = worker.decode(reportedly_working[key])
97
+ worker
58
98
  end.compact
59
99
  end
60
100
 
61
101
  # Returns a single worker object. Accepts a string id.
62
- def self.find(worker_id)
63
- if exists? worker_id
64
- queues = worker_id.split(':')[-1].split(',')
102
+ def self.find(worker_id, options = {})
103
+ skip_exists = options[:skip_exists]
104
+
105
+ if skip_exists || exists?(worker_id)
106
+ host, pid, queues_raw = worker_id.split(':', 3)
107
+ queues = queues_raw.split(',')
65
108
  worker = new(*queues)
109
+ worker.hostname = host
66
110
  worker.to_s = worker_id
111
+ worker.pid = pid.to_i
67
112
  worker
68
113
  else
69
114
  nil
@@ -78,7 +123,7 @@ module Resque
78
123
  # Given a string worker id, return a boolean indicating whether the
79
124
  # worker exists
80
125
  def self.exists?(worker_id)
81
- redis.sismember(:workers, worker_id)
126
+ data_store.worker_exists?(worker_id)
82
127
  end
83
128
 
84
129
  # Workers should be initialized with an array of string queue
@@ -92,10 +137,60 @@ module Resque
92
137
  # If passed a single "*", this Worker will operate on all queues
93
138
  # in alphabetical order. Queues can be dynamically added or
94
139
  # removed without needing to restart workers using this method.
140
+ #
141
+ # Workers should have `#prepare` called after they are initialized
142
+ # if you are running work on the worker.
95
143
  def initialize(*queues)
96
- @queues = queues.map { |queue| queue.to_s.strip }
97
144
  @shutdown = nil
98
145
  @paused = nil
146
+ @before_first_fork_hook_ran = false
147
+
148
+ @heartbeat_thread = nil
149
+ @heartbeat_thread_signal = nil
150
+
151
+ @last_state = :idle
152
+
153
+ verbose_value = ENV['LOGGING'] || ENV['VERBOSE']
154
+ self.verbose = verbose_value if verbose_value
155
+ self.very_verbose = ENV['VVERBOSE'] if ENV['VVERBOSE']
156
+ self.pre_shutdown_timeout = (ENV['RESQUE_PRE_SHUTDOWN_TIMEOUT'] || 0.0).to_f
157
+ self.term_timeout = (ENV['RESQUE_TERM_TIMEOUT'] || 4.0).to_f
158
+ self.term_child = ENV['TERM_CHILD']
159
+ self.graceful_term = ENV['GRACEFUL_TERM']
160
+ self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']
161
+
162
+ self.queues = queues
163
+ end
164
+
165
+ # Daemonizes the worker if ENV['BACKGROUND'] is set and writes
166
+ # the process id to ENV['PIDFILE'] if set. Should only be called
167
+ # once per worker.
168
+ def prepare
169
+ if ENV['BACKGROUND']
170
+ Process.daemon(true)
171
+ end
172
+
173
+ if ENV['PIDFILE']
174
+ File.open(ENV['PIDFILE'], 'w') { |f| f << pid }
175
+ end
176
+
177
+ self.reconnect if ENV['BACKGROUND']
178
+ end
179
+
180
+ WILDCARDS = ['*', '?', '{', '}', '[', ']'].freeze
181
+
182
+ def queues=(queues)
183
+ queues = (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') if queues.empty?
184
+ queues = queues.map { |queue| queue.to_s.strip }
185
+
186
+ @skip_queues, @queues = queues.partition { |queue| queue.start_with?('!') }
187
+ @skip_queues.map! { |queue| queue[1..-1] }
188
+
189
+ # The behavior of `queues` is dependent on the value of `@has_dynamic_queues: if it's true, the method returns the result of filtering @queues with `glob_match`
190
+ # if it's false, the method returns @queues directly. Since `glob_match` will cause skipped queues to be filtered out, we want to make sure it's called if we have @skip_queues.any?
191
+ @has_dynamic_queues =
192
+ @skip_queues.any? || WILDCARDS.any? { |char| @queues.join.include?(char) }
193
+
99
194
  validate_queues
100
195
  end
101
196
 
@@ -109,6 +204,25 @@ module Resque
109
204
  end
110
205
  end
111
206
 
207
+ # Returns a list of queues to use when searching for a job.
208
+ # A splat ("*") means you want every queue (in alpha order) - this
209
+ # can be useful for dynamically adding new queues.
210
+ def queues
211
+ if @has_dynamic_queues
212
+ current_queues = Resque.queues
213
+ @queues.map { |queue| glob_match(current_queues, queue) }.flatten.uniq
214
+ else
215
+ @queues
216
+ end
217
+ end
218
+
219
+ def glob_match(list, pattern)
220
+ list.select do |queue|
221
+ File.fnmatch?(pattern, queue) &&
222
+ @skip_queues.none? { |skip_pattern| File.fnmatch?(skip_pattern, queue) }
223
+ end.sort
224
+ end
225
+
112
226
  # This is the main workhorse method. Called on a Worker instance,
113
227
  # it begins the worker life cycle.
114
228
  #
@@ -127,46 +241,47 @@ module Resque
127
241
  # has completed processing. Useful for testing.
128
242
  def work(interval = 5.0, &block)
129
243
  interval = Float(interval)
130
- $0 = "resque: Starting"
131
244
  startup
132
245
 
133
246
  loop do
134
247
  break if shutdown?
135
248
 
136
- if not paused? and job = reserve
137
- log "got: #{job.inspect}"
138
- job.worker = self
139
- working_on job
140
-
141
- if @child = fork(job)
142
- srand # Reseeding
143
- procline "Forked #{@child} at #{Time.now.to_i}"
144
- begin
145
- Process.waitpid(@child)
146
- rescue SystemCallError
147
- nil
148
- end
149
- else
150
- unregister_signal_handlers if will_fork? && term_child
151
- procline "Processing #{job.queue} since #{Time.now.to_i}"
152
- reconnect
153
- perform(job, &block)
154
- exit! if will_fork?
155
- end
156
-
157
- done_working
158
- @child = nil
159
- else
249
+ unless work_one_job(&block)
250
+ state_change
160
251
  break if interval.zero?
161
- log! "Sleeping for #{interval} seconds"
162
- procline paused? ? "Paused" : "Waiting for #{@queues.join(',')}"
252
+ log_with_severity :debug, "Sleeping for #{interval} seconds"
253
+ procline paused? ? "Paused" : "Waiting for #{queues.join(',')}"
163
254
  sleep interval
164
255
  end
165
256
  end
166
257
 
167
258
  unregister_worker
259
+ run_hook :worker_exit
168
260
  rescue Exception => exception
261
+ return if exception.class == SystemExit && !@child && run_at_exit_hooks
262
+ log_with_severity :error, "Failed to start worker : #{exception.inspect}"
169
263
  unregister_worker(exception)
264
+ run_hook :worker_exit
265
+ end
266
+
267
+ def work_one_job(job = nil, &block)
268
+ return false if paused?
269
+ return false unless job ||= reserve
270
+
271
+ working_on job
272
+ procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"
273
+
274
+ log_with_severity :info, "got: #{job.inspect}"
275
+ job.worker = self
276
+
277
+ if fork_per_job?
278
+ perform_with_fork(job, &block)
279
+ else
280
+ perform(job, &block)
281
+ end
282
+
283
+ done_working
284
+ true
170
285
  end
171
286
 
172
287
  # DEPRECATED. Processes a single job. If none is given, it will
@@ -181,21 +296,34 @@ module Resque
181
296
  done_working
182
297
  end
183
298
 
299
+ # Reports the exception and marks the job as failed
300
+ def report_failed_job(job,exception)
301
+ log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"
302
+ begin
303
+ job.fail(exception)
304
+ rescue Object => exception
305
+ log_with_severity :error, "Received exception when reporting failure: #{exception.inspect}"
306
+ end
307
+ begin
308
+ failed!
309
+ rescue Object => exception
310
+ log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{exception.inspect}"
311
+ end
312
+ end
313
+
314
+
184
315
  # Processes a given job in the child.
185
316
  def perform(job)
186
317
  begin
187
- run_hook :after_fork, job if will_fork?
318
+ if fork_per_job?
319
+ reconnect
320
+ run_hook :after_fork, job
321
+ end
188
322
  job.perform
189
323
  rescue Object => e
190
- log "#{job.inspect} failed: #{e.inspect}"
191
- begin
192
- job.fail(e)
193
- rescue Object => e
194
- log "Received exception when reporting failure: #{e.inspect}"
195
- end
196
- failed!
324
+ report_failed_job(job,e)
197
325
  else
198
- log "done: #{job.inspect}"
326
+ log_with_severity :info, "done: #{job.inspect}"
199
327
  ensure
200
328
  yield job if block_given?
201
329
  end
@@ -205,17 +333,17 @@ module Resque
205
333
  # nil if no job can be found.
206
334
  def reserve
207
335
  queues.each do |queue|
208
- log! "Checking #{queue}"
336
+ log_with_severity :debug, "Checking #{queue}"
209
337
  if job = Resque.reserve(queue)
210
- log! "Found job on #{queue}"
338
+ log_with_severity :debug, "Found job on #{queue}"
211
339
  return job
212
340
  end
213
341
  end
214
342
 
215
343
  nil
216
344
  rescue Exception => e
217
- log "Error reserving job: #{e.inspect}"
218
- log e.backtrace.join("\n")
345
+ log_with_severity :error, "Error reserving job: #{e.inspect}"
346
+ log_with_severity :error, e.backtrace.join("\n")
219
347
  raise e
220
348
  end
221
349
 
@@ -224,53 +352,26 @@ module Resque
224
352
  def reconnect
225
353
  tries = 0
226
354
  begin
227
- redis.client.reconnect
355
+ data_store.reconnect
228
356
  rescue Redis::BaseConnectionError
229
357
  if (tries += 1) <= 3
230
- log "Error reconnecting to Redis; retrying"
358
+ log_with_severity :error, "Error reconnecting to Redis; retrying"
231
359
  sleep(tries)
232
360
  retry
233
361
  else
234
- log "Error reconnecting to Redis; quitting"
362
+ log_with_severity :error, "Error reconnecting to Redis; quitting"
235
363
  raise
236
364
  end
237
365
  end
238
366
  end
239
367
 
240
- # Returns a list of queues to use when searching for a job.
241
- # A splat ("*") means you want every queue (in alpha order) - this
242
- # can be useful for dynamically adding new queues.
243
- def queues
244
- @queues.map {|queue| queue == "*" ? Resque.queues.sort : queue }.flatten.uniq
245
- end
246
-
247
- # Not every platform supports fork. Here we do our magic to
248
- # determine if yours does.
249
- def fork(job)
250
- return if @cant_fork
251
-
252
- # Only run before_fork hooks if we're actually going to fork
253
- # (after checking @cant_fork)
254
- run_hook :before_fork, job
255
-
256
- begin
257
- # IronRuby doesn't support `Kernel.fork` yet
258
- if Kernel.respond_to?(:fork)
259
- Kernel.fork if will_fork?
260
- else
261
- raise NotImplementedError
262
- end
263
- rescue NotImplementedError
264
- @cant_fork = true
265
- nil
266
- end
267
- end
268
-
269
368
  # Runs all the methods needed when a worker begins its lifecycle.
270
369
  def startup
271
- warn "WARNING: This way of doing signal handling is now deprecated. Please see http://hone.heroku.com/resque/2012/08/21/resque-signals.html for more info." unless term_child
370
+ $0 = "resque: Starting"
371
+
272
372
  enable_gc_optimizations
273
373
  register_signal_handlers
374
+ start_heartbeat
274
375
  prune_dead_workers
275
376
  run_hook :before_first_fork
276
377
  register_worker
@@ -297,7 +398,7 @@ module Resque
297
398
  # USR2: Don't process any new jobs
298
399
  # CONT: Start processing jobs again after a USR2
299
400
  def register_signal_handlers
300
- trap('TERM') { shutdown! }
401
+ trap('TERM') { graceful_term ? shutdown : shutdown! }
301
402
  trap('INT') { shutdown! }
302
403
 
303
404
  begin
@@ -310,14 +411,21 @@ module Resque
310
411
  trap('USR2') { pause_processing }
311
412
  trap('CONT') { unpause_processing }
312
413
  rescue ArgumentError
313
- warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
414
+ log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
314
415
  end
315
416
 
316
- log! "Registered signals"
417
+ log_with_severity :debug, "Registered signals"
317
418
  end
318
419
 
319
420
  def unregister_signal_handlers
320
- trap('TERM') { raise TermException.new("SIGTERM") }
421
+ trap('TERM') do
422
+ trap('TERM') do
423
+ # Ignore subsequent term signals
424
+ end
425
+
426
+ raise TermException.new("SIGTERM")
427
+ end
428
+
321
429
  trap('INT', 'DEFAULT')
322
430
 
323
431
  begin
@@ -331,15 +439,24 @@ module Resque
331
439
  # Schedule this worker for shutdown. Will finish processing the
332
440
  # current job.
333
441
  def shutdown
334
- log 'Exiting...'
442
+ log_with_severity :info, 'Exiting...'
335
443
  @shutdown = true
336
444
  end
337
445
 
338
446
  # Kill the child and shutdown immediately.
447
+ # If not forking, abort this process.
339
448
  def shutdown!
340
449
  shutdown
341
450
  if term_child
342
- new_kill_child
451
+ if fork_per_job?
452
+ new_kill_child
453
+ else
454
+ # Raise TermException in the same process
455
+ trap('TERM') do
456
+ # ignore subsequent terms
457
+ end
458
+ raise TermException.new("SIGTERM")
459
+ end
343
460
  else
344
461
  kill_child
345
462
  end
@@ -354,54 +471,130 @@ module Resque
354
471
  # is processing will not be completed.
355
472
  def kill_child
356
473
  if @child
357
- log! "Killing child at #{@child}"
358
- if system("ps -o pid,state -p #{@child}")
474
+ log_with_severity :debug, "Killing child at #{@child}"
475
+ if `ps -o pid,state -p #{@child}`
359
476
  Process.kill("KILL", @child) rescue nil
360
477
  else
361
- log! "Child #{@child} not found, restarting."
478
+ log_with_severity :debug, "Child #{@child} not found, restarting."
362
479
  shutdown
363
480
  end
364
481
  end
365
482
  end
366
483
 
484
+ def heartbeat
485
+ data_store.heartbeat(self)
486
+ end
487
+
488
+ def remove_heartbeat
489
+ data_store.remove_heartbeat(self)
490
+ end
491
+
492
+ def heartbeat!(time = data_store.server_time)
493
+ data_store.heartbeat!(self, time)
494
+ end
495
+
496
+ def self.all_heartbeats
497
+ data_store.all_heartbeats
498
+ end
499
+
500
+ # Returns a list of workers that have sent a heartbeat in the past, but which
501
+ # already expired (does NOT include workers that have never sent a heartbeat at all).
502
+ def self.all_workers_with_expired_heartbeats
503
+ # Use `Worker.all_heartbeats` instead of `Worker.all`
504
+ # to prune workers which haven't been registered but have set a heartbeat.
505
+ # https://github.com/resque/resque/pull/1751
506
+ heartbeats = Worker.all_heartbeats
507
+ now = data_store.server_time
508
+
509
+ heartbeats.select do |id, heartbeat|
510
+ if heartbeat
511
+ seconds_since_heartbeat = (now - Time.parse(heartbeat)).to_i
512
+ seconds_since_heartbeat > Resque.prune_interval
513
+ else
514
+ false
515
+ end
516
+ end.each_key.map do |id|
517
+ # skip_exists must be true to include not registered workers
518
+ find(id, :skip_exists => true)
519
+ end
520
+ end
521
+
522
+ def start_heartbeat
523
+ remove_heartbeat
524
+
525
+ @heartbeat_thread_signal = Resque::ThreadSignal.new
526
+
527
+ @heartbeat_thread = Thread.new do
528
+ loop do
529
+ heartbeat!
530
+ signaled = @heartbeat_thread_signal.wait_for_signal(Resque.heartbeat_interval)
531
+ break if signaled
532
+ end
533
+ end
534
+
535
+ @@all_heartbeat_threads << @heartbeat_thread
536
+ end
537
+
367
538
  # Kills the forked child immediately with minimal remorse. The job it
368
539
  # is processing will not be completed. Send the child a TERM signal,
369
- # wait 5 seconds, and then a KILL signal if it has not quit
540
+ # wait <term_timeout> seconds, and then a KILL signal if it has not quit
541
+ # If pre_shutdown_timeout has been set to a positive number, it will allow
542
+ # the child that many seconds before sending the aforementioned TERM and KILL.
370
543
  def new_kill_child
371
544
  if @child
372
- unless Process.waitpid(@child, Process::WNOHANG)
373
- log! "Sending TERM signal to child #{@child}"
545
+ unless child_already_exited?
546
+ if pre_shutdown_timeout && pre_shutdown_timeout > 0.0
547
+ log_with_severity :debug, "Waiting #{pre_shutdown_timeout.to_f}s for child process to exit"
548
+ return if wait_for_child_exit(pre_shutdown_timeout)
549
+ end
550
+
551
+ log_with_severity :debug, "Sending TERM signal to child #{@child}"
374
552
  Process.kill("TERM", @child)
375
- (term_timeout.to_f * 10).round.times do |i|
376
- sleep(0.1)
377
- return if Process.waitpid(@child, Process::WNOHANG)
553
+
554
+ if wait_for_child_exit(term_timeout)
555
+ return
556
+ else
557
+ log_with_severity :debug, "Sending KILL signal to child #{@child}"
558
+ Process.kill("KILL", @child)
378
559
  end
379
- log! "Sending KILL signal to child #{@child}"
380
- Process.kill("KILL", @child)
381
560
  else
382
- log! "Child #{@child} already quit."
561
+ log_with_severity :debug, "Child #{@child} already quit."
383
562
  end
384
563
  end
385
564
  rescue SystemCallError
386
- log! "Child #{@child} already quit and reaped."
565
+ log_with_severity :error, "Child #{@child} already quit and reaped."
566
+ end
567
+
568
+ def child_already_exited?
569
+ Process.waitpid(@child, Process::WNOHANG)
570
+ end
571
+
572
+ def wait_for_child_exit(timeout)
573
+ (timeout * 10).round.times do |i|
574
+ sleep(0.1)
575
+ return true if child_already_exited?
576
+ end
577
+ false
387
578
  end
388
579
 
389
580
  # are we paused?
390
581
  def paused?
391
- @paused
582
+ @paused || redis.get('pause-all-workers').to_s.strip.downcase == 'true'
392
583
  end
393
584
 
394
585
  # Stop processing jobs after the current one has completed (if we're
395
586
  # currently running one).
396
587
  def pause_processing
397
- log "USR2 received; pausing job processing"
588
+ log_with_severity :info, "USR2 received; pausing job processing"
589
+ run_hook :before_pause, self
398
590
  @paused = true
399
591
  end
400
592
 
401
593
  # Start processing jobs again after a pause
402
594
  def unpause_processing
403
- log "CONT received; resuming job processing"
595
+ log_with_severity :info, "CONT received; resuming job processing"
404
596
  @paused = false
597
+ run_hook :after_pause, self
405
598
  end
406
599
 
407
600
  # Looks for any workers which should be running on this server
@@ -415,13 +608,45 @@ module Resque
415
608
  # By checking the current Redis state against the actual
416
609
  # environment, we can determine if Redis is old and clean it up a bit.
417
610
  def prune_dead_workers
611
+ return unless data_store.acquire_pruning_dead_worker_lock(self, Resque.heartbeat_interval)
612
+
418
613
  all_workers = Worker.all
419
- known_workers = worker_pids unless all_workers.empty?
614
+
615
+ known_workers = worker_pids
616
+ all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
617
+ all_workers_with_expired_heartbeats.each do |worker|
618
+ # If the worker's heartbeat has expired, remove it from the registry.
619
+ #
620
+ # If the worker hasn't ever sent a heartbeat, we won't remove it since
621
+ # the first heartbeat is sent before the worker is registered, it means
622
+ # that this is a worker that doesn't support heartbeats, e.g., another
623
+ # client library or an older version of Resque. We won't touch these.
624
+ log_with_severity :info, "Pruning dead worker: #{worker}"
625
+
626
+ job_class = worker.job(false)['payload']['class'] rescue nil
627
+ worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s, job_class))
628
+ end
629
+
420
630
  all_workers.each do |worker|
421
- host, pid, queues = worker.id.split(':')
631
+ if all_workers_with_expired_heartbeats.include?(worker)
632
+ next
633
+ end
634
+
635
+ host, pid, worker_queues_raw = worker.id.split(':')
636
+ worker_queues = worker_queues_raw.split(",")
637
+ unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
638
+ # If the worker we are trying to prune does not belong to the queues
639
+ # we are listening to, we should not touch it.
640
+ # Attempt to prune a worker from different queues may easily result in
641
+ # an unknown class exception, since that worker could easily be even
642
+ # written in a different language.
643
+ next
644
+ end
645
+
422
646
  next unless host == hostname
423
647
  next if known_workers.include?(pid)
424
- log! "Pruning dead worker: #{worker}"
648
+
649
+ log_with_severity :debug, "Pruning dead worker: #{worker}"
425
650
  worker.unregister_worker
426
651
  end
427
652
  end
@@ -429,18 +654,29 @@ module Resque
429
654
  # Registers ourself as a worker. Useful when entering the worker
430
655
  # lifecycle on startup.
431
656
  def register_worker
432
- redis.sadd(:workers, self)
433
- started!
657
+ data_store.register_worker(self)
434
658
  end
435
659
 
436
660
  # Runs a named hook, passing along any arguments.
437
661
  def run_hook(name, *args)
438
- return unless hook = Resque.send(name)
439
- msg = "Running #{name} hook"
662
+ hooks = Resque.send(name)
663
+ return if hooks.empty?
664
+ return if name == :before_first_fork && @before_first_fork_hook_ran
665
+ msg = "Running #{name} hooks"
440
666
  msg << " with #{args.inspect}" if args.any?
441
- log msg
667
+ log_with_severity :info, msg
668
+
669
+ hooks.each do |hook|
670
+ args.any? ? hook.call(*args) : hook.call
671
+ @before_first_fork_hook_ran = true if name == :before_first_fork
672
+ end
673
+ end
442
674
 
443
- args.any? ? hook.call(*args) : hook.call
675
+ def kill_background_threads
676
+ if @heartbeat_thread
677
+ @heartbeat_thread_signal.signal
678
+ @heartbeat_thread.join
679
+ end
444
680
  end
445
681
 
446
682
  # Unregisters ourself as a worker. Useful when shutting down.
@@ -452,15 +688,28 @@ module Resque
452
688
  # Ensure the proper worker is attached to this job, even if
453
689
  # it's not the precise instance that died.
454
690
  job.worker = self
455
- job.fail(exception || DirtyExit.new)
691
+ begin
692
+ job.fail(exception || DirtyExit.new("Job still being processed"))
693
+ rescue RuntimeError => e
694
+ log_with_severity :error, e.message
695
+ end
456
696
  end
457
697
 
458
- redis.srem(:workers, self)
459
- redis.del("worker:#{self}")
460
- redis.del("worker:#{self}:started")
698
+ kill_background_threads
461
699
 
462
- Stat.clear("processed:#{self}")
463
- Stat.clear("failed:#{self}")
700
+ data_store.unregister_worker(self) do |**opts|
701
+ Stat.clear("processed:#{self}", **opts)
702
+ Stat.clear("failed:#{self}", **opts)
703
+ end
704
+ rescue Exception => exception_while_unregistering
705
+ message = exception_while_unregistering.message
706
+ if exception
707
+ message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
708
+ message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
709
+ end
710
+ fail(exception_while_unregistering.class,
711
+ message,
712
+ exception_while_unregistering.backtrace)
464
713
  end
465
714
 
466
715
  # Given a job, tells Redis we're working on it. Useful for seeing
@@ -468,16 +717,26 @@ module Resque
468
717
  def working_on(job)
469
718
  data = encode \
470
719
  :queue => job.queue,
471
- :run_at => Time.now.strftime("%Y/%m/%d %H:%M:%S %Z"),
720
+ :run_at => Time.now.utc.iso8601,
472
721
  :payload => job.payload
473
- redis.set("worker:#{self}", data)
722
+ data_store.set_worker_payload(self,data)
723
+ state_change
474
724
  end
475
725
 
476
726
  # Called when we are done working - clears our `working_on` state
477
727
  # and tells Redis we processed a job.
478
728
  def done_working
479
- processed!
480
- redis.del("worker:#{self}")
729
+ data_store.worker_done_working(self) do |**opts|
730
+ processed!(**opts)
731
+ end
732
+ end
733
+
734
+ def state_change
735
+ current_state = state
736
+ if current_state != @last_state
737
+ run_hook :queue_empty if current_state == :idle
738
+ @last_state = current_state
739
+ end
481
740
  end
482
741
 
483
742
  # How many jobs has this worker processed? Returns an int.
@@ -486,9 +745,9 @@ module Resque
486
745
  end
487
746
 
488
747
  # Tell Redis we've processed a job.
489
- def processed!
490
- Stat << "processed"
491
- Stat << "processed:#{self}"
748
+ def processed!(**opts)
749
+ Stat.incr("processed", 1, **opts)
750
+ Stat.incr("processed:#{self}", 1, **opts)
492
751
  end
493
752
 
494
753
  # How many failed jobs has this worker seen? Returns an int.
@@ -504,18 +763,20 @@ module Resque
504
763
 
505
764
  # What time did this worker start? Returns an instance of `Time`
506
765
  def started
507
- redis.get "worker:#{self}:started"
766
+ data_store.worker_start_time(self)
508
767
  end
509
768
 
510
769
  # Tell Redis we've started
511
770
  def started!
512
- redis.set("worker:#{self}:started", Time.now.to_s)
771
+ data_store.worker_started(self)
513
772
  end
514
773
 
515
774
  # Returns a hash explaining the Job we're currently processing, if any.
516
- def job
517
- decode(redis.get("worker:#{self}")) || {}
775
+ def job(reload = true)
776
+ @job = nil if reload
777
+ @job ||= decode(data_store.get_worker_payload(self)) || {}
518
778
  end
779
+ attr_writer :job
519
780
  alias_method :processing, :job
520
781
 
521
782
  # Boolean - true if working, false if not
@@ -527,15 +788,16 @@ module Resque
527
788
  def idle?
528
789
  state == :idle
529
790
  end
530
-
531
- def will_fork?
532
- !(@cant_fork || $TESTING)
791
+
792
+ def fork_per_job?
793
+ return @fork_per_job if defined?(@fork_per_job)
794
+ @fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
533
795
  end
534
796
 
535
797
  # Returns a symbol representing the current worker state,
536
798
  # which can be either :working or :idle
537
799
  def state
538
- redis.exists("worker:#{self}") ? :working : :idle
800
+ data_store.get_worker_payload(self) ? :working : :idle
539
801
  end
540
802
 
541
803
  # Is this worker the same as another worker?
@@ -550,18 +812,18 @@ module Resque
550
812
  # The string representation is the same as the id for this worker
551
813
  # instance. Can be used with `Worker.find`.
552
814
  def to_s
553
- @to_s ||= "#{hostname}:#{Process.pid}:#{@queues.join(',')}"
815
+ @to_s ||= "#{hostname}:#{pid}:#{@queues.join(',')}"
554
816
  end
555
817
  alias_method :id, :to_s
556
818
 
557
- # chomp'd hostname of this machine
819
+ # chomp'd hostname of this worker's machine
558
820
  def hostname
559
- @hostname ||= `hostname`.chomp
821
+ @hostname ||= Socket.gethostname
560
822
  end
561
823
 
562
824
  # Returns Integer PID of running worker
563
825
  def pid
564
- Process.pid
826
+ @pid ||= Process.pid
565
827
  end
566
828
 
567
829
  # Returns an Array of string pids of all the other workers on this
@@ -569,17 +831,24 @@ module Resque
569
831
  def worker_pids
570
832
  if RUBY_PLATFORM =~ /solaris/
571
833
  solaris_worker_pids
834
+ elsif RUBY_PLATFORM =~ /mingw32/
835
+ windows_worker_pids
572
836
  else
573
837
  linux_worker_pids
574
838
  end
575
839
  end
576
840
 
577
- # Find Resque worker pids on Linux and OS X.
578
- #
579
841
  # Returns an Array of string pids of all the other workers on this
580
842
  # machine. Useful when pruning dead workers on startup.
843
+ def windows_worker_pids
844
+ tasklist_output = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`.encode("UTF-8", Encoding.locale_charmap)
845
+ tasklist_output.split($/).select { |line| line =~ /^PID:/ }.collect { |line| line.gsub(/PID:\s+/, '') }
846
+ end
847
+
848
+ # Find Resque worker pids on Linux and OS X.
849
+ #
581
850
  def linux_worker_pids
582
- `ps -A -o pid,command | grep "[r]esque" | grep -v "resque-web"`.split("\n").map do |line|
851
+ `ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`.split("\n").map do |line|
583
852
  line.split(' ')[0]
584
853
  end
585
854
  end
@@ -592,7 +861,7 @@ module Resque
592
861
  `ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n").map do |line|
593
862
  real_pid = line.split(' ')[0]
594
863
  pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
595
- if pargs_command.split(':')[1] == " resque-#{Resque::Version}"
864
+ if pargs_command.split(':')[1] == " resque-#{Resque::VERSION}"
596
865
  real_pid
597
866
  end
598
867
  end.compact
@@ -600,25 +869,80 @@ module Resque
600
869
 
601
870
  # Given a string, sets the procline ($0) and logs.
602
871
  # Procline is always in the format of:
603
- # resque-VERSION: STRING
872
+ # RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
604
873
  def procline(string)
605
- $0 = "resque-#{Resque::Version}: #{string}"
606
- log! $0
874
+ $0 = "#{ENV['RESQUE_PROCLINE_PREFIX']}resque-#{Resque::VERSION}: #{string}"
875
+ log_with_severity :debug, $0
607
876
  end
608
877
 
609
- # Log a message to STDOUT if we are verbose or very_verbose.
610
878
  def log(message)
611
- if verbose
612
- puts "*** #{message}"
613
- elsif very_verbose
614
- time = Time.now.strftime('%H:%M:%S %Y-%m-%d')
615
- puts "** [#{time}] #$$: #{message}"
616
- end
879
+ info(message)
617
880
  end
618
881
 
619
- # Logs a very verbose message to STDOUT.
620
882
  def log!(message)
621
- log message if very_verbose
883
+ debug(message)
884
+ end
885
+
886
+
887
+ attr_reader :verbose, :very_verbose
888
+
889
+ def verbose=(value);
890
+ if value && !very_verbose
891
+ Resque.logger.formatter = VerboseFormatter.new
892
+ Resque.logger.level = Logger::INFO
893
+ elsif !value
894
+ Resque.logger.formatter = QuietFormatter.new
895
+ end
896
+
897
+ @verbose = value
898
+ end
899
+
900
+ def very_verbose=(value)
901
+ if value
902
+ Resque.logger.formatter = VeryVerboseFormatter.new
903
+ Resque.logger.level = Logger::DEBUG
904
+ elsif !value && verbose
905
+ Resque.logger.formatter = VerboseFormatter.new
906
+ Resque.logger.level = Logger::INFO
907
+ else
908
+ Resque.logger.formatter = QuietFormatter.new
909
+ end
910
+
911
+ @very_verbose = value
912
+ end
913
+
914
+ private
915
+
916
+ def perform_with_fork(job, &block)
917
+ run_hook :before_fork, job
918
+
919
+ begin
920
+ @child = fork do
921
+ unregister_signal_handlers if term_child
922
+ perform(job, &block)
923
+ exit! unless run_at_exit_hooks
924
+ end
925
+ rescue NotImplementedError
926
+ @fork_per_job = false
927
+ perform(job, &block)
928
+ return
929
+ end
930
+
931
+ srand # Reseeding
932
+ procline "Forked #{@child} at #{Time.now.to_i}"
933
+
934
+ begin
935
+ Process.waitpid(@child)
936
+ rescue SystemCallError
937
+ nil
938
+ end
939
+
940
+ job.fail(DirtyExit.new("Child process received unhandled signal #{$?}", $?)) if $?.signaled?
941
+ @child = nil
942
+ end
943
+
944
+ def log_with_severity(severity, message)
945
+ Logging.log(severity, message)
622
946
  end
623
947
  end
624
948
  end