resque 1.23.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.md +271 -0
  3. data/README.markdown +454 -484
  4. data/Rakefile +4 -17
  5. data/bin/resque-web +10 -22
  6. data/lib/resque/data_store.rb +335 -0
  7. data/lib/resque/errors.rb +15 -1
  8. data/lib/resque/failure/airbrake.rb +32 -4
  9. data/lib/resque/failure/base.rb +16 -7
  10. data/lib/resque/failure/multiple.rb +26 -8
  11. data/lib/resque/failure/redis.rb +92 -15
  12. data/lib/resque/failure/redis_multi_queue.rb +104 -0
  13. data/lib/resque/failure.rb +62 -32
  14. data/lib/resque/helpers.rb +11 -57
  15. data/lib/resque/job.rb +79 -12
  16. data/lib/resque/log_formatters/quiet_formatter.rb +7 -0
  17. data/lib/resque/log_formatters/verbose_formatter.rb +7 -0
  18. data/lib/resque/log_formatters/very_verbose_formatter.rb +8 -0
  19. data/lib/resque/logging.rb +18 -0
  20. data/lib/resque/plugin.rb +22 -10
  21. data/lib/resque/railtie.rb +10 -0
  22. data/lib/resque/server/public/jquery-3.6.0.min.js +2 -0
  23. data/lib/resque/server/public/jquery.relatize_date.js +4 -4
  24. data/lib/resque/server/public/main.js +3 -0
  25. data/lib/resque/server/public/ranger.js +16 -8
  26. data/lib/resque/server/public/style.css +13 -8
  27. data/lib/resque/server/views/error.erb +1 -1
  28. data/lib/resque/server/views/failed.erb +27 -59
  29. data/lib/resque/server/views/failed_job.erb +50 -0
  30. data/lib/resque/server/views/failed_queues_overview.erb +24 -0
  31. data/lib/resque/server/views/job_class.erb +8 -0
  32. data/lib/resque/server/views/key_sets.erb +2 -4
  33. data/lib/resque/server/views/key_string.erb +1 -1
  34. data/lib/resque/server/views/layout.erb +7 -6
  35. data/lib/resque/server/views/next_more.erb +22 -10
  36. data/lib/resque/server/views/processing.erb +2 -0
  37. data/lib/resque/server/views/queues.erb +22 -13
  38. data/lib/resque/server/views/stats.erb +5 -5
  39. data/lib/resque/server/views/workers.erb +4 -4
  40. data/lib/resque/server/views/working.erb +10 -11
  41. data/lib/resque/server.rb +51 -108
  42. data/lib/resque/server_helper.rb +185 -0
  43. data/lib/resque/stat.rb +19 -7
  44. data/lib/resque/tasks.rb +26 -25
  45. data/lib/resque/thread_signal.rb +24 -0
  46. data/lib/resque/vendor/utf8_util.rb +2 -8
  47. data/lib/resque/version.rb +1 -1
  48. data/lib/resque/web_runner.rb +374 -0
  49. data/lib/resque/worker.rb +487 -163
  50. data/lib/resque.rb +332 -52
  51. data/lib/tasks/redis.rake +11 -11
  52. metadata +169 -149
  53. data/lib/resque/failure/hoptoad.rb +0 -33
  54. data/lib/resque/failure/thoughtbot.rb +0 -33
  55. data/lib/resque/server/public/jquery-1.3.2.min.js +0 -19
  56. data/lib/resque/server/test_helper.rb +0 -19
  57. data/lib/resque/vendor/utf8_util/utf8_util_18.rb +0 -91
  58. data/lib/resque/vendor/utf8_util/utf8_util_19.rb +0 -5
  59. data/test/airbrake_test.rb +0 -27
  60. data/test/hoptoad_test.rb +0 -26
  61. data/test/job_hooks_test.rb +0 -464
  62. data/test/job_plugins_test.rb +0 -230
  63. data/test/plugin_test.rb +0 -116
  64. data/test/redis-test-cluster.conf +0 -115
  65. data/test/redis-test.conf +0 -115
  66. data/test/resque-web_test.rb +0 -59
  67. data/test/resque_failure_redis_test.rb +0 -19
  68. data/test/resque_test.rb +0 -278
  69. data/test/test_helper.rb +0 -178
  70. data/test/worker_test.rb +0 -657
data/lib/resque/worker.rb CHANGED
@@ -1,3 +1,7 @@
1
+ require 'time'
2
+ require 'set'
3
+ require 'redis/distributed'
4
+
1
5
  module Resque
2
6
  # A Resque Worker processes jobs. On platforms that support fork(2),
3
7
  # the worker will fork off a child to process each job. This ensures
@@ -9,27 +13,63 @@ module Resque
9
13
  class Worker
10
14
  include Resque::Helpers
11
15
  extend Resque::Helpers
16
+ include Resque::Logging
17
+
18
+ @@all_heartbeat_threads = []
19
+ def self.kill_all_heartbeat_threads
20
+ @@all_heartbeat_threads.each(&:kill).each(&:join)
21
+ @@all_heartbeat_threads = []
22
+ end
23
+
24
+ def redis
25
+ Resque.redis
26
+ end
27
+ alias :data_store :redis
28
+
29
+ def self.redis
30
+ Resque.redis
31
+ end
12
32
 
13
- # Whether the worker should log basic info to STDOUT
14
- attr_accessor :verbose
33
+ def self.data_store
34
+ self.redis
35
+ end
15
36
 
16
- # Whether the worker should log lots of info to STDOUT
17
- attr_accessor :very_verbose
37
+ # Given a Ruby object, returns a string suitable for storage in a
38
+ # queue.
39
+ def encode(object)
40
+ Resque.encode(object)
41
+ end
18
42
 
19
- # Boolean indicating whether this worker can or can not fork.
20
- # Automatically set if a fork(2) fails.
21
- attr_accessor :cant_fork
43
+ # Given a string, returns a Ruby object.
44
+ def decode(object)
45
+ Resque.decode(object)
46
+ end
22
47
 
23
48
  attr_accessor :term_timeout
24
49
 
50
+ attr_accessor :pre_shutdown_timeout
51
+
52
+ attr_accessor :term_child_signal
53
+
25
54
  # decide whether to use new_kill_child logic
26
55
  attr_accessor :term_child
27
56
 
57
+ # should term kill workers gracefully (vs. immediately)
58
+ # Makes SIGTERM work like SIGQUIT
59
+ attr_accessor :graceful_term
60
+
61
+ # When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
62
+ # registered in the application. Otherwise, forked workers exit with `exit!`
63
+ attr_accessor :run_at_exit_hooks
64
+
65
+ attr_writer :fork_per_job
66
+ attr_writer :hostname
28
67
  attr_writer :to_s
68
+ attr_writer :pid
29
69
 
30
70
  # Returns an array of all worker objects.
31
71
  def self.all
32
- Array(redis.smembers(:workers)).map { |id| find(id) }.compact
72
+ data_store.worker_ids.map { |id| find(id, :skip_exists => true) }.compact
33
73
  end
34
74
 
35
75
  # Returns an array of all worker objects currently processing
@@ -38,32 +78,37 @@ module Resque
38
78
  names = all
39
79
  return [] unless names.any?
40
80
 
41
- names.map! { |name| "worker:#{name}" }
42
-
43
81
  reportedly_working = {}
44
82
 
45
83
  begin
46
- reportedly_working = redis.mapped_mget(*names).reject do |key, value|
84
+ reportedly_working = data_store.workers_map(names).reject do |key, value|
47
85
  value.nil? || value.empty?
48
86
  end
49
87
  rescue Redis::Distributed::CannotDistribute
50
88
  names.each do |name|
51
- value = redis.get name
89
+ value = data_store.get_worker_payload(name)
52
90
  reportedly_working[name] = value unless value.nil? || value.empty?
53
91
  end
54
92
  end
55
93
 
56
94
  reportedly_working.keys.map do |key|
57
- find key.sub("worker:", '')
95
+ worker = find(key.sub("worker:", ''), :skip_exists => true)
96
+ worker.job = worker.decode(reportedly_working[key])
97
+ worker
58
98
  end.compact
59
99
  end
60
100
 
61
101
  # Returns a single worker object. Accepts a string id.
62
- def self.find(worker_id)
63
- if exists? worker_id
64
- queues = worker_id.split(':')[-1].split(',')
102
+ def self.find(worker_id, options = {})
103
+ skip_exists = options[:skip_exists]
104
+
105
+ if skip_exists || exists?(worker_id)
106
+ host, pid, queues_raw = worker_id.split(':', 3)
107
+ queues = queues_raw.split(',')
65
108
  worker = new(*queues)
109
+ worker.hostname = host
66
110
  worker.to_s = worker_id
111
+ worker.pid = pid.to_i
67
112
  worker
68
113
  else
69
114
  nil
@@ -78,7 +123,7 @@ module Resque
78
123
  # Given a string worker id, return a boolean indicating whether the
79
124
  # worker exists
80
125
  def self.exists?(worker_id)
81
- redis.sismember(:workers, worker_id)
126
+ data_store.worker_exists?(worker_id)
82
127
  end
83
128
 
84
129
  # Workers should be initialized with an array of string queue
@@ -92,10 +137,60 @@ module Resque
92
137
  # If passed a single "*", this Worker will operate on all queues
93
138
  # in alphabetical order. Queues can be dynamically added or
94
139
  # removed without needing to restart workers using this method.
140
+ #
141
+ # Workers should have `#prepare` called after they are initialized
142
+ # if you are running work on the worker.
95
143
  def initialize(*queues)
96
- @queues = queues.map { |queue| queue.to_s.strip }
97
144
  @shutdown = nil
98
145
  @paused = nil
146
+ @before_first_fork_hook_ran = false
147
+
148
+ @heartbeat_thread = nil
149
+ @heartbeat_thread_signal = nil
150
+
151
+ @last_state = :idle
152
+
153
+ verbose_value = ENV['LOGGING'] || ENV['VERBOSE']
154
+ self.verbose = verbose_value if verbose_value
155
+ self.very_verbose = ENV['VVERBOSE'] if ENV['VVERBOSE']
156
+ self.pre_shutdown_timeout = (ENV['RESQUE_PRE_SHUTDOWN_TIMEOUT'] || 0.0).to_f
157
+ self.term_timeout = (ENV['RESQUE_TERM_TIMEOUT'] || 4.0).to_f
158
+ self.term_child = ENV['TERM_CHILD']
159
+ self.graceful_term = ENV['GRACEFUL_TERM']
160
+ self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']
161
+
162
+ self.queues = queues
163
+ end
164
+
165
+ # Daemonizes the worker if ENV['BACKGROUND'] is set and writes
166
+ # the process id to ENV['PIDFILE'] if set. Should only be called
167
+ # once per worker.
168
+ def prepare
169
+ if ENV['BACKGROUND']
170
+ Process.daemon(true)
171
+ end
172
+
173
+ if ENV['PIDFILE']
174
+ File.open(ENV['PIDFILE'], 'w') { |f| f << pid }
175
+ end
176
+
177
+ self.reconnect if ENV['BACKGROUND']
178
+ end
179
+
180
+ WILDCARDS = ['*', '?', '{', '}', '[', ']'].freeze
181
+
182
+ def queues=(queues)
183
+ queues = (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') if queues.empty?
184
+ queues = queues.map { |queue| queue.to_s.strip }
185
+
186
+ @skip_queues, @queues = queues.partition { |queue| queue.start_with?('!') }
187
+ @skip_queues.map! { |queue| queue[1..-1] }
188
+
189
+ # The behavior of `queues` is dependent on the value of `@has_dynamic_queues`: if it's true, the method returns the result of filtering @queues with `glob_match`
190
+ # if it's false, the method returns @queues directly. Since `glob_match` will cause skipped queues to be filtered out, we want to make sure it's called if we have @skip_queues.any?
191
+ @has_dynamic_queues =
192
+ @skip_queues.any? || WILDCARDS.any? { |char| @queues.join.include?(char) }
193
+
99
194
  validate_queues
100
195
  end
101
196
 
@@ -109,6 +204,25 @@ module Resque
109
204
  end
110
205
  end
111
206
 
207
+ # Returns a list of queues to use when searching for a job.
208
+ # A splat ("*") means you want every queue (in alpha order) - this
209
+ # can be useful for dynamically adding new queues.
210
+ def queues
211
+ if @has_dynamic_queues
212
+ current_queues = Resque.queues
213
+ @queues.map { |queue| glob_match(current_queues, queue) }.flatten.uniq
214
+ else
215
+ @queues
216
+ end
217
+ end
218
+
219
+ def glob_match(list, pattern)
220
+ list.select do |queue|
221
+ File.fnmatch?(pattern, queue) &&
222
+ @skip_queues.none? { |skip_pattern| File.fnmatch?(skip_pattern, queue) }
223
+ end.sort
224
+ end
225
+
112
226
  # This is the main workhorse method. Called on a Worker instance,
113
227
  # it begins the worker life cycle.
114
228
  #
@@ -127,46 +241,47 @@ module Resque
127
241
  # has completed processing. Useful for testing.
128
242
  def work(interval = 5.0, &block)
129
243
  interval = Float(interval)
130
- $0 = "resque: Starting"
131
244
  startup
132
245
 
133
246
  loop do
134
247
  break if shutdown?
135
248
 
136
- if not paused? and job = reserve
137
- log "got: #{job.inspect}"
138
- job.worker = self
139
- working_on job
140
-
141
- if @child = fork(job)
142
- srand # Reseeding
143
- procline "Forked #{@child} at #{Time.now.to_i}"
144
- begin
145
- Process.waitpid(@child)
146
- rescue SystemCallError
147
- nil
148
- end
149
- else
150
- unregister_signal_handlers if will_fork? && term_child
151
- procline "Processing #{job.queue} since #{Time.now.to_i}"
152
- reconnect
153
- perform(job, &block)
154
- exit! if will_fork?
155
- end
156
-
157
- done_working
158
- @child = nil
159
- else
249
+ unless work_one_job(&block)
250
+ state_change
160
251
  break if interval.zero?
161
- log! "Sleeping for #{interval} seconds"
162
- procline paused? ? "Paused" : "Waiting for #{@queues.join(',')}"
252
+ log_with_severity :debug, "Sleeping for #{interval} seconds"
253
+ procline paused? ? "Paused" : "Waiting for #{queues.join(',')}"
163
254
  sleep interval
164
255
  end
165
256
  end
166
257
 
167
258
  unregister_worker
259
+ run_hook :worker_exit
168
260
  rescue Exception => exception
261
+ return if exception.class == SystemExit && !@child && run_at_exit_hooks
262
+ log_with_severity :error, "Failed to start worker : #{exception.inspect}"
169
263
  unregister_worker(exception)
264
+ run_hook :worker_exit
265
+ end
266
+
267
+ def work_one_job(job = nil, &block)
268
+ return false if paused?
269
+ return false unless job ||= reserve
270
+
271
+ working_on job
272
+ procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"
273
+
274
+ log_with_severity :info, "got: #{job.inspect}"
275
+ job.worker = self
276
+
277
+ if fork_per_job?
278
+ perform_with_fork(job, &block)
279
+ else
280
+ perform(job, &block)
281
+ end
282
+
283
+ done_working
284
+ true
170
285
  end
171
286
 
172
287
  # DEPRECATED. Processes a single job. If none is given, it will
@@ -181,21 +296,34 @@ module Resque
181
296
  done_working
182
297
  end
183
298
 
299
+ # Reports the exception and marks the job as failed
300
+ def report_failed_job(job,exception)
301
+ log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"
302
+ begin
303
+ job.fail(exception)
304
+ rescue Object => exception
305
+ log_with_severity :error, "Received exception when reporting failure: #{exception.inspect}"
306
+ end
307
+ begin
308
+ failed!
309
+ rescue Object => exception
310
+ log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{exception.inspect}"
311
+ end
312
+ end
313
+
314
+
184
315
  # Processes a given job in the child.
185
316
  def perform(job)
186
317
  begin
187
- run_hook :after_fork, job if will_fork?
318
+ if fork_per_job?
319
+ reconnect
320
+ run_hook :after_fork, job
321
+ end
188
322
  job.perform
189
323
  rescue Object => e
190
- log "#{job.inspect} failed: #{e.inspect}"
191
- begin
192
- job.fail(e)
193
- rescue Object => e
194
- log "Received exception when reporting failure: #{e.inspect}"
195
- end
196
- failed!
324
+ report_failed_job(job,e)
197
325
  else
198
- log "done: #{job.inspect}"
326
+ log_with_severity :info, "done: #{job.inspect}"
199
327
  ensure
200
328
  yield job if block_given?
201
329
  end
@@ -205,17 +333,17 @@ module Resque
205
333
  # nil if no job can be found.
206
334
  def reserve
207
335
  queues.each do |queue|
208
- log! "Checking #{queue}"
336
+ log_with_severity :debug, "Checking #{queue}"
209
337
  if job = Resque.reserve(queue)
210
- log! "Found job on #{queue}"
338
+ log_with_severity :debug, "Found job on #{queue}"
211
339
  return job
212
340
  end
213
341
  end
214
342
 
215
343
  nil
216
344
  rescue Exception => e
217
- log "Error reserving job: #{e.inspect}"
218
- log e.backtrace.join("\n")
345
+ log_with_severity :error, "Error reserving job: #{e.inspect}"
346
+ log_with_severity :error, e.backtrace.join("\n")
219
347
  raise e
220
348
  end
221
349
 
@@ -224,53 +352,26 @@ module Resque
224
352
  def reconnect
225
353
  tries = 0
226
354
  begin
227
- redis.client.reconnect
355
+ data_store.reconnect
228
356
  rescue Redis::BaseConnectionError
229
357
  if (tries += 1) <= 3
230
- log "Error reconnecting to Redis; retrying"
358
+ log_with_severity :error, "Error reconnecting to Redis; retrying"
231
359
  sleep(tries)
232
360
  retry
233
361
  else
234
- log "Error reconnecting to Redis; quitting"
362
+ log_with_severity :error, "Error reconnecting to Redis; quitting"
235
363
  raise
236
364
  end
237
365
  end
238
366
  end
239
367
 
240
- # Returns a list of queues to use when searching for a job.
241
- # A splat ("*") means you want every queue (in alpha order) - this
242
- # can be useful for dynamically adding new queues.
243
- def queues
244
- @queues.map {|queue| queue == "*" ? Resque.queues.sort : queue }.flatten.uniq
245
- end
246
-
247
- # Not every platform supports fork. Here we do our magic to
248
- # determine if yours does.
249
- def fork(job)
250
- return if @cant_fork
251
-
252
- # Only run before_fork hooks if we're actually going to fork
253
- # (after checking @cant_fork)
254
- run_hook :before_fork, job
255
-
256
- begin
257
- # IronRuby doesn't support `Kernel.fork` yet
258
- if Kernel.respond_to?(:fork)
259
- Kernel.fork if will_fork?
260
- else
261
- raise NotImplementedError
262
- end
263
- rescue NotImplementedError
264
- @cant_fork = true
265
- nil
266
- end
267
- end
268
-
269
368
  # Runs all the methods needed when a worker begins its lifecycle.
270
369
  def startup
271
- warn "WARNING: This way of doing signal handling is now deprecated. Please see http://hone.heroku.com/resque/2012/08/21/resque-signals.html for more info." unless term_child
370
+ $0 = "resque: Starting"
371
+
272
372
  enable_gc_optimizations
273
373
  register_signal_handlers
374
+ start_heartbeat
274
375
  prune_dead_workers
275
376
  run_hook :before_first_fork
276
377
  register_worker
@@ -297,7 +398,7 @@ module Resque
297
398
  # USR2: Don't process any new jobs
298
399
  # CONT: Start processing jobs again after a USR2
299
400
  def register_signal_handlers
300
- trap('TERM') { shutdown! }
401
+ trap('TERM') { graceful_term ? shutdown : shutdown! }
301
402
  trap('INT') { shutdown! }
302
403
 
303
404
  begin
@@ -310,14 +411,21 @@ module Resque
310
411
  trap('USR2') { pause_processing }
311
412
  trap('CONT') { unpause_processing }
312
413
  rescue ArgumentError
313
- warn "Signals QUIT, USR1, USR2, and/or CONT not supported."
414
+ log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
314
415
  end
315
416
 
316
- log! "Registered signals"
417
+ log_with_severity :debug, "Registered signals"
317
418
  end
318
419
 
319
420
  def unregister_signal_handlers
320
- trap('TERM') { raise TermException.new("SIGTERM") }
421
+ trap('TERM') do
422
+ trap('TERM') do
423
+ # Ignore subsequent term signals
424
+ end
425
+
426
+ raise TermException.new("SIGTERM")
427
+ end
428
+
321
429
  trap('INT', 'DEFAULT')
322
430
 
323
431
  begin
@@ -331,15 +439,24 @@ module Resque
331
439
  # Schedule this worker for shutdown. Will finish processing the
332
440
  # current job.
333
441
  def shutdown
334
- log 'Exiting...'
442
+ log_with_severity :info, 'Exiting...'
335
443
  @shutdown = true
336
444
  end
337
445
 
338
446
  # Kill the child and shutdown immediately.
447
+ # If not forking, abort this process.
339
448
  def shutdown!
340
449
  shutdown
341
450
  if term_child
342
- new_kill_child
451
+ if fork_per_job?
452
+ new_kill_child
453
+ else
454
+ # Raise TermException in the same process
455
+ trap('TERM') do
456
+ # ignore subsequent terms
457
+ end
458
+ raise TermException.new("SIGTERM")
459
+ end
343
460
  else
344
461
  kill_child
345
462
  end
@@ -354,54 +471,130 @@ module Resque
354
471
  # is processing will not be completed.
355
472
  def kill_child
356
473
  if @child
357
- log! "Killing child at #{@child}"
358
- if system("ps -o pid,state -p #{@child}")
474
+ log_with_severity :debug, "Killing child at #{@child}"
475
+ if `ps -o pid,state -p #{@child}`
359
476
  Process.kill("KILL", @child) rescue nil
360
477
  else
361
- log! "Child #{@child} not found, restarting."
478
+ log_with_severity :debug, "Child #{@child} not found, restarting."
362
479
  shutdown
363
480
  end
364
481
  end
365
482
  end
366
483
 
484
+ def heartbeat
485
+ data_store.heartbeat(self)
486
+ end
487
+
488
+ def remove_heartbeat
489
+ data_store.remove_heartbeat(self)
490
+ end
491
+
492
+ def heartbeat!(time = data_store.server_time)
493
+ data_store.heartbeat!(self, time)
494
+ end
495
+
496
+ def self.all_heartbeats
497
+ data_store.all_heartbeats
498
+ end
499
+
500
+ # Returns a list of workers that have sent a heartbeat in the past, but which
501
+ # already expired (does NOT include workers that have never sent a heartbeat at all).
502
+ def self.all_workers_with_expired_heartbeats
503
+ # Use `Worker.all_heartbeats` instead of `Worker.all`
504
+ # to prune workers which haven't been registered but have set a heartbeat.
505
+ # https://github.com/resque/resque/pull/1751
506
+ heartbeats = Worker.all_heartbeats
507
+ now = data_store.server_time
508
+
509
+ heartbeats.select do |id, heartbeat|
510
+ if heartbeat
511
+ seconds_since_heartbeat = (now - Time.parse(heartbeat)).to_i
512
+ seconds_since_heartbeat > Resque.prune_interval
513
+ else
514
+ false
515
+ end
516
+ end.each_key.map do |id|
517
+ # skip_exists must be true to include not registered workers
518
+ find(id, :skip_exists => true)
519
+ end
520
+ end
521
+
522
+ def start_heartbeat
523
+ remove_heartbeat
524
+
525
+ @heartbeat_thread_signal = Resque::ThreadSignal.new
526
+
527
+ @heartbeat_thread = Thread.new do
528
+ loop do
529
+ heartbeat!
530
+ signaled = @heartbeat_thread_signal.wait_for_signal(Resque.heartbeat_interval)
531
+ break if signaled
532
+ end
533
+ end
534
+
535
+ @@all_heartbeat_threads << @heartbeat_thread
536
+ end
537
+
367
538
  # Kills the forked child immediately with minimal remorse. The job it
368
539
  # is processing will not be completed. Send the child a TERM signal,
369
- # wait 5 seconds, and then a KILL signal if it has not quit
540
+ # wait <term_timeout> seconds, and then a KILL signal if it has not quit.
541
+ # If pre_shutdown_timeout has been set to a positive number, it will allow
542
+ # the child that many seconds before sending the aforementioned TERM and KILL.
370
543
  def new_kill_child
371
544
  if @child
372
- unless Process.waitpid(@child, Process::WNOHANG)
373
- log! "Sending TERM signal to child #{@child}"
545
+ unless child_already_exited?
546
+ if pre_shutdown_timeout && pre_shutdown_timeout > 0.0
547
+ log_with_severity :debug, "Waiting #{pre_shutdown_timeout.to_f}s for child process to exit"
548
+ return if wait_for_child_exit(pre_shutdown_timeout)
549
+ end
550
+
551
+ log_with_severity :debug, "Sending TERM signal to child #{@child}"
374
552
  Process.kill("TERM", @child)
375
- (term_timeout.to_f * 10).round.times do |i|
376
- sleep(0.1)
377
- return if Process.waitpid(@child, Process::WNOHANG)
553
+
554
+ if wait_for_child_exit(term_timeout)
555
+ return
556
+ else
557
+ log_with_severity :debug, "Sending KILL signal to child #{@child}"
558
+ Process.kill("KILL", @child)
378
559
  end
379
- log! "Sending KILL signal to child #{@child}"
380
- Process.kill("KILL", @child)
381
560
  else
382
- log! "Child #{@child} already quit."
561
+ log_with_severity :debug, "Child #{@child} already quit."
383
562
  end
384
563
  end
385
564
  rescue SystemCallError
386
- log! "Child #{@child} already quit and reaped."
565
+ log_with_severity :error, "Child #{@child} already quit and reaped."
566
+ end
567
+
568
+ def child_already_exited?
569
+ Process.waitpid(@child, Process::WNOHANG)
570
+ end
571
+
572
+ def wait_for_child_exit(timeout)
573
+ (timeout * 10).round.times do |i|
574
+ sleep(0.1)
575
+ return true if child_already_exited?
576
+ end
577
+ false
387
578
  end
388
579
 
389
580
  # are we paused?
390
581
  def paused?
391
- @paused
582
+ @paused || redis.get('pause-all-workers').to_s.strip.downcase == 'true'
392
583
  end
393
584
 
394
585
  # Stop processing jobs after the current one has completed (if we're
395
586
  # currently running one).
396
587
  def pause_processing
397
- log "USR2 received; pausing job processing"
588
+ log_with_severity :info, "USR2 received; pausing job processing"
589
+ run_hook :before_pause, self
398
590
  @paused = true
399
591
  end
400
592
 
401
593
  # Start processing jobs again after a pause
402
594
  def unpause_processing
403
- log "CONT received; resuming job processing"
595
+ log_with_severity :info, "CONT received; resuming job processing"
404
596
  @paused = false
597
+ run_hook :after_pause, self
405
598
  end
406
599
 
407
600
  # Looks for any workers which should be running on this server
@@ -415,13 +608,45 @@ module Resque
415
608
  # By checking the current Redis state against the actual
416
609
  # environment, we can determine if Redis is old and clean it up a bit.
417
610
  def prune_dead_workers
611
+ return unless data_store.acquire_pruning_dead_worker_lock(self, Resque.heartbeat_interval)
612
+
418
613
  all_workers = Worker.all
419
- known_workers = worker_pids unless all_workers.empty?
614
+
615
+ known_workers = worker_pids
616
+ all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
617
+ all_workers_with_expired_heartbeats.each do |worker|
618
+ # If the worker's last heartbeat has expired, remove it from the registry.
619
+ #
620
+ # If the worker hasn't ever sent a heartbeat, we won't remove it since
621
+ # the first heartbeat is sent before the worker is registered; it means
622
+ # that this is a worker that doesn't support heartbeats, e.g., another
623
+ # client library or an older version of Resque. We won't touch these.
624
+ log_with_severity :info, "Pruning dead worker: #{worker}"
625
+
626
+ job_class = worker.job(false)['payload']['class'] rescue nil
627
+ worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s, job_class))
628
+ end
629
+
420
630
  all_workers.each do |worker|
421
- host, pid, queues = worker.id.split(':')
631
+ if all_workers_with_expired_heartbeats.include?(worker)
632
+ next
633
+ end
634
+
635
+ host, pid, worker_queues_raw = worker.id.split(':')
636
+ worker_queues = worker_queues_raw.split(",")
637
+ unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
638
+ # If the worker we are trying to prune does not belong to the queues
639
+ # we are listening to, we should not touch it.
640
+ # Attempting to prune a worker from different queues may easily result in
641
+ # an unknown class exception, since that worker could easily be even
642
+ # written in a different language.
643
+ next
644
+ end
645
+
422
646
  next unless host == hostname
423
647
  next if known_workers.include?(pid)
424
- log! "Pruning dead worker: #{worker}"
648
+
649
+ log_with_severity :debug, "Pruning dead worker: #{worker}"
425
650
  worker.unregister_worker
426
651
  end
427
652
  end
@@ -429,18 +654,29 @@ module Resque
429
654
  # Registers ourself as a worker. Useful when entering the worker
430
655
  # lifecycle on startup.
431
656
  def register_worker
432
- redis.sadd(:workers, self)
433
- started!
657
+ data_store.register_worker(self)
434
658
  end
435
659
 
436
660
  # Runs a named hook, passing along any arguments.
437
661
  def run_hook(name, *args)
438
- return unless hook = Resque.send(name)
439
- msg = "Running #{name} hook"
662
+ hooks = Resque.send(name)
663
+ return if hooks.empty?
664
+ return if name == :before_first_fork && @before_first_fork_hook_ran
665
+ msg = "Running #{name} hooks"
440
666
  msg << " with #{args.inspect}" if args.any?
441
- log msg
667
+ log_with_severity :info, msg
668
+
669
+ hooks.each do |hook|
670
+ args.any? ? hook.call(*args) : hook.call
671
+ @before_first_fork_hook_ran = true if name == :before_first_fork
672
+ end
673
+ end
442
674
 
443
- args.any? ? hook.call(*args) : hook.call
675
+ def kill_background_threads
676
+ if @heartbeat_thread
677
+ @heartbeat_thread_signal.signal
678
+ @heartbeat_thread.join
679
+ end
444
680
  end
445
681
 
446
682
  # Unregisters ourself as a worker. Useful when shutting down.
@@ -452,15 +688,28 @@ module Resque
452
688
  # Ensure the proper worker is attached to this job, even if
453
689
  # it's not the precise instance that died.
454
690
  job.worker = self
455
- job.fail(exception || DirtyExit.new)
691
+ begin
692
+ job.fail(exception || DirtyExit.new("Job still being processed"))
693
+ rescue RuntimeError => e
694
+ log_with_severity :error, e.message
695
+ end
456
696
  end
457
697
 
458
- redis.srem(:workers, self)
459
- redis.del("worker:#{self}")
460
- redis.del("worker:#{self}:started")
698
+ kill_background_threads
461
699
 
462
- Stat.clear("processed:#{self}")
463
- Stat.clear("failed:#{self}")
700
+ data_store.unregister_worker(self) do |**opts|
701
+ Stat.clear("processed:#{self}", **opts)
702
+ Stat.clear("failed:#{self}", **opts)
703
+ end
704
+ rescue Exception => exception_while_unregistering
705
+ message = exception_while_unregistering.message
706
+ if exception
707
+ message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
708
+ message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
709
+ end
710
+ fail(exception_while_unregistering.class,
711
+ message,
712
+ exception_while_unregistering.backtrace)
464
713
  end
465
714
 
466
715
  # Given a job, tells Redis we're working on it. Useful for seeing
@@ -468,16 +717,26 @@ module Resque
468
717
  def working_on(job)
469
718
  data = encode \
470
719
  :queue => job.queue,
471
- :run_at => Time.now.strftime("%Y/%m/%d %H:%M:%S %Z"),
720
+ :run_at => Time.now.utc.iso8601,
472
721
  :payload => job.payload
473
- redis.set("worker:#{self}", data)
722
+ data_store.set_worker_payload(self,data)
723
+ state_change
474
724
  end
475
725
 
476
726
  # Called when we are done working - clears our `working_on` state
477
727
  # and tells Redis we processed a job.
478
728
  def done_working
479
- processed!
480
- redis.del("worker:#{self}")
729
+ data_store.worker_done_working(self) do |**opts|
730
+ processed!(**opts)
731
+ end
732
+ end
733
+
734
+ def state_change
735
+ current_state = state
736
+ if current_state != @last_state
737
+ run_hook :queue_empty if current_state == :idle
738
+ @last_state = current_state
739
+ end
481
740
  end
482
741
 
483
742
  # How many jobs has this worker processed? Returns an int.
@@ -486,9 +745,9 @@ module Resque
486
745
  end
487
746
 
488
747
  # Tell Redis we've processed a job.
489
- def processed!
490
- Stat << "processed"
491
- Stat << "processed:#{self}"
748
+ def processed!(**opts)
749
+ Stat.incr("processed", 1, **opts)
750
+ Stat.incr("processed:#{self}", 1, **opts)
492
751
  end
493
752
 
494
753
  # How many failed jobs has this worker seen? Returns an int.
@@ -504,18 +763,20 @@ module Resque
504
763
 
505
764
  # What time did this worker start? Returns an instance of `Time`
506
765
  def started
507
- redis.get "worker:#{self}:started"
766
+ data_store.worker_start_time(self)
508
767
  end
509
768
 
510
769
  # Tell Redis we've started
511
770
  def started!
512
- redis.set("worker:#{self}:started", Time.now.to_s)
771
+ data_store.worker_started(self)
513
772
  end
514
773
 
515
774
  # Returns a hash explaining the Job we're currently processing, if any.
516
- def job
517
- decode(redis.get("worker:#{self}")) || {}
775
+ def job(reload = true)
776
+ @job = nil if reload
777
+ @job ||= decode(data_store.get_worker_payload(self)) || {}
518
778
  end
779
+ attr_writer :job
519
780
  alias_method :processing, :job
520
781
 
521
782
  # Boolean - true if working, false if not
@@ -527,15 +788,16 @@ module Resque
527
788
  def idle?
528
789
  state == :idle
529
790
  end
530
-
531
- def will_fork?
532
- !(@cant_fork || $TESTING)
791
+
792
+ def fork_per_job?
793
+ return @fork_per_job if defined?(@fork_per_job)
794
+ @fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
533
795
  end
534
796
 
535
797
  # Returns a symbol representing the current worker state,
536
798
  # which can be either :working or :idle
537
799
  def state
538
- redis.exists("worker:#{self}") ? :working : :idle
800
+ data_store.get_worker_payload(self) ? :working : :idle
539
801
  end
540
802
 
541
803
  # Is this worker the same as another worker?
@@ -550,18 +812,18 @@ module Resque
550
812
  # The string representation is the same as the id for this worker
551
813
  # instance. Can be used with `Worker.find`.
552
814
  def to_s
553
- @to_s ||= "#{hostname}:#{Process.pid}:#{@queues.join(',')}"
815
+ @to_s ||= "#{hostname}:#{pid}:#{@queues.join(',')}"
554
816
  end
555
817
  alias_method :id, :to_s
556
818
 
557
- # chomp'd hostname of this machine
819
+ # chomp'd hostname of this worker's machine
558
820
  def hostname
559
- @hostname ||= `hostname`.chomp
821
+ @hostname ||= Socket.gethostname
560
822
  end
561
823
 
562
824
  # Returns Integer PID of running worker
563
825
  def pid
564
- Process.pid
826
+ @pid ||= Process.pid
565
827
  end
566
828
 
567
829
  # Returns an Array of string pids of all the other workers on this
@@ -569,17 +831,24 @@ module Resque
569
831
  def worker_pids
570
832
  if RUBY_PLATFORM =~ /solaris/
571
833
  solaris_worker_pids
834
+ elsif RUBY_PLATFORM =~ /mingw32/
835
+ windows_worker_pids
572
836
  else
573
837
  linux_worker_pids
574
838
  end
575
839
  end
576
840
 
577
- # Find Resque worker pids on Linux and OS X.
578
- #
579
841
  # Returns an Array of string pids of all the other workers on this
580
842
  # machine. Useful when pruning dead workers on startup.
843
+ def windows_worker_pids
844
+ tasklist_output = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`.encode("UTF-8", Encoding.locale_charmap)
845
+ tasklist_output.split($/).select { |line| line =~ /^PID:/ }.collect { |line| line.gsub(/PID:\s+/, '') }
846
+ end
847
+
848
+ # Find Resque worker pids on Linux and OS X.
849
+ #
581
850
  def linux_worker_pids
582
- `ps -A -o pid,command | grep "[r]esque" | grep -v "resque-web"`.split("\n").map do |line|
851
+ `ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`.split("\n").map do |line|
583
852
  line.split(' ')[0]
584
853
  end
585
854
  end
@@ -592,7 +861,7 @@ module Resque
592
861
  `ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n").map do |line|
593
862
  real_pid = line.split(' ')[0]
594
863
  pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
595
- if pargs_command.split(':')[1] == " resque-#{Resque::Version}"
864
+ if pargs_command.split(':')[1] == " resque-#{Resque::VERSION}"
596
865
  real_pid
597
866
  end
598
867
  end.compact
@@ -600,25 +869,80 @@ module Resque
600
869
 
601
870
  # Given a string, sets the procline ($0) and logs.
602
871
  # Procline is always in the format of:
603
- # resque-VERSION: STRING
872
+ # RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
604
873
  def procline(string)
605
- $0 = "resque-#{Resque::Version}: #{string}"
606
- log! $0
874
+ $0 = "#{ENV['RESQUE_PROCLINE_PREFIX']}resque-#{Resque::VERSION}: #{string}"
875
+ log_with_severity :debug, $0
607
876
  end
608
877
 
609
- # Log a message to STDOUT if we are verbose or very_verbose.
610
878
  def log(message)
611
- if verbose
612
- puts "*** #{message}"
613
- elsif very_verbose
614
- time = Time.now.strftime('%H:%M:%S %Y-%m-%d')
615
- puts "** [#{time}] #$$: #{message}"
616
- end
879
+ info(message)
617
880
  end
618
881
 
619
- # Logs a very verbose message to STDOUT.
620
882
  def log!(message)
621
- log message if very_verbose
883
+ debug(message)
884
+ end
885
+
886
+
887
+ attr_reader :verbose, :very_verbose
888
+
889
+ def verbose=(value);
890
+ if value && !very_verbose
891
+ Resque.logger.formatter = VerboseFormatter.new
892
+ Resque.logger.level = Logger::INFO
893
+ elsif !value
894
+ Resque.logger.formatter = QuietFormatter.new
895
+ end
896
+
897
+ @verbose = value
898
+ end
899
+
900
+ def very_verbose=(value)
901
+ if value
902
+ Resque.logger.formatter = VeryVerboseFormatter.new
903
+ Resque.logger.level = Logger::DEBUG
904
+ elsif !value && verbose
905
+ Resque.logger.formatter = VerboseFormatter.new
906
+ Resque.logger.level = Logger::INFO
907
+ else
908
+ Resque.logger.formatter = QuietFormatter.new
909
+ end
910
+
911
+ @very_verbose = value
912
+ end
913
+
914
+ private
915
+
916
+ def perform_with_fork(job, &block)
917
+ run_hook :before_fork, job
918
+
919
+ begin
920
+ @child = fork do
921
+ unregister_signal_handlers if term_child
922
+ perform(job, &block)
923
+ exit! unless run_at_exit_hooks
924
+ end
925
+ rescue NotImplementedError
926
+ @fork_per_job = false
927
+ perform(job, &block)
928
+ return
929
+ end
930
+
931
+ srand # Reseeding
932
+ procline "Forked #{@child} at #{Time.now.to_i}"
933
+
934
+ begin
935
+ Process.waitpid(@child)
936
+ rescue SystemCallError
937
+ nil
938
+ end
939
+
940
+ job.fail(DirtyExit.new("Child process received unhandled signal #{$?}", $?)) if $?.signaled?
941
+ @child = nil
942
+ end
943
+
944
+ def log_with_severity(severity, message)
945
+ Logging.log(severity, message)
622
946
  end
623
947
  end
624
948
  end