resqueue 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.md +488 -0
  3. data/LICENSE +20 -0
  4. data/README.markdown +920 -0
  5. data/Rakefile +57 -0
  6. data/bin/resque +81 -0
  7. data/bin/resque-web +31 -0
  8. data/lib/resque.rb +578 -0
  9. data/lib/resque/data_store.rb +326 -0
  10. data/lib/resque/errors.rb +21 -0
  11. data/lib/resque/failure.rb +119 -0
  12. data/lib/resque/failure/airbrake.rb +33 -0
  13. data/lib/resque/failure/base.rb +73 -0
  14. data/lib/resque/failure/multiple.rb +68 -0
  15. data/lib/resque/failure/redis.rb +128 -0
  16. data/lib/resque/failure/redis_multi_queue.rb +104 -0
  17. data/lib/resque/helpers.rb +48 -0
  18. data/lib/resque/job.rb +296 -0
  19. data/lib/resque/log_formatters/quiet_formatter.rb +7 -0
  20. data/lib/resque/log_formatters/verbose_formatter.rb +7 -0
  21. data/lib/resque/log_formatters/very_verbose_formatter.rb +8 -0
  22. data/lib/resque/logging.rb +18 -0
  23. data/lib/resque/plugin.rb +78 -0
  24. data/lib/resque/server.rb +299 -0
  25. data/lib/resque/server/helpers.rb +64 -0
  26. data/lib/resque/server/public/favicon.ico +0 -0
  27. data/lib/resque/server/public/idle.png +0 -0
  28. data/lib/resque/server/public/jquery-1.12.4.min.js +5 -0
  29. data/lib/resque/server/public/jquery.relatize_date.js +95 -0
  30. data/lib/resque/server/public/poll.png +0 -0
  31. data/lib/resque/server/public/ranger.js +78 -0
  32. data/lib/resque/server/public/reset.css +44 -0
  33. data/lib/resque/server/public/style.css +91 -0
  34. data/lib/resque/server/public/working.png +0 -0
  35. data/lib/resque/server/test_helper.rb +19 -0
  36. data/lib/resque/server/views/error.erb +1 -0
  37. data/lib/resque/server/views/failed.erb +29 -0
  38. data/lib/resque/server/views/failed_job.erb +50 -0
  39. data/lib/resque/server/views/failed_queues_overview.erb +24 -0
  40. data/lib/resque/server/views/key_sets.erb +17 -0
  41. data/lib/resque/server/views/key_string.erb +11 -0
  42. data/lib/resque/server/views/layout.erb +44 -0
  43. data/lib/resque/server/views/next_more.erb +22 -0
  44. data/lib/resque/server/views/overview.erb +4 -0
  45. data/lib/resque/server/views/queues.erb +58 -0
  46. data/lib/resque/server/views/stats.erb +62 -0
  47. data/lib/resque/server/views/workers.erb +111 -0
  48. data/lib/resque/server/views/working.erb +72 -0
  49. data/lib/resque/stat.rb +58 -0
  50. data/lib/resque/tasks.rb +72 -0
  51. data/lib/resque/thread_signal.rb +45 -0
  52. data/lib/resque/vendor/utf8_util.rb +26 -0
  53. data/lib/resque/vendor/utf8_util/utf8_util_18.rb +91 -0
  54. data/lib/resque/vendor/utf8_util/utf8_util_19.rb +6 -0
  55. data/lib/resque/version.rb +3 -0
  56. data/lib/resque/worker.rb +892 -0
  57. data/lib/resqueue.rb +4 -0
  58. data/lib/tasks/redis.rake +161 -0
  59. data/lib/tasks/resque.rake +2 -0
  60. metadata +197 -0
@@ -0,0 +1,72 @@
1
# require 'resque/tasks'
# will give you the resque tasks


namespace :resque do
  # Declared with no actions here; the other tasks list it as a prerequisite.
  task :setup

  desc "Start a Resque worker"
  task :work => [ :preload, :setup ] do
    require 'resque'

    begin
      worker = Resque::Worker.new
    rescue Resque::NoQueueError
      abort "set QUEUE env var, e.g. $ QUEUE=critical,high rake resque:work"
    end

    worker.prepare
    # Interpolate the worker itself: `self` in this block is not the worker,
    # so the previous message never identified which worker was starting.
    worker.log "Starting worker #{worker}"
    worker.work(ENV['INTERVAL'] || 5) # interval, will block
  end

  desc "Start multiple Resque workers. Should only be used in dev mode."
  task :workers do
    threads = []

    # nil.to_i is 0, so an unset COUNT is rejected here as well
    if ENV['COUNT'].to_i < 1
      abort "set COUNT env var, e.g. $ COUNT=2 rake resque:workers"
    end

    # One thread per worker; each thread blocks on its own `rake resque:work`
    # child process.
    ENV['COUNT'].to_i.times do
      threads << Thread.new do
        system "rake resque:work"
      end
    end

    threads.each { |thread| thread.join }
  end

  # Preload app files if this is Rails
  task :preload => :setup do
    if defined?(Rails)
      if Rails::VERSION::MAJOR > 3
        ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
        Rails.application.config.eager_load_namespaces.each(&:eager_load!)

      elsif Rails::VERSION::MAJOR == 3
        ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
        Rails.application.eager_load!

      elsif defined?(Rails::Initializer)
        $rails_rake_task = false
        Rails::Initializer.run :load_application_classes
      end
    end
  end

  namespace :failures do
    desc "Sort the 'failed' queue for the redis_multi_queue failure backend"
    task :sort do
      require 'resque'
      require 'resque/failure/redis'

      warn "Sorting #{Resque::Failure.count} failures..."
      # Re-push every failure onto the queue named after the failure's own
      # 'queue' field.
      Resque::Failure.each(0, Resque::Failure.count) do |_, failure|
        data = Resque.encode(failure)
        Resque.redis.rpush(Resque::Failure.failure_queue_name(failure['queue']), data)
      end
      warn "done!"
    end
  end
end
@@ -0,0 +1,45 @@
1
# A one-shot flag that one thread can set (`signal`) and another can wait on
# with a timeout (`wait_for_signal`). Once signaled it stays signaled.
class Resque::ThreadSignal
  if RUBY_VERSION <= "1.9"
    # Legacy implementation: polls the flag every 0.1 seconds.
    def initialize
      @signaled = false
    end

    def signal
      @signaled = true
    end

    # timeout - Numeric seconds to wait at most.
    #
    # Returns true once signaled, false if the timeout elapsed first.
    def wait_for_signal(timeout)
      # round so a Float timeout works too (Float has no #times)
      (10 * timeout).round.times do
        sleep(0.1)
        return true if @signaled
      end

      @signaled
    end

  else
    def initialize
      @mutex = Mutex.new
      @signaled = false
      @received = ConditionVariable.new
    end

    # Sets the flag and wakes a thread blocked in wait_for_signal.
    def signal
      @mutex.synchronize do
        @signaled = true
        @received.signal
      end
    end

    # timeout - Numeric seconds to wait at most, or nil to wait until signaled.
    #
    # Returns true once signaled, false if the timeout elapsed first.
    def wait_for_signal(timeout)
      @mutex.synchronize do
        if timeout.nil?
          @received.wait(@mutex) until @signaled
        else
          # Re-check the flag and the remaining time after every wake-up so
          # an early wake-up cannot end the wait before the timeout.
          deadline = monotonic_now + timeout
          until @signaled
            remaining = deadline - monotonic_now
            break unless remaining > 0
            @received.wait(@mutex, remaining)
          end
        end

        @signaled
      end
    end

    private

    # Seconds from a clock suited to measuring durations; falls back to wall
    # time where the monotonic clock constant is not defined.
    def monotonic_now
      if defined?(Process::CLOCK_MONOTONIC)
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      else
        Time.now.to_f
      end
    end

  end
end
@@ -0,0 +1,26 @@
1
module UTF8Util
  # Use '?' instead of the Unicode replacement character: that character is
  # 3 bytes long and would grow the string if many replacements are made.
  REPLACEMENT_CHAR = "?"

  class << self
    # Replace invalid UTF-8 character sequences with a replacement character.
    # This base version only raises; a version-specific file loaded after
    # this one supplies the real implementation.
    #
    # Returns self as valid UTF-8.
    def clean!(str)
      raise NotImplementedError
    end

    # Non-destructive variant: cleans a duplicate of str.
    #
    # Returns a copy of this String as valid UTF-8.
    def clean(str)
      copy = str.dup
      clean!(copy)
    end
  end
end
21
+
22
+ if RUBY_VERSION <= '1.9'
23
+ require 'resque/vendor/utf8_util/utf8_util_18'
24
+ else
25
+ require 'resque/vendor/utf8_util/utf8_util_19'
26
+ end
@@ -0,0 +1,91 @@
1
+ require 'strscan'
2
+
3
# Ruby 1.8 implementation of the UTF8Util helpers. It relies on String#[]
# with an Integer index returning the byte value, as it does on Ruby 1.8.
module UTF8Util
  # Matches any byte with the high bit set, i.e. any non-ASCII byte.
  HIGH_BIT_RANGE = /[\x80-\xff]/

  # Check if this String is valid UTF-8
  #
  # Returns true or false.
  def self.valid?(str)
    sc = StringScanner.new(str)

    while sc.skip_until(HIGH_BIT_RANGE)
      # skip_until leaves the scanner just past the match; step back onto it
      sc.pos -= 1

      return false unless sequence_length(sc)
    end

    true
  end

  # Replace invalid UTF-8 character sequences with a replacement character
  #
  # Returns self as valid UTF-8.
  def self.clean!(str)
    sc = StringScanner.new(str)
    while sc.skip_until(HIGH_BIT_RANGE)
      pos = sc.pos = sc.pos - 1

      # Only the offending lead byte is replaced; the scanner then resumes at
      # the following byte, so stray continuation bytes are handled in turn.
      str[pos] = REPLACEMENT_CHAR unless sequence_length(sc)
    end

    str
  end

  # Validate the UTF-8 sequence at the current scanner position.
  #
  # scanner - StringScanner instance so we can advance the pointer as we verify.
  #
  # On success the scanner is left after the sequence; on failure it is left
  # one byte past the lead byte.
  #
  # Returns The length in bytes of this UTF-8 sequence, false if invalid.
  def self.sequence_length(scanner)
    lead = scanner.get_byte
    return false unless lead
    leader = lead[0]

    length =
      if (leader >> 5) == 0x6       # 110xxxxx
        2
      elsif (leader >> 4) == 0x0e   # 1110xxxx
        3
      elsif (leader >> 3) == 0x1e   # 11110xxx
        4
      end
    return false unless length

    resume_at = scanner.pos
    (length - 1).times do
      next if check_next_sequence(scanner)

      # rewind to just after the lead byte, whatever was consumed so far
      scanner.pos = resume_at
      return false
    end

    length
  end

  # NOTE: `private` has no effect on methods defined with `def self.`, so the
  # method below remains publicly callable.
  private

  # Read another byte off the scanner, moving the scan position forward one
  # place, and check that it is a continuation byte (10xxxxxx).
  #
  # Returns true or false; false (instead of raising on nil) when the string
  # ends before the sequence is complete.
  def self.check_next_sequence(scanner)
    byte = scanner.get_byte
    return false unless byte

    (byte[0] >> 6) == 0x2
  end
end
@@ -0,0 +1,6 @@
1
module UTF8Util
  # Replace invalid UTF-8 character sequences with REPLACEMENT_CHAR.
  #
  # str - the String to clean. Its encoding tag may be changed in place.
  #
  # A String tagged UTF-8 is returned untouched only when its bytes really
  # are valid UTF-8. Previously any UTF-8-tagged String was returned as-is,
  # invalid bytes included.
  #
  # Returns a String that is valid UTF-8.
  def self.clean!(str)
    if str.encoding.to_s == "UTF-8"
      return str if str.valid_encoding?
      # scrub! replaces only the invalid bytes and keeps valid multibyte
      # characters; it is used where available.
      return str.scrub!(REPLACEMENT_CHAR) if str.respond_to?(:scrub!)
    end

    # Reinterpret as raw bytes: every non-ASCII byte becomes REPLACEMENT_CHAR.
    str.force_encoding("binary").encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => REPLACEMENT_CHAR)
  end
end
@@ -0,0 +1,3 @@
1
module Resque
  # Gem version string; `Version` is a second name for the same String.
  VERSION = '1.0.0'
  Version = VERSION
end
@@ -0,0 +1,892 @@
1
+ require 'time'
2
+ require 'set'
3
+
4
+ module Resque
5
+ # A Resque Worker processes jobs. On platforms that support fork(2),
6
+ # the worker will fork off a child to process each job. This ensures
7
+ # a clean slate when beginning the next job and cuts down on gradual
8
+ # memory growth as well as low level failures.
9
+ #
10
+ # It also ensures workers are always listening to signals from you,
11
+ # their master, and can react accordingly.
12
+ class Worker
13
+ include Resque::Helpers
14
+ extend Resque::Helpers
15
+ include Resque::Logging
16
+
17
+ @@all_heartbeat_threads = []
18
# Kills every thread recorded in @@all_heartbeat_threads, waits for each of
# them to finish, then resets the list.
def self.kill_all_heartbeat_threads
  @@all_heartbeat_threads.each { |thread| thread.kill }
  @@all_heartbeat_threads.each { |thread| thread.join }
  @@all_heartbeat_threads = []
end
22
+
23
# Returns whatever Resque.redis returns; also available as #data_store.
def redis
  Resque.redis
end
alias :data_store :redis
27
+
28
# Class-level counterpart of #redis: returns Resque.redis.
def self.redis
  Resque.redis
end
31
+
32
# Class-level counterpart of #data_store: same object as Worker.redis.
def self.data_store
  self.redis
end
35
+
36
+ # Given a Ruby object, returns a string suitable for storage in a
37
+ # queue.
38
def encode(object)
  # delegates to the module-level encoder
  Resque.encode(object)
end
41
+
42
+ # Given a string, returns a Ruby object.
43
def decode(object)
  # delegates to the module-level decoder
  Resque.decode(object)
end
46
+
47
+ attr_accessor :term_timeout
48
+
49
+ # decide whether to use new_kill_child logic
50
+ attr_accessor :term_child
51
+
52
+ # should term kill workers gracefully (vs. immediately)
53
+ # Makes SIGTERM work like SIGQUIT
54
+ attr_accessor :graceful_term
55
+
56
+ # When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
57
+ # registered in the application. Otherwise, forked workers exit with `exit!`
58
+ attr_accessor :run_at_exit_hooks
59
+
60
+ attr_writer :fork_per_job
61
+ attr_writer :hostname
62
+ attr_writer :to_s
63
+ attr_writer :pid
64
+
65
+ # Returns an array of all worker objects.
66
def self.all
  # The ids come straight from the data store, so find is told to skip its
  # own existence check for each of them.
  workers = data_store.worker_ids.map do |worker_id|
    find(worker_id, :skip_exists => true)
  end
  workers.compact
end
69
+
70
+ # Returns an array of all worker objects currently processing
71
+ # jobs.
72
def self.working
  names = all
  return [] unless names.any?

  reportedly_working = {}

  begin
    # Fetch every worker's payload at once; blank payloads mean "not working".
    reportedly_working = data_store.workers_map(names).reject do |_key, value|
      value.nil? || value.empty?
    end
  rescue Redis::Distributed::CannotDistribute
    # Fall back to one lookup per worker. Key by the worker's string id: the
    # mapping step below calls String#sub on every key, which failed when the
    # keys were the Worker objects themselves.
    names.each do |name|
      value = data_store.get_worker_payload(name)
      reportedly_working[name.to_s] = value unless value.nil? || value.empty?
    end
  end

  # Keys may carry a "worker:" prefix (presumably the store's key namespace —
  # confirm against workers_map); strip it to recover the worker id.
  reportedly_working.keys.map do |key|
    worker = find(key.sub("worker:", ''), :skip_exists => true)
    worker.job = worker.decode(reportedly_working[key])
    worker
  end.compact
end
95
+
96
+ # Returns a single worker object. Accepts a string id.
97
# worker_id - String of the form "host:pid:queue1,queue2".
# options   - Hash; :skip_exists => true skips the exists? lookup.
#
# Returns a Worker, or nil when the id is not known to the data store.
def self.find(worker_id, options = {})
  skip_exists = options[:skip_exists]
  return nil unless skip_exists || exists?(worker_id)

  # Limit the split to 3 parts so queue names that themselves contain ':'
  # are kept whole instead of being cut at their first colon.
  host, pid, queues_raw = worker_id.split(':', 3)
  queues = queues_raw.split(',')
  worker = new(*queues)
  worker.hostname = host
  worker.to_s = worker_id
  worker.pid = pid.to_i
  worker
end
112
+
113
+ # Alias of `find`
114
def self.attach(worker_id)
  # no options are passed, so find performs its exists? check
  find(worker_id)
end
117
+
118
+ # Given a string worker id, return a boolean indicating whether the
119
+ # worker exists
120
def self.exists?(worker_id)
  # the answer comes from the data store's worker_exists?
  data_store.worker_exists?(worker_id)
end
123
+
124
+ # Workers should be initialized with an array of string queue
125
+ # names. The order is important: a Worker will check the first
126
+ # queue given for a job. If none is found, it will check the
127
+ # second queue name given. If a job is found, it will be
128
+ # processed. Upon completion, the Worker will again check the
129
+ # first queue given, and so forth. In this way the queue list
130
+ # passed to a Worker on startup defines the priorities of queues.
131
+ #
132
+ # If passed a single "*", this Worker will operate on all queues
133
+ # in alphabetical order. Queues can be dynamically added or
134
+ # removed without needing to restart workers using this method.
135
+ #
136
+ # Workers should have `#prepare` called after they are initialized
137
+ # if you are running work on the worker.
138
def initialize(*queues)
  @shutdown = @paused = nil
  @before_first_fork_hook_ran = false

  # LOGGING wins over VERBOSE when both are set
  verbosity = ENV['LOGGING'] || ENV['VERBOSE']
  self.verbose = verbosity if verbosity

  very_verbosity = ENV['VVERBOSE']
  self.very_verbose = very_verbosity if very_verbosity

  # The remaining settings are taken from the environment as-is (Strings
  # when set); only term_timeout has a default.
  self.term_timeout = ENV['RESQUE_TERM_TIMEOUT'] || 4.0
  self.term_child = ENV['TERM_CHILD']
  self.graceful_term = ENV['GRACEFUL_TERM']
  self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']

  # queues= falls back to the environment and validates the list
  self.queues = queues
end
153
+
154
+ # Daemonizes the worker if ENV['BACKGROUND'] is set and writes
155
+ # the process id to ENV['PIDFILE'] if set. Should only be called
156
+ # once per worker.
157
def prepare
  background = ENV['BACKGROUND']
  pidfile = ENV['PIDFILE']

  if background
    abort "env var BACKGROUND is set, which requires ruby >= 1.9" unless Process.respond_to?('daemon')
    Process.daemon(true)
  end

  # Written after daemonizing, so the pid recorded is taken at this point.
  File.open(pidfile, 'w') { |f| f << pid } if pidfile

  self.reconnect if background
end
171
+
172
# Sets the queue list this worker listens on.
#
# queues - Array of queue names; when empty, the comma-separated QUEUES (or
#          QUEUE) environment variable is used instead.
#
# Raises (via validate_queues) when the resulting list is empty.
def queues=(queues)
  queues = (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') if queues.empty?
  @queues = queues.map { |queue| queue.to_s.strip }

  joined = @queues.join
  dynamic = ['*', '?', '{', '}', '[', ']'].any? { |char| joined.include?(char) }

  # Always reassign: previously a static list cached by an earlier call
  # survived a later switch to glob patterns, and #queues kept returning it.
  @static_queues = dynamic ? nil : @queues.flatten.uniq

  validate_queues
end
180
+
181
+ # A worker must be given a queue, otherwise it won't know what to
182
+ # do with itself.
183
+ #
184
+ # You probably never need to call this.
185
def validate_queues
  # an unset list and an empty list are both rejected
  return unless @queues.nil? || @queues.empty?

  raise NoQueueError.new("Please give each worker at least one queue.")
end
190
+
191
+ # Returns a list of queues to use when searching for a job.
192
+ # A splat ("*") means you want every queue (in alpha order) - this
193
+ # can be useful for dynamically adding new queues.
194
def queues
  if @static_queues
    # no glob characters were given: the list never changes
    @static_queues
  else
    # expand each pattern against the queues that currently exist
    @queues.map { |pattern| glob_match(pattern) }.flatten.uniq
  end
end
198
+
199
# Returns the names from Resque.queues that match the shell-style pattern,
# sorted.
def glob_match(pattern)
  matching = Resque.queues.select { |queue| File.fnmatch?(pattern, queue) }
  matching.sort
end
204
+
205
+ # This is the main workhorse method. Called on a Worker instance,
206
+ # it begins the worker life cycle.
207
+ #
208
+ # The following events occur during a worker's life cycle:
209
+ #
210
+ # 1. Startup: Signals are registered, dead workers are pruned,
211
+ # and this worker is registered.
212
+ # 2. Work loop: Jobs are pulled from a queue and processed.
213
+ # 3. Teardown: This worker is unregistered.
214
+ #
215
+ # Can be passed a float representing the polling frequency.
216
+ # The default is 5 seconds, but for a semi-active site you may
217
+ # want to use a smaller value.
218
+ #
219
+ # Also accepts a block which will be passed the job as soon as it
220
+ # has completed processing. Useful for testing.
221
def work(interval = 5.0, &block)
  # Float() raises for values it cannot convert (e.g. a non-numeric String)
  interval = Float(interval)
  startup

  loop do
    break if shutdown?

    # work_one_job returns false when paused or when no job was reserved
    unless work_one_job(&block)
      # a zero interval means "stop once there is nothing to do"
      break if interval.zero?
      log_with_severity :debug, "Sleeping for #{interval} seconds"
      procline paused? ? "Paused" : "Waiting for #{queues.join(',')}"
      sleep interval
    end
  end

  unregister_worker
rescue Exception => exception
  # A SystemExit outside a child, with run_at_exit_hooks set, returns quietly
  # without unregistering here.
  return if exception.class == SystemExit && !@child && run_at_exit_hooks
  # NOTE: this message is logged for any exception escaping the body above,
  # not only for failures during startup.
  log_with_severity :error, "Failed to start worker : #{exception.inspect}"
  unregister_worker(exception)
end
242
+
243
# Processes one job: the one given, or one taken from #reserve.
#
# Returns false when paused or when there is no job, true after a job has
# been handed to perform / perform_with_fork and marked done.
def work_one_job(job = nil, &block)
  return false if paused?

  job ||= reserve
  return false unless job

  working_on job
  procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"

  log_with_severity :info, "got: #{job.inspect}"
  job.worker = self

  fork_per_job? ? perform_with_fork(job, &block) : perform(job, &block)

  done_working
  true
end
262
+
263
+ # DEPRECATED. Processes a single job. If none is given, it will
264
+ # try to produce one. Usually run in the child.
265
def process(job = nil, &block)
  # NOTE(review): the `ensure` below also runs on this early return, so
  # done_working is called even when no job was reserved — confirm intended.
  return unless job ||= reserve

  job.worker = self
  working_on job
  perform(job, &block)
ensure
  done_working
end
274
+
275
+ # Reports the exception and marks the job as failed
276
def report_failed_job(job,exception)
  log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"

  # Each step is guarded separately so a failure while recording the job
  # failure does not prevent the failed counter from being bumped.
  begin
    job.fail(exception)
  rescue Object => reporting_error
    log_with_severity :error, "Received exception when reporting failure: #{reporting_error.inspect}"
  end

  begin
    failed!
  rescue Object => counter_error
    log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{counter_error.inspect}"
  end
end
289
+
290
+
291
+ # Processes a given job in the child.
292
def perform(job)
  # reconnect and run the after_fork hooks only when forking per job
  if fork_per_job?
    reconnect
    run_hook :after_fork, job
  end
  job.perform
rescue Object => error
  report_failed_job(job,error)
else
  log_with_severity :info, "done: #{job.inspect}"
ensure
  # the optional block sees the job whether it succeeded or failed
  yield job if block_given?
end
307
+
308
+ # Attempts to grab a job off one of the provided queues. Returns
309
+ # nil if no job can be found.
310
def reserve
  # queues are tried in order; the first one that yields a job wins
  queues.each do |queue|
    log_with_severity :debug, "Checking #{queue}"
    job = Resque.reserve(queue)
    next unless job

    log_with_severity :debug, "Found job on #{queue}"
    return job
  end

  nil
rescue Exception => error
  # log, then re-raise the same exception
  log_with_severity :error, "Error reserving job: #{error.inspect}"
  log_with_severity :error, error.backtrace.join("\n")
  raise error
end
325
+
326
+ # Reconnect to Redis to avoid sharing a connection with the parent,
327
+ # retry up to 3 times with increasing delay before giving up.
328
def reconnect
  attempts = 0
  begin
    data_store.reconnect
  rescue Redis::BaseConnectionError
    attempts += 1
    if attempts > 3
      log_with_severity :error, "Error reconnecting to Redis; quitting"
      raise
    end

    log_with_severity :error, "Error reconnecting to Redis; retrying"
    # back off 1, 2, then 3 seconds
    sleep(attempts)
    retry
  end
end
343
+
344
+ # Runs all the methods needed when a worker begins its lifecycle.
345
def startup
  # process title shown while starting
  $0 = "resque: Starting"

  enable_gc_optimizations
  register_signal_handlers
  # the heartbeat is started before pruning and before this worker is
  # registered
  start_heartbeat
  prune_dead_workers
  run_hook :before_first_fork
  register_worker

  # Fix buffering so we can `rake resque:work > resque.log` and
  # get output from the child in there.
  $stdout.sync = true
end
359
+
360
+ # Enables GC Optimizations if you're running REE.
361
+ # http://www.rubyenterpriseedition.com/faq.html#adapt_apps_for_cow
362
def enable_gc_optimizations
  # only interpreters that expose this GC setter are affected
  GC.copy_on_write_friendly = true if GC.respond_to?(:copy_on_write_friendly=)
end
367
+
368
+ # Registers the various signal handlers a worker responds to.
369
+ #
370
+ # TERM: Shutdown immediately, stop processing jobs (or, when graceful_term is set, shut down after the current job like QUIT).
371
+ # INT: Shutdown immediately, stop processing jobs.
372
+ # QUIT: Shutdown after the current job has finished processing.
373
+ # USR1: Kill the forked child immediately, continue processing jobs.
374
+ # USR2: Don't process any new jobs
375
+ # CONT: Start processing jobs again after a USR2
376
def register_signal_handlers
  # with graceful_term set, TERM behaves like QUIT
  trap('TERM') { graceful_term ? shutdown : shutdown! }
  trap('INT') { shutdown! }

  begin
    trap('QUIT') { shutdown }
    # term_child selects the TERM-then-KILL way of stopping the child
    if term_child
      trap('USR1') { new_kill_child }
    else
      trap('USR1') { kill_child }
    end
    trap('USR2') { pause_processing }
    trap('CONT') { unpause_processing }
  rescue ArgumentError
    # trap raised ArgumentError for one of the signals above; carry on with
    # whichever handlers were installed before it
    log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
  end

  log_with_severity :debug, "Registered signals"
end
395
+
396
# Replaces the handlers installed by register_signal_handlers: TERM raises
# TermException once, and INT, QUIT, USR1 and USR2 go back to their default
# actions. CONT is not touched here.
def unregister_signal_handlers
  trap('TERM') do
    # re-trap first so that only the first TERM raises
    trap ('TERM') do
      # ignore subsequent terms
    end
    raise TermException.new("SIGTERM")
  end
  trap('INT', 'DEFAULT')

  begin
    trap('QUIT', 'DEFAULT')
    trap('USR1', 'DEFAULT')
    trap('USR2', 'DEFAULT')
  rescue ArgumentError
    # an ArgumentError from trap is deliberately ignored here
  end
end
412
+
413
+ # Schedule this worker for shutdown. Will finish processing the
414
+ # current job.
415
def shutdown
  log_with_severity :info, 'Exiting...'
  # only sets the flag; the work loop checks shutdown? between jobs
  @shutdown = true
end
419
+
420
+ # Kill the child and shutdown immediately.
421
+ # If not forking, abort this process.
422
def shutdown!
  shutdown

  # without term_child the child is stopped with kill_child
  return kill_child unless term_child
  return new_kill_child if fork_per_job?

  # Not forking: raise TermException in the same process
  trap('TERM') do
    # ignore subsequent terms
  end
  raise TermException.new("SIGTERM")
end
438
+
439
+ # Should this worker shutdown as soon as current job is finished?
440
def shutdown?
  # nil until shutdown has been called, true afterwards
  @shutdown
end
443
+
444
+ # Kills the forked child immediately, without remorse. The job it
445
+ # is processing will not be completed.
446
def kill_child
  return unless @child

  log_with_severity :debug, "Killing child at #{@child}"

  # Backticks return ps's output, a String that is always truthy (even when
  # empty), so the "not found" branch could never run. Branch on the exit
  # status of ps instead.
  `ps -o pid,state -p #{@child}`
  if $?.success?
    # best effort: an error raised by kill is ignored
    Process.kill("KILL", @child) rescue nil
  else
    log_with_severity :debug, "Child #{@child} not found, restarting."
    shutdown
  end
end
457
+
458
# Returns whatever the data store's heartbeat returns for this worker
# (presumably the last recorded heartbeat — confirm in the data store).
def heartbeat
  data_store.heartbeat(self)
end
461
+
462
# Asks the data store to remove this worker's heartbeat.
def remove_heartbeat
  data_store.remove_heartbeat(self)
end
465
+
466
# Records a heartbeat for this worker.
#
# time - the timestamp to record; defaults to the data store's server_time.
def heartbeat!(time = data_store.server_time)
  data_store.heartbeat!(self, time)
end
469
+
470
# Returns the data store's all_heartbeats result; callers in this class
# index it by worker id string.
def self.all_heartbeats
  data_store.all_heartbeats
end
473
+
474
+ # Returns a list of workers that have sent a heartbeat in the past, but which
475
+ # already expired (does NOT include workers that have never sent a heartbeat at all).
476
def self.all_workers_with_expired_heartbeats
  workers = Worker.all
  heartbeats = Worker.all_heartbeats
  now = data_store.server_time

  workers.select do |worker|
    last_beat = heartbeats[worker.to_s]
    # workers with no recorded heartbeat are never reported as expired
    next false unless last_beat

    (now - Time.parse(last_beat)).to_i > Resque.prune_interval
  end
end
493
+
494
# Starts a background thread that records a heartbeat for this worker every
# Resque.heartbeat_interval seconds until the thread signal is raised.
def start_heartbeat
  # drop any heartbeat already recorded under this worker
  remove_heartbeat

  @heartbeat_thread_signal = Resque::ThreadSignal.new

  @heartbeat_thread = Thread.new do
    loop do
      heartbeat!
      # doubles as the sleep between beats; true means "stop"
      signaled = @heartbeat_thread_signal.wait_for_signal(Resque.heartbeat_interval)
      break if signaled
    end
  end

  # tracked class-wide so kill_all_heartbeat_threads can reach it
  @@all_heartbeat_threads << @heartbeat_thread
end
509
+
510
+ # Kills the forked child immediately with minimal remorse. The job it
511
+ # is processing will not be completed. Send the child a TERM signal,
512
+ # wait up to term_timeout seconds (4.0 by default), and then a KILL signal if it has not quit
513
def new_kill_child
  if @child
    # a non-blocking waitpid returns nil while the child is still running
    unless Process.waitpid(@child, Process::WNOHANG)
      log_with_severity :debug, "Sending TERM signal to child #{@child}"
      Process.kill("TERM", @child)
      # poll every 0.1s for up to term_timeout seconds
      (term_timeout.to_f * 10).round.times do |i|
        sleep(0.1)
        return if Process.waitpid(@child, Process::WNOHANG)
      end
      log_with_severity :debug, "Sending KILL signal to child #{@child}"
      Process.kill("KILL", @child)
    else
      log_with_severity :debug, "Child #{@child} already quit."
    end
  end
rescue SystemCallError
  # raised by waitpid/kill, e.g. when the child no longer exists
  log_with_severity :error, "Child #{@child} already quit and reaped."
end
531
+
532
+ # are we paused?
533
def paused?
  # nil initially, true after pause_processing, false after unpause_processing
  @paused
end
536
+
537
+ # Stop processing jobs after the current one has completed (if we're
538
+ # currently running one).
539
def pause_processing
  log_with_severity :info, "USR2 received; pausing job processing"
  # the before_pause hooks run before the flag is set
  run_hook :before_pause, self
  @paused = true
end
544
+
545
+ # Start processing jobs again after a pause
546
def unpause_processing
  log_with_severity :info, "CONT received; resuming job processing"
  # the flag is cleared before the after_pause hooks run
  @paused = false
  run_hook :after_pause, self
end
551
+
552
+ # Looks for any workers which should be running on this server
553
+ # and, if they're not, removes them from Redis.
554
+ #
555
+ # This is a form of garbage collection. If a server is killed by a
556
+ # hard shutdown, power failure, or something else beyond our
557
+ # control, the Resque workers will not die gracefully and therefore
558
+ # will leave stale state information in Redis.
559
+ #
560
+ # By checking the current Redis state against the actual
561
+ # environment, we can determine if Redis is old and clean it up a bit.
562
def prune_dead_workers
  all_workers = Worker.all

  # Only gather pids and heartbeats when there is something to check; with
  # no workers the loop below never runs, so the unset locals are not read.
  unless all_workers.empty?
    known_workers = worker_pids
    all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
  end

  all_workers.each do |worker|
    # If the worker's heartbeat has expired, remove it from the registry.
    #
    # If the worker hasn't ever sent a heartbeat, we won't remove it: since
    # the first heartbeat is sent before the worker is registered, it means
    # that this is a worker that doesn't support heartbeats, e.g., another
    # client library or an older version of Resque. We won't touch these.
    if all_workers_with_expired_heartbeats.include?(worker)
      log_with_severity :info, "Pruning dead worker: #{worker}"
      worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s))
      next
    end

    # Limit the split to 3 parts so queue names containing ':' stay whole;
    # otherwise such workers never matched our queue set and were never
    # pruned by the pid check below.
    host, pid, worker_queues_raw = worker.id.split(':', 3)
    worker_queues = worker_queues_raw.split(",")
    unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
      # If the worker we are trying to prune does not belong to the queues
      # we are listening to, we should not touch it.
      # Attempt to prune a worker from different queues may easily result in
      # an unknown class exception, since that worker could easily be even
      # written in different language.
      next
    end

    # only workers registered from this host can be checked against local pids
    next unless host == hostname
    next if known_workers.include?(pid)

    log_with_severity :debug, "Pruning dead worker: #{worker}"
    worker.unregister_worker
  end
end
601
+
602
+ # Registers ourself as a worker. Useful when entering the worker
603
+ # lifecycle on startup.
604
def register_worker
  # the registration itself is implemented by the data store
  data_store.register_worker(self)
end
607
+
608
+ # Runs a named hook, passing along any arguments.
609
def run_hook(name, *args)
  # hooks are looked up by calling the same-named method on Resque
  hooks = Resque.send(name)
  return unless hooks

  first_fork = (name == :before_first_fork)
  # before_first_fork hooks run at most once per worker
  return if first_fork && @before_first_fork_hook_ran

  with_args = args.any?
  msg = with_args ? "Running #{name} hooks with #{args.inspect}" : "Running #{name} hooks"
  log_with_severity :info, msg

  hooks.each do |hook|
    if with_args
      hook.call(*args)
    else
      hook.call
    end
    @before_first_fork_hook_ran = true if first_fork
  end
end
621
+
622
# Stops the heartbeat thread, if one was started, and waits for it to end.
def kill_background_threads
  return unless @heartbeat_thread

  @heartbeat_thread_signal.signal
  @heartbeat_thread.join
end
628
+
629
+ # Unregisters ourself as a worker. Useful when shutting down.
630
# exception - optional exception to record against a job that is still
#             being processed; a DirtyExit is used when none is given.
def unregister_worker(exception = nil)
  # If we're still processing a job, make sure it gets logged as a
  # failure.
  if (hash = processing) && !hash.empty?
    job = Job.new(hash['queue'], hash['payload'])
    # Ensure the proper worker is attached to this job, even if
    # it's not the precise instance that died.
    job.worker = self
    begin
      job.fail(exception || DirtyExit.new("Job still being processed"))
    rescue RuntimeError => e
      # a RuntimeError while recording the failure is logged, not raised
      log_with_severity :error, e.message
    end
  end

  kill_background_threads

  # the per-worker counters are cleared inside the block handed to the store
  data_store.unregister_worker(self) do
    Stat.clear("processed:#{self}")
    Stat.clear("failed:#{self}")
  end
rescue Exception => exception_while_unregistering
  # Re-raise the same class of error with the original exception's class,
  # message and backtrace appended to the message.
  message = exception_while_unregistering.message
  if exception
    message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
    message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
  end
  fail(exception_while_unregistering.class,
       message,
       exception_while_unregistering.backtrace)
end
661
+
662
+ # Given a job, tells Redis we're working on it. Useful for seeing
663
+ # what workers are doing and when.
664
def working_on(job)
  # run_at is the current time in UTC, ISO 8601 formatted
  details = {
    :queue => job.queue,
    :run_at => Time.now.utc.iso8601,
    :payload => job.payload
  }
  data_store.set_worker_payload(self, encode(details))
end
671
+
672
+ # Called when we are done working - clears our `working_on` state
673
+ # and tells Redis we processed a job.
674
def done_working
  # the processed counters are bumped inside the block handed to the store
  data_store.worker_done_working(self) do
    processed!
  end
end
679
+
680
+ # How many jobs has this worker processed? Returns an int.
681
def processed
  # per-worker counter, keyed by this worker's string id
  Stat["processed:#{self}"]
end
684
+
685
+ # Tell Redis we've processed a job.
686
def processed!
  # bump both the global counter and this worker's own counter
  Stat << "processed"
  Stat << "processed:#{self}"
end
690
+
691
+ # How many failed jobs has this worker seen? Returns an int.
692
def failed
  # per-worker counter, keyed by this worker's string id
  Stat["failed:#{self}"]
end
695
+
696
+ # Tells Redis we've failed a job.
697
def failed!
  # bump both the global counter and this worker's own counter
  Stat << "failed"
  Stat << "failed:#{self}"
end
701
+
702
+ # What time did this worker start? Returns an instance of `Time`
703
def started
  # read back from the data store
  data_store.worker_start_time(self)
end
706
+
707
+ # Tell Redis we've started
708
def started!
  # recorded by the data store
  data_store.worker_started(self)
end
711
+
712
+ # Returns a hash explaining the Job we're currently processing, if any.
713
# reload - when true (the default) the payload is always fetched again; when
#          false a previously fetched value is reused if there is one.
def job(reload = true)
  return @job if @job && !reload

  # an absent payload decodes to nil, which is reported as an empty Hash
  @job = decode(data_store.get_worker_payload(self)) || {}
end
717
+ attr_writer :job
718
+ alias_method :processing, :job
719
+
720
+ # Boolean - true if working, false if not
721
def working?
  # state is looked up on every call
  state == :working
end
724
+
725
+ # Boolean - true if idle, false if not
726
def idle?
  # state is looked up on every call
  state == :idle
end
729
+
730
# Whether each job runs in a forked child. Decided once and then remembered:
# an explicitly assigned value wins, otherwise forking is used unless
# FORK_PER_JOB is 'false' or Kernel does not respond to fork.
def fork_per_job?
  unless defined?(@fork_per_job)
    @fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
  end
  @fork_per_job
end
734
+
735
+ # Returns a symbol representing the current worker state,
736
+ # which can be either :working or :idle
737
def state
  # Any stored payload for this worker means a job is in flight.
  if data_store.get_worker_payload(self)
    :working
  else
    :idle
  end
end
740
+
741
+ # Is this worker the same as another worker?
742
+ def ==(other)
743
+ to_s == other.to_s
744
+ end
745
+
746
+ def inspect
747
+ "#<Worker #{to_s}>"
748
+ end
749
+
750
+ # The string representation is the same as the id for this worker
751
+ # instance. Can be used with `Worker.find`.
752
+ def to_s
753
+ @to_s ||= "#{hostname}:#{pid}:#{@queues.join(',')}"
754
+ end
755
+ alias_method :id, :to_s
756
+
757
+ # Hostname of this worker's machine, as reported by Socket.gethostname (memoized)
758
def hostname
  return @hostname if @hostname

  # Looked up once via Socket.gethostname, then cached.
  @hostname = Socket.gethostname
end
761
+
762
+ # Returns Integer PID of running worker
763
def pid
  return @pid if @pid

  # Captured once from Process.pid, then cached.
  @pid = Process.pid
end
766
+
767
+ # Returns an Array of string pids of all the other workers on this
768
+ # machine. Useful when pruning dead workers on startup.
769
def worker_pids
  # Dispatch on the platform string; anything that is neither Solaris nor
  # mingw32 uses the ps-based Linux/OS X implementation.
  case RUBY_PLATFORM
  when /solaris/ then solaris_worker_pids
  when /mingw32/ then windows_worker_pids
  else linux_worker_pids
  end
end
778
+
779
+ # Windows: returns an Array of string pids of every ruby.exe process
779
+ # reported by tasklist. Useful when pruning dead workers on startup.
781
def windows_worker_pids
  raw_listing = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`
  # Re-encode from the locale charmap to UTF-8 before parsing.
  listing = raw_listing.encode("UTF-8", Encoding.locale_charmap)

  # Keep only the "PID: <n>" lines and strip the label from each.
  listing.split($/).grep(/^PID:/).map { |entry| entry.gsub(/PID:\s+/, '') }
end
785
+
786
+ # Find Resque worker pids on Linux and OS X.
787
+ #
788
def linux_worker_pids
  ps_output = `ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`
  # The pid is the first whitespace-separated column of each ps line.
  ps_output.split("\n").map { |entry| entry.split(' ').first }
end
793
+
794
+ # Find Resque worker pids on Solaris.
795
+ #
796
+ # Returns an Array of string pids of all the other workers on this
797
+ # machine. Useful when pruning dead workers on startup.
798
def solaris_worker_pids
  ruby_processes = `ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n")

  ruby_processes.each_with_object([]) do |entry, pids|
    real_pid = entry.split(' ')[0]
    # Ask pargs for the process arguments and compare the second
    # colon-separated field with this version's procline tag.
    pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
    is_worker = pargs_command.split(':')[1] == " resque-#{Resque::Version}"
    pids << real_pid if is_worker && real_pid
  end
end
807
+
808
+ # Given a string, sets the procline ($0) and logs.
809
+ # Procline is always in the format of:
810
+ # RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
811
def procline(string)
  title = "#{ENV['RESQUE_PROCLINE_PREFIX']}resque-#{Resque::Version}: #{string}"
  $0 = title
  # Log whatever $0 reads back as, at debug severity.
  log_with_severity(:debug, $0)
end
815
+
816
# Forwards `message` to `info`.
def log(message)
  info message
end
819
+
820
# Forwards `message` to `debug`.
def log!(message)
  debug message
end
823
+
824
+
825
# Reader for the flag stored by `verbose=` (nil until assigned).
def verbose; @verbose; end
828
+
829
# Reader for the flag stored by `very_verbose=` (nil until assigned).
def very_verbose; @very_verbose; end
832
+
833
# Stores the verbose flag and adjusts the shared Resque logger:
#   * falsy value                      -> QuietFormatter (level untouched)
#   * truthy value, very_verbose unset -> VerboseFormatter at INFO
#   * truthy value, very_verbose set   -> logger left as it is
def verbose=(value)
  if !value
    Resque.logger.formatter = QuietFormatter.new
  elsif !very_verbose
    Resque.logger.formatter = VerboseFormatter.new
    Resque.logger.level = Logger::INFO
  end

  @verbose = value
end
843
+
844
# Stores the very-verbose flag and adjusts the shared Resque logger:
#   * truthy value                -> VeryVerboseFormatter at DEBUG
#   * falsy value, verbose set    -> VerboseFormatter at INFO
#   * falsy value, verbose unset  -> QuietFormatter (level untouched)
def very_verbose=(value)
  formatter_class, level =
    if value
      [VeryVerboseFormatter, Logger::DEBUG]
    elsif verbose
      [VerboseFormatter, Logger::INFO]
    else
      [QuietFormatter, nil]
    end

  Resque.logger.formatter = formatter_class.new
  Resque.logger.level = level if level

  @very_verbose = value
end
857
+
858
+ private
859
+
860
# Runs `job` in a forked child process and blocks until that child exits.
#
# The :before_fork hook runs in the parent first. Inside the child, signal
# handlers are unregistered when `term_child` is set, the job is performed,
# and the child leaves via `exit!` unless `run_at_exit_hooks` is set.
#
# If fork raises NotImplementedError, per-job forking is switched off
# (@fork_per_job = false) and the job is performed inline instead.
#
# When the child was terminated by a signal, the job is failed with a
# DirtyExit that names the terminating signal number.
#
# job   - the job to perform; must respond to `fail`.
# block - optional block forwarded to `perform`.
def perform_with_fork(job, &block)
  run_hook :before_fork, job

  begin
    @child = fork do
      unregister_signal_handlers if term_child
      perform(job, &block)
      exit! unless run_at_exit_hooks
    end
  rescue NotImplementedError
    @fork_per_job = false
    perform(job, &block)
    return
  end

  srand # Reseeding
  procline "Forked #{@child} at #{Time.now.to_i}"

  # Capture the child's status locally instead of reading the global $?:
  # if waiting fails, $? would be stale (or nil) and must not be consulted.
  status = nil
  begin
    _pid, status = Process.waitpid2(@child)
  rescue SystemCallError
    nil
  end

  # A signaled child has a termsig; stopsig is only set for stopped children.
  if status && status.signaled?
    job.fail(DirtyExit.new("Child process received unhandled signal #{status.termsig}", status))
  end
  @child = nil
end
887
+
888
# Hands `message` to Logging.log at the given `severity`.
def log_with_severity(severity, message)
  Logging.log severity, message
end
891
+ end
892
+ end