resque_admin 0.2.0

Files changed (59)
  1. checksums.yaml +7 -0
  2. data/HISTORY.md +530 -0
  3. data/LICENSE +20 -0
  4. data/README.markdown +957 -0
  5. data/Rakefile +57 -0
  6. data/bin/resque-admin +81 -0
  7. data/bin/resque-admin-web +31 -0
  8. data/lib/resque_admin.rb +578 -0
  9. data/lib/resque_admin/data_store.rb +325 -0
  10. data/lib/resque_admin/errors.rb +21 -0
  11. data/lib/resque_admin/failure.rb +119 -0
  12. data/lib/resque_admin/failure/airbrake.rb +33 -0
  13. data/lib/resque_admin/failure/base.rb +73 -0
  14. data/lib/resque_admin/failure/multiple.rb +68 -0
  15. data/lib/resque_admin/failure/redis.rb +128 -0
  16. data/lib/resque_admin/failure/redis_multi_queue.rb +104 -0
  17. data/lib/resque_admin/helpers.rb +48 -0
  18. data/lib/resque_admin/job.rb +296 -0
  19. data/lib/resque_admin/log_formatters/quiet_formatter.rb +7 -0
  20. data/lib/resque_admin/log_formatters/verbose_formatter.rb +7 -0
  21. data/lib/resque_admin/log_formatters/very_verbose_formatter.rb +8 -0
  22. data/lib/resque_admin/logging.rb +18 -0
  23. data/lib/resque_admin/plugin.rb +78 -0
  24. data/lib/resque_admin/server.rb +301 -0
  25. data/lib/resque_admin/server/helpers.rb +64 -0
  26. data/lib/resque_admin/server/public/favicon.ico +0 -0
  27. data/lib/resque_admin/server/public/idle.png +0 -0
  28. data/lib/resque_admin/server/public/jquery-1.12.4.min.js +5 -0
  29. data/lib/resque_admin/server/public/jquery.relatize_date.js +95 -0
  30. data/lib/resque_admin/server/public/poll.png +0 -0
  31. data/lib/resque_admin/server/public/ranger.js +78 -0
  32. data/lib/resque_admin/server/public/reset.css +44 -0
  33. data/lib/resque_admin/server/public/style.css +91 -0
  34. data/lib/resque_admin/server/public/working.png +0 -0
  35. data/lib/resque_admin/server/test_helper.rb +19 -0
  36. data/lib/resque_admin/server/views/error.erb +1 -0
  37. data/lib/resque_admin/server/views/failed.erb +29 -0
  38. data/lib/resque_admin/server/views/failed_job.erb +50 -0
  39. data/lib/resque_admin/server/views/failed_queues_overview.erb +24 -0
  40. data/lib/resque_admin/server/views/job_class.erb +6 -0
  41. data/lib/resque_admin/server/views/key_sets.erb +17 -0
  42. data/lib/resque_admin/server/views/key_string.erb +11 -0
  43. data/lib/resque_admin/server/views/layout.erb +44 -0
  44. data/lib/resque_admin/server/views/next_more.erb +22 -0
  45. data/lib/resque_admin/server/views/overview.erb +4 -0
  46. data/lib/resque_admin/server/views/processing.erb +2 -0
  47. data/lib/resque_admin/server/views/queues.erb +58 -0
  48. data/lib/resque_admin/server/views/stats.erb +62 -0
  49. data/lib/resque_admin/server/views/workers.erb +109 -0
  50. data/lib/resque_admin/server/views/working.erb +71 -0
  51. data/lib/resque_admin/stat.rb +58 -0
  52. data/lib/resque_admin/tasks.rb +72 -0
  53. data/lib/resque_admin/thread_signal.rb +24 -0
  54. data/lib/resque_admin/vendor/utf8_util.rb +24 -0
  55. data/lib/resque_admin/version.rb +3 -0
  56. data/lib/resque_admin/worker.rb +917 -0
  57. data/lib/tasks/redis.rake +161 -0
  58. data/lib/tasks/resque_admin.rake +2 -0
  59. metadata +191 -0
data/lib/resque_admin/stat.rb
@@ -0,0 +1,58 @@
+ module ResqueAdmin
+   # The stat subsystem. Used to keep track of integer counts.
+   #
+   #   Get a stat:  Stat[name]
+   #   Incr a stat: Stat.incr(name)
+   #   Decr a stat: Stat.decr(name)
+   #   Kill a stat: Stat.clear(name)
+   module Stat
+     extend self
+
+     # Direct access to the Redis instance.
+     def redis
+       ResqueAdmin.redis
+     end
+     alias :data_store :redis
+
+     # Returns the int value of a stat, given a string stat name.
+     def get(stat)
+       data_store.stat(stat)
+     end
+
+     # Alias of `get`
+     def [](stat)
+       get(stat)
+     end
+
+     # For a string stat name, increments the stat by one.
+     #
+     # Can optionally accept a second int parameter. The stat is then
+     # incremented by that amount.
+     def incr(stat, by = 1)
+       data_store.increment_stat(stat,by)
+     end
+
+     # Increments a stat by one.
+     def <<(stat)
+       incr stat
+     end
+
+     # For a string stat name, decrements the stat by one.
+     #
+     # Can optionally accept a second int parameter. The stat is then
+     # decremented by that amount.
+     def decr(stat, by = 1)
+       data_store.decremet_stat(stat,by)
+     end
+
+     # Decrements a stat by one.
+     def >>(stat)
+       decr stat
+     end
+
+     # Removes a stat from Redis, effectively setting it to 0.
+     def clear(stat)
+       data_store.clear_stat(stat)
+     end
+   end
+ end
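
This Stat module backs the `processed` and `failed` counters the worker below maintains. A minimal usage sketch (the `jobs_retried` stat name is only an example; a configured ResqueAdmin.redis connection is assumed):

    require 'resque_admin'

    ResqueAdmin::Stat.incr('jobs_retried')       # bump the counter by 1
    ResqueAdmin::Stat.incr('jobs_retried', 5)    # bump by an arbitrary amount
    ResqueAdmin::Stat << 'jobs_retried'          # same as incr with the default of 1
    ResqueAdmin::Stat['jobs_retried']            # read back the current integer value
    ResqueAdmin::Stat.clear('jobs_retried')      # delete the key, effectively resetting to 0
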
data/lib/resque_admin/tasks.rb
@@ -0,0 +1,72 @@
+ # require 'resque/tasks'
+ # will give you the resque tasks
+
+
+ namespace :resque do
+   task :setup
+
+   desc "Start a ResqueAdmin worker"
+   task :work => [ :preload, :setup ] do
+     require 'resque_admin'
+
+     begin
+       worker = ResqueAdmin::Worker.new
+     rescue ResqueAdmin::NoQueueError
+       abort "set QUEUE env var, e.g. $ QUEUE=critical,high rake resque:work"
+     end
+
+     worker.prepare
+     worker.log "Starting worker #{self}"
+     worker.work(ENV['INTERVAL'] || 5) # interval, will block
+   end
+
+   desc "Start multiple ResqueAdmin workers. Should only be used in dev mode."
+   task :workers do
+     threads = []
+
+     if ENV['COUNT'].to_i < 1
+       abort "set COUNT env var, e.g. $ COUNT=2 rake resque:workers"
+     end
+
+     ENV['COUNT'].to_i.times do
+       threads << Thread.new do
+         system "rake resque:work"
+       end
+     end
+
+     threads.each { |thread| thread.join }
+   end
+
+   # Preload app files if this is Rails
+   task :preload => :setup do
+     if defined?(Rails)
+       if Rails::VERSION::MAJOR > 3
+         ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
+         Rails.application.config.eager_load_namespaces.each(&:eager_load!)
+
+       elsif Rails::VERSION::MAJOR == 3
+         ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
+         Rails.application.eager_load!
+
+       elsif defined?(Rails::Initializer)
+         $rails_rake_task = false
+         Rails::Initializer.run :load_application_classes
+       end
+     end
+   end
+
+   namespace :failures do
+     desc "Sort the 'failed' queue for the redis_multi_queue failure backend"
+     task :sort do
+       require 'resque_admin'
+       require 'resque_admin/failure/redis'
+
+       warn "Sorting #{ResqueAdmin::Failure.count} failures..."
+       ResqueAdmin::Failure.each(0, ResqueAdmin::Failure.count) do |_, failure|
+         data = ResqueAdmin.encode(failure)
+         ResqueAdmin.redis.rpush(ResqueAdmin::Failure.failure_queue_name(failure['queue']), data)
+       end
+       warn "done!"
+     end
+   end
+ end
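
A sketch of how an application might wire these tasks into its own Rakefile. The require path follows this gem's lib/ layout; the `./config/environment` require is a hypothetical stand-in for whatever loads your app and configures ResqueAdmin.redis:

    # Rakefile
    require 'resque_admin/tasks'

    # Rake tasks are additive, so this block extends the empty resque:setup
    # task defined above instead of replacing it.
    namespace :resque do
      task :setup do
        require './config/environment'   # hypothetical app bootstrap
      end
    end

    # Then, from a shell:
    #   QUEUE=critical,high INTERVAL=1 rake resque:work
    #   COUNT=2 QUEUE=* rake resque:workers   # dev only
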
data/lib/resque_admin/thread_signal.rb
@@ -0,0 +1,24 @@
+ class ResqueAdmin::ThreadSignal
+   def initialize
+     @mutex = Mutex.new
+     @signaled = false
+     @received = ConditionVariable.new
+   end
+
+   def signal
+     @mutex.synchronize do
+       @signaled = true
+       @received.signal
+     end
+   end
+
+   def wait_for_signal(timeout)
+     @mutex.synchronize do
+       unless @signaled
+         @received.wait(@mutex, timeout)
+       end
+
+       @signaled
+     end
+   end
+ end
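
ThreadSignal is a one-shot, mutex-guarded flag; the worker's heartbeat thread (see worker.rb below) uses it to sleep in an interruptible way. A standalone sketch of the pattern:

    signal = ResqueAdmin::ThreadSignal.new

    waiter = Thread.new do
      # Blocks for up to 5 seconds and returns true if signalled first.
      if signal.wait_for_signal(5)
        puts "woken up early"
      else
        puts "timed out"
      end
    end

    sleep 1
    signal.signal   # wakes the waiter well before the timeout
    waiter.join
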
data/lib/resque_admin/vendor/utf8_util.rb
@@ -0,0 +1,24 @@
+ module UTF8Util
+   # use '?' instead of the unicode replace char, since that is 3 bytes
+   # and can increase the string size if it's done a lot
+   REPLACEMENT_CHAR = "?"
+
+   # Replace invalid UTF-8 character sequences with a replacement character
+   #
+   # Returns self as valid UTF-8.
+   def self.clean!(str)
+     raise NotImplementedError
+   end
+
+   # Replace invalid UTF-8 character sequences with a replacement character
+   #
+   # Returns a copy of this String as valid UTF-8.
+   def self.clean(str)
+     clean!(str.dup)
+   end
+
+   def self.clean!(str)
+     return str if str.encoding.to_s == "UTF-8"
+     str.force_encoding("binary").encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => REPLACEMENT_CHAR)
+   end
+ end
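
Note that the second `clean!` definition overrides the NotImplementedError stub above it, and that strings already tagged as UTF-8 are returned untouched even when they contain invalid bytes; re-encoding only happens for strings in another encoding. A quick sketch of the behaviour (the byte values are illustrative):

    raw = "caf\xE9".force_encoding("ASCII-8BIT")   # binary string with a stray Latin-1 byte

    utf8 = UTF8Util.clean(raw)   # re-encodes as UTF-8, replacing the bad byte with '?'
    utf8                         # => "caf?"
    utf8.encoding                # => #<Encoding:UTF-8>
    raw                          # unchanged; clean works on a dup
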
data/lib/resque_admin/version.rb
@@ -0,0 +1,3 @@
+ module ResqueAdmin
+   Version = VERSION = '0.2.0'
+ end
data/lib/resque_admin/worker.rb
@@ -0,0 +1,917 @@
+ require 'time'
+ require 'set'
+ require 'redis/distributed'
+
+ module ResqueAdmin
+   # A ResqueAdmin Worker processes jobs. On platforms that support fork(2),
+   # the worker will fork off a child to process each job. This ensures
+   # a clean slate when beginning the next job and cuts down on gradual
+   # memory growth as well as low level failures.
+   #
+   # It also ensures workers are always listening to signals from you,
+   # their master, and can react accordingly.
+   class Worker
+     include ResqueAdmin::Helpers
+     extend ResqueAdmin::Helpers
+     include ResqueAdmin::Logging
+
+     @@all_heartbeat_threads = []
+     def self.kill_all_heartbeat_threads
+       @@all_heartbeat_threads.each(&:kill).each(&:join)
+       @@all_heartbeat_threads = []
+     end
+
+     def redis
+       ResqueAdmin.redis
+     end
+     alias :data_store :redis
+
+     def self.redis
+       ResqueAdmin.redis
+     end
+
+     def self.data_store
+       self.redis
+     end
+
+     # Given a Ruby object, returns a string suitable for storage in a
+     # queue.
+     def encode(object)
+       ResqueAdmin.encode(object)
+     end
+
+     # Given a string, returns a Ruby object.
+     def decode(object)
+       ResqueAdmin.decode(object)
+     end
+
+     attr_accessor :term_timeout
+
+     attr_accessor :pre_shutdown_timeout
+
+     attr_accessor :term_child_signal
+
+     # decide whether to use new_kill_child logic
+     attr_accessor :term_child
+
+     # should term kill workers gracefully (vs. immediately)
+     # Makes SIGTERM work like SIGQUIT
+     attr_accessor :graceful_term
+
+     # When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
+     # registered in the application. Otherwise, forked workers exit with `exit!`
+     attr_accessor :run_at_exit_hooks
+
+     attr_writer :fork_per_job
+     attr_writer :hostname
+     attr_writer :to_s
+     attr_writer :pid
+
+     # Returns an array of all worker objects.
+     def self.all
+       data_store.worker_ids.map { |id| find(id, :skip_exists => true) }.compact
+     end
+
+     # Returns an array of all worker objects currently processing
+     # jobs.
+     def self.working
+       names = all
+       return [] unless names.any?
+
+       reportedly_working = {}
+
+       begin
+         reportedly_working = data_store.workers_map(names).reject do |key, value|
+           value.nil? || value.empty?
+         end
+       rescue Redis::Distributed::CannotDistribute
+         names.each do |name|
+           value = data_store.get_worker_payload(name)
+           reportedly_working[name] = value unless value.nil? || value.empty?
+         end
+       end
+
+       reportedly_working.keys.map do |key|
+         worker = find(key.sub("worker:", ''), :skip_exists => true)
+         worker.job = worker.decode(reportedly_working[key])
+         worker
+       end.compact
+     end
+
+     # Returns a single worker object. Accepts a string id.
+     def self.find(worker_id, options = {})
+       skip_exists = options[:skip_exists]
+
+       if skip_exists || exists?(worker_id)
+         host, pid, queues_raw = worker_id.split(':')
+         queues = queues_raw.split(',')
+         worker = new(*queues)
+         worker.hostname = host
+         worker.to_s = worker_id
+         worker.pid = pid.to_i
+         worker
+       else
+         nil
+       end
+     end
+
+     # Alias of `find`
+     def self.attach(worker_id)
+       find(worker_id)
+     end
+
+     # Given a string worker id, return a boolean indicating whether the
+     # worker exists
+     def self.exists?(worker_id)
+       data_store.worker_exists?(worker_id)
+     end
+
+     # Workers should be initialized with an array of string queue
+     # names. The order is important: a Worker will check the first
+     # queue given for a job. If none is found, it will check the
+     # second queue name given. If a job is found, it will be
+     # processed. Upon completion, the Worker will again check the
+     # first queue given, and so forth. In this way the queue list
+     # passed to a Worker on startup defines the priorities of queues.
+     #
+     # If passed a single "*", this Worker will operate on all queues
+     # in alphabetical order. Queues can be dynamically added or
+     # removed without needing to restart workers using this method.
+     #
+     # Workers should have `#prepare` called after they are initialized
+     # if you are running work on the worker.
+     def initialize(*queues)
+       @shutdown = nil
+       @paused = nil
+       @before_first_fork_hook_ran = false
+
+       verbose_value = ENV['LOGGING'] || ENV['VERBOSE']
+       self.verbose = verbose_value if verbose_value
+       self.very_verbose = ENV['VVERBOSE'] if ENV['VVERBOSE']
+       self.pre_shutdown_timeout = (ENV['RESQUE_PRE_SHUTDOWN_TIMEOUT'] || 0.0).to_f
+       self.term_timeout = (ENV['RESQUE_TERM_TIMEOUT'] || 4.0).to_f
+       self.term_child = ENV['TERM_CHILD']
+       self.graceful_term = ENV['GRACEFUL_TERM']
+       self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']
+
+       self.queues = queues
+     end
+
+     # Daemonizes the worker if ENV['BACKGROUND'] is set and writes
+     # the process id to ENV['PIDFILE'] if set. Should only be called
+     # once per worker.
+     def prepare
+       if ENV['BACKGROUND']
+         Process.daemon(true)
+       end
+
+       if ENV['PIDFILE']
+         File.open(ENV['PIDFILE'], 'w') { |f| f << pid }
+       end
+
+       self.reconnect if ENV['BACKGROUND']
+     end
+
+     def queues=(queues)
+       queues = queues.empty? ? (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') : queues
+       @queues = queues.map { |queue| queue.to_s.strip }
+       unless ['*', '?', '{', '}', '[', ']'].any? {|char| @queues.join.include?(char) }
+         @static_queues = @queues.flatten.uniq
+       end
+       validate_queues
+     end
+
+     # A worker must be given a queue, otherwise it won't know what to
+     # do with itself.
+     #
+     # You probably never need to call this.
+     def validate_queues
+       if @queues.nil? || @queues.empty?
+         raise NoQueueError.new("Please give each worker at least one queue.")
+       end
+     end
+
+     # Returns a list of queues to use when searching for a job.
+     # A splat ("*") means you want every queue (in alpha order) - this
+     # can be useful for dynamically adding new queues.
+     def queues
+       return @static_queues if @static_queues
+       @queues.map { |queue| glob_match(queue) }.flatten.uniq
+     end
+
+     def glob_match(pattern)
+       ResqueAdmin.queues.select do |queue|
+         File.fnmatch?(pattern, queue)
+       end.sort
+     end
+
+     # This is the main workhorse method. Called on a Worker instance,
+     # it begins the worker life cycle.
+     #
+     # The following events occur during a worker's life cycle:
+     #
+     # 1. Startup:   Signals are registered, dead workers are pruned,
+     #               and this worker is registered.
+     # 2. Work loop: Jobs are pulled from a queue and processed.
+     # 3. Teardown:  This worker is unregistered.
+     #
+     # Can be passed a float representing the polling frequency.
+     # The default is 5 seconds, but for a semi-active site you may
+     # want to use a smaller value.
+     #
+     # Also accepts a block which will be passed the job as soon as it
+     # has completed processing. Useful for testing.
+     def work(interval = 5.0, &block)
+       interval = Float(interval)
+       startup
+
+       loop do
+         break if shutdown?
+
+         unless work_one_job(&block)
+           break if interval.zero?
+           log_with_severity :debug, "Sleeping for #{interval} seconds"
+           procline paused? ? "Paused" : "Waiting for #{queues.join(',')}"
+           sleep interval
+         end
+       end
+
+       unregister_worker
+     rescue Exception => exception
+       return if exception.class == SystemExit && !@child && run_at_exit_hooks
+       log_with_severity :error, "Failed to start worker : #{exception.inspect}"
+       unregister_worker(exception)
+     end
+
+     def work_one_job(job = nil, &block)
+       return false if paused?
+       return false unless job ||= reserve
+
+       working_on job
+       procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"
+
+       log_with_severity :info, "got: #{job.inspect}"
+       job.worker = self
+
+       if fork_per_job?
+         perform_with_fork(job, &block)
+       else
+         perform(job, &block)
+       end
+
+       done_working
+       true
+     end
+
+     # DEPRECATED. Processes a single job. If none is given, it will
+     # try to produce one. Usually run in the child.
+     def process(job = nil, &block)
+       return unless job ||= reserve
+
+       job.worker = self
+       working_on job
+       perform(job, &block)
+     ensure
+       done_working
+     end
+
+     # Reports the exception and marks the job as failed
+     def report_failed_job(job,exception)
+       log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"
+       begin
+         job.fail(exception)
+       rescue Object => exception
+         log_with_severity :error, "Received exception when reporting failure: #{exception.inspect}"
+       end
+       begin
+         failed!
+       rescue Object => exception
+         log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{exception.inspect}"
+       end
+     end
+
+
+     # Processes a given job in the child.
+     def perform(job)
+       begin
+         if fork_per_job?
+           reconnect
+           run_hook :after_fork, job
+         end
+         job.perform
+       rescue Object => e
+         report_failed_job(job,e)
+       else
+         log_with_severity :info, "done: #{job.inspect}"
+       ensure
+         yield job if block_given?
+       end
+     end
+
+     # Attempts to grab a job off one of the provided queues. Returns
+     # nil if no job can be found.
+     def reserve
+       queues.each do |queue|
+         log_with_severity :debug, "Checking #{queue}"
+         if job = ResqueAdmin.reserve(queue)
+           log_with_severity :debug, "Found job on #{queue}"
+           return job
+         end
+       end
+
+       nil
+     rescue Exception => e
+       log_with_severity :error, "Error reserving job: #{e.inspect}"
+       log_with_severity :error, e.backtrace.join("\n")
+       raise e
+     end
+
+     # Reconnect to Redis to avoid sharing a connection with the parent,
+     # retry up to 3 times with increasing delay before giving up.
+     def reconnect
+       tries = 0
+       begin
+         data_store.reconnect
+       rescue Redis::BaseConnectionError
+         if (tries += 1) <= 3
+           log_with_severity :error, "Error reconnecting to Redis; retrying"
+           sleep(tries)
+           retry
+         else
+           log_with_severity :error, "Error reconnecting to Redis; quitting"
+           raise
+         end
+       end
+     end
+
+     # Runs all the methods needed when a worker begins its lifecycle.
+     def startup
+       $0 = "resque: Starting"
+
+       enable_gc_optimizations
+       register_signal_handlers
+       start_heartbeat
+       prune_dead_workers
+       run_hook :before_first_fork
+       register_worker
+
+       # Fix buffering so we can `rake resque:work > resque.log` and
+       # get output from the child in there.
+       $stdout.sync = true
+     end
+
+     # Enables GC Optimizations if you're running REE.
+     # http://www.rubyenterpriseedition.com/faq.html#adapt_apps_for_cow
+     def enable_gc_optimizations
+       if GC.respond_to?(:copy_on_write_friendly=)
+         GC.copy_on_write_friendly = true
+       end
+     end
+
+     # Registers the various signal handlers a worker responds to.
+     #
+     # TERM: Shutdown immediately, stop processing jobs.
+     # INT: Shutdown immediately, stop processing jobs.
+     # QUIT: Shutdown after the current job has finished processing.
+     # USR1: Kill the forked child immediately, continue processing jobs.
+     # USR2: Don't process any new jobs
+     # CONT: Start processing jobs again after a USR2
+     def register_signal_handlers
+       trap('TERM') { graceful_term ? shutdown : shutdown! }
+       trap('INT') { shutdown! }
+
+       begin
+         trap('QUIT') { shutdown }
+         if term_child
+           trap('USR1') { new_kill_child }
+         else
+           trap('USR1') { kill_child }
+         end
+         trap('USR2') { pause_processing }
+         trap('CONT') { unpause_processing }
+       rescue ArgumentError
+         log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
+       end
+
+       log_with_severity :debug, "Registered signals"
+     end
+
+     def unregister_signal_handlers
+       trap('TERM') do
+         trap('TERM') do
+           # Ignore subsequent term signals
+         end
+
+         raise TermException.new("SIGTERM")
+       end
+
+       trap('INT', 'DEFAULT')
+
+       begin
+         trap('QUIT', 'DEFAULT')
+         trap('USR1', 'DEFAULT')
+         trap('USR2', 'DEFAULT')
+       rescue ArgumentError
+       end
+     end
+
+     # Schedule this worker for shutdown. Will finish processing the
+     # current job.
+     def shutdown
+       log_with_severity :info, 'Exiting...'
+       @shutdown = true
+     end
+
+     # Kill the child and shutdown immediately.
+     # If not forking, abort this process.
+     def shutdown!
+       shutdown
+       if term_child
+         if fork_per_job?
+           new_kill_child
+         else
+           # Raise TermException in the same process
+           trap('TERM') do
+             # ignore subsequent terms
+           end
+           raise TermException.new("SIGTERM")
+         end
+       else
+         kill_child
+       end
+     end
+
+     # Should this worker shutdown as soon as current job is finished?
+     def shutdown?
+       @shutdown
+     end
+
+     # Kills the forked child immediately, without remorse. The job it
+     # is processing will not be completed.
+     def kill_child
+       if @child
+         log_with_severity :debug, "Killing child at #{@child}"
+         if `ps -o pid,state -p #{@child}`
+           Process.kill("KILL", @child) rescue nil
+         else
+           log_with_severity :debug, "Child #{@child} not found, restarting."
+           shutdown
+         end
+       end
+     end
+
+     def heartbeat
+       data_store.heartbeat(self)
+     end
+
+     def remove_heartbeat
+       data_store.remove_heartbeat(self)
+     end
+
+     def heartbeat!(time = data_store.server_time)
+       data_store.heartbeat!(self, time)
+     end
+
+     def self.all_heartbeats
+       data_store.all_heartbeats
+     end
+
+     # Returns a list of workers that have sent a heartbeat in the past, but which
+     # already expired (does NOT include workers that have never sent a heartbeat at all).
+     def self.all_workers_with_expired_heartbeats
+       workers = Worker.all
+       heartbeats = Worker.all_heartbeats
+       now = data_store.server_time
+
+       workers.select do |worker|
+         id = worker.to_s
+         heartbeat = heartbeats[id]
+
+         if heartbeat
+           seconds_since_heartbeat = (now - Time.parse(heartbeat)).to_i
+           seconds_since_heartbeat > ResqueAdmin.prune_interval
+         else
+           false
+         end
+       end
+     end
+
+     def start_heartbeat
+       remove_heartbeat
+
+       @heartbeat_thread_signal = ResqueAdmin::ThreadSignal.new
+
+       @heartbeat_thread = Thread.new do
+         loop do
+           heartbeat!
+           signaled = @heartbeat_thread_signal.wait_for_signal(ResqueAdmin.heartbeat_interval)
+           break if signaled
+         end
+       end
+
+       @@all_heartbeat_threads << @heartbeat_thread
+     end
+
+     # Kills the forked child immediately with minimal remorse. The job it
+     # is processing will not be completed. Send the child a TERM signal,
+     # wait <term_timeout> seconds, and then a KILL signal if it has not quit
+     # If pre_shutdown_timeout has been set to a positive number, it will allow
+     # the child that many seconds before sending the aforementioned TERM and KILL.
+     def new_kill_child
+       if @child
+         unless child_already_exited?
+           if pre_shutdown_timeout && pre_shutdown_timeout > 0.0
+             log_with_severity :debug, "Waiting #{pre_shutdown_timeout.to_f}s for child process to exit"
+             return if wait_for_child_exit(pre_shutdown_timeout)
+           end
+
+           log_with_severity :debug, "Sending TERM signal to child #{@child}"
+           Process.kill("TERM", @child)
+
+           if wait_for_child_exit(term_timeout)
+             return
+           else
+             log_with_severity :debug, "Sending KILL signal to child #{@child}"
+             Process.kill("KILL", @child)
+           end
+         else
+           log_with_severity :debug, "Child #{@child} already quit."
+         end
+       end
+     rescue SystemCallError
+       log_with_severity :error, "Child #{@child} already quit and reaped."
+     end
+
+     def child_already_exited?
+       Process.waitpid(@child, Process::WNOHANG)
+     end
+
+     def wait_for_child_exit(timeout)
+       (timeout * 10).round.times do |i|
+         sleep(0.1)
+         return true if child_already_exited?
+       end
+       false
+     end
+
+     # are we paused?
+     def paused?
+       @paused
+     end
+
+     # Stop processing jobs after the current one has completed (if we're
+     # currently running one).
+     def pause_processing
+       log_with_severity :info, "USR2 received; pausing job processing"
+       run_hook :before_pause, self
+       @paused = true
+     end
+
+     # Start processing jobs again after a pause
+     def unpause_processing
+       log_with_severity :info, "CONT received; resuming job processing"
+       @paused = false
+       run_hook :after_pause, self
+     end
+
+     # Looks for any workers which should be running on this server
+     # and, if they're not, removes them from Redis.
+     #
+     # This is a form of garbage collection. If a server is killed by a
+     # hard shutdown, power failure, or something else beyond our
+     # control, the ResqueAdmin workers will not die gracefully and therefore
+     # will leave stale state information in Redis.
+     #
+     # By checking the current Redis state against the actual
+     # environment, we can determine if Redis is old and clean it up a bit.
+     def prune_dead_workers
+       all_workers = Worker.all
+
+       unless all_workers.empty?
+         known_workers = worker_pids
+         all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
+       end
+
+       all_workers.each do |worker|
+         # If the worker hasn't sent a heartbeat, remove it from the registry.
+         #
+         # If the worker hasn't ever sent a heartbeat, we won't remove it: the first
+         # heartbeat is sent before the worker is registered, so a missing one means
+         # that this is a worker that doesn't support heartbeats, e.g., another
+         # client library or an older version of ResqueAdmin. We won't touch these.
+         if all_workers_with_expired_heartbeats.include?(worker)
+           log_with_severity :info, "Pruning dead worker: #{worker}"
+           worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s))
+           next
+         end
+
+         host, pid, worker_queues_raw = worker.id.split(':')
+         worker_queues = worker_queues_raw.split(",")
+         unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
+           # If the worker we are trying to prune does not belong to the queues
+           # we are listening to, we should not touch it.
+           # Attempting to prune a worker from different queues may easily result in
+           # an unknown class exception, since that worker could even be
+           # written in a different language.
+           next
+         end
+
+         next unless host == hostname
+         next if known_workers.include?(pid)
+
+         log_with_severity :debug, "Pruning dead worker: #{worker}"
+         worker.unregister_worker
+       end
+     end
+
+     # Registers ourself as a worker. Useful when entering the worker
+     # lifecycle on startup.
+     def register_worker
+       data_store.register_worker(self)
+     end
+
+     # Runs a named hook, passing along any arguments.
+     def run_hook(name, *args)
+       return unless hooks = ResqueAdmin.send(name)
+       return if name == :before_first_fork && @before_first_fork_hook_ran
+       msg = "Running #{name} hooks"
+       msg << " with #{args.inspect}" if args.any?
+       log_with_severity :info, msg
+
+       hooks.each do |hook|
+         args.any? ? hook.call(*args) : hook.call
+         @before_first_fork_hook_ran = true if name == :before_first_fork
+       end
+     end
+
+     def kill_background_threads
+       if @heartbeat_thread
+         @heartbeat_thread_signal.signal
+         @heartbeat_thread.join
+       end
+     end
+
+     # Unregisters ourself as a worker. Useful when shutting down.
+     def unregister_worker(exception = nil)
+       # If we're still processing a job, make sure it gets logged as a
+       # failure.
+       if (hash = processing) && !hash.empty?
+         job = Job.new(hash['queue'], hash['payload'])
+         # Ensure the proper worker is attached to this job, even if
+         # it's not the precise instance that died.
+         job.worker = self
+         begin
+           job.fail(exception || DirtyExit.new("Job still being processed"))
+         rescue RuntimeError => e
+           log_with_severity :error, e.message
+         end
+       end
+
+       kill_background_threads
+
+       data_store.unregister_worker(self) do
+         Stat.clear("processed:#{self}")
+         Stat.clear("failed:#{self}")
+       end
+     rescue Exception => exception_while_unregistering
+       message = exception_while_unregistering.message
+       if exception
+         message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
+         message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
+       end
+       fail(exception_while_unregistering.class,
+            message,
+            exception_while_unregistering.backtrace)
+     end
+
+     # Given a job, tells Redis we're working on it. Useful for seeing
+     # what workers are doing and when.
+     def working_on(job)
+       data = encode \
+         :queue   => job.queue,
+         :run_at  => Time.now.utc.iso8601,
+         :payload => job.payload
+       data_store.set_worker_payload(self,data)
+     end
+
+     # Called when we are done working - clears our `working_on` state
+     # and tells Redis we processed a job.
+     def done_working
+       data_store.worker_done_working(self) do
+         processed!
+       end
+     end
+
+     # How many jobs has this worker processed? Returns an int.
+     def processed
+       Stat["processed:#{self}"]
+     end
+
+     # Tell Redis we've processed a job.
+     def processed!
+       Stat << "processed"
+       Stat << "processed:#{self}"
+     end
+
+     # How many failed jobs has this worker seen? Returns an int.
+     def failed
+       Stat["failed:#{self}"]
+     end
+
+     # Tells Redis we've failed a job.
+     def failed!
+       Stat << "failed"
+       Stat << "failed:#{self}"
+     end
+
+     # What time did this worker start? Returns an instance of `Time`
+     def started
+       data_store.worker_start_time(self)
+     end
+
+     # Tell Redis we've started
+     def started!
+       data_store.worker_started(self)
+     end
+
+     # Returns a hash explaining the Job we're currently processing, if any.
+     def job(reload = true)
+       @job = nil if reload
+       @job ||= decode(data_store.get_worker_payload(self)) || {}
+     end
+     attr_writer :job
+     alias_method :processing, :job
+
+     # Boolean - true if working, false if not
+     def working?
+       state == :working
+     end
+
+     # Boolean - true if idle, false if not
+     def idle?
+       state == :idle
+     end
+
+     def fork_per_job?
+       return @fork_per_job if defined?(@fork_per_job)
+       @fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
+     end
+
+     # Returns a symbol representing the current worker state,
+     # which can be either :working or :idle
+     def state
+       data_store.get_worker_payload(self) ? :working : :idle
+     end
+
+     # Is this worker the same as another worker?
+     def ==(other)
+       to_s == other.to_s
+     end
+
+     def inspect
+       "#<Worker #{to_s}>"
+     end
+
+     # The string representation is the same as the id for this worker
+     # instance. Can be used with `Worker.find`.
+     def to_s
+       @to_s ||= "#{hostname}:#{pid}:#{@queues.join(',')}"
+     end
+     alias_method :id, :to_s
+
+     # chomp'd hostname of this worker's machine
+     def hostname
+       @hostname ||= Socket.gethostname
+     end
+
+     # Returns Integer PID of running worker
+     def pid
+       @pid ||= Process.pid
+     end
+
+     # Returns an Array of string pids of all the other workers on this
+     # machine. Useful when pruning dead workers on startup.
+     def worker_pids
+       if RUBY_PLATFORM =~ /solaris/
+         solaris_worker_pids
+       elsif RUBY_PLATFORM =~ /mingw32/
+         windows_worker_pids
+       else
+         linux_worker_pids
+       end
+     end
+
+     # Returns an Array of string pids of all the other workers on this
+     # machine. Useful when pruning dead workers on startup.
+     def windows_worker_pids
+       tasklist_output = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`.encode("UTF-8", Encoding.locale_charmap)
+       tasklist_output.split($/).select { |line| line =~ /^PID:/}.collect{ |line| line.gsub /PID:\s+/, '' }
+     end
+
+     # Find ResqueAdmin worker pids on Linux and OS X.
+     #
+     def linux_worker_pids
+       `ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`.split("\n").map do |line|
+         line.split(' ')[0]
+       end
+     end
+
+     # Find ResqueAdmin worker pids on Solaris.
+     #
+     # Returns an Array of string pids of all the other workers on this
+     # machine. Useful when pruning dead workers on startup.
+     def solaris_worker_pids
+       `ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n").map do |line|
+         real_pid = line.split(' ')[0]
+         pargs_command = `pargs -a #{real_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
+         if pargs_command.split(':')[1] == " resque-#{ResqueAdmin::Version}"
+           real_pid
+         end
+       end.compact
+     end
+
+     # Given a string, sets the procline ($0) and logs.
+     # Procline is always in the format of:
+     #   RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
+     def procline(string)
+       $0 = "#{ENV['RESQUE_PROCLINE_PREFIX']}resque-#{ResqueAdmin::Version}: #{string}"
+       log_with_severity :debug, $0
+     end
+
+     def log(message)
+       info(message)
+     end
+
+     def log!(message)
+       debug(message)
+     end
+
+
+     def verbose
+       @verbose
+     end
+
+     def very_verbose
+       @very_verbose
+     end
+
+     def verbose=(value)
+       if value && !very_verbose
+         ResqueAdmin.logger.formatter = VerboseFormatter.new
+         ResqueAdmin.logger.level = Logger::INFO
+       elsif !value
+         ResqueAdmin.logger.formatter = QuietFormatter.new
+       end
+
+       @verbose = value
+     end
+
+     def very_verbose=(value)
+       if value
+         ResqueAdmin.logger.formatter = VeryVerboseFormatter.new
+         ResqueAdmin.logger.level = Logger::DEBUG
+       elsif !value && verbose
+         ResqueAdmin.logger.formatter = VerboseFormatter.new
+         ResqueAdmin.logger.level = Logger::INFO
+       else
+         ResqueAdmin.logger.formatter = QuietFormatter.new
+       end
+
+       @very_verbose = value
+     end
+
+     private
+
+     def perform_with_fork(job, &block)
+       run_hook :before_fork, job
+
+       begin
+         @child = fork do
+           unregister_signal_handlers if term_child
+           perform(job, &block)
+           exit! unless run_at_exit_hooks
+         end
+       rescue NotImplementedError
+         @fork_per_job = false
+         perform(job, &block)
+         return
+       end
+
+       srand # Reseeding
+       procline "Forked #{@child} at #{Time.now.to_i}"
+
+       begin
+         Process.waitpid(@child)
+       rescue SystemCallError
+         nil
+       end
+
+       job.fail(DirtyExit.new("Child process received unhandled signal #{$?.stopsig}", $?)) if $?.signaled?
+       @child = nil
+     end
+
+     def log_with_severity(severity, message)
+       Logging.log(severity, message)
+     end
+   end
+ end
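
For reference, the rake task earlier in this release is a thin wrapper around this class. A minimal sketch of driving a worker directly (the queue names and the 0.1-second interval are arbitrary; a configured ResqueAdmin.redis connection is assumed):

    require 'resque_admin'

    worker = ResqueAdmin::Worker.new('critical', 'high', 'default')
    worker.prepare   # daemonizes / writes a pidfile only if BACKGROUND / PIDFILE are set
    worker.log "Starting worker #{worker}"

    # Polls the queues every 0.1s; the block is called with each job once it
    # has been processed (per the `work` docs, mainly useful for testing).
    worker.work(0.1) do |job|
      worker.log "finished #{job.inspect}"
    end
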