resque_admin 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.md +530 -0
  3. data/LICENSE +20 -0
  4. data/README.markdown +957 -0
  5. data/Rakefile +57 -0
  6. data/bin/resque-admin +81 -0
  7. data/bin/resque-admin-web +31 -0
  8. data/lib/resque_admin.rb +578 -0
  9. data/lib/resque_admin/data_store.rb +325 -0
  10. data/lib/resque_admin/errors.rb +21 -0
  11. data/lib/resque_admin/failure.rb +119 -0
  12. data/lib/resque_admin/failure/airbrake.rb +33 -0
  13. data/lib/resque_admin/failure/base.rb +73 -0
  14. data/lib/resque_admin/failure/multiple.rb +68 -0
  15. data/lib/resque_admin/failure/redis.rb +128 -0
  16. data/lib/resque_admin/failure/redis_multi_queue.rb +104 -0
  17. data/lib/resque_admin/helpers.rb +48 -0
  18. data/lib/resque_admin/job.rb +296 -0
  19. data/lib/resque_admin/log_formatters/quiet_formatter.rb +7 -0
  20. data/lib/resque_admin/log_formatters/verbose_formatter.rb +7 -0
  21. data/lib/resque_admin/log_formatters/very_verbose_formatter.rb +8 -0
  22. data/lib/resque_admin/logging.rb +18 -0
  23. data/lib/resque_admin/plugin.rb +78 -0
  24. data/lib/resque_admin/server.rb +301 -0
  25. data/lib/resque_admin/server/helpers.rb +64 -0
  26. data/lib/resque_admin/server/public/favicon.ico +0 -0
  27. data/lib/resque_admin/server/public/idle.png +0 -0
  28. data/lib/resque_admin/server/public/jquery-1.12.4.min.js +5 -0
  29. data/lib/resque_admin/server/public/jquery.relatize_date.js +95 -0
  30. data/lib/resque_admin/server/public/poll.png +0 -0
  31. data/lib/resque_admin/server/public/ranger.js +78 -0
  32. data/lib/resque_admin/server/public/reset.css +44 -0
  33. data/lib/resque_admin/server/public/style.css +91 -0
  34. data/lib/resque_admin/server/public/working.png +0 -0
  35. data/lib/resque_admin/server/test_helper.rb +19 -0
  36. data/lib/resque_admin/server/views/error.erb +1 -0
  37. data/lib/resque_admin/server/views/failed.erb +29 -0
  38. data/lib/resque_admin/server/views/failed_job.erb +50 -0
  39. data/lib/resque_admin/server/views/failed_queues_overview.erb +24 -0
  40. data/lib/resque_admin/server/views/job_class.erb +6 -0
  41. data/lib/resque_admin/server/views/key_sets.erb +17 -0
  42. data/lib/resque_admin/server/views/key_string.erb +11 -0
  43. data/lib/resque_admin/server/views/layout.erb +44 -0
  44. data/lib/resque_admin/server/views/next_more.erb +22 -0
  45. data/lib/resque_admin/server/views/overview.erb +4 -0
  46. data/lib/resque_admin/server/views/processing.erb +2 -0
  47. data/lib/resque_admin/server/views/queues.erb +58 -0
  48. data/lib/resque_admin/server/views/stats.erb +62 -0
  49. data/lib/resque_admin/server/views/workers.erb +109 -0
  50. data/lib/resque_admin/server/views/working.erb +71 -0
  51. data/lib/resque_admin/stat.rb +58 -0
  52. data/lib/resque_admin/tasks.rb +72 -0
  53. data/lib/resque_admin/thread_signal.rb +24 -0
  54. data/lib/resque_admin/vendor/utf8_util.rb +24 -0
  55. data/lib/resque_admin/version.rb +3 -0
  56. data/lib/resque_admin/worker.rb +917 -0
  57. data/lib/tasks/redis.rake +161 -0
  58. data/lib/tasks/resque_admin.rake +2 -0
  59. metadata +191 -0
@@ -0,0 +1,58 @@
1
module ResqueAdmin
  # Named counters kept in the ResqueAdmin data store.
  #
  #   Stat[name]        # read a counter
  #   Stat.incr(name)   # add to a counter
  #   Stat.decr(name)   # subtract from a counter
  #   Stat.clear(name)  # drop a counter
  module Stat
    extend self

    # The backing store: whatever ResqueAdmin.redis returns.
    def redis
      ResqueAdmin.redis
    end
    alias_method :data_store, :redis

    # Reads the stat called +stat+ by delegating to the data store's `stat`.
    def get(stat)
      data_store.stat(stat)
    end

    # Bracket form of `get`.
    def [](stat)
      get(stat)
    end

    # Adds +by+ (default 1) to the stat called +stat+.
    def incr(stat, by = 1)
      data_store.increment_stat(stat, by)
    end

    # Shovel form of `incr` with the default step of one.
    def <<(stat)
      incr(stat)
    end

    # Subtracts +by+ (default 1) from the stat called +stat+.
    #
    # NOTE(review): the data store method name `decremet_stat` looks like a
    # misspelling of "decrement"; it is kept as-is because the data store is
    # not visible from here - confirm which spelling it actually responds to.
    def decr(stat, by = 1)
      data_store.decremet_stat(stat, by)
    end

    # Reverse-shovel form of `decr` with the default step of one.
    def >>(stat)
      decr(stat)
    end

    # Asks the data store to clear the stat called +stat+.
    def clear(stat)
      data_store.clear_stat(stat)
    end
  end
end
@@ -0,0 +1,72 @@
1
# require 'resque_admin/tasks'
# will give you the resque tasks


namespace :resque do
  # Intentionally empty: applications can attach their own setup work to it.
  task :setup

  desc "Start a ResqueAdmin worker"
  task :work => [ :preload, :setup ] do
    require 'resque_admin'

    begin
      worker = ResqueAdmin::Worker.new
    rescue ResqueAdmin::NoQueueError
      abort "set QUEUE env var, e.g. $ QUEUE=critical,high rake resque:work"
    end

    worker.prepare
    # Interpolate the worker itself; `self` here is the rake context, not
    # the worker being started.
    worker.log "Starting worker #{worker}"
    worker.work(ENV['INTERVAL'] || 5) # interval, will block
  end

  desc "Start multiple ResqueAdmin workers. Should only be used in dev mode."
  task :workers do
    threads = []

    if ENV['COUNT'].to_i < 1
      abort "set COUNT env var, e.g. $ COUNT=2 rake resque:workers"
    end

    # One thread per worker; each thread shells out to its own
    # `rake resque:work` process and blocks until that process exits.
    ENV['COUNT'].to_i.times do
      threads << Thread.new do
        system "rake resque:work"
      end
    end

    threads.each { |thread| thread.join }
  end

  # Preload app files if this is Rails
  task :preload => :setup do
    if defined?(Rails)
      if Rails::VERSION::MAJOR > 3
        ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
        Rails.application.config.eager_load_namespaces.each(&:eager_load!)

      elsif Rails::VERSION::MAJOR == 3
        ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
        Rails.application.eager_load!

      elsif defined?(Rails::Initializer)
        $rails_rake_task = false
        Rails::Initializer.run :load_application_classes
      end
    end
  end

  namespace :failures do
    desc "Sort the 'failed' queue for the redis_multi_queue failure backend"
    task :sort do
      require 'resque_admin'
      require 'resque_admin/failure/redis'

      warn "Sorting #{ResqueAdmin::Failure.count} failures..."
      # Re-push every failure onto the list named after its originating queue.
      ResqueAdmin::Failure.each(0, ResqueAdmin::Failure.count) do |_, failure|
        data = ResqueAdmin.encode(failure)
        ResqueAdmin.redis.rpush(ResqueAdmin::Failure.failure_queue_name(failure['queue']), data)
      end
      warn "done!"
    end
  end
end
@@ -0,0 +1,24 @@
1
# A latching flag that one thread raises and another waits on, built from a
# Mutex and a ConditionVariable. Once raised, the flag is never reset.
class ResqueAdmin::ThreadSignal
  def initialize
    @mutex = Mutex.new
    @signaled = false
    @received = ConditionVariable.new
  end

  # Sets the flag and wakes one thread blocked in `wait_for_signal`.
  def signal
    @mutex.synchronize do
      @signaled = true
      @received.signal
    end
  end

  # Blocks for at most +timeout+ seconds unless the flag is already set.
  #
  # Returns the flag's value at the time the wait ends, so a falsy result
  # means the wait ended without `signal` having been called.
  def wait_for_signal(timeout)
    @mutex.synchronize do
      @received.wait(@mutex, timeout) unless @signaled
      @signaled
    end
  end
end
@@ -0,0 +1,24 @@
1
module UTF8Util
  # use '?' instead of the unicode replace char, since that is 3 bytes
  # and can increase the string size if it's done a lot
  REPLACEMENT_CHAR = "?"

  # Replace invalid UTF-8 character sequences with a replacement character
  #
  # Returns a copy of this String as valid UTF-8; +str+ itself is untouched.
  def self.clean(str)
    clean!(str.dup)
  end

  # Replace invalid UTF-8 character sequences with a replacement character,
  # modifying +str+ in place.
  #
  # * A string already tagged UTF-8 and valid is returned unchanged.
  # * A string tagged UTF-8 that contains invalid bytes has just those bytes
  #   replaced.
  # * Any other string is reinterpreted as binary and transcoded, so every
  #   byte that has no UTF-8 mapping from binary becomes REPLACEMENT_CHAR.
  #
  # Returns +str+ itself (the same object) as valid UTF-8.
  def self.clean!(str)
    if str.encoding == Encoding::UTF_8
      return str if str.valid_encoding?
      return str.scrub!(REPLACEMENT_CHAR)
    end

    # encode! (not encode) so the caller's string really is cleaned in place,
    # as the bang name and the documentation promise.
    str.force_encoding("binary").encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => REPLACEMENT_CHAR)
  end
end
@@ -0,0 +1,3 @@
1
module ResqueAdmin
  # Gem version string; `Version` is a second name for the same object.
  VERSION = '0.2.0'
  Version = VERSION
end
@@ -0,0 +1,917 @@
1
+ require 'time'
2
+ require 'set'
3
+ require 'redis/distributed'
4
+
5
+ module ResqueAdmin
6
+ # A ResqueAdmin Worker processes jobs. On platforms that support fork(2),
7
+ # the worker will fork off a child to process each job. This ensures
8
+ # a clean slate when beginning the next job and cuts down on gradual
9
+ # memory growth as well as low level failures.
10
+ #
11
+ # It also ensures workers are always listening to signals from you,
12
+ # their master, and can react accordingly.
13
+ class Worker
14
+ include ResqueAdmin::Helpers
15
+ extend ResqueAdmin::Helpers
16
+ include ResqueAdmin::Logging
17
+
18
# Every heartbeat thread started by any worker in this process.
@@all_heartbeat_threads = []

# Kills all recorded heartbeat threads, then joins each of them, and
# finally resets the list to empty.
def self.kill_all_heartbeat_threads
  @@all_heartbeat_threads.each { |thread| thread.kill }.each { |thread| thread.join }
  @@all_heartbeat_threads = []
end
23
+
24
# The store this worker talks to: whatever ResqueAdmin.redis returns.
# Also reachable as `data_store`.
def redis
  ResqueAdmin.redis
end
alias data_store redis
28
+
29
# Class-level access to the store returned by ResqueAdmin.redis.
def self.redis
  ResqueAdmin.redis
end
32
+
33
# Class-level `data_store`; simply forwards to the class-level `redis`.
def self.data_store
  redis
end
36
+
37
# Serializes +object+ by handing it to ResqueAdmin.encode and returns
# that call's result.
def encode(object)
  ResqueAdmin.encode(object)
end
42
+
43
# Deserializes +object+ by handing it to ResqueAdmin.decode and returns
# that call's result.
def decode(object)
  ResqueAdmin.decode(object)
end
47
+
48
+ attr_accessor :term_timeout
49
+
50
+ attr_accessor :pre_shutdown_timeout
51
+
52
+ attr_accessor :term_child_signal
53
+
54
+ # decide whether to use new_kill_child logic
55
+ attr_accessor :term_child
56
+
57
+ # should term kill workers gracefully (vs. immediately)
58
+ # Makes SIGTERM work like SIGQUIT
59
+ attr_accessor :graceful_term
60
+
61
+ # When set to true, forked workers will exit with `exit`, calling any `at_exit` code handlers that have been
62
+ # registered in the application. Otherwise, forked workers exit with `exit!`
63
+ attr_accessor :run_at_exit_hooks
64
+
65
+ attr_writer :fork_per_job
66
+ attr_writer :hostname
67
+ attr_writer :to_s
68
+ attr_writer :pid
69
+
70
# Builds a worker object for every id the data store lists (skipping the
# existence check) and returns them as an array without nils.
def self.all
  workers = data_store.worker_ids.map do |worker_id|
    find(worker_id, :skip_exists => true)
  end
  workers.compact
end
74
+
75
# Returns an array of all worker objects that currently have a non-empty
# payload recorded in the data store, each with its `job` set to the
# decoded payload.
#
# The payloads are fetched in one call via `workers_map`; if the store
# raises Redis::Distributed::CannotDistribute they are fetched one worker
# at a time instead.
def self.working
  workers = all
  return [] unless workers.any?

  reportedly_working = {}

  begin
    reportedly_working = data_store.workers_map(workers).reject do |_key, payload|
      payload.nil? || payload.empty?
    end
  rescue Redis::Distributed::CannotDistribute
    workers.each do |worker|
      payload = data_store.get_worker_payload(worker)
      reportedly_working[worker] = payload unless payload.nil? || payload.empty?
    end
  end

  reportedly_working.map do |key, payload|
    # In the fallback path the keys are worker objects rather than
    # "worker:<id>" strings, so stringify before stripping the prefix.
    worker = find(key.to_s.sub("worker:", ''), :skip_exists => true)
    worker.job = worker.decode(payload)
    worker
  end.compact
end
100
+
101
# Returns a single worker object for the string id +worker_id+, which has
# the form "<host>:<pid>:<queue>[,<queue>...]".
#
# options - :skip_exists => true builds the worker without asking
#           `exists?` first.
#
# Returns nil when the existence check is performed and fails.
def self.find(worker_id, options = {})
  skip_exists = options[:skip_exists]
  return nil unless skip_exists || exists?(worker_id)

  # Limit the split to three fields so that queue names which themselves
  # contain ':' are kept whole instead of being cut at the first colon.
  host, pid, queues_raw = worker_id.split(':', 3)
  queues = queues_raw.split(',')
  worker = new(*queues)
  worker.hostname = host
  worker.to_s = worker_id
  worker.pid = pid.to_i
  worker
end
117
+
118
# Same as `find` called with only a worker id (so the existence check
# is performed).
def self.attach(worker_id)
  find(worker_id)
end
122
+
123
# Asks the data store whether a worker with the string id +worker_id+
# exists and returns its answer.
def self.exists?(worker_id)
  data_store.worker_exists?(worker_id)
end
128
+
129
# Builds a worker for the given queue names. Order matters: the worker
# checks the first queue for a job, then the second, and so on, and goes
# back to the first queue after each job - so the list defines queue
# priority.
#
# A single "*" means all queues in alphabetical order; because the list is
# then resolved on every lookup, queues can come and go without a restart.
#
# Settings read from the environment here: LOGGING / VERBOSE, VVERBOSE,
# RESQUE_PRE_SHUTDOWN_TIMEOUT (default 0.0), RESQUE_TERM_TIMEOUT
# (default 4.0), TERM_CHILD, GRACEFUL_TERM and RUN_AT_EXIT_HOOKS.
#
# Call `#prepare` afterwards if the worker is going to run work.
def initialize(*queues)
  @shutdown = nil
  @paused = nil
  @before_first_fork_hook_ran = false

  verbosity = ENV['LOGGING'] || ENV['VERBOSE']
  self.verbose = verbosity if verbosity

  very_verbosity = ENV['VVERBOSE']
  self.very_verbose = very_verbosity if very_verbosity

  self.pre_shutdown_timeout = (ENV['RESQUE_PRE_SHUTDOWN_TIMEOUT'] || 0.0).to_f
  self.term_timeout = (ENV['RESQUE_TERM_TIMEOUT'] || 4.0).to_f
  self.term_child = ENV['TERM_CHILD']
  self.graceful_term = ENV['GRACEFUL_TERM']
  self.run_at_exit_hooks = ENV['RUN_AT_EXIT_HOOKS']

  self.queues = queues
end
159
+
160
# Gets the process ready to run work. Should only be called once per
# worker.
#
# * ENV['BACKGROUND'] set: daemonize, and reconnect to the data store at
#   the end.
# * ENV['PIDFILE'] set: write this worker's pid to that path.
def prepare
  Process.daemon(true) if ENV['BACKGROUND']

  pidfile = ENV['PIDFILE']
  File.open(pidfile, 'w') { |file| file << pid } if pidfile

  reconnect if ENV['BACKGROUND']
end
174
+
175
# Sets the queue list for this worker.
#
# An empty +queues+ falls back to the comma-separated QUEUES (or QUEUE)
# environment variable. Names are stringified and stripped. When no name
# contains a glob character the de-duplicated list is cached in
# @static_queues so `queues` does not have to resolve patterns each time.
#
# Raises via `validate_queues` when the resulting list is empty.
def queues=(queues)
  queues = queues.empty? ? (ENV["QUEUES"] || ENV['QUEUE']).to_s.split(',') : queues
  @queues = queues.map { |queue| queue.to_s.strip }

  joined = @queues.join
  dynamic = ['*', '?', '{', '}', '[', ']'].any? { |char| joined.include?(char) }
  # Always reassign: a list cached by an earlier, pattern-free assignment
  # must not survive a later assignment that uses patterns.
  @static_queues = dynamic ? nil : @queues.flatten.uniq

  validate_queues
end
183
+
184
# Guards against a worker with nothing to listen to: raises NoQueueError
# when @queues is nil or empty, otherwise returns nil.
#
# You probably never need to call this.
def validate_queues
  return unless @queues.nil? || @queues.empty?

  raise NoQueueError.new("Please give each worker at least one queue.")
end
193
+
194
# The queues to search for a job, in priority order.
#
# Returns the cached static list when there is one; otherwise every
# configured name is expanded through `glob_match` (so "*" picks up every
# known queue, including ones added later) and duplicates are removed.
def queues
  return @static_queues if @static_queues

  @queues.flat_map { |pattern| glob_match(pattern) }.uniq
end
201
+
202
# Returns, sorted, the names from ResqueAdmin.queues that match the
# filename-style glob +pattern+ (as judged by File.fnmatch?).
def glob_match(pattern)
  matching = ResqueAdmin.queues.select { |queue| File.fnmatch?(pattern, queue) }
  matching.sort
end
207
+
208
# Runs the worker's life cycle:
#
# 1. Startup:   `startup` is called.
# 2. Work loop: `work_one_job` is called until `shutdown?` is true. When
#               it reports that nothing was worked, the worker sleeps for
#               +interval+ seconds - or leaves the loop right away if
#               +interval+ is zero.
# 3. Teardown:  `unregister_worker` is called.
#
# interval - polling frequency in seconds (anything Float() accepts);
#            defaults to 5.0.
# block    - forwarded to `work_one_job`. Useful for testing.
#
# Any exception escaping the steps above is logged and passed to
# `unregister_worker`, except a SystemExit in a process with no @child set
# while `run_at_exit_hooks` is on, which just returns.
def work(interval = 5.0, &block)
  interval = Float(interval)
  startup

  loop do
    break if shutdown?
    next if work_one_job(&block)
    break if interval.zero?

    log_with_severity :debug, "Sleeping for #{interval} seconds"
    procline(paused? ? "Paused" : "Waiting for #{queues.join(',')}")
    sleep interval
  end

  unregister_worker
rescue Exception => exception
  return if exception.instance_of?(SystemExit) && !@child && run_at_exit_hooks
  log_with_severity :error, "Failed to start worker : #{exception.inspect}"
  unregister_worker(exception)
end
245
+
246
# Works a single job: the one given, or else one obtained from `reserve`.
#
# Returns false without doing anything when the worker is paused or no job
# is available; otherwise records the job as in progress, performs it
# (through `perform_with_fork` when `fork_per_job?` is true, `perform`
# otherwise), marks the worker done and returns true.
def work_one_job(job = nil, &block)
  return false if paused?

  job ||= reserve
  return false unless job

  working_on job
  procline "Processing #{job.queue} since #{Time.now.to_i} [#{job.payload_class_name}]"

  log_with_severity :info, "got: #{job.inspect}"
  job.worker = self

  runner = fork_per_job? ? :perform_with_fork : :perform
  send(runner, job, &block)

  done_working
  true
end
265
+
266
# DEPRECATED. Performs one job in the current process: the one given, or
# else one obtained from `reserve`. Usually run in the child.
#
# `done_working` runs on the way out in every case, including when there
# was no job to process.
def process(job = nil, &block)
  job ||= reserve
  return unless job

  job.worker = self
  working_on job
  perform(job, &block)
ensure
  done_working
end
277
+
278
# Logs that +job+ failed with +exception+, asks the job to record the
# failure, and bumps the failure counters through `failed!`.
#
# Each of the last two steps is best-effort: anything raised while doing
# it is logged and otherwise ignored.
def report_failed_job(job,exception)
  log_with_severity :error, "#{job.inspect} failed: #{exception.inspect}"

  begin
    job.fail(exception)
  rescue Object => reporting_error
    log_with_severity :error, "Received exception when reporting failure: #{reporting_error.inspect}"
  end

  begin
    failed!
  rescue Object => counting_error
    log_with_severity :error, "Received exception when increasing failed jobs counter (redis issue) : #{counting_error.inspect}"
  end
end
292
+
293
+
294
# Performs +job+ in the current process.
#
# When `fork_per_job?` is true the data store connection is re-established
# and the :after_fork hooks run first. Anything raised along the way is
# handed to `report_failed_job`; a clean run is logged as done. Either
# way, a block passed to this method is yielded the job at the end.
def perform(job)
  if fork_per_job?
    reconnect
    run_hook :after_fork, job
  end
  job.perform
rescue Object => e
  report_failed_job(job,e)
else
  log_with_severity :info, "done: #{job.inspect}"
ensure
  yield job if block_given?
end
310
+
311
# Walks `queues` in order and returns the first job ResqueAdmin.reserve
# hands back; returns nil when every queue comes up empty.
#
# Any exception is logged (message and backtrace) and then re-raised.
def reserve
  queues.each do |queue|
    log_with_severity :debug, "Checking #{queue}"
    job = ResqueAdmin.reserve(queue)
    next unless job

    log_with_severity :debug, "Found job on #{queue}"
    return job
  end

  nil
rescue Exception => e
  log_with_severity :error, "Error reserving job: #{e.inspect}"
  log_with_severity :error, e.backtrace.join("\n")
  raise e
end
328
+
329
# Reconnects the data store so the connection is not shared with the
# parent process.
#
# On Redis::BaseConnectionError the reconnect is retried up to 3 times,
# sleeping 1, 2 and then 3 seconds before the respective retry; after
# that the error is re-raised.
def reconnect
  tries = 0
  begin
    data_store.reconnect
  rescue Redis::BaseConnectionError
    tries += 1
    if tries > 3
      log_with_severity :error, "Error reconnecting to Redis; quitting"
      raise
    end

    log_with_severity :error, "Error reconnecting to Redis; retrying"
    sleep(tries)
    retry
  end
end
346
+
347
# Everything a worker does once before entering its work loop, in order:
# set the process title, enable GC optimizations, install signal handlers,
# start the heartbeat, prune dead workers, run the :before_first_fork
# hooks and register this worker.
def startup
  $0 = "resque: Starting"

  enable_gc_optimizations
  register_signal_handlers
  start_heartbeat
  prune_dead_workers
  run_hook :before_first_fork
  register_worker

  # Unbuffered stdout, so `rake resque:work > resque.log` also captures
  # what the child writes.
  $stdout.sync = true
end
362
+
363
# Turns on copy-on-write friendly garbage collection when the running
# Ruby offers the switch (Ruby Enterprise Edition does); a no-op otherwise.
# http://www.rubyenterpriseedition.com/faq.html#adapt_apps_for_cow
def enable_gc_optimizations
  GC.copy_on_write_friendly = true if GC.respond_to?(:copy_on_write_friendly=)
end
370
+
371
# Installs the signal handlers a running worker responds to.
#
# TERM: `shutdown` when graceful_term is set, otherwise `shutdown!`.
# INT:  `shutdown!`.
# QUIT: `shutdown` (finish the current job first).
# USR1: kill the forked child; `new_kill_child` or `kill_child` is chosen
#       once, here, from the current value of term_child.
# USR2: `pause_processing`.
# CONT: `unpause_processing`.
#
# An ArgumentError from trapping QUIT/USR1/USR2/CONT is downgraded to a
# warning.
def register_signal_handlers
  trap('TERM') { graceful_term ? shutdown : shutdown! }
  trap('INT') { shutdown! }

  begin
    trap('QUIT') { shutdown }
    if term_child
      trap('USR1') { new_kill_child }
    else
      trap('USR1') { kill_child }
    end
    trap('USR2') { pause_processing }
    trap('CONT') { unpause_processing }
  rescue ArgumentError
    log_with_severity :warn, "Signals QUIT, USR1, USR2, and/or CONT not supported."
  end

  log_with_severity :debug, "Registered signals"
end
398
+
399
# Replaces the worker's signal handlers: the first TERM raises
# TermException (and makes every later TERM a no-op), while INT, QUIT,
# USR1 and USR2 go back to their default actions. An ArgumentError from
# resetting QUIT/USR1/USR2 is ignored.
def unregister_signal_handlers
  trap('TERM') do
    # From now on further TERM signals are swallowed.
    trap('TERM') { }

    raise TermException.new("SIGTERM")
  end

  trap('INT', 'DEFAULT')

  begin
    trap('QUIT', 'DEFAULT')
    trap('USR1', 'DEFAULT')
    trap('USR2', 'DEFAULT')
  rescue ArgumentError
  end
end
417
+
418
# Flags this worker for shutdown (after logging that it is exiting).
# The work loop checks the flag between jobs, so the current job is
# allowed to finish.
def shutdown
  log_with_severity(:info, 'Exiting...')
  @shutdown = true
end
424
+
425
# Flags the worker for shutdown and stops the job in flight right away.
#
# * term_child off:           `kill_child`.
# * term_child on, forking:   `new_kill_child`.
# * term_child on, no fork:   later TERM signals are ignored and
#                             TermException is raised in this process.
def shutdown!
  shutdown
  return kill_child unless term_child
  return new_kill_child if fork_per_job?

  # Raise TermException in the same process
  trap('TERM') do
    # ignore subsequent terms
  end
  raise TermException.new("SIGTERM")
end
443
+
444
# Whether shutdown has been requested; returns the raw @shutdown flag
# (nil until `shutdown` sets it to true).
def shutdown?
  @shutdown
end
448
+
449
# Kills the forked child immediately, without remorse. The job it
# is processing will not be completed.
#
# Does nothing when no child is recorded. If the recorded child process
# no longer exists, the worker is flagged for shutdown instead.
def kill_child
  return unless @child

  log_with_severity :debug, "Killing child at #{@child}"

  # Probe with signal 0 rather than shelling out to `ps`: a backtick
  # result is a String and therefore always truthy, which made the
  # "not found" branch below unreachable.
  child_present =
    begin
      Process.kill(0, @child)
      true
    rescue Errno::ESRCH
      false
    rescue Errno::EPERM
      # The pid exists but cannot be signalled by us; treat it as present
      # and let the best-effort KILL below fail quietly.
      true
    end

  if child_present
    Process.kill("KILL", @child) rescue nil
  else
    log_with_severity :debug, "Child #{@child} not found, restarting."
    shutdown
  end
end
462
+
463
# Returns what the data store's `heartbeat` reports for this worker.
def heartbeat
  data_store.heartbeat(self)
end
466
+
467
# Asks the data store to remove this worker's heartbeat.
def remove_heartbeat
  data_store.remove_heartbeat(self)
end
470
+
471
# Records a heartbeat for this worker in the data store, stamped with
# +time+ (the data store's own server time unless one is given).
def heartbeat!(time = data_store.server_time)
  data_store.heartbeat!(self, time)
end
474
+
475
# Returns whatever the data store reports as the heartbeats of all workers.
def self.all_heartbeats
  data_store.all_heartbeats
end
478
+
479
# Returns the workers whose most recent heartbeat is older than
# ResqueAdmin.prune_interval seconds, measured against the data store's
# server time. Workers with no recorded heartbeat at all are NOT included.
def self.all_workers_with_expired_heartbeats
  workers = Worker.all
  heartbeats = Worker.all_heartbeats
  now = data_store.server_time

  workers.select do |worker|
    last_beat = heartbeats[worker.to_s]
    next false unless last_beat

    (now - Time.parse(last_beat)).to_i > ResqueAdmin.prune_interval
  end
end
498
+
499
# Clears any previous heartbeat for this worker, then starts a background
# thread that records a heartbeat, waits up to
# ResqueAdmin.heartbeat_interval on the thread signal, and repeats until
# that signal is raised. The thread is also added to the class-wide list
# of heartbeat threads.
def start_heartbeat
  remove_heartbeat

  @heartbeat_thread_signal = ResqueAdmin::ThreadSignal.new

  @heartbeat_thread = Thread.new do
    loop do
      heartbeat!
      break if @heartbeat_thread_signal.wait_for_signal(ResqueAdmin.heartbeat_interval)
    end
  end

  @@all_heartbeat_threads << @heartbeat_thread
end
514
+
515
# Stops the forked child with escalating force; the job it is processing
# will not be completed.
#
# 1. Nothing happens when there is no child or it has already exited.
# 2. With a positive pre_shutdown_timeout, the child first gets that many
#    seconds to exit by itself.
# 3. The child is sent TERM and given term_timeout seconds to exit.
# 4. If it is still there, it is sent KILL.
#
# A SystemCallError along the way is logged and swallowed.
def new_kill_child
  return unless @child

  if child_already_exited?
    return log_with_severity(:debug, "Child #{@child} already quit.")
  end

  if pre_shutdown_timeout && pre_shutdown_timeout > 0.0
    log_with_severity :debug, "Waiting #{pre_shutdown_timeout.to_f}s for child process to exit"
    return if wait_for_child_exit(pre_shutdown_timeout)
  end

  log_with_severity :debug, "Sending TERM signal to child #{@child}"
  Process.kill("TERM", @child)
  return if wait_for_child_exit(term_timeout)

  log_with_severity :debug, "Sending KILL signal to child #{@child}"
  Process.kill("KILL", @child)
rescue SystemCallError
  log_with_severity :error, "Child #{@child} already quit and reaped."
end
544
+
545
# Non-blocking check on the forked child: returns what
# Process.waitpid(@child, WNOHANG) returns, which is nil while the child
# is still running.
def child_already_exited?
  Process.waitpid(@child, Process::WNOHANG)
end
548
+
549
# Polls every 0.1 seconds, for about +timeout+ seconds in total, until the
# child has exited. Returns true as soon as it has, false if the time runs
# out first.
def wait_for_child_exit(timeout)
  polls = (timeout * 10).round
  polls.times do
    sleep(0.1)
    return true if child_already_exited?
  end
  false
end
556
+
557
# Whether job processing is paused; returns the raw @paused flag.
def paused?
  @paused
end
561
+
562
# Pauses the worker: logs the request, runs the :before_pause hooks with
# this worker, then sets the paused flag. A job already in flight is not
# interrupted; the flag is checked before the next job is taken.
def pause_processing
  log_with_severity(:info, "USR2 received; pausing job processing")
  run_hook(:before_pause, self)
  @paused = true
end
569
+
570
# Resumes a paused worker: logs the request, clears the paused flag, then
# runs the :after_pause hooks with this worker.
def unpause_processing
  log_with_severity(:info, "CONT received; resuming job processing")
  @paused = false
  run_hook(:after_pause, self)
end
576
+
577
# Looks for any workers which should be running on this server
# and, if they're not, removes them from Redis.
#
# This is a form of garbage collection. If a server is killed by a
# hard shutdown, power failure, or something else beyond our
# control, the ResqueAdmin workers will not die gracefully and therefore
# will leave stale state information in Redis.
#
# By checking the current Redis state against the actual
# environment, we can determine if Redis is old and clean it up a bit.
def prune_dead_workers
  all_workers = Worker.all

  unless all_workers.empty?
    known_workers = worker_pids
    all_workers_with_expired_heartbeats = Worker.all_workers_with_expired_heartbeats
  end

  all_workers.each do |worker|
    # If the worker's heartbeat has expired, remove it from the registry.
    #
    # If the worker hasn't ever sent a heartbeat, we won't remove it here:
    # since the first heartbeat is sent before the worker is registered, it
    # means that this is a worker that doesn't support heartbeats, e.g.,
    # another client library or an older version of ResqueAdmin.
    if all_workers_with_expired_heartbeats.include?(worker)
      log_with_severity :info, "Pruning dead worker: #{worker}"
      worker.unregister_worker(PruneDeadWorkerDirtyExit.new(worker.to_s))
      next
    end

    # Ids look like "<host>:<pid>:<queues>". Split into at most three
    # fields so queue names containing ':' are compared in full rather
    # than being cut off at the first colon.
    host, pid, worker_queues_raw = worker.id.split(':', 3)
    worker_queues = worker_queues_raw.split(",")
    unless @queues.include?("*") || (worker_queues.to_set == @queues.to_set)
      # If the worker we are trying to prune does not belong to the queues
      # we are listening to, we should not touch it.
      # Attempt to prune a worker from different queues may easily result in
      # an unknown class exception, since that worker could easily be even
      # written in different language.
      next
    end

    next unless host == hostname
    next if known_workers.include?(pid)

    log_with_severity :debug, "Pruning dead worker: #{worker}"
    worker.unregister_worker
  end
end
626
+
627
# Adds this worker to the data store's registry. Part of `startup`.
def register_worker
  data_store.register_worker(self)
end
632
+
633
# Runs the hooks ResqueAdmin holds under +name+, passing +args+ along.
#
# Nothing happens when ResqueAdmin returns nothing for +name+. The
# :before_first_fork hooks are only ever run once per worker object.
def run_hook(name, *args)
  hooks = ResqueAdmin.send(name)
  return unless hooks
  return if name == :before_first_fork && @before_first_fork_hook_ran

  with_args = args.any?
  msg = "Running #{name} hooks"
  msg << " with #{args.inspect}" if with_args
  log_with_severity :info, msg

  hooks.each do |hook|
    with_args ? hook.call(*args) : hook.call
    @before_first_fork_hook_ran = true if name == :before_first_fork
  end
end
646
+
647
# Stops the heartbeat thread, if one was started: raises its signal and
# waits for the thread to finish.
def kill_background_threads
  return unless @heartbeat_thread

  @heartbeat_thread_signal.signal
  @heartbeat_thread.join
end
653
+
654
# Takes this worker out of the data store's registry. Useful when
# shutting down.
#
# If the worker still has a job recorded as in progress, that job is
# failed first - with +exception+ when given, otherwise with a DirtyExit.
# Background threads are stopped, and the per-worker processed/failed
# stats are cleared in the block passed to the data store.
#
# Anything raised while doing this is re-raised as the same class with a
# message that also carries the original +exception+ (if any).
def unregister_worker(exception = nil)
  in_flight = processing
  if in_flight && !in_flight.empty?
    job = Job.new(in_flight['queue'], in_flight['payload'])
    # Attach this worker to the job even if it is not the precise
    # instance that died.
    job.worker = self
    begin
      job.fail(exception || DirtyExit.new("Job still being processed"))
    rescue RuntimeError => e
      log_with_severity :error, e.message
    end
  end

  kill_background_threads

  data_store.unregister_worker(self) do
    Stat.clear("processed:#{self}")
    Stat.clear("failed:#{self}")
  end
rescue Exception => exception_while_unregistering
  message = exception_while_unregistering.message
  if exception
    message += "\nOriginal Exception (#{exception.class}): #{exception.message}"
    message += "\n #{exception.backtrace.join(" \n")}" if exception.backtrace
  end
  fail(exception_while_unregistering.class,
       message,
       exception_while_unregistering.backtrace)
end
686
+
687
# Records +job+ as this worker's current job in the data store: the job's
# queue and payload plus the current UTC time (ISO 8601) are encoded and
# stored as the worker payload.
def working_on(job)
  snapshot = {
    :queue   => job.queue,
    :run_at  => Time.now.utc.iso8601,
    :payload => job.payload
  }
  data = encode(snapshot)
  data_store.set_worker_payload(self,data)
end
696
+
697
# Tells the data store this worker has finished its job, passing a block
# that bumps the processed counters via `processed!`.
def done_working
  data_store.worker_done_working(self) { processed! }
end
704
+
705
# Number of jobs this worker has processed, read from the
# "processed:<worker id>" stat.
def processed
  Stat["processed:#{self}"]
end
709
+
710
# Counts one more processed job, both in the global "processed" stat and
# in this worker's own "processed:<worker id>" stat.
def processed!
  Stat << "processed"
  Stat << "processed:#{self}"
end
715
+
716
# Number of failed jobs this worker has seen, read from the
# "failed:<worker id>" stat.
def failed
  Stat["failed:#{self}"]
end
720
+
721
# Counts one more failed job, both in the global "failed" stat and in
# this worker's own "failed:<worker id>" stat.
def failed!
  Stat << "failed"
  Stat << "failed:#{self}"
end
726
+
727
# When this worker started, as reported by the data store's
# `worker_start_time`.
def started
  data_store.worker_start_time(self)
end
731
+
732
# Tells the data store that this worker has started.
def started!
  data_store.worker_started(self)
end
736
+
737
# The job this worker is currently processing: the decoded worker payload
# from the data store, or an empty hash when there is none.
#
# reload - true (the default) always re-reads the data store; false
#          returns the value remembered from an earlier call or set
#          through `job=`, if any.
#
# Also available as `processing`.
def job(reload = true)
  return @job if @job && !reload

  @job = decode(data_store.get_worker_payload(self)) || {}
end

# Remembers +value+ as the current job.
def job=(value)
  @job = value
end
alias processing job
744
+
745
# True when `state` is :working, false otherwise.
def working?
  :working == state
end
749
+
750
# True when `state` is :idle, false otherwise.
def idle?
  :idle == state
end
754
+
755
# Whether each job runs in its own forked child.
#
# An explicitly assigned @fork_per_job wins. Otherwise the answer is
# computed once and remembered: forking is used unless FORK_PER_JOB is
# the string 'false' or Kernel does not respond to fork.
def fork_per_job?
  unless defined?(@fork_per_job)
    @fork_per_job = ENV["FORK_PER_JOB"] != 'false' && Kernel.respond_to?(:fork)
  end
  @fork_per_job
end
759
+
760
+ # Returns a symbol representing the current worker state,
761
+ # which can be either :working or :idle
762
def state
  # A stored payload for this worker means it is busy.
  if data_store.get_worker_payload(self)
    :working
  else
    :idle
  end
end
765
+
766
+ # Is this worker the same as another worker?
767
def ==(other)
  # Equality is decided purely by the string forms of both sides.
  own_id = to_s
  own_id == other.to_s
end
770
+
771
def inspect
  worker_id = to_s
  "#<Worker #{worker_id}>"
end
774
+
775
+ # The string representation is the same as the id for this worker
776
+ # instance. Can be used with `Worker.find`.
777
def to_s
  # Built once from hostname, pid and the comma-joined queue list.
  return @to_s if @to_s

  @to_s = [hostname, pid, @queues.join(',')].join(':')
end
780
+ alias_method :id, :to_s
781
+
782
+ # Hostname of this worker's machine (looked up once, then cached)
783
def hostname
  # Looked up once via Socket and cached on the instance.
  return @hostname if @hostname

  @hostname = Socket.gethostname
end
786
+
787
+ # Returns Integer PID of running worker
788
def pid
  # Cached on first use, so the value stays fixed for this instance.
  return @pid if @pid

  @pid = Process.pid
end
791
+
792
+ # Returns an Array of string pids of all the other workers on this
793
+ # machine. Useful when pruning dead workers on startup.
794
def worker_pids
  # Dispatch on the platform Ruby was built for.
  case RUBY_PLATFORM
  when /solaris/
    solaris_worker_pids
  when /mingw|mswin/
    # Covers every native Windows build: "i386-mingw32", "x64-mingw32",
    # "x64-mingw-ucrt" (Ruby 3.1+) and the mswin toolchains. Matching only
    # "mingw32" sent UCRT builds to the `ps`-based branch below.
    windows_worker_pids
  else
    linux_worker_pids
  end
end
803
+
804
+ # Find Ruby process pids on Windows by parsing `tasklist` output.
805
+ # Returns an Array of string pids, one per running ruby.exe process.
806
def windows_worker_pids
  # tasklist output arrives in the locale's charset; normalise to UTF-8.
  raw_listing = `tasklist /FI "IMAGENAME eq ruby.exe" /FO list`
  listing = raw_listing.encode("UTF-8", Encoding.locale_charmap)

  # Keep only the "PID: <n>" rows and strip the label from each.
  pid_rows = listing.split($/).grep(/^PID:/)
  pid_rows.map { |pid_row| pid_row.gsub(/PID:\s+/, '') }
end
810
+
811
+ # Find ResqueAdmin worker pids on Linux and OS X.
812
+ #
813
def linux_worker_pids
  # The [r] bracket trick keeps the grep processes out of their own results.
  ps_output = `ps -A -o pid,command | grep -E "[r]esque:work|[r]esque:\sStarting|[r]esque-[0-9]" | grep -v "resque-web"`

  # The pid is the first whitespace-separated column of each row.
  ps_output.split("\n").map { |process_row| process_row.split(' ').first }
end
818
+
819
+ # Find ResqueAdmin worker pids on Solaris.
820
+ #
821
+ # Returns an Array of string pids of all the other workers on this
822
+ # machine. Useful when pruning dead workers on startup.
823
def solaris_worker_pids
  ruby_rows = `ps -A -o pid,comm | grep "[r]uby" | grep -v "resque-web"`.split("\n")

  candidates = ruby_rows.map do |row|
    candidate_pid = row.split(' ').first
    # `ps` only lists the command name here, so `pargs` is asked for the
    # process arguments; the text after the first ':' is compared below.
    pargs_command = `pargs -a #{candidate_pid} 2>/dev/null | grep [r]esque | grep -v "resque-web"`
    matches_procline = pargs_command.split(':')[1] == " resque-#{ResqueAdmin::Version}"
    matches_procline ? candidate_pid : nil
  end

  candidates.compact
end
832
+
833
+ # Given a string, sets the procline ($0) and logs.
834
+ # Procline is always in the format of:
835
+ # RESQUE_PROCLINE_PREFIXresque-VERSION: STRING
836
def procline(string)
  # The prefix is optional; a missing ENV entry interpolates as empty.
  prefix = ENV['RESQUE_PROCLINE_PREFIX']
  $0 = "#{prefix}resque-#{ResqueAdmin::Version}: #{string}"
  log_with_severity(:debug, $0)
end
840
+
841
def log(message)
  # Forwards the message unchanged to `info`.
  info message
end
844
+
845
def log!(message)
  # Forwards the message unchanged to `debug`.
  debug message
end
848
+
849
+
850
def verbose
  # Plain reader; nil until `verbose=` has been called.
  defined?(@verbose) ? @verbose : nil
end
853
+
854
def very_verbose
  # Plain reader; nil until `very_verbose=` has been called.
  defined?(@very_verbose) ? @very_verbose : nil
end
857
+
858
def verbose=(value)
  if !value
    # Turning verbose off always falls back to the quiet formatter.
    ResqueAdmin.logger.formatter = QuietFormatter.new
  elsif !very_verbose
    # Only take over the logger when very_verbose is not already in charge.
    ResqueAdmin.logger.formatter = VerboseFormatter.new
    ResqueAdmin.logger.level = Logger::INFO
  end

  @verbose = value
end
868
+
869
def very_verbose=(value)
  if value
    ResqueAdmin.logger.formatter = VeryVerboseFormatter.new
    ResqueAdmin.logger.level = Logger::DEBUG
  elsif verbose
    # Very-verbose switched off while plain verbose is still on.
    ResqueAdmin.logger.formatter = VerboseFormatter.new
    ResqueAdmin.logger.level = Logger::INFO
  else
    ResqueAdmin.logger.formatter = QuietFormatter.new
  end

  @very_verbose = value
end
882
+
883
+ private
884
+
885
def perform_with_fork(job, &block)
  # Runs `job` in a forked child and waits for it. Falls back to running
  # the job in-process when fork is not implemented on this platform.
  # If the child was terminated by a signal, the job is failed with a
  # DirtyExit carrying the child's process status.
  run_hook :before_fork, job

  begin
    @child = fork do
      unregister_signal_handlers if term_child
      perform(job, &block)
      # Skip at_exit handlers in the child unless explicitly enabled.
      exit! unless run_at_exit_hooks
    end
  rescue NotImplementedError
    # Remember that forking is unavailable so later jobs skip the attempt.
    @fork_per_job = false
    perform(job, &block)
    return
  end

  srand # Reseeding
  procline "Forked #{@child} at #{Time.now.to_i}"

  begin
    Process.waitpid(@child)
  rescue SystemCallError
    nil
  end

  # `$?` is still read after a rescued waitpid failure, as before; the nil
  # guard covers the case where no child status has been recorded at all.
  status = $?
  if status && status.signaled?
    # `termsig` is the signal that killed the child. `stopsig` is only set
    # for stopped processes and is nil here, which left the message blank.
    job.fail(DirtyExit.new("Child process received unhandled signal #{status.termsig}", status))
  end
  @child = nil
end
912
+
913
def log_with_severity(severity, message)
  # Thin pass-through to the Logging module.
  Logging.log severity, message
end
916
+ end
917
+ end