resque_stuck_queue_revised 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,81 @@
1
module Resque
  module StuckQueue

    require 'logger'

    # Default timings (the arithmetic below is in seconds).
    HEARTBEAT_INTERVAL = 5 * 60 # a heartbeat job is sent every 5 minutes
    WATCHER_INTERVAL = 5 # the heartbeat key is checked for updates every 5 seconds

    TRIGGER_TIMEOUT = 60 * 60 # trigger once lag time reaches an hour

    # Default handlers. They must follow the naming convention: <type>_handler
    TRIGGERED_HANDLER = proc do |queue, lagtime|
      Resque::StuckQueue::LOGGER.info("Shit gone bad with them queues...on #{queue}. Lag time is #{lagtime}")
    end
    RECOVERED_HANDLER = proc do |queue, lagtime|
      Resque::StuckQueue::LOGGER.info("recovered queue phew #{queue}. Lag time is #{lagtime}")
    end

    LOGGER = Logger.new($stdout)
    HEARTBEAT_KEY = "resque-stuck-queue"
    TRIGGERED_KEY = "resque-stuck-queue-last-triggered"

    # Hash of settings that only accepts the keys listed in OPTIONS.
    # Reading or writing any other key through [] / []= raises NoConfigError.
    class Config < Hash

      # Raised for an unknown option key or a missing required option.
      class NoConfigError < StandardError; end

      # Human readable help text for every supported option.
      OPTIONS_DESCRIPTIONS = {
        :triggered_handler => "set to what gets triggered when resque-stuck-queue will detect the latest heartbeat is older than the trigger_timeout time setting.\n\tExample:\n\tResque::StuckQueue.config[:triggered_handler] = proc { |queue_name, lagtime| send_email('queue \#{queue_name} isnt working, aaah the daemons') }",
        :recovered_handler => "set to what gets triggered when resque-stuck-queue has triggered a problem, but then detects the queue went back down to functioning well again(it wont trigger again until it has recovered).\n\tExample:\n\tResque::StuckQueue.config[:recovered_handler] = proc { |queue_name, lagtime| send_email('phew, queue \#{queue_name} is ok') }",
        :heartbeat_interval => "set to how often to push the 'heartbeat' job which will refresh the latest working time.\n\tExample:\n\tResque::StuckQueue.config[:heartbeat_interval] = 5.minutes",
        :watcher_interval => "set to how often to check to see when the last time it worked was.\n\tExample:\n\tResque::StuckQueue.config[:watcher_interval] = 1.minute",
        :trigger_timeout => "set to how much of a resque work lag you are willing to accept before being notified. note: take the :watcher_interval setting into account when setting this timeout.\n\tExample:\n\tResque::StuckQueue.config[:trigger_timeout] = 9.minutes",
        :warn_interval => "optional: if set, it will continiously trigger/warn in spaces of this interval after first trigger. eg, as long as lagtime keeps on being above trigger_timeout/recover hasn't occured yet.",
        :redis => "set the Redis StuckQueue will use. Either a Redis or Redis::Namespace instance.",
        :heartbeat_key => "optional, name of keys to keep track of the last good resque heartbeat time",
        :triggered_key => "optional, name of keys to keep track of the last trigger time",
        :logger => "optional, pass a Logger. Default a ruby logger will be instantiated. Needs to respond to that interface.",
        :queues => "optional, monitor specific queues you want to send a heartbeat/monitor to. default is [:app]",
        :abort_on_exception => "optional, if you want the resque-stuck-queue threads to explicitly raise, default is true",
        :heartbeat_job => "optional, your own custom refreshing job. if you are using something other than resque",
        :enable_signals => "optional, allow resque::stuck's signal_handlers which do mostly nothing at this point. possible future plan: log info, reopen log file, etc.",
      }

      # Every key that may be read or written.
      OPTIONS = OPTIONS_DESCRIPTIONS.keys

      # Keys that validate_required_keys! insists on being non-nil.
      REQUIRED_KEYS = [:redis]

      # Stores v under k; raises NoConfigError when k is not a known option.
      def []=(k,v)
        validate_key_exists!(k)
        super
      end

      # Returns the value stored under k; raises NoConfigError when k is not
      # a known option.
      def [](k)
        validate_key_exists!(k)
        super
      end

      # Raises NoConfigError for the first required key whose value is nil.
      def validate_required_keys!
        REQUIRED_KEYS.each do |required|
          raise NoConfigError, "You must set config[:#{required}]" if self[required].nil?
        end
      end

      # Raises NoConfigError unless k is one of OPTIONS.
      def validate_key_exists!(k)
        raise NoConfigError, "no such config key #{k} exists!" unless OPTIONS.include?(k)
      end

      # Help text for option k (anything responding to to_sym), or nil when
      # no such option is described.
      def description_for(k)
        OPTIONS_DESCRIPTIONS.fetch(k.to_sym, nil)
      end

      # All option descriptions as one printable string, one
      # "key:\n\tdescription" paragraph per option.
      def pretty_descriptions
        paragraphs = OPTIONS_DESCRIPTIONS.map { |key, msg| "#{key}:\n\t#{msg}\n\n" }
        "\n" + paragraphs.join
      end

    end
  end
end
@@ -0,0 +1,19 @@
1
module Resque
  module StuckQueue
    # Job whose only work is to write a timestamp under a heartbeat key.
    class HeartbeatJob
      class << self

        # Connection used by the most recent perform call.
        attr_accessor :redis

        # Writes new_time under keyname.
        #
        # args - positional: keyname, host, port, namespace, new_time.
        #        host/port/namespace are only used to build a namespaced
        #        connection when Resque::StuckQueue.redis is not set.
        def perform(*args)
          key, redis_host, redis_port, redis_namespace, timestamp = args

          # Prefer the connection set through config[:redis] before this lib was loaded.
          connection = Resque::StuckQueue.redis
          connection ||= Redis::Namespace.new(
            redis_namespace,
            :redis => Redis.new(:host => redis_host, :port => redis_port)
          )
          @redis = connection

          @redis.set(key, timestamp)
          Resque::StuckQueue.logger.info("successfully updated key #{key} to #{timestamp} at #{Time.now} for #{@redis.inspect}")
        end

      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Resque
  module StuckQueue
    # Release number of the gem.
    VERSION = '0.5.1'
  end
end
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Put ./lib on the load path so the version file can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'resque_stuck_queue/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name          = "resque_stuck_queue_revised"
  gem.version       = Resque::StuckQueue::VERSION
  gem.authors       = ["Dave Kerr"]
  gem.email         = ["davek09@gmail.com"]
  gem.summary       = "fire a handler when your queues are wonky"
  gem.description   = "where the wild things are. err, when resque gets stuck"
  gem.homepage      = ""
  gem.license       = "MIT"

  # Packaged files: everything tracked by git, split on the record separator.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_runtime_dependency("redis-mutex") # TODO rm this
  gem.add_runtime_dependency("redis-namespace")

  # Development dependencies
  gem.add_development_dependency("bundler", "~> 1.5")
  gem.add_development_dependency("rake")
end
@@ -0,0 +1,9 @@
1
# Fixture job: when performed it sets the key NAME to "1" on a
# default-configured Redis connection.
class SetRedisKey
  NAME = "integration_test"
  @queue = :app

  class << self
    def perform
      # tests run on localhost, so the default connection is used
      connection = Redis.new
      connection.set(NAME, "1")
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
2
+
3
# Runs several stuck-queue daemons at once and checks the trigger handler
# fired exactly once between them.
class TestCollision < Minitest::Test

  include TestHelper

  def setup
    { :redis => Redis.new, :watcher_interval => 1 }.each do |option, value|
      Resque::StuckQueue.config[option] = value
    end
    Resque::StuckQueue.redis.flushall
  end

  def test_two_processes_interacting
    puts __method__.to_s
    # no resque should be running here so timeouts will be reached + trigger
    Resque::StuckQueue.redis.del("test-incr-key")

    # four daemons, each in its own process with a fresh redis connection
    daemon_pids = Array.new(4) do
      fork do
        Resque::StuckQueue.redis.client.reconnect
        run_resque_stuck_daemon
      end
    end

    Thread.new do
      sleep 5 # let test run and trigger once occur (according to time below)
      daemon_pids.each { |pid| `kill -9 #{pid}` }
      Process.waitpid # reap
    end

    Process.waitall

    assert_equal 1, Resque::StuckQueue.redis.get("test-incr-key").to_i
  end

  private

  # Configures and starts the daemon; the triggered handler bumps a
  # counter key so the parent can count how many triggers happened.
  def run_resque_stuck_daemon
    daemon_options = {
      :heartbeat_interval => 1,
      :abort_on_exception => true,
      :trigger_timeout    => 3,
      :triggered_handler  => proc { Resque::StuckQueue.redis.incr("test-incr-key") }
    }
    daemon_options.each { |option, value| Resque::StuckQueue.config[option] = value }
    Resque::StuckQueue.start
  end

end
@@ -0,0 +1,67 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
2
+
3
# Exercises Resque::StuckQueue::Config: descriptions, logger, required keys.
class TestConfig < Minitest::Test

  include TestHelper

  def setup
    {
      :watcher_interval   => 1,
      :trigger_timeout    => 1,
      :heartbeat_interval => 1,
      :abort_on_exception => true
    }.each { |option, value| Resque::StuckQueue.config[option] = value }
    Resque::StuckQueue.config[:redis] = Redis.new
  end

  def teardown
    Resque::StuckQueue.reset!
  end

  def test_config_has_descriptions
    config = Resque::StuckQueue::Config.new
    logger_help = config.description_for(:logger)
    assert(logger_help =~ /Logger/, "has descriptions")
  end

  # Smoke test: printing the descriptions must simply not raise.
  def test_outputs_all_config_options
    config = Resque::StuckQueue::Config.new
    puts config.pretty_descriptions
    assert true
  end

  def test_has_logger
    puts __method__.to_s
    begin
      Resque::StuckQueue.config[:logger] = Logger.new($stdout)
      start_and_stop_loops_after(1)
    rescue => e
      assert false, "should have succeeded with good logger: #{e.inspect}\n#{e.backtrace.join("\n")}"
    else
      assert true, "should not have raised"
    end
  end

  def test_must_set_redis
    puts __method__.to_s
    Resque::StuckQueue.config[:redis] = nil
    begin
      start_and_stop_loops_after(1)
    rescue Resque::StuckQueue::Config::NoConfigError => e
      assert true, "redis cannot be nil: #{e.inspect}\n#{e.backtrace.join("\n")}"
    else
      assert false, "redis cannot be nil"
    end
  end

  # Disabled test, kept for reference:
  #def test_can_have_signals
    #puts "#{__method__}"
    #begin
      #assert_equal ENV['SIGUSR1'], nil
      #Resque::StuckQueue.config[:enable_signals] = true
      #start_and_stop_loops_after(1)
      #Process.kill "SIGUSR1", Process.pid
      #assert_equal ENV['SIGUSR1'], "done be had"
    #rescue => e
      #assert false, "should have succeeded with signal handlers: #{e.inspect}\n#{e.backtrace.join("\n")}"
    #end
  #end

end
66
+
67
+
@@ -0,0 +1,57 @@
1
+ require 'minitest'
2
+ require "minitest/autorun"
3
+ require 'pry'
4
+ require 'mocha'
5
+ require "minitest/unit"
6
+ require "mocha/mini_test"
7
+ $:.unshift(".")
8
+ require 'resque_stuck_queue'
9
+ require File.join(File.expand_path(File.dirname(__FILE__)), "resque", "set_redis_key")
10
+
11
# Helpers shared by the test classes; also callable directly on the module
# (it extends itself).
module TestHelper

  extend self

  # Boots a Resque worker in a child process and waits for it to start.
  #
  # The worker command is exec'd directly with an argument list and its
  # settings are passed through the environment hash, so no shell is
  # involved: the queue name (default "*") can never be glob-expanded or
  # word-split, and the forked child itself becomes the rake process.
  #
  # queue_name - value passed to the worker as QUEUE.
  #
  # Returns the pid of the forked child.
  def run_resque(queue_name = "*")
    worker_env = { "INTERVAL" => "1", "QUEUE" => queue_name.to_s }
    pid = fork { exec(worker_env, "bundle", "exec", "rake", "--trace", "resque:work") }
    sleep 3 # wait for resque to boot up
    pid
  end

  # Clears Resque's failure list, runs the block, then asserts that
  # Resque::Failure.all is nil afterwards.
  def with_no_resque_failures(&blk)
    Resque::Failure.clear
    blk.call
    assert_nil Resque::Failure.all, "Resque hearbeat job cant fail: #{Resque::Failure.all.inspect}"
  end

  # Kills every process whose `ps aux` line matches a resque status
  # (Waiting/Forked/Processing), then waits for shutdown.
  def hax_kill_resque
    # ugly: processes are found by pattern rather than by a tracked pid.
    `ps aux |grep -E 'resque.*(Waiting|Forked|Processing)'| grep -v grep | awk '{print $2}' |xargs kill`
    sleep 2 # wait for shutdown
  end

  # Starts the StuckQueue loops in one thread and stops them from another
  # after `secs` seconds, waiting for both threads to finish.
  #
  # Thread.abort_on_exception is set from config[:abort_on_exception] for the
  # duration and restored afterwards; the loops are force-stopped in all cases.
  def start_and_stop_loops_after(secs)
    abort_or_not = Thread.abort_on_exception
    Thread.abort_on_exception = Resque::StuckQueue.config[:abort_on_exception]

    ops = [
      Thread.new { Resque::StuckQueue.start },
      Thread.new { sleep secs; Resque::StuckQueue.stop }
    ]
    ops.each(&:join)

  ensure
    Thread.abort_on_exception = abort_or_not
    Resque::StuckQueue.force_stop!
  end

end
48
+
49
# Show full, unfiltered backtraces in test failures: the backtrace is
# returned untouched instead of being trimmed.
# http://stackoverflow.com/questions/9346101/how-to-get-stack-trace-from-a-testunittestcase
#
# Defined on `Minitest` (the constant the test classes already use via
# Minitest::Test) rather than the legacy `MiniTest` spelling, which newer
# minitest releases no longer define by default.
def Minitest.filter_backtrace(bt)
  bt
end
53
+
54
# hax: make sure an earlier test run that raised did not leave a resque
# process running; the flag keeps this from running twice.
if !@before_all_hax_kill_resque && TestHelper.hax_kill_resque
  @before_all_hax_kill_resque = true
end
@@ -0,0 +1,172 @@
1
+ require 'minitest'
2
+ require "minitest/autorun"
3
+ require 'pry'
4
+
5
+
6
+ $:.unshift(".")
7
+ require 'resque_stuck_queue'
8
+ require File.join(File.expand_path(File.dirname(__FILE__)), "resque", "set_redis_key")
9
+ require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
10
+
11
# End-to-end tests that run the StuckQueue loops against a resque worker
# process started once for the whole class.
class TestIntegration < Minitest::Test

  include TestHelper

  # UBER HAXING: there is no after(:all) or before(:all), so the class
  # counts the tests that have started and compares that with the number
  # of test_ methods to know when the last one is running.
  class << self
    attr_writer :tests_ran
    attr_accessor :resque_pid

    # Number of tests whose setup has run so far (0 before the first).
    def tests_ran
      @tests_ran ||= 0
    end

    # True while fewer tests have started than there are test_ methods.
    def tests_running?
      test_count = public_instance_methods.count { |name| name.to_s.start_with?("test_") }
      tests_ran != test_count
    end

    def tests_done?
      !tests_running?
    end

    # Starts the resque worker on first call; later calls do nothing.
    def run_resque_before_all
      return if @running_resque
      @running_resque = true

      @resque_pid = TestHelper.run_resque
    end
  end

  def setup
    Resque::StuckQueue.config[:redis] = Redis.new
    Resque::StuckQueue.redis.flushall
    Resque::StuckQueue.config[:watcher_interval] = 1
    Resque::StuckQueue.config[:abort_on_exception] = true
    self.class.run_resque_before_all
    self.class.tests_ran += 1
  end

  def teardown
    Resque::StuckQueue.reset!
    # after the last test: kill the worker and reap child processes
    if self.class.tests_done?
      hax_kill_resque
      Process.waitall
    end
  end

  def test_resque_enqueues_a_job_does_not_trigger
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:trigger_timeout] = 10
      Resque::StuckQueue.config[:heartbeat_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new

      @triggered = false
      Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
      start_and_stop_loops_after(5)
      sleep 3 # job ran successfully, so don't trigger
      assert_equal false, @triggered
    end
  end

  # warn_interval #0
  def test_resque_does_not_enqueues_a_job_does_trigger_once_with_no_warn_interval
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
      Resque::StuckQueue.config[:trigger_timeout] = 2
      Resque::StuckQueue.config[:watcher_interval] = 1
      Resque::StuckQueue.config[:warn_interval] = nil
      Resque::StuckQueue.config[:redis] = Redis.new
      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }

      start_and_stop_loops_after(5)
      # check handler did get called once as there is no warn_interval
      assert_equal 1, Resque::StuckQueue.redis.get("test_incr_warn").to_i
    end
  end


  # warn_interval #1
  def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
      Resque::StuckQueue.config[:trigger_timeout] = 2
      Resque::StuckQueue.config[:watcher_interval] = 1
      Resque::StuckQueue.config[:warn_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new
      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }

      start_and_stop_loops_after(5)
      # check handler did get called multiple times due to warn_interval
      assert_equal 3, Resque::StuckQueue.redis.get("test_incr_warn").to_i
    end
  end

  # warn_interval #2
  def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval_stops_on_recover
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:heartbeat_interval] = 2 # so we trigger, and recover in this timeframe
      Resque::StuckQueue.config[:trigger_timeout] = 2
      Resque::StuckQueue.config[:watcher_interval] = 1
      Resque::StuckQueue.config[:warn_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new
      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }

      @recovered = false
      Resque::StuckQueue.config[:recovered_handler] = proc { @recovered = true }

      start_and_stop_loops_after(5)

      assert @recovered, "resque should have picked up heartbeat job after 2 seconds"

      # check handler did get called multiple times due to warn_interval but less than previous test because recover
      assert_equal 2, Resque::StuckQueue.redis.get("test_incr_warn").to_i
    end
  end

  def test_resque_does_not_enqueues_a_job_does_trigger
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:trigger_timeout] = 0
      Resque::StuckQueue.config[:heartbeat_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new

      @triggered = false
      Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
      start_and_stop_loops_after(2)
      # check handler did get called
      assert_equal true, @triggered
    end
  end

  def test_has_settable_custom_hearbeat_job
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:trigger_timeout] = 2 # won't allow waiting too much and will complain (eg trigger) sooner than later
      Resque::StuckQueue.config[:heartbeat_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new

      begin
        # custom job: enqueue the heartbeat job with only the key argument
        Resque::StuckQueue.config[:heartbeat_job] = proc { Resque.enqueue_to(:app, Resque::StuckQueue::HeartbeatJob, Resque::StuckQueue.heartbeat_key_for(:app)) }
        @triggered = false
        Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
        start_and_stop_loops_after(4)

        sleep 3 # allow trigger
        assert true, "should not have raised"
        assert @triggered, "should have triggered"
      rescue => e
        assert false, "should have succeeded with good refresh_job.\n #{e.inspect}"
      end
    end
  end

end