resque_stuck_queue_revised 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
module Resque
  module StuckQueue

    require 'logger'

    # Defaults. Intervals and timeouts are expressed in seconds.
    HEARTBEAT_INTERVAL = 5 * 60 # send heartbeat job every 5 minutes
    WATCHER_INTERVAL = 5 # check key is updated every 5 seconds.

    TRIGGER_TIMEOUT = 60 * 60 # trigger after an hour of lagtime.

    # must be called by convention: type_handler
    # Both default handlers only log through LOGGER; the constant is resolved
    # lazily when the proc runs, so defining LOGGER further down is fine.
    TRIGGERED_HANDLER = proc { |queue_name, lag| Resque::StuckQueue::LOGGER.info("Shit gone bad with them queues...on #{queue_name}. Lag time is #{lag}") }
    RECOVERED_HANDLER = proc { |queue_name, lag| Resque::StuckQueue::LOGGER.info("recovered queue phew #{queue_name}. Lag time is #{lag}") }

    LOGGER = Logger.new($stdout)
    HEARTBEAT_KEY = "resque-stuck-queue"
    TRIGGERED_KEY = "resque-stuck-queue-last-triggered"

    # Hash-backed configuration store that only accepts the keys listed in
    # OPTIONS. Reading or writing any other key through #[] / #[]= raises
    # NoConfigError.
    class Config < Hash

      # Raised for unknown config keys and for required keys left unset.
      class NoConfigError < StandardError; end

      # Every supported option mapped to its human readable description.
      OPTIONS_DESCRIPTIONS = {
        :triggered_handler => "set to what gets triggered when resque-stuck-queue will detect the latest heartbeat is older than the trigger_timeout time setting.\n\tExample:\n\tResque::StuckQueue.config[:triggered_handler] = proc { |queue_name, lagtime| send_email('queue \#{queue_name} isnt working, aaah the daemons') }",
        :recovered_handler => "set to what gets triggered when resque-stuck-queue has triggered a problem, but then detects the queue went back down to functioning well again(it wont trigger again until it has recovered).\n\tExample:\n\tResque::StuckQueue.config[:recovered_handler] = proc { |queue_name, lagtime| send_email('phew, queue \#{queue_name} is ok') }",
        :heartbeat_interval => "set to how often to push the 'heartbeat' job which will refresh the latest working time.\n\tExample:\n\tResque::StuckQueue.config[:heartbeat_interval] = 5.minutes",
        :watcher_interval => "set to how often to check to see when the last time it worked was.\n\tExample:\n\tResque::StuckQueue.config[:watcher_interval] = 1.minute",
        :trigger_timeout => "set to how much of a resque work lag you are willing to accept before being notified. note: take the :watcher_interval setting into account when setting this timeout.\n\tExample:\n\tResque::StuckQueue.config[:trigger_timeout] = 9.minutes",
        :warn_interval => "optional: if set, it will continiously trigger/warn in spaces of this interval after first trigger. eg, as long as lagtime keeps on being above trigger_timeout/recover hasn't occured yet.",
        :redis => "set the Redis StuckQueue will use. Either a Redis or Redis::Namespace instance.",
        :heartbeat_key => "optional, name of keys to keep track of the last good resque heartbeat time",
        :triggered_key => "optional, name of keys to keep track of the last trigger time",
        :logger => "optional, pass a Logger. Default a ruby logger will be instantiated. Needs to respond to that interface.",
        :queues => "optional, monitor specific queues you want to send a heartbeat/monitor to. default is [:app]",
        :abort_on_exception => "optional, if you want the resque-stuck-queue threads to explicitly raise, default is true",
        :heartbeat_job => "optional, your own custom refreshing job. if you are using something other than resque",
        :enable_signals => "optional, allow resque::stuck's signal_handlers which do mostly nothing at this point. possible future plan: log info, reopen log file, etc.",
      }.freeze

      # The complete list of accepted config keys (symbols).
      OPTIONS = OPTIONS_DESCRIPTIONS.keys.freeze

      # Keys that must hold a non-nil value for the config to be usable.
      REQUIRED_KEYS = [:redis].freeze

      # Stores +v+ under +k+.
      # Raises NoConfigError when +k+ is not one of OPTIONS.
      def []=(k, v)
        validate_key_exists!(k)
        super(k, v)
      end

      # Returns the value stored under +k+ (nil when unset).
      # Raises NoConfigError when +k+ is not one of OPTIONS.
      def [](k)
        validate_key_exists!(k)
        super(k)
      end

      # Raises NoConfigError for the first REQUIRED_KEYS entry whose value is nil.
      def validate_required_keys!
        REQUIRED_KEYS.each do |k|
          raise NoConfigError, "You must set config[:#{k}]" if self[k].nil?
        end
      end

      # Raises NoConfigError unless +k+ is one of OPTIONS.
      def validate_key_exists!(k)
        raise NoConfigError, "no such config key #{k} exists!" unless OPTIONS.include?(k)
      end

      # Returns the description for option +k+ (anything responding to
      # #to_sym), or nil when there is no such option.
      def description_for(k)
        OPTIONS_DESCRIPTIONS[k.to_sym]
      end

      # Returns one printable String listing every option with its
      # description, each entry followed by a blank line.
      def pretty_descriptions
        entries = OPTIONS_DESCRIPTIONS.map { |key, msg| "#{key}:\n\t#{msg}\n\n" }
        "\n" + entries.join
      end

    end
  end
end
@@ -0,0 +1,19 @@
1
module Resque
  module StuckQueue
    # Job whose only work is to write a timestamp into a redis key.
    class HeartbeatJob
      class << self
        attr_accessor :redis
      end

      # Writes +new_time+ under +keyname+ and logs the update.
      #
      # Positional args, in order: keyname, host, port, namespace, new_time.
      # host/port/namespace are only used to build a namespaced connection
      # when Resque::StuckQueue.redis returns nothing.
      def self.perform(*args)
        keyname, host, port, namespace, new_time = args

        # if set by config[:redis] earlier before loading this lib.
        configured = Resque::StuckQueue.redis
        @redis = configured || Redis::Namespace.new(namespace, :redis => Redis.new(:host => host, :port => port))

        @redis.set(keyname, new_time)
        Resque::StuckQueue.logger.info "successfully updated key #{keyname} to #{new_time} at #{Time.now} for #{@redis.inspect}"
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Resque
  module StuckQueue
    # Release version of the gem.
    VERSION = '0.5.1'
  end
end
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Gem specification for resque_stuck_queue_revised.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'resque_stuck_queue/version'

Gem::Specification.new do |spec|
  spec.name          = "resque_stuck_queue_revised"
  spec.version       = Resque::StuckQueue::VERSION
  spec.authors       = ["Dave Kerr"]
  spec.email         = ["davek09@gmail.com"]
  spec.summary       = %q{fire a handler when your queues are wonky}
  spec.description   = %q{where the wild things are. err, when resque gets stuck}
  spec.homepage      = "" # NOTE(review): left empty in the original; fill in the project URL.
  spec.license       = "MIT"

  # Ask git for a NUL-delimited listing: without -z git may quote or escape
  # unusual path names, and splitting on the line separator breaks on paths
  # that contain a newline.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "redis-mutex" # TODO rm this

  spec.add_runtime_dependency "redis-namespace"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,9 @@
1
+ # fixture job
2
# Fixture job: its only effect is to set the NAME key to "1".
class SetRedisKey
  NAME = "integration_test"
  @queue = :app

  class << self
    # Uses a default Redis connection because the tests run on localhost.
    def perform
      Redis.new.set(NAME, "1")
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
2
+
3
# Starts several stuck-queue daemons in separate processes against the same
# redis and checks that the triggered handler ran exactly once overall.
class TestCollision < Minitest::Test

  include TestHelper

  def setup
    Resque::StuckQueue.config[:redis] = Redis.new
    Resque::StuckQueue.config[:watcher_interval] = 1
    Resque::StuckQueue.redis.flushall
  end

  def test_two_processes_interacting
    puts "#{__method__}"
    # no resque should be running here so timeouts will be reached + trigger
    Resque::StuckQueue.redis.del("test-incr-key")

    daemon_count = 4
    pids = Array.new(daemon_count) do
      # each child reconnects its redis client before starting its own daemon
      fork { Resque::StuckQueue.redis.client.reconnect; run_resque_stuck_daemon }
    end

    Thread.new do
      sleep 5 # let test run and trigger once occur (according to time below)
      pids.each do |pid|
        begin
          Process.kill("KILL", pid)
        rescue Errno::ESRCH
          # child is already gone; nothing left to kill
        end
      end
    end

    # The main thread is the only reaper, so the killer thread cannot race
    # with it over child exit statuses.
    Process.waitall

    assert_equal 1, Resque::StuckQueue.redis.get("test-incr-key").to_i
  end

  private

  # Configures short intervals plus a handler that increments
  # "test-incr-key", then starts the daemon in the current process.
  def run_resque_stuck_daemon
    Resque::StuckQueue.config[:heartbeat_interval] = 1
    Resque::StuckQueue.config[:abort_on_exception] = true
    Resque::StuckQueue.config[:trigger_timeout] = 3
    Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test-incr-key") }
    Resque::StuckQueue.start
  end

end
@@ -0,0 +1,67 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
2
+
3
# Tests for Resque::StuckQueue::Config and for config validation on start.
class TestConfig < Minitest::Test

  include TestHelper

  def setup
    Resque::StuckQueue.config[:watcher_interval] = 1
    Resque::StuckQueue.config[:trigger_timeout] = 1
    Resque::StuckQueue.config[:heartbeat_interval] = 1
    Resque::StuckQueue.config[:abort_on_exception] = true
    Resque::StuckQueue.config[:redis] = Redis.new
  end

  def teardown
    Resque::StuckQueue.reset!
  end

  def test_config_has_descriptions
    c = Resque::StuckQueue::Config.new
    assert_match(/Logger/, c.description_for(:logger), "has descriptions")
  end

  # Smoke test: only checks that rendering the descriptions does not raise.
  def test_outputs_all_config_options
    c = Resque::StuckQueue::Config.new
    puts c.pretty_descriptions
    assert true
  end

  def test_has_logger
    puts "#{__method__}"
    begin
      Resque::StuckQueue.config[:logger] = Logger.new($stdout)
      start_and_stop_loops_after(1)
      assert true, "should not have raised"
    rescue => e
      # report as a failure carrying the original error and backtrace
      flunk "should have succeeded with good logger: #{e.inspect}\n#{e.backtrace.join("\n")}"
    end
  end

  def test_must_set_redis
    puts "#{__method__}"
    Resque::StuckQueue.config[:redis] = nil
    assert_raises(Resque::StuckQueue::Config::NoConfigError, "redis cannot be nil") do
      start_and_stop_loops_after(1)
    end
  end

  #def test_can_have_signals
    #puts "#{__method__}"
    #begin
      #assert_equal ENV['SIGUSR1'], nil
      #Resque::StuckQueue.config[:enable_signals] = true
      #start_and_stop_loops_after(1)
      #Process.kill "SIGUSR1", Process.pid
      #assert_equal ENV['SIGUSR1'], "done be had"
    #rescue => e
      #assert false, "should have succeeded with signal handlers: #{e.inspect}\n#{e.backtrace.join("\n")}"
    #end
  #end

end
66
+
67
+
@@ -0,0 +1,57 @@
1
+ require 'minitest'
2
+ require "minitest/autorun"
3
+ require 'pry'
4
+ require 'mocha'
5
+ require "minitest/unit"
6
+ require "mocha/mini_test"
7
+ $:.unshift(".")
8
+ require 'resque_stuck_queue'
9
+ require File.join(File.expand_path(File.dirname(__FILE__)), "resque", "set_redis_key")
10
+
11
# Helpers shared by the test classes: booting and killing a resque worker and
# running the StuckQueue loops for a bounded amount of time.
module TestHelper

  extend self

  # Forks a `bundle exec rake --trace resque:work` process with INTERVAL=1 and
  # QUEUE=queue_name, waits 3 seconds for it to boot and returns the forked pid.
  #
  # The environment and argv are passed to exec directly, so no shell is
  # involved: the default "*" queue cannot be expanded by the shell and no
  # intermediate shell process sits between the forked pid and the command.
  def run_resque(queue_name = "*")
    env = { "INTERVAL" => "1", "QUEUE" => queue_name.to_s }
    pid = fork { exec(env, "bundle", "exec", "rake", "--trace", "resque:work") }
    sleep 3 # wait for resque to boot up
    pid
  end

  # Clears recorded resque failures, runs the block, then asserts that
  # Resque::Failure.all is nil afterwards.
  def with_no_resque_failures(&blk)
    Resque::Failure.clear
    blk.call
    assert_nil Resque::Failure.all, "Resque hearbeat job cant fail: #{Resque::Failure.all.inspect}"
  end

  # Kills every process whose `ps aux` line matches a resque worker status
  # (Waiting/Forked/Processing), then waits 2 seconds for shutdown.
  def hax_kill_resque
    # ugly, FIXME how to get pid of forked forked process. run_resque pid is incorrect.
    `ps aux |grep -E 'resque.*(Waiting|Forked|Processing)'| grep -v grep | awk '{print $2}' |xargs kill`
    sleep 2 # wait for shutdown
  end

  # Runs Resque::StuckQueue.start in one thread and stops it from a second
  # thread after +secs+ seconds, waiting for both to finish.
  # Thread.abort_on_exception is switched to the configured value for the
  # duration and restored afterwards; force_stop! is always called.
  def start_and_stop_loops_after(secs)
    abort_or_not = Thread.abort_on_exception
    Thread.abort_on_exception = Resque::StuckQueue.config[:abort_on_exception]

    ops = []
    ops << Thread.new { Resque::StuckQueue.start }
    ops << Thread.new { sleep secs; Resque::StuckQueue.stop }
    ops.each(&:join)

  ensure
    Thread.abort_on_exception = abort_or_not
    Resque::StuckQueue.force_stop!
  end

end
48
+
49
# http://stackoverflow.com/questions/9346101/how-to-get-stack-trace-from-a-testunittestcase
class << MiniTest
  # Returns the backtrace untouched instead of filtering it
  # (presumably so failures show complete stack traces, per the link above).
  def filter_backtrace(bt)
    bt
  end
end
53
+
54
# hax: make sure an earlier test run that raised did not leave a resque
# process running. The flag keeps the kill from being repeated once it is set.
if !@before_all_hax_kill_resque && TestHelper.hax_kill_resque
  @before_all_hax_kill_resque = true
end
@@ -0,0 +1,172 @@
1
+ require 'minitest'
2
+ require "minitest/autorun"
3
+ require 'pry'
4
+
5
+
6
+ $:.unshift(".")
7
+ require 'resque_stuck_queue'
8
+ require File.join(File.expand_path(File.dirname(__FILE__)), "resque", "set_redis_key")
9
+ require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
10
+
11
# End-to-end tests that run the StuckQueue loops against a resque worker
# started once for the whole class.
class TestIntegration < Minitest::Test

  include TestHelper

  # UBER HAXING no after(:all) or before(:all)
  class << self
    attr_writer :tests_ran
    attr_accessor :resque_pid

    # True while fewer setups have run than there are test_ methods.
    def tests_running?
      test_count = public_instance_methods.count { |m| m.to_s.match(/^test_/) }
      tests_ran != test_count
    end

    def tests_done?
      !tests_running?
    end

    # Number of tests whose setup has run so far (starts at 0).
    def tests_ran
      @tests_ran ||= 0
    end

    # Boots the resque worker on the first call only.
    def run_resque_before_all
      return if @running_resque
      @running_resque = true

      @resque_pid = TestHelper.run_resque
    end
  end

  def setup
    Resque::StuckQueue.config[:redis] = Redis.new
    Resque::StuckQueue.redis.flushall
    Resque::StuckQueue.config[:watcher_interval] = 1
    Resque::StuckQueue.config[:abort_on_exception] = true
    self.class.run_resque_before_all
    self.class.tests_ran += 1
  end

  def teardown
    Resque::StuckQueue.reset!
    # after the last test: kill the shared worker and reap child processes
    if self.class.tests_done?
      hax_kill_resque
      Process.waitall
    end
  end

  def test_resque_enqueues_a_job_does_not_trigger
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:trigger_timeout] = 10
      Resque::StuckQueue.config[:heartbeat_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new

      @triggered = false
      Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
      start_and_stop_loops_after(5)
      sleep 3 # job ran successfully, so don't trigger
      assert_equal false, @triggered
    end
  end

  # warn_interval #0
  def test_resque_does_not_enqueues_a_job_does_trigger_once_with_no_warn_interval
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
      Resque::StuckQueue.config[:trigger_timeout] = 2
      Resque::StuckQueue.config[:watcher_interval] = 1
      Resque::StuckQueue.config[:warn_interval] = nil
      Resque::StuckQueue.config[:redis] = Redis.new
      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }

      start_and_stop_loops_after(5)
      # check handler did get called once as there is no warn_interval
      assert_equal 1, Resque::StuckQueue.redis.get("test_incr_warn").to_i
    end
  end


  # warn_interval #1
  def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
      Resque::StuckQueue.config[:trigger_timeout] = 2
      Resque::StuckQueue.config[:watcher_interval] = 1
      Resque::StuckQueue.config[:warn_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new
      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }

      start_and_stop_loops_after(5)
      # check handler did get called multiple times due to warn_interval
      assert_equal 3, Resque::StuckQueue.redis.get("test_incr_warn").to_i
    end
  end

  # warn_interval #2
  def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval_stops_on_recover
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:heartbeat_interval] = 2 # so we trigger, and recover in this timeframe
      Resque::StuckQueue.config[:trigger_timeout] = 2
      Resque::StuckQueue.config[:watcher_interval] = 1
      Resque::StuckQueue.config[:warn_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new
      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }

      @recovered = false
      Resque::StuckQueue.config[:recovered_handler] = proc { @recovered = true }

      start_and_stop_loops_after(5)

      assert @recovered, "resque should have picked up heartbeat job after 2 seconds"

      # check handler did get called multiple times due to warn_interval but less than previous test because recover
      assert_equal 2, Resque::StuckQueue.redis.get("test_incr_warn").to_i
    end
  end

  def test_resque_does_not_enqueues_a_job_does_trigger
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:trigger_timeout] = 0
      Resque::StuckQueue.config[:heartbeat_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new

      @triggered = false
      Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
      start_and_stop_loops_after(2)
      # check handler did get called
      assert_equal true, @triggered
    end
  end

  def test_has_settable_custom_hearbeat_job
    puts "#{__method__}"

    with_no_resque_failures do
      Resque::StuckQueue.config[:trigger_timeout] = 2 # won't allow waiting too much and will complain (eg trigger) sooner than later
      Resque::StuckQueue.config[:heartbeat_interval] = 1
      Resque::StuckQueue.config[:redis] = Redis.new

      begin
        Resque::StuckQueue.config[:heartbeat_job] = proc { Resque.enqueue_to(:app, Resque::StuckQueue::HeartbeatJob, Resque::StuckQueue.heartbeat_key_for(:app)) }
        @triggered = false
        Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
        start_and_stop_loops_after(4)

        sleep 3 # allow trigger
        assert true, "should not have raised"
        assert @triggered, "should have triggered"
      rescue => e
        # report unexpected errors as a failure carrying the original error
        flunk "should have succeeded with good refresh_job.\n #{e.inspect}"
      end
    end
  end

end