resque_stuck_queue 0.4.4 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/THOUGHTS +2 -2
- data/lib/resque_stuck_queue/config.rb +2 -1
- data/lib/resque_stuck_queue/version.rb +1 -1
- data/lib/resque_stuck_queue.rb +63 -33
- data/test/test_helper.rb +1 -1
- data/test/test_integration.rb +61 -0
- metadata +1 -3
- data/test/test_resque_2.rb +0 -45
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: b23678d316199cc108bef89177912ff5eb396d89
|
4
|
-
data.tar.gz: 055f4654e2f966a573fd398f2f55cbc56b21b766
|
5
2
|
SHA512:
|
6
|
-
|
7
|
-
|
3
|
+
data.tar.gz: c007b83586030670c60f58d0a5e578a058a49f54588edba34367605afac5906bea2671f589a1f03f2617329e134bc3e34ea3a6959aec95920bf2b01c63be38c1
|
4
|
+
metadata.gz: 86e182c1fd44835e6b899fbd21762ebeb8eeb5ba5b09d233cef6203e130468bf8cca071dc3bf9c0d6ce86c55e3b9fb77ce3816b261f36f40652cb71ff398669f
|
5
|
+
SHA1:
|
6
|
+
data.tar.gz: 17d8ce55b4fb7e5bb071eada32ccb76060ccc7e0
|
7
|
+
metadata.gz: a3bed0e656224a0df49636c93e8344374dd612f4
|
data/THOUGHTS
CHANGED
@@ -3,5 +3,5 @@
|
|
3
3
|
rm redis locking (since it works by keys now, no need for it, recover/trigger ping pong).
|
4
4
|
rm require resque?
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
refactor tests to have an around(:suite) to run with resque beforehand (no startup time) and just run test_integration.rb
|
7
|
+
(& compact dup tests etc)
|
@@ -6,7 +6,7 @@ module Resque
|
|
6
6
|
HEARTBEAT_INTERVAL = 5 * 60 # send heartbeat job every 5 minutes
|
7
7
|
WATCHER_INTERVAL = 5 # check key is updated every 5 seconds.
|
8
8
|
|
9
|
-
TRIGGER_TIMEOUT = 60 * 60 #
|
9
|
+
TRIGGER_TIMEOUT = 60 * 60 # trigger after an hour of lagtime.
|
10
10
|
|
11
11
|
# must be called by convention: type_handler
|
12
12
|
TRIGGERED_HANDLER = proc { |queue_name, lag| Resque::StuckQueue::LOGGER.info("Shit gone bad with them queues...on #{queue_name}. Lag time is #{lag}") }
|
@@ -24,6 +24,7 @@ module Resque
|
|
24
24
|
:heartbeat_interval => "set to how often to push the 'heartbeat' job which will refresh the latest working time.\n\tExample:\n\tResque::StuckQueue.config[:heartbeat_interval] = 5.minutes",
|
25
25
|
:watcher_interval => "set to how often to check to see when the last time it worked was.\n\tExample:\n\tResque::StuckQueue.config[:watcher_interval] = 1.minute",
|
26
26
|
:trigger_timeout => "set to how much of a resque work lag you are willing to accept before being notified. note: take the :watcher_interval setting into account when setting this timeout.\n\tExample:\n\tResque::StuckQueue.config[:trigger_timeout] = 9.minutes",
|
27
|
+
:warn_interval => "optional: if set, it will continiously trigger/warn in spaces of this interval after first trigger. eg, as long as lagtime keeps on being above trigger_timeout/recover hasn't occured yet.",
|
27
28
|
:redis => "set the Redis StuckQueue will use. Either a Redis or Redis::Namespace instance.",
|
28
29
|
:heartbeat_key => "optional, name of keys to keep track of the last good resque heartbeat time",
|
29
30
|
:triggered_key => "optional, name of keys to keep track of the last trigger time",
|
data/lib/resque_stuck_queue.rb
CHANGED
@@ -90,6 +90,8 @@ module Resque
|
|
90
90
|
setup_heartbeat_thread
|
91
91
|
setup_watcher_thread
|
92
92
|
|
93
|
+
setup_warn_thread
|
94
|
+
|
93
95
|
# fo-eva.
|
94
96
|
@threads.map(&:join)
|
95
97
|
|
@@ -158,24 +160,70 @@ module Resque
|
|
158
160
|
|
159
161
|
private
|
160
162
|
|
163
|
+
def log_starting_thread(type)
|
164
|
+
interval_keyname = "#{type}_interval".to_sym
|
165
|
+
logger.info("Starting #{type} thread with interval of #{config[interval_keyname]} seconds")
|
166
|
+
end
|
167
|
+
|
161
168
|
def read_from_redis(keyname)
|
162
169
|
redis.get(keyname)
|
163
170
|
end
|
164
171
|
|
172
|
+
def setup_watcher_thread
|
173
|
+
@threads << Thread.new do
|
174
|
+
Thread.current.abort_on_exception = abort_on_exception
|
175
|
+
log_starting_thread(:watcher)
|
176
|
+
while @running
|
177
|
+
mutex = Redis::Mutex.new('resque_stuck_queue_lock', block: 0)
|
178
|
+
if mutex.lock
|
179
|
+
begin
|
180
|
+
queues.each do |queue_name|
|
181
|
+
log_watcher_info(queue_name)
|
182
|
+
if should_trigger?(queue_name)
|
183
|
+
trigger_handler(queue_name, :triggered)
|
184
|
+
elsif should_recover?(queue_name)
|
185
|
+
trigger_handler(queue_name, :recovered)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
ensure
|
189
|
+
mutex.unlock
|
190
|
+
end
|
191
|
+
end
|
192
|
+
wait_for_it(:watcher_interval)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
165
197
|
def setup_heartbeat_thread
|
166
198
|
@threads << Thread.new do
|
167
199
|
Thread.current.abort_on_exception = abort_on_exception
|
168
|
-
|
200
|
+
log_starting_thread(:heartbeat)
|
169
201
|
while @running
|
170
202
|
# we want to go through resque jobs, because that's what we're trying to test here:
|
171
203
|
# ensure that jobs get executed and the time is updated!
|
204
|
+
wait_for_it(:heartbeat_interval)
|
172
205
|
logger.info("Sending heartbeat jobs")
|
173
206
|
enqueue_jobs
|
174
|
-
wait_for_it(:heartbeat_interval)
|
175
207
|
end
|
176
208
|
end
|
177
209
|
end
|
178
210
|
|
211
|
+
def setup_warn_thread
|
212
|
+
if config[:warn_interval]
|
213
|
+
@threads << Thread.new do
|
214
|
+
Thread.current.abort_on_exception = abort_on_exception
|
215
|
+
log_starting_thread(:warn)
|
216
|
+
while @running
|
217
|
+
queues.each do |qn|
|
218
|
+
trigger_handler(qn, :triggered) if should_trigger?(qn, true)
|
219
|
+
end
|
220
|
+
wait_for_it(:warn_interval)
|
221
|
+
end
|
222
|
+
end
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
|
179
227
|
def enqueue_jobs
|
180
228
|
if config[:heartbeat_job]
|
181
229
|
# FIXME config[:heartbeat_job] with multiple queues is bad semantics
|
@@ -189,31 +237,6 @@ module Resque
|
|
189
237
|
end
|
190
238
|
end
|
191
239
|
|
192
|
-
def setup_watcher_thread
|
193
|
-
@threads << Thread.new do
|
194
|
-
Thread.current.abort_on_exception = abort_on_exception
|
195
|
-
logger.info("Starting watcher thread")
|
196
|
-
while @running
|
197
|
-
mutex = Redis::Mutex.new('resque_stuck_queue_lock', block: 0)
|
198
|
-
if mutex.lock
|
199
|
-
begin
|
200
|
-
queues.each do |queue_name|
|
201
|
-
log_watcher_info(queue_name)
|
202
|
-
if should_trigger?(queue_name)
|
203
|
-
trigger_handler(queue_name, :triggered)
|
204
|
-
elsif should_recover?(queue_name)
|
205
|
-
trigger_handler(queue_name, :recovered)
|
206
|
-
end
|
207
|
-
end
|
208
|
-
ensure
|
209
|
-
mutex.unlock
|
210
|
-
end
|
211
|
-
end
|
212
|
-
wait_for_it(:watcher_interval)
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
240
|
def last_successful_heartbeat(queue_name)
|
218
241
|
time_set = read_from_redis(heartbeat_key_for(queue_name))
|
219
242
|
if time_set
|
@@ -255,17 +278,22 @@ module Resque
|
|
255
278
|
lag_time(queue_name) < max_wait_time
|
256
279
|
end
|
257
280
|
|
258
|
-
def should_trigger?(queue_name)
|
281
|
+
def should_trigger?(queue_name, force_trigger = false)
|
259
282
|
if lag_time(queue_name) >= max_wait_time
|
260
283
|
last_trigger = last_triggered(queue_name)
|
261
284
|
|
285
|
+
if force_trigger
|
286
|
+
return true
|
287
|
+
end
|
288
|
+
|
262
289
|
if last_trigger.nil?
|
290
|
+
# if it hasn't been triggered before, do it
|
263
291
|
return true
|
264
|
-
else
|
265
|
-
# if it already triggered in the past and needs to re-trigger,
|
266
|
-
# :recovered should have cleared last_triggered out by then
|
267
|
-
return false
|
268
292
|
end
|
293
|
+
|
294
|
+
# if it already triggered in the past don't trigger again.
|
295
|
+
# :recovered should clear out last_triggered so the cycle (trigger<->recover) continues
|
296
|
+
return false
|
269
297
|
end
|
270
298
|
end
|
271
299
|
|
@@ -274,8 +302,10 @@ module Resque
|
|
274
302
|
sleep config[:heartbeat_interval] || HEARTBEAT_INTERVAL
|
275
303
|
elsif type == :watcher_interval
|
276
304
|
sleep config[:watcher_interval] || WATCHER_INTERVAL
|
305
|
+
elsif type == :warn_interval
|
306
|
+
sleep config[:warn_interval]
|
277
307
|
else
|
278
|
-
raise 'Must sleep for :watcher_interval interval or :heartbeat_interval interval!'
|
308
|
+
raise 'Must sleep for :watcher_interval interval or :heartbeat_interval or :warn_interval interval!'
|
279
309
|
end
|
280
310
|
end
|
281
311
|
|
data/test/test_helper.rb
CHANGED
@@ -26,7 +26,7 @@ module TestHelper
|
|
26
26
|
|
27
27
|
def hax_kill_resque
|
28
28
|
# ugly, FIXME how to get pid of forked forked process. run_resque pid is incorrect.
|
29
|
-
`ps aux |grep resque
|
29
|
+
`ps aux |grep -E 'resque.*(Waiting|Forked|Processing)'| grep -v grep | awk '{print $2}' |xargs kill`
|
30
30
|
sleep 2 # wait for shutdown
|
31
31
|
end
|
32
32
|
|
data/test/test_integration.rb
CHANGED
@@ -69,6 +69,67 @@ class TestIntegration < Minitest::Test
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
+
# warn_interval #0
|
73
|
+
def test_resque_does_not_enqueues_a_job_does_trigger_once_with_no_warn_interval
|
74
|
+
puts "#{__method__}"
|
75
|
+
|
76
|
+
with_no_resque_failures do
|
77
|
+
Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
|
78
|
+
Resque::StuckQueue.config[:trigger_timeout] = 2
|
79
|
+
Resque::StuckQueue.config[:watcher_interval] = 1
|
80
|
+
Resque::StuckQueue.config[:warn_interval] = nil
|
81
|
+
Resque::StuckQueue.config[:redis] = Redis.new
|
82
|
+
Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }
|
83
|
+
|
84
|
+
start_and_stop_loops_after(5)
|
85
|
+
# check handler did get called once as there is no warn_interval
|
86
|
+
assert_equal Resque::StuckQueue.redis.get("test_incr_warn").to_i, 1
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
|
91
|
+
# warn_interval #1
|
92
|
+
def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval
|
93
|
+
puts "#{__method__}"
|
94
|
+
|
95
|
+
with_no_resque_failures do
|
96
|
+
Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
|
97
|
+
Resque::StuckQueue.config[:trigger_timeout] = 2
|
98
|
+
Resque::StuckQueue.config[:watcher_interval] = 1
|
99
|
+
Resque::StuckQueue.config[:warn_interval] = 1
|
100
|
+
Resque::StuckQueue.config[:redis] = Redis.new
|
101
|
+
Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }
|
102
|
+
|
103
|
+
start_and_stop_loops_after(5)
|
104
|
+
# check handler did get called multiple times due to warn_interval
|
105
|
+
assert_equal Resque::StuckQueue.redis.get("test_incr_warn").to_i, 3
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
# warn_interval #2
|
110
|
+
def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval_stops_on_recover
|
111
|
+
puts "#{__method__}"
|
112
|
+
|
113
|
+
with_no_resque_failures do
|
114
|
+
Resque::StuckQueue.config[:heartbeat_interval] = 2 # so we trigger, and recover in this timeframe
|
115
|
+
Resque::StuckQueue.config[:trigger_timeout] = 2
|
116
|
+
Resque::StuckQueue.config[:watcher_interval] = 1
|
117
|
+
Resque::StuckQueue.config[:warn_interval] = 1
|
118
|
+
Resque::StuckQueue.config[:redis] = Redis.new
|
119
|
+
Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }
|
120
|
+
|
121
|
+
@recovered = false
|
122
|
+
Resque::StuckQueue.config[:recovered_handler] = proc { @recovered = true }
|
123
|
+
|
124
|
+
start_and_stop_loops_after(5)
|
125
|
+
|
126
|
+
assert @recovered, "resque should have picked up heartbeat job after 2 seconds"
|
127
|
+
|
128
|
+
# check handler did get called multiple times due to warn_interval but less than previous test because recover
|
129
|
+
assert_equal Resque::StuckQueue.redis.get("test_incr_warn").to_i, 2
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
72
133
|
def test_resque_does_not_enqueues_a_job_does_trigger
|
73
134
|
puts "#{__method__}"
|
74
135
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: resque_stuck_queue
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shai Rosenfeld
|
@@ -78,7 +78,6 @@ files:
|
|
78
78
|
- test/test_integration.rb
|
79
79
|
- test/test_lagtime.rb
|
80
80
|
- test/test_named_queues.rb
|
81
|
-
- test/test_resque_2.rb
|
82
81
|
- test/test_resque_stuck_queue.rb
|
83
82
|
- test/test_set_custom_refresh_job.rb
|
84
83
|
- test/test_ver_2.rb
|
@@ -113,7 +112,6 @@ test_files:
|
|
113
112
|
- test/test_integration.rb
|
114
113
|
- test/test_lagtime.rb
|
115
114
|
- test/test_named_queues.rb
|
116
|
-
- test/test_resque_2.rb
|
117
115
|
- test/test_resque_stuck_queue.rb
|
118
116
|
- test/test_set_custom_refresh_job.rb
|
119
117
|
- test/test_ver_2.rb
|
data/test/test_resque_2.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
# run with
|
2
|
-
# $ RESQUE_2=1 bi; RESQUE_2=1 be ruby -I. -Ilib/ test/test_resque_2.rb
|
3
|
-
if !ENV['RESQUE_2'].nil?
|
4
|
-
|
5
|
-
require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
|
6
|
-
|
7
|
-
class TestResque2 < Minitest::Test
|
8
|
-
|
9
|
-
include TestHelper
|
10
|
-
|
11
|
-
def setup
|
12
|
-
assert (Resque::VERSION.match /^2\./), "must run in 2.0"
|
13
|
-
Resque.redis = Redis.new
|
14
|
-
Resque::StuckQueue.config[:redis] = Redis.new
|
15
|
-
Redis.new.flushall
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_works_with_2_point_oh_do_not_trigger_because_key_is_updated
|
19
|
-
|
20
|
-
Resque::StuckQueue.config[:redis] = Redis.new
|
21
|
-
|
22
|
-
Resque::StuckQueue.config[:watcher_interval] = 1
|
23
|
-
Resque::StuckQueue.config[:heartbeat_interval] = 1
|
24
|
-
Resque::StuckQueue.config[:abort_on_exception] = true
|
25
|
-
Resque::StuckQueue.config[:trigger_timeout] = 5
|
26
|
-
Resque::StuckQueue.config[:logger] = Logger.new($stdout)
|
27
|
-
Resque::StuckQueue.config[:triggered_handler] = proc { Redis.new.incr("test-incr-key") }
|
28
|
-
Resque::StuckQueue.config[:redis] = Redis.new
|
29
|
-
Resque::StuckQueue.config[:queues] = [:app]
|
30
|
-
|
31
|
-
#binding.pry
|
32
|
-
Resque::StuckQueue.start_in_background
|
33
|
-
|
34
|
-
@r2_pid = fork { Resque::StuckQueue.config[:redis] = Redis.new ; Resque::Worker.new("*", :graceful_term => true).work ; Process.waitall }
|
35
|
-
sleep 10
|
36
|
-
|
37
|
-
# triggers once
|
38
|
-
assert_equal Redis.new.get("test-incr-key").to_i, 0
|
39
|
-
hax_kill_resque
|
40
|
-
Resque::StuckQueue.force_stop!
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
end
|