RubyGems - resque_stuck_queue_revised - Versions diffs - 0.5.1 - Mend

resque_stuck_queue_revised 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +7 -0
data/.gitignore +2 -0
data/Gemfile +19 -0
data/LICENSE.txt +22 -0
data/README.md +199 -0
data/Rakefile +26 -0
data/THOUGHTS +9 -0
data/lib/resque/stuck_queue.rb +1 -0
data/lib/resque_stuck_queue.rb +320 -0
data/lib/resque_stuck_queue/config.rb +81 -0
data/lib/resque_stuck_queue/heartbeat_job.rb +19 -0
data/lib/resque_stuck_queue/version.rb +5 -0
data/resque_stuck_queue.gemspec +27 -0
data/test/resque/set_redis_key.rb +9 -0
data/test/test_collision.rb +47 -0
data/test/test_config.rb +67 -0
data/test/test_helper.rb +57 -0
data/test/test_integration.rb +172 -0
data/test/test_lagtime.rb +34 -0
data/test/test_named_queues.rb +96 -0
data/test/test_resque_stuck_queue.rb +58 -0
data/test/test_set_custom_refresh_job.rb +41 -0
data/test/test_ver_2.rb +45 -0
metadata +132 -0

data/lib/resque_stuck_queue/config.rb ADDED Viewed

@@ -0,0 +1,81 @@
+module Resque
+  module StuckQueue
+    require 'logger'
+    # defaults
+    HEARTBEAT_INTERVAL   = 5 * 60                   # send heartbeat job every 5 minutes
+    WATCHER_INTERVAL     = 5                        # check key is udpated every 5 seconds.
+    TRIGGER_TIMEOUT      = 60 * 60                  # trigger after an hour of lagtime.
+    # must be called by convention: type_handler
+    TRIGGERED_HANDLER    = proc { |queue_name, lag| Resque::StuckQueue::LOGGER.info("Shit gone bad with them queues...on #{queue_name}. Lag time is #{lag}") }
+    RECOVERED_HANDLER    = proc { |queue_name, lag| Resque::StuckQueue::LOGGER.info("recovered queue phew #{queue_name}. Lag time is #{lag}") }
+    LOGGER               = Logger.new($stdout)
+    HEARTBEAT_KEY        = "resque-stuck-queue"
+    TRIGGERED_KEY        = "resque-stuck-queue-last-triggered"
+    class Config < Hash
+      OPTIONS_DESCRIPTIONS = {
+        :triggered_handler  => "set to what gets triggered when resque-stuck-queue will detect the latest heartbeat is older than the trigger_timeout time setting.\n\tExample:\n\tResque::StuckQueue.config[:triggered_handler] = proc { |queue_name, lagtime| send_email('queue \#{queue_name} isnt working, aaah the daemons') }",
+        :recovered_handler  => "set to what gets triggered when resque-stuck-queue has triggered a problem, but then detects the queue went back down to functioning well again(it wont trigger again until it has recovered).\n\tExample:\n\tResque::StuckQueue.config[:recovered_handler] = proc { |queue_name, lagtime| send_email('phew, queue \#{queue_name} is ok') }",
+        :heartbeat_interval => "set to how often to push the 'heartbeat' job which will refresh the latest working time.\n\tExample:\n\tResque::StuckQueue.config[:heartbeat_interval] = 5.minutes",
+        :watcher_interval            => "set to how often to check to see when the last time it worked was.\n\tExample:\n\tResque::StuckQueue.config[:watcher_interval] = 1.minute",
+        :trigger_timeout    => "set to how much of a resque work lag you are willing to accept before being notified. note: take the :watcher_interval setting into account when setting this timeout.\n\tExample:\n\tResque::StuckQueue.config[:trigger_timeout] = 9.minutes",
+        :warn_interval      => "optional: if set, it will continiously trigger/warn in spaces of this interval after first trigger. eg, as long as lagtime keeps on being above trigger_timeout/recover hasn't occured yet.",
+        :redis              => "set the Redis StuckQueue will use. Either a Redis or Redis::Namespace instance.",
+        :heartbeat_key      => "optional, name of keys to keep track of the last good resque heartbeat time",
+        :triggered_key      => "optional, name of keys to keep track of the last trigger time",
+        :logger             => "optional, pass a Logger. Default a ruby logger will be instantiated. Needs to respond to that interface.",
+        :queues             => "optional, monitor specific queues you want to send a heartbeat/monitor to. default is [:app]",
+        :abort_on_exception => "optional, if you want the resque-stuck-queue threads to explicitly raise, default is true",
+        :heartbeat_job      => "optional, your own custom refreshing job. if you are using something other than resque",
+        :enable_signals     => "optional, allow resque::stuck's signal_handlers which do mostly nothing at this point. possible future plan: log info, reopen log file, etc.",
+      }
+      OPTIONS = OPTIONS_DESCRIPTIONS.keys
+      def []=(k,v)
+        validate_key_exists!(k)
+        super(k,v)
+      end
+      def [](k)
+        validate_key_exists!(k)
+        super(k)
+      end
+      REQUIRED_KEYS = [:redis]
+      def validate_required_keys!
+        REQUIRED_KEYS.each do |k|
+          if self[k].nil?
+            raise NoConfigError, "You must set config[:#{k}]"
+          end
+        end
+      end
+      class NoConfigError < StandardError; end
+      def validate_key_exists!(k)
+        if !OPTIONS.include?(k)
+          raise NoConfigError, "no such config key #{k} exists!"
+        end
+      end
+      def description_for(k)
+        OPTIONS_DESCRIPTIONS[k.to_sym]
+      end
+      def pretty_descriptions
+        out = "\n"
+        OPTIONS_DESCRIPTIONS.map{|key,msg|
+          out << "#{key}:\n\t#{msg}\n\n"
+        }
+        out
+      end
+    end
+  end
+end

data/lib/resque_stuck_queue/heartbeat_job.rb ADDED Viewed

@@ -0,0 +1,19 @@
+module Resque
+  module StuckQueue
+    class HeartbeatJob
+      class << self
+        attr_accessor :redis
+        def perform(*args)
+          keyname,host,port,namespace,new_time = *args
+          # if set by config[:redis] earlier before loading this lib.
+          @redis = Resque::StuckQueue.redis || Redis::Namespace.new(namespace, :redis => Redis.new(:host => host, :port => port))
+          @redis.set(keyname, new_time)
+          Resque::StuckQueue.logger.info "successfully updated key #{keyname} to #{new_time} at #{Time.now} for #{@redis.inspect}"
+        end
+      end
+    end
+  end
+end

data/lib/resque_stuck_queue/version.rb ADDED Viewed

@@ -0,0 +1,5 @@
+module Resque
+  module StuckQueue
+    VERSION = "0.5.1" # gem version string; the gemspec reads it as spec.version
+  end
+end

data/resque_stuck_queue.gemspec ADDED Viewed

@@ -0,0 +1,27 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__) # absolute path of the lib/ directory next to this gemspec
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # make lib/ requirable so the version file below can be loaded
+require 'resque_stuck_queue/version'
+Gem::Specification.new do |spec|
+  spec.name          = "resque_stuck_queue_revised"
+  spec.version       = Resque::StuckQueue::VERSION
+  spec.authors       = ["Dave Kerr"]
+  spec.email         = ["davek09@gmail.com"]
+  spec.summary       = %q{fire a handler when your queues are wonky}
+  spec.description   = %q{where the wild things are. err, when resque gets stuck}
+  spec.homepage      = ""
+  spec.license       = "MIT"
+  spec.files         = `git ls-files`.split($/) # file list comes from git, so the gem must be built from a git checkout
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } # basenames of every tracked file under bin/
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/}) # tracked files under test/, spec/ or features/
+  spec.require_paths = ["lib"]
+  spec.add_runtime_dependency "redis-mutex" # TODO rm this
+  spec.add_runtime_dependency "redis-namespace"
+  spec.add_development_dependency "bundler", "~> 1.5"
+  spec.add_development_dependency "rake"
+end

data/test/resque/set_redis_key.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# fixture job
+class SetRedisKey
+  NAME = "integration_test"
+  @queue = :app
+  def self.perform
+    # tests run on localhost
+    Redis.new.set(NAME, "1")
+  end
+end

data/test/test_collision.rb ADDED Viewed

@@ -0,0 +1,47 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
+class TestCollision < Minitest::Test # runs four forked daemons against one redis and expects the triggered handler to fire exactly once overall
+  include TestHelper
+  def setup # use a local redis, a 1-second watcher interval, and start from an empty redis
+    Resque::StuckQueue.config[:redis] = Redis.new
+    Resque::StuckQueue.config[:watcher_interval] = 1
+    Resque::StuckQueue.redis.flushall
+  end
+  def test_two_processes_interacting
+    puts "#{__method__}"
+    # no resque should be running here so timeouts will be reached + trigger
+    Resque::StuckQueue.redis.del("test-incr-key")
+    p1 = fork { Resque::StuckQueue.redis.client.reconnect; run_resque_stuck_daemon;  } # each child reconnects its redis client after the fork, then runs the daemon
+    p2 = fork { Resque::StuckQueue.redis.client.reconnect; run_resque_stuck_daemon;  }
+    p3 = fork { Resque::StuckQueue.redis.client.reconnect; run_resque_stuck_daemon;  }
+    p4 = fork { Resque::StuckQueue.redis.client.reconnect; run_resque_stuck_daemon;  }
+    Thread.new {
+      sleep 5 # let the daemons run long enough for one trigger (trigger_timeout is 3, set in run_resque_stuck_daemon below)
+      `kill -9 #{p1}`
+      `kill -9 #{p2}`
+      `kill -9 #{p3}`
+      `kill -9 #{p4}`
+      Process.waitpid # reap; NOTE(review): the main thread is in Process.waitall at the same time — confirm this cannot raise Errno::ECHILD here
+    }
+    Process.waitall # blocks until every forked daemon has been killed
+    assert_equal 1, Resque::StuckQueue.redis.get("test-incr-key").to_i # the handler increments this key; more than 1 means several daemons triggered
+  end
+  private
+  def run_resque_stuck_daemon # configures short intervals and a counting handler, then calls Resque::StuckQueue.start
+    Resque::StuckQueue.config[:heartbeat_interval] = 1
+    Resque::StuckQueue.config[:abort_on_exception] = true
+    Resque::StuckQueue.config[:trigger_timeout] = 3
+    Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test-incr-key") }
+    Resque::StuckQueue.start
+  end
+end

data/test/test_config.rb ADDED Viewed

@@ -0,0 +1,67 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
+class TestConfig < Minitest::Test
+  include TestHelper
+  def setup
+    Resque::StuckQueue.config[:watcher_interval] = 1
+    Resque::StuckQueue.config[:trigger_timeout] = 1
+    Resque::StuckQueue.config[:heartbeat_interval] = 1
+    Resque::StuckQueue.config[:abort_on_exception] = true
+    Resque::StuckQueue.config[:redis] = Redis.new
+  end
+  def teardown
+    Resque::StuckQueue.reset!
+  end
+  def test_config_has_descriptions
+    c = Resque::StuckQueue::Config.new
+    assert c.description_for(:logger) =~ /Logger/, "has descriptions"
+  end
+  def test_outputs_all_config_options
+    c = Resque::StuckQueue::Config.new
+    puts c.pretty_descriptions
+    assert true
+  end
+  def test_has_logger
+    puts "#{__method__}"
+    begin
+      Resque::StuckQueue.config[:logger] = Logger.new($stdout)
+      start_and_stop_loops_after(1)
+      assert true, "should not have raised"
+    rescue => e
+      assert false, "should have succeeded with good logger: #{e.inspect}\n#{e.backtrace.join("\n")}"
+    end
+  end
+  def test_must_set_redis
+    puts "#{__method__}"
+    Resque::StuckQueue.config[:redis] = nil
+    begin
+      start_and_stop_loops_after(1)
+      assert false, "redis cannot be nil"
+    rescue Resque::StuckQueue::Config::NoConfigError => e
+      assert true, "redis cannot be nil: #{e.inspect}\n#{e.backtrace.join("\n")}"
+    end
+  end
+  #def test_can_have_signals
+    #puts "#{__method__}"
+    #begin
+      #assert_equal ENV['SIGUSR1'], nil
+      #Resque::StuckQueue.config[:enable_signals] = true
+      #start_and_stop_loops_after(1)
+      #Process.kill "SIGUSR1", Process.pid
+      #assert_equal ENV['SIGUSR1'], "done be had"
+    #rescue => e
+      #assert false, "should have succeeded with signal handlers: #{e.inspect}\n#{e.backtrace.join("\n")}"
+    #end
+  #end
+end

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,57 @@
+require 'minitest'
+require "minitest/autorun"
+require 'pry'
+require 'mocha'
+require "minitest/unit"
+require "mocha/mini_test"
+$:.unshift(".")
+require 'resque_stuck_queue'
+require File.join(File.expand_path(File.dirname(__FILE__)), "resque", "set_redis_key")
+module TestHelper # process and thread helpers mixed into the test classes
+  extend self # lets the helpers also be called as TestHelper.run_resque, TestHelper.hax_kill_resque, etc.
+  def run_resque(queue_name = "*") # forks a `rake resque:work` worker (INTERVAL=1) for queue_name and returns the pid of the fork
+    pid = fork { exec("export INTERVAL=1 QUEUE=#{queue_name}; bundle exec rake --trace resque:work") }
+    sleep 3 # wait for resque to boot up
+    pid
+  end
+  def with_no_resque_failures(&blk) # clears recorded Resque failures, runs the block, then asserts none were recorded
+    Resque::Failure.clear
+    blk.call
+    assert_nil Resque::Failure.all, "Resque hearbeat job cant fail: #{Resque::Failure.all.inspect}" # NOTE(review): relies on Resque::Failure.all returning nil when nothing failed — confirm for the resque version in use
+  end
+  def hax_kill_resque # sends kill to every process whose `ps aux` line looks like a resque worker
+    # ugly, FIXME how to get pid of forked forked process. run_resque pid is incorrect.
+   `ps aux |grep -E 'resque.*(Waiting|Forked|Processing)'| grep -v grep | awk '{print $2}' |xargs kill`
+   sleep 2 # wait for shutdown
+  end
+  def start_and_stop_loops_after(secs) # calls StuckQueue.start in one thread and StuckQueue.stop from another after secs seconds, then joins both
+    abort_or_not = Thread.abort_on_exception # remember the global setting so the ensure clause can restore it
+    Thread.abort_on_exception = Resque::StuckQueue.config[:abort_on_exception]
+    ops = []
+    ops << Thread.new { Resque::StuckQueue.start }
+    ops << Thread.new { sleep secs; Resque::StuckQueue.stop }
+    ops.map(&:join)
+  ensure
+    Thread.abort_on_exception = abort_or_not
+    Resque::StuckQueue.force_stop! # runs even when start raised
+  end
+end
+# Return backtraces unfiltered so failures show the full stack; see http://stackoverflow.com/questions/9346101/how-to-get-stack-trace-from-a-testunittestcase
+def MiniTest.filter_backtrace(bt)
+  bt
+end
+# hax: ensure previous test runs that raised didn't leave a resque process running beforehand
+unless @before_all_hax_kill_resque
+  TestHelper.hax_kill_resque && @before_all_hax_kill_resque=true # the flag is only set when hax_kill_resque returns a truthy value (its last expression is `sleep 2`)
+end

data/test/test_integration.rb ADDED Viewed

@@ -0,0 +1,172 @@
+require 'minitest'
+require "minitest/autorun"
+require 'pry'
+$:.unshift(".")
+require 'resque_stuck_queue'
+require File.join(File.expand_path(File.dirname(__FILE__)), "resque", "set_redis_key")
+require File.join(File.expand_path(File.dirname(__FILE__)), "test_helper")
+class TestIntegration < Minitest::Test
+  include TestHelper
+  # UBER HAXING no after(:all) or before(:all)
+  class << self
+    def tests_running?
+      test_count = public_instance_methods.select{|m| m.to_s.match(/^test_/)}.size
+      true if tests_ran != test_count
+    end
+    def tests_done?
+      !tests_running?
+    end
+    attr_accessor :tests_ran, :resque_pid
+    def tests_ran
+      @tests_ran ||= 0
+    end
+    def run_resque_before_all
+      return if @running_resque
+      @running_resque = true
+      @resque_pid = TestHelper.run_resque
+    end
+  end
+  def setup
+    Resque::StuckQueue.config[:redis] = Redis.new
+    Resque::StuckQueue.redis.flushall
+    Resque::StuckQueue.config[:watcher_interval] = 1
+    Resque::StuckQueue.config[:abort_on_exception] = true
+    self.class.run_resque_before_all
+    self.class.tests_ran += 1
+  end
+  def teardown
+    Resque::StuckQueue.reset!
+    if self.class.tests_done?
+      hax_kill_resque
+      Process.waitall
+    end
+  end
+  def test_resque_enqueues_a_job_does_not_trigger
+    puts "#{__method__}"
+    with_no_resque_failures do
+      Resque::StuckQueue.config[:trigger_timeout] = 10
+      Resque::StuckQueue.config[:heartbeat_interval] = 1
+      Resque::StuckQueue.config[:redis] = Redis.new
+      @triggered = false
+      Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
+      start_and_stop_loops_after(5)
+      sleep 3 # job ran successfully, so don't trigger
+      assert_equal @triggered, false
+    end
+  end
+  # warn_interval #0
+  def test_resque_does_not_enqueues_a_job_does_trigger_once_with_no_warn_interval
+  puts "#{__method__}"
+    with_no_resque_failures do
+      Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
+      Resque::StuckQueue.config[:trigger_timeout] = 2
+      Resque::StuckQueue.config[:watcher_interval] = 1
+      Resque::StuckQueue.config[:warn_interval] = nil
+      Resque::StuckQueue.config[:redis] = Redis.new
+      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }
+      start_and_stop_loops_after(5)
+      # check handler did get called once as there is no warn_interval
+      assert_equal Resque::StuckQueue.redis.get("test_incr_warn").to_i, 1
+    end
+  end
+  # warn_interval #1
+  def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval
+  puts "#{__method__}"
+    with_no_resque_failures do
+      Resque::StuckQueue.config[:heartbeat_interval] = 5 # so heartbeats don't go through at all in this timeframe
+      Resque::StuckQueue.config[:trigger_timeout] = 2
+      Resque::StuckQueue.config[:watcher_interval] = 1
+      Resque::StuckQueue.config[:warn_interval] = 1
+      Resque::StuckQueue.config[:redis] = Redis.new
+      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }
+      start_and_stop_loops_after(5)
+      # check handler did get called multiple times due to warn_interval
+      assert_equal Resque::StuckQueue.redis.get("test_incr_warn").to_i, 3
+    end
+  end
+  # warn_interval #2
+  def test_resque_does_not_enqueues_a_job_does_trigger_with_warn_interval_stops_on_recover
+  puts "#{__method__}"
+    with_no_resque_failures do
+      Resque::StuckQueue.config[:heartbeat_interval] = 2 # so we trigger, and recover in this timeframe
+      Resque::StuckQueue.config[:trigger_timeout] = 2
+      Resque::StuckQueue.config[:watcher_interval] = 1
+      Resque::StuckQueue.config[:warn_interval] = 1
+      Resque::StuckQueue.config[:redis] = Redis.new
+      Resque::StuckQueue.config[:triggered_handler] = proc { Resque::StuckQueue.redis.incr("test_incr_warn") }
+      @recovered = false
+      Resque::StuckQueue.config[:recovered_handler] = proc { @recovered = true }
+      start_and_stop_loops_after(5)
+      assert @recovered, "resque should have picked up heartbeat job after 2 seconds"
+      # check handler did get called multiple times due to warn_interval but less than previous test because recover
+      assert_equal Resque::StuckQueue.redis.get("test_incr_warn").to_i, 2
+    end
+  end
+  def test_resque_does_not_enqueues_a_job_does_trigger
+    puts "#{__method__}"
+    with_no_resque_failures do
+      Resque::StuckQueue.config[:trigger_timeout] = 0
+      Resque::StuckQueue.config[:heartbeat_interval] = 1
+      Resque::StuckQueue.config[:redis] = Redis.new
+      @triggered = false
+      Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
+      start_and_stop_loops_after(2)
+      # check handler did get called
+      assert_equal @triggered, true
+    end
+  end
+  def test_has_settable_custom_hearbeat_job
+    puts "#{__method__}"
+    with_no_resque_failures do
+      Resque::StuckQueue.config[:trigger_timeout] = 2 # won't allow waiting too much and will complain (eg trigger) sooner than later
+      Resque::StuckQueue.config[:heartbeat_interval] = 1
+      Resque::StuckQueue.config[:redis] = Redis.new
+      begin
+        Resque::StuckQueue.config[:heartbeat_job] = proc { Resque.enqueue_to(:app, Resque::StuckQueue::HeartbeatJob, Resque::StuckQueue.heartbeat_key_for(:app)) }
+        @triggered = false
+        Resque::StuckQueue.config[:triggered_handler] = proc { @triggered = true }
+        start_and_stop_loops_after(4)
+        sleep 3 # allow trigger
+        assert true, "should not have raised"
+        assert @triggered, "should have triggered"
+      rescue => e
+        assert false, "should have succeeded with good refresh_job.\n #{e.inspect}"
+      end
+    end
+  end
+end