onyx-resque-retry 0.1.0
Gem contents listing for onyx-resque-retry 0.1.0 — files added below with their line counts.
- data/HISTORY.md +33 -0
- data/LICENSE +21 -0
- data/README.md +289 -0
- data/Rakefile +25 -0
- data/lib/resque-retry.rb +6 -0
- data/lib/resque-retry/server.rb +51 -0
- data/lib/resque-retry/server/views/retry.erb +48 -0
- data/lib/resque-retry/server/views/retry_timestamp.erb +59 -0
- data/lib/resque/failure/multiple_with_retry_suppression.rb +93 -0
- data/lib/resque/plugins/exponential_backoff.rb +64 -0
- data/lib/resque/plugins/retry.rb +221 -0
- data/test/exponential_backoff_test.rb +62 -0
- data/test/multiple_failure_test.rb +86 -0
- data/test/redis-test.conf +132 -0
- data/test/resque_test.rb +18 -0
- data/test/retry_criteria_test.rb +75 -0
- data/test/retry_inheriting_checks_test.rb +33 -0
- data/test/retry_test.rb +173 -0
- data/test/test_helper.rb +78 -0
- data/test/test_jobs.rb +280 -0
- metadata +189 -0
@@ -0,0 +1,59 @@
<%# Delayed-jobs view for a single scheduler timestamp, augmented with
    resque-retry attempt/failure details pulled from Redis. %>
<% timestamp = params[:timestamp].to_i %>

<h1>
  Delayed Jobs scheduled for <%= format_time(Time.at(timestamp)) %>
  (with Retry Information)
</h1>

<p class="intro">
  This list below contains the delayed jobs scheduled for the current
  timestamp, with retry information.
</p>

<p class="sub">
  Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of
  <b><%= size = resque.delayed_timestamp_size(timestamp) %></b> jobs
</p>

<table class="jobs">
  <tr>
    <th>Class</th>
    <th>Args</th>
    <th>Retry Attempts</th>
    <th>Exception</th>
    <th>Backtrace</th>
  </tr>
  <% jobs = resque.delayed_timestamp_peek(timestamp, start, 20) %>
  <% jobs.each do |job| %>
    <% retry_key = retry_key_for_job(job) %>
    <% retry_attempts = retry_attempts_for_job(job) %>
    <tr>
      <td class="class"><%= h job['class'] %></td>
      <td class="args"><%= h job['args'].inspect %></td>
      <% if retry_attempts.nil? %>
        <td colspan="3"><i>n/a - normal delayed job</i></td>
      <% else %>
        <td><%= retry_attempts %></td>
        <% failure = retry_failure_details(retry_key) %>
        <%# BUGFIX: escape the exception class text like every other field —
            it is stored from job failure data and must not be emitted raw. %>
        <td><code><%= h failure['exception'] %></code></td>
        <td class="error">
          <% if failure['backtrace'] %>
            <a href="#" class="backtrace"><%= h(failure['error']) %></a>
            <pre style="display:none"><%= h failure['backtrace'].join("\n") %></pre>
          <% else %>
            <%= h failure['error'] %>
          <% end %>
        </td>
      <% end %>
    </tr>
  <% end %>
  <% if jobs.empty? %>
    <tr>
      <td class="no-data" colspan="5">
        There are no pending jobs scheduled for this time.
      </td>
    </tr>
  <% end %>
</table>

<%= partial :next_more, :start => start, :size => size %>
@@ -0,0 +1,93 @@
require 'resque/failure/multiple'

module Resque
  module Failure

    # A multiple failure backend, with retry suppression.
    #
    # For example: if you had a job that could retry 5 times, your failure
    # backends are not notified unless the _final_ retry attempt also fails.
    #
    # Example:
    #
    #   require 'resque-retry'
    #   require 'resque/failure/redis'
    #
    #   Resque::Failure::MultipleWithRetrySuppression.classes = [Resque::Failure::Redis]
    #   Resque::Failure.backend = Resque::Failure::MultipleWithRetrySuppression
    #
    class MultipleWithRetrySuppression < Multiple
      include Resque::Helpers

      # Hooks mixed into the job class so that the stored failure details
      # are removed from Redis once the job finally performs successfully.
      module CleanupHooks
        # Resque after_perform hook.
        #
        # Deletes retry failure information from Redis.
        def after_perform_retry_failure_cleanup(*args)
          retry_key = redis_retry_key(*args)
          failure_key = Resque::Failure::MultipleWithRetrySuppression.failure_key(retry_key)
          Resque.redis.del(failure_key)
        end
      end

      # Called when the job fails.
      #
      # If the job will retry, suppress the failure from the other backends
      # and store the latest failure information in Redis, for use by the
      # web interface. Otherwise clean up and notify the other backends.
      def save
        if retryable? && retrying?
          data = {
            :failed_at => Time.now.strftime("%Y/%m/%d %H:%M:%S"),
            :payload   => payload,
            :exception => exception.class.to_s,
            :error     => exception.to_s,
            :backtrace => Array(exception.backtrace),
            :worker    => worker.to_s,
            :queue     => queue
          }

          # Register cleanup hooks (once per job class) so the stored
          # failure details are deleted after a successful perform.
          unless klass.respond_to?(:after_perform_retry_failure_cleanup)
            klass.send(:extend, CleanupHooks)
          end

          redis[failure_key] = Resque.encode(data)
        else
          cleanup_retry_failure_log!
          super
        end
      end

      # Expose this for the hook's use.
      #
      # @param [String] retry_key the job's retry-state Redis key
      # @return [String] Redis key holding the latest failure details
      def self.failure_key(retry_key)
        'failure_' + retry_key
      end

      protected

      # The job class, resolved from the failed payload.
      def klass
        constantize(payload['class'])
      end

      # Redis key tracking this job's retry attempts.
      def retry_key
        klass.redis_retry_key(payload['args'])
      end

      # Redis key holding the latest failure details for this job.
      def failure_key
        self.class.failure_key(retry_key)
      end

      # True when the job class participates in resque-retry.
      def retryable?
        klass.respond_to?(:redis_retry_key)
      end

      # True when a retry attempt is currently tracked in Redis.
      def retrying?
        redis.exists(retry_key)
      end

      # Removes stored failure details, but only for retryable job classes.
      def cleanup_retry_failure_log!
        redis.del(failure_key) if retryable?
      end
    end
  end
end
@@ -0,0 +1,64 @@
module Resque
  module Plugins

    # If you want your job to retry on failure using a varying delay, simply
    # extend your module/class with this module:
    #
    #   class DeliverSMS
    #     extend Resque::Plugins::ExponentialBackoff
    #     @queue = :mt_messages
    #
    #     def self.perform(mt_id, mobile_number, message)
    #       heavy_lifting
    #     end
    #   end
    #
    # Easily do something custom:
    #
    #   class DeliverSMS
    #     extend Resque::Plugins::ExponentialBackoff
    #     @queue = :mt_messages
    #
    #     @retry_limit = 4
    #
    #     # retry delay in seconds; [0] => 1st retry, [1] => 2nd..4th retry.
    #     @backoff_strategy = [0, 60]
    #
    #     # used to build redis key, for counting job attempts.
    #     def self.identifier(mt_id, mobile_number, message)
    #       "#{mobile_number}:#{mt_id}"
    #     end
    #
    #     def self.perform(mt_id, mobile_number, message)
    #       heavy_lifting
    #     end
    #   end
    #
    module ExponentialBackoff
      include Resque::Plugins::Retry

      # Defaults to the number of delays in the backoff strategy.
      #
      # @return [Number] maximum number of retries
      def retry_limit
        @retry_limit ||= backoff_strategy.length
      end

      # Selects the delay from the backoff strategy. Attempts past the end
      # of the strategy array keep reusing the final (largest) delay.
      #
      # @return [Number] seconds to delay until the next retry.
      def retry_delay
        backoff_strategy[retry_attempt] || backoff_strategy.last
      end

      # @abstract
      # The backoff strategy is used to vary the delay between retry attempts.
      #
      # @return [Array] array of delays. index = retry attempt
      def backoff_strategy
        @backoff_strategy ||= [0, 60, 600, 3600, 10_800, 21_600]
      end
    end

  end
end
@@ -0,0 +1,221 @@
module Resque
  module Plugins

    # If you want your job to retry on failure, simply extend your module/class
    # with this module:
    #
    #   class DeliverWebHook
    #     extend Resque::Plugins::Retry # allows 1 retry by default.
    #     @queue = :web_hooks
    #
    #     def self.perform(url, hook_id, hmac_key)
    #       heavy_lifting
    #     end
    #   end
    #
    # Easily do something custom:
    #
    #   class DeliverWebHook
    #     extend Resque::Plugins::Retry
    #     @queue = :web_hooks
    #
    #     @retry_limit = 8 # default: 1
    #     @retry_delay = 60 # default: 0
    #
    #     # used to build redis key, for counting job attempts.
    #     def self.identifier(url, hook_id, hmac_key)
    #       "#{url}-#{hook_id}"
    #     end
    #
    #     def self.perform(url, hook_id, hmac_key)
    #       heavy_lifting
    #     end
    #   end
    #
    module Retry

      # Copy retry criteria checks on inheritance.
      def inherited(subclass)
        super(subclass)
        subclass.instance_variable_set("@retry_criteria_checks", retry_criteria_checks.dup)
      end

      # @abstract You may override to implement a custom identifier,
      #           you should consider doing this if your job arguments
      #           are many/long or may not convert cleanly to strings.
      #
      # Builds an identifier using the job arguments. This identifier
      # is used as part of the redis key.
      #
      # @param [Array] args job arguments
      # @return [String, nil] job identifier (nil when there are no args)
      def identifier(*args)
        args_string = args.join('-')
        args_string.empty? ? nil : args_string
      end

      # Builds the redis key to be used for keeping state of the job
      # attempts. Whitespace is stripped so the key is always a single token.
      #
      # @return [String] redis key
      def redis_retry_key(*args)
        ['resque-retry', name, identifier(*args)].compact.join(":").gsub(/\s/, '')
      end

      # Maximum number of retrys we can attempt to successfully perform the job.
      # A retry limit of 0 or below will retry forever.
      #
      # @return [Fixnum]
      def retry_limit
        @retry_limit ||= 1
      end

      # Number of retry attempts used to try and perform the job.
      #
      # The real value is kept in Redis, it is accessed and incremented using
      # a before_perform hook.
      #
      # @return [Fixnum] number of attempts
      def retry_attempt
        @retry_attempt ||= 0
      end

      # @abstract
      # Number of seconds to delay until the job is retried.
      #
      # @return [Number] number of seconds to delay
      def retry_delay
        @retry_delay ||= 0
      end

      # @abstract
      # Modify the arguments used to retry the job. Use this to do something
      # other than try the exact same job again.
      #
      # @return [Array] new job arguments
      def args_for_retry(*args)
        args
      end

      # Convenience method to test whether you may retry on a given exception.
      #
      # @param [Class] exception the exception class raised by the job
      # @return [Boolean]
      def retry_exception?(exception)
        # nil means "retry on anything".
        return true if retry_exceptions.nil?
        # Module#>= is true when `ex` is the same class as, or an ancestor
        # of, the raised exception class.
        retry_exceptions.any? { |ex| ex >= exception }
      end

      # @abstract
      # Controls what exceptions may be retried.
      #
      # Default: `nil` - this will retry all exceptions.
      #
      # @return [Array, nil]
      def retry_exceptions
        @retry_exceptions ||= nil
      end

      # Test if the retry criteria is valid.
      #
      # @param [Exception] exception
      # @param [Array] args job arguments
      # @return [Boolean]
      def retry_criteria_valid?(exception, *args)
        # if the retry limit was reached, dont bother checking anything else.
        return false if retry_limit_reached?

        # We always want to retry if the exception matches.
        should_retry = retry_exception?(exception.class)

        # call user retry criteria check blocks; any block returning a truthy
        # value enables the retry.
        retry_criteria_checks.each do |criteria_check|
          should_retry ||= !!criteria_check.call(exception, *args)
        end

        should_retry
      end

      # Retry criteria checks.
      #
      # @return [Array]
      def retry_criteria_checks
        @retry_criteria_checks ||= []
      end

      # Test if the retry limit has been reached.
      #
      # @return [Boolean]
      def retry_limit_reached?
        # A limit of 0 or below means "retry forever", so it is never reached.
        retry_limit > 0 && retry_attempt >= retry_limit
      end

      # Register a retry criteria check callback to be run before retrying
      # the job again.
      #
      # If any callback returns `true`, the job will be retried.
      #
      # @example Using a custom retry criteria check.
      #
      #   retry_criteria_check do |exception, *args|
      #     if exception.message =~ /InvalidJobId/
      #       # don't retry if we got passed a invalid job id.
      #       false
      #     else
      #       true
      #     end
      #   end
      #
      # @yield [exception, *args]
      # @yieldparam exception [Exception] the exception that was raised
      # @yieldparam args [Array] job arguments
      # @yieldreturn [Boolean] false == dont retry, true = can retry
      def retry_criteria_check(&block)
        retry_criteria_checks << block
      end

      # Will retry the job.
      def try_again(*args)
        @retry_job_class ||= self
        if retry_delay <= 0
          # If the delay is 0, no point passing it through the scheduler
          Resque.enqueue(@retry_job_class, *args_for_retry(*args))
        else
          Resque.enqueue_in(retry_delay, @retry_job_class, *args_for_retry(*args))
        end
      end

      # Resque before_perform hook.
      #
      # Increments and sets the `@retry_attempt` count.
      def before_perform_retry(*args)
        retry_key = redis_retry_key(*args)
        Resque.redis.setnx(retry_key, -1) # default to -1 if not set.
        @retry_attempt = Resque.redis.incr(retry_key) # increment by 1.
      end

      # Resque after_perform hook.
      #
      # Deletes retry attempt count from Redis.
      def after_perform_retry(*args)
        Resque.redis.del(redis_retry_key(*args))
      end

      # Resque on_failure hook.
      #
      # Checks if our retry criteria is valid, if it is we try again.
      # Otherwise the retry attempt count is deleted from Redis.
      def on_failure_retry(exception, *args)
        if retry_criteria_valid?(exception, *args)
          try_again(*args)
        else
          Resque.redis.del(redis_retry_key(*args))
        end
      end

    end
  end
end
@@ -0,0 +1,62 @@
require File.dirname(__FILE__) + '/test_helper'

# Exercises the ExponentialBackoff plugin end-to-end: jobs are enqueued,
# performed by a real worker, and the resulting delayed-queue timestamps
# are checked against the backoff strategy.
class ExponentialBackoffTest < Test::Unit::TestCase
  def setup
    Resque.redis.flushall
    @worker = Resque::Worker.new(:testing)
    @worker.register_worker
  end

  def test_resque_plugin_lint
    # The plugin must conform to Resque's plugin conventions.
    assert_nothing_raised do
      Resque::Plugin.lint(Resque::Plugins::ExponentialBackoff)
    end
  end

  def test_default_backoff_strategy
    queued_at = Time.now
    Resque.enqueue(ExponentialBackoffJob)

    # First run: the 0-second delay re-enqueues directly, skipping the scheduler.
    perform_next_job @worker
    assert_equal 1, Resque.info[:processed], '1 processed job'
    assert_equal 1, Resque.info[:failed], 'first ever run, and it should of failed, but never retried'
    assert_equal 1, Resque.info[:pending], '1 pending job, because it never hits the scheduler'

    # Second run: the 60-second delay goes through the delayed queue.
    perform_next_job @worker
    assert_equal 2, Resque.info[:processed], '2nd run, but first retry'
    assert_equal 2, Resque.info[:failed], 'should of failed again, this is the first retry attempt'
    assert_equal 0, Resque.info[:pending], '0 pending jobs, it should be in the delayed queue'

    # NOTE(review): comparing against Time.now captured above can be flaky if
    # a second boundary is crossed mid-test — confirm before relying on it.
    timestamps = Resque.delayed_queue_peek(0, 1)
    assert_equal queued_at.to_i + 60, timestamps[0], '2nd delay' # the first had a zero delay.

    5.times do
      Resque.enqueue(ExponentialBackoffJob)
      perform_next_job @worker
    end

    timestamps = Resque.delayed_queue_peek(0, 5)
    assert_equal queued_at.to_i + 600, timestamps[1], '3rd delay'
    assert_equal queued_at.to_i + 3600, timestamps[2], '4th delay'
    assert_equal queued_at.to_i + 10_800, timestamps[3], '5th delay'
    assert_equal queued_at.to_i + 21_600, timestamps[4], '6th delay'
  end

  def test_custom_backoff_strategy
    queued_at = Time.now
    4.times do
      Resque.enqueue(CustomExponentialBackoffJob, 'http://lividpenguin.com', 1305, 'cd8079192d379dc612f17c660591a6cfb05f1dda')
      perform_next_job @worker
    end

    # The custom strategy has three entries; the 4th attempt reuses the last.
    timestamps = Resque.delayed_queue_peek(0, 3)
    assert_equal queued_at.to_i + 10, timestamps[0], '1st delay'
    assert_equal queued_at.to_i + 20, timestamps[1], '2nd delay'
    assert_equal queued_at.to_i + 30, timestamps[2], '3rd delay'
    assert_equal 2, Resque.delayed_timestamp_size(timestamps[2]), '4th delay should share delay with 3rd'

    assert_equal 4, Resque.info[:processed], 'processed jobs'
    assert_equal 4, Resque.info[:failed], 'failed jobs'
    assert_equal 0, Resque.info[:pending], 'pending jobs'
  end
end