sidekiq-amigo 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/amigo/audit_logger.rb +1 -1
- data/lib/amigo/deprecated_jobs.rb +1 -1
- data/lib/amigo/job.rb +2 -1
- data/lib/amigo/queue_backoff_job.rb +151 -0
- data/lib/amigo/rate_limited_error_handler.rb +123 -0
- data/lib/amigo/retry.rb +96 -0
- data/lib/amigo/router.rb +1 -1
- data/lib/amigo/scheduled_job.rb +2 -1
- data/lib/amigo/semaphore_backoff_job.rb +133 -0
- data/lib/amigo/spec_helpers.rb +73 -4
- data/lib/amigo/version.rb +2 -2
- data/lib/amigo.rb +2 -1
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c014ebc6aa38c04f3c0229560139ce0316f17aa16061df1aea0385d70f02339
|
4
|
+
data.tar.gz: e9c3bf46ae1caff439be4d5d4df60f37a0bce23e41027756ae773b0563d1ccce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17681116628e5947a0c12c07865139505ef3081616d20745196bacc626a409ab4cb8d2d5b19e22a5f72dfa77f920d39054bd054c6364ec12bbd4a35713ce84ce
|
7
|
+
data.tar.gz: 97db36323ebb5df4af8f3bd6962914a3c3fc02e7a2f0b6190b5126632b294d15d65092cfd4501399232b69a3f04b4cc7a6428aeadaab90861b589c0f509451d8
|
data/lib/amigo/audit_logger.rb
CHANGED
@@ -10,7 +10,7 @@ require "amigo/job"
|
|
10
10
|
# So, make the class exist, but noop so it won't be scheduled and won't be retried.
|
11
11
|
# Then it can be deleted later.
|
12
12
|
#
|
13
|
-
|
13
|
+
module Amigo
|
14
14
|
module DeprecatedJobs
|
15
15
|
def self.install(const_base, *names)
|
16
16
|
cls = self.noop_class
|
data/lib/amigo/job.rb
CHANGED
@@ -4,13 +4,14 @@ require "sidekiq"
|
|
4
4
|
|
5
5
|
require "amigo"
|
6
6
|
|
7
|
-
|
7
|
+
module Amigo
|
8
8
|
module Job
|
9
9
|
def self.extended(cls)
|
10
10
|
cls.include(Sidekiq::Worker)
|
11
11
|
cls.extend(ClassMethods)
|
12
12
|
cls.pattern = ""
|
13
13
|
cls.include(InstanceMethods)
|
14
|
+
Amigo.register_job(cls)
|
14
15
|
end
|
15
16
|
|
16
17
|
module InstanceMethods
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
require "sidekiq/api"
|
5
|
+
|
6
|
+
# Queue backoff jobs are used for jobs that should not saturate workers,
|
7
|
+
# such that jobs on dependent queues end up not running for a while.
|
8
|
+
#
|
9
|
+
# For example, imagine a queue dedicated to long-running jobs ('slow'),
|
10
|
+
# a queue of critical, short-running tasks ('critical'), and 10 worker threads.
|
11
|
+
# Imagine 20 'slow' jobs enter that queue, then 2 'critical' jobs.
|
12
|
+
#
|
13
|
+
# The 10 worker threads start processing the 'slow' queue,
|
14
|
+
# and one completes. When that worker thread goes to find its next job,
|
15
|
+
# it pulls a job off the 'slow' queue (even with Sidekiq queue priorities,
|
16
|
+
# lopsided queue sizes mean it's likely we'll get a 'slow' job).
|
17
|
+
#
|
18
|
+
# When this job starts, it checks the 'critical' queue,
|
19
|
+
# which is specified as a *dependent* queue of this job.
|
20
|
+
# If it sees the 'critical' queue has latency,
|
21
|
+
# the job reschedules itself in the future and then processes the next job.
|
22
|
+
#
|
23
|
+
# This keeps happening until the worker thread finds a job from
|
24
|
+
# the 'critical' queue and processes it successfully.
|
25
|
+
#
|
26
|
+
# Implementers can override two methods:
|
27
|
+
#
|
28
|
+
# - `dependent_queues` should return an array of the names of queues that should be checked,
|
29
|
+
# in order of higher-priority-first. See below for Redis performance notes.
|
30
|
+
# - `calculate_backoff` is passed a queue name and its latency,
|
31
|
+
# and should return either:
|
32
|
+
# - the backoff duration in seconds (ie the argument to `perform_in`),
|
33
|
+
# - 0 to perform the job immediately, or
|
34
|
+
# - nil to check the next queue with latency.
|
35
|
+
# - Note that if all calls to `calculate_backoff` return nil, the job is performed immediately.
|
36
|
+
#
|
37
|
+
# QueueBackoffJob supports multiple dependent queues but it checks them one-at-a-time
|
38
|
+
# to avoid any unnecessary calls to Redis.
|
39
|
+
#
|
40
|
+
# == Redis Impacts
|
41
|
+
#
|
42
|
+
# Using QueueBackoffJob adds an overhead to each perform of a job -
|
43
|
+
# specifically, a call to `Redis.lrange` through the Sidekiq API's Sidekiq::Queue#latency
|
44
|
+
# potentially for each queue in `dependent_queues`.
|
45
|
+
# This is a fast call (it just gets the last item), but it's not free,
|
46
|
+
# so users should be aware of it.
|
47
|
+
#
|
48
|
+
module Amigo
|
49
|
+
module QueueBackoffJob
|
50
|
+
def self.included(cls)
|
51
|
+
cls.include InstanceMethods
|
52
|
+
cls.prepend PrependedMethods
|
53
|
+
end
|
54
|
+
|
55
|
+
class << self
|
56
|
+
# Reset class state. Mostly used just for testing.
|
57
|
+
def reset
|
58
|
+
@max_backoff = 10
|
59
|
+
is_testing = defined?(::Sidekiq::Testing) && ::Sidekiq::Testing.enabled?
|
60
|
+
@enabled = !is_testing
|
61
|
+
@cache_queue_names = true
|
62
|
+
@cache_latencies = true
|
63
|
+
@all_queue_names = nil
|
64
|
+
@latency_cache_duration = 5
|
65
|
+
@latency_cache = {}
|
66
|
+
end
|
67
|
+
|
68
|
+
# Maximum time into the future a job will reschedule itself for.
|
69
|
+
# Ie, if latency is 30s, and max_backoff is 10, the job will be scheduled
|
70
|
+
# for 10s into the future if it finds backoff pressure.
|
71
|
+
attr_accessor :max_backoff
|
72
|
+
|
73
|
+
# Return true if backoff checks are enabled.
|
74
|
+
attr_accessor :enabled
|
75
|
+
|
76
|
+
def enabled?
|
77
|
+
return @enabled
|
78
|
+
end
|
79
|
+
|
80
|
+
# Cached value of all Sidekiq queues, since they rarely change.
|
81
|
+
# If your queue names change at runtime, set +cache_queue_names+ to false.
|
82
|
+
def all_queue_names
|
83
|
+
return @all_queue_names if @cache_queue_names && @all_queue_names
|
84
|
+
@all_queue_names = ::Sidekiq::Queue.all.map(&:name)
|
85
|
+
return @all_queue_names
|
86
|
+
end
|
87
|
+
|
88
|
+
# Whether all_queue_names should be cached.
|
89
|
+
attr_reader :cache_queue_names
|
90
|
+
|
91
|
+
def cache_queue_names=(v)
|
92
|
+
@cache_queue_names = v
|
93
|
+
@all_queue_names = nil if v == false
|
94
|
+
end
|
95
|
+
|
96
|
+
# Return how long queue latencies should be cached before they are re-fetched from Redis.
|
97
|
+
# Avoids hitting Redis to check latency too often.
|
98
|
+
# Default to 5 seconds. Set to 0 to avoid caching.
|
99
|
+
attr_accessor :latency_cache_duration
|
100
|
+
|
101
|
+
# Check the latency of the named queue, using +now+ as the current time.
|
102
|
+
# If the queue has been checked more recently than latency_cache_duration specified,
|
103
|
+
# return the cached value.
|
104
|
+
def check_latency(qname, now: Time.now)
|
105
|
+
return ::Sidekiq::Queue.new(qname).latency if self.latency_cache_duration.zero?
|
106
|
+
cached = @latency_cache[qname]
|
107
|
+
if cached.nil? || (cached[:at] + self.latency_cache_duration) < now
|
108
|
+
@latency_cache[qname] = {at: now, value: ::Sidekiq::Queue.new(qname).latency}
|
109
|
+
end
|
110
|
+
return @latency_cache[qname][:value]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
self.reset
|
114
|
+
|
115
|
+
module InstanceMethods
|
116
|
+
def dependent_queues
|
117
|
+
qname = self.class.get_sidekiq_options["queue"]
|
118
|
+
return ::Amigo::QueueBackoffJob.all_queue_names.reject { |x| x == qname }
|
119
|
+
end
|
120
|
+
|
121
|
+
def calculate_backoff(_queue_name, latency, _args)
|
122
|
+
return [latency, ::Amigo::QueueBackoffJob.max_backoff].min
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
module PrependedMethods
|
127
|
+
def perform(*args)
|
128
|
+
return super unless ::Amigo::QueueBackoffJob.enabled?
|
129
|
+
# rubocop:disable Style/GuardClause, Lint/NonLocalExitFromIterator
|
130
|
+
dependent_queues.each do |qname|
|
131
|
+
latency = Amigo::QueueBackoffJob.check_latency(qname)
|
132
|
+
# If latency is <= 0, we can skip this queue.
|
133
|
+
next unless latency.positive?
|
134
|
+
# If backoff is nil, ignore this queue and check the next
|
135
|
+
# If it's > 0, defer until the future
|
136
|
+
# If it's <= 0, run the job and check no more queues
|
137
|
+
backoff = calculate_backoff(qname, latency, args)
|
138
|
+
next if backoff.nil?
|
139
|
+
if backoff.positive?
|
140
|
+
self.class.perform_in(backoff, *args)
|
141
|
+
return
|
142
|
+
else
|
143
|
+
return super
|
144
|
+
end
|
145
|
+
end
|
146
|
+
# rubocop:enable Style/GuardClause, Lint/NonLocalExitFromIterator
|
147
|
+
super
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "digest"
|
4
|
+
|
5
|
+
# Wrap another Sidekiq error handler so invoking it is rate limited.
|
6
|
+
#
|
7
|
+
# Useful when wrapping a usage-based error reporter like Sentry,
|
8
|
+
# which can be hammered in the case of an issue like connectivity
|
9
|
+
# that causes all jobs and retries to fail.
|
10
|
+
# It is suggested that all errors are still reported to something
|
11
|
+
# like application logs, since entirely silencing errors
|
12
|
+
# can make debugging problems tricky.
|
13
|
+
#
|
14
|
+
# Usage:
|
15
|
+
#
|
16
|
+
# Sidekiq.configure_server do |config|
|
17
|
+
# config.error_handlers << Amigo::RateLimitedErrorHandler.new(
|
18
|
+
# Sentry::Sidekiq::ErrorHandler.new,
|
19
|
+
# sample_rate: ENV.fetch('ASYNC_ERROR_RATE_LIMITER_SAMPLE_RATE', '0.5').to_f,
|
20
|
+
# ttl: ENV.fetch('ASYNC_ERROR_RATE_LIMITER_TTL', '120').to_f,
|
21
|
+
# )
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# See notes about +sample_rate+ and +ttl+,
|
25
|
+
# and +fingerprint+ for how exceptions are fingerprinted for uniqueness.
|
26
|
+
#
|
27
|
+
# Rate limiting is done in-memory so is unique across the entire process-
|
28
|
+
# threads/workers share rate limiting, but multiple processes do not.
|
29
|
+
# So if 2 processes have 10 threads each,
|
30
|
+
# the error handler would be invoked twice if they all error
|
31
|
+
# for the same reason.
|
32
|
+
#
|
33
|
+
# Thread-based limiting (20 errors in the case above)
|
34
|
+
# or cross-process limiting (1 error in the case above)
|
35
|
+
# can be added in the future.
|
36
|
+
module Amigo
|
37
|
+
class RateLimitedErrorHandler
|
38
|
+
# The error handler that will be called to report the error.
|
39
|
+
attr_reader :wrapped
|
40
|
+
|
41
|
+
# After the first error with a fingerprint is seen,
|
42
|
+
# how many future errors with the same fingerprint should we sample,
|
43
|
+
# until the fingerprint expires +ttl+ after the first error?
|
44
|
+
# Use 1 to call the wrapped handler on all errors with the same fingerprint,
|
45
|
+
# and 0 to never call the wrapped handler on those errors until ttl has elapsed.
|
46
|
+
attr_reader :sample_rate
|
47
|
+
|
48
|
+
# How long does the fingerprint live for an error?
|
49
|
+
# For example, with a sample rate of 0 and a ttl of 2 minutes,
|
50
|
+
# the rate will be at most one of the same error every 2 minutes;
|
51
|
+
# the error is always sent when the key is set; then no events are sent until the key expires.
|
52
|
+
#
|
53
|
+
# Note that, unlike Redis TTL, the ttl is set only when the error is first seen
|
54
|
+
# (and then after it's seen once the fingerprint expires);
|
55
|
+
# this means that, if an error is seen once a minute, with a TTL of 2 minutes,
|
56
|
+
# even with a sample rate of 0, an error is recorded every 2 minutes,
|
57
|
+
# rather than just once and never again.
|
58
|
+
attr_reader :ttl
|
59
|
+
|
60
|
+
def initialize(wrapped, sample_rate: 0.1, ttl: 120)
|
61
|
+
@mutex = Mutex.new
|
62
|
+
@wrapped = wrapped
|
63
|
+
@sample_rate = sample_rate
|
64
|
+
@inverse_sample_rate = 1 - @sample_rate
|
65
|
+
@ttl = ttl
|
66
|
+
# Key is fingerprint, value is when to expire
|
67
|
+
@store = {}
|
68
|
+
# Add some fast-paths to handle 0 and 1 sample rates.
|
69
|
+
@call = if sample_rate == 1
|
70
|
+
->(*a) { @wrapped.call(*a) }
|
71
|
+
elsif sample_rate.zero?
|
72
|
+
self.method(:call_zero)
|
73
|
+
else
|
74
|
+
self.method(:call_sampled)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def call(ex, context)
|
79
|
+
@call[ex, context]
|
80
|
+
end
|
81
|
+
|
82
|
+
private def call_zero(ex, context)
|
83
|
+
call_impl(ex, context) { false }
|
84
|
+
end
|
85
|
+
|
86
|
+
private def call_sampled(ex, context)
|
87
|
+
call_impl(ex, context) { rand <= @sample_rate }
|
88
|
+
end
|
89
|
+
|
90
|
+
private def call_impl(ex, context)
|
91
|
+
now = Time.now
|
92
|
+
invoke = @mutex.synchronize do
|
93
|
+
@store.delete_if { |_sig, t| t < now }
|
94
|
+
fingerprint = self.fingerprint(ex)
|
95
|
+
if @store.key?(fingerprint)
|
96
|
+
yield
|
97
|
+
else
|
98
|
+
@store[fingerprint] = now + @ttl
|
99
|
+
true
|
100
|
+
end
|
101
|
+
end
|
102
|
+
@wrapped.call(ex, context) if invoke
|
103
|
+
end
|
104
|
+
|
105
|
+
# Fingerprint an exception.
|
106
|
+
# - No two exceptions with different classes can be the same.
|
107
|
+
# - If an exception has no backtrace (it was manually constructed),
|
108
|
+
# the identity of the exception instance (object_id) is the fingerprint.
|
109
|
+
# - If an exception has a backtrace,
|
110
|
+
# the md5 of the backtrace is the fingerprint.
|
111
|
+
def fingerprint(ex)
|
112
|
+
md5 = Digest::MD5.new
|
113
|
+
md5.update ex.class.to_s
|
114
|
+
if ex.backtrace.nil?
|
115
|
+
md5.update ex.object_id.to_s
|
116
|
+
else
|
117
|
+
ex.backtrace.each { |line| md5.update(line) }
|
118
|
+
end
|
119
|
+
md5.update(self.fingerprint(ex.cause)) if ex.cause
|
120
|
+
return md5.hexdigest
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
data/lib/amigo/retry.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
|
5
|
+
# Middleware so Sidekiq workers can use a custom retry logic.
|
6
|
+
# See +Amigo::Retry::Retry+, +Amigo::Retry::Die+,
|
7
|
+
# and +Amigo::Retry::OrDie+ for more details
|
8
|
+
# on how these should be used.
|
9
|
+
#
|
10
|
+
# NOTE: You MUST register +Amigo::Retry::ServerMiddleware+,
|
11
|
+
# and you SHOULD increase the size of the dead set if you are relying on 'die' behavior:
|
12
|
+
#
|
13
|
+
# Sidekiq.configure_server do |config|
|
14
|
+
# config.options[:dead_max_jobs] = 999_999_999
|
15
|
+
# config.server_middleware.add(Amigo::Retry::ServerMiddleware)
|
16
|
+
# end
|
17
|
+
module Amigo
|
18
|
+
module Retry
|
19
|
+
class Error < StandardError; end
|
20
|
+
|
21
|
+
# Raise this class, or a subclass of it, to schedule a later retry,
|
22
|
+
# rather than using an error to trigger Sidekiq's default retry behavior.
|
23
|
+
# The benefit here is that it allows a consistent, customizable behavior,
|
24
|
+
# so is better for 'expected' errors like rate limiting.
|
25
|
+
class Retry < Error
|
26
|
+
attr_accessor :interval_or_timestamp
|
27
|
+
|
28
|
+
def initialize(interval_or_timestamp, msg=nil)
|
29
|
+
@interval_or_timestamp = interval_or_timestamp
|
30
|
+
super(msg || "retry job in #{interval_or_timestamp}")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Raise this class, or a subclass of it, to send the job to the DeadSet,
|
35
|
+
# rather than going through Sidekiq's retry mechanisms.
|
36
|
+
# This allows jobs to hard-fail when there is something like a total outage,
|
37
|
+
# rather than retrying.
|
38
|
+
class Die < Error
|
39
|
+
end
|
40
|
+
|
41
|
+
# Raise this class, or a subclass of it, to:
|
42
|
+
# - Use +Retry+ exception semantics while the current attempt is <= +attempts+, or
|
43
|
+
# - Use +Die+ exception semantics if the current attempt is > +attempts+.
|
44
|
+
class OrDie < Error
|
45
|
+
attr_reader :attempts, :interval_or_timestamp
|
46
|
+
|
47
|
+
def initialize(attempts, interval_or_timestamp, msg=nil)
|
48
|
+
@attempts = attempts
|
49
|
+
@interval_or_timestamp = interval_or_timestamp
|
50
|
+
super(msg || "retry every #{interval_or_timestamp} up to #{attempts} times")
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class ServerMiddleware
|
55
|
+
def call(worker, job, _queue)
|
56
|
+
yield
|
57
|
+
rescue Amigo::Retry::Retry => e
|
58
|
+
handle_retry(worker, job, e)
|
59
|
+
rescue Amigo::Retry::Die => e
|
60
|
+
handle_die(worker, job, e)
|
61
|
+
rescue Amigo::Retry::OrDie => e
|
62
|
+
handle_retry_or_die(worker, job, e)
|
63
|
+
end
|
64
|
+
|
65
|
+
def handle_retry(worker, job, e)
|
66
|
+
Sidekiq.logger.info("scheduling_retry")
|
67
|
+
self.amigo_retry_in(worker.class, job, e.interval_or_timestamp)
|
68
|
+
end
|
69
|
+
|
70
|
+
def handle_die(_worker, job, _e)
|
71
|
+
Sidekiq.logger.warn("sending_to_deadset")
|
72
|
+
payload = Sidekiq.dump_json(job)
|
73
|
+
Sidekiq::DeadSet.new.kill(payload, notify_failure: false)
|
74
|
+
end
|
75
|
+
|
76
|
+
def handle_retry_or_die(worker, job, e)
|
77
|
+
retry_count = job.fetch("retry_count", 0)
|
78
|
+
if retry_count <= e.attempts
|
79
|
+
handle_retry(worker, job, e)
|
80
|
+
else
|
81
|
+
handle_die(worker, job, e)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def amigo_retry_in(worker_class, item, interval)
|
86
|
+
# pulled from perform_in
|
87
|
+
int = interval.to_f
|
88
|
+
now = Time.now.to_f
|
89
|
+
ts = (int < 1_000_000_000 ? now + int : int)
|
90
|
+
item["at"] = ts if ts > now
|
91
|
+
item["retry_count"] = item.fetch("retry_count", 0) + 1
|
92
|
+
worker_class.client_push(item)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
data/lib/amigo/router.rb
CHANGED
data/lib/amigo/scheduled_job.rb
CHANGED
@@ -5,7 +5,7 @@ require "sidekiq-cron"
|
|
5
5
|
|
6
6
|
require "amigo"
|
7
7
|
|
8
|
-
|
8
|
+
module Amigo
|
9
9
|
module ScheduledJob
|
10
10
|
def self.extended(cls)
|
11
11
|
cls.include(Sidekiq::Worker)
|
@@ -13,6 +13,7 @@ class Amigo
|
|
13
13
|
cls.extend(ClassMethods)
|
14
14
|
cls.splay_duration = 30
|
15
15
|
cls.include(InstanceMethods)
|
16
|
+
Amigo.register_job(cls)
|
16
17
|
end
|
17
18
|
|
18
19
|
module InstanceMethods
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
|
5
|
+
module Amigo
|
6
|
+
# This is a placeholder until it's migrated to Amigo proper
|
7
|
+
end
|
8
|
+
|
9
|
+
# Semaphore backoff jobs can reschedule themselves to happen at a later time
|
10
|
+
# if there is too high a contention on a semaphore.
|
11
|
+
# Ie, if there are too many jobs with the same key,
|
12
|
+
# they start to reschedule themselves for the future.
|
13
|
+
#
|
14
|
+
# This is useful when a certain job (or job for a certain target)
|
15
|
+
# can be slow so should not consume all available resources.
|
16
|
+
#
|
17
|
+
# In general, you should not use semaphore backoff jobs for singletons,
|
18
|
+
# as the guarantees are not strong enough.
|
19
|
+
# It is useful for many rapid jobs.
|
20
|
+
#
|
21
|
+
# Implementers must override the following methods:
|
22
|
+
#
|
23
|
+
# - `semaphore_key` must return the Redis key to use as the semaphore.
|
24
|
+
# This may be something like "sbj-user-1" to limit jobs for user 1.
|
25
|
+
# - `semaphore_size` is the number of concurrent jobs.
|
26
|
+
# Returning 5 would mean at most 5 jobs could be running at a time.
|
27
|
+
#
|
28
|
+
# And may override the following methods:
|
29
|
+
#
|
30
|
+
# - `semaphore_backoff` is called to know when to schedule the backoff retry.
|
31
|
+
# By default, it is 10 seconds, plus between 0 and 10 seconds more,
|
32
|
+
# so the job will be retried in between 10 and 20 seconds.
|
33
|
+
# Return whatever you want for the backoff.
|
34
|
+
# - `semaphore_expiry` should return the TTL of the semaphore key.
|
35
|
+
# Defaults to 30 seconds. See below for key expiry and negative semaphore value details.
|
36
|
+
# - `before_perform` is called before calling the `perform` method.
|
37
|
+
# This is required so that implementers can set worker state, based on job arguments,
|
38
|
+
# that can be used for calculating the semaphore key.
|
39
|
+
#
|
40
|
+
# Note that we give the semaphore key an expiry. This is to avoid the situation where
|
41
|
+
# jobs are killed, the decrement is not done, and the counter increases to the point we
|
42
|
+
# have fewer than the expected number of jobs running.
|
43
|
+
#
|
44
|
+
# This does mean that, when a job runs longer than the semaphore expiry,
|
45
|
+
# another worker can be started, which would increment the counter back to 1.
|
46
|
+
# When the original job ends, the counter would be 0; then when the new job ends,
|
47
|
+
# the counter would be -1. To avoid negative counters (which create the same issue
|
48
|
+
# around missing decrements), if we ever detect a negative 'jobs running',
|
49
|
+
# we warn and remove the key entirely.
|
50
|
+
#
|
51
|
+
module Amigo
|
52
|
+
module SemaphoreBackoffJob
|
53
|
+
def self.included(cls)
|
54
|
+
cls.include InstanceMethods
|
55
|
+
cls.prepend PrependedMethods
|
56
|
+
end
|
57
|
+
|
58
|
+
class << self
|
59
|
+
# Reset class state. Mostly used just for testing.
|
60
|
+
def reset
|
61
|
+
is_testing = defined?(::Sidekiq::Testing) && ::Sidekiq::Testing.enabled?
|
62
|
+
@enabled = !is_testing
|
63
|
+
end
|
64
|
+
|
65
|
+
# Return true if backoff checks are enabled.
|
66
|
+
attr_accessor :enabled
|
67
|
+
|
68
|
+
def enabled?
|
69
|
+
return @enabled
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
self.reset
|
74
|
+
|
75
|
+
module InstanceMethods
|
76
|
+
def semaphore_key
|
77
|
+
raise NotImplementedError, "must be implemented on worker"
|
78
|
+
end
|
79
|
+
|
80
|
+
def semaphore_size
|
81
|
+
raise NotImplementedError, "must be implemented on worker"
|
82
|
+
end
|
83
|
+
|
84
|
+
def semaphore_backoff
|
85
|
+
return 10 + (rand * 10)
|
86
|
+
end
|
87
|
+
|
88
|
+
def semaphore_expiry
|
89
|
+
return 30
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
module PrependedMethods
|
94
|
+
def perform(*args)
|
95
|
+
self.before_perform(*args) if self.respond_to?(:before_perform)
|
96
|
+
return super unless ::Amigo::SemaphoreBackoffJob.enabled?
|
97
|
+
key = self.semaphore_key
|
98
|
+
size = self.semaphore_size
|
99
|
+
# Create a simple counter for the semaphore key.
|
100
|
+
# Always increment; also set an expiration if this is the first job.
|
101
|
+
# If we need to retry later, make sure we decrement, then schedule for the future.
|
102
|
+
# If we run it now, decrement the counter afterwards.
|
103
|
+
# If some corruption results in a negative number of jobs in the semaphore,
|
104
|
+
# we can delete the key and get back to a default state
|
105
|
+
# (this can cause problems but the idea is that
|
106
|
+
# we should run at least the configured number of jobs,
|
107
|
+
# and eventually the semaphore key will expire/get rebalanced).
|
108
|
+
jobs_in_semaphore = Sidekiq.redis do |conn|
|
109
|
+
cnt = conn.incr(key)
|
110
|
+
conn.expire(key, self.semaphore_expiry) if cnt == 1
|
111
|
+
cnt
|
112
|
+
end
|
113
|
+
if jobs_in_semaphore > size
|
114
|
+
Sidekiq.redis { |conn| conn.decr(key) }
|
115
|
+
backoff = self.semaphore_backoff
|
116
|
+
self.class.perform_in(backoff, *args)
|
117
|
+
return
|
118
|
+
end
|
119
|
+
begin
|
120
|
+
super
|
121
|
+
ensure
|
122
|
+
Sidekiq.redis do |conn|
|
123
|
+
new_job_count = conn.decr(key)
|
124
|
+
if new_job_count.negative?
|
125
|
+
conn.del(key)
|
126
|
+
Sidekiq.logger.warn("negative_semaphore_backoff_job_count")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
data/lib/amigo/spec_helpers.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "amigo"
|
4
|
+
require "sidekiq/worker"
|
4
5
|
|
5
|
-
|
6
|
+
module Amigo
|
6
7
|
module SpecHelpers
|
7
8
|
def self.included(context)
|
8
9
|
context.before(:each) do |example|
|
@@ -121,7 +122,7 @@ class Amigo
|
|
121
122
|
|
122
123
|
@missing.each do |event, payload|
|
123
124
|
message = "expected a '%s' event to be fired" % [event]
|
124
|
-
message << " with a payload of %p" % [payload] unless payload.nil?
|
125
|
+
message << (" with a payload of %p" % [payload]) unless payload.nil?
|
125
126
|
message << " but none was."
|
126
127
|
|
127
128
|
messages << message
|
@@ -131,7 +132,7 @@ class Amigo
|
|
131
132
|
messages << "No events were sent."
|
132
133
|
else
|
133
134
|
parts = @recorded_events.map(&:inspect)
|
134
|
-
messages << "The following events were recorded: %s" % [parts.join(", ")]
|
135
|
+
messages << ("The following events were recorded: %s" % [parts.join(", ")])
|
135
136
|
end
|
136
137
|
|
137
138
|
return messages.join("\n")
|
@@ -141,7 +142,7 @@ class Amigo
|
|
141
142
|
messages = []
|
142
143
|
@matched.each do |event, _payload|
|
143
144
|
message = "expected a '%s' event not to be fired" % [event]
|
144
|
-
message << " with a payload of %p" % [@expected_payload] if @expected_payload
|
145
|
+
message << (" with a payload of %p" % [@expected_payload]) if @expected_payload
|
145
146
|
message << " but one was."
|
146
147
|
messages << message
|
147
148
|
end
|
@@ -229,5 +230,73 @@ class Amigo
|
|
229
230
|
def perform_async_job(job)
|
230
231
|
return PerformAsyncJobMatcher.new(job)
|
231
232
|
end
|
233
|
+
|
234
|
+
# Like a Sidekiq worker's perform_inline,
|
235
|
+
# but allows an arbitrary item to be used, rather than just the
|
236
|
+
# given class and args. For example, when testing,
|
237
|
+
# you may need to assume something like 'retry_count' is in the job payload,
|
238
|
+
# but that can't be included with perform_inline.
|
239
|
+
# This allows those arbitrary job payload fields
|
240
|
+
# to be included when the job is run.
|
241
|
+
module_function def sidekiq_perform_inline(klass, args, item=nil)
|
242
|
+
Sidekiq::Worker::Setter.override_item = item
|
243
|
+
begin
|
244
|
+
klass.perform_inline(*args)
|
245
|
+
ensure
|
246
|
+
Sidekiq::Worker::Setter.override_item = nil
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
module_function def drain_sidekiq_jobs(q)
|
251
|
+
all_sidekiq_jobs(q).each do |job|
|
252
|
+
klass = job.item.fetch("class")
|
253
|
+
klass = Sidekiq::Testing.constantize(klass) if klass.is_a?(String)
|
254
|
+
sidekiq_perform_inline(klass, job.item["args"], job.item)
|
255
|
+
job.delete
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
module_function def all_sidekiq_jobs(q)
|
260
|
+
arr = []
|
261
|
+
q.each { |j| arr << j }
|
262
|
+
return arr
|
263
|
+
end
|
264
|
+
|
265
|
+
# Use this middleware to pass an arbitrary callback evaluated before a job runs.
|
266
|
+
# Make sure to call +reset+ after the test.
|
267
|
+
class ServerCallbackMiddleware
|
268
|
+
class << self
|
269
|
+
attr_accessor :callback
|
270
|
+
end
|
271
|
+
|
272
|
+
def self.reset
|
273
|
+
self.callback = nil
|
274
|
+
return self
|
275
|
+
end
|
276
|
+
|
277
|
+
def self.new
|
278
|
+
return self
|
279
|
+
end
|
280
|
+
|
281
|
+
def self.call(worker, job, queue)
|
282
|
+
self.callback[worker, job, queue] if self.callback
|
283
|
+
yield
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
module ::Sidekiq
|
290
|
+
module Worker
|
291
|
+
class Setter
|
292
|
+
class << self
|
293
|
+
attr_accessor :override_item
|
294
|
+
end
|
295
|
+
def normalize_item(item)
|
296
|
+
result = super
|
297
|
+
result.merge!(self.class.override_item || {})
|
298
|
+
return result
|
299
|
+
end
|
300
|
+
end
|
232
301
|
end
|
233
302
|
end
|
data/lib/amigo/version.rb
CHANGED
data/lib/amigo.rb
CHANGED
@@ -97,7 +97,7 @@ require "sidekiq-cron"
|
|
97
97
|
# Splay exists to avoid a "thundering herd" issue.
|
98
98
|
# Splay defaults to 30s; you may wish to always provide splay, whatever you think for your job.
|
99
99
|
#
|
100
|
-
|
100
|
+
module Amigo
|
101
101
|
class Error < StandardError; end
|
102
102
|
|
103
103
|
class StartSchedulerFailed < Error; end
|
@@ -199,6 +199,7 @@ class Amigo
|
|
199
199
|
|
200
200
|
def register_job(job)
|
201
201
|
self.registered_jobs << job
|
202
|
+
self.registered_jobs.uniq!
|
202
203
|
end
|
203
204
|
|
204
205
|
# Start the scheduler.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sidekiq-amigo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lithic Technology
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sidekiq
|
@@ -135,14 +135,19 @@ files:
|
|
135
135
|
- lib/amigo/audit_logger.rb
|
136
136
|
- lib/amigo/deprecated_jobs.rb
|
137
137
|
- lib/amigo/job.rb
|
138
|
+
- lib/amigo/queue_backoff_job.rb
|
139
|
+
- lib/amigo/rate_limited_error_handler.rb
|
140
|
+
- lib/amigo/retry.rb
|
138
141
|
- lib/amigo/router.rb
|
139
142
|
- lib/amigo/scheduled_job.rb
|
143
|
+
- lib/amigo/semaphore_backoff_job.rb
|
140
144
|
- lib/amigo/spec_helpers.rb
|
141
145
|
- lib/amigo/version.rb
|
142
146
|
homepage: https://github.com/lithictech/sidekiq-amigo
|
143
147
|
licenses:
|
144
148
|
- MIT
|
145
|
-
metadata:
|
149
|
+
metadata:
|
150
|
+
rubygems_mfa_required: 'true'
|
146
151
|
post_install_message:
|
147
152
|
rdoc_options: []
|
148
153
|
require_paths:
|