sidekiq-amigo 1.1.2 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/amigo/audit_logger.rb +1 -1
- data/lib/amigo/deprecated_jobs.rb +1 -1
- data/lib/amigo/job.rb +2 -1
- data/lib/amigo/queue_backoff_job.rb +151 -0
- data/lib/amigo/rate_limited_error_handler.rb +123 -0
- data/lib/amigo/retry.rb +96 -0
- data/lib/amigo/router.rb +1 -1
- data/lib/amigo/scheduled_job.rb +2 -1
- data/lib/amigo/semaphore_backoff_job.rb +133 -0
- data/lib/amigo/spec_helpers.rb +73 -4
- data/lib/amigo/version.rb +2 -2
- data/lib/amigo.rb +17 -9
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1dc6d4ac6f9e7a89afecc711780ceeeb3ab1204435a19be2683623256b2abc49
|
4
|
+
data.tar.gz: 725dc4719b949d5f93bd84125ed6d2bea713be872f7e8413e6509d90c9090544
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1a5e2c7beab44fecabb7d7b7f9f610ce1138029eb247b46c04d90399550d976990f6db36717c73f225c195318ea303b1607984878167dfac4087024e809c735
|
7
|
+
data.tar.gz: 807e4c88665fd4ac7a6a4004e654de2a697b4ebd7c90546be7f965955ecd84690f2e0f5eebe913d31e2673f32f7bbd560e05695f79154284f3d7bd8a6ee77eb9
|
data/lib/amigo/audit_logger.rb
CHANGED
@@ -10,7 +10,7 @@ require "amigo/job"
|
|
10
10
|
# So, make the class exist, but noop so it won't be scheduled and won't be retried.
|
11
11
|
# Then it can be deleted later.
|
12
12
|
#
|
13
|
-
|
13
|
+
module Amigo
|
14
14
|
module DeprecatedJobs
|
15
15
|
def self.install(const_base, *names)
|
16
16
|
cls = self.noop_class
|
data/lib/amigo/job.rb
CHANGED
@@ -4,13 +4,14 @@ require "sidekiq"
|
|
4
4
|
|
5
5
|
require "amigo"
|
6
6
|
|
7
|
-
|
7
|
+
module Amigo
|
8
8
|
module Job
|
9
9
|
def self.extended(cls)
|
10
10
|
cls.include(Sidekiq::Worker)
|
11
11
|
cls.extend(ClassMethods)
|
12
12
|
cls.pattern = ""
|
13
13
|
cls.include(InstanceMethods)
|
14
|
+
Amigo.register_job(cls)
|
14
15
|
end
|
15
16
|
|
16
17
|
module InstanceMethods
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
require "sidekiq/api"
|
5
|
+
|
6
|
+
# Queue backoff jobs are used for jobs that should not saturate workers,
|
7
|
+
# such that jobs on dependent queues end up not running for a while.
|
8
|
+
#
|
9
|
+
# For example, imagine a queue dedicated to long-running jobs ('slow'),
|
10
|
+
# a queue of critical, short-running tasks ('critical'), and 10 worker threads.
|
11
|
+
# Imagine 20 'slow' jobs enter that queue, then 2 'critical' jobs.
|
12
|
+
#
|
13
|
+
# The 10 worker threads start processing the 'slow' queue,
|
14
|
+
# and one completes. When that worker thread goes to find its next job,
|
15
|
+
# it pulls a job off the 'slow' queue (even with Sidekiq queue priorities,
|
16
|
+
# lopsided queue sizes mean it's likely we'll ge ta 'slow' job).
|
17
|
+
#
|
18
|
+
# When this job starts, it checks the 'critical' queue,
|
19
|
+
# which is specified as a *dependent* queue of this job.
|
20
|
+
# If it sees the 'critical' queue has latency,
|
21
|
+
# the job reschedules itself in the future and then processes the next job.
|
22
|
+
#
|
23
|
+
# This keeps happening until the worker thread finds a job from
|
24
|
+
# the 'critical' queue and processes it successfully.
|
25
|
+
#
|
26
|
+
# Implementers can override two methods:
|
27
|
+
#
|
28
|
+
# - `dependent_queues` should return an array of the names of queues that should be checked,
|
29
|
+
# in order of higher-priority-first. See below for Redis performance notes.
|
30
|
+
# - `calculate_backoff` is passed a queue name and its latency,
|
31
|
+
# and should return either:
|
32
|
+
# - the backoff duration in seconds (ie the argument to `perform_in`),
|
33
|
+
# - 0 to perform the job immediately, or
|
34
|
+
# - nil to check the next queue with latency.
|
35
|
+
# - Note that if all calls to `calculate_backoff` return nil, the job is performed immediately.
|
36
|
+
#
|
37
|
+
# BackoffJob supports multiple dependent queues but it checks them one-at-a-time
|
38
|
+
# to avoid any unnecessary calls to Redis.
|
39
|
+
#
|
40
|
+
# == Redis Impacts
|
41
|
+
#
|
42
|
+
# Using BackoffJob adds an overhead to each perform of a job-
|
43
|
+
# specifically, a call to `Redis.lrange` through the Sidekiq API's Sidekiq:::Queue#latency
|
44
|
+
# potentially for each queue in `dependent_queues`.
|
45
|
+
# This is a fast call (it just gets the last item), but it's not free,
|
46
|
+
# so users should be aware of it.
|
47
|
+
#
|
48
|
+
module Amigo
|
49
|
+
module QueueBackoffJob
|
50
|
+
def self.included(cls)
|
51
|
+
cls.include InstanceMethods
|
52
|
+
cls.prepend PrependedMethods
|
53
|
+
end
|
54
|
+
|
55
|
+
class << self
|
56
|
+
# Reset class state. Mostly used just for testing.
|
57
|
+
def reset
|
58
|
+
@max_backoff = 10
|
59
|
+
is_testing = defined?(::Sidekiq::Testing) && ::Sidekiq::Testing.enabled?
|
60
|
+
@enabled = !is_testing
|
61
|
+
@cache_queue_names = true
|
62
|
+
@cache_latencies = true
|
63
|
+
@all_queue_names = nil
|
64
|
+
@latency_cache_duration = 5
|
65
|
+
@latency_cache = {}
|
66
|
+
end
|
67
|
+
|
68
|
+
# Maximum time into the future a job will reschedule itself for.
|
69
|
+
# Ie, if latency is 30s, and max_backoff is 10, the job will be scheduled
|
70
|
+
# for 10s into the future if it finds backoff pressure.
|
71
|
+
attr_accessor :max_backoff
|
72
|
+
|
73
|
+
# Return true if backoff checks are enabled.
|
74
|
+
attr_accessor :enabled
|
75
|
+
|
76
|
+
def enabled?
|
77
|
+
return @enabled
|
78
|
+
end
|
79
|
+
|
80
|
+
# Cached value of all Sidekiq queues, since they rarely change.
|
81
|
+
# If your queue names change at runtime, set +cache_queue_names+ to false.
|
82
|
+
def all_queue_names
|
83
|
+
return @all_queue_names if @cache_queue_names && @all_queue_names
|
84
|
+
@all_queue_names = ::Sidekiq::Queue.all.map(&:name)
|
85
|
+
return @all_queue_names
|
86
|
+
end
|
87
|
+
|
88
|
+
# Whether all_queue_names should be cached.
|
89
|
+
attr_reader :cache_queue_names
|
90
|
+
|
91
|
+
def cache_queue_names=(v)
|
92
|
+
@cache_queue_names = v
|
93
|
+
@all_queue_names = nil if v == false
|
94
|
+
end
|
95
|
+
|
96
|
+
# Return how long queue latencies should be cached before they are re-fetched from Redis.
|
97
|
+
# Avoids hitting Redis to check latency too often.
|
98
|
+
# Default to 5 seconds. Set to 0 to avoid caching.
|
99
|
+
attr_accessor :latency_cache_duration
|
100
|
+
|
101
|
+
# Check the latency of the queue with the given name.
|
102
|
+
# If the queue has been checked more recently than latency_cache_duration specified,
|
103
|
+
# return the cached value.
|
104
|
+
def check_latency(qname, now: Time.now)
|
105
|
+
return ::Sidekiq::Queue.new(qname).latency if self.latency_cache_duration.zero?
|
106
|
+
cached = @latency_cache[qname]
|
107
|
+
if cached.nil? || (cached[:at] + self.latency_cache_duration) < now
|
108
|
+
@latency_cache[qname] = {at: now, value: ::Sidekiq::Queue.new(qname).latency}
|
109
|
+
end
|
110
|
+
return @latency_cache[qname][:value]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
self.reset
|
114
|
+
|
115
|
+
module InstanceMethods
|
116
|
+
def dependent_queues
|
117
|
+
qname = self.class.get_sidekiq_options["queue"]
|
118
|
+
return ::Amigo::QueueBackoffJob.all_queue_names.reject { |x| x == qname }
|
119
|
+
end
|
120
|
+
|
121
|
+
def calculate_backoff(_queue_name, latency, _args)
|
122
|
+
return [latency, ::Amigo::QueueBackoffJob.max_backoff].min
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
module PrependedMethods
|
127
|
+
def perform(*args)
|
128
|
+
return super unless ::Amigo::QueueBackoffJob.enabled?
|
129
|
+
# rubocop:disable Style/GuardClause, Lint/NonLocalExitFromIterator
|
130
|
+
dependent_queues.each do |qname|
|
131
|
+
latency = Amigo::QueueBackoffJob.check_latency(qname)
|
132
|
+
# If latency is <= 0, we can skip this queue.
|
133
|
+
next unless latency.positive?
|
134
|
+
# If backoff is nil, ignore this queue and check the next
|
135
|
+
# If it's > 0, defer until the future
|
136
|
+
# If it's <= 0, run the job and check no more queues
|
137
|
+
backoff = calculate_backoff(qname, latency, args)
|
138
|
+
next if backoff.nil?
|
139
|
+
if backoff.positive?
|
140
|
+
self.class.perform_in(backoff, *args)
|
141
|
+
return
|
142
|
+
else
|
143
|
+
return super
|
144
|
+
end
|
145
|
+
end
|
146
|
+
# rubocop:enable Style/GuardClause, Lint/NonLocalExitFromIterator
|
147
|
+
super
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "digest"
|
4
|
+
|
5
|
+
# Wrap another Sidekiq error handler so invoking it is rate limited.
|
6
|
+
#
|
7
|
+
# Useful when wrapping a usage-based error reporter like Sentry,
|
8
|
+
# which can be hammered in the case of an issue like connectivity
|
9
|
+
# that causes all jobs and retries to fail.
|
10
|
+
# It is suggested that all errors are still reported to something
|
11
|
+
# like application logs, since entirely silencing errors
|
12
|
+
# can make debugging problems tricky.
|
13
|
+
#
|
14
|
+
# Usage:
|
15
|
+
#
|
16
|
+
# Sidekiq.configure_server do |config|
|
17
|
+
# config.error_handlers << Amigo::RateLimitedErrorHandler.new(
|
18
|
+
# Sentry::Sidekiq::ErrorHandler.new,
|
19
|
+
# sample_rate: ENV.fetch('ASYNC_ERROR_RATE_LIMITER_SAMPLE_RATE', '0.5').to_f,
|
20
|
+
# ttl: ENV.fetch('ASYNC_ERROR_RATE_LIMITER_TTL', '120').to_f,
|
21
|
+
# )
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# See notes about +sample_rate+ and +ttl+,
|
25
|
+
# and +fingerprint+ for how exceptions are fingerprinted for uniqueness.
|
26
|
+
#
|
27
|
+
# Rate limiting is done in-memory so is unique across the entire process-
|
28
|
+
# threads/workers share rate limiting, but multiple processes do not.
|
29
|
+
# So if 2 processes have 10 threads each,
|
30
|
+
# the error handler would be invoked twice if they all error
|
31
|
+
# for the same reason.
|
32
|
+
#
|
33
|
+
# Thread-based limiting (20 errors in the case above)
|
34
|
+
# or cross-process limiting (1 error in the case above)
|
35
|
+
# can be added in the future.
|
36
|
+
module Amigo
|
37
|
+
class RateLimitedErrorHandler
|
38
|
+
# The error handler that will be called to report the error.
|
39
|
+
attr_reader :wrapped
|
40
|
+
|
41
|
+
# After the first error with a fingerprint is seen,
|
42
|
+
# how many future errors with the same fingerprint should we sample,
|
43
|
+
# until the fingerprint expires +ttl+ after the first error?
|
44
|
+
# Use 1 to called the wrapped handler on all errors with the same fingerprint,
|
45
|
+
# and 0 to never call the wrapped handler on those errors until ttl has elapsed.
|
46
|
+
attr_reader :sample_rate
|
47
|
+
|
48
|
+
# How long does the fingerprint live for an error?
|
49
|
+
# For example, with a sample rate of 0 and a ttl of 2 minutes,
|
50
|
+
# the rate will be at most one of the same error every 2 minutes;
|
51
|
+
# the error is always sent when the key is set; then no events are sent until the key expires.
|
52
|
+
#
|
53
|
+
# Note that, unlike Redis TTL, the ttl is set only when the error is first seen
|
54
|
+
# (and then after it's seen once the fingerprint expires);
|
55
|
+
# this means that, if an error is seen once a minute, with a TTL of 2 minutes,
|
56
|
+
# even with a sample rate of 0, an error is recorded every 2 minutes,
|
57
|
+
# rather than just once and never again.
|
58
|
+
attr_reader :ttl
|
59
|
+
|
60
|
+
def initialize(wrapped, sample_rate: 0.1, ttl: 120)
|
61
|
+
@mutex = Mutex.new
|
62
|
+
@wrapped = wrapped
|
63
|
+
@sample_rate = sample_rate
|
64
|
+
@inverse_sample_rate = 1 - @sample_rate
|
65
|
+
@ttl = ttl
|
66
|
+
# Key is fingerprint, value is when to expire
|
67
|
+
@store = {}
|
68
|
+
# Add some fast-paths to handle 0 and 1 sample rates.
|
69
|
+
@call = if sample_rate == 1
|
70
|
+
->(*a) { @wrapped.call(*a) }
|
71
|
+
elsif sample_rate.zero?
|
72
|
+
self.method(:call_zero)
|
73
|
+
else
|
74
|
+
self.method(:call_sampled)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def call(ex, context)
|
79
|
+
@call[ex, context]
|
80
|
+
end
|
81
|
+
|
82
|
+
private def call_zero(ex, context)
|
83
|
+
call_impl(ex, context) { false }
|
84
|
+
end
|
85
|
+
|
86
|
+
private def call_sampled(ex, context)
|
87
|
+
call_impl(ex, context) { rand <= @sample_rate }
|
88
|
+
end
|
89
|
+
|
90
|
+
private def call_impl(ex, context)
|
91
|
+
now = Time.now
|
92
|
+
invoke = @mutex.synchronize do
|
93
|
+
@store.delete_if { |_sig, t| t < now }
|
94
|
+
fingerprint = self.fingerprint(ex)
|
95
|
+
if @store.key?(fingerprint)
|
96
|
+
yield
|
97
|
+
else
|
98
|
+
@store[fingerprint] = now + @ttl
|
99
|
+
true
|
100
|
+
end
|
101
|
+
end
|
102
|
+
@wrapped.call(ex, context) if invoke
|
103
|
+
end
|
104
|
+
|
105
|
+
# Fingerprint an exception.
|
106
|
+
# - No two exceptions with the same class can be the same.
|
107
|
+
# - If an exception has no backtrace (it was manually constructed),
|
108
|
+
# the identity of the exception instance (object_id) is the fingerprint.
|
109
|
+
# - If an exception has a backtrace,
|
110
|
+
# the md5 of the backtrace is the fingerprint.
|
111
|
+
def fingerprint(ex)
|
112
|
+
md5 = Digest::MD5.new
|
113
|
+
md5.update ex.class.to_s
|
114
|
+
if ex.backtrace.nil?
|
115
|
+
md5.update ex.object_id.to_s
|
116
|
+
else
|
117
|
+
ex.backtrace.each { |line| md5.update(line) }
|
118
|
+
end
|
119
|
+
md5.update(self.fingerprint(ex.cause)) if ex.cause
|
120
|
+
return md5.hexdigest
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
data/lib/amigo/retry.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
|
5
|
+
# Middleware so Sidekiq workers can use a custom retry logic.
|
6
|
+
# See +Amigo::Retry::Retry+, +Amigo::Retry::Die+,
|
7
|
+
# and +Amigo::Retry::OrDie+ for more details
|
8
|
+
# on how these should be used.
|
9
|
+
#
|
10
|
+
# NOTE: You MUST register +Amigo::Retry::ServerMiddleware+,
|
11
|
+
# and you SHOULD increase the size of the dead set if you are relying on 'die' behavior:
|
12
|
+
#
|
13
|
+
# Sidekiq.configure_server do |config|
|
14
|
+
# config.options[:dead_max_jobs] = 999_999_999
|
15
|
+
# config.server_middleware.add(Amigo::Retry::ServerMiddleware)
|
16
|
+
# end
|
17
|
+
module Amigo
|
18
|
+
module Retry
|
19
|
+
class Error < StandardError; end
|
20
|
+
|
21
|
+
# Raise this class, or a subclass of it, to schedule a later retry,
|
22
|
+
# rather than using an error to trigger Sidekiq's default retry behavior.
|
23
|
+
# The benefit here is that it allows a consistent, customizable behavior,
|
24
|
+
# so is better for 'expected' errors like rate limiting.
|
25
|
+
class Retry < Error
|
26
|
+
attr_accessor :interval_or_timestamp
|
27
|
+
|
28
|
+
def initialize(interval_or_timestamp, msg=nil)
|
29
|
+
@interval_or_timestamp = interval_or_timestamp
|
30
|
+
super(msg || "retry job in #{interval_or_timestamp}")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Raise this class, or a subclass of it, to send the job to the DeadSet,
|
35
|
+
# rather than going through Sidekiq's retry mechanisms.
|
36
|
+
# This allows jobs to hard-fail when there is something like a total outage,
|
37
|
+
# rather than retrying.
|
38
|
+
class Die < Error
|
39
|
+
end
|
40
|
+
|
41
|
+
# Raise this class, or a subclass of it, to:
|
42
|
+
# - Use +Retry+ exception semantics while the current attempt is <= +attempts+, or
|
43
|
+
# - Use +Die+ exception semantics if the current attempt is > +attempts+.
|
44
|
+
class OrDie < Error
|
45
|
+
attr_reader :attempts, :interval_or_timestamp
|
46
|
+
|
47
|
+
def initialize(attempts, interval_or_timestamp, msg=nil)
|
48
|
+
@attempts = attempts
|
49
|
+
@interval_or_timestamp = interval_or_timestamp
|
50
|
+
super(msg || "retry every #{interval_or_timestamp} up to #{attempts} times")
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class ServerMiddleware
|
55
|
+
def call(worker, job, _queue)
|
56
|
+
yield
|
57
|
+
rescue Amigo::Retry::Retry => e
|
58
|
+
handle_retry(worker, job, e)
|
59
|
+
rescue Amigo::Retry::Die => e
|
60
|
+
handle_die(worker, job, e)
|
61
|
+
rescue Amigo::Retry::OrDie => e
|
62
|
+
handle_retry_or_die(worker, job, e)
|
63
|
+
end
|
64
|
+
|
65
|
+
def handle_retry(worker, job, e)
|
66
|
+
Sidekiq.logger.info("scheduling_retry")
|
67
|
+
self.amigo_retry_in(worker.class, job, e.interval_or_timestamp)
|
68
|
+
end
|
69
|
+
|
70
|
+
def handle_die(_worker, job, _e)
|
71
|
+
Sidekiq.logger.warn("sending_to_deadset")
|
72
|
+
payload = Sidekiq.dump_json(job)
|
73
|
+
Sidekiq::DeadSet.new.kill(payload, notify_failure: false)
|
74
|
+
end
|
75
|
+
|
76
|
+
def handle_retry_or_die(worker, job, e)
|
77
|
+
retry_count = job.fetch("retry_count", 0)
|
78
|
+
if retry_count <= e.attempts
|
79
|
+
handle_retry(worker, job, e)
|
80
|
+
else
|
81
|
+
handle_die(worker, job, e)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def amigo_retry_in(worker_class, item, interval)
|
86
|
+
# pulled from perform_in
|
87
|
+
int = interval.to_f
|
88
|
+
now = Time.now.to_f
|
89
|
+
ts = (int < 1_000_000_000 ? now + int : int)
|
90
|
+
item["at"] = ts if ts > now
|
91
|
+
item["retry_count"] = item.fetch("retry_count", 0) + 1
|
92
|
+
worker_class.client_push(item)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
data/lib/amigo/router.rb
CHANGED
data/lib/amigo/scheduled_job.rb
CHANGED
@@ -5,7 +5,7 @@ require "sidekiq-cron"
|
|
5
5
|
|
6
6
|
require "amigo"
|
7
7
|
|
8
|
-
|
8
|
+
module Amigo
|
9
9
|
module ScheduledJob
|
10
10
|
def self.extended(cls)
|
11
11
|
cls.include(Sidekiq::Worker)
|
@@ -13,6 +13,7 @@ class Amigo
|
|
13
13
|
cls.extend(ClassMethods)
|
14
14
|
cls.splay_duration = 30
|
15
15
|
cls.include(InstanceMethods)
|
16
|
+
Amigo.register_job(cls)
|
16
17
|
end
|
17
18
|
|
18
19
|
module InstanceMethods
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
|
5
|
+
module Amigo
|
6
|
+
# This is a placeholder until it's migrated to Amigo proper
|
7
|
+
end
|
8
|
+
|
9
|
+
# Semaphore backoff jobs can reschedule themselves to happen at a later time
|
10
|
+
# if there is too high a contention on a semaphore.
|
11
|
+
# Ie, if there are too many jobs with the same key,
|
12
|
+
# they start to reschedule themselves for the future.
|
13
|
+
#
|
14
|
+
# This is useful when a certain job (or job for a certain target)
|
15
|
+
# can be slow so should not consume all available resources.
|
16
|
+
#
|
17
|
+
# In general, you should not use semaphore backoff jobs for singletons,
|
18
|
+
# as the guarantees are not strong enough.
|
19
|
+
# It is useful for many rapid jobs.
|
20
|
+
#
|
21
|
+
# Implementers must override the following methods:
|
22
|
+
#
|
23
|
+
# - `semaphore_key` must return the Redis key to use as the semaphore.
|
24
|
+
# This may be something like "sbj-user-1" to limit jobs for user 1.
|
25
|
+
# - `semaphore_size` is the number of concurrent jobs.
|
26
|
+
# Returning 5 would mean at most 5 jobs could be running at a time.
|
27
|
+
#
|
28
|
+
# And may override the following methods:
|
29
|
+
#
|
30
|
+
# - `semaphore_backoff` is called to know when to schedule the backoff retry.
|
31
|
+
# By default, it is 10 seconds, plus between 0 and 10 seconds more,
|
32
|
+
# so the job will be retried in between 10 and 20 seconds.
|
33
|
+
# Return whatever you want for the backoff.
|
34
|
+
# - `semaphore_expiry` should return the TTL of the semaphore key.
|
35
|
+
# Defaults to 30 seconds. See below for key expiry and negative semaphore value details.
|
36
|
+
# - `before_perform` is called before calling the `perform` method.
|
37
|
+
# This is required so that implementers can set worker state, based on job arguments,
|
38
|
+
# that can be used for calculating the semaphore key.
|
39
|
+
#
|
40
|
+
# Note that we give the semaphore key an expiry. This is to avoid situation where
|
41
|
+
# jobs are killed, the decrement is not done, and the counter increases to the point we
|
42
|
+
# have fewer than the expected number of jobs running.
|
43
|
+
#
|
44
|
+
# This does mean that, when a job runs longer than the semaphore expiry,
|
45
|
+
# another worker can be started, which would increment the counter back to 1.
|
46
|
+
# When the original job ends, the counter would be 0; then when the new job ends,
|
47
|
+
# the counter would be -1. To avoid negative counters (which create the same issue
|
48
|
+
# around missing decrements), if we ever detect a negative 'jobs running',
|
49
|
+
# we warn and remove the key entirely.
|
50
|
+
#
|
51
|
+
module Amigo
|
52
|
+
module SemaphoreBackoffJob
|
53
|
+
def self.included(cls)
|
54
|
+
cls.include InstanceMethods
|
55
|
+
cls.prepend PrependedMethods
|
56
|
+
end
|
57
|
+
|
58
|
+
class << self
|
59
|
+
# Reset class state. Mostly used just for testing.
|
60
|
+
def reset
|
61
|
+
is_testing = defined?(::Sidekiq::Testing) && ::Sidekiq::Testing.enabled?
|
62
|
+
@enabled = !is_testing
|
63
|
+
end
|
64
|
+
|
65
|
+
# Return true if backoff checks are enabled.
|
66
|
+
attr_accessor :enabled
|
67
|
+
|
68
|
+
def enabled?
|
69
|
+
return @enabled
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
self.reset
|
74
|
+
|
75
|
+
module InstanceMethods
|
76
|
+
def semaphore_key
|
77
|
+
raise NotImplementedError, "must be implemented on worker"
|
78
|
+
end
|
79
|
+
|
80
|
+
def semaphore_size
|
81
|
+
raise NotImplementedError, "must be implemented on worker"
|
82
|
+
end
|
83
|
+
|
84
|
+
def semaphore_backoff
|
85
|
+
return 10 + (rand * 10)
|
86
|
+
end
|
87
|
+
|
88
|
+
def semaphore_expiry
|
89
|
+
return 30
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
module PrependedMethods
|
94
|
+
def perform(*args)
|
95
|
+
self.before_perform(*args) if self.respond_to?(:before_perform)
|
96
|
+
return super unless ::Amigo::SemaphoreBackoffJob.enabled?
|
97
|
+
key = self.semaphore_key
|
98
|
+
size = self.semaphore_size
|
99
|
+
# Create a simple counter for the semaphore key.
|
100
|
+
# Always increment; also set an expiration if this is the first job.
|
101
|
+
# If we need to retry later, make sure we decrement, then schedule for the future.
|
102
|
+
# If we run it now, decrement the counter afterwards.
|
103
|
+
# If some corruption results in a negative number of jobs in the semaphore,
|
104
|
+
# we can delete the key and get back to a default state
|
105
|
+
# (this can cause problems but the idea is that
|
106
|
+
# we should run at least the configured number of jobs,
|
107
|
+
# and eventually the semaphore key will expire/get rebalanced).
|
108
|
+
jobs_in_semaphore = Sidekiq.redis do |conn|
|
109
|
+
cnt = conn.incr(key)
|
110
|
+
conn.expire(key, self.semaphore_expiry) if cnt == 1
|
111
|
+
cnt
|
112
|
+
end
|
113
|
+
if jobs_in_semaphore > size
|
114
|
+
Sidekiq.redis { |conn| conn.decr(key) }
|
115
|
+
backoff = self.semaphore_backoff
|
116
|
+
self.class.perform_in(backoff, *args)
|
117
|
+
return
|
118
|
+
end
|
119
|
+
begin
|
120
|
+
super
|
121
|
+
ensure
|
122
|
+
Sidekiq.redis do |conn|
|
123
|
+
new_job_count = conn.decr(key)
|
124
|
+
if new_job_count.negative?
|
125
|
+
conn.del(key)
|
126
|
+
Sidekiq.logger.warn("negative_semaphore_backoff_job_count")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
data/lib/amigo/spec_helpers.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "amigo"
|
4
|
+
require "sidekiq/worker"
|
4
5
|
|
5
|
-
|
6
|
+
module Amigo
|
6
7
|
module SpecHelpers
|
7
8
|
def self.included(context)
|
8
9
|
context.before(:each) do |example|
|
@@ -121,7 +122,7 @@ class Amigo
|
|
121
122
|
|
122
123
|
@missing.each do |event, payload|
|
123
124
|
message = "expected a '%s' event to be fired" % [event]
|
124
|
-
message << " with a payload of %p" % [payload] unless payload.nil?
|
125
|
+
message << (" with a payload of %p" % [payload]) unless payload.nil?
|
125
126
|
message << " but none was."
|
126
127
|
|
127
128
|
messages << message
|
@@ -131,7 +132,7 @@ class Amigo
|
|
131
132
|
messages << "No events were sent."
|
132
133
|
else
|
133
134
|
parts = @recorded_events.map(&:inspect)
|
134
|
-
messages << "The following events were recorded: %s" % [parts.join(", ")]
|
135
|
+
messages << ("The following events were recorded: %s" % [parts.join(", ")])
|
135
136
|
end
|
136
137
|
|
137
138
|
return messages.join("\n")
|
@@ -141,7 +142,7 @@ class Amigo
|
|
141
142
|
messages = []
|
142
143
|
@matched.each do |event, _payload|
|
143
144
|
message = "expected a '%s' event not to be fired" % [event]
|
144
|
-
message << " with a payload of %p" % [@expected_payload] if @expected_payload
|
145
|
+
message << (" with a payload of %p" % [@expected_payload]) if @expected_payload
|
145
146
|
message << " but one was."
|
146
147
|
messages << message
|
147
148
|
end
|
@@ -229,5 +230,73 @@ class Amigo
|
|
229
230
|
def perform_async_job(job)
|
230
231
|
return PerformAsyncJobMatcher.new(job)
|
231
232
|
end
|
233
|
+
|
234
|
+
# Like a Sidekiq worker's perform_inline,
|
235
|
+
# but allows an arbitrary item to be used, rather than just the
|
236
|
+
# given class and args. For example, when testing,
|
237
|
+
# you may need to assume something like 'retry_count' is in the job payload,
|
238
|
+
# but that can't be included with perform_inline.
|
239
|
+
# This allows those arbitrary job payload fields
|
240
|
+
# to be included when the job is run.
|
241
|
+
module_function def sidekiq_perform_inline(klass, args, item=nil)
|
242
|
+
Sidekiq::Worker::Setter.override_item = item
|
243
|
+
begin
|
244
|
+
klass.perform_inline(*args)
|
245
|
+
ensure
|
246
|
+
Sidekiq::Worker::Setter.override_item = nil
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
module_function def drain_sidekiq_jobs(q)
|
251
|
+
all_sidekiq_jobs(q).each do |job|
|
252
|
+
klass = job.item.fetch("class")
|
253
|
+
klass = Sidekiq::Testing.constantize(klass) if klass.is_a?(String)
|
254
|
+
sidekiq_perform_inline(klass, job.item["args"], job.item)
|
255
|
+
job.delete
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
module_function def all_sidekiq_jobs(q)
|
260
|
+
arr = []
|
261
|
+
q.each { |j| arr << j }
|
262
|
+
return arr
|
263
|
+
end
|
264
|
+
|
265
|
+
# Use this middleware to pass an arbitrary callback evaluated before a job runs.
|
266
|
+
# Make sure to call +reset+ after the test.
|
267
|
+
class ServerCallbackMiddleware
|
268
|
+
class << self
|
269
|
+
attr_accessor :callback
|
270
|
+
end
|
271
|
+
|
272
|
+
def self.reset
|
273
|
+
self.callback = nil
|
274
|
+
return self
|
275
|
+
end
|
276
|
+
|
277
|
+
def self.new
|
278
|
+
return self
|
279
|
+
end
|
280
|
+
|
281
|
+
def self.call(worker, job, queue)
|
282
|
+
self.callback[worker, job, queue] if self.callback
|
283
|
+
yield
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
module ::Sidekiq
|
290
|
+
module Worker
|
291
|
+
class Setter
|
292
|
+
class << self
|
293
|
+
attr_accessor :override_item
|
294
|
+
end
|
295
|
+
def normalize_item(item)
|
296
|
+
result = super
|
297
|
+
result.merge!(self.class.override_item || {})
|
298
|
+
return result
|
299
|
+
end
|
300
|
+
end
|
232
301
|
end
|
233
302
|
end
|
data/lib/amigo/version.rb
CHANGED
data/lib/amigo.rb
CHANGED
@@ -97,18 +97,21 @@ require "sidekiq-cron"
|
|
97
97
|
# Splay exists to avoid a "thundering herd" issue.
|
98
98
|
# Splay defaults to 30s; you may wish to always provide splay, whatever you think for your job.
|
99
99
|
#
|
100
|
-
|
100
|
+
module Amigo
|
101
101
|
class Error < StandardError; end
|
102
102
|
|
103
103
|
class StartSchedulerFailed < Error; end
|
104
104
|
|
105
105
|
class << self
|
106
|
-
attr_accessor :structured_logging
|
106
|
+
attr_accessor :structured_logging, :audit_logger_class, :router_class
|
107
107
|
|
108
108
|
# Proc called with [job, level, message, params].
|
109
109
|
# By default, logs to the job's logger (or Sidekiq's if job is nil).
|
110
|
-
# If structured_logging is true, the message will be an 'event'
|
111
|
-
#
|
110
|
+
# If structured_logging is true, the message will be an 'event' string (like 'registered_subscriber')
|
111
|
+
# without any dynamic info.
|
112
|
+
# If structured_logging is false, the params will be rendered into the message
|
113
|
+
# so are suitable for unstructured logging. Also, the params will also have an :log_message key
|
114
|
+
# which will contain the original log message.
|
112
115
|
attr_accessor :log_callback
|
113
116
|
|
114
117
|
def reset_logging
|
@@ -118,8 +121,9 @@ class Amigo
|
|
118
121
|
|
119
122
|
def log(job, level, message, params)
|
120
123
|
params ||= {}
|
121
|
-
if self.structured_logging && !params.empty?
|
124
|
+
if !self.structured_logging && !params.empty?
|
122
125
|
paramstr = params.map { |k, v| "#{k}=#{v}" }.join(" ")
|
126
|
+
params[:log_message] = message
|
123
127
|
message = "#{message} #{paramstr}"
|
124
128
|
end
|
125
129
|
self.log_callback[job, level, message, params]
|
@@ -180,8 +184,6 @@ class Amigo
|
|
180
184
|
# Install Amigo so that every publish will be sent to the AuditLogger job
|
181
185
|
# and will invoke the relevant jobs in registered_jobs via the Router job.
|
182
186
|
def install_amigo_jobs
|
183
|
-
require "amigo/audit_logger"
|
184
|
-
require "amigo/router"
|
185
187
|
return self.register_subscriber do |ev|
|
186
188
|
self._subscriber(ev)
|
187
189
|
end
|
@@ -189,12 +191,13 @@ class Amigo
|
|
189
191
|
|
190
192
|
def _subscriber(event)
|
191
193
|
event_json = event.as_json
|
192
|
-
|
193
|
-
|
194
|
+
self.audit_logger_class.perform_async(event_json)
|
195
|
+
self.router_class.perform_async(event_json)
|
194
196
|
end
|
195
197
|
|
196
198
|
def register_job(job)
|
197
199
|
self.registered_jobs << job
|
200
|
+
self.registered_jobs.uniq!
|
198
201
|
end
|
199
202
|
|
200
203
|
# Start the scheduler.
|
@@ -276,3 +279,8 @@ Amigo.synchronous_mode = false
|
|
276
279
|
Amigo.registered_jobs = []
|
277
280
|
Amigo.subscribers = Set.new
|
278
281
|
Amigo.on_publish_error = proc {}
|
282
|
+
|
283
|
+
require "amigo/audit_logger"
|
284
|
+
require "amigo/router"
|
285
|
+
Amigo.audit_logger_class = Amigo::AuditLogger
|
286
|
+
Amigo.router_class = Amigo::Router
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sidekiq-amigo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lithic Technology
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sidekiq
|
@@ -135,14 +135,19 @@ files:
|
|
135
135
|
- lib/amigo/audit_logger.rb
|
136
136
|
- lib/amigo/deprecated_jobs.rb
|
137
137
|
- lib/amigo/job.rb
|
138
|
+
- lib/amigo/queue_backoff_job.rb
|
139
|
+
- lib/amigo/rate_limited_error_handler.rb
|
140
|
+
- lib/amigo/retry.rb
|
138
141
|
- lib/amigo/router.rb
|
139
142
|
- lib/amigo/scheduled_job.rb
|
143
|
+
- lib/amigo/semaphore_backoff_job.rb
|
140
144
|
- lib/amigo/spec_helpers.rb
|
141
145
|
- lib/amigo/version.rb
|
142
146
|
homepage: https://github.com/lithictech/sidekiq-amigo
|
143
147
|
licenses:
|
144
148
|
- MIT
|
145
|
-
metadata:
|
149
|
+
metadata:
|
150
|
+
rubygems_mfa_required: 'true'
|
146
151
|
post_install_message:
|
147
152
|
rdoc_options: []
|
148
153
|
require_paths:
|