sidekiq-amigo 1.2.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/amigo/autoscaler.rb +147 -0
- data/lib/amigo/version.rb +1 -1
- data/lib/amigo.rb +29 -5
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef7c94e7a10ea6d0023d775d364242fb3f361bd782a142ce6e969756d4067d14
|
4
|
+
data.tar.gz: a4fda58bbee1d07b9e3e009925d6ad7fd85d5cd378f837667b4ee2bb07adecb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d86da5a86d2bbdb5251552ab925ddb7e2b14231c9ddce0ea26b9d2c35ce96227addb95345f4e9b309bca6c4951b13601237d62040cfde69776a4d84c08d90bc
|
7
|
+
data.tar.gz: 38002ddf39af631ec5bee7ec6cdee6fdfb127094f41279719b53d4a85313444c80001a84b67bf10f5bc6c7e239adfa2aae6d4fda5ec4e8d6de6d13b6dc065ffa
|
data/lib/amigo/autoscaler.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq/api"
|
4
|
+
|
5
|
+
require "amigo"
|
6
|
+
|
7
|
+
# When queues achieve a latency that is too high,
|
8
|
+
# take some action.
|
9
|
+
# You should start this up at Sidekiq application startup:
|
10
|
+
#
|
11
|
+
# # sidekiq.rb
|
12
|
+
# Amigo::Autoscaler.new.start
|
13
|
+
#
|
14
|
+
# Right now, this is pretty simple- we alert any time
|
15
|
+
# there is a latency over a threshold.
|
16
|
+
#
|
17
|
+
# In the future, we can:
|
18
|
+
#
|
19
|
+
# 1) actually autoscale rather than just alert
|
20
|
+
# (this may take the form of a POST to a configurable endpoint),
|
21
|
+
# 2) become more sophisticated with how we detect latency growth.
|
22
|
+
#
|
23
|
+
module Amigo
|
24
|
+
class Autoscaler
|
25
|
+
class InvalidHandler < StandardError; end
|
26
|
+
|
27
|
+
# How often should Autoscaler check for latency?
|
28
|
+
# @return [Integer]
|
29
|
+
attr_reader :poll_interval
|
30
|
+
# What latency should we alert on?
|
31
|
+
# @return [Integer]
|
32
|
+
attr_reader :latency_threshold
|
33
|
+
# What hosts/processes should this run on?
|
34
|
+
# Look at ENV['DYNO'] and Socket.gethostname.
|
35
|
+
# Default to only run on 'web.1', which is the first Heroku web dyno.
|
36
|
+
# We run on the web, not worker, dyno, so we report backed up queues
|
37
|
+
# in case we, say, turn off all workers (broken web processes
|
38
|
+
# are generally easier to find).
|
39
|
+
# @return [Regexp]
|
40
|
+
attr_reader :hostname_regex
|
41
|
+
# Methods to call when alerting.
|
42
|
+
# Valid values are 'log' and 'sentry' (requires Sentry to be required already).
|
43
|
+
# Anything that responds to +call+ will be invoked with a hash of
|
44
|
+
# `{queue name => latency in seconds}`.
|
45
|
+
# @return [Array<String,Proc>]
|
46
|
+
attr_reader :handlers
|
47
|
+
# Only alert this often.
|
48
|
+
# For example, with poll_interval of 10 seconds
|
49
|
+
# and alert_interval of 200 seconds,
|
50
|
+
# we'd alert once and then 210 seconds later.
|
51
|
+
# @return [Integer]
|
52
|
+
attr_reader :alert_interval
|
53
|
+
|
54
|
+
def initialize(
|
55
|
+
poll_interval: 20,
|
56
|
+
latency_threshold: 5,
|
57
|
+
hostname_regex: /^web\.1$/,
|
58
|
+
handlers: ["log"],
|
59
|
+
alert_interval: 120
|
60
|
+
)
|
61
|
+
|
62
|
+
@poll_interval = poll_interval
|
63
|
+
@latency_threshold = latency_threshold
|
64
|
+
@hostname_regex = hostname_regex
|
65
|
+
@handlers = handlers
|
66
|
+
@alert_interval = alert_interval
|
67
|
+
end
|
68
|
+
|
69
|
+
def polling_thread
|
70
|
+
return @polling_thread
|
71
|
+
end
|
72
|
+
|
73
|
+
def setup
|
74
|
+
# Store these as strings OR procs, rather than grabbing self.method here.
|
75
|
+
# It gets extremely hard to test if we capture the method here.
|
76
|
+
@alert_methods = self.handlers.map do |a|
|
77
|
+
if a.respond_to?(:call)
|
78
|
+
a
|
79
|
+
else
|
80
|
+
method_name = meth = "alert_#{a.strip}".to_sym
|
81
|
+
raise InvalidHandler, a.inspect unless self.method(method_name)
|
82
|
+
meth
|
83
|
+
end
|
84
|
+
end
|
85
|
+
@last_alerted = Time.at(0)
|
86
|
+
@stop = false
|
87
|
+
end
|
88
|
+
|
89
|
+
def start
|
90
|
+
raise "already started" unless @polling_thread.nil?
|
91
|
+
|
92
|
+
hostname = ENV.fetch("DYNO") { Socket.gethostname }
|
93
|
+
return false unless self.hostname_regex.match?(hostname)
|
94
|
+
|
95
|
+
self.log(:info, "async_autoscaler_starting")
|
96
|
+
self.setup
|
97
|
+
@polling_thread = Thread.new do
|
98
|
+
until @stop
|
99
|
+
Kernel.sleep(self.poll_interval)
|
100
|
+
self.check unless @stop
|
101
|
+
end
|
102
|
+
end
|
103
|
+
return true
|
104
|
+
end
|
105
|
+
|
106
|
+
def stop
|
107
|
+
@stop = true
|
108
|
+
end
|
109
|
+
|
110
|
+
def check
|
111
|
+
now = Time.now
|
112
|
+
skip_check = now < (@last_alerted + self.poll_interval)
|
113
|
+
if skip_check
|
114
|
+
self.log(:debug, "async_autoscaler_skip_check")
|
115
|
+
return
|
116
|
+
end
|
117
|
+
self.log(:info, "async_autoscaler_check")
|
118
|
+
high_latency_queues = Sidekiq::Queue.all.
|
119
|
+
map { |q| [q.name, q.latency] }.
|
120
|
+
select { |(_, latency)| latency > self.latency_threshold }.
|
121
|
+
to_h
|
122
|
+
return if high_latency_queues.empty?
|
123
|
+
@alert_methods.each do |m|
|
124
|
+
m.respond_to?(:call) ? m.call(high_latency_queues) : self.send(m, high_latency_queues)
|
125
|
+
end
|
126
|
+
@last_alerted = now
|
127
|
+
end
|
128
|
+
|
129
|
+
def alert_sentry(names_and_latencies)
|
130
|
+
Sentry.with_scope do |scope|
|
131
|
+
scope.set_extras(high_latency_queues: names_and_latencies)
|
132
|
+
names = names_and_latencies.map(&:first).sort.join(", ")
|
133
|
+
Sentry.capture_message("Some queues have a high latency: #{names}")
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
def alert_log(names_and_latencies)
|
138
|
+
self.log(:warn, "high_latency_queues", queues: names_and_latencies)
|
139
|
+
end
|
140
|
+
|
141
|
+
def alert_test(_names_and_latencies); end
|
142
|
+
|
143
|
+
protected def log(level, msg, **kw)
|
144
|
+
Amigo.log(nil, level, msg, kw)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
data/lib/amigo/version.rb
CHANGED
data/lib/amigo.rb
CHANGED
@@ -140,7 +140,18 @@ module Amigo
|
|
140
140
|
# An Array of callbacks to be run when an event is published.
|
141
141
|
attr_accessor :subscribers
|
142
142
|
|
143
|
-
# A single callback to be run when an event publication errors
|
143
|
+
# A single callback to be run when an event publication errors,
|
144
|
+
# almost always due to an error in a subscriber.
|
145
|
+
#
|
146
|
+
# The callback receives the exception, the event being published, and the erroring subscriber.
|
147
|
+
#
|
148
|
+
# If this is not set, errors from subscribers will be re-raised immediately,
|
149
|
+
# since broken subscribers usually indicate a broken application.
|
150
|
+
#
|
151
|
+
# Note also that when an error occurs, Amigo.log is always called first.
|
152
|
+
# You do NOT need a callback that just logs and swallows the error.
|
153
|
+
# If all you want to do is log, and not propagate the error,
|
154
|
+
# you can use `Amigo.on_publish_error = proc {}`.
|
144
155
|
attr_accessor :on_publish_error
|
145
156
|
|
146
157
|
# Publish an event with the specified +eventname+ and +payload+
|
@@ -151,12 +162,18 @@ module Amigo
|
|
151
162
|
self.subscribers.to_a.each do |hook|
|
152
163
|
hook.call(ev)
|
153
164
|
rescue StandardError => e
|
154
|
-
self.log(nil, :error, "amigo_subscriber_hook_error", error: e, hook: hook, event: ev)
|
155
|
-
self.on_publish_error.
|
165
|
+
self.log(nil, :error, "amigo_subscriber_hook_error", error: e, hook: hook, event: ev&.as_json)
|
166
|
+
raise e if self.on_publish_error.nil?
|
167
|
+
if self.on_publish_error.respond_to?(:arity) && self.on_publish_error.arity == 1
|
168
|
+
self.on_publish_error.call(e)
|
169
|
+
else
|
170
|
+
self.on_publish_error.call(e, ev, hook)
|
171
|
+
end
|
156
172
|
end
|
157
173
|
end
|
158
174
|
|
159
175
|
# Register a hook to be called when an event is sent.
|
176
|
+
# If a subscriber errors, on_publish_error is called with the exception, event, and subscriber.
|
160
177
|
def register_subscriber(&block)
|
161
178
|
raise LocalJumpError, "no block given" unless block
|
162
179
|
self.log nil, :info, "amigo_installed_subscriber", block: block
|
@@ -191,7 +208,15 @@ module Amigo
|
|
191
208
|
|
192
209
|
def _subscriber(event)
|
193
210
|
event_json = event.as_json
|
194
|
-
|
211
|
+
begin
|
212
|
+
self.audit_logger_class.perform_async(event_json)
|
213
|
+
rescue StandardError => e
|
214
|
+
# If the audit logger cannot perform, let's say because Redis is down,
|
215
|
+
# we can run the job manually. This is pretty important for anything used for auditing;
|
216
|
+
# it should be as resilient as possible.
|
217
|
+
self.log(nil, :error, "amigo_audit_log_subscriber_error", error: e, event: event_json)
|
218
|
+
self.audit_logger_class.new.perform(event_json)
|
219
|
+
end
|
195
220
|
self.router_class.perform_async(event_json)
|
196
221
|
end
|
197
222
|
|
@@ -272,7 +297,6 @@ Amigo.reset_logging
|
|
272
297
|
Amigo.synchronous_mode = false
|
273
298
|
Amigo.registered_jobs = []
|
274
299
|
Amigo.subscribers = Set.new
|
275
|
-
Amigo.on_publish_error = proc {}
|
276
300
|
|
277
301
|
require "amigo/audit_logger"
|
278
302
|
require "amigo/router"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sidekiq-amigo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lithic Technology
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sidekiq
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '1.10'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: sentry-ruby
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '5'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '5'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: timecop
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -133,6 +147,7 @@ extra_rdoc_files: []
|
|
133
147
|
files:
|
134
148
|
- lib/amigo.rb
|
135
149
|
- lib/amigo/audit_logger.rb
|
150
|
+
- lib/amigo/autoscaler.rb
|
136
151
|
- lib/amigo/deprecated_jobs.rb
|
137
152
|
- lib/amigo/job.rb
|
138
153
|
- lib/amigo/queue_backoff_job.rb
|