sidekiq-amigo 1.2.2 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53361c37c9ec31b3886fb7b33ff61bd022802128b458f93e624c4f0d0cbcc963
4
- data.tar.gz: 54a9e75e5b011d6441b8906404a0d856394e19c4d27e66e7580c37d4928e3d41
3
+ metadata.gz: ef7c94e7a10ea6d0023d775d364242fb3f361bd782a142ce6e969756d4067d14
4
+ data.tar.gz: a4fda58bbee1d07b9e3e009925d6ad7fd85d5cd378f837667b4ee2bb07adecb7
5
5
  SHA512:
6
- metadata.gz: b469583cc5c2e339d359834763d3a67c154e9977212b35f039623f937b4b55997b7a523ba8f5c414898295c18c65d401a020b9679c4d695c50d636ef5c7334db
7
- data.tar.gz: ab70f5da69a2bb8868c9ec864c760eb1d6fc124a99d71e2ad332a8dccd7ba8503971dfb0ee3a60d19d122e5ce079b5f9a4dc6a5a3df2a8c05057b19f2963c172
6
+ metadata.gz: 8d86da5a86d2bbdb5251552ab925ddb7e2b14231c9ddce0ea26b9d2c35ce96227addb95345f4e9b309bca6c4951b13601237d62040cfde69776a4d84c08d90bc
7
+ data.tar.gz: 38002ddf39af631ec5bee7ec6cdee6fdfb127094f41279719b53d4a85313444c80001a84b67bf10f5bc6c7e239adfa2aae6d4fda5ec4e8d6de6d13b6dc065ffa
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq/api"
4
+
5
+ require "amigo"
6
+
7
+ # When queues achieve a latency that is too high,
8
+ # take some action.
9
+ # You should start this up at Sidekiq application startup:
10
+ #
11
+ # # sidekiq.rb
12
+ # Amigo::Autoscaler.new.start
13
+ #
14
+ # Right now, this is pretty simple- we alert any time
15
+ # there is a latency over a threshold.
16
+ #
17
+ # In the future, we can:
18
+ #
19
+ # 1) actually autoscale rather than just alert
20
+ # (this may take the form of a POST to a configurable endpoint),
21
+ # 2) become more sophisticated with how we detect latency growth.
22
+ #
23
+ module Amigo
24
+ class Autoscaler
25
+ class InvalidHandler < StandardError; end
26
+
27
+ # How often should Autoscaler check for latency?
28
+ # @return [Integer]
29
+ attr_reader :poll_interval
30
+ # What latency should we alert on?
31
+ # @return [Integer]
32
+ attr_reader :latency_threshold
33
+ # What hosts/processes should this run on?
34
+ # Look at ENV['DYNO'] and Socket.gethostname.
35
+ # Default to only run on 'web.1', which is the first Heroku web dyno.
36
+ # We run on the web, not worker, dyno, so we report backed up queues
37
+ # in case we, say, turn off all workers (broken web processes
38
+ # are generally easier to find).
39
+ # @return [Regexp]
40
+ attr_reader :hostname_regex
41
+ # Methods to call when alerting.
42
+ # Valid values are 'log' and 'sentry' (requires Sentry to be required already).
43
+ # Anything that responds to +call+ will be invoked with a hash of
44
+ # `{queue name => latency in seconds}`.
45
+ # @return [Array<String,Proc>]
46
+ attr_reader :handlers
47
+ # Only alert this often.
48
+ # For example, with poll_interval of 10 seconds
49
+ # and alert_interval of 200 seconds,
50
+ # we'd alert once and then 210 seconds later.
51
+ # @return [Integer]
52
+ attr_reader :alert_interval
53
+
54
+ def initialize(
55
+ poll_interval: 20,
56
+ latency_threshold: 5,
57
+ hostname_regex: /^web\.1$/,
58
+ handlers: ["log"],
59
+ alert_interval: 120
60
+ )
61
+
62
+ @poll_interval = poll_interval
63
+ @latency_threshold = latency_threshold
64
+ @hostname_regex = hostname_regex
65
+ @handlers = handlers
66
+ @alert_interval = alert_interval
67
+ end
68
+
69
+ def polling_thread
70
+ return @polling_thread
71
+ end
72
+
73
+ def setup
74
+ # Store these as strings OR procs, rather than grabbing self.method here.
75
+ # It gets extremely hard to test if we capture the method here.
76
+ @alert_methods = self.handlers.map do |a|
77
+ if a.respond_to?(:call)
78
+ a
79
+ else
80
+ method_name = meth = "alert_#{a.strip}".to_sym
81
+ raise InvalidHandler, a.inspect unless self.method(method_name)
82
+ meth
83
+ end
84
+ end
85
+ @last_alerted = Time.at(0)
86
+ @stop = false
87
+ end
88
+
89
+ def start
90
+ raise "already started" unless @polling_thread.nil?
91
+
92
+ hostname = ENV.fetch("DYNO") { Socket.gethostname }
93
+ return false unless self.hostname_regex.match?(hostname)
94
+
95
+ self.log(:info, "async_autoscaler_starting")
96
+ self.setup
97
+ @polling_thread = Thread.new do
98
+ until @stop
99
+ Kernel.sleep(self.poll_interval)
100
+ self.check unless @stop
101
+ end
102
+ end
103
+ return true
104
+ end
105
+
106
+ def stop
107
+ @stop = true
108
+ end
109
+
110
+ def check
111
+ now = Time.now
112
+ skip_check = now < (@last_alerted + self.poll_interval)
113
+ if skip_check
114
+ self.log(:debug, "async_autoscaler_skip_check")
115
+ return
116
+ end
117
+ self.log(:info, "async_autoscaler_check")
118
+ high_latency_queues = Sidekiq::Queue.all.
119
+ map { |q| [q.name, q.latency] }.
120
+ select { |(_, latency)| latency > self.latency_threshold }.
121
+ to_h
122
+ return if high_latency_queues.empty?
123
+ @alert_methods.each do |m|
124
+ m.respond_to?(:call) ? m.call(high_latency_queues) : self.send(m, high_latency_queues)
125
+ end
126
+ @last_alerted = now
127
+ end
128
+
129
+ def alert_sentry(names_and_latencies)
130
+ Sentry.with_scope do |scope|
131
+ scope.set_extras(high_latency_queues: names_and_latencies)
132
+ names = names_and_latencies.map(&:first).sort.join(", ")
133
+ Sentry.capture_message("Some queues have a high latency: #{names}")
134
+ end
135
+ end
136
+
137
+ def alert_log(names_and_latencies)
138
+ self.log(:warn, "high_latency_queues", queues: names_and_latencies)
139
+ end
140
+
141
+ def alert_test(_names_and_latencies); end
142
+
143
+ protected def log(level, msg, **kw)
144
+ Amigo.log(nil, level, msg, kw)
145
+ end
146
+ end
147
+ end
data/lib/amigo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Amigo
4
- VERSION = "1.2.2"
4
+ VERSION = "1.4.0"
5
5
  end
data/lib/amigo.rb CHANGED
@@ -140,7 +140,18 @@ module Amigo
140
140
  # An Array of callbacks to be run when an event is published.
141
141
  attr_accessor :subscribers
142
142
 
143
- # A single callback to be run when an event publication errors.
143
+ # A single callback to be run when an event publication errors,
144
+ # almost always due to an error in a subscriber.
145
+ #
146
+ # The callback receives the exception, the event being published, and the erroring subscriber.
147
+ #
148
+ # If this is not set, errors from subscribers will be re-raised immediately,
149
+ # since broken subscribers usually indicate a broken application.
150
+ #
151
+ # Note also that when an error occurs, Amigo.log is always called first.
152
+ # You do NOT need a callback that just logs and swallows the error.
153
+ # If all you want to do is log, and not propagate the error,
154
+ # you can use `Amigo.on_publish_error = proc {}`.
144
155
  attr_accessor :on_publish_error
145
156
 
146
157
  # Publish an event with the specified +eventname+ and +payload+
@@ -151,12 +162,18 @@ module Amigo
151
162
  self.subscribers.to_a.each do |hook|
152
163
  hook.call(ev)
153
164
  rescue StandardError => e
154
- self.log(nil, :error, "amigo_subscriber_hook_error", error: e, hook: hook, event: ev)
155
- self.on_publish_error.call(e)
165
+ self.log(nil, :error, "amigo_subscriber_hook_error", error: e, hook: hook, event: ev&.as_json)
166
+ raise e if self.on_publish_error.nil?
167
+ if self.on_publish_error.respond_to?(:arity) && self.on_publish_error.arity == 1
168
+ self.on_publish_error.call(e)
169
+ else
170
+ self.on_publish_error.call(e, ev, hook)
171
+ end
156
172
  end
157
173
  end
158
174
 
159
175
  # Register a hook to be called when an event is sent.
176
+ # If a subscriber errors, on_publish_error is called with the exception, event, and subscriber.
160
177
  def register_subscriber(&block)
161
178
  raise LocalJumpError, "no block given" unless block
162
179
  self.log nil, :info, "amigo_installed_subscriber", block: block
@@ -191,7 +208,15 @@ module Amigo
191
208
 
192
209
  def _subscriber(event)
193
210
  event_json = event.as_json
194
- self.audit_logger_class.perform_async(event_json)
211
+ begin
212
+ self.audit_logger_class.perform_async(event_json)
213
+ rescue StandardError => e
214
+ # If the audit logger cannot perform, let's say because Redis is down,
215
+ # we can run the job manually. This is pretty important for anything used for auditing;
216
+ # it should be as resilient as possible.
217
+ self.log(nil, :error, "amigo_audit_log_subscriber_error", error: e, event: event_json)
218
+ self.audit_logger_class.new.perform(event_json)
219
+ end
195
220
  self.router_class.perform_async(event_json)
196
221
  end
197
222
 
@@ -272,7 +297,6 @@ Amigo.reset_logging
272
297
  Amigo.synchronous_mode = false
273
298
  Amigo.registered_jobs = []
274
299
  Amigo.subscribers = Set.new
275
- Amigo.on_publish_error = proc {}
276
300
 
277
301
  require "amigo/audit_logger"
278
302
  require "amigo/router"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sidekiq-amigo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.2
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lithic Technology
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-05 00:00:00.000000000 Z
11
+ date: 2022-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sidekiq
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.10'
111
+ - !ruby/object:Gem::Dependency
112
+ name: sentry-ruby
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '5'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '5'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: timecop
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -133,6 +147,7 @@ extra_rdoc_files: []
133
147
  files:
134
148
  - lib/amigo.rb
135
149
  - lib/amigo/audit_logger.rb
150
+ - lib/amigo/autoscaler.rb
136
151
  - lib/amigo/deprecated_jobs.rb
137
152
  - lib/amigo/job.rb
138
153
  - lib/amigo/queue_backoff_job.rb