sidekiq-amigo 1.12.1 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab4f66f8d1d73bf0c9d9569ba86b4041d7642f86b8d10c246aa2eb155b3b6376
4
- data.tar.gz: 6f5726a5708706dcbff900c5dcfde0656ab797a67a698608249abb96b2e1d93f
3
+ metadata.gz: 70bb0febcaf8c1a36e19fc4dd56fb766be4fa046c941b1f2aa66b7dcf5372872
4
+ data.tar.gz: 620b2c3a6bf664931eb8a6e84cc7d8762597f4aae91aa17765bc1d1fb0b432ad
5
5
  SHA512:
6
- metadata.gz: 9ef0b460702cafafd2c25119434ef11fda3dc958de6ad575c2a3faae18ba5bd97de64302aa8facf27a97a87476be2ca3947faa9f5f65f68df0c078610e1d6239
7
- data.tar.gz: cf55e3a4308cb10cd81c4f4831cc1ab967ebd2614adaf62889c192c1b439cbea694928a57b525beb4fafe992fb5a5242ab6cecad2e79ee7888341f9c9c104057
6
+ metadata.gz: 20bcdab3b140069cd0eeb6a559032e8e4d986ffdc6bdd7ed03030a45b5b83a50600bb7d465736a0a7dcdd2e369a2a054bb2f968a77111ec5cef807876d19d8f0
7
+ data.tar.gz: ddbb6ac910db1687c6e5f471f965d2b5f2dc78b650564f28c24153e8ce1d1d7b350f03da4b0571673084865ac8c9c5f9a27453e090ad94f6b12f9507649dde6d
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "amigo/autoscaler"
4
+
5
+ module Amigo
6
+ class Autoscaler
7
+ module Checkers
8
+ class Chain < Amigo::Autoscaler::Checker
9
+ attr_accessor :chain
10
+
11
+ # Chain multiple checkers together.
12
+ # Latencies are merged, with the highest latency winning.
13
+ # For pool usage, the highest value takes precedence.
14
+ # @param chain [Array<Amigo::Autoscaler::Checker>]
15
+ def initialize(chain)
16
+ @chain = chain
17
+ super()
18
+ end
19
+
20
+ def get_latencies
21
+ h = {}
22
+ @chain.each do |c|
23
+ c.get_latencies.each do |k, v|
24
+ h[k] = [h[k], v].compact.max
25
+ end
26
+ end
27
+ return h
28
+ end
29
+
30
+ def get_pool_usage
31
+ return @chain.map(&:get_pool_usage).compact.max
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -6,8 +6,9 @@ module Amigo
6
6
  class Autoscaler
7
7
  module Checkers
8
8
  class Fake < Amigo::Autoscaler::Checker
9
- def initialize(latencies)
9
+ def initialize(latencies: {}, pool_usage: nil)
10
10
  @latencies = latencies
11
+ @pool_usage = pool_usage
11
12
  super()
12
13
  end
13
14
 
@@ -16,6 +17,12 @@ module Amigo
16
17
  return @latencies.shift if @latencies.is_a?(Array)
17
18
  return @latencies
18
19
  end
20
+
21
+ def get_pool_usage
22
+ return @pool_usage.call if @pool_usage.respond_to?(:call)
23
+ return @pool_usage.shift if @pool_usage.is_a?(Array)
24
+ return @pool_usage
25
+ end
19
26
  end
20
27
  end
21
28
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "puma/dsl"
4
+ require "amigo/autoscaler"
5
+
6
+ module Amigo
7
+ class Autoscaler
8
+ module Checkers
9
+ class PumaPoolUsage < Amigo::Autoscaler::Checker
10
+ NAMESPACE = "amigo/autoscaler/puma_pool_usage"
11
+
12
+ # The minimum number of usage readings before we report pool usage, to avoid spikes.
13
+ MIN_READINGS = 2
14
+
15
+ # How long to track the pool usage.
16
+ WINDOW = 60
17
+
18
+ def initialize(redis:, namespace: NAMESPACE, uid: SecureRandom.base64(4).delete_suffix("="))
19
+ @redis = redis
20
+ @key = "#{namespace}/v1"
21
+ @uid = uid
22
+ super()
23
+ end
24
+
25
+ # Set the pool usage, and trim old metrics.
26
+ def record(value, now:)
27
+ ts = now.to_f
28
+ member = "#{value}:#{@uid}:#{now.to_i}"
29
+ @redis.pipelined do |pipeline|
30
+ pipeline.call("ZADD", @key, ts, member)
31
+ pipeline.call("ZREMRANGEBYSCORE", @key, 0, ts - WINDOW)
32
+ end
33
+ end
34
+
35
+ def get_latencies = {}
36
+
37
+ def get_pool_usage
38
+ now = Time.now.to_f
39
+ members = @redis.call("ZRANGE", @key, now - WINDOW, now, "BYSCORE")
40
+ return nil if members.size < MIN_READINGS
41
+ values = members.map { |m| m.split(":", 2).first }
42
+ total_usage = values.sum(0, &:to_f)
43
+ return total_usage / values.size
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+
50
+ module Puma
51
+ class DSL
52
+ def amigo_autoscaler_interval(interval)
53
+ @options[:amigo_autoscaler_interval] = interval
54
+ end
55
+
56
+ def amigo_puma_pool_usage_checker(ch)
57
+ @options[:amigo_puma_pool_usage_checker] = ch
58
+ end
59
+ end
60
+ end
@@ -13,6 +13,18 @@ module Amigo
13
13
  map { |q| [q.name, q.latency] }.
14
14
  to_h
15
15
  end
16
+
17
+ def get_pool_usage
18
+ ps = ::Sidekiq::ProcessSet.new
19
+ total_concurrency = 0
20
+ total_busy = 0
21
+ ps.each do |process|
22
+ total_concurrency += process["concurrency"] || 0
23
+ total_busy += process["busy"] || 0
24
+ end
25
+ return 0.0 if total_concurrency.zero?
26
+ return total_busy.to_f / total_concurrency
27
+ end
16
28
  end
17
29
  end
18
30
  end
@@ -34,6 +34,8 @@ module Amigo
34
34
  super()
35
35
  end
36
36
 
37
+ def get_pool_usage = nil
38
+
37
39
  def get_latencies
38
40
  now = Time.now.to_i
39
41
  window = (now - (WINDOW - 1))..now
@@ -15,12 +15,12 @@ module Amigo
15
15
  super()
16
16
  end
17
17
 
18
- def scale_up(*args, **kw)
19
- @chain.each { |c| c.scale_up(*args, **kw) }
18
+ def scale_up(**kw)
19
+ @chain.each { |c| c.scale_up(**kw) }
20
20
  end
21
21
 
22
- def scale_down(*args, **kw)
23
- @chain.each { |c| c.scale_down(*args, **kw) }
22
+ def scale_down(**kw)
23
+ @chain.each { |c| c.scale_down(**kw) }
24
24
  end
25
25
  end
26
26
  end
@@ -14,12 +14,12 @@ module Amigo
14
14
  super()
15
15
  end
16
16
 
17
- def scale_up(checked_latencies, depth:, duration:, **kw)
18
- @ups << [checked_latencies, depth, duration, kw]
17
+ def scale_up(**kw)
18
+ @ups << kw
19
19
  end
20
20
 
21
- def scale_down(depth:, duration:, **kw)
22
- @downs << [depth, duration, kw]
21
+ def scale_down(**kw)
22
+ @downs << kw
23
23
  end
24
24
  end
25
25
  end
@@ -103,7 +103,7 @@ module Amigo
103
103
  # Potentially add another worker to the formation.
104
104
  # @return [:noscale, :maxscale, :scaled] One of :noscale (no +active_event_initial_workers+),
105
105
  # :maxscale (+max_additional_workers+ reached), or :scaled.
106
- def scale_up(_queues_and_latencies, depth:, **)
106
+ def scale_up(depth:, **)
107
107
  # When the scaling event starts (or if this is the first time we've seen it
108
108
  # but the event is already in progress), store how many workers we have.
109
109
  # It needs to be stored in redis so it persists if
@@ -18,12 +18,12 @@ module Amigo
18
18
  super()
19
19
  end
20
20
 
21
- def scale_up(checked_latencies, depth:, duration:, **_kw)
22
- self._log(:warn, @message, queues: checked_latencies, depth: depth, duration: duration)
21
+ def scale_up(high_latencies:, depth:, duration:, pool_usage:, **)
22
+ self._log(:warn, @message, queues: high_latencies, depth:, duration:, pool_usage:)
23
23
  end
24
24
 
25
- def scale_down(depth:, duration:, **_kw)
26
- self._log(:info, "#{@message}_restored", depth: depth, duration: duration)
25
+ def scale_down(depth:, duration:, **)
26
+ self._log(:info, "#{@message}_restored", depth:, duration:)
27
27
  end
28
28
 
29
29
  protected def _log(level, msg, **kw)
@@ -20,12 +20,12 @@ module Amigo
20
20
  super()
21
21
  end
22
22
 
23
- def scale_up(checked_latencies, depth:, duration:, **)
23
+ def scale_up(high_latencies:, depth:, duration:, pool_usage:, **)
24
24
  now = Time.now
25
25
  call_sentry = @last_alerted < (now - @interval)
26
26
  return unless call_sentry
27
27
  ::Sentry.with_scope do |scope|
28
- scope&.set_extras(high_latency_queues: checked_latencies, depth:, duration:)
28
+ scope&.set_extras(high_latencies:, depth:, duration:, pool_usage:)
29
29
  ::Sentry.capture_message(@message, level: @level)
30
30
  end
31
31
  @last_alerted = now
@@ -3,6 +3,7 @@
3
3
  require "sidekiq/api"
4
4
 
5
5
  require "amigo"
6
+ require "amigo/threading_event"
6
7
 
7
8
  # Generic autoscaling handler that will check for latency
8
9
  # and take an action.
@@ -42,12 +43,19 @@ module Amigo
42
43
  # @!attribute latency_event_started_at [Time] 0-time if not in a latency event.
43
44
  Persisted = Struct.new(:last_alerted_at, :depth, :latency_event_started_at)
44
45
 
45
- # How often should Autoscaler check for latency?
46
+ # How often the Autoscaler checks for latency/usage statistics.
46
47
  # @return [Integer]
47
48
  attr_reader :poll_interval
48
- # What latency should we alert on?
49
- # @return [Integer]
49
+
50
+ # The latency, in seconds, that triggers an alert.
51
+ # @return [Numeric]
50
52
  attr_reader :latency_threshold
53
+
54
+ # The pool usage, as a float between 0 and 1 (or above), that triggers an alert.
55
+ # Note that usage-based autoscaling should generally not be used for background jobs.
56
+ # It is much more useful for web autoscaling, since it is more responsive than latency.
57
+ attr_reader :usage_threshold
58
+
51
59
  # What hosts/processes should this run on?
52
60
  # Looks at ENV['DYNO'] and Socket.gethostname for a match.
53
61
  # Default to only run on 'web.1', which is the first Heroku web dyno.
@@ -95,6 +103,7 @@ module Amigo
95
103
  checker:,
96
104
  poll_interval: 20,
97
105
  latency_threshold: 5,
106
+ usage_threshold: 1,
98
107
  hostname_regex: /^web\.1$/,
99
108
  alert_interval: 120,
100
109
  latency_restored_threshold: latency_threshold,
@@ -111,6 +120,7 @@ module Amigo
111
120
  @checker = checker
112
121
  @poll_interval = poll_interval
113
122
  @latency_threshold = latency_threshold
123
+ @usage_threshold = usage_threshold
114
124
  @hostname_regex = hostname_regex
115
125
  @alert_interval = alert_interval
116
126
  @latency_restored_threshold = latency_restored_threshold
@@ -124,9 +134,7 @@ module Amigo
124
134
  end
125
135
 
126
136
  def setup
127
- # Store these as strings OR procs, rather than grabbing self.method here.
128
- # It gets extremely hard ot test if we capture the method here.
129
- @stop = false
137
+ @thr_event = ThreadingEvent.new
130
138
  persisted = self.fetch_persisted
131
139
  @last_alerted = persisted.last_alerted_at
132
140
  @depth = persisted.depth
@@ -171,16 +179,17 @@ module Amigo
171
179
  self._debug(:info, "async_autoscaler_starting")
172
180
  self.setup
173
181
  @polling_thread = Thread.new do
174
- until @stop
175
- Kernel.sleep(self.poll_interval)
176
- self.check unless @stop
182
+ loop do
183
+ @thr_event.wait(self.poll_interval)
184
+ break if @thr_event.set?
185
+ self.check
177
186
  end
178
187
  end
179
188
  return true
180
189
  end
181
190
 
182
191
  def stop
183
- @stop = true
192
+ @thr_event.set
184
193
  end
185
194
 
186
195
  def check
@@ -201,7 +210,8 @@ module Amigo
201
210
  self._debug(:info, "async_autoscaler_check")
202
211
  high_latency_queues = self.checker.get_latencies.
203
212
  select { |_, latency| latency > self.latency_threshold }
204
- if high_latency_queues.empty?
213
+ high_pool_usage = !(pu = self.checker.get_pool_usage).nil? && pu > self.usage_threshold
214
+ if high_latency_queues.empty? && !high_pool_usage
205
215
  # Whenever we are in a latency event, we have a depth > 0. So a depth of 0 means
206
216
  # we're not in a latency event, and still have no latency, so can noop.
207
217
  return if @depth.zero?
@@ -224,9 +234,7 @@ module Amigo
224
234
  @latency_event_started = Time.now
225
235
  duration = 0.0
226
236
  end
227
- # Alert each handler. For legacy reasons, we support handlers that accept
228
- # ({queues and latencies}) and ({queues and latencies}, {}keywords}).
229
- @handler.scale_up(high_latency_queues, depth: @depth, duration: duration)
237
+ @handler.scale_up(high_latencies: high_latency_queues, depth: @depth, duration: duration, pool_usage: pu)
230
238
  @last_alerted = now
231
239
  self.persist
232
240
  end
@@ -239,14 +247,22 @@ module Amigo
239
247
  class Checker
240
248
  # Return relevant latencies for this checker.
241
249
  # This could be the latencies of each Sidekiq queue, or web latencies, etc.
250
+ # If this is a pool usage checker only, return {}.
242
251
  # @return [Hash] Key is the queue name (or some other value); value is the latency in seconds.
243
252
  def get_latencies = raise NotImplementedError
253
+
254
+ # Return the pool usage for this checker.
255
+ # Values should be between 0 and 1, with values over 1 meaning a backlog.
256
+ # If this is a latency checker only, or there is not enough information to report on pool usage, return nil.
257
+ # @return [nil,Float]
258
+ def get_pool_usage = raise NotImplementedError
244
259
  end
245
260
 
246
261
  class Handler
247
262
  # Called when a latency event starts, and as it fails to resolve.
248
- # @param checked_latencies [Hash] The +Hash+ returned from +Amigo::Autoscaler::Handler#check+.
263
+ # @param high_latencies [Hash] The +Hash+ returned from +Amigo::Autoscaler::Handler#check+.
249
264
  # For Sidekiq, this will look like `{queue name => latency in seconds}`
265
+ # @param pool_usage [Float,nil] The pool usage value from the checker, or nil.
250
266
  # @param depth [Integer] Number of alerts as part of this latency event.
251
267
  # For example, the first alert has a depth of 1, and if latency stays high,
252
268
  # it'll be 2 on the next call, etc. +depth+ can be used to incrementally provision
@@ -256,7 +272,7 @@ module Amigo
256
272
  # @param kw [Hash] Additional undefined keywords. Handlers should accept additional options,
257
273
  # like via `**kw` or `opts={}`, for compatibility.
258
274
  # @return [Array<String,Symbol,Proc,#call>]
259
- def scale_up(checked_latencies, depth:, duration:, **kw) = raise NotImplementedError
275
+ def scale_up(high_latencies:, pool_usage:, depth:, duration:, **kw) = raise NotImplementedError
260
276
 
261
277
  # Called when a latency of +latency_restored_threshold+ is reached
262
278
  # (ie, when we get back to normal latency after a high latency event).
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Amigo
4
+ # Threading event on +Concurrent::Event+, ManualResetEvent, etc.
5
+ # Efficient way to sleep and wake up.
6
+ class ThreadingEvent
7
+ def initialize(initial=false)
8
+ @mutex = Mutex.new
9
+ @cv = ConditionVariable.new
10
+ @signaled = initial
11
+ end
12
+
13
+ # Sleep the current thread until +set+ is called by another thread.
14
+ # @param timeout [Numeric,nil] Passed to +Mutex#sleep+.
15
+ # @return See +Mutex#sleep+.
16
+ def wait(timeout=nil)
17
+ # _debug("wait")
18
+ @mutex.synchronize do
19
+ @cv.wait(@mutex, timeout)
20
+ end
21
+ end
22
+
23
+ # Signal the event. The waiting threads will wake up.
24
+ def set
25
+ # _debug("set")
26
+ @mutex.synchronize do
27
+ @signaled = true
28
+ @cv.broadcast # wake up all waiters
29
+ end
30
+ end
31
+
32
+ # True if +set+ has been called.
33
+ def set? = @signaled
34
+
35
+ # Reset the event back to its original state.
36
+ def reset
37
+ # _debug("reset")
38
+ @mutex.synchronize do
39
+ @signaled = false
40
+ end
41
+ end
42
+
43
+ # # def _debug(msg)
44
+ # puts "#{Thread.current.name}: #{msg}"
45
+ # end
46
+ end
47
+ end
data/lib/amigo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Amigo
4
- VERSION = "1.12.1"
4
+ VERSION = "1.13.0"
5
5
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "amigo/autoscaler/checkers/puma_pool_usage"
4
+
5
+ Puma::Plugin.create do
6
+ # @param [Puma::Launcher] launcher
7
+ def start(launcher)
8
+ interval = launcher.options[:amigo_autoscaler_interval] || 20
9
+ checker = launcher.options.fetch(:amigo_puma_pool_usage_checker)
10
+ event = Amigo::ThreadingEvent.new
11
+ in_background do
12
+ loop do
13
+ event.wait(interval)
14
+ break if event.set?
15
+ log_pool_usage(launcher, checker)
16
+ end
17
+ end
18
+
19
+ launcher.events.on_stopped do
20
+ event.set
21
+ end
22
+ end
23
+
24
+ # Find the Puma stats necessary depending on mode (single vs. cluster).
25
+ # Sends statistics for logging.
26
+ def log_pool_usage(launcher, checker)
27
+ now = Time.now
28
+ stats = launcher.stats
29
+ if stats[:worker_status]
30
+ stats[:worker_status].each { |worker| _log_pool_usage(checker, worker[:last_status], now:) }
31
+ else
32
+ _log_pool_usage(checker, stats, now:)
33
+ end
34
+ end
35
+
36
+ def _log_pool_usage(checker, stats, now:)
37
+ pool_usage = calculate_pool_usage(stats)
38
+ checker.record(pool_usage, now:)
39
+ end
40
+
41
+ # Pool usage is 0 at no busy threads, 1 at busy threads == max threads,
42
+ # or above 1 if there is a backlog (ie, 4 threads and 4 backlog items is a usage of 2).
43
+ # For our usage purposes, we don't want to deal with the case where we have a backlog,
44
+ # but fewer threads spawned than our max; in this case, we don't need to autoscale,
45
+ # since Puma can still launch threads.
46
+ def calculate_pool_usage(stats)
47
+ busy = stats[:busy_threads]
48
+ max = stats[:max_threads]
49
+ backlog = stats[:backlog]
50
+ return (busy + backlog) / max.to_f
51
+ end
52
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sidekiq-amigo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.12.1
4
+ version: 1.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lithic Technology
@@ -37,6 +37,20 @@ dependencies:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
39
  version: '2'
40
+ - !ruby/object:Gem::Dependency
41
+ name: ostruct
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">"
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">"
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
40
54
  - !ruby/object:Gem::Dependency
41
55
  name: platform-api
42
56
  requirement: !ruby/object:Gem::Requirement
@@ -51,6 +65,20 @@ dependencies:
51
65
  - - ">"
52
66
  - !ruby/object:Gem::Version
53
67
  version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: puma
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '6'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '6'
54
82
  - !ruby/object:Gem::Dependency
55
83
  name: rack
56
84
  requirement: !ruby/object:Gem::Requirement
@@ -203,7 +231,9 @@ files:
203
231
  - lib/amigo.rb
204
232
  - lib/amigo/audit_logger.rb
205
233
  - lib/amigo/autoscaler.rb
234
+ - lib/amigo/autoscaler/checkers/chain.rb
206
235
  - lib/amigo/autoscaler/checkers/fake.rb
236
+ - lib/amigo/autoscaler/checkers/puma_pool_usage.rb
207
237
  - lib/amigo/autoscaler/checkers/sidekiq.rb
208
238
  - lib/amigo/autoscaler/checkers/web_latency.rb
209
239
  - lib/amigo/autoscaler/handlers/chain.rb
@@ -221,7 +251,9 @@ files:
221
251
  - lib/amigo/scheduled_job.rb
222
252
  - lib/amigo/semaphore_backoff_job.rb
223
253
  - lib/amigo/spec_helpers.rb
254
+ - lib/amigo/threading_event.rb
224
255
  - lib/amigo/version.rb
256
+ - lib/puma/plugin/amigo.rb
225
257
  homepage: https://github.com/lithictech/sidekiq-amigo
226
258
  licenses:
227
259
  - MIT