sidekiq-heroku-autoscale 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,266 @@
+ module Sidekiq
+   module HerokuAutoscale
+
+     class Process
+       WAKE_THROTTLE = PollInterval.new(:wait_for_update!, before_update: 2)
+       SHUTDOWN_POLL = PollInterval.new(:wait_for_shutdown!, before_update: 10)
+
+       attr_reader :app_name, :name, :throttle, :history, :client
+       attr_reader :queue_system, :scale_strategy
+
+       attr_accessor :active_at, :updated_at, :quieted_at
+       attr_accessor :dynos, :quieted_to, :quiet_buffer
+
+       def initialize(
+         name: 'worker',
+         app_name: nil,
+         client: nil,
+         throttle: 10, # 10 seconds
+         history: 3600, # 1 hour
+         quiet_buffer: 10,
+         system: {},
+         scale: {}
+       )
+         @app_name = app_name || name.to_s
+         @name = name.to_s
+         @client = client
+         @queue_system = QueueSystem.new(system)
+         @scale_strategy = ScaleStrategy.new(scale)
+
+         @dynos = 0
+         @active_at = nil
+         @updated_at = nil
+         @quieted_at = nil
+         @quieted_to = nil
+
+         @throttle = throttle
+         @history = history
+         @quiet_buffer = quiet_buffer
+       end
+
+       def status
+         if shutting_down?
+           'stopping'
+         elsif quieting?
+           'quieting'
+         elsif @dynos > 0
+           'running'
+         else
+           'stopped'
+         end
+       end
+
+       # requests a throttled update
+       def ping!
+         @active_at = Time.now.utc
+         if ::Sidekiq.server?
+           # submit the process for rescaling (up or down);
+           # the process is polled until shutdown occurs
+           SHUTDOWN_POLL.call(self)
+         else
+           # submit the process for upscaling (wake up);
+           # the process is polled until an update is run
+           WAKE_THROTTLE.call(self)
+         end
+       end
+
+       # checks if the system is downscaling;
+       # no other scaling is allowed during a cooldown period
+       def quieting?
+         !!(@quieted_to && @quieted_at)
+       end
+
+       def shutting_down?
+         quieting? && @quieted_to.zero?
+       end
+
+       def fulfills_quietdown?
+         !!(@quieted_at && Time.now.utc >= @quieted_at + @quiet_buffer)
+       end
+
+       # checks if a probe time is newer than the last update
+       def updated_since_last_activity?
+         !!(@active_at && @updated_at && @updated_at > @active_at)
+       end
+
+       # checks if the last update falls within the throttle window
+       def throttled?
+         !!(@updated_at && Time.now.utc < @updated_at + @throttle)
+       end
+
+       # starts a quietdown period in which excess workers are quieted;
+       # no formation changes are allowed during a quietdown window.
+       def quietdown(to=0)
+         quiet_to = [0, to].max
+         quiet_at = Time.now.utc
+         unless queue_system.quietdown!(quiet_to)
+           # omit the quiet buffer if no workers were actually quieted;
+           # this allows direct downscaling without a buffer delay
+           # (though an uptime buffer may still have an effect)
+           quiet_at -= (@quiet_buffer + 1)
+         end
+         set_attributes(quieted_to: quiet_to, quieted_at: quiet_at)
+       end
+
+       # wrapper for throttling the upscale process (client);
+       # polling runs until the next update has been called.
+       def wait_for_update!
+         # resolve (true) when already updated by another process
+         # keep waiting (false) when:
+         # - redundant updates are called within the throttle window
+         # - the system has been fully quieted and must shut down before upscaling
+         return true if updated_since_last_activity?
+         return false if throttled?
+
+         # the first round of checks uses local (process-specific) settings;
+         # now hit the redis cache and double-check settings from other processes
+         sync_attributes
+         return true if updated_since_last_activity?
+         return false if throttled?
+
+         update!
+         true
+       end
+
+       # wrapper for monitoring the downscale process (server);
+       # polling runs until an update returns zero dynos.
+       def wait_for_shutdown!
+         return false if throttled?
+
+         sync_attributes
+         return false if throttled?
+
+         update!.zero?
+       end
+
+       # updates the process with a live dyno count from Heroku,
+       # then reassesses workload and scale transitions.
+       # this method shouldn't be called directly... just ping! it.
+       def update!(current=nil, target=nil)
+         current ||= fetch_dyno_count
+
+         attrs = { dynos: current, updated_at: Time.now.utc }
+         if current.zero?
+           attrs[:quieted_to] = nil
+           attrs[:quieted_at] = nil
+         end
+         set_attributes(attrs)
+
+         # no changes are allowed while quieting...
+         # the quieted dyno needs to be removed (downscaled)
+         # before making other changes to the formation.
+         unless quieting?
+           # select a new scale target to shoot for
+           # (provides a trajectory, not necessarily a destination)
+           target ||= scale_strategy.call(queue_system)
+
+           # idle
+           if current == target
+             ::Sidekiq.logger.info("IDLE at #{ target } dynos")
+             return current
+
+           # upscale
+           elsif current < target
+             return set_dyno_count!(target)
+
+           # quietdown
+           elsif current > target
+             ::Sidekiq.logger.info("QUIET to #{ current - 1 } dynos")
+             quietdown(current - 1)
+             # do NOT return...
+             # this allows downscale conditions to run during the same update
+           end
+         end
+
+         # downscale
+         if quieting? && fulfills_quietdown?
+           return set_dyno_count!(@quieted_to)
+         end
+
+         current
+       end
+
+       # gets a live dyno count from Heroku
+       def fetch_dyno_count
+         if @client
+           @client.formation.list(app_name)
+             .select { |item| item['type'] == name }
+             .map { |item| item['quantity'] }
+             .reduce(0, &:+)
+         else
+           @dynos
+         end
+       rescue StandardError => e
+         ::Sidekiq::HerokuAutoscale.exception_handler.call(e)
+         0
+       end
+
+       # sets the live dyno count on Heroku
+       def set_dyno_count!(count)
+         ::Sidekiq.logger.info("SCALE to #{ count } dynos")
+         @client.formation.update(app_name, name, { quantity: count }) if @client
+         set_attributes(dynos: count, quieted_to: nil, quieted_at: nil, history_at: Time.now.utc)
+         count
+       rescue StandardError => e
+         ::Sidekiq::HerokuAutoscale.exception_handler.call(e)
+         @dynos
+       end
+
+       # sets redis-cached process attributes
+       def set_attributes(attrs)
+         cache = {}
+         prev_dynos = @dynos
+         if attrs.key?(:dynos)
+           cache['dynos'] = @dynos = attrs[:dynos]
+         end
+         if attrs.key?(:quieted_to)
+           cache['quieted_to'] = @quieted_to = attrs[:quieted_to]
+         end
+         if attrs.key?(:quieted_at)
+           @quieted_at = attrs[:quieted_at]
+           cache['quieted_at'] = @quieted_at ? @quieted_at.to_i : nil
+         end
+         if attrs.key?(:updated_at)
+           @updated_at = attrs[:updated_at]
+           cache['updated_at'] = @updated_at ? @updated_at.to_i : nil
+         end
+
+         ::Sidekiq.redis do |c|
+           c.pipelined do
+             # set new keys, delete expired keys
+             del, set = cache.partition { |k, v| v.nil? }
+             c.hmset(cache_key, *set.flatten) if set.any?
+             c.hdel(cache_key, *del.map(&:first)) if del.any?
+
+             if attrs[:history_at]
+               # set a dyno count history marker
+               event_time = (attrs[:history_at].to_f / @throttle).floor * @throttle
+               history_page = (attrs[:history_at].to_f / @history).floor * @history
+               history_key = "#{ cache_key }:#{ history_page }"
+
+               c.hmset(history_key, (event_time - @throttle).to_s, prev_dynos, event_time.to_s, @dynos)
+               c.expire(history_key, @history * 2)
+             end
+           end
+         end
+       end
+
+       # syncs configuration across process instances (dynos)
+       def sync_attributes
+         if cache = ::Sidekiq.redis { |c| c.hgetall(cache_key) }
+           @dynos = cache['dynos'] ? cache['dynos'].to_i : 0
+           @quieted_to = cache['quieted_to'] ? cache['quieted_to'].to_i : nil
+           @quieted_at = cache['quieted_at'] ? Time.at(cache['quieted_at'].to_i).utc : nil
+           @updated_at = cache['updated_at'] ? Time.at(cache['updated_at'].to_i).utc : nil
+           return true
+         end
+         false
+       end
+
+       def cache_key
+         [self.class.name.gsub('::', '/').downcase, app_name, name].join(':')
+       end
+     end
+
+   end
+ end
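
For orientation, here is a minimal usage sketch of the Process class above. It assumes the gem's own PollInterval, QueueSystem, and ScaleStrategy collaborators (defined elsewhere in this release) plus a client from the platform-api gem; the app name and token variable are hypothetical.

    require 'platform_api'

    # PlatformAPI.connect_oauth is the platform-api gem's entry point for an
    # OAuth token; the env var name here is a placeholder
    client = PlatformAPI.connect_oauth(ENV['HEROKU_ACCESS_TOKEN'])

    process = Sidekiq::HerokuAutoscale::Process.new(
      name: 'worker',                 # Heroku process type to scale
      app_name: 'my-app',             # hypothetical Heroku app name
      client: client,
      throttle: 10,                   # seconds between formation updates
      system: { watch_queues: '*' },
      scale: { mode: :binary, max_dynos: 2 }
    )

    process.ping!     # request a throttled scale evaluation
    process.status    # => "running", "quieting", "stopping", or "stopped"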
@@ -0,0 +1,102 @@
+ require 'sidekiq/api'
+
+ module Sidekiq
+   module HerokuAutoscale
+
+     class QueueSystem
+       ALL_QUEUES = '*'.freeze
+
+       attr_accessor :watch_queues, :include_retrying, :include_scheduled
+
+       def initialize(watch_queues: ALL_QUEUES, include_retrying: true, include_scheduled: true)
+         @watch_queues = [watch_queues].flatten.uniq
+         @include_retrying = include_retrying
+         @include_scheduled = include_scheduled
+       end
+
+       def all_queues?
+         @watch_queues.first == ALL_QUEUES
+       end
+
+       # number of dynos (process instances) running sidekiq;
+       # this may include one or more instances of one or more heroku process types
+       # (though they should all be one process type if setup validation was observed)
+       def dynos
+         sidekiq_processes.size
+       end
+
+       # number of worker threads currently running sidekiq jobs;
+       # counts all queue-specific threads across all dynos (process instances)
+       def threads
+         # work => { 'queue' => name, 'run_at' => timestamp, 'payload' => msg }
+         worker_set = ::Sidekiq::Workers.new.to_a
+         worker_set = worker_set.select { |pid, tid, work| watch_queues.include?(work['queue']) } unless all_queues?
+         worker_set.length
+       end
+
+       # number of jobs sitting in the active work queues
+       def enqueued
+         counts = all_queues? ? sidekiq_queues.values : sidekiq_queues.slice(*watch_queues).values
+         counts.map(&:to_i).reduce(&:+) || 0
+       end
+
+       # number of jobs in the scheduled set
+       def scheduled
+         return 0 unless @include_scheduled
+         count_jobs(::Sidekiq::ScheduledSet.new)
+       end
+
+       # number of jobs in the retry set
+       def retrying
+         return 0 unless @include_retrying
+         count_jobs(::Sidekiq::RetrySet.new)
+       end
+
+       def total_work
+         enqueued + scheduled + retrying + threads
+       end
+
+       def has_work?
+         total_work > 0
+       end
+
+       # when scaling down workers, heroku stops the one with the highest number...
+       # from https://stackoverflow.com/questions/25215334/scale-down-specific-heroku-worker-dynos
+       def quietdown!(scale)
+         quieted = false
+         # processes have hostnames formatted as "worker.1", "worker.2", "sidekiq.1", etc...
+         # this groups processes by type, sorts each group by number, and quiets everything beyond scale.
+         sidekiq_processes.group_by { |p| p['hostname'].split('.').first }.each_pair do |type, group|
+           # there should only ever be a single group here (assuming setup validations were observed)
+           group.sort_by { |p| p['hostname'].split('.').last.to_i }.each_with_index do |process, index|
+             if index + 1 > scale && !process.stopping?
+               process.quiet!
+               quieted = true
+             end
+           end
+         end
+
+         quieted
+       end
+
+       def sidekiq_queues
+         ::Sidekiq::Stats.new.queues
+       end
+
+       def sidekiq_processes
+         process_set = ::Sidekiq::ProcessSet.new
+         # select all processes with queues that intersect the watched queues
+         process_set = process_set.select { |p| (p['queues'] & @watch_queues).any? } unless all_queues?
+         process_set
+       end
+
+       private
+
+       def count_jobs(job_set)
+         return job_set.size if all_queues?
+         job_set.count { |j| watch_queues.include?(j.queue) }
+       end
+     end
+
+   end
+ end
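
To make the workload counters above concrete, here is a small hedged example; the queue names and counts are hypothetical, and live values require a running Sidekiq/Redis:

    system = Sidekiq::HerokuAutoscale::QueueSystem.new(
      watch_queues: %w[default mailers],  # hypothetical queue names
      include_retrying: true,
      include_scheduled: false
    )

    # total_work sums enqueued + scheduled + retrying + threads, so a busy
    # worker thread keeps has_work? true even when all queues are drained
    system.total_work   # => e.g. 12
    system.has_work?    # => true while any watched work remains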
@@ -0,0 +1,53 @@
+ module Sidekiq
+   module HerokuAutoscale
+
+     class ScaleStrategy
+       attr_accessor :mode, :max_dynos, :workers_per_dyno, :min_factor
+
+       def initialize(mode: :binary, max_dynos: 1, workers_per_dyno: 25, min_factor: 0)
+         @mode = mode
+         @max_dynos = max_dynos
+         @workers_per_dyno = workers_per_dyno
+         @min_factor = min_factor
+       end
+
+       def call(sys)
+         case @mode.to_s
+         when 'linear'
+           linear(sys)
+         else
+           binary(sys)
+         end
+       end
+
+       def binary(sys)
+         sys.has_work? ? @max_dynos : 0
+       end
+
+       def linear(sys)
+         # total capacity of max workers
+         total_capacity = (@max_dynos * @workers_per_dyno).to_f
+
+         # min capacity required to scale up the first worker
+         min_capacity = [0, @min_factor].max.to_f * @workers_per_dyno
+
+         # min percentage of total capacity
+         min_capacity_percentage = min_capacity / total_capacity
+         requested_capacity_percentage = sys.total_work / total_capacity
+
+         # scale the requested capacity, taking the required minimum into account
+         scale_factor = (requested_capacity_percentage - min_capacity_percentage) / (total_capacity - min_capacity_percentage)
+         scale_factor = 0 if scale_factor.nan? # handle division by zero
+         scaled_capacity_percentage = scale_factor * total_capacity
+
+         # don't scale down past the number of currently engaged workers,
+         # and don't scale up past the maximum dynos
+         ideal_dynos = ([0, scaled_capacity_percentage].max * @max_dynos).ceil
+         minimum_dynos = [sys.dynos, ideal_dynos].max
+         maximum_dynos = [minimum_dynos, @max_dynos].min
+         [minimum_dynos, maximum_dynos].min
+       end
+     end
+
+   end
+ end
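
To illustrate the two modes, here is a hedged sketch using a stub in place of a live QueueSystem; FakeSystem and its numbers are purely illustrative:

    # minimal stand-in for a QueueSystem (only the methods the strategies call)
    FakeSystem = Struct.new(:total_work, :dynos) do
      def has_work?
        total_work > 0
      end
    end

    sys = FakeSystem.new(30, 1)  # 30 pending jobs, 1 dyno currently running

    binary = Sidekiq::HerokuAutoscale::ScaleStrategy.new(mode: :binary, max_dynos: 4)
    binary.call(sys)  # => 4 (all-or-nothing: any work scales to max_dynos)

    linear = Sidekiq::HerokuAutoscale::ScaleStrategy.new(
      mode: :linear, max_dynos: 4, workers_per_dyno: 25
    )
    linear.call(sys)  # => 2 here (roughly 30 jobs / 25 workers per dyno, rounded
                      # up, and never below the currently engaged dyno count)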
@@ -0,0 +1,7 @@
+ # frozen_string_literal: true
+
+ module Sidekiq
+   module HerokuAutoscale
+     VERSION = '0.0.1'
+   end
+ end
@@ -0,0 +1,7 @@
+ require 'sidekiq/heroku_autoscale/process'
+ require 'sidekiq/heroku_autoscale/web_extension'
+
+ if defined?(::Sidekiq::Web)
+   ::Sidekiq::Web.register(::Sidekiq::HerokuAutoscale::WebExtension)
+   ::Sidekiq::Web.tabs["Dynos"] = "dynos"
+ end
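
The registration above only runs when Sidekiq::Web is already defined, so load order matters. A typical (hedged) Rails setup might look like this:

    # config/routes.rb -- standard Sidekiq::Web mounting; requiring
    # 'sidekiq/web' first ensures the registration block above executes
    require 'sidekiq/web'
    require 'sidekiq/heroku_autoscale'

    Rails.application.routes.draw do
      mount Sidekiq::Web => '/sidekiq'  # the "Dynos" tab appears alongside the defaults
    end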
@@ -0,0 +1,4 @@
+ <div class="dashboard clearfix">
+   <h3>Dynos</h3>
+   <p>Heroku Autoscale is not initialized.</p>
+ </div>
@@ -0,0 +1,38 @@
+ <div class="dashboard clearfix">
+   <h3>Heroku Dynos
+     <span class="beacon" id="beacon">
+       <span class="ring"></span>
+       <span class="dot"></span>
+     </span>
+   </h3>
+ </div>
+
+ <div class="row chart">
+   <div id="history" data-update-url="<%= root_path %>stats" data-dynos-url="<%= root_path %>dynos/stats"></div>
+   <div id="history-legend"></div>
+   <script id="history-data" type="text/json">
+     <%= JSON.generate(@dyno_stats) %>
+   </script>
+ </div>
+
+ <h5>Process types</h5>
+ <div class="table_container">
+   <table class="processes table table-hover table-bordered table-striped table-white">
+     <thead>
+       <th>Name</th>
+       <th>Updated at</th>
+       <th>Status</th>
+       <th>Dynos</th>
+     </thead>
+     <% @dyno_stats.each_pair do |key, stats| %>
+       <tr>
+         <td><%= key %></td>
+         <td id="<%= key %>-updated"><%= stats[:updated] %></td>
+         <td id="<%= key %>-status"><%= stats[:status] %></td>
+         <td id="<%= key %>-dynos"><%= stats[:dynos] %></td>
+       </tr>
+     <% end %>
+   </table>
+ </div>
+
+ <script type="text/javascript" src="<%= root_path %>dynos/index.js"></script>
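
The template above reads @dyno_stats as a hash keyed by process name, with :updated, :status, and :dynos entries per process (and serializes the same hash as JSON for the chart). A hedged sketch of the shape it expects, with illustrative values; the actual builder lives in the gem's web extension, which is not part of this diff:

    @dyno_stats = {
      'worker' => {
        updated: '2019-01-01 12:00:00 UTC',  # illustrative timestamp
        status:  'running',
        dynos:   2
      }
    }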