sidekiq-heroku-autoscale 0.0.1

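This release introduces five Ruby source files and two web views, shown as diffs below. For orientation, here is a hypothetical wiring sketch that uses only the classes introduced in this diff; the app name, token, and middleware hook are illustrative assumptions, and the client object comes from Heroku's platform-api gem, whose formation API the Process class calls:

    # Hypothetical usage sketch -- all names and values here are placeholders.
    require 'platform-api' # Heroku Platform API client

    process = Sidekiq::HerokuAutoscale::Process.new(
      name: 'worker',                          # Heroku process type running Sidekiq
      app_name: 'my-heroku-app',               # placeholder app name
      client: PlatformAPI.connect_oauth(ENV['HEROKU_ACCESS_TOKEN']),
      system: { watch_queues: '*' },           # QueueSystem options
      scale: { mode: :binary, max_dynos: 2 }   # ScaleStrategy options
    )

    process.ping! # request a throttled scale update (e.g. from Sidekiq middleware)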
@@ -0,0 +1,266 @@
+module Sidekiq
+  module HerokuAutoscale
+
+    class Process
+      WAKE_THROTTLE = PollInterval.new(:wait_for_update!, before_update: 2)
+      SHUTDOWN_POLL = PollInterval.new(:wait_for_shutdown!, before_update: 10)
+
+      attr_reader :app_name, :name, :throttle, :history, :client
+      attr_reader :queue_system, :scale_strategy
+
+      attr_accessor :active_at, :updated_at, :quieted_at
+      attr_accessor :dynos, :quieted_to, :quiet_buffer
+
+      def initialize(
+        name: 'worker',
+        app_name: nil,
+        client: nil,
+        throttle: 10, # 10 seconds
+        history: 3600, # 1 hour
+        quiet_buffer: 10,
+        system: {},
+        scale: {}
+      )
+        @app_name = app_name || name.to_s
+        @name = name.to_s
+        @client = client
+        @queue_system = QueueSystem.new(system)
+        @scale_strategy = ScaleStrategy.new(scale)
+
+        @dynos = 0
+        @active_at = nil
+        @updated_at = nil
+        @quieted_at = nil
+        @quieted_to = nil
+
+        @throttle = throttle
+        @history = history
+        @quiet_buffer = quiet_buffer
+      end
+
+      def status
+        if shutting_down?
+          'stopping'
+        elsif quieting?
+          'quieting'
+        elsif @dynos > 0
+          'running'
+        else
+          'stopped'
+        end
+      end
+
+      # request a throttled update
+      def ping!
+        @active_at = Time.now.utc
+        if ::Sidekiq.server?
+          # submit the process for runscaling (up or down)
+          # the process is polled until shutdown occurs
+          SHUTDOWN_POLL.call(self)
+        else
+          # submit the process for upscaling (wake up)
+          # the process is polled until an update is run
+          WAKE_THROTTLE.call(self)
+        end
+      end
+
+      # checks if the system is downscaling
+      # no other scaling is allowed during a cooldown period
+      def quieting?
+        !!(@quieted_to && @quieted_at)
+      end
+
+      def shutting_down?
+        quieting? && @quieted_to.zero?
+      end
+
+      def fulfills_quietdown?
+        !!(@quieted_at && Time.now.utc >= @quieted_at + @quiet_buffer)
+      end
+
+      # check if a probe time is newer than the last update
+      def updated_since_last_activity?
+        !!(@active_at && @updated_at && @updated_at > @active_at)
+      end
+
+      # check if the last update falls within the throttle window
+      def throttled?
+        !!(@updated_at && Time.now.utc < @updated_at + @throttle)
+      end
+
+      # starts a quietdown period in which excess workers are quieted;
+      # no formation changes are allowed during a quietdown window.
+      def quietdown(to = 0)
+        quiet_to = [0, to].max
+        quiet_at = Time.now.utc
+        unless queue_system.quietdown!(quiet_to)
+          # omit the quiet buffer if no workers were actually quieted;
+          # this allows direct downscaling without buffer delay
+          # (though an uptime buffer may still have an effect)
+          quiet_at -= (@quiet_buffer + 1)
+        end
+        set_attributes(quieted_to: quiet_to, quieted_at: quiet_at)
+      end
+
+      # wrapper for throttling the upscale process (client)
+      # polling runs until the next update has been called.
+      def wait_for_update!
+        # resolve (true) when already updated by another process
+        # keep waiting (false) when:
+        # - redundant updates are called within the throttle window
+        # - the system has been fully quieted and must shut down before upscaling
+        return true if updated_since_last_activity?
+        return false if throttled?
+
+        # the first round of checks uses local (process-specific) settings;
+        # now hit the redis cache and double-check settings from other processes
+        sync_attributes
+        return true if updated_since_last_activity?
+        return false if throttled?
+
+        update!
+        true
+      end
+
+      # wrapper for monitoring the downscale process (server)
+      # polling runs until an update returns zero dynos.
+      def wait_for_shutdown!
+        return false if throttled?
+
+        sync_attributes
+        return false if throttled?
+
+        update!.zero?
+      end
+
+      # updates the process with a live dyno count from Heroku,
+      # then reassesses workload and scale transitions.
+      # this method shouldn't be called directly... just ping! it.
+      def update!(current = nil, target = nil)
+        current ||= fetch_dyno_count
+
+        attrs = { dynos: current, updated_at: Time.now.utc }
+        if current.zero?
+          attrs[:quieted_to] = nil
+          attrs[:quieted_at] = nil
+        end
+        set_attributes(attrs)
+
+        # no changes are allowed while quieting...
+        # the quieted dyno needs to be removed (downscaled)
+        # before making other changes to the formation.
+        unless quieting?
+          # select a new scale target to shoot for
+          # (provides a trajectory, not necessarily a destination)
+          target ||= scale_strategy.call(queue_system)
+
+          # idle
+          if current == target
+            ::Sidekiq.logger.info("IDLE at #{ target } dynos")
+            return current
+
+          # upscale
+          elsif current < target
+            return set_dyno_count!(target)
+
+          # quietdown
+          elsif current > target
+            ::Sidekiq.logger.info("QUIET to #{ current - 1 } dynos")
+            quietdown(current - 1)
+            # do NOT return...
+            # this allows downscale conditions to run during the same update
+          end
+        end
+
+        # downscale
+        if quieting? && fulfills_quietdown?
+          return set_dyno_count!(@quieted_to)
+        end
+
+        current
+      end
+
+      # gets a live dyno count from Heroku
+      def fetch_dyno_count
+        if @client
+          @client.formation.list(app_name)
+                 .select { |item| item['type'] == name }
+                 .map { |item| item['quantity'] }
+                 .reduce(0, &:+)
+        else
+          @dynos
+        end
+      rescue StandardError => e
+        ::Sidekiq::HerokuAutoscale.exception_handler.call(e)
+        0
+      end
+
+      # sets the live dyno count on Heroku
+      def set_dyno_count!(count)
+        ::Sidekiq.logger.info("SCALE to #{ count } dynos")
+        @client.formation.update(app_name, name, { quantity: count }) if @client
+        set_attributes(dynos: count, quieted_to: nil, quieted_at: nil, history_at: Time.now.utc)
+        count
+      rescue StandardError => e
+        ::Sidekiq::HerokuAutoscale.exception_handler.call(e)
+        @dynos
+      end
+
+      # sets redis-cached process attributes
+      def set_attributes(attrs)
+        cache = {}
+        prev_dynos = @dynos
+        if attrs.key?(:dynos)
+          cache['dynos'] = @dynos = attrs[:dynos]
+        end
+        if attrs.key?(:quieted_to)
+          cache['quieted_to'] = @quieted_to = attrs[:quieted_to]
+        end
+        if attrs.key?(:quieted_at)
+          @quieted_at = attrs[:quieted_at]
+          cache['quieted_at'] = @quieted_at ? @quieted_at.to_i : nil
+        end
+        if attrs.key?(:updated_at)
+          @updated_at = attrs[:updated_at]
+          cache['updated_at'] = @updated_at ? @updated_at.to_i : nil
+        end
+
+        ::Sidekiq.redis do |c|
+          c.pipelined do
+            # set new keys, delete expired keys
+            del, set = cache.partition { |_k, v| v.nil? }
+            c.hmset(cache_key, *set.flatten) if set.any?
+            c.hdel(cache_key, *del.map(&:first)) if del.any?
+
+            if attrs[:history_at]
+              # set a dyno count history marker
+              event_time = (attrs[:history_at].to_f / @throttle).floor * @throttle
+              history_page = (attrs[:history_at].to_f / @history).floor * @history
+              history_key = "#{ cache_key }:#{ history_page }"
+
+              c.hmset(history_key, (event_time - @throttle).to_s, prev_dynos, event_time.to_s, @dynos)
+              c.expire(history_key, @history * 2)
+            end
+          end
+        end
+      end
+
+      # syncs configuration across process instances (dynos)
+      def sync_attributes
+        if cache = ::Sidekiq.redis { |c| c.hgetall(cache_key) }
+          @dynos = cache['dynos'] ? cache['dynos'].to_i : 0
+          @quieted_to = cache['quieted_to'] ? cache['quieted_to'].to_i : nil
+          @quieted_at = cache['quieted_at'] ? Time.at(cache['quieted_at'].to_i).utc : nil
+          @updated_at = cache['updated_at'] ? Time.at(cache['updated_at'].to_i).utc : nil
+          return true
+        end
+        false
+      end
+
+      def cache_key
+        [self.class.name.gsub('::', '/').downcase, app_name, name].join(':')
+      end
+    end
+
+  end
+end
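A note on the Process class above: PollInterval (used by WAKE_THROTTLE and SHUTDOWN_POLL) is not defined anywhere in this diff. Judging from its usage — call(self) enqueues a process, which is then polled until wait_for_update!/wait_for_shutdown! returns true — a minimal sketch could look like the following. This is purely speculative, not the gem's actual implementation:

    # Speculative sketch of the missing PollInterval helper.
    class PollInterval
      def initialize(method_name, before_update: 0)
        @method_name = method_name     # poll method, e.g. :wait_for_update!
        @before_update = before_update # seconds to sleep between polls
        @requests = {}
      end

      def call(process)
        @requests[process.name] ||= process
        # a single background thread re-polls each registered process,
        # dropping it once its poll method reports completion (truthy)
        @thread ||= Thread.new do
          while @requests.any?
            sleep(@before_update)
            @requests.reject! { |_name, p| p.send(@method_name) }
          end
          @thread = nil
        end
      end
    end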
@@ -0,0 +1,102 @@
+require 'sidekiq/api'
+
+module Sidekiq
+  module HerokuAutoscale
+
+    class QueueSystem
+      ALL_QUEUES = '*'.freeze
+
+      attr_accessor :watch_queues, :include_retrying, :include_scheduled
+
+      def initialize(watch_queues: ALL_QUEUES, include_retrying: true, include_scheduled: true)
+        @watch_queues = [watch_queues].flatten.uniq
+        @include_retrying = include_retrying
+        @include_scheduled = include_scheduled
+      end
+
+      def all_queues?
+        @watch_queues.first == ALL_QUEUES
+      end
+
+      # number of dynos (process instances) running sidekiq
+      # this may include one or more instances of one or more heroku process types
+      # (though they should all be one process type if setup validation was observed)
+      def dynos
+        sidekiq_processes.size
+      end
+
+      # number of worker threads currently running sidekiq jobs
+      # counts all queue-specific threads across all dynos (process instances)
+      def threads
+        # work => { 'queue' => name, 'run_at' => timestamp, 'payload' => msg }
+        worker_set = ::Sidekiq::Workers.new.to_a
+        worker_set = worker_set.select { |_pid, _tid, work| watch_queues.include?(work['queue']) } unless all_queues?
+        worker_set.length
+      end
+
+      # number of jobs sitting in the active work queue
+      def enqueued
+        counts = all_queues? ? sidekiq_queues.values : sidekiq_queues.slice(*watch_queues).values
+        counts.map(&:to_i).reduce(&:+) || 0
+      end
+
+      # number of jobs in the scheduled set
+      def scheduled
+        return 0 unless @include_scheduled
+        count_jobs(::Sidekiq::ScheduledSet.new)
+      end
+
+      # number of jobs in the retry set
+      def retrying
+        return 0 unless @include_retrying
+        count_jobs(::Sidekiq::RetrySet.new)
+      end
+
+      def total_work
+        enqueued + scheduled + retrying + threads
+      end
+
+      def has_work?
+        total_work > 0
+      end
+
+      # when scaling down workers, Heroku stops the one with the highest number...
+      # from https://stackoverflow.com/questions/25215334/scale-down-specific-heroku-worker-dynos
+      def quietdown!(scale)
+        quieted = false
+        # processes have hostnames formatted as "worker.1", "worker.2", "sidekiq.1", etc...
+        # this groups processes by type, sorts each group by number, then quiets any process numbered beyond the target scale.
+        sidekiq_processes.group_by { |p| p['hostname'].split('.').first }.each_pair do |type, group|
+          # there should only ever be a single group here (assuming setup validations were observed)
+          group.sort_by { |p| p['hostname'].split('.').last.to_i }.each_with_index do |process, index|
+            if index + 1 > scale && !process.stopping?
+              process.quiet!
+              quieted = true
+            end
+          end
+        end
+
+        quieted
+      end
+
+      def sidekiq_queues
+        ::Sidekiq::Stats.new.queues
+      end
+
+      def sidekiq_processes
+        process_set = ::Sidekiq::ProcessSet.new
+        # select all processes with queues that intersect the watched queues
+        process_set = process_set.select { |p| (p['queues'] & @watch_queues).any? } unless all_queues?
+        process_set
+      end
+
+      private
+
+      def count_jobs(job_set)
+        return job_set.size if all_queues?
+        job_set.count { |j| watch_queues.include?(j.queue) }
+      end
+    end
+
+  end
+end
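For reference, the counters above compose additively: total_work sums enqueued, scheduled, retrying, and threads over the watched queues. A quick console check (queue names are illustrative):

    sys = Sidekiq::HerokuAutoscale::QueueSystem.new(watch_queues: %w[default mailers])
    sys.total_work # => enqueued + scheduled + retrying + threads for 'default' and 'mailers'
    sys.has_work?  # => true whenever total_work > 0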
@@ -0,0 +1,53 @@
+module Sidekiq
+  module HerokuAutoscale
+
+    class ScaleStrategy
+      attr_accessor :mode, :max_dynos, :workers_per_dyno, :min_factor
+
+      def initialize(mode: :binary, max_dynos: 1, workers_per_dyno: 25, min_factor: 0)
+        @mode = mode
+        @max_dynos = max_dynos
+        @workers_per_dyno = workers_per_dyno
+        @min_factor = min_factor
+      end
+
+      def call(sys)
+        case @mode.to_s
+        when 'linear'
+          linear(sys)
+        else
+          binary(sys)
+        end
+      end
+
+      def binary(sys)
+        sys.has_work? ? @max_dynos : 0
+      end
+
+      def linear(sys)
+        # total capacity of max workers
+        total_capacity = (@max_dynos * @workers_per_dyno).to_f
+
+        # min capacity required to scale first worker
+        min_capacity = [0, @min_factor].max.to_f * @workers_per_dyno
+
+        # min percentage of total capacity
+        min_capacity_percentage = min_capacity / total_capacity
+        requested_capacity_percentage = sys.total_work / total_capacity
+
+        # scale requested capacity taking into account the minimum required
+        scale_factor = (requested_capacity_percentage - min_capacity_percentage) / (total_capacity - min_capacity_percentage)
+        scale_factor = 0 if scale_factor.nan? # handle DIVZERO
+        scaled_capacity_percentage = scale_factor * total_capacity
+
+        # don't scale down past the number of currently engaged workers,
+        # and don't scale up past the maximum dynos
+        ideal_dynos = ([0, scaled_capacity_percentage].max * @max_dynos).ceil
+        minimum_dynos = [sys.dynos, ideal_dynos].max
+        maximum_dynos = [minimum_dynos, @max_dynos].min
+        [minimum_dynos, maximum_dynos].min
+      end
+    end
+
+  end
+end
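To make the linear arithmetic concrete, here is a worked example with hypothetical numbers; note that with min_factor: 0 the chain reduces to (total_work / total_capacity * max_dynos).ceil:

    strategy = Sidekiq::HerokuAutoscale::ScaleStrategy.new(
      mode: :linear, max_dynos: 2, workers_per_dyno: 25, min_factor: 0
    )
    # total_capacity = 50.0; assuming sys.total_work = 10 and sys.dynos = 0:
    #   requested_capacity_percentage = 10 / 50.0  # => 0.2
    #   scale_factor = (0.2 - 0.0) / (50.0 - 0.0)  # => 0.004
    #   scaled_capacity_percentage = 0.004 * 50.0  # => 0.2
    #   ideal_dynos = (0.2 * 2).ceil               # => 1
    strategy.call(sys) # => 1 dyno; with sys.total_work = 30, (0.6 * 2).ceil # => 2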
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+module Sidekiq
+  module HerokuAutoscale
+    VERSION = '0.0.1'
+  end
+end
@@ -0,0 +1,7 @@
+require 'sidekiq/heroku_autoscale/process'
+require 'sidekiq/heroku_autoscale/web_extension'
+
+if defined?(::Sidekiq::Web)
+  ::Sidekiq::Web.register(::Sidekiq::HerokuAutoscale::WebExtension)
+  ::Sidekiq::Web.tabs["Dynos"] = "dynos"
+end
@@ -0,0 +1,4 @@
+<div class="dashboard clearfix">
+  <h3>Dynos</h3>
+  <p>Heroku Autoscale is not initialized.</p>
+</div>
@@ -0,0 +1,40 @@
+<div class="dashboard clearfix">
+  <h3>Heroku Dynos
+    <span class="beacon" id="beacon">
+      <span class="ring"></span>
+      <span class="dot"></span>
+    </span>
+  </h3>
+</div>
+
+<div class="row chart">
+  <div id="history" data-update-url="<%= root_path %>stats" data-dynos-url="<%= root_path %>dynos/stats"></div>
+  <div id="history-legend"></div>
+  <script id="history-data" type="text/json">
+    <%= JSON.generate(@dyno_stats) %>
+  </script>
+</div>
+
+<h5>Process types</h5>
+<div class="table_container">
+  <table class="processes table table-hover table-bordered table-striped table-white">
+    <thead>
+      <tr>
+        <th>Name</th>
+        <th>Updated at</th>
+        <th>Status</th>
+        <th>Dynos</th>
+      </tr>
+    </thead>
+    <% @dyno_stats.each_pair do |key, stats| %>
+      <tr>
+        <td><%= key %></td>
+        <td id="<%= key %>-updated"><%= stats[:updated] %></td>
+        <td id="<%= key %>-status"><%= stats[:status] %></td>
+        <td id="<%= key %>-dynos"><%= stats[:dynos] %></td>
+      </tr>
+    <% end %>
+  </table>
+</div>
+
+<script type="text/javascript" src="<%= root_path %>dynos/index.js"></script>