solid_queue_heroku_autoscaler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +128 -0
- data/LICENSE.txt +21 -0
- data/README.md +474 -0
- data/lib/generators/solid_queue_heroku_autoscaler/install_generator.rb +21 -0
- data/lib/generators/solid_queue_heroku_autoscaler/migration_generator.rb +29 -0
- data/lib/generators/solid_queue_heroku_autoscaler/templates/README +41 -0
- data/lib/generators/solid_queue_heroku_autoscaler/templates/create_solid_queue_autoscaler_state.rb.erb +15 -0
- data/lib/generators/solid_queue_heroku_autoscaler/templates/initializer.rb +52 -0
- data/lib/solid_queue_heroku_autoscaler/adapters/base.rb +102 -0
- data/lib/solid_queue_heroku_autoscaler/adapters/heroku.rb +93 -0
- data/lib/solid_queue_heroku_autoscaler/adapters/kubernetes.rb +158 -0
- data/lib/solid_queue_heroku_autoscaler/adapters.rb +57 -0
- data/lib/solid_queue_heroku_autoscaler/advisory_lock.rb +71 -0
- data/lib/solid_queue_heroku_autoscaler/autoscale_job.rb +71 -0
- data/lib/solid_queue_heroku_autoscaler/configuration.rb +217 -0
- data/lib/solid_queue_heroku_autoscaler/cooldown_tracker.rb +153 -0
- data/lib/solid_queue_heroku_autoscaler/decision_engine.rb +228 -0
- data/lib/solid_queue_heroku_autoscaler/errors.rb +44 -0
- data/lib/solid_queue_heroku_autoscaler/metrics.rb +172 -0
- data/lib/solid_queue_heroku_autoscaler/railtie.rb +149 -0
- data/lib/solid_queue_heroku_autoscaler/scaler.rb +227 -0
- data/lib/solid_queue_heroku_autoscaler/version.rb +5 -0
- data/lib/solid_queue_heroku_autoscaler.rb +106 -0
- metadata +169 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
|
|
5
|
+
module SolidQueueHerokuAutoscaler
  # Settings object for a single autoscaler configuration.
  #
  # Every setting is a plain attribute that receives its default in
  # #initialize. Call #validate! after changing values to check them.
  class Configuration
    # Scaling strategies accepted by #validate!.
    VALID_SCALING_STRATEGIES = %i[fixed proportional].freeze

    # Configuration name (for multi-worker support)
    attr_accessor :name

    # Heroku settings
    attr_accessor :heroku_api_key, :heroku_app_name, :process_type

    # Worker limits
    attr_accessor :min_workers, :max_workers

    # Scale-up thresholds
    attr_accessor :scale_up_queue_depth, :scale_up_latency_seconds, :scale_up_increment

    # Scale-down thresholds
    attr_accessor :scale_down_queue_depth, :scale_down_latency_seconds,
                  :scale_down_idle_minutes, :scale_down_decrement

    # Scaling strategy (:fixed or :proportional) and its per-worker ratios
    attr_accessor :scaling_strategy, :scale_up_jobs_per_worker,
                  :scale_up_latency_per_worker, :scale_down_jobs_per_worker

    # Safety settings; the per-direction cooldowns fall back to
    # cooldown_seconds when left nil.
    attr_accessor :cooldown_seconds, :scale_up_cooldown_seconds, :scale_down_cooldown_seconds

    # Advisory lock settings
    attr_accessor :lock_timeout_seconds
    attr_writer :lock_key

    # Behavior settings
    attr_accessor :dry_run, :enabled, :logger

    # Queue filtering (nil = all queues)
    attr_accessor :queues

    # Database connection (see #connection for the fallback)
    attr_accessor :database_connection

    # Solid Queue table prefix (default: 'solid_queue_')
    attr_accessor :table_prefix

    # Infrastructure adapter class (see #adapter for the fallback)
    attr_accessor :adapter_class

    # Allow setting a pre-configured adapter instance
    attr_writer :adapter

    # Kubernetes settings (for Kubernetes adapter)
    attr_accessor :kubernetes_deployment, :kubernetes_namespace,
                  :kubernetes_context, :kubernetes_kubeconfig

    # Populates every setting with its default. Heroku and Kubernetes
    # identifiers are read from the environment when present.
    def initialize
      # Configuration name (auto-set when using named configurations)
      @name = :default

      # Heroku settings - required
      @heroku_api_key = ENV.fetch('HEROKU_API_KEY', nil)
      @heroku_app_name = ENV.fetch('HEROKU_APP_NAME', nil)
      @process_type = 'worker'

      # Worker limits
      @min_workers = 1
      @max_workers = 10

      # Scale-up thresholds
      @scale_up_queue_depth = 100
      @scale_up_latency_seconds = 300
      @scale_up_increment = 1

      # Scale-down thresholds
      @scale_down_queue_depth = 10
      @scale_down_latency_seconds = 30
      @scale_down_idle_minutes = 5
      @scale_down_decrement = 1

      # Scaling strategy (:fixed or :proportional)
      @scaling_strategy = :fixed
      @scale_up_jobs_per_worker = 50
      @scale_up_latency_per_worker = 60
      @scale_down_jobs_per_worker = 50

      # Safety settings
      @cooldown_seconds = 120
      @scale_up_cooldown_seconds = nil
      @scale_down_cooldown_seconds = nil

      # Advisory lock settings
      @lock_timeout_seconds = 30
      @lock_key = nil # Auto-generated based on name if not set

      # Behavior
      @dry_run = false
      @enabled = true
      @logger = default_logger

      # Queue filtering (nil = all queues)
      @queues = nil

      # Database connection (defaults to ActiveRecord::Base.connection)
      @database_connection = nil

      # Solid Queue table prefix (default: 'solid_queue_')
      @table_prefix = 'solid_queue_'

      # Infrastructure adapter (defaults to Heroku)
      @adapter_class = nil

      # Kubernetes settings (for Kubernetes adapter)
      @kubernetes_deployment = ENV.fetch('K8S_DEPLOYMENT', nil)
      @kubernetes_namespace = ENV['K8S_NAMESPACE'] || 'default'
      @kubernetes_context = ENV.fetch('K8S_CONTEXT', nil)
      @kubernetes_kubeconfig = ENV.fetch('KUBECONFIG', nil)
    end

    # Returns the lock key, auto-generating based on name if not explicitly set
    # Each worker type gets a unique lock to allow parallel scaling
    def lock_key
      @lock_key || "solid_queue_autoscaler_#{name}"
    end

    # Checks the adapter's own configuration errors plus every numeric,
    # table-prefix and strategy setting.
    #
    # @return [true] when no problem was found
    # @raise [ConfigurationError] with all problems joined by ', '
    def validate!
      errors = []

      # Validate adapter-specific configuration
      errors.concat(adapter.configuration_errors)
      errors.concat(worker_limit_errors)
      errors.concat(threshold_errors)
      errors.concat(timing_errors)
      errors.concat(table_prefix_errors)
      errors.concat(strategy_errors)

      raise ConfigurationError, errors.join(', ') if errors.any?

      true
    end

    # Cooldown applied after a scale-up: the specific override when set,
    # otherwise cooldown_seconds.
    def effective_scale_up_cooldown
      scale_up_cooldown_seconds || cooldown_seconds
    end

    # Cooldown applied after a scale-down: the specific override when set,
    # otherwise cooldown_seconds.
    def effective_scale_down_cooldown
      scale_down_cooldown_seconds || cooldown_seconds
    end

    # The explicitly configured database connection, or
    # ActiveRecord::Base.connection when none was set.
    def connection
      database_connection || ActiveRecord::Base.connection
    end

    # Returns the dry_run setting as stored.
    def dry_run?
      dry_run
    end

    # Returns the enabled setting as stored.
    def enabled?
      enabled
    end

    # Returns the configured adapter instance.
    # Creates a new instance from adapter_class if not set.
    # Defaults to Heroku adapter.
    def adapter
      @adapter ||= begin
        klass = adapter_class || Adapters::Heroku
        klass.new(config: self)
      end
    end

    private

    # Problems with min_workers / max_workers.
    def worker_limit_errors
      errors = []
      errors << 'min_workers must be >= 0' if min_workers.negative?
      errors << 'max_workers must be > 0' if max_workers <= 0
      errors << 'min_workers cannot exceed max_workers' if min_workers > max_workers
      errors
    end

    # Problems with the scale-up and scale-down thresholds and step sizes.
    def threshold_errors
      errors = []
      errors << 'scale_up_queue_depth must be > 0' if scale_up_queue_depth <= 0
      errors << 'scale_up_latency_seconds must be > 0' if scale_up_latency_seconds <= 0
      errors << 'scale_up_increment must be > 0' if scale_up_increment <= 0
      errors << 'scale_down_queue_depth must be >= 0' if scale_down_queue_depth.negative?
      errors << 'scale_down_latency_seconds must be >= 0' if scale_down_latency_seconds.negative?
      errors << 'scale_down_decrement must be > 0' if scale_down_decrement <= 0
      errors
    end

    # Problems with cooldowns and the lock timeout. The per-direction
    # cooldowns are optional, so nil is accepted for them.
    def timing_errors
      errors = []
      errors << 'cooldown_seconds must be >= 0' if cooldown_seconds.negative?
      errors << 'scale_up_cooldown_seconds must be >= 0' if scale_up_cooldown_seconds&.negative?
      errors << 'scale_down_cooldown_seconds must be >= 0' if scale_down_cooldown_seconds&.negative?
      errors << 'lock_timeout_seconds must be > 0' if lock_timeout_seconds <= 0
      errors
    end

    # Problems with table_prefix (nil.to_s is '', so nil counts as empty).
    def table_prefix_errors
      prefix = table_prefix.to_s
      return ['table_prefix cannot be nil or empty'] if prefix.strip.empty?
      return ['table_prefix must end with an underscore'] unless prefix.end_with?('_')

      []
    end

    # Problems with the scaling strategy. The per-worker ratios are used as
    # divisors by the proportional strategy, so they must be positive there;
    # they are not checked for other strategies.
    def strategy_errors
      unless VALID_SCALING_STRATEGIES.include?(scaling_strategy)
        return ["scaling_strategy must be one of: #{VALID_SCALING_STRATEGIES.join(', ')}"]
      end
      return [] unless scaling_strategy == :proportional

      errors = []
      errors << 'scale_up_jobs_per_worker must be > 0' if scale_up_jobs_per_worker <= 0
      errors << 'scale_up_latency_per_worker must be > 0' if scale_up_latency_per_worker <= 0
      errors << 'scale_down_jobs_per_worker must be > 0' if scale_down_jobs_per_worker <= 0
      errors
    end

    # Rails.logger when Rails is loaded and has a logger; otherwise an
    # INFO-level stdout logger with a timestamped, tagged line format.
    def default_logger
      if defined?(Rails) && Rails.logger
        Rails.logger
      else
        Logger.new($stdout).tap do |logger|
          logger.level = Logger::INFO
          logger.formatter = proc do |severity, datetime, _progname, msg|
            "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] [SolidQueueAutoscaler] #{severity}: #{msg}\n"
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'time'
|
|
4
|
+
|
|
5
|
+
module SolidQueueHerokuAutoscaler
  # Reads and writes the last scale-up / scale-down timestamps for one key
  # in the solid_queue_autoscaler_state table, and answers whether the
  # configured cooldown for either direction is still running.
  #
  # When the state table does not exist, readers return nil / {} / 0 and
  # writers return false instead of touching the database.
  class CooldownTracker
    TABLE_NAME = 'solid_queue_autoscaler_state'
    DEFAULT_KEY = 'default'

    attr_reader :key

    # @param config [Configuration, nil] falls back to
    #   SolidQueueHerokuAutoscaler.config when nil
    # @param key [String] row key identifying this tracker's state
    def initialize(config: nil, key: DEFAULT_KEY)
      @config = config || SolidQueueHerokuAutoscaler.config
      @key = key
      @table_exists = nil
    end

    # @return [Time, nil] time of the last recorded scale-up, or nil when the
    #   table or row is missing or the stored value cannot be parsed
    def last_scale_up_at
      read_timestamp('last_scale_up_at')
    end

    # @return [Time, nil] time of the last recorded scale-down, or nil when
    #   the table or row is missing or the stored value cannot be parsed
    def last_scale_down_at
      read_timestamp('last_scale_down_at')
    end

    # Stores the current time as the last scale-up.
    # @return [Boolean] false when the state table does not exist
    def record_scale_up!
      return false unless table_exists?

      upsert_state(last_scale_up_at: Time.current)
      true
    end

    # Stores the current time as the last scale-down.
    # @return [Boolean] false when the state table does not exist
    def record_scale_down!
      return false unless table_exists?

      upsert_state(last_scale_down_at: Time.current)
      true
    end

    # Deletes this key's row.
    # @return [Boolean] false when the state table does not exist
    def reset!
      return false unless table_exists?

      connection.execute(<<~SQL)
        DELETE FROM #{TABLE_NAME} WHERE key = #{connection.quote(key)}
      SQL
      true
    end

    # @return [Boolean] true while less than the effective scale-up cooldown
    #   has elapsed since the last recorded scale-up
    def cooldown_active_for_scale_up?
      last = last_scale_up_at
      return false unless last

      Time.current - last < @config.effective_scale_up_cooldown
    end

    # @return [Boolean] true while less than the effective scale-down
    #   cooldown has elapsed since the last recorded scale-down
    def cooldown_active_for_scale_down?
      last = last_scale_down_at
      return false unless last

      Time.current - last < @config.effective_scale_down_cooldown
    end

    # @return [Numeric] seconds left in the scale-up cooldown, never negative
    def scale_up_cooldown_remaining
      last = last_scale_up_at
      return 0 unless last

      remaining = @config.effective_scale_up_cooldown - (Time.current - last)
      [remaining, 0].max
    end

    # @return [Numeric] seconds left in the scale-down cooldown, never negative
    def scale_down_cooldown_remaining
      last = last_scale_down_at
      return 0 unless last

      remaining = @config.effective_scale_down_cooldown - (Time.current - last)
      [remaining, 0].max
    end

    # Whether the state table exists. The answer is memoized for the lifetime
    # of this tracker; any error while checking is treated as "missing".
    def table_exists?
      return @table_exists unless @table_exists.nil?

      @table_exists = connection.table_exists?(TABLE_NAME)
    rescue StandardError
      @table_exists = false
    end

    # @return [Hash] the raw last_scale_up_at, last_scale_down_at and
    #   updated_at values for this key, or {} when table or row is missing
    def state
      return {} unless table_exists?

      row = connection.select_one(<<~SQL)
        SELECT last_scale_up_at, last_scale_down_at, updated_at
        FROM #{TABLE_NAME}
        WHERE key = #{connection.quote(key)}
      SQL

      return {} unless row

      {
        last_scale_up_at: row['last_scale_up_at'],
        last_scale_down_at: row['last_scale_down_at'],
        updated_at: row['updated_at']
      }
    end

    private

    def connection
      @config.connection
    end

    # Fetches one timestamp column for this key and converts it to a Time.
    # ArgumentError (raised by Time.parse for unparseable text) yields nil.
    def read_timestamp(column)
      return nil unless table_exists?

      raw = connection.select_value(<<~SQL)
        SELECT #{column} FROM #{TABLE_NAME}
        WHERE key = #{connection.quote(key)}
      SQL
      coerce_time(raw)
    rescue ArgumentError
      nil
    end

    # Converts a value returned by the database into a Time.
    #
    # Values that already are Time objects are returned untouched so no
    # precision is lost in a to_s round trip. Strings without any zone
    # information would be read by Time.parse as local time; they are
    # interpreted as UTC instead unless Active Record is configured to store
    # timestamps in local time.
    def coerce_time(value)
      return nil unless value
      return value if value.is_a?(Time)

      text = value.to_s
      text = "#{text} UTC" if database_timezone == :utc && Date._parse(text)[:zone].nil?
      Time.parse(text)
    end

    # Zone in which zone-less timestamps are stored: Active Record's
    # default_timezone when it can be determined, :utc otherwise.
    def database_timezone
      if defined?(ActiveRecord) && ActiveRecord.respond_to?(:default_timezone)
        ActiveRecord.default_timezone
      elsif defined?(ActiveRecord::Base) && ActiveRecord::Base.respond_to?(:default_timezone)
        ActiveRecord::Base.default_timezone
      else
        :utc
      end
    end

    # Inserts or updates this key's row with one timestamp column. When both
    # arguments are given only last_scale_up_at is written; when neither is
    # given nothing is executed.
    def upsert_state(last_scale_up_at: nil, last_scale_down_at: nil)
      column, value =
        if last_scale_up_at
          ['last_scale_up_at', last_scale_up_at]
        elsif last_scale_down_at
          ['last_scale_down_at', last_scale_down_at]
        end
      return unless column

      quoted_key = connection.quote(key)
      quoted_now = connection.quote(Time.current)
      quoted_time = connection.quote(value)

      connection.execute(<<~SQL)
        INSERT INTO #{TABLE_NAME} (key, #{column}, created_at, updated_at)
        VALUES (#{quoted_key}, #{quoted_time}, #{quoted_now}, #{quoted_now})
        ON CONFLICT (key) DO UPDATE SET
          #{column} = EXCLUDED.#{column},
          updated_at = EXCLUDED.updated_at
      SQL
    end
  end
end
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidQueueHerokuAutoscaler
  # Turns a metrics snapshot plus the current worker count into a Decision
  # (scale up, scale down, or no change) according to the configuration's
  # thresholds, worker limits and scaling strategy.
  class DecisionEngine
    # Outcome of #decide: the action symbol, the worker counts before (from)
    # and after (to), and a human-readable reason.
    Decision = Struct.new(:action, :from, :to, :reason, keyword_init: true) do
      def scale_up?
        action == :scale_up
      end

      def scale_down?
        action == :scale_down
      end

      def no_change?
        action == :no_change
      end

      # Signed change in worker count.
      def delta
        to - from
      end
    end

    # @param config [Configuration, nil] falls back to
    #   SolidQueueHerokuAutoscaler.config when nil
    def initialize(config: nil)
      @config = config || SolidQueueHerokuAutoscaler.config
    end

    # Scale-up is evaluated before scale-down, so it wins when both sets of
    # thresholds are met.
    #
    # @param metrics [#queue_depth, #oldest_job_age_seconds, #idle?]
    # @param current_workers [Integer]
    # @return [Decision]
    def decide(metrics:, current_workers:)
      return no_change_decision(current_workers, 'Autoscaler is disabled') unless @config.enabled?

      if should_scale_up?(metrics, current_workers)
        scale_up_decision(metrics, current_workers)
      elsif should_scale_down?(metrics, current_workers)
        scale_down_decision(metrics, current_workers)
      else
        no_change_decision(current_workers, determine_no_change_reason(metrics, current_workers))
      end
    end

    private

    # True when a scale-up threshold is met and there is room below max_workers.
    def should_scale_up?(metrics, current_workers)
      return false if current_workers >= @config.max_workers

      scale_up_triggered?(metrics)
    end

    # True when the scale-down conditions hold and there is room above min_workers.
    def should_scale_down?(metrics, current_workers)
      return false if current_workers <= @config.min_workers

      scale_down_triggered?(metrics)
    end

    # Either queue depth or oldest-job age has reached its scale-up threshold.
    def scale_up_triggered?(metrics)
      metrics.queue_depth >= @config.scale_up_queue_depth ||
        metrics.oldest_job_age_seconds >= @config.scale_up_latency_seconds
    end

    # Both queue depth and oldest-job age are at or below their scale-down
    # thresholds, or the metrics report the queue as idle.
    def scale_down_triggered?(metrics)
      depth_low = metrics.queue_depth <= @config.scale_down_queue_depth
      latency_low = metrics.oldest_job_age_seconds <= @config.scale_down_latency_seconds

      (depth_low && latency_low) || metrics.idle?
    end

    def scale_up_decision(metrics, current_workers)
      target = calculate_scale_up_target(metrics, current_workers)

      Decision.new(
        action: :scale_up,
        from: current_workers,
        to: target,
        reason: build_scale_up_reason(metrics, current_workers, target)
      )
    end

    def scale_down_decision(metrics, current_workers)
      target = calculate_scale_down_target(metrics, current_workers)

      Decision.new(
        action: :scale_down,
        from: current_workers,
        to: target,
        reason: build_scale_down_reason(metrics, current_workers, target)
      )
    end

    # Strategy-specific scale-up target, capped at max_workers.
    def calculate_scale_up_target(metrics, current_workers)
      raw_target = case @config.scaling_strategy
                   when :proportional
                     calculate_proportional_scale_up_target(metrics, current_workers)
                   when :step_function
                     calculate_step_function_target(metrics, current_workers)
                   else # :fixed
                     current_workers + @config.scale_up_increment
                   end

      [raw_target, @config.max_workers].min
    end

    # Strategy-specific scale-down target, floored at min_workers.
    def calculate_scale_down_target(metrics, current_workers)
      raw_target = case @config.scaling_strategy
                   when :proportional
                     calculate_proportional_scale_down_target(metrics, current_workers)
                   when :step_function
                     calculate_step_function_target(metrics, current_workers)
                   else # :fixed
                     current_workers - @config.scale_down_decrement
                   end

      [raw_target, @config.min_workers].max
    end

    def calculate_proportional_scale_up_target(metrics, current_workers)
      # Calculate workers needed based on queue depth
      jobs_over_threshold = [metrics.queue_depth - @config.scale_up_queue_depth, 0].max
      workers_for_depth = (jobs_over_threshold.to_f / @config.scale_up_jobs_per_worker).ceil

      # Calculate workers needed based on latency. Float division is required:
      # with Integer operands `/` would floor the quotient before `ceil`
      # could round it up.
      latency_over_threshold = [metrics.oldest_job_age_seconds - @config.scale_up_latency_seconds, 0].max
      workers_for_latency = (latency_over_threshold.to_f / @config.scale_up_latency_per_worker).ceil

      # Take the higher of the two calculations, but always add at least
      # scale_up_increment if we're scaling up
      additional_workers = [workers_for_depth, workers_for_latency, @config.scale_up_increment].max

      current_workers + additional_workers
    end

    def calculate_proportional_scale_down_target(metrics, current_workers)
      # If idle, scale down aggressively
      return @config.min_workers if metrics.idle?

      # Calculate how much capacity we have based on queue depth
      jobs_under_capacity = [@config.scale_down_queue_depth - metrics.queue_depth, 0].max
      workers_to_remove = (jobs_under_capacity.to_f / @config.scale_down_jobs_per_worker).floor

      # Ensure we remove at least scale_down_decrement if we're scaling down
      workers_to_remove = [@config.scale_down_decrement, workers_to_remove].max

      current_workers - workers_to_remove
    end

    def calculate_step_function_target(metrics, current_workers)
      # Step function uses fixed thresholds (future implementation)
      # For now, fall back to fixed strategy
      if should_scale_up?(metrics, current_workers)
        current_workers + @config.scale_up_increment
      else
        current_workers - @config.scale_down_decrement
      end
    end

    def no_change_decision(current_workers, reason)
      Decision.new(
        action: :no_change,
        from: current_workers,
        to: current_workers,
        reason: reason
      )
    end

    # Lists every scale-up threshold that was met; for the proportional
    # strategy the number of added workers is appended.
    def build_scale_up_reason(metrics, current_workers = nil, target = nil)
      reasons = []

      if metrics.queue_depth >= @config.scale_up_queue_depth
        reasons << "queue_depth=#{metrics.queue_depth} >= #{@config.scale_up_queue_depth}"
      end

      if metrics.oldest_job_age_seconds >= @config.scale_up_latency_seconds
        reasons << "latency=#{metrics.oldest_job_age_seconds.round}s >= #{@config.scale_up_latency_seconds}s"
      end

      base_reason = reasons.join(', ')
      return base_reason unless @config.scaling_strategy == :proportional && current_workers && target

      "#{base_reason} [proportional: +#{target - current_workers} workers]"
    end

    # Describes why scale-down was chosen (idle queue, or the thresholds that
    # were met); for the proportional strategy the number of removed workers
    # is appended.
    def build_scale_down_reason(metrics, current_workers = nil, target = nil)
      if metrics.idle?
        base_reason = 'queue is idle (no pending or claimed jobs)'
      else
        reasons = []

        if metrics.queue_depth <= @config.scale_down_queue_depth
          reasons << "queue_depth=#{metrics.queue_depth} <= #{@config.scale_down_queue_depth}"
        end

        if metrics.oldest_job_age_seconds <= @config.scale_down_latency_seconds
          reasons << "latency=#{metrics.oldest_job_age_seconds.round}s <= #{@config.scale_down_latency_seconds}s"
        end

        base_reason = reasons.join(', ')
      end

      return base_reason unless @config.scaling_strategy == :proportional && current_workers && target

      "#{base_reason} [proportional: -#{current_workers - target} workers]"
    end

    # Explains a no-change outcome: blocked by max_workers, blocked by
    # min_workers, or simply within the configured thresholds.
    def determine_no_change_reason(metrics, current_workers)
      if current_workers >= @config.max_workers && scale_up_triggered?(metrics)
        "at max_workers (#{@config.max_workers})"
      elsif current_workers <= @config.min_workers && scale_down_triggered?(metrics)
        "at min_workers (#{@config.min_workers})"
      else
        "metrics within normal range (depth=#{metrics.queue_depth}, latency=#{metrics.oldest_job_age_seconds.round}s)"
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidQueueHerokuAutoscaler
  # Root of the autoscaler's exception hierarchy; every error below
  # inherits from it.
  class Error < StandardError
  end

  # Signals an invalid configuration.
  class ConfigurationError < Error
  end

  # Signals that the advisory lock could not be acquired.
  class LockError < Error
  end

  # Signals a failed Heroku API call. Optionally carries the status code
  # and response body supplied by the raiser.
  class HerokuAPIError < Error
    attr_reader :status_code, :response_body

    def initialize(message, status_code: nil, response_body: nil)
      @status_code = status_code
      @response_body = response_body
      super(message)
    end
  end

  # Signals a failed Kubernetes API call. Optionally carries the
  # underlying exception supplied by the raiser.
  class KubernetesAPIError < Error
    attr_reader :original_error

    def initialize(message, original_error: nil)
      @original_error = original_error
      super(message)
    end
  end

  # Autoscaler error for metrics-related failures.
  class MetricsError < Error
  end

  # Carries the number of seconds left in a cooldown; the message reports
  # that number rounded to whole seconds.
  class CooldownActiveError < Error
    attr_reader :remaining_seconds

    def initialize(remaining_seconds)
      super("Cooldown active, #{remaining_seconds.round}s remaining")
      @remaining_seconds = remaining_seconds
    end
  end
end
|