solid_queue_heroku_autoscaler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+
5
module SolidQueueHerokuAutoscaler
  # Settings for one autoscaler instance: platform credentials, worker bounds,
  # scaling thresholds, cooldowns, locking and the infrastructure adapter.
  #
  # Defaults are assigned in {#initialize}. Call {#validate!} after customising
  # the values to reject inconsistent settings before the autoscaler runs.
  class Configuration
    # Values accepted for +scaling_strategy+.
    VALID_SCALING_STRATEGIES = %i[fixed proportional].freeze

    # Per-worker divisors; {#validate!} requires them to be positive only when
    # +scaling_strategy+ is +:proportional+.
    PROPORTIONAL_DIVISORS = %i[
      scale_up_jobs_per_worker
      scale_up_latency_per_worker
      scale_down_jobs_per_worker
    ].freeze

    # Configuration name (for multi-worker support)
    attr_accessor :name

    # Heroku settings
    attr_accessor :heroku_api_key, :heroku_app_name, :process_type

    # Worker limits
    attr_accessor :min_workers, :max_workers

    # Scale-up thresholds and step size
    attr_accessor :scale_up_queue_depth, :scale_up_latency_seconds, :scale_up_increment

    # Scale-down thresholds and step size
    attr_accessor :scale_down_queue_depth, :scale_down_latency_seconds,
                  :scale_down_idle_minutes, :scale_down_decrement

    # Scaling strategy and its per-worker ratios
    attr_accessor :scaling_strategy, :scale_up_jobs_per_worker,
                  :scale_up_latency_per_worker, :scale_down_jobs_per_worker

    # Safety settings; the per-direction values override cooldown_seconds when set
    attr_accessor :cooldown_seconds, :scale_up_cooldown_seconds, :scale_down_cooldown_seconds

    # Advisory lock settings
    attr_accessor :lock_timeout_seconds
    attr_writer :lock_key

    # Behavior settings
    attr_accessor :dry_run, :enabled, :logger

    # Queue filtering
    attr_accessor :queues

    # Database connection
    attr_accessor :database_connection

    # Solid Queue table prefix (default: 'solid_queue_')
    attr_accessor :table_prefix

    # Infrastructure adapter (defaults to Heroku)
    attr_accessor :adapter_class

    # Allow setting a pre-configured adapter instance
    attr_writer :adapter

    # Kubernetes settings (for Kubernetes adapter)
    attr_accessor :kubernetes_deployment, :kubernetes_namespace, :kubernetes_context, :kubernetes_kubeconfig

    # Populates every setting with its default. Heroku and Kubernetes
    # identifiers are read from the environment when present.
    def initialize
      # Configuration name (auto-set when using named configurations)
      @name = :default

      # Heroku settings - required
      @heroku_api_key = ENV.fetch('HEROKU_API_KEY', nil)
      @heroku_app_name = ENV.fetch('HEROKU_APP_NAME', nil)
      @process_type = 'worker'

      # Worker limits
      @min_workers = 1
      @max_workers = 10

      # Scale-up thresholds
      @scale_up_queue_depth = 100
      @scale_up_latency_seconds = 300
      @scale_up_increment = 1

      # Scale-down thresholds
      @scale_down_queue_depth = 10
      @scale_down_latency_seconds = 30
      @scale_down_idle_minutes = 5
      @scale_down_decrement = 1

      # Scaling strategy (:fixed or :proportional)
      @scaling_strategy = :fixed
      @scale_up_jobs_per_worker = 50
      @scale_up_latency_per_worker = 60
      @scale_down_jobs_per_worker = 50

      # Safety settings
      @cooldown_seconds = 120
      @scale_up_cooldown_seconds = nil
      @scale_down_cooldown_seconds = nil

      # Advisory lock settings
      @lock_timeout_seconds = 30
      @lock_key = nil # Auto-generated based on name if not set

      # Behavior
      @dry_run = false
      @enabled = true
      @logger = default_logger

      # Queue filtering (nil = all queues)
      @queues = nil

      # Database connection (defaults to ActiveRecord::Base.connection)
      @database_connection = nil

      # Solid Queue table prefix (default: 'solid_queue_')
      @table_prefix = 'solid_queue_'

      # Infrastructure adapter (defaults to Heroku)
      @adapter_class = nil

      # Kubernetes settings (for Kubernetes adapter)
      @kubernetes_deployment = ENV.fetch('K8S_DEPLOYMENT', nil)
      @kubernetes_namespace = ENV.fetch('K8S_NAMESPACE', 'default')
      @kubernetes_context = ENV.fetch('K8S_CONTEXT', nil)
      @kubernetes_kubeconfig = ENV.fetch('KUBECONFIG', nil)
    end

    # Returns the lock key, auto-generating based on name if not explicitly set
    # Each worker type gets a unique lock to allow parallel scaling
    def lock_key
      @lock_key || "solid_queue_autoscaler_#{name}"
    end

    # Checks the adapter's own configuration and every numeric/string setting.
    #
    # @return [true] when nothing is wrong
    # @raise [ConfigurationError] listing every problem found, comma-separated
    def validate!
      errors = []

      # Validate adapter-specific configuration
      errors.concat(adapter.configuration_errors)
      errors.concat(setting_errors)

      raise ConfigurationError, errors.join(', ') if errors.any?

      true
    end

    # Cooldown applied after a scale-up; falls back to cooldown_seconds.
    def effective_scale_up_cooldown
      scale_up_cooldown_seconds || cooldown_seconds
    end

    # Cooldown applied after a scale-down; falls back to cooldown_seconds.
    def effective_scale_down_cooldown
      scale_down_cooldown_seconds || cooldown_seconds
    end

    # Explicit database_connection if given, otherwise ActiveRecord's.
    def connection
      database_connection || ActiveRecord::Base.connection
    end

    def dry_run?
      dry_run
    end

    def enabled?
      enabled
    end

    # Returns the configured adapter instance.
    # Creates a new instance from adapter_class if not set.
    # Defaults to Heroku adapter.
    def adapter
      @adapter ||= (adapter_class || Adapters::Heroku).new(config: self)
    end

    private

    # Problems with this object's own settings, in a stable order so the
    # combined message stays predictable.
    def setting_errors
      [
        bound_error(:min_workers, :>=),
        bound_error(:max_workers, :>),
        worker_range_error,
        bound_error(:scale_up_queue_depth, :>),
        bound_error(:scale_up_latency_seconds, :>),
        bound_error(:scale_up_increment, :>),
        bound_error(:scale_down_queue_depth, :>=),
        bound_error(:scale_down_decrement, :>),
        bound_error(:cooldown_seconds, :>=),
        bound_error(:lock_timeout_seconds, :>),
        table_prefix_error,
        scaling_strategy_error,
        *proportional_divisor_errors
      ].compact
    end

    # Message when +attribute+ is not a number or fails `value <operator> 0`;
    # nil when the value is acceptable. Non-numeric values are reported here
    # rather than being allowed to raise NoMethodError/ArgumentError.
    def bound_error(attribute, operator)
      value = public_send(attribute)
      return "#{attribute} must be a number" unless value.is_a?(Numeric)

      "#{attribute} must be #{operator} 0" unless value.public_send(operator, 0)
    end

    # Only meaningful once both limits are numeric; otherwise bound_error has
    # already reported the offending one.
    def worker_range_error
      return unless min_workers.is_a?(Numeric) && max_workers.is_a?(Numeric)

      'min_workers cannot exceed max_workers' if min_workers > max_workers
    end

    def table_prefix_error
      prefix = table_prefix.to_s
      return 'table_prefix cannot be nil or empty' if prefix.strip.empty?

      'table_prefix must end with an underscore' unless prefix.end_with?('_')
    end

    def scaling_strategy_error
      return if VALID_SCALING_STRATEGIES.include?(scaling_strategy)

      "scaling_strategy must be one of: #{VALID_SCALING_STRATEGIES.join(', ')}"
    end

    # Zero or negative divisors would break proportional target calculation,
    # so they are rejected up front; other strategies leave them unchecked.
    def proportional_divisor_errors
      return [] unless scaling_strategy == :proportional

      PROPORTIONAL_DIVISORS.map { |attribute| bound_error(attribute, :>) }
    end

    # Rails.logger when running inside Rails with a logger configured,
    # otherwise an INFO-level stdout logger with an autoscaler-tagged format.
    def default_logger
      return Rails.logger if defined?(Rails) && Rails.logger

      Logger.new($stdout).tap do |logger|
        logger.level = Logger::INFO
        logger.formatter = proc do |severity, datetime, _progname, msg|
          "[#{datetime.strftime('%Y-%m-%d %H:%M:%S')}] [SolidQueueAutoscaler] #{severity}: #{msg}\n"
        end
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+
5
module SolidQueueHerokuAutoscaler
  # Records when the last scale-up / scale-down happened in a database table
  # and answers whether the configured cooldown for each direction is still
  # running. Every operation is a no-op (nil / false / {}) when the state
  # table is not present.
  class CooldownTracker
    TABLE_NAME = 'solid_queue_autoscaler_state'
    DEFAULT_KEY = 'default'

    attr_reader :key

    # @param config [Configuration, nil] falls back to the module-level config
    # @param key [String] row key identifying this tracker's state
    def initialize(config: nil, key: DEFAULT_KEY)
      @config = config || SolidQueueHerokuAutoscaler.config
      @key = key
      @table_exists = nil
    end

    # @return [Time, nil] time of the last recorded scale-up, if any
    def last_scale_up_at
      read_timestamp('last_scale_up_at')
    end

    # @return [Time, nil] time of the last recorded scale-down, if any
    def last_scale_down_at
      read_timestamp('last_scale_down_at')
    end

    # @return [Boolean] false when the state table is missing, true once stored
    def record_scale_up!
      return false unless table_exists?

      upsert_state(last_scale_up_at: Time.current)
      true
    end

    # @return [Boolean] false when the state table is missing, true once stored
    def record_scale_down!
      return false unless table_exists?

      upsert_state(last_scale_down_at: Time.current)
      true
    end

    # Deletes this tracker's row.
    #
    # @return [Boolean] false when the state table is missing, true otherwise
    def reset!
      return false unless table_exists?

      connection.execute(<<~SQL)
        DELETE FROM #{TABLE_NAME} WHERE key = #{connection.quote(key)}
      SQL
      true
    end

    def cooldown_active_for_scale_up?
      cooldown_active?(last_scale_up_at, @config.effective_scale_up_cooldown)
    end

    def cooldown_active_for_scale_down?
      cooldown_active?(last_scale_down_at, @config.effective_scale_down_cooldown)
    end

    # @return [Numeric] seconds left before another scale-up is allowed (>= 0)
    def scale_up_cooldown_remaining
      cooldown_remaining(last_scale_up_at, @config.effective_scale_up_cooldown)
    end

    # @return [Numeric] seconds left before another scale-down is allowed (>= 0)
    def scale_down_cooldown_remaining
      cooldown_remaining(last_scale_down_at, @config.effective_scale_down_cooldown)
    end

    # Whether the state table exists. A successful lookup is memoized; a lookup
    # that raises reports false WITHOUT memoizing, so a transient database
    # error does not switch cooldown tracking off for the tracker's lifetime.
    def table_exists?
      return @table_exists unless @table_exists.nil?

      @table_exists = connection.table_exists?(TABLE_NAME)
    rescue StandardError
      false
    end

    # @return [Hash] the raw stored columns for this key, or {} when the table
    #   or the row is missing
    def state
      return {} unless table_exists?

      row = connection.select_one(<<~SQL)
        SELECT last_scale_up_at, last_scale_down_at, updated_at
        FROM #{TABLE_NAME}
        WHERE key = #{connection.quote(key)}
      SQL

      return {} unless row

      {
        last_scale_up_at: row['last_scale_up_at'],
        last_scale_down_at: row['last_scale_down_at'],
        updated_at: row['updated_at']
      }
    end

    private

    def connection
      @config.connection
    end

    # Reads one timestamp column for this key. +column+ is only ever one of
    # the literal column names passed by the public readers above.
    def read_timestamp(column)
      return nil unless table_exists?

      value = connection.select_value(<<~SQL)
        SELECT #{column} FROM #{TABLE_NAME}
        WHERE key = #{connection.quote(key)}
      SQL
      return nil unless value
      # Already a time object: hand it back untouched instead of formatting
      # and re-parsing it, which would drop sub-second precision.
      return value if value.is_a?(Time)

      # NOTE(review): Time.parse treats a string without a zone as local time;
      # confirm the driver returns zone-tagged strings or Time values.
      Time.parse(value.to_s)
    rescue ArgumentError
      nil
    end

    def cooldown_active?(last, cooldown)
      return false unless last

      Time.current - last < cooldown
    end

    def cooldown_remaining(last, cooldown)
      return 0 unless last

      [cooldown - (Time.current - last), 0].max
    end

    # Inserts or updates this key's row with one timestamp. When both keywords
    # are given only last_scale_up_at is written; with neither, nothing runs.
    def upsert_state(last_scale_up_at: nil, last_scale_down_at: nil)
      column, timestamp =
        if last_scale_up_at
          ['last_scale_up_at', last_scale_up_at]
        elsif last_scale_down_at
          ['last_scale_down_at', last_scale_down_at]
        end
      return unless column

      quoted_key = connection.quote(key)
      quoted_now = connection.quote(Time.current)
      quoted_time = connection.quote(timestamp)

      connection.execute(<<~SQL)
        INSERT INTO #{TABLE_NAME} (key, #{column}, created_at, updated_at)
        VALUES (#{quoted_key}, #{quoted_time}, #{quoted_now}, #{quoted_now})
        ON CONFLICT (key) DO UPDATE SET
          #{column} = EXCLUDED.#{column},
          updated_at = EXCLUDED.updated_at
      SQL
    end
  end
end
@@ -0,0 +1,228 @@
1
+ # frozen_string_literal: true
2
+
3
module SolidQueueHerokuAutoscaler
  # Turns queue metrics and the current worker count into a scaling Decision
  # (scale up, scale down, or no change) according to the configuration.
  class DecisionEngine
    # Outcome of {DecisionEngine#decide}: the action, the worker counts before
    # and after, and a human-readable reason.
    Decision = Struct.new(:action, :from, :to, :reason, keyword_init: true) do
      def scale_up?
        action == :scale_up
      end

      def scale_down?
        action == :scale_down
      end

      def no_change?
        action == :no_change
      end

      # Signed change in worker count (positive when scaling up).
      def delta
        to - from
      end
    end

    # @param config [Configuration, nil] falls back to the module-level config
    def initialize(config: nil)
      @config = config || SolidQueueHerokuAutoscaler.config
    end

    # @param metrics [#queue_depth, #oldest_job_age_seconds, #idle?]
    # @param current_workers [Integer]
    # @return [Decision] scale-up takes precedence over scale-down
    def decide(metrics:, current_workers:)
      return no_change_decision(current_workers, 'Autoscaler is disabled') unless @config.enabled?

      if should_scale_up?(metrics, current_workers)
        scale_up_decision(metrics, current_workers)
      elsif should_scale_down?(metrics, current_workers)
        scale_down_decision(metrics, current_workers)
      else
        no_change_decision(current_workers, determine_no_change_reason(metrics, current_workers))
      end
    end

    private

    # --- threshold predicates ------------------------------------------------

    def queue_depth_high?(metrics)
      metrics.queue_depth >= @config.scale_up_queue_depth
    end

    def latency_high?(metrics)
      metrics.oldest_job_age_seconds >= @config.scale_up_latency_seconds
    end

    def queue_depth_low?(metrics)
      metrics.queue_depth <= @config.scale_down_queue_depth
    end

    def latency_low?(metrics)
      metrics.oldest_job_age_seconds <= @config.scale_down_latency_seconds
    end

    # Either signal alone is enough to want more workers.
    def scale_up_signal?(metrics)
      queue_depth_high?(metrics) || latency_high?(metrics)
    end

    # Both signals must be low, unless the queue is entirely idle.
    def scale_down_signal?(metrics)
      (queue_depth_low?(metrics) && latency_low?(metrics)) || metrics.idle?
    end

    def should_scale_up?(metrics, current_workers)
      return false if current_workers >= @config.max_workers

      scale_up_signal?(metrics)
    end

    def should_scale_down?(metrics, current_workers)
      return false if current_workers <= @config.min_workers

      scale_down_signal?(metrics)
    end

    # --- decisions -----------------------------------------------------------

    def scale_up_decision(metrics, current_workers)
      target = calculate_scale_up_target(metrics, current_workers)

      Decision.new(
        action: :scale_up,
        from: current_workers,
        to: target,
        reason: build_scale_up_reason(metrics, current_workers, target)
      )
    end

    def scale_down_decision(metrics, current_workers)
      target = calculate_scale_down_target(metrics, current_workers)

      Decision.new(
        action: :scale_down,
        from: current_workers,
        to: target,
        reason: build_scale_down_reason(metrics, current_workers, target)
      )
    end

    def no_change_decision(current_workers, reason)
      Decision.new(
        action: :no_change,
        from: current_workers,
        to: current_workers,
        reason: reason
      )
    end

    # --- target calculation --------------------------------------------------

    # Strategy-specific target, capped at max_workers.
    def calculate_scale_up_target(metrics, current_workers)
      raw_target = case @config.scaling_strategy
                   when :proportional
                     calculate_proportional_scale_up_target(metrics, current_workers)
                   when :step_function
                     calculate_step_function_target(metrics, current_workers)
                   else # :fixed
                     current_workers + @config.scale_up_increment
                   end

      [raw_target, @config.max_workers].min
    end

    # Strategy-specific target, floored at min_workers.
    def calculate_scale_down_target(metrics, current_workers)
      raw_target = case @config.scaling_strategy
                   when :proportional
                     calculate_proportional_scale_down_target(metrics, current_workers)
                   when :step_function
                     calculate_step_function_target(metrics, current_workers)
                   else # :fixed
                     current_workers - @config.scale_down_decrement
                   end

      [raw_target, @config.min_workers].max
    end

    def calculate_proportional_scale_up_target(metrics, current_workers)
      depth_excess = [metrics.queue_depth - @config.scale_up_queue_depth, 0].max
      latency_excess = [metrics.oldest_job_age_seconds - @config.scale_up_latency_seconds, 0].max

      # Use whichever signal asks for more workers, but never add fewer than
      # scale_up_increment once we have decided to scale up.
      additional_workers = [
        workers_for(depth_excess, @config.scale_up_jobs_per_worker, :ceil),
        workers_for(latency_excess, @config.scale_up_latency_per_worker, :ceil),
        @config.scale_up_increment
      ].max

      current_workers + additional_workers
    end

    def calculate_proportional_scale_down_target(metrics, current_workers)
      # If idle, scale down aggressively
      return @config.min_workers if metrics.idle?

      spare_jobs = [@config.scale_down_queue_depth - metrics.queue_depth, 0].max

      # Remove at least scale_down_decrement once we have decided to scale down.
      workers_to_remove = [
        workers_for(spare_jobs, @config.scale_down_jobs_per_worker, :floor),
        @config.scale_down_decrement
      ].max

      current_workers - workers_to_remove
    end

    # Converts an amount of work into a worker count using float division so
    # Integer inputs are not truncated before rounding. A missing or
    # non-positive ratio contributes 0, leaving the fixed increment/decrement
    # as the effective step instead of raising on division by zero.
    def workers_for(amount, per_worker, rounding)
      return 0 unless per_worker&.positive?

      (amount.to_f / per_worker).public_send(rounding)
    end

    def calculate_step_function_target(metrics, current_workers)
      # Step function uses fixed thresholds (future implementation)
      # For now, fall back to fixed strategy
      if should_scale_up?(metrics, current_workers)
        current_workers + @config.scale_up_increment
      else
        current_workers - @config.scale_down_decrement
      end
    end

    # --- reasons -------------------------------------------------------------

    def build_scale_up_reason(metrics, current_workers = nil, target = nil)
      reasons = []

      if queue_depth_high?(metrics)
        reasons << "queue_depth=#{metrics.queue_depth} >= #{@config.scale_up_queue_depth}"
      end

      if latency_high?(metrics)
        reasons << "latency=#{metrics.oldest_job_age_seconds.round}s >= #{@config.scale_up_latency_seconds}s"
      end

      annotate_proportional(reasons.join(', '), current_workers, target, '+')
    end

    def build_scale_down_reason(metrics, current_workers = nil, target = nil)
      base_reason =
        if metrics.idle?
          'queue is idle (no pending or claimed jobs)'
        else
          reasons = []

          if queue_depth_low?(metrics)
            reasons << "queue_depth=#{metrics.queue_depth} <= #{@config.scale_down_queue_depth}"
          end

          if latency_low?(metrics)
            reasons << "latency=#{metrics.oldest_job_age_seconds.round}s <= #{@config.scale_down_latency_seconds}s"
          end

          reasons.join(', ')
        end

      annotate_proportional(base_reason, current_workers, target, '-')
    end

    # Appends the worker delta to the reason for the proportional strategy;
    # other strategies (or missing counts) return the reason unchanged.
    def annotate_proportional(base_reason, current_workers, target, sign)
      return base_reason unless @config.scaling_strategy == :proportional && current_workers && target

      delta = sign == '+' ? target - current_workers : current_workers - target
      "#{base_reason} [proportional: #{sign}#{delta} workers]"
    end

    def determine_no_change_reason(metrics, current_workers)
      if current_workers >= @config.max_workers && scale_up_signal?(metrics)
        "at max_workers (#{@config.max_workers})"
      elsif current_workers <= @config.min_workers && scale_down_signal?(metrics)
        "at min_workers (#{@config.min_workers})"
      else
        "metrics within normal range (depth=#{metrics.queue_depth}, latency=#{metrics.oldest_job_age_seconds.round}s)"
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module SolidQueueHerokuAutoscaler
  # Root of the autoscaler's exception hierarchy; rescue this to catch any
  # error defined by the library.
  class Error < StandardError
  end

  # Signals that the configuration failed validation.
  class ConfigurationError < Error
  end

  # Signals that the advisory lock could not be obtained.
  class LockError < Error
  end

  # Signals a failed Heroku API call; optionally carries the HTTP status code
  # and response body.
  class HerokuAPIError < Error
    attr_reader :status_code, :response_body

    # @param message [String]
    # @param status_code [Integer, nil]
    # @param response_body [String, nil]
    def initialize(message, status_code: nil, response_body: nil)
      @status_code = status_code
      @response_body = response_body
      super(message)
    end
  end

  # Signals a failed Kubernetes API call; optionally keeps the underlying
  # exception.
  class KubernetesAPIError < Error
    attr_reader :original_error

    # @param message [String]
    # @param original_error [Exception, nil]
    def initialize(message, original_error: nil)
      @original_error = original_error
      super(message)
    end
  end

  # Error type for metrics-related failures.
  class MetricsError < Error
  end

  # Signals that an action was refused because a cooldown is still running.
  class CooldownActiveError < Error
    attr_reader :remaining_seconds

    # @param remaining_seconds [Numeric] time left; rounded in the message
    def initialize(remaining_seconds)
      @remaining_seconds = remaining_seconds
      description = "Cooldown active, #{remaining_seconds.round}s remaining"
      super(description)
    end
  end
end