solid_queue_heroku_autoscaler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +128 -0
- data/LICENSE.txt +21 -0
- data/README.md +474 -0
- data/lib/generators/solid_queue_heroku_autoscaler/install_generator.rb +21 -0
- data/lib/generators/solid_queue_heroku_autoscaler/migration_generator.rb +29 -0
- data/lib/generators/solid_queue_heroku_autoscaler/templates/README +41 -0
- data/lib/generators/solid_queue_heroku_autoscaler/templates/create_solid_queue_autoscaler_state.rb.erb +15 -0
- data/lib/generators/solid_queue_heroku_autoscaler/templates/initializer.rb +52 -0
- data/lib/solid_queue_heroku_autoscaler/adapters/base.rb +102 -0
- data/lib/solid_queue_heroku_autoscaler/adapters/heroku.rb +93 -0
- data/lib/solid_queue_heroku_autoscaler/adapters/kubernetes.rb +158 -0
- data/lib/solid_queue_heroku_autoscaler/adapters.rb +57 -0
- data/lib/solid_queue_heroku_autoscaler/advisory_lock.rb +71 -0
- data/lib/solid_queue_heroku_autoscaler/autoscale_job.rb +71 -0
- data/lib/solid_queue_heroku_autoscaler/configuration.rb +217 -0
- data/lib/solid_queue_heroku_autoscaler/cooldown_tracker.rb +153 -0
- data/lib/solid_queue_heroku_autoscaler/decision_engine.rb +228 -0
- data/lib/solid_queue_heroku_autoscaler/errors.rb +44 -0
- data/lib/solid_queue_heroku_autoscaler/metrics.rb +172 -0
- data/lib/solid_queue_heroku_autoscaler/railtie.rb +149 -0
- data/lib/solid_queue_heroku_autoscaler/scaler.rb +227 -0
- data/lib/solid_queue_heroku_autoscaler/version.rb +5 -0
- data/lib/solid_queue_heroku_autoscaler.rb +106 -0
- metadata +169 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidQueueHerokuAutoscaler
|
|
4
|
+
class Metrics
  # Point-in-time snapshot of queue state, as built by Metrics#collect.
  Result = Struct.new(
    :queue_depth,
    :oldest_job_age_seconds,
    :jobs_per_minute,
    :claimed_jobs,
    :failed_jobs,
    :blocked_jobs,
    :active_workers,
    :queues_breakdown,
    :collected_at,
    keyword_init: true
  ) do
    # @return [Boolean] true when nothing is waiting and nothing is claimed
    def idle?
      queue_depth.zero? && claimed_jobs.zero?
    end

    # Alias for the age of the oldest ready job.
    # @return [Float] seconds
    def latency_seconds
      oldest_job_age_seconds
    end

    # @return [Hash{Symbol => Object}] every member keyed by its name
    def to_h
      {
        queue_depth: queue_depth,
        oldest_job_age_seconds: oldest_job_age_seconds,
        jobs_per_minute: jobs_per_minute,
        claimed_jobs: claimed_jobs,
        failed_jobs: failed_jobs,
        blocked_jobs: blocked_jobs,
        active_workers: active_workers,
        queues_breakdown: queues_breakdown,
        collected_at: collected_at
      }
    end
  end

  # @param config [Configuration, nil] configuration to read the connection,
  #   queue filter and table prefix from; falls back to the default
  #   configuration when nil
  def initialize(config: nil)
    @config = config || SolidQueueHerokuAutoscaler.config
  end

  # Runs every metric query and bundles the values into a Result.
  # @return [Result]
  def collect
    Result.new(
      queue_depth: queue_depth,
      oldest_job_age_seconds: oldest_job_age_seconds,
      jobs_per_minute: jobs_per_minute,
      claimed_jobs: claimed_jobs_count,
      failed_jobs: failed_jobs_count,
      blocked_jobs: blocked_jobs_count,
      active_workers: active_workers_count,
      queues_breakdown: queues_breakdown,
      collected_at: Time.current
    )
  end

  # @return [Integer] number of ready executions in the configured queues
  def queue_depth
    sql = <<~SQL
      SELECT COUNT(*) FROM #{ready_executions_table}
      WHERE 1=1
      #{queue_filter_clause}
    SQL
    connection.select_value(sql).to_i
  end

  # @return [Float] age in seconds of the oldest ready execution in the
  #   configured queues; 0.0 when the query yields NULL (no matching rows)
  def oldest_job_age_seconds
    sql = <<~SQL
      SELECT EXTRACT(EPOCH FROM (NOW() - MIN(created_at)))
      FROM #{ready_executions_table}
      WHERE 1=1
      #{queue_filter_clause}
    SQL
    connection.select_value(sql).to_f
  end

  # @return [Integer] jobs in the configured queues whose finished_at falls
  #   within the last minute
  def jobs_per_minute
    sql = <<~SQL
      SELECT COUNT(*)
      FROM #{jobs_table}
      WHERE finished_at IS NOT NULL
      AND finished_at > NOW() - INTERVAL '1 minute'
      #{queue_filter_clause('queue_name')}
    SQL
    connection.select_value(sql).to_i
  end

  # @return [Integer] claimed executions belonging to the configured queues
  #   (all claimed executions when no queue filter is configured)
  def claimed_jobs_count
    count_executions_via_jobs(claimed_executions_table)
  end

  # @return [Integer] failed executions belonging to the configured queues
  #   (all failed executions when no queue filter is configured)
  def failed_jobs_count
    count_executions_via_jobs(failed_executions_table)
  end

  # @return [Integer] blocked executions in the configured queues
  def blocked_jobs_count
    # NOTE(review): assumes the blocked executions table carries its own
    # queue_name column, as in Solid Queue's stock schema - confirm.
    sql = <<~SQL
      SELECT COUNT(*) FROM #{blocked_executions_table}
      WHERE 1=1
      #{queue_filter_clause}
    SQL
    connection.select_value(sql).to_i
  end

  # @return [Integer] processes of kind 'Worker' whose last heartbeat is less
  #   than five minutes old (not restricted by the queue filter)
  def active_workers_count
    sql = <<~SQL
      SELECT COUNT(*)
      FROM #{processes_table}
      WHERE kind = 'Worker'
      AND last_heartbeat_at > NOW() - INTERVAL '5 minutes'
    SQL
    connection.select_value(sql).to_i
  end

  # Ready-execution counts per queue, largest first, limited to the configured
  # queues so the values add up to #queue_depth.
  # @return [Hash{String => Integer}]
  def queues_breakdown
    sql = <<~SQL
      SELECT queue_name, COUNT(*) as count
      FROM #{ready_executions_table}
      WHERE 1=1
      #{queue_filter_clause}
      GROUP BY queue_name
      ORDER BY count DESC
    SQL
    connection.select_all(sql).to_a.to_h { |row| [row['queue_name'], row['count'].to_i] }
  end

  private

  def connection
    @config.connection
  end

  # True when the configuration names at least one queue to restrict to.
  def queues_filtered?
    @config.queues&.any? ? true : false
  end

  # SQL fragment restricting +column_name+ to the configured queues, or an
  # empty string when no queues are configured. Queue names are quoted through
  # the connection; +column_name+ is interpolated verbatim and must only ever
  # be a trusted, hard-coded identifier.
  def queue_filter_clause(column_name = 'queue_name')
    return '' unless queues_filtered?

    quoted_queues = @config.queues.map { |q| connection.quote(q) }.join(', ')
    "AND #{column_name} IN (#{quoted_queues})"
  end

  # Counts rows of an executions table that has no queue column of its own.
  # Without a queue filter this is a plain COUNT(*); with one, rows are joined
  # to the jobs table so the filter can be applied to the job's queue.
  # NOTE(review): assumes the executions table has a job_id column referencing
  # the jobs table's id, as in Solid Queue's stock schema - confirm.
  def count_executions_via_jobs(executions_table)
    sql =
      if queues_filtered?
        <<~SQL
          SELECT COUNT(*)
          FROM #{executions_table} e
          INNER JOIN #{jobs_table} j ON j.id = e.job_id
          WHERE 1=1
          #{queue_filter_clause('j.queue_name')}
        SQL
      else
        <<~SQL
          SELECT COUNT(*) FROM #{executions_table}
        SQL
      end
    connection.select_value(sql).to_i
  end

  # Table name helpers using configurable prefix
  def table_prefix
    @config.table_prefix
  end

  def ready_executions_table
    "#{table_prefix}ready_executions"
  end

  def jobs_table
    "#{table_prefix}jobs"
  end

  def claimed_executions_table
    "#{table_prefix}claimed_executions"
  end

  def failed_executions_table
    "#{table_prefix}failed_executions"
  end

  def blocked_executions_table
    "#{table_prefix}blocked_executions"
  end

  def processes_table
    "#{table_prefix}processes"
  end
end
|
|
172
|
+
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidQueueHerokuAutoscaler
|
|
4
|
+
# Hooks the autoscaler into a Rails application by registering the
# solid_queue_autoscaler:* rake tasks.
class Railtie < Rails::Railtie
  initializer 'solid_queue_heroku_autoscaler.configure' do
    # Configuration happens via initializer, nothing to do here
  end

  rake_tasks do
    namespace :solid_queue_autoscaler do
      desc 'Run the autoscaler once for a specific worker (default: :default). Use WORKER=name'
      task scale: :environment do
        worker_name = (ENV['WORKER'] || 'default').to_sym
        result = SolidQueueHerokuAutoscaler.scale!(worker_name)
        print_scale_result(result, worker_name)
        # Match scale_all: a failed run must produce a non-zero exit status.
        exit 1 unless result.success?
      end

      desc 'Run the autoscaler for all configured workers'
      task scale_all: :environment do
        results = SolidQueueHerokuAutoscaler.scale_all!
        if results.empty?
          puts 'No workers configured'
          exit 1
        end
        results.each do |worker_name, result|
          print_scale_result(result, worker_name)
        end
        exit 1 if results.values.any? { |r| !r.success? }
      end

      desc 'List all configured workers'
      task workers: :environment do
        workers = SolidQueueHerokuAutoscaler.registered_workers
        if workers.empty?
          puts 'No workers configured'
        else
          puts "Configured Workers (#{workers.size}):"
          workers.each do |name|
            config = SolidQueueHerokuAutoscaler.config(name)
            queues = queues_label(config)
            puts " #{name}:"
            puts " Process Type: #{config.process_type}"
            puts " Queues: #{queues}"
            puts " Workers: #{config.min_workers}-#{config.max_workers}"
          end
        end
      end

      desc 'Show current queue metrics for a worker. Use WORKER=name'
      task metrics: :environment do
        worker_name = (ENV['WORKER'] || 'default').to_sym
        metrics = SolidQueueHerokuAutoscaler.metrics(worker_name)
        config = SolidQueueHerokuAutoscaler.config(worker_name)
        puts "Queue Metrics#{" [#{worker_name}]" unless worker_name == :default}:"
        puts " Queues Filter: #{queues_label(config)}"
        puts " Queue Depth: #{metrics.queue_depth}"
        puts " Oldest Job Age: #{metrics.oldest_job_age_seconds.round}s"
        puts " Jobs/Minute: #{metrics.jobs_per_minute}"
        puts " Claimed Jobs: #{metrics.claimed_jobs}"
        puts " Failed Jobs: #{metrics.failed_jobs}"
        puts " Blocked Jobs: #{metrics.blocked_jobs}"
        puts " Active Workers: #{metrics.active_workers}"
        puts " Queues Breakdown: #{metrics.queues_breakdown}"
      end

      desc 'Show current worker formation. Use WORKER=name'
      task formation: :environment do
        worker_name = (ENV['WORKER'] || 'default').to_sym
        workers = SolidQueueHerokuAutoscaler.current_workers(worker_name)
        config = SolidQueueHerokuAutoscaler.config(worker_name)
        puts "Current Formation#{" [#{worker_name}]" unless worker_name == :default}:"
        puts " Process Type: #{config.process_type}"
        puts " Workers: #{workers}"
        puts " Min: #{config.min_workers}"
        puts " Max: #{config.max_workers}"
        puts " Queues: #{queues_label(config)}"
      end

      desc 'Show cooldown state for a worker. Use WORKER=name'
      task cooldown: :environment do
        worker_name = (ENV['WORKER'] || 'default').to_sym
        config = SolidQueueHerokuAutoscaler.config(worker_name)
        tracker = SolidQueueHerokuAutoscaler::CooldownTracker.new(config: config, key: worker_name.to_s)

        puts "Cooldown State#{" [#{worker_name}]" unless worker_name == :default}:"
        puts " Table Exists: #{tracker.table_exists?}"

        if tracker.table_exists?
          state = tracker.state
          puts " Last Scale Up: #{state[:last_scale_up_at] || 'never'}"
          puts " Last Scale Down: #{state[:last_scale_down_at] || 'never'}"
          puts " Scale Up Cooldown Active: #{tracker.cooldown_active_for_scale_up?}"
          puts " Scale Down Cooldown Active: #{tracker.cooldown_active_for_scale_down?}"

          if tracker.cooldown_active_for_scale_up?
            puts " Scale Up Cooldown Remaining: #{tracker.scale_up_cooldown_remaining.round}s"
          end
          if tracker.cooldown_active_for_scale_down?
            puts " Scale Down Cooldown Remaining: #{tracker.scale_down_cooldown_remaining.round}s"
          end
        else
          # No state table: fall back to the timestamps the Scaler class keeps in memory.
          puts ' (Using in-memory cooldowns - run migration for persistence)'
          scale_up = SolidQueueHerokuAutoscaler::Scaler.last_scale_up_at(worker_name)
          scale_down = SolidQueueHerokuAutoscaler::Scaler.last_scale_down_at(worker_name)
          puts " In-Memory Scale Up: #{scale_up || 'never'}"
          puts " In-Memory Scale Down: #{scale_down || 'never'}"
        end
      end

      desc 'Reset cooldown state for a worker (or all if WORKER=all). Use WORKER=name'
      task reset_cooldown: :environment do
        worker_name = ENV['WORKER']&.to_sym

        # An unset WORKER is treated the same as WORKER=all.
        if worker_name == :all || worker_name.nil?
          # Reset all workers
          SolidQueueHerokuAutoscaler.registered_workers.each do |name|
            config = SolidQueueHerokuAutoscaler.config(name)
            tracker = SolidQueueHerokuAutoscaler::CooldownTracker.new(config: config, key: name.to_s)
            tracker.reset! if tracker.table_exists?
          end
          SolidQueueHerokuAutoscaler::Scaler.reset_cooldowns!
          puts 'All cooldown states reset'
        else
          config = SolidQueueHerokuAutoscaler.config(worker_name)
          tracker = SolidQueueHerokuAutoscaler::CooldownTracker.new(config: config, key: worker_name.to_s)
          tracker.reset! if tracker.table_exists?
          SolidQueueHerokuAutoscaler::Scaler.reset_cooldowns!(worker_name)
          puts "Cooldown state reset for #{worker_name}"
        end
      end

      # Human-readable queue filter for a configuration: the comma-separated
      # queue names, or 'all' when the list is nil or empty.
      def queues_label(config)
        queues = config.queues
        queues&.any? ? queues.join(', ') : 'all'
      end

      # Prints a one-line summary of a scale result, prefixed with the worker
      # name unless it is the :default worker.
      def print_scale_result(result, worker_name)
        prefix = worker_name == :default ? '' : "[#{worker_name}] "
        if result.success?
          if result.scaled?
            puts "#{prefix}Scaled #{result.decision.from} -> #{result.decision.to} workers"
          elsif result.skipped?
            puts "#{prefix}Skipped: #{result.skipped_reason}"
          else
            puts "#{prefix}No change needed: #{result.decision&.reason}"
          end
        else
          puts "#{prefix}Error: #{result.error}"
        end
      end
    end
  end
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidQueueHerokuAutoscaler
|
|
4
|
+
# Runs one autoscaling pass: collect metrics, ask the decision engine what to
# do, honour cooldowns, and apply the result through the configured adapter.
class Scaler
  # Outcome of a single scaling pass.
  ScaleResult = Struct.new(
    :success,
    :decision,
    :metrics,
    :error,
    :skipped_reason,
    :executed_at,
    keyword_init: true
  ) do
    # @return [Boolean] true when the pass finished without raising
    def success?
      success == true
    end

    # @return [Boolean] true when the pass was skipped (disabled, lock not
    #   acquired, or cooldown active)
    def skipped?
      !skipped_reason.nil?
    end

    # @return [Boolean] true only when a scaling action was actually applied.
    #   A skipped pass may still carry the decision that would have been
    #   applied, so it must be ruled out explicitly.
    def scaled?
      success? && !skipped? && !decision.nil? && !decision.no_change?
    end
  end

  # Created once at load time. Building the mutex lazily with ||= could hand
  # two racing threads two different mutexes.
  COOLDOWN_MUTEX = Mutex.new

  # Per-configuration cooldown tracking for multi-worker support
  class << self
    # @return [Mutex] the lock guarding the in-memory cooldown registry
    def cooldown_mutex
      COOLDOWN_MUTEX
    end

    # @return [Hash] registry of { config_name => { scale_up:, scale_down: } }
    def cooldowns
      @cooldowns ||= {}
    end

    # @param config_name [Symbol]
    # @return [Time, nil] when the named configuration last scaled up
    def last_scale_up_at(config_name = :default)
      cooldown_mutex.synchronize { cooldowns.dig(config_name, :scale_up) }
    end

    def set_last_scale_up_at(config_name, value)
      cooldown_mutex.synchronize do
        cooldowns[config_name] ||= {}
        cooldowns[config_name][:scale_up] = value
      end
    end

    # @param config_name [Symbol]
    # @return [Time, nil] when the named configuration last scaled down
    def last_scale_down_at(config_name = :default)
      cooldown_mutex.synchronize { cooldowns.dig(config_name, :scale_down) }
    end

    def set_last_scale_down_at(config_name, value)
      cooldown_mutex.synchronize do
        cooldowns[config_name] ||= {}
        cooldowns[config_name][:scale_down] = value
      end
    end

    # Clears the cooldown timestamps of one configuration, or of all of them
    # when +config_name+ is nil.
    def reset_cooldowns!(config_name = nil)
      cooldown_mutex.synchronize do
        if config_name
          cooldowns.delete(config_name)
        else
          @cooldowns = {}
        end
      end
    end

    # Backward compatibility setters
    def last_scale_up_at=(value)
      set_last_scale_up_at(:default, value)
    end

    def last_scale_down_at=(value)
      set_last_scale_down_at(:default, value)
    end
  end

  # @param config [Configuration, nil] falls back to the default configuration
  def initialize(config: nil)
    @config = config || SolidQueueHerokuAutoscaler.config
    @lock = AdvisoryLock.new(config: @config)
    @metrics_collector = Metrics.new(config: @config)
    @decision_engine = DecisionEngine.new(config: @config)
    @adapter = @config.adapter
  end

  # Runs one pass unless the autoscaler is disabled or the advisory lock
  # cannot be acquired; both cases return a skipped result.
  # @return [ScaleResult]
  def run
    return skipped_result('Autoscaler is disabled') unless @config.enabled?

    return skipped_result('Could not acquire advisory lock (another instance is running)') unless @lock.try_lock

    begin
      execute_scaling
    ensure
      @lock.release
    end
  end

  # Runs one pass inside AdvisoryLock#with_lock. Unlike #run, it does not
  # check whether the autoscaler is enabled.
  # @return [ScaleResult]
  def run!
    @lock.with_lock do
      execute_scaling
    end
  end

  private

  # Collects metrics, obtains a decision and applies it unless it is a no-op
  # or blocked by a cooldown. Any StandardError becomes an error result.
  def execute_scaling
    metrics = @metrics_collector.collect
    current_workers = @adapter.current_workers
    decision = @decision_engine.decide(metrics: metrics, current_workers: current_workers)

    log_decision(decision, metrics)

    return success_result(decision, metrics) if decision.no_change?

    if cooldown_active?(decision)
      remaining = cooldown_remaining(decision)
      return skipped_result("Cooldown active (#{remaining.round}s remaining)", decision: decision, metrics: metrics)
    end

    apply_decision(decision, metrics)
  rescue StandardError => e
    error_result(e)
  end

  # Scales through the adapter, then records the time so the matching
  # cooldown starts counting.
  def apply_decision(decision, metrics)
    @adapter.scale(decision.to)
    record_scale_time(decision)

    log_scale_action(decision)

    success_result(decision, metrics)
  end

  # True when the last scale in the decision's direction happened less than
  # the configured cooldown ago. Each direction has its own cooldown.
  def cooldown_active?(decision)
    config_name = @config.name
    if decision.scale_up?
      last_scale_up = self.class.last_scale_up_at(config_name)
      return false unless last_scale_up

      Time.current - last_scale_up < @config.effective_scale_up_cooldown
    elsif decision.scale_down?
      last_scale_down = self.class.last_scale_down_at(config_name)
      return false unless last_scale_down

      Time.current - last_scale_down < @config.effective_scale_down_cooldown
    else
      false
    end
  end

  # Seconds left on the cooldown for the decision's direction. Only called
  # after #cooldown_active? returned true, so the timestamp is present.
  def cooldown_remaining(decision)
    config_name = @config.name
    if decision.scale_up?
      elapsed = Time.current - self.class.last_scale_up_at(config_name)
      @config.effective_scale_up_cooldown - elapsed
    else
      elapsed = Time.current - self.class.last_scale_down_at(config_name)
      @config.effective_scale_down_cooldown - elapsed
    end
  end

  def record_scale_time(decision)
    config_name = @config.name
    if decision.scale_up?
      self.class.set_last_scale_up_at(config_name, Time.current)
    elsif decision.scale_down?
      self.class.set_last_scale_down_at(config_name, Time.current)
    end
  end

  # "[name] " for named configurations, empty for the :default one.
  def worker_label
    @config.name == :default ? '' : "[#{@config.name}] "
  end

  def log_decision(decision, metrics)
    logger.info(
      "[Autoscaler] #{worker_label}Evaluated: action=#{decision.action} " \
      "workers=#{decision.from}->#{decision.to} " \
      "queue_depth=#{metrics.queue_depth} " \
      "latency=#{metrics.oldest_job_age_seconds.round}s " \
      "reason=\"#{decision.reason}\""
    )
  end

  def log_scale_action(decision)
    prefix = @config.dry_run? ? '[DRY RUN] ' : ''
    logger.info(
      "#{prefix}[Autoscaler] #{worker_label}Scaling #{decision.action}: " \
      "#{decision.from} -> #{decision.to} workers (#{decision.reason})"
    )
  end

  def success_result(decision, metrics)
    ScaleResult.new(
      success: true,
      decision: decision,
      metrics: metrics,
      executed_at: Time.current
    )
  end

  # A skipped pass counts as successful; the decision and metrics are attached
  # when they were already computed.
  def skipped_result(reason, decision: nil, metrics: nil)
    logger.debug("[Autoscaler] Skipped: #{reason}")

    ScaleResult.new(
      success: true,
      decision: decision,
      metrics: metrics,
      skipped_reason: reason,
      executed_at: Time.current
    )
  end

  def error_result(error)
    logger.error("[Autoscaler] Error: #{error.class}: #{error.message}")

    ScaleResult.new(
      success: false,
      error: error,
      executed_at: Time.current
    )
  end

  def logger
    @config.logger
  end
end
|
|
227
|
+
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_record'
|
|
4
|
+
require 'active_support'
|
|
5
|
+
require 'active_support/core_ext/numeric/time'
|
|
6
|
+
|
|
7
|
+
require_relative 'solid_queue_heroku_autoscaler/version'
|
|
8
|
+
require_relative 'solid_queue_heroku_autoscaler/errors'
|
|
9
|
+
require_relative 'solid_queue_heroku_autoscaler/adapters'
|
|
10
|
+
require_relative 'solid_queue_heroku_autoscaler/configuration'
|
|
11
|
+
require_relative 'solid_queue_heroku_autoscaler/advisory_lock'
|
|
12
|
+
require_relative 'solid_queue_heroku_autoscaler/metrics'
|
|
13
|
+
require_relative 'solid_queue_heroku_autoscaler/decision_engine'
|
|
14
|
+
require_relative 'solid_queue_heroku_autoscaler/cooldown_tracker'
|
|
15
|
+
require_relative 'solid_queue_heroku_autoscaler/scaler'
|
|
16
|
+
|
|
17
|
+
# Entry points for configuring and running the autoscaler. Each worker type
# has its own named Configuration; :default is used when no name is given.
module SolidQueueHerokuAutoscaler
  class << self
    # Named configurations, keyed by worker type.
    # @return [Hash<Symbol, Configuration>]
    def configurations
      @configurations ||= {}
    end

    # Fetch (or create) the configuration registered under +name+, stamp it
    # with that name, let the caller customise it, then validate it.
    # @param name [Symbol] worker type name (e.g. :critical_worker)
    # @yield [Configuration] the configuration to customise
    # @return [Configuration] the validated configuration
    def configure(name = :default)
      (configurations[name] ||= Configuration.new).tap do |entry|
        entry.name = name
        yield(entry) if block_given?
        entry.validate!
      end
    end

    # Look up a worker type's configuration, registering one through
    # #configure when none exists yet.
    # @param name [Symbol] worker type name
    # @return [Configuration]
    def config(name = :default)
      existing = configurations[name]
      existing || configure(name)
    end

    # Run a single scaling pass for one worker type.
    # @param name [Symbol] worker type name
    # @return [Scaler::ScaleResult]
    def scale!(name = :default)
      scaler = Scaler.new(config: config(name))
      scaler.run
    end

    # Run a scaling pass for every registered worker type.
    # @return [Hash<Symbol, Scaler::ScaleResult>] results keyed by worker name
    def scale_all!
      # Hash#keys is a snapshot, so the registry may change while scaling.
      configurations.keys.to_h { |name| [name, scale!(name)] }
    end

    # Collect queue metrics for one worker type.
    # @param name [Symbol] worker type name
    # @return [Metrics::Result]
    def metrics(name = :default)
      collector = Metrics.new(config: config(name))
      collector.collect
    end

    # Ask the worker type's adapter how many workers are running.
    # @param name [Symbol] worker type name
    # @return [Integer]
    def current_workers(name = :default)
      adapter = config(name).adapter
      adapter.current_workers
    end

    # @return [Array<Symbol>] names of all registered worker types
    def registered_workers
      configurations.keys
    end

    # Drop every configuration and all in-memory cooldowns (useful for testing).
    def reset_configuration!
      @configurations = {}
      Scaler.reset_cooldowns!
    end

    # Backward compatibility: reader for the :default configuration.
    # @return [Configuration, nil]
    def configuration
      configurations[:default]
    end

    # Backward compatibility: writer for the :default configuration.
    # Assigning nil clears the whole registry.
    def configuration=(config_obj)
      return @configurations = {} if config_obj.nil?

      config_obj.name ||= :default
      configurations[:default] = config_obj
    end
  end
end
|
|
103
|
+
|
|
104
|
+
require_relative 'solid_queue_heroku_autoscaler/railtie' if defined?(Rails::Railtie)
|
|
105
|
+
|
|
106
|
+
require_relative 'solid_queue_heroku_autoscaler/autoscale_job' if defined?(ActiveJob::Base)
|