solid_queue_autoscaler 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/lib/solid_queue_autoscaler/adapters/heroku.rb +16 -1
- data/lib/solid_queue_autoscaler/autoscale_job.rb +30 -0
- data/lib/solid_queue_autoscaler/configuration.rb +12 -0
- data/lib/solid_queue_autoscaler/decision_engine.rb +33 -5
- data/lib/solid_queue_autoscaler/scaler.rb +6 -0
- data/lib/solid_queue_autoscaler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0f94990d679958250735f0fd7fa473c1d7e232d0f817479cfbd5f13c68d4f75b
|
|
4
|
+
data.tar.gz: 89178dfc3eed39b2553c99e778412158b6fe0582f6e3b59628892fd3a5e0251b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9a7aaffab4800907fdf86f2037e64ce7aaff36999f1fe9bd4eae77e768ac4633261cc2ebb0e411977314ae9b50b110f5dc2350bc5c2b4ce059f41dd02bd3b91e
|
|
7
|
+
data.tar.gz: 3787b68c1338cd3df627487a145419c82a687930b050999237943008b1cb2a5521dac9bf74e0e82febf4571a789dc28fcaffc7a721676b402954320210734c57
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.0.20] - 2025-02-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **Scale-from-zero optimization** - New configuration options for faster cold starts when `min_workers = 0`:
|
|
14
|
+
- `scale_from_zero_queue_depth` (default: 1) - Scale up immediately when at 0 workers if queue has at least this many jobs
|
|
15
|
+
- `scale_from_zero_latency_seconds` (default: 1.0) - Job must be at least this old before scaling up (gives other workers a chance to pick it up first)
|
|
16
|
+
- When at 0 workers, uses these lower thresholds instead of the normal `scale_up_queue_depth` and `scale_up_latency_seconds`
|
|
17
|
+
- Cooldowns are bypassed when scaling from 0 workers for fast cold start
|
|
18
|
+
- Comprehensive tests in `scale_to_zero_workflow_spec.rb`
|
|
19
|
+
|
|
20
|
+
## [1.0.19] - 2025-02-02
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
- **AutoscaleJob string/symbol validation** - Detects when `recurring.yml` passes a quoted string like `":all"` instead of the symbol `:all`
|
|
24
|
+
- Raises a helpful `ConfigurationError` with exact before/after YAML examples
|
|
25
|
+
- Plain strings like `"default"` are leniently converted to symbols
|
|
26
|
+
- New `normalize_worker_name` helper method with comprehensive tests
|
|
27
|
+
|
|
28
|
+
### Improved
|
|
29
|
+
- **Better error message for missing Procfile process types** - When `batch_update` returns 404 (process type doesn't exist), the error now explains:
|
|
30
|
+
- The process type doesn't exist in the Procfile
|
|
31
|
+
- How to verify with `heroku ps -a <app_name>`
|
|
32
|
+
- That the configured `process_type` must exactly match a Procfile entry
|
|
33
|
+
|
|
10
34
|
## [1.0.18] - 2025-01-31
|
|
11
35
|
|
|
12
36
|
### Fixed
|
|
@@ -115,9 +115,24 @@ module SolidQueueAutoscaler
|
|
|
115
115
|
end
|
|
116
116
|
quantity
|
|
117
117
|
rescue Excon::Error => e
|
|
118
|
+
status = e.respond_to?(:response) ? e.response&.status : nil
|
|
119
|
+
|
|
120
|
+
# 404 from batch_update means the process type doesn't exist in the Procfile
|
|
121
|
+
# This is different from 404 on formation.update (which means scaled to 0)
|
|
122
|
+
if status == 404
|
|
123
|
+
raise HerokuAPIError.new(
|
|
124
|
+
"Process type '#{process_type}' does not exist. " \
|
|
125
|
+
"Verify that '#{process_type}:' is defined in your Procfile. " \
|
|
126
|
+
"Available process types can be viewed with 'heroku ps -a #{app_name}' or in your Procfile. " \
|
|
127
|
+
"The configured process_type must exactly match a Procfile entry.",
|
|
128
|
+
status_code: status,
|
|
129
|
+
response_body: e.respond_to?(:response) ? e.response&.body : nil
|
|
130
|
+
)
|
|
131
|
+
end
|
|
132
|
+
|
|
118
133
|
raise HerokuAPIError.new(
|
|
119
134
|
"Failed to create formation #{process_type} with quantity #{quantity}: #{e.message}",
|
|
120
|
-
status_code:
|
|
135
|
+
status_code: status,
|
|
121
136
|
response_body: e.respond_to?(:response) ? e.response&.body : nil
|
|
122
137
|
)
|
|
123
138
|
end
|
|
@@ -21,6 +21,8 @@ module SolidQueueAutoscaler
|
|
|
21
21
|
# @param worker_name [Symbol] The worker type to scale (:default, :critical_worker, etc.)
|
|
22
22
|
# Pass :all to scale all registered workers
|
|
23
23
|
def perform(worker_name = :default)
|
|
24
|
+
worker_name = normalize_worker_name(worker_name)
|
|
25
|
+
|
|
24
26
|
if worker_name == :all
|
|
25
27
|
perform_scale_all
|
|
26
28
|
else
|
|
@@ -79,5 +81,33 @@ module SolidQueueAutoscaler
|
|
|
79
81
|
worker_label = worker_name == :default ? '' : "[#{worker_name}] "
|
|
80
82
|
Rails.logger.error("[AutoscaleJob] #{worker_label}Failed: #{result.error&.message}")
|
|
81
83
|
end
|
|
84
|
+
|
|
85
|
+
# Normalize and validate worker_name argument.
|
|
86
|
+
# Detects common YAML misconfiguration where symbols are quoted as strings.
|
|
87
|
+
#
|
|
88
|
+
# @param worker_name [Symbol, String] The worker name to normalize
|
|
89
|
+
# @return [Symbol] The normalized worker name as a symbol
|
|
90
|
+
# @raise [ConfigurationError] If a string that looks like a symbol is passed
|
|
91
|
+
def normalize_worker_name(worker_name)
|
|
92
|
+
return worker_name if worker_name.is_a?(Symbol)
|
|
93
|
+
|
|
94
|
+
# Detect strings that look like symbols (e.g., ":all", ":default")
|
|
95
|
+
# This is a common YAML misconfiguration
|
|
96
|
+
if worker_name.is_a?(String) && worker_name.start_with?(':')
|
|
97
|
+
symbol_name = worker_name[1..] # Remove the leading colon
|
|
98
|
+
raise ConfigurationError,
|
|
99
|
+
"Invalid worker_name argument: received string #{worker_name.inspect} instead of symbol :#{symbol_name}. " \
|
|
100
|
+
"In your recurring.yml, change:\n" \
|
|
101
|
+
" args:\n" \
|
|
102
|
+
" - \"#{worker_name}\"\n" \
|
|
103
|
+
"to:\n" \
|
|
104
|
+
" args:\n" \
|
|
105
|
+
" - :#{symbol_name}\n" \
|
|
106
|
+
'(Remove the quotes around the symbol)'
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Convert plain strings to symbols (lenient mode)
|
|
110
|
+
worker_name.to_sym
|
|
111
|
+
end
|
|
82
112
|
end
|
|
83
113
|
end
|
|
@@ -69,6 +69,9 @@ module SolidQueueAutoscaler
|
|
|
69
69
|
# AutoscaleJob settings
|
|
70
70
|
attr_accessor :job_queue, :job_priority
|
|
71
71
|
|
|
72
|
+
# Scale-from-zero settings (for faster cold start when min_workers=0)
|
|
73
|
+
attr_accessor :scale_from_zero_queue_depth, :scale_from_zero_latency_seconds
|
|
74
|
+
|
|
72
75
|
def initialize
|
|
73
76
|
# Configuration name (auto-set when using named configurations)
|
|
74
77
|
@name = :default
|
|
@@ -141,6 +144,11 @@ module SolidQueueAutoscaler
|
|
|
141
144
|
# AutoscaleJob settings
|
|
142
145
|
@job_queue = :autoscaler # Queue name for the autoscaler job
|
|
143
146
|
@job_priority = nil # Job priority (lower = higher priority, nil = default)
|
|
147
|
+
|
|
148
|
+
# Scale-from-zero settings (for faster cold start when min_workers=0)
|
|
149
|
+
# When at 0 workers, use these lower thresholds instead of normal scale_up thresholds
|
|
150
|
+
@scale_from_zero_queue_depth = 1 # Scale up if at least 1 job in queue
|
|
151
|
+
@scale_from_zero_latency_seconds = 1.0 # Job must be at least 1 second old (gives other workers a chance)
|
|
144
152
|
end
|
|
145
153
|
|
|
146
154
|
# Returns the lock key, auto-generating based on name if not explicitly set
|
|
@@ -196,6 +204,10 @@ module SolidQueueAutoscaler
|
|
|
196
204
|
errors << "scaling_strategy must be one of: #{VALID_SCALING_STRATEGIES.join(', ')}"
|
|
197
205
|
end
|
|
198
206
|
|
|
207
|
+
# Validate scale-from-zero settings
|
|
208
|
+
errors << 'scale_from_zero_queue_depth must be > 0' if scale_from_zero_queue_depth <= 0
|
|
209
|
+
errors << 'scale_from_zero_latency_seconds must be >= 0' if scale_from_zero_latency_seconds.negative?
|
|
210
|
+
|
|
199
211
|
raise ConfigurationError, errors.join(', ') if errors.any?
|
|
200
212
|
|
|
201
213
|
true
|
|
@@ -41,12 +41,30 @@ module SolidQueueAutoscaler
|
|
|
41
41
|
def should_scale_up?(metrics, current_workers)
|
|
42
42
|
return false if current_workers >= @config.max_workers
|
|
43
43
|
|
|
44
|
+
# Special case: scale-from-zero uses lower thresholds for faster cold start
|
|
45
|
+
# This allows immediate scaling when at 0 workers with any work in queue
|
|
46
|
+
if current_workers.zero? && @config.min_workers.zero?
|
|
47
|
+
return should_scale_from_zero?(metrics)
|
|
48
|
+
end
|
|
49
|
+
|
|
44
50
|
queue_depth_high = metrics.queue_depth >= @config.scale_up_queue_depth
|
|
45
51
|
latency_high = metrics.oldest_job_age_seconds >= @config.scale_up_latency_seconds
|
|
46
52
|
|
|
47
53
|
queue_depth_high || latency_high
|
|
48
54
|
end
|
|
49
55
|
|
|
56
|
+
# Scale-from-zero check: uses lower thresholds for faster cold start
|
|
57
|
+
# Requires:
|
|
58
|
+
# 1. Queue depth >= scale_from_zero_queue_depth (default: 1)
|
|
59
|
+
# 2. Oldest job age >= scale_from_zero_latency_seconds (default: 1s)
|
|
60
|
+
# This gives other workers/queues a chance to pick up the job first
|
|
61
|
+
def should_scale_from_zero?(metrics)
|
|
62
|
+
has_work = metrics.queue_depth >= @config.scale_from_zero_queue_depth
|
|
63
|
+
job_old_enough = metrics.oldest_job_age_seconds >= @config.scale_from_zero_latency_seconds
|
|
64
|
+
|
|
65
|
+
has_work && job_old_enough
|
|
66
|
+
end
|
|
67
|
+
|
|
50
68
|
def should_scale_down?(metrics, current_workers)
|
|
51
69
|
return false if current_workers <= @config.min_workers
|
|
52
70
|
|
|
@@ -161,12 +179,22 @@ module SolidQueueAutoscaler
|
|
|
161
179
|
def build_scale_up_reason(metrics, current_workers = nil, target = nil)
|
|
162
180
|
reasons = []
|
|
163
181
|
|
|
164
|
-
if
|
|
165
|
-
|
|
166
|
-
|
|
182
|
+
# Check if this is a scale-from-zero scenario
|
|
183
|
+
is_scale_from_zero = current_workers&.zero? && @config.min_workers.zero? &&
|
|
184
|
+
metrics.queue_depth >= @config.scale_from_zero_queue_depth &&
|
|
185
|
+
metrics.oldest_job_age_seconds >= @config.scale_from_zero_latency_seconds
|
|
167
186
|
|
|
168
|
-
if
|
|
169
|
-
reasons << "
|
|
187
|
+
if is_scale_from_zero
|
|
188
|
+
reasons << "scale_from_zero: queue_depth=#{metrics.queue_depth} >= #{@config.scale_from_zero_queue_depth}"
|
|
189
|
+
reasons << "job_age=#{metrics.oldest_job_age_seconds.round(1)}s >= #{@config.scale_from_zero_latency_seconds}s"
|
|
190
|
+
else
|
|
191
|
+
if metrics.queue_depth >= @config.scale_up_queue_depth
|
|
192
|
+
reasons << "queue_depth=#{metrics.queue_depth} >= #{@config.scale_up_queue_depth}"
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
if metrics.oldest_job_age_seconds >= @config.scale_up_latency_seconds
|
|
196
|
+
reasons << "latency=#{metrics.oldest_job_age_seconds.round}s >= #{@config.scale_up_latency_seconds}s"
|
|
197
|
+
end
|
|
170
198
|
end
|
|
171
199
|
|
|
172
200
|
base_reason = reasons.join(', ')
|
|
@@ -185,6 +185,12 @@ module SolidQueueAutoscaler
|
|
|
185
185
|
end
|
|
186
186
|
|
|
187
187
|
def cooldown_active?(decision)
|
|
188
|
+
# Bypass cooldowns when scaling from zero - we want fast cold start
|
|
189
|
+
# This is safe because there are no workers to destabilize
|
|
190
|
+
if decision.scale_up? && decision.from.zero? && @config.min_workers.zero?
|
|
191
|
+
return false
|
|
192
|
+
end
|
|
193
|
+
|
|
188
194
|
if @config.persist_cooldowns && cooldown_tracker.table_exists?
|
|
189
195
|
# Use database-persisted cooldowns (survives process restarts)
|
|
190
196
|
if decision.scale_up?
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: solid_queue_autoscaler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.18
|
|
4
|
+
version: 1.0.20
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- reillyse
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-02-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activerecord
|