solid_queue_autoscaler 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -82,6 +82,7 @@ module SolidQueueAutoscaler
82
82
  @metrics_collector = Metrics.new(config: @config)
83
83
  @decision_engine = DecisionEngine.new(config: @config)
84
84
  @adapter = @config.adapter
85
+ @cooldown_tracker = nil # Lazy-loaded when persist_cooldowns is enabled
85
86
  end
86
87
 
87
88
  def run
@@ -124,44 +125,117 @@ module SolidQueueAutoscaler
124
125
  end
125
126
 
126
127
  def apply_decision(decision, metrics)
127
- @adapter.scale(decision.to)
128
+ # Re-verify current workers to catch race conditions where another instance
129
+ # may have scaled while we were making our decision
130
+ verified_current = @adapter.current_workers
131
+
132
+ if verified_current != decision.from
133
+ logger.warn(
134
+ "[Autoscaler] Worker count changed during decision: expected=#{decision.from}, actual=#{verified_current}. " \
135
+ "Re-evaluating..."
136
+ )
137
+
138
+ # If we're already at or above max, don't scale up
139
+ if decision.scale_up? && verified_current >= @config.max_workers
140
+ return skipped_result(
141
+ "Aborted scale_up: already at max_workers (#{verified_current} >= #{@config.max_workers})",
142
+ decision: decision,
143
+ metrics: metrics
144
+ )
145
+ end
146
+
147
+ # If we're already at or below min, don't scale down
148
+ if decision.scale_down? && verified_current <= @config.min_workers
149
+ return skipped_result(
150
+ "Aborted scale_down: already at min_workers (#{verified_current} <= #{@config.min_workers})",
151
+ decision: decision,
152
+ metrics: metrics
153
+ )
154
+ end
155
+ end
156
+
157
+ # Final safety clamp: never exceed configured limits
158
+ target = decision.to.clamp(@config.min_workers, @config.max_workers)
159
+
160
+ if target != decision.to
161
+ logger.warn(
162
+ "[Autoscaler] Clamping target from #{decision.to} to #{target} " \
163
+ "(limits: #{@config.min_workers}-#{@config.max_workers})"
164
+ )
165
+ # Ensure decision reflects the clamped target for logging and events
166
+ decision.to = target
167
+ end
168
+
169
+ @adapter.scale(target)
128
170
  record_scale_time(decision)
129
171
  record_scale_event(decision, metrics)
130
-
172
+
131
173
  log_scale_action(decision)
132
174
 
133
175
  success_result(decision, metrics)
134
176
  end
135
177
 
136
178
  def cooldown_active?(decision)
137
- config_name = @config.name
138
- if decision.scale_up?
139
- last_scale_up = self.class.last_scale_up_at(config_name)
140
- return false unless last_scale_up
141
-
142
- Time.current - last_scale_up < @config.effective_scale_up_cooldown
143
- elsif decision.scale_down?
144
- last_scale_down = self.class.last_scale_down_at(config_name)
145
- return false unless last_scale_down
146
-
147
- Time.current - last_scale_down < @config.effective_scale_down_cooldown
179
+ if @config.persist_cooldowns && cooldown_tracker.table_exists?
180
+ # Use database-persisted cooldowns (survives process restarts)
181
+ if decision.scale_up?
182
+ cooldown_tracker.cooldown_active_for_scale_up?
183
+ elsif decision.scale_down?
184
+ cooldown_tracker.cooldown_active_for_scale_down?
185
+ else
186
+ false
187
+ end
148
188
  else
149
- false
189
+ # Fall back to in-memory cooldowns
190
+ config_name = @config.name
191
+ if decision.scale_up?
192
+ last_scale_up = self.class.last_scale_up_at(config_name)
193
+ return false unless last_scale_up
194
+
195
+ Time.current - last_scale_up < @config.effective_scale_up_cooldown
196
+ elsif decision.scale_down?
197
+ last_scale_down = self.class.last_scale_down_at(config_name)
198
+ return false unless last_scale_down
199
+
200
+ Time.current - last_scale_down < @config.effective_scale_down_cooldown
201
+ else
202
+ false
203
+ end
150
204
  end
151
205
  end
152
206
 
153
207
  def cooldown_remaining(decision)
154
- config_name = @config.name
155
- if decision.scale_up?
156
- elapsed = Time.current - self.class.last_scale_up_at(config_name)
157
- @config.effective_scale_up_cooldown - elapsed
208
+ if @config.persist_cooldowns && cooldown_tracker.table_exists?
209
+ # Use database-persisted cooldowns
210
+ if decision.scale_up?
211
+ cooldown_tracker.scale_up_cooldown_remaining
212
+ else
213
+ cooldown_tracker.scale_down_cooldown_remaining
214
+ end
158
215
  else
159
- elapsed = Time.current - self.class.last_scale_down_at(config_name)
160
- @config.effective_scale_down_cooldown - elapsed
216
+ # Fall back to in-memory cooldowns
217
+ config_name = @config.name
218
+ if decision.scale_up?
219
+ elapsed = Time.current - self.class.last_scale_up_at(config_name)
220
+ @config.effective_scale_up_cooldown - elapsed
221
+ else
222
+ elapsed = Time.current - self.class.last_scale_down_at(config_name)
223
+ @config.effective_scale_down_cooldown - elapsed
224
+ end
161
225
  end
162
226
  end
163
227
 
164
228
  def record_scale_time(decision)
229
+ if @config.persist_cooldowns && cooldown_tracker.table_exists?
230
+ # Use database-persisted cooldowns
231
+ if decision.scale_up?
232
+ cooldown_tracker.record_scale_up!
233
+ elsif decision.scale_down?
234
+ cooldown_tracker.record_scale_down!
235
+ end
236
+ end
237
+
238
+ # Always update in-memory cooldowns as well (for immediate effect within same process)
165
239
  config_name = @config.name
166
240
  if decision.scale_up?
167
241
  self.class.set_last_scale_up_at(config_name, Time.current)
@@ -170,6 +244,10 @@ module SolidQueueAutoscaler
170
244
  end
171
245
  end
172
246
 
247
+ def cooldown_tracker
248
+ @cooldown_tracker ||= CooldownTracker.new(config: @config, key: @config.name.to_s)
249
+ end
250
+
173
251
  def log_decision(decision, metrics)
174
252
  worker_label = @config.name == :default ? '' : "[#{@config.name}] "
175
253
  logger.info(
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SolidQueueAutoscaler
4
- VERSION = '1.0.7'
4
+ VERSION = '1.0.9'
5
5
  end
@@ -99,6 +99,260 @@ module SolidQueueAutoscaler
99
99
  configurations[:default] = config_obj
100
100
  end
101
101
  end
102
+
103
+ # Verify the installation is complete and working.
104
+ # Prints a human-friendly report (when verbose: true) and returns a VerificationResult.
105
+ #
106
+ # Usage (Rails/Heroku console):
107
+ # SolidQueueAutoscaler.verify_setup!
108
+ # # or alias:
109
+ # SolidQueueAutoscaler.verify_install!
110
+ #
111
+ # You can also inspect the returned struct:
112
+ # result = SolidQueueAutoscaler.verify_setup!(verbose: false)
113
+ # result.ok? # => true/false
114
+ # result.to_h # => hash of details
115
+ def verify_setup!(name = :default, verbose: true)
116
+ result = VerificationResult.new
117
+ cfg = config(name)
118
+ connection = cfg.connection
119
+
120
+ output = []
121
+ output << '=' * 60
122
+ output << 'SolidQueueAutoscaler Setup Verification'
123
+ output << '=' * 60
124
+ output << ''
125
+ output << "Version: #{VERSION}"
126
+ output << "Configuration: #{name}"
127
+
128
+ # Check connection type (handles SolidQueue in its own DB)
129
+ if defined?(SolidQueue::Record) && SolidQueue::Record.respond_to?(:connection)
130
+ output << '✓ Using SolidQueue::Record connection (multi-database setup)'
131
+ result.connection_type = :solid_queue_record
132
+ else
133
+ output << '✓ Using ActiveRecord::Base connection'
134
+ result.connection_type = :active_record_base
135
+ end
136
+
137
+ # 1. Cooldown state table
138
+ output << ''
139
+ output << '-' * 60
140
+ output << '1. COOLDOWN STATE TABLE (solid_queue_autoscaler_state)'
141
+ output << '-' * 60
142
+
143
+ if connection.table_exists?(:solid_queue_autoscaler_state)
144
+ result.state_table_exists = true
145
+ output << '✓ Table exists'
146
+
147
+ columns = connection.columns(:solid_queue_autoscaler_state).map(&:name)
148
+ expected = %w[id key last_scale_up_at last_scale_down_at created_at updated_at]
149
+ missing = expected - columns
150
+
151
+ if missing.empty?
152
+ result.state_table_columns_ok = true
153
+ output << ' ✓ All expected columns present'
154
+ else
155
+ result.state_table_columns_ok = false
156
+ result.add_warning("State table missing columns: #{missing.join(', ')}")
157
+ output << " ⚠ Missing columns: #{missing.join(', ')}"
158
+ end
159
+
160
+ state_count = connection.select_value('SELECT COUNT(*) FROM solid_queue_autoscaler_state').to_i
161
+ output << " Current records: #{state_count}"
162
+ else
163
+ result.state_table_exists = false
164
+ result.add_error('Cooldown state table does not exist')
165
+ output << '✗ Table DOES NOT EXIST'
166
+ output << ' Run: rails generate solid_queue_autoscaler:migration && rails db:migrate'
167
+ output << ' ⚠ Cooldowns are NOT shared across workers (using in-memory fallback)'
168
+ end
169
+
170
+ # 2. Events table
171
+ output << ''
172
+ output << '-' * 60
173
+ output << '2. EVENTS TABLE (solid_queue_autoscaler_events)'
174
+ output << '-' * 60
175
+
176
+ if connection.table_exists?(:solid_queue_autoscaler_events)
177
+ result.events_table_exists = true
178
+ output << '✓ Table exists'
179
+
180
+ columns = connection.columns(:solid_queue_autoscaler_events).map(&:name)
181
+ expected = %w[id worker_name action from_workers to_workers reason queue_depth latency_seconds metrics_json dry_run created_at]
182
+ missing = expected - columns
183
+
184
+ if missing.empty?
185
+ result.events_table_columns_ok = true
186
+ output << ' ✓ All expected columns present'
187
+ else
188
+ result.events_table_columns_ok = false
189
+ result.add_warning("Events table missing columns: #{missing.join(', ')}")
190
+ output << " ⚠ Missing columns: #{missing.join(', ')}"
191
+ end
192
+
193
+ event_count = connection.select_value('SELECT COUNT(*) FROM solid_queue_autoscaler_events').to_i
194
+ output << " Total events: #{event_count}"
195
+ else
196
+ result.events_table_exists = false
197
+ result.add_error('Events table does not exist')
198
+ output << '✗ Table DOES NOT EXIST'
199
+ output << ' Run: rails generate solid_queue_autoscaler:migration && rails db:migrate'
200
+ output << ' ⚠ Scale events are NOT being recorded (dashboard will be empty)'
201
+ end
202
+
203
+ # 3. Configuration
204
+ output << ''
205
+ output << '-' * 60
206
+ output << '3. CONFIGURATION'
207
+ output << '-' * 60
208
+
209
+ begin
210
+ result.config_valid = true
211
+ output << '✓ Configuration loaded'
212
+ output << " enabled: #{cfg.enabled?}"
213
+ output << " dry_run: #{cfg.dry_run?}"
214
+ output << " persist_cooldowns: #{cfg.respond_to?(:persist_cooldowns) ? cfg.persist_cooldowns : '(not supported in this version)'}"
215
+ output << " record_events: #{cfg.respond_to?(:record_events) ? cfg.record_events : '(not supported in this version)'}"
216
+ output << " min_workers: #{cfg.min_workers}"
217
+ output << " max_workers: #{cfg.max_workers}"
218
+ output << " job_queue: #{cfg.job_queue}"
219
+ output << " adapter: #{cfg.adapter.class.name}"
220
+ rescue StandardError => e
221
+ result.config_valid = false
222
+ result.add_error("Configuration error: #{e.message}")
223
+ output << "✗ Configuration error: #{e.message}"
224
+ end
225
+
226
+ # 4. Adapter connectivity
227
+ output << ''
228
+ output << '-' * 60
229
+ output << '4. ADAPTER CONNECTIVITY'
230
+ output << '-' * 60
231
+
232
+ begin
233
+ workers = cfg.adapter.current_workers
234
+ result.adapter_connected = true
235
+ output << "✓ Adapter connected (current workers: #{workers})"
236
+ rescue StandardError => e
237
+ result.adapter_connected = false
238
+ result.add_error("Adapter connection failed: #{e.message}")
239
+ output << "✗ Adapter connection failed: #{e.message}"
240
+ end
241
+
242
+ # 5. Solid Queue tables
243
+ output << ''
244
+ output << '-' * 60
245
+ output << '5. SOLID QUEUE TABLES'
246
+ output << '-' * 60
247
+
248
+ sq_tables = %w[solid_queue_jobs solid_queue_ready_executions solid_queue_claimed_executions solid_queue_processes]
249
+ result.solid_queue_tables = {}
250
+
251
+ sq_tables.each do |table|
252
+ if connection.table_exists?(table)
253
+ count = connection.select_value("SELECT COUNT(*) FROM #{table}").to_i
254
+ result.solid_queue_tables[table] = count
255
+ output << "✓ #{table}: #{count} records"
256
+ else
257
+ result.solid_queue_tables[table] = nil
258
+ output << "✗ #{table}: MISSING"
259
+ end
260
+ end
261
+
262
+ # Summary
263
+ output << ''
264
+ output << '=' * 60
265
+ output << 'SUMMARY'
266
+ output << '=' * 60
267
+
268
+ if result.ok?
269
+ output << '✓ All checks passed! Autoscaler is correctly configured.'
270
+ if result.cooldowns_shared?
271
+ output << ' Cooldowns: SHARED across workers (database-persisted)'
272
+ else
273
+ output << ' Cooldowns: In-memory only (not shared across workers)'
274
+ end
275
+ if result.events_table_exists
276
+ output << ' Events: RECORDING to database'
277
+ else
278
+ output << ' Events: NOT recording (events table missing)'
279
+ end
280
+ else
281
+ output << '⚠ Some issues found:'
282
+ result.errors.each { |err| output << " ✗ #{err}" }
283
+ result.warnings.each { |warn| output << " ⚠ #{warn}" }
284
+ output << ''
285
+ output << 'To fix missing tables, run:'
286
+ output << ' rails generate solid_queue_autoscaler:migration'
287
+ output << ' rails db:migrate'
288
+ end
289
+
290
+ puts output.join("\n") if verbose
291
+
292
+ nil
293
+ end
294
+
295
+ # Convenience alias so users can call verify_install! as requested
296
+ def verify_install!(name = :default, verbose: true)
297
+ verify_setup!(name, verbose: verbose)
298
+ end
299
+ end
300
+
301
+ # Structured result from verify_setup!/verify_install!
302
+ class VerificationResult
303
+ attr_accessor :connection_type,
304
+ :state_table_exists, :state_table_columns_ok,
305
+ :events_table_exists, :events_table_columns_ok,
306
+ :config_valid, :adapter_connected,
307
+ :solid_queue_tables
308
+
309
+ def initialize
310
+ @errors = []
311
+ @warnings = []
312
+ @solid_queue_tables = {}
313
+ end
314
+
315
+ def errors
316
+ @errors
317
+ end
318
+
319
+ def warnings
320
+ @warnings
321
+ end
322
+
323
+ def add_error(message)
324
+ @errors << message
325
+ end
326
+
327
+ def add_warning(message)
328
+ @warnings << message
329
+ end
330
+
331
+ def ok?
332
+ @errors.empty?
333
+ end
334
+
335
+ def tables_exist?
336
+ state_table_exists && events_table_exists
337
+ end
338
+
339
+ def cooldowns_shared?
340
+ state_table_exists && state_table_columns_ok
341
+ end
342
+
343
+ def to_h
344
+ {
345
+ ok: ok?,
346
+ connection_type: connection_type,
347
+ state_table: { exists: state_table_exists, columns_ok: state_table_columns_ok },
348
+ events_table: { exists: events_table_exists, columns_ok: events_table_columns_ok },
349
+ config_valid: config_valid,
350
+ adapter_connected: adapter_connected,
351
+ solid_queue_tables: solid_queue_tables,
352
+ errors: errors,
353
+ warnings: warnings
354
+ }
355
+ end
102
356
  end
103
357
  end
104
358
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solid_queue_autoscaler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - reillyse
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-01-16 00:00:00.000000000 Z
11
+ date: 2026-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: activejob
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '7.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '7.0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake
57
71
  requirement: !ruby/object:Gem::Requirement