solid_queue_autoscaler 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -0
- data/README.md +770 -4
- data/lib/generators/solid_queue_autoscaler/migration_generator.rb +180 -4
- data/lib/generators/solid_queue_autoscaler/templates/README +27 -6
- data/lib/generators/solid_queue_autoscaler/templates/create_solid_queue_autoscaler_events.rb.erb +17 -10
- data/lib/generators/solid_queue_autoscaler/templates/create_solid_queue_autoscaler_state.rb.erb +9 -0
- data/lib/generators/solid_queue_autoscaler/templates/initializer.rb +6 -0
- data/lib/solid_queue_autoscaler/autoscale_job.rb +10 -0
- data/lib/solid_queue_autoscaler/configuration.rb +13 -0
- data/lib/solid_queue_autoscaler/scale_event.rb +183 -0
- data/lib/solid_queue_autoscaler/scaler.rb +98 -20
- data/lib/solid_queue_autoscaler/version.rb +1 -1
- data/lib/solid_queue_autoscaler.rb +254 -0
- metadata +16 -2
data/README.md
CHANGED
|
@@ -240,6 +240,13 @@ Scaling down triggers when **ALL** thresholds are met:
|
|
|
240
240
|
| `scale_down_cooldown_seconds` | Integer | `nil` | Override for scale-down cooldown |
|
|
241
241
|
| `persist_cooldowns` | Boolean | `true` | Save cooldowns to database |
|
|
242
242
|
|
|
243
|
+
### AutoscaleJob Settings
|
|
244
|
+
|
|
245
|
+
| Option | Type | Default | Description |
|
|
246
|
+
|--------|------|---------|-------------|
|
|
247
|
+
| `job_queue` | Symbol/String | `:autoscaler` | Queue for the AutoscaleJob |
|
|
248
|
+
| `job_priority` | Integer | `nil` | Priority for the AutoscaleJob (lower = higher priority) |
|
|
249
|
+
|
|
243
250
|
### Heroku-Specific
|
|
244
251
|
|
|
245
252
|
| Option | Type | Default | Description |
|
|
@@ -256,23 +263,503 @@ Scaling down triggers when **ALL** thresholds are met:
|
|
|
256
263
|
| `kubernetes_deployment` | String | `nil` | Deployment name to scale |
|
|
257
264
|
| `kubernetes_config_path` | String | `nil` | Path to kubeconfig (optional) |
|
|
258
265
|
|
|
266
|
+
## Common Configuration Examples
|
|
267
|
+
|
|
268
|
+
These examples show typical setups for different use cases. Copy and adapt them to your needs.
|
|
269
|
+
|
|
270
|
+
### Simple Setup (Single Worker, Heroku)
|
|
271
|
+
|
|
272
|
+
Ideal for small apps, side projects, or getting started:
|
|
273
|
+
|
|
274
|
+
```ruby
|
|
275
|
+
# config/initializers/solid_queue_autoscaler.rb
|
|
276
|
+
SolidQueueAutoscaler.configure do |config|
|
|
277
|
+
config.adapter = :heroku
|
|
278
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
279
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
280
|
+
config.process_type = 'worker'
|
|
281
|
+
|
|
282
|
+
config.min_workers = 1
|
|
283
|
+
config.max_workers = 5
|
|
284
|
+
|
|
285
|
+
# Scale up when queue backs up
|
|
286
|
+
config.scale_up_queue_depth = 50
|
|
287
|
+
config.scale_up_latency_seconds = 180 # 3 minutes
|
|
288
|
+
|
|
289
|
+
# Scale down when queue is nearly empty
|
|
290
|
+
config.scale_down_queue_depth = 5
|
|
291
|
+
config.scale_down_latency_seconds = 30
|
|
292
|
+
|
|
293
|
+
# Safety: only run in production
|
|
294
|
+
config.dry_run = !Rails.env.production?
|
|
295
|
+
config.enabled = Rails.env.production?
|
|
296
|
+
end
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
```yaml
|
|
300
|
+
# config/recurring.yml
|
|
301
|
+
autoscaler:
|
|
302
|
+
class: SolidQueueAutoscaler::AutoscaleJob
|
|
303
|
+
queue: autoscaler
|
|
304
|
+
schedule: every 30 seconds
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
### Cost-Optimized Setup (Scale to Zero)
|
|
310
|
+
|
|
311
|
+
For apps with sporadic workloads where you want to minimize costs during idle periods:
|
|
312
|
+
|
|
313
|
+
```ruby
|
|
314
|
+
SolidQueueAutoscaler.configure do |config|
|
|
315
|
+
config.adapter = :heroku
|
|
316
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
317
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
318
|
+
config.process_type = 'worker'
|
|
319
|
+
|
|
320
|
+
# Allow scaling to zero - no workers when idle
|
|
321
|
+
config.min_workers = 0
|
|
322
|
+
config.max_workers = 5
|
|
323
|
+
|
|
324
|
+
# Scale up immediately when any job is queued
|
|
325
|
+
config.scale_up_queue_depth = 1
|
|
326
|
+
config.scale_up_latency_seconds = 60 # 1 minute
|
|
327
|
+
|
|
328
|
+
# Scale down aggressively when empty
|
|
329
|
+
config.scale_down_queue_depth = 0
|
|
330
|
+
config.scale_down_latency_seconds = 10
|
|
331
|
+
|
|
332
|
+
# Shorter cooldowns for faster response
|
|
333
|
+
config.scale_up_cooldown_seconds = 30
|
|
334
|
+
config.scale_down_cooldown_seconds = 300 # 5 min before scaling to zero
|
|
335
|
+
|
|
336
|
+
config.enabled = Rails.env.production?
|
|
337
|
+
end
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
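**⚠️ Note:** With `min_workers = 0`, there's cold-start latency when the first job arrives. The autoscaler must run on a web dyno or separate process, not on the workers themselves — once the workers are scaled to zero, nothing would be left to run the job that scales them back up.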
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
### E-Commerce / SaaS (Multiple Worker Types)
|
|
345
|
+
|
|
346
|
+
For apps with different job priorities (payments, notifications, reports):
|
|
347
|
+
|
|
348
|
+
```ruby
|
|
349
|
+
# Critical jobs - payments, webhooks, user-facing notifications
|
|
350
|
+
SolidQueueAutoscaler.configure(:critical_worker) do |config|
|
|
351
|
+
config.adapter = :heroku
|
|
352
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
353
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
354
|
+
config.process_type = 'critical_worker'
|
|
355
|
+
|
|
356
|
+
config.queues = ['critical', 'payments', 'webhooks']
|
|
357
|
+
|
|
358
|
+
# Always have capacity, scale aggressively
|
|
359
|
+
config.min_workers = 2
|
|
360
|
+
config.max_workers = 10
|
|
361
|
+
config.scale_up_queue_depth = 5
|
|
362
|
+
config.scale_up_latency_seconds = 30
|
|
363
|
+
|
|
364
|
+
# Short cooldowns for responsiveness
|
|
365
|
+
config.cooldown_seconds = 60
|
|
366
|
+
|
|
367
|
+
# High-priority autoscaler job
|
|
368
|
+
config.job_queue = :autoscaler
|
|
369
|
+
config.job_priority = 0
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
# Default jobs - emails, notifications, analytics
|
|
373
|
+
SolidQueueAutoscaler.configure(:default_worker) do |config|
|
|
374
|
+
config.adapter = :heroku
|
|
375
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
376
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
377
|
+
config.process_type = 'worker'
|
|
378
|
+
|
|
379
|
+
config.queues = ['default', 'mailers', 'analytics']
|
|
380
|
+
|
|
381
|
+
# Standard capacity, moderate scaling
|
|
382
|
+
config.min_workers = 1
|
|
383
|
+
config.max_workers = 8
|
|
384
|
+
config.scale_up_queue_depth = 100
|
|
385
|
+
config.scale_up_latency_seconds = 300
|
|
386
|
+
|
|
387
|
+
config.cooldown_seconds = 120
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
# Batch jobs - reports, exports, data processing
|
|
391
|
+
SolidQueueAutoscaler.configure(:batch_worker) do |config|
|
|
392
|
+
config.adapter = :heroku
|
|
393
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
394
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
395
|
+
config.process_type = 'batch_worker'
|
|
396
|
+
|
|
397
|
+
config.queues = ['batch', 'reports', 'exports']
|
|
398
|
+
|
|
399
|
+
# Scale to zero when no batch jobs, scale up for any batch work
|
|
400
|
+
config.min_workers = 0
|
|
401
|
+
config.max_workers = 3
|
|
402
|
+
config.scale_up_queue_depth = 1
|
|
403
|
+
config.scale_down_queue_depth = 0
|
|
404
|
+
|
|
405
|
+
# Long cooldowns - batch jobs take time
|
|
406
|
+
config.cooldown_seconds = 300
|
|
407
|
+
end
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
```yaml
|
|
411
|
+
# config/recurring.yml
|
|
412
|
+
# Scale critical workers frequently
|
|
413
|
+
autoscaler_critical:
|
|
414
|
+
class: SolidQueueAutoscaler::AutoscaleJob
|
|
415
|
+
queue: autoscaler
|
|
416
|
+
schedule: every 15 seconds
|
|
417
|
+
args: [:critical_worker]
|
|
418
|
+
|
|
419
|
+
# Scale default workers normally
|
|
420
|
+
autoscaler_default:
|
|
421
|
+
class: SolidQueueAutoscaler::AutoscaleJob
|
|
422
|
+
queue: autoscaler
|
|
423
|
+
schedule: every 30 seconds
|
|
424
|
+
args: [:default_worker]
|
|
425
|
+
|
|
426
|
+
# Scale batch workers less frequently
|
|
427
|
+
autoscaler_batch:
|
|
428
|
+
class: SolidQueueAutoscaler::AutoscaleJob
|
|
429
|
+
queue: autoscaler
|
|
430
|
+
schedule: every 60 seconds
|
|
431
|
+
args: [:batch_worker]
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
---
|
|
435
|
+
|
|
436
|
+
### High-Volume API (Webhook Processing)
|
|
437
|
+
|
|
438
|
+
For apps processing many incoming webhooks or API callbacks:
|
|
439
|
+
|
|
440
|
+
```ruby
|
|
441
|
+
SolidQueueAutoscaler.configure do |config|
|
|
442
|
+
config.adapter = :heroku
|
|
443
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
444
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
445
|
+
config.process_type = 'worker'
|
|
446
|
+
|
|
447
|
+
config.queues = ['webhooks', 'callbacks', 'api_jobs']
|
|
448
|
+
|
|
449
|
+
# Maintain baseline capacity
|
|
450
|
+
config.min_workers = 2
|
|
451
|
+
config.max_workers = 20
|
|
452
|
+
|
|
453
|
+
# Proportional scaling - scale based on actual load
|
|
454
|
+
config.scaling_strategy = :proportional
|
|
455
|
+
config.scale_up_queue_depth = 50
|
|
456
|
+
config.scale_up_latency_seconds = 60
|
|
457
|
+
|
|
458
|
+
# Add 1 worker per 25 jobs over threshold
|
|
459
|
+
config.scale_up_jobs_per_worker = 25
|
|
460
|
+
# Add 1 worker per 30 seconds over latency threshold
|
|
461
|
+
config.scale_up_latency_per_worker = 30
|
|
462
|
+
|
|
463
|
+
# Scale down when under capacity
|
|
464
|
+
config.scale_down_queue_depth = 10
|
|
465
|
+
config.scale_down_jobs_per_worker = 50
|
|
466
|
+
|
|
467
|
+
# Fast cooldowns for responsive scaling
|
|
468
|
+
config.scale_up_cooldown_seconds = 30
|
|
469
|
+
config.scale_down_cooldown_seconds = 120
|
|
470
|
+
|
|
471
|
+
config.job_priority = 0 # Process autoscaler jobs first
|
|
472
|
+
end
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
---
|
|
476
|
+
|
|
477
|
+
### Data Processing / ETL Pipeline
|
|
478
|
+
|
|
479
|
+
For apps with heavy data processing, imports, or batch ETL jobs:
|
|
480
|
+
|
|
481
|
+
```ruby
|
|
482
|
+
SolidQueueAutoscaler.configure(:etl_worker) do |config|
|
|
483
|
+
config.adapter = :heroku
|
|
484
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
485
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
486
|
+
config.process_type = 'etl_worker'
|
|
487
|
+
|
|
488
|
+
config.queues = ['imports', 'exports', 'etl', 'data_sync']
|
|
489
|
+
|
|
490
|
+
# Scale to zero when no work, burst when needed
|
|
491
|
+
config.min_workers = 0
|
|
492
|
+
config.max_workers = 10
|
|
493
|
+
|
|
494
|
+
# Scale up as soon as work is queued
|
|
495
|
+
config.scale_up_queue_depth = 1
|
|
496
|
+
config.scale_up_latency_seconds = 120
|
|
497
|
+
|
|
498
|
+
# Use fixed scaling for predictable behavior
|
|
499
|
+
config.scaling_strategy = :fixed
|
|
500
|
+
config.scale_up_increment = 2 # Add 2 workers at a time
|
|
501
|
+
config.scale_down_decrement = 1
|
|
502
|
+
|
|
503
|
+
# Long cooldowns - ETL jobs are long-running
|
|
504
|
+
config.scale_up_cooldown_seconds = 120
|
|
505
|
+
config.scale_down_cooldown_seconds = 600 # 10 minutes
|
|
506
|
+
|
|
507
|
+
# Scale down only when truly idle
|
|
508
|
+
config.scale_down_queue_depth = 0
|
|
509
|
+
config.scale_down_latency_seconds = 0
|
|
510
|
+
end
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
---
|
|
514
|
+
|
|
515
|
+
### High-Availability Setup
|
|
516
|
+
|
|
517
|
+
For mission-critical apps requiring guaranteed capacity:
|
|
518
|
+
|
|
519
|
+
```ruby
|
|
520
|
+
SolidQueueAutoscaler.configure do |config|
|
|
521
|
+
config.adapter = :heroku
|
|
522
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
523
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
524
|
+
config.process_type = 'worker'
|
|
525
|
+
|
|
526
|
+
# Always maintain minimum capacity
|
|
527
|
+
config.min_workers = 3
|
|
528
|
+
config.max_workers = 15
|
|
529
|
+
|
|
530
|
+
# Scale up proactively before queue backs up
|
|
531
|
+
config.scale_up_queue_depth = 25
|
|
532
|
+
config.scale_up_latency_seconds = 60
|
|
533
|
+
|
|
534
|
+
# Conservative scale-down
|
|
535
|
+
config.scale_down_queue_depth = 5
|
|
536
|
+
config.scale_down_latency_seconds = 15
|
|
537
|
+
|
|
538
|
+
# Longer cooldowns to prevent flapping
|
|
539
|
+
config.cooldown_seconds = 180
|
|
540
|
+
config.scale_down_cooldown_seconds = 300 # Extra cautious on scale-down
|
|
541
|
+
|
|
542
|
+
# Record all events for monitoring
|
|
543
|
+
config.record_events = true
|
|
544
|
+
config.record_all_events = true # Even no-change events
|
|
545
|
+
end
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
---
|
|
549
|
+
|
|
550
|
+
### Kubernetes Setup
|
|
551
|
+
|
|
552
|
+
For apps deployed on Kubernetes:
|
|
553
|
+
|
|
554
|
+
```ruby
|
|
555
|
+
SolidQueueAutoscaler.configure do |config|
|
|
556
|
+
config.adapter = :kubernetes
|
|
557
|
+
config.kubernetes_namespace = ENV.fetch('K8S_NAMESPACE', 'production')
|
|
558
|
+
config.kubernetes_deployment = 'solid-queue-worker'
|
|
559
|
+
|
|
560
|
+
# Optional: specify kubeconfig for local development
|
|
561
|
+
# config.kubernetes_kubeconfig = '~/.kube/config'
|
|
562
|
+
# config.kubernetes_context = 'my-cluster'
|
|
563
|
+
|
|
564
|
+
config.min_workers = 2 # Minimum replicas
|
|
565
|
+
config.max_workers = 20
|
|
566
|
+
|
|
567
|
+
config.scale_up_queue_depth = 100
|
|
568
|
+
config.scale_up_latency_seconds = 180
|
|
569
|
+
|
|
570
|
+
config.scale_down_queue_depth = 10
|
|
571
|
+
config.scale_down_latency_seconds = 30
|
|
572
|
+
|
|
573
|
+
# K8s scaling can be faster than Heroku
|
|
574
|
+
config.cooldown_seconds = 60
|
|
575
|
+
|
|
576
|
+
config.enabled = Rails.env.production?
|
|
577
|
+
end
|
|
578
|
+
```
|
|
579
|
+
|
|
580
|
+
**Required RBAC configuration:**
|
|
581
|
+
|
|
582
|
+
```yaml
|
|
583
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
584
|
+
kind: Role
|
|
585
|
+
metadata:
|
|
586
|
+
name: solid-queue-autoscaler
|
|
587
|
+
namespace: production
|
|
588
|
+
rules:
|
|
589
|
+
- apiGroups: ["apps"]
|
|
590
|
+
resources: ["deployments", "deployments/scale"]
|
|
591
|
+
verbs: ["get", "patch", "update"]
|
|
592
|
+
---
|
|
593
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
594
|
+
kind: RoleBinding
|
|
595
|
+
metadata:
|
|
596
|
+
name: solid-queue-autoscaler
|
|
597
|
+
namespace: production
|
|
598
|
+
subjects:
|
|
599
|
+
- kind: ServiceAccount
|
|
600
|
+
name: solid-queue-autoscaler
|
|
601
|
+
namespace: production
|
|
602
|
+
roleRef:
|
|
603
|
+
kind: Role
|
|
604
|
+
name: solid-queue-autoscaler
|
|
605
|
+
apiGroup: rbac.authorization.k8s.io
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
---
|
|
609
|
+
|
|
610
|
+
### Development / Testing Setup
|
|
611
|
+
|
|
612
|
+
For local development and CI environments:
|
|
613
|
+
|
|
614
|
+
```ruby
|
|
615
|
+
SolidQueueAutoscaler.configure do |config|
|
|
616
|
+
config.adapter = :heroku
|
|
617
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
618
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
619
|
+
config.process_type = 'worker'
|
|
620
|
+
|
|
621
|
+
config.min_workers = 1
|
|
622
|
+
config.max_workers = 3
|
|
623
|
+
|
|
624
|
+
config.scale_up_queue_depth = 10
|
|
625
|
+
config.scale_up_latency_seconds = 60
|
|
626
|
+
|
|
627
|
+
# IMPORTANT: Disable in development, use dry_run in staging
|
|
628
|
+
case Rails.env
|
|
629
|
+
when 'production'
|
|
630
|
+
config.enabled = true
|
|
631
|
+
config.dry_run = false
|
|
632
|
+
when 'staging'
|
|
633
|
+
config.enabled = true
|
|
634
|
+
config.dry_run = true # Log decisions but don't scale
|
|
635
|
+
else
|
|
636
|
+
config.enabled = false
|
|
637
|
+
end
|
|
638
|
+
|
|
639
|
+
# Verbose logging for debugging
|
|
640
|
+
config.logger = Logger.new(STDOUT)
|
|
641
|
+
config.logger.level = Rails.env.production? ? Logger::INFO : Logger::DEBUG
|
|
642
|
+
end
|
|
643
|
+
```
|
|
644
|
+
|
|
645
|
+
---
|
|
646
|
+
|
|
647
|
+
### Configuration Comparison
|
|
648
|
+
|
|
649
|
+
| Use Case | min | max | scale_up_depth | scale_up_latency | cooldown (up/down) | strategy |
|
|
650
|
+
|----------|-----|-----|----------------|------------------|----------|----------|
|
|
651
|
+
| Simple/Starter | 1 | 5 | 50 | 180s | 120s | fixed |
|
|
652
|
+
| Cost-Optimized | 0 | 5 | 1 | 60s | 30s/300s | fixed |
|
|
653
|
+
| E-Commerce Critical | 2 | 10 | 5 | 30s | 60s | fixed |
|
|
654
|
+
| E-Commerce Default | 1 | 8 | 100 | 300s | 120s | fixed |
|
|
655
|
+
| Webhook Processing | 2 | 20 | 50 | 60s | 30s/120s | proportional |
|
|
656
|
+
| ETL/Batch | 0 | 10 | 1 | 120s | 120s/600s | fixed |
|
|
657
|
+
| High-Availability | 3 | 15 | 25 | 60s | 180s/300s | fixed |
|
|
658
|
+
|
|
659
|
+
## Configuring a High-Priority Queue for the Autoscaler
|
|
660
|
+
|
|
661
|
+
The autoscaler job should run reliably and quickly, even when your queues are backed up. By default, the autoscaler job runs on the `:autoscaler` queue. You can configure this and set up Solid Queue to prioritize it.
|
|
662
|
+
|
|
663
|
+
### Configure the Job Queue and Priority
|
|
664
|
+
|
|
665
|
+
In your initializer, set the queue and priority for the autoscaler job:
|
|
666
|
+
|
|
667
|
+
```ruby
|
|
668
|
+
SolidQueueAutoscaler.configure do |config|
|
|
669
|
+
# Use a dedicated high-priority queue for the autoscaler
|
|
670
|
+
config.job_queue = :autoscaler # Default value
|
|
671
|
+
|
|
672
|
+
# Or use an existing high-priority queue
|
|
673
|
+
config.job_queue = :critical
|
|
674
|
+
|
|
675
|
+
# Set job priority (lower = higher priority, processed first)
|
|
676
|
+
# This works with queue backends that support job-level priority, such as Solid Queue
|
|
677
|
+
config.job_priority = 0 # Highest priority
|
|
678
|
+
|
|
679
|
+
# ... other config
|
|
680
|
+
end
|
|
681
|
+
```
|
|
682
|
+
|
|
683
|
+
For multi-worker configurations, each worker type can have its own queue and priority:
|
|
684
|
+
|
|
685
|
+
```ruby
|
|
686
|
+
SolidQueueAutoscaler.configure(:critical_worker) do |config|
|
|
687
|
+
config.job_queue = :autoscaler_critical
|
|
688
|
+
config.job_priority = 0 # Highest priority for critical worker scaling
|
|
689
|
+
# ... other config
|
|
690
|
+
end
|
|
691
|
+
|
|
692
|
+
SolidQueueAutoscaler.configure(:default_worker) do |config|
|
|
693
|
+
config.job_queue = :autoscaler_default
|
|
694
|
+
config.job_priority = 10 # Lower priority for default worker scaling
|
|
695
|
+
# ... other config
|
|
696
|
+
end
|
|
697
|
+
```
|
|
698
|
+
|
|
699
|
+
### Configure Solid Queue to Prioritize the Autoscaler
|
|
700
|
+
|
|
701
|
+
In your `config/solid_queue.yml`, ensure the autoscaler queue is processed by a dedicated worker or listed first in the queue order:
|
|
702
|
+
|
|
703
|
+
```yaml
|
|
704
|
+
# Option 1: Dedicated dispatcher/worker for autoscaler (recommended)
|
|
705
|
+
production:
|
|
706
|
+
dispatchers:
|
|
707
|
+
- polling_interval: 1
|
|
708
|
+
batch_size: 500
|
|
709
|
+
concurrency_maintenance_interval: 30
|
|
710
|
+
|
|
711
|
+
workers:
|
|
712
|
+
# Dedicated worker for autoscaler - always responsive
|
|
713
|
+
- queues: [autoscaler]
|
|
714
|
+
threads: 1
|
|
715
|
+
processes: 1
|
|
716
|
+
polling_interval: 0.5 # Check frequently
|
|
717
|
+
|
|
718
|
+
# Main workers for business logic
|
|
719
|
+
- queues: [critical, default, mailers]
|
|
720
|
+
threads: 5
|
|
721
|
+
processes: 2
|
|
722
|
+
polling_interval: 1
|
|
723
|
+
```
|
|
724
|
+
|
|
725
|
+
```yaml
|
|
726
|
+
# Option 2: Include autoscaler first in queue list (simpler)
|
|
727
|
+
production:
|
|
728
|
+
workers:
|
|
729
|
+
- queues: [autoscaler, critical, default, mailers]
|
|
730
|
+
threads: 5
|
|
731
|
+
processes: 2
|
|
732
|
+
```
|
|
733
|
+
|
|
734
|
+
Solid Queue processes queues in order, so listing `autoscaler` first ensures those jobs are picked up before others.
|
|
735
|
+
|
|
736
|
+
### Why This Matters
|
|
737
|
+
|
|
738
|
+
- **Responsiveness**: When your queues are backed up, you want the autoscaler to scale up workers quickly
|
|
739
|
+
- **Reliability**: A dedicated queue prevents autoscaler jobs from waiting behind thousands of business jobs
|
|
740
|
+
- **Isolation**: Separating autoscaler jobs makes monitoring and debugging easier
|
|
741
|
+
|
|
259
742
|
## Usage
|
|
260
743
|
|
|
261
744
|
### Running as a Solid Queue Recurring Job (Recommended)
|
|
262
745
|
|
|
746
|
+
> ⚠️ **IMPORTANT**: The `queue:` setting in `recurring.yml` **overrides** the `config.job_queue` setting!
|
|
747
|
+
> If you omit `queue:` in your recurring.yml, the job will go to the `default` queue, NOT your configured queue.
|
|
748
|
+
> Always ensure your `recurring.yml` queue matches your `config.job_queue` setting.
|
|
749
|
+
|
|
263
750
|
Add to your `config/recurring.yml`:
|
|
264
751
|
|
|
265
752
|
```yaml
|
|
266
753
|
# Single worker configuration
|
|
267
754
|
autoscaler:
|
|
268
755
|
class: SolidQueueAutoscaler::AutoscaleJob
|
|
269
|
-
queue: autoscaler
|
|
756
|
+
queue: autoscaler # ⚠️ REQUIRED: Must match config.job_queue!
|
|
270
757
|
schedule: every 30 seconds
|
|
271
758
|
|
|
272
759
|
# Or for multi-worker: scale all workers
|
|
273
760
|
autoscaler_all:
|
|
274
761
|
class: SolidQueueAutoscaler::AutoscaleJob
|
|
275
|
-
queue: autoscaler
|
|
762
|
+
queue: autoscaler # ⚠️ REQUIRED!
|
|
276
763
|
schedule: every 30 seconds
|
|
277
764
|
args:
|
|
278
765
|
- :all
|
|
@@ -280,19 +767,25 @@ autoscaler_all:
|
|
|
280
767
|
# Or scale specific worker types on different schedules
|
|
281
768
|
autoscaler_critical:
|
|
282
769
|
class: SolidQueueAutoscaler::AutoscaleJob
|
|
283
|
-
queue: autoscaler
|
|
770
|
+
queue: autoscaler # ⚠️ REQUIRED!
|
|
284
771
|
schedule: every 15 seconds
|
|
285
772
|
args:
|
|
286
773
|
- :critical_worker
|
|
287
774
|
|
|
288
775
|
autoscaler_default:
|
|
289
776
|
class: SolidQueueAutoscaler::AutoscaleJob
|
|
290
|
-
queue: autoscaler
|
|
777
|
+
queue: autoscaler # ⚠️ REQUIRED!
|
|
291
778
|
schedule: every 60 seconds
|
|
292
779
|
args:
|
|
293
780
|
- :default_worker
|
|
294
781
|
```
|
|
295
782
|
|
|
783
|
+
> **Note on multiple worker dynos**: Solid Queue's recurring jobs are processed by the **dispatcher** process,
|
|
784
|
+
> not workers. If each of your worker dynos runs its own dispatcher (which is the default on Heroku),
|
|
785
|
+
> each dyno will try to enqueue the recurring job. To prevent duplicate enqueuing:
|
|
786
|
+
> 1. Run a single dedicated dispatcher dyno, OR
|
|
787
|
+
> 2. Configure workers to NOT run the dispatcher (set `dispatchers: []` in their solid_queue.yml)
|
|
788
|
+
|
|
296
789
|
### Running via Rake Tasks
|
|
297
790
|
|
|
298
791
|
```bash
|
|
@@ -506,10 +999,32 @@ KEEP_DAYS=7 bundle exec rake solid_queue_autoscaler:cleanup_events
|
|
|
506
999
|
|
|
507
1000
|
Another autoscaler instance is currently running. This is expected behavior — only one instance should run at a time per worker type.
|
|
508
1001
|
|
|
1002
|
+
**If you believe no other instance is running:**
|
|
1003
|
+
|
|
1004
|
+
```ruby
|
|
1005
|
+
# Check for stale advisory locks
|
|
1006
|
+
ActiveRecord::Base.connection.execute(<<~SQL)
|
|
1007
|
+
SELECT * FROM pg_locks WHERE locktype = 'advisory'
|
|
1008
|
+
SQL
|
|
1009
|
+
|
|
1010
|
+
# Try to release a stuck lock (use with caution!) — pg_advisory_unlock only succeeds from the database session that holds the lock
|
|
1011
|
+
lock_key = SolidQueueAutoscaler.config.lock_key
|
|
1012
|
+
lock_id = Zlib.crc32(lock_key) & 0x7FFFFFFF
|
|
1013
|
+
ActiveRecord::Base.connection.execute("SELECT pg_advisory_unlock(#{lock_id})")
|
|
1014
|
+
```
|
|
1015
|
+
|
|
509
1016
|
### "Cooldown active"
|
|
510
1017
|
|
|
511
1018
|
A recent scaling event triggered the cooldown. Wait for the cooldown to expire or adjust `cooldown_seconds`.
|
|
512
1019
|
|
|
1020
|
+
```ruby
|
|
1021
|
+
# Check cooldown status (shell command — run from a terminal, not the Rails console)
|
|
1022
|
+
bundle exec rake solid_queue_autoscaler:cooldown
|
|
1023
|
+
|
|
1024
|
+
# Reset cooldowns (for testing only)
|
|
1025
|
+
SolidQueueAutoscaler::CooldownTracker.reset!
|
|
1026
|
+
```
|
|
1027
|
+
|
|
513
1028
|
### Workers not scaling
|
|
514
1029
|
|
|
515
1030
|
1. Check that `enabled` is `true`
|
|
@@ -518,12 +1033,263 @@ A recent scaling event triggered the cooldown. Wait for the cooldown to expire o
|
|
|
518
1033
|
4. Enable dry-run to see what decisions would be made
|
|
519
1034
|
5. Check the logs for error messages
|
|
520
1035
|
|
|
1036
|
+
**Debug with a manual scale attempt:**
|
|
1037
|
+
|
|
1038
|
+
```ruby
|
|
1039
|
+
# Check configuration
|
|
1040
|
+
config = SolidQueueAutoscaler.config
|
|
1041
|
+
puts "Enabled: #{config.enabled?}"
|
|
1042
|
+
puts "Dry Run: #{config.dry_run?}"
|
|
1043
|
+
puts "API Key Set: #{config.heroku_api_key.present?}"
|
|
1044
|
+
|
|
1045
|
+
# Check current metrics
|
|
1046
|
+
metrics = SolidQueueAutoscaler.metrics
|
|
1047
|
+
puts "Queue depth: #{metrics.queue_depth}"
|
|
1048
|
+
puts "Latency: #{metrics.oldest_job_age_seconds}s"
|
|
1049
|
+
|
|
1050
|
+
# Try a manual scale
|
|
1051
|
+
result = SolidQueueAutoscaler.scale!
|
|
1052
|
+
puts result.decision.inspect if result.decision
|
|
1053
|
+
puts result.skipped_reason if result.skipped?
|
|
1054
|
+
puts result.error if result.error
|
|
1055
|
+
```
|
|
1056
|
+
|
|
1057
|
+
### Workers not scaling down
|
|
1058
|
+
|
|
1059
|
+
Scale-down requires **ALL** conditions to be met:
|
|
1060
|
+
|
|
1061
|
+
```ruby
|
|
1062
|
+
metrics = SolidQueueAutoscaler.metrics
|
|
1063
|
+
config = SolidQueueAutoscaler.config
|
|
1064
|
+
|
|
1065
|
+
puts "Queue depth: #{metrics.queue_depth} (threshold: <= #{config.scale_down_queue_depth})"
|
|
1066
|
+
puts "Latency: #{metrics.oldest_job_age_seconds}s (threshold: <= #{config.scale_down_latency_seconds}s)"
|
|
1067
|
+
puts "Claimed jobs: #{metrics.claimed_jobs}" # Must be 0 for idle scale-down
|
|
1068
|
+
puts "Current workers: #{SolidQueueAutoscaler.current_workers}"
|
|
1069
|
+
puts "Min workers: #{config.min_workers}" # Can't scale below this
|
|
1070
|
+
```
|
|
1071
|
+
|
|
1072
|
+
### Heroku API errors
|
|
1073
|
+
|
|
1074
|
+
**401 Unauthorized:**
|
|
1075
|
+
|
|
1076
|
+
```bash
|
|
1077
|
+
# Check if API key is valid
|
|
1078
|
+
heroku authorizations
|
|
1079
|
+
|
|
1080
|
+
# Create a new authorization
|
|
1081
|
+
heroku authorizations:create -d "Solid Queue Autoscaler"
|
|
1082
|
+
```
|
|
1083
|
+
|
|
1084
|
+
**404 Not Found:**
|
|
1085
|
+
|
|
1086
|
+
```bash
|
|
1087
|
+
# Verify app name
|
|
1088
|
+
heroku apps
|
|
1089
|
+
heroku apps:info -a $HEROKU_APP_NAME
|
|
1090
|
+
```
|
|
1091
|
+
|
|
1092
|
+
**429 Rate Limited:**
|
|
1093
|
+
|
|
1094
|
+
Increase cooldown to reduce API calls:
|
|
1095
|
+
|
|
1096
|
+
```ruby
|
|
1097
|
+
config.cooldown_seconds = 180 # 3 minutes instead of default 2
|
|
1098
|
+
```
|
|
1099
|
+
|
|
521
1100
|
### Kubernetes authentication issues
|
|
522
1101
|
|
|
523
1102
|
1. Ensure the service account has permissions to patch deployments
|
|
524
1103
|
2. Check namespace is correct
|
|
525
1104
|
3. Verify deployment name matches exactly
|
|
526
1105
|
|
|
1106
|
+
**Check RBAC permissions:**
|
|
1107
|
+
|
|
1108
|
+
```yaml
|
|
1109
|
+
# Required RBAC rules for the autoscaler service account
|
|
1110
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
1111
|
+
kind: Role
|
|
1112
|
+
metadata:
|
|
1113
|
+
name: solid-queue-autoscaler
|
|
1114
|
+
rules:
|
|
1115
|
+
- apiGroups: ["apps"]
|
|
1116
|
+
resources: ["deployments", "deployments/scale"]
|
|
1117
|
+
verbs: ["get", "patch", "update"]
|
|
1118
|
+
```
|
|
1119
|
+
|
|
1120
|
+
### AutoscaleJob not running
|
|
1121
|
+
|
|
1122
|
+
**Check recurring.yml configuration:**
|
|
1123
|
+
|
|
1124
|
+
```yaml
|
|
1125
|
+
# config/recurring.yml
|
|
1126
|
+
autoscaler:
|
|
1127
|
+
class: SolidQueueAutoscaler::AutoscaleJob
|
|
1128
|
+
queue: autoscaler
|
|
1129
|
+
schedule: every 30 seconds
|
|
1130
|
+
```
|
|
1131
|
+
|
|
1132
|
+
**Ensure a worker processes the autoscaler queue:**
|
|
1133
|
+
|
|
1134
|
+
```yaml
|
|
1135
|
+
# config/solid_queue.yml
|
|
1136
|
+
workers:
|
|
1137
|
+
- queues: [autoscaler] # Must include autoscaler queue
|
|
1138
|
+
threads: 1
|
|
1139
|
+
```
|
|
1140
|
+
|
|
1141
|
+
**Test manual enqueue:**
|
|
1142
|
+
|
|
1143
|
+
```ruby
|
|
1144
|
+
SolidQueueAutoscaler::AutoscaleJob.perform_later
|
|
1145
|
+
```
|
|
1146
|
+
|
|
1147
|
+
### Multi-worker configuration issues
|
|
1148
|
+
|
|
1149
|
+
**"Unknown worker: :my_worker":**
|
|
1150
|
+
|
|
1151
|
+
Ensure you've configured the worker before referencing it:
|
|
1152
|
+
|
|
1153
|
+
```ruby
|
|
1154
|
+
# Configure the worker first
|
|
1155
|
+
SolidQueueAutoscaler.configure(:my_worker) do |config|
|
|
1156
|
+
config.heroku_api_key = ENV['HEROKU_API_KEY']
|
|
1157
|
+
config.heroku_app_name = ENV['HEROKU_APP_NAME']
|
|
1158
|
+
config.process_type = 'my_worker'
|
|
1159
|
+
end
|
|
1160
|
+
|
|
1161
|
+
# Then reference it
|
|
1162
|
+
SolidQueueAutoscaler.scale!(:my_worker)
|
|
1163
|
+
```
|
|
1164
|
+
|
|
1165
|
+
**List all registered workers:**
|
|
1166
|
+
|
|
1167
|
+
```ruby
|
|
1168
|
+
SolidQueueAutoscaler.registered_workers
|
|
1169
|
+
# => [:default, :critical_worker, :batch_worker]
|
|
1170
|
+
```
|
|
1171
|
+
|
|
1172
|
+
### Database/Migration issues
|
|
1173
|
+
|
|
1174
|
+
**"relation 'solid_queue_autoscaler_state' does not exist":**
|
|
1175
|
+
|
|
1176
|
+
```bash
|
|
1177
|
+
rails generate solid_queue_autoscaler:migration
|
|
1178
|
+
rails db:migrate
|
|
1179
|
+
```
|
|
1180
|
+
|
|
1181
|
+
**"relation 'solid_queue_ready_executions' does not exist":**
|
|
1182
|
+
|
|
1183
|
+
Solid Queue tables are missing. Run Solid Queue migrations:
|
|
1184
|
+
|
|
1185
|
+
```bash
|
|
1186
|
+
rails solid_queue:install:migrations
|
|
1187
|
+
rails db:migrate
|
|
1188
|
+
```
|
|
1189
|
+
|
|
1190
|
+
**Multi-database setup (Solid Queue on separate database):**
|
|
1191
|
+
|
|
1192
|
+
The autoscaler automatically detects `SolidQueue::Record.connection`. If auto-detection fails:
|
|
1193
|
+
|
|
1194
|
+
```ruby
|
|
1195
|
+
SolidQueueAutoscaler.configure do |config|
|
|
1196
|
+
config.database_connection = SolidQueue::Record.connection
|
|
1197
|
+
end
|
|
1198
|
+
```
|
|
1199
|
+
|
|
1200
|
+
### Dashboard not loading
|
|
1201
|
+
|
|
1202
|
+
**404 when visiting /autoscaler:**
|
|
1203
|
+
|
|
1204
|
+
Ensure the engine is mounted in `config/routes.rb`:
|
|
1205
|
+
|
|
1206
|
+
```ruby
|
|
1207
|
+
mount SolidQueueAutoscaler::Dashboard::Engine => "/autoscaler"
|
|
1208
|
+
```
|
|
1209
|
+
|
|
1210
|
+
**"ActionView::MissingTemplate" errors:**
|
|
1211
|
+
|
|
1212
|
+
Run the dashboard generator:
|
|
1213
|
+
|
|
1214
|
+
```bash
|
|
1215
|
+
rails generate solid_queue_autoscaler:dashboard
|
|
1216
|
+
rails db:migrate
|
|
1217
|
+
```
|
|
1218
|
+
|
|
1219
|
+
### Wrong process type being scaled
|
|
1220
|
+
|
|
1221
|
+
```ruby
|
|
1222
|
+
# Check what process type is configured
|
|
1223
|
+
puts SolidQueueAutoscaler.config.process_type
|
|
1224
|
+
|
|
1225
|
+
# Verify it matches your Procfile
|
|
1226
|
+
# Procfile:
|
|
1227
|
+
# web: bundle exec puma -C config/puma.rb
|
|
1228
|
+
# worker: bundle exec rake solid_queue:start # <- This is "worker"
|
|
1229
|
+
```
|
|
1230
|
+
|
|
1231
|
+
### Scaling too aggressively or too slowly
|
|
1232
|
+
|
|
1233
|
+
**Scaling up too often (flapping):**
|
|
1234
|
+
|
|
1235
|
+
```ruby
|
|
1236
|
+
config.cooldown_seconds = 180 # Increase cooldown
|
|
1237
|
+
config.scale_up_cooldown_seconds = 120 # Or set scale-up specific cooldown
|
|
1238
|
+
config.scale_up_queue_depth = 200 # Increase threshold
|
|
1239
|
+
```
|
|
1240
|
+
|
|
1241
|
+
**Not scaling up fast enough:**
|
|
1242
|
+
|
|
1243
|
+
```ruby
|
|
1244
|
+
config.scale_up_queue_depth = 50 # Lower threshold
|
|
1245
|
+
config.scale_up_latency_seconds = 120 # Trigger on 2 min latency
|
|
1246
|
+
config.cooldown_seconds = 60 # Reduce cooldown
|
|
1247
|
+
config.scaling_strategy = :proportional # Scale based on load, not fixed increment
|
|
1248
|
+
config.scale_up_jobs_per_worker = 25 # Add 1 worker per 25 jobs over threshold (lower value = more workers)
|
|
1249
|
+
```
|
|
1250
|
+
|
|
1251
|
+
**Not scaling down:**
|
|
1252
|
+
|
|
1253
|
+
```ruby
|
|
1254
|
+
config.scale_down_queue_depth = 5 # More aggressive scale-down threshold
|
|
1255
|
+
config.scale_down_latency_seconds = 10 # Tighter latency requirement
|
|
1256
|
+
config.min_workers = 0 # Allow scaling to zero (if appropriate)
|
|
1257
|
+
```
|
|
1258
|
+
|
|
1259
|
+
### Debugging tips
|
|
1260
|
+
|
|
1261
|
+
**Enable debug logging:**
|
|
1262
|
+
|
|
1263
|
+
```ruby
|
|
1264
|
+
SolidQueueAutoscaler.configure do |config|
|
|
1265
|
+
config.logger = Logger.new(STDOUT)
|
|
1266
|
+
config.logger.level = Logger::DEBUG
|
|
1267
|
+
end
|
|
1268
|
+
```
|
|
1269
|
+
|
|
1270
|
+
**Simulate a scaling decision without making changes:**
|
|
1271
|
+
|
|
1272
|
+
```ruby
|
|
1273
|
+
metrics = SolidQueueAutoscaler.metrics
|
|
1274
|
+
workers = SolidQueueAutoscaler.current_workers
|
|
1275
|
+
engine = SolidQueueAutoscaler::DecisionEngine.new(config: SolidQueueAutoscaler.config)
|
|
1276
|
+
decision = engine.decide(metrics: metrics, current_workers: workers)
|
|
1277
|
+
|
|
1278
|
+
puts "Action: #{decision.action}" # :scale_up, :scale_down, or :no_change
|
|
1279
|
+
puts "From: #{decision.from} -> To: #{decision.to}"
|
|
1280
|
+
puts "Reason: #{decision.reason}"
|
|
1281
|
+
```
|
|
1282
|
+
|
|
1283
|
+
**Run diagnostics:**
|
|
1284
|
+
|
|
1285
|
+
```bash
|
|
1286
|
+
bundle exec rake solid_queue_autoscaler:metrics
|
|
1287
|
+
bundle exec rake solid_queue_autoscaler:formation
|
|
1288
|
+
bundle exec rake solid_queue_autoscaler:cooldown
|
|
1289
|
+
```
|
|
1290
|
+
|
|
1291
|
+
For more detailed troubleshooting, see [docs/troubleshooting.md](docs/troubleshooting.md).
|
|
1292
|
+
|
|
527
1293
|
## Architecture Notes
|
|
528
1294
|
|
|
529
1295
|
This gem acts as a **control plane** for Solid Queue:
|