solid_queue 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +239 -41
- data/Rakefile +27 -5
- data/app/models/solid_queue/blocked_execution.rb +1 -1
- data/app/models/solid_queue/claimed_execution.rb +10 -3
- data/app/models/solid_queue/job/clearable.rb +2 -1
- data/app/models/solid_queue/job/concurrency_controls.rb +12 -0
- data/app/models/solid_queue/job/executable.rb +1 -1
- data/app/models/solid_queue/job.rb +3 -2
- data/app/models/solid_queue/recurring_task.rb +14 -1
- data/app/models/solid_queue/scheduled_execution.rb +1 -1
- data/lib/active_job/concurrency_controls.rb +4 -1
- data/lib/active_job/queue_adapters/solid_queue_adapter.rb +4 -1
- data/lib/generators/solid_queue/install/templates/config/recurring.yml +7 -2
- data/lib/solid_queue/app_executor.rb +1 -1
- data/lib/solid_queue/cli.rb +2 -1
- data/lib/solid_queue/configuration.rb +57 -7
- data/lib/solid_queue/dispatcher.rb +10 -11
- data/lib/solid_queue/lifecycle_hooks.rb +11 -2
- data/lib/solid_queue/log_subscriber.rb +2 -1
- data/lib/solid_queue/pool.rb +3 -7
- data/lib/solid_queue/processes/base.rb +2 -1
- data/lib/solid_queue/processes/interruptible.rb +21 -10
- data/lib/solid_queue/processes/poller.rb +4 -4
- data/lib/solid_queue/processes/process_pruned_error.rb +1 -1
- data/lib/solid_queue/processes/registrable.rb +1 -2
- data/lib/solid_queue/scheduler.rb +5 -1
- data/lib/solid_queue/supervisor.rb +8 -3
- data/lib/solid_queue/version.rb +1 -1
- data/lib/solid_queue/worker.rb +6 -3
- data/lib/solid_queue.rb +12 -6
- metadata +35 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9066bd5266e43075385bfd3365de2512400960f5cfa7f780dba69e4a3259c07a
|
4
|
+
data.tar.gz: 0a7103f485e445563814874e3113b6ac6dca84c8333bad418c449ebaf3fac1c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 952b71b5cd59ebd79eb51c44f7ed509bf3d4959c010dc0441cff37c0a1bd2ccea97054007bd3a197b287a158342e8791318c841f0d1d9b3dd347986da68bb53a
|
7
|
+
data.tar.gz: 1309ce242499f430667d9677b7ff8807e1b43af33873d9b00575c4c9e13be24824d9520ca24db432901da2616c4ba5a530db83fd29c09c47b820fff3586a75b7
|
data/README.md
CHANGED
@@ -6,6 +6,34 @@ Besides regular job enqueuing and processing, Solid Queue supports delayed jobs,
|
|
6
6
|
|
7
7
|
Solid Queue can be used with SQL databases such as MySQL, PostgreSQL or SQLite, and it leverages the `FOR UPDATE SKIP LOCKED` clause, if available, to avoid blocking and waiting on locks when polling jobs. It relies on Active Job for retries, discarding, error handling, serialization, or delays, and it's compatible with Ruby on Rails's multi-threading.
|
8
8
|
|
9
|
+
## Table of contents
|
10
|
+
|
11
|
+
- [Installation](#installation)
|
12
|
+
- [Usage in development and other non-production environments](#usage-in-development-and-other-non-production-environments)
|
13
|
+
- [Single database configuration](#single-database-configuration)
|
14
|
+
- [Dashboard UI Setup](#dashboard-ui-setup)
|
15
|
+
- [Incremental adoption](#incremental-adoption)
|
16
|
+
- [High performance requirements](#high-performance-requirements)
|
17
|
+
- [Configuration](#configuration)
|
18
|
+
- [Workers, dispatchers and scheduler](#workers-dispatchers-and-scheduler)
|
19
|
+
- [Queue order and priorities](#queue-order-and-priorities)
|
20
|
+
- [Queues specification and performance](#queues-specification-and-performance)
|
21
|
+
- [Threads, processes and signals](#threads-processes-and-signals)
|
22
|
+
- [Database configuration](#database-configuration)
|
23
|
+
- [Other configuration settings](#other-configuration-settings)
|
24
|
+
- [Lifecycle hooks](#lifecycle-hooks)
|
25
|
+
- [Errors when enqueuing](#errors-when-enqueuing)
|
26
|
+
- [Concurrency controls](#concurrency-controls)
|
27
|
+
- [Performance considerations](#performance-considerations)
|
28
|
+
- [Failed jobs and retries](#failed-jobs-and-retries)
|
29
|
+
- [Error reporting on jobs](#error-reporting-on-jobs)
|
30
|
+
- [Puma plugin](#puma-plugin)
|
31
|
+
- [Jobs and transactional integrity](#jobs-and-transactional-integrity)
|
32
|
+
- [Recurring tasks](#recurring-tasks)
|
33
|
+
- [Inspiration](#inspiration)
|
34
|
+
- [License](#license)
|
35
|
+
|
36
|
+
|
9
37
|
## Installation
|
10
38
|
|
11
39
|
Solid Queue is configured by default in new Rails 8 applications. But if you're running an earlier version, you can add it manually following these steps:
|
@@ -13,6 +41,8 @@ Solid Queue is configured by default in new Rails 8 applications. But if you're
|
|
13
41
|
1. `bundle add solid_queue`
|
14
42
|
2. `bin/rails solid_queue:install`
|
15
43
|
|
44
|
+
(Note: The minimum supported version of Rails is 7.1 and Ruby is 3.1.6.)
|
45
|
+
|
16
46
|
This will configure Solid Queue as the production Active Job backend, create the configuration files `config/queue.yml` and `config/recurring.yml`, and create the `db/queue_schema.rb`. It'll also create a `bin/jobs` executable wrapper that you can use to start Solid Queue.
|
17
47
|
|
18
48
|
Once you've done that, you will then have to add the configuration for the queue database in `config/database.yml`. If you're using SQLite, it'll look like this:
|
@@ -43,8 +73,6 @@ production:
|
|
43
73
|
migrations_paths: db/queue_migrate
|
44
74
|
```
|
45
75
|
|
46
|
-
Note: Calling `bin/rails solid_queue:install` will automatically add `config.solid_queue.connects_to = { database: { writing: :queue } }` to `config/environments/production.rb`, so no additional configuration is needed there (although you must make sure that you use the `queue` name in `database.yml` for this to match!). But if you want to use Solid Queue in a different environment (like staging or even development), you'll have to manually add that `config.solid_queue.connects_to` line to the respective environment file. And, as always, make sure that the name you're using for the database in `config/database.yml` matches the name you use in `config.solid_queue.connects_to`.
|
47
|
-
|
48
76
|
Then run `db:prepare` in production to ensure the database is created and the schema is loaded.
|
49
77
|
|
50
78
|
Now you're ready to start processing jobs by running `bin/jobs` on the server that's doing the work. This will start processing jobs in all queues using the default configuration. See [below](#configuration) to learn more about configuring Solid Queue.
|
@@ -53,6 +81,72 @@ For small projects, you can run Solid Queue on the same machine as your webserve
|
|
53
81
|
|
54
82
|
**Note**: future changes to the schema will come in the form of regular migrations.
|
55
83
|
|
84
|
+
### Usage in development and other non-production environments
|
85
|
+
|
86
|
+
Calling `bin/rails solid_queue:install` will automatically add `config.solid_queue.connects_to = { database: { writing: :queue } }` to `config/environments/production.rb`. In order to use Solid Queue in other environments (such as development or staging), you'll need to add a similar configuration for each of those environments.
|
87
|
+
|
88
|
+
For example, if you're using SQLite in development, update `database.yml` as follows:
|
89
|
+
|
90
|
+
```diff
|
91
|
+
development:
|
92
|
+
+ primary:
|
93
|
+
<<: *default
|
94
|
+
database: storage/development.sqlite3
|
95
|
+
+ queue:
|
96
|
+
+ <<: *default
|
97
|
+
+ database: storage/development_queue.sqlite3
|
98
|
+
+ migrations_paths: db/queue_migrate
|
99
|
+
```
|
100
|
+
|
101
|
+
Next, add the following to `development.rb`
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
# Use Solid Queue in Development.
|
105
|
+
config.active_job.queue_adapter = :solid_queue
|
106
|
+
config.solid_queue.connects_to = { database: { writing: :queue } }
|
107
|
+
```
|
108
|
+
|
109
|
+
Once you've added this, run `db:prepare` to create the Solid Queue database and load the schema.
|
110
|
+
|
111
|
+
Finally, in order for jobs to be processed, you'll need to have Solid Queue running. In Development, this can be done via [the Puma plugin](#puma-plugin) as well. In `puma.rb` update the following line:
|
112
|
+
|
113
|
+
```ruby
|
114
|
+
# You can either set the env var, or check for development
|
115
|
+
plugin :solid_queue if ENV["SOLID_QUEUE_IN_PUMA"] || Rails.env.development?
|
116
|
+
```
|
117
|
+
|
118
|
+
You can also just use `bin/jobs`, but in this case you might want to [set a different logger for Solid Queue](#other-configuration-settings) because the default logger will log to `log/development.log` and you won't see anything when you run `bin/jobs`. For example:
|
119
|
+
```ruby
|
120
|
+
config.solid_queue.logger = ActiveSupport::Logger.new(STDOUT)
|
121
|
+
```
|
122
|
+
|
123
|
+
**Note about Action Cable**: If you use Action Cable (or anything dependent on Action Cable, such as Turbo Streams), you will also need to update it to use a database.
|
124
|
+
|
125
|
+
In `config/cable.yml`
|
126
|
+
|
127
|
+
```diff
|
128
|
+
development:
|
129
|
+
- adapter: async
|
130
|
+
+ adapter: solid_cable
|
131
|
+
+ connects_to:
|
132
|
+
+ database:
|
133
|
+
+ writing: cable
|
134
|
+
+ polling_interval: 0.1.seconds
|
135
|
+
+ message_retention: 1.day
|
136
|
+
```
|
137
|
+
|
138
|
+
In `config/database.yml`
|
139
|
+
|
140
|
+
```diff
|
141
|
+
development:
|
142
|
+
primary:
|
143
|
+
<<: *default
|
144
|
+
database: storage/development.sqlite3
|
145
|
+
+ cable:
|
146
|
+
+ <<: *default
|
147
|
+
+ database: storage/development_cable.sqlite3
|
148
|
+
+ migrations_paths: db/cable_migrate
|
149
|
+
```
|
56
150
|
|
57
151
|
### Single database configuration
|
58
152
|
|
@@ -64,7 +158,11 @@ Running Solid Queue in a separate database is recommended, but it's also possibl
|
|
64
158
|
|
65
159
|
You won't have multiple databases, so `database.yml` doesn't need to have primary and queue database.
|
66
160
|
|
67
|
-
|
161
|
+
### Dashboard UI setup
|
162
|
+
|
163
|
+
For viewing information about your jobs via a UI, we recommend taking a look at [mission_control-jobs](https://github.com/rails/mission_control-jobs), a dashboard where, among other things, you can examine and retry/discard failed jobs.
|
164
|
+
|
165
|
+
### Incremental adoption
|
68
166
|
|
69
167
|
If you're planning to adopt Solid Queue incrementally by switching one job at the time, you can do so by leaving the `config.active_job.queue_adapter` set to your old backend, and then set the `queue_adapter` directly in the jobs you're moving:
|
70
168
|
|
@@ -77,7 +175,7 @@ class MyJob < ApplicationJob
|
|
77
175
|
end
|
78
176
|
```
|
79
177
|
|
80
|
-
|
178
|
+
### High performance requirements
|
81
179
|
|
82
180
|
Solid Queue was designed for the highest throughput when used with MySQL 8+ or PostgreSQL 9.5+, as they support `FOR UPDATE SKIP LOCKED`. You can use it with older versions, but in that case, you might run into lock waits if you run multiple workers for the same queue. You can also use it with SQLite on smaller applications.
|
83
181
|
|
@@ -86,6 +184,7 @@ Solid Queue was designed for the highest throughput when used with MySQL 8+ or P
|
|
86
184
|
### Workers, dispatchers and scheduler
|
87
185
|
|
88
186
|
We have several types of actors in Solid Queue:
|
187
|
+
|
89
188
|
- _Workers_ are in charge of picking jobs ready to run from queues and processing them. They work off the `solid_queue_ready_executions` table.
|
90
189
|
- _Dispatchers_ are in charge of selecting jobs scheduled to run in the future that are due and _dispatching_ them, which is simply moving them from the `solid_queue_scheduled_executions` table over to the `solid_queue_ready_executions` table so that workers can pick them up. On top of that, they do some maintenance work related to [concurrency controls](#concurrency-controls).
|
91
190
|
- The _scheduler_ manages [recurring tasks](#recurring-tasks), enqueuing jobs for them when they're due.
|
@@ -99,6 +198,7 @@ By default, Solid Queue will try to find your configuration under `config/queue.
|
|
99
198
|
bin/jobs -c config/calendar.yml
|
100
199
|
```
|
101
200
|
|
201
|
+
You can also skip all recurring tasks by setting the environment variable `SOLID_QUEUE_SKIP_RECURRING=true`. This is useful for environments like staging, review apps, or development where you don't want any recurring jobs to run. This is equivalent to using the `--skip-recurring` option with `bin/jobs`.
|
102
202
|
|
103
203
|
This is what this configuration looks like:
|
104
204
|
|
@@ -153,6 +253,7 @@ Here's an overview of the different options:
|
|
153
253
|
Check the sections below on [how queue order behaves combined with priorities](#queue-order-and-priorities), and [how the way you specify the queues per worker might affect performance](#queues-specification-and-performance).
|
154
254
|
|
155
255
|
- `threads`: this is the max size of the thread pool that each worker will have to run jobs. Each worker will fetch this number of jobs from their queue(s), at most and will post them to the thread pool to be run. By default, this is `3`. Only workers have this setting.
|
256
|
+
It is recommended to set this value less than or equal to the queue database's connection pool size minus 2, as each worker thread uses one connection, and two additional connections are reserved for polling and heartbeat.
|
156
257
|
- `processes`: this is the number of worker processes that will be forked by the supervisor with the settings given. By default, this is `1`, just a single process. This setting is useful if you want to dedicate more than one CPU core to a queue or queues with the same configuration. Only workers have this setting.
|
157
258
|
- `concurrency_maintenance`: whether the dispatcher will perform the concurrency maintenance work. This is `true` by default, and it's useful if you don't use any [concurrency controls](#concurrency-controls) and want to disable it or if you run multiple dispatchers and want some of them to just dispatch jobs without doing anything else.
|
158
259
|
|
@@ -220,7 +321,7 @@ and then remove the paused ones. Pausing in general should be something rare, us
|
|
220
321
|
Do this:
|
221
322
|
|
222
323
|
```yml
|
223
|
-
queues: background, backend
|
324
|
+
queues: [ background, backend ]
|
224
325
|
```
|
225
326
|
|
226
327
|
instead of this:
|
@@ -250,33 +351,6 @@ You can configure the database used by Solid Queue via the `config.solid_queue.c
|
|
250
351
|
|
251
352
|
All the options available to Active Record for multiple databases can be used here.
|
252
353
|
|
253
|
-
## Lifecycle hooks
|
254
|
-
|
255
|
-
In Solid queue, you can hook into two different points in the supervisor's life:
|
256
|
-
- `start`: after the supervisor has finished booting and right before it forks workers and dispatchers.
|
257
|
-
- `stop`: after receiving a signal (`TERM`, `INT` or `QUIT`) and right before starting graceful or immediate shutdown.
|
258
|
-
|
259
|
-
And into two different points in a worker's life:
|
260
|
-
- `worker_start`: after the worker has finished booting and right before it starts the polling loop.
|
261
|
-
- `worker_stop`: after receiving a signal (`TERM`, `INT` or `QUIT`) and right before starting graceful or immediate shutdown (which is just `exit!`).
|
262
|
-
|
263
|
-
You can use the following methods with a block to do this:
|
264
|
-
```ruby
|
265
|
-
SolidQueue.on_start
|
266
|
-
SolidQueue.on_stop
|
267
|
-
|
268
|
-
SolidQueue.on_worker_start
|
269
|
-
SolidQueue.on_worker_stop
|
270
|
-
```
|
271
|
-
|
272
|
-
For example:
|
273
|
-
```ruby
|
274
|
-
SolidQueue.on_start { start_metrics_server }
|
275
|
-
SolidQueue.on_stop { stop_metrics_server }
|
276
|
-
```
|
277
|
-
|
278
|
-
These can be called several times to add multiple hooks, but it needs to happen before Solid Queue is started. An initializer would be a good place to do this.
|
279
|
-
|
280
354
|
### Other configuration settings
|
281
355
|
|
282
356
|
_Note_: The settings in this section should be set in your `config/application.rb` or your environment config like this: `config.solid_queue.silence_polling = true`
|
@@ -299,9 +373,58 @@ There are several settings that control how Solid Queue works that you can set a
|
|
299
373
|
- `silence_polling`: whether to silence Active Record logs emitted when polling for both workers and dispatchers—defaults to `true`.
|
300
374
|
- `supervisor_pidfile`: path to a pidfile that the supervisor will create when booting to prevent running more than one supervisor in the same host, or in case you want to use it for a health check. It's `nil` by default.
|
301
375
|
- `preserve_finished_jobs`: whether to keep finished jobs in the `solid_queue_jobs` table—defaults to `true`.
|
302
|
-
- `clear_finished_jobs_after`: period to keep finished jobs around, in case `preserve_finished_jobs` is true—defaults to 1 day.
|
376
|
+
- `clear_finished_jobs_after`: period to keep finished jobs around, in case `preserve_finished_jobs` is true — defaults to 1 day. When installing Solid Queue, [a recurring job](#recurring-tasks) is automatically configured to clear finished jobs every hour on the 12th minute in batches. You can edit the `recurring.yml` configuration to change this as you see fit.
|
303
377
|
- `default_concurrency_control_period`: the value to be used as the default for the `duration` parameter in [concurrency controls](#concurrency-controls). It defaults to 3 minutes.
|
304
378
|
|
379
|
+
|
380
|
+
## Lifecycle hooks
|
381
|
+
|
382
|
+
In Solid Queue, you can hook into two different points in the supervisor's life:
|
383
|
+
- `start`: after the supervisor has finished booting and right before it forks workers and dispatchers.
|
384
|
+
- `stop`: after receiving a signal (`TERM`, `INT` or `QUIT`) and right before starting graceful or immediate shutdown.
|
385
|
+
|
386
|
+
And into two different points in the worker's, dispatcher's and scheduler's life:
|
387
|
+
- `(worker|dispatcher|scheduler)_start`: after the worker/dispatcher/scheduler has finished booting and right before it starts the polling loop or loading the recurring schedule.
|
388
|
+
- `(worker|dispatcher|scheduler)_stop`: after receiving a signal (`TERM`, `INT` or `QUIT`) and right before starting graceful or immediate shutdown (which is just `exit!`).
|
389
|
+
|
390
|
+
Each of these hooks has an instance of the supervisor/worker/dispatcher/scheduler yielded to the block so that you may read its configuration for logging or metrics reporting purposes.
|
391
|
+
|
392
|
+
You can use the following methods with a block to do this:
|
393
|
+
```ruby
|
394
|
+
SolidQueue.on_start
|
395
|
+
SolidQueue.on_stop
|
396
|
+
|
397
|
+
SolidQueue.on_worker_start
|
398
|
+
SolidQueue.on_worker_stop
|
399
|
+
|
400
|
+
SolidQueue.on_dispatcher_start
|
401
|
+
SolidQueue.on_dispatcher_stop
|
402
|
+
|
403
|
+
SolidQueue.on_scheduler_start
|
404
|
+
SolidQueue.on_scheduler_stop
|
405
|
+
```
|
406
|
+
|
407
|
+
For example:
|
408
|
+
```ruby
|
409
|
+
SolidQueue.on_start do |supervisor|
|
410
|
+
MyMetricsReporter.process_name = supervisor.name
|
411
|
+
|
412
|
+
start_metrics_server
|
413
|
+
end
|
414
|
+
|
415
|
+
SolidQueue.on_stop do |_supervisor|
|
416
|
+
stop_metrics_server
|
417
|
+
end
|
418
|
+
|
419
|
+
SolidQueue.on_worker_start do |worker|
|
420
|
+
MyMetricsReporter.process_name = worker.name
|
421
|
+
MyMetricsReporter.queues = worker.queues.join(',')
|
422
|
+
end
|
423
|
+
```
|
424
|
+
|
425
|
+
These can be called several times to add multiple hooks, but it needs to happen before Solid Queue is started. An initializer would be a good place to do this.
|
426
|
+
|
427
|
+
|
305
428
|
## Errors when enqueuing
|
306
429
|
|
307
430
|
Solid Queue will raise a `SolidQueue::Job::EnqueueError` for any Active Record errors that happen when enqueuing a job. The reason for not raising `ActiveJob::EnqueueError` is that this one gets handled by Active Job, causing `perform_later` to return `false` and set `job.enqueue_error`, yielding the job to a block that you need to pass to `perform_later`. This works very well for your own jobs, but makes failure very hard to handle for jobs enqueued by Rails or other gems, such as `Turbo::Streams::BroadcastJob` or `ActiveStorage::AnalyzeJob`, because you don't control the call to `perform_later` in that cases.
|
@@ -310,11 +433,13 @@ In the case of recurring tasks, if such error is raised when enqueuing the job c
|
|
310
433
|
|
311
434
|
## Concurrency controls
|
312
435
|
|
313
|
-
Solid Queue extends Active Job with concurrency controls, that allows you to limit how many jobs of a certain type or with certain arguments can run at the same time. When limited in this way, jobs will be blocked from running
|
436
|
+
Solid Queue extends Active Job with concurrency controls that allow you to limit how many jobs of a certain type or with certain arguments can run at the same time. When limited in this way, **by default, jobs will be blocked from running**, and they'll stay blocked until another job finishes and unblocks them, or after the set expiry time (concurrency limit's _duration_) elapses.
|
437
|
+
|
438
|
+
**Alternatively, jobs can be configured to be discarded instead of blocked**. This means that if a job with certain arguments has already been enqueued, other jobs with the same characteristics (in the same concurrency _class_) won't be enqueued.
|
314
439
|
|
315
440
|
```ruby
|
316
441
|
class MyJob < ApplicationJob
|
317
|
-
limits_concurrency to: max_concurrent_executions, key: ->(arg1, arg2, **) { ... }, duration: max_interval_to_guarantee_concurrency_limit, group: concurrency_group
|
442
|
+
limits_concurrency to: max_concurrent_executions, key: ->(arg1, arg2, **) { ... }, duration: max_interval_to_guarantee_concurrency_limit, group: concurrency_group, on_conflict: on_conflict_behaviour
|
318
443
|
|
319
444
|
# ...
|
320
445
|
```
|
@@ -322,10 +447,19 @@ class MyJob < ApplicationJob
|
|
322
447
|
- `to` is `1` by default.
|
323
448
|
- `duration` is set to `SolidQueue.default_concurrency_control_period` by default, which itself defaults to `3 minutes`, but that you can configure as well.
|
324
449
|
- `group` is used to control the concurrency of different job classes together. It defaults to the job class name.
|
450
|
+
- `on_conflict` controls behaviour when enqueuing a job that conflicts with the concurrency limits configured. It can be set to one of the following:
|
451
|
+
- (default) `:block`: the job is blocked and is dispatched when another job completes and unblocks it, or when the duration expires.
|
452
|
+
- `:discard`: the job is discarded. When you choose this option, bear in mind that if a job runs and fails to remove the concurrency lock (or _semaphore_, read below to know more about this), all jobs conflicting with it will be discarded until the interval defined by `duration` has elapsed.
|
325
453
|
|
326
454
|
When a job includes these controls, we'll ensure that, at most, the number of jobs (indicated as `to`) that yield the same `key` will be performed concurrently, and this guarantee will last for `duration` for each job enqueued. Note that there's no guarantee about _the order of execution_, only about jobs being performed at the same time (overlapping).
|
327
455
|
|
328
|
-
The concurrency limits use the concept of semaphores when enqueuing, and work as follows: when a job is enqueued, we check if it specifies concurrency controls. If it does, we check the semaphore for the computed concurrency key. If the semaphore is open, we claim it and we set the job as _ready_. Ready means it can be picked up by workers for execution. When the job finishes executing (be it successfully or unsuccessfully, resulting in a failed execution), we signal the semaphore and try to unblock the next job with the same key, if any. Unblocking the next job doesn't mean running that job right away, but moving it from _blocked_ to _ready_.
|
456
|
+
The concurrency limits use the concept of semaphores when enqueuing, and work as follows: when a job is enqueued, we check if it specifies concurrency controls. If it does, we check the semaphore for the computed concurrency key. If the semaphore is open, we claim it and we set the job as _ready_. Ready means it can be picked up by workers for execution. When the job finishes executing (be it successfully or unsuccessfully, resulting in a failed execution), we signal the semaphore and try to unblock the next job with the same key, if any. Unblocking the next job doesn't mean running that job right away, but moving it from _blocked_ to _ready_. If you're using the `discard` behaviour for `on_conflict`, jobs enqueued while the semaphore is closed will be discarded.
|
457
|
+
|
458
|
+
Since something can happen that prevents the first job from releasing the semaphore and unblocking the next job (for example, someone pulling a plug in the machine where the worker is running), we have the `duration` as a failsafe. Jobs that have been blocked for more than `duration` are candidates to be released, but only as many of them as the concurrency rules allow, as each one would need to go through the semaphore dance check. This means that the `duration` is not really about the job that's enqueued or being run, it's about the jobs that are blocked waiting, or about the jobs that would get discarded while the semaphore is closed.
|
459
|
+
|
460
|
+
It's important to note that after one or more candidate jobs are unblocked (either because a job finishes or because `duration` expires and a semaphore is released), the `duration` timer for the still blocked jobs is reset. This happens indirectly via the expiration time of the semaphore, which is updated.
|
461
|
+
|
462
|
+
When using `discard` as the behaviour to handle conflicts, you might have jobs discarded for up to the `duration` interval if something happens and a running job fails to release the semaphore.
|
329
463
|
|
330
464
|
|
331
465
|
For example:
|
@@ -358,12 +492,63 @@ class Bundle::RebundlePostingsJob < ApplicationJob
|
|
358
492
|
|
359
493
|
In this case, if we have a `Box::MovePostingsByContactToDesignatedBoxJob` job enqueued for a contact record with id `123` and another `Bundle::RebundlePostingsJob` job enqueued simultaneously for a bundle record that references contact `123`, only one of them will be allowed to proceed. The other one will stay blocked until the first one finishes (or 15 minutes pass, whatever happens first).
|
360
494
|
|
361
|
-
Note that the `duration` setting depends indirectly on the value for `concurrency_maintenance_interval` that you set for your dispatcher(s), as that'd be the frequency with which blocked jobs are checked and unblocked. In general, you should set `duration` in a way that all your jobs would finish well under that duration and think of the concurrency maintenance task as a failsafe in case something goes wrong.
|
495
|
+
Note that the `duration` setting depends indirectly on the value for `concurrency_maintenance_interval` that you set for your dispatcher(s), as that'd be the frequency with which blocked jobs are checked and unblocked (at which point, only one job per concurrency key, at most, is unblocked). In general, you should set `duration` in a way that all your jobs would finish well under that duration and think of the concurrency maintenance task as a failsafe in case something goes wrong.
|
362
496
|
|
363
|
-
Jobs are unblocked in order of priority but queue order is not taken into account for unblocking jobs
|
497
|
+
Jobs are unblocked in order of priority but **queue order is not taken into account for unblocking jobs**. That means that if you have a group of jobs that share a concurrency group but are in different queues, or jobs of the same class that you enqueue in different queues, the queue order you set for a worker is not taken into account when unblocking blocked ones. The reason is that a job that runs unblocks the next one, and the job itself doesn't know about a particular worker's queue order (you could even have different workers with different queue orders), it can only know about priority. Once blocked jobs are unblocked and available for polling, they'll be picked up by a worker following its queue order.
|
364
498
|
|
365
499
|
Finally, failed jobs that are automatically or manually retried work in the same way as new jobs that get enqueued: they get in the queue for getting an open semaphore, and whenever they get it, they'll be run. It doesn't matter if they had already gotten an open semaphore in the past.
|
366
500
|
|
501
|
+
### Scheduled jobs
|
502
|
+
|
503
|
+
Jobs set to run in the future (via Active Job's `wait` or `wait_until` options) have concurrency limits enforced when they're due, not when they're scheduled. For example, consider this job:
|
504
|
+
```ruby
|
505
|
+
class DeliverAnnouncementToContactJob < ApplicationJob
|
506
|
+
limits_concurrency to: 1, key: ->(contact) { contact.account }, duration: 5.minutes
|
507
|
+
|
508
|
+
def perform(contact)
|
509
|
+
# ...
|
510
|
+
```
|
511
|
+
|
512
|
+
If several jobs are enqueued like this:
|
513
|
+
|
514
|
+
```ruby
|
515
|
+
DeliverAnnouncementToContactJob.set(wait: 10.minutes).perform_later(contact)
|
516
|
+
DeliverAnnouncementToContactJob.set(wait: 10.minutes).perform_later(contact)
|
517
|
+
DeliverAnnouncementToContactJob.set(wait: 30.minutes).perform_later(contact)
|
518
|
+
```
|
519
|
+
|
520
|
+
The 3 jobs will go into the scheduled queue and will wait there until they're due. Then, 10 minutes later, the first two jobs will be enqueued and the second one most likely will be blocked because the first one will be running first. Then, assuming the jobs are fast and finish in a few seconds, when the third job is due, it'll be enqueued normally.
|
521
|
+
|
522
|
+
Normally scheduled jobs are enqueued in batches, but with concurrency controls, jobs need to be enqueued one by one. This has an impact on performance, similarly to the impact of concurrency controls in bulk enqueuing. Read below for more details. I'd generally advise against mixing concurrency controls with waiting/scheduling in the future.
|
523
|
+
|
524
|
+
### Performance considerations
|
525
|
+
|
526
|
+
Concurrency controls introduce significant overhead (blocked executions need to be created and promoted to ready, semaphores need to be created and updated) so you should consider carefully whether you need them. For throttling purposes, where you plan to have `limit` significantly larger than 1, I'd encourage relying on a limited number of workers per queue instead. For example:
|
527
|
+
|
528
|
+
```ruby
|
529
|
+
class ThrottledJob < ApplicationJob
|
530
|
+
queue_as :throttled
|
531
|
+
```
|
532
|
+
|
533
|
+
```yml
|
534
|
+
production:
|
535
|
+
workers:
|
536
|
+
- queues: throttled
|
537
|
+
threads: 1
|
538
|
+
polling_interval: 1
|
539
|
+
- queues: default
|
540
|
+
threads: 5
|
541
|
+
polling_interval: 0.1
|
542
|
+
processes: 3
|
543
|
+
```
|
544
|
+
|
545
|
+
Or something similar to that depending on your setup. You can also assign a different queue to a job on the moment of enqueuing so you can decide whether to enqueue a job in the throttled queue or another queue depending on the arguments, or pass a block to `queue_as` as explained [here](https://guides.rubyonrails.org/active_job_basics.html#queues).
|
546
|
+
|
547
|
+
|
548
|
+
In addition, mixing concurrency controls with **bulk enqueuing** (Active Job's `perform_all_later`) is not a good idea because concurrency controlled job needs to be enqueued one by one to ensure concurrency limits are respected, so you lose all the benefits of bulk enqueuing.
|
549
|
+
|
550
|
+
When jobs that have concurrency controls and `on_conflict: :discard` are enqueued in bulk, the ones that fail to be enqueued and are discarded would have `successfully_enqueued` set to `false`. The total count of jobs enqueued returned by `perform_all_later` will exclude these jobs as expected.
|
551
|
+
|
367
552
|
## Failed jobs and retries
|
368
553
|
|
369
554
|
Solid Queue doesn't include any automatic retry mechanism, it [relies on Active Job for this](https://edgeguides.rubyonrails.org/active_job_basics.html#retrying-or-discarding-failed-jobs). Jobs that fail will be kept in the system, and a _failed execution_ (a record in the `solid_queue_failed_executions` table) will be created for these. The job will stay there until manually discarded or re-enqueued. You can do this in a console as:
|
@@ -375,8 +560,6 @@ failed_execution.retry # This will re-enqueue the job as if it was enqueued for
|
|
375
560
|
failed_execution.discard # This will delete the job from the system
|
376
561
|
```
|
377
562
|
|
378
|
-
However, we recommend taking a look at [mission_control-jobs](https://github.com/rails/mission_control-jobs), a dashboard where, among other things, you can examine and retry/discard failed jobs.
|
379
|
-
|
380
563
|
### Error reporting on jobs
|
381
564
|
|
382
565
|
Some error tracking services that integrate with Rails, such as Sentry or Rollbar, hook into [Active Job](https://guides.rubyonrails.org/active_job_basics.html#exceptions) and automatically report not handled errors that happen during job execution. However, if your error tracking system doesn't, or if you need some custom reporting, you can hook into Active Job yourself. A possible way of doing this would be:
|
@@ -412,6 +595,13 @@ plugin :solid_queue
|
|
412
595
|
```
|
413
596
|
to your `puma.rb` configuration.
|
414
597
|
|
598
|
+
If you're using Puma in development but you don't want to use Solid Queue in development, make sure you avoid the plugin being used, for example using an environment variable like this:
|
599
|
+
```ruby
|
600
|
+
plugin :solid_queue if ENV["SOLID_QUEUE_IN_PUMA"]
|
601
|
+
```
|
602
|
+
that you set in production only. This is what Rails 8's default Puma config looks like. Otherwise, if you're using Puma in development but not Solid Queue, starting Puma would start also Solid Queue supervisor and it'll most likely fail because it won't be properly configured.
|
603
|
+
|
604
|
+
**Note**: phased restarts are not supported currently because the plugin requires [app preloading](https://github.com/puma/puma?tab=readme-ov-file#cluster-mode) to work.
|
415
605
|
|
416
606
|
## Jobs and transactional integrity
|
417
607
|
:warning: Having your jobs in the same ACID-compliant database as your application data enables a powerful yet sharp tool: taking advantage of transactional integrity to ensure some action in your app is not committed unless your job is also committed and vice versa, and ensuring that your job won't be enqueued until the transaction within which you're enqueuing it is committed. This can be very powerful and useful, but it can also backfire if you base some of your logic on this behaviour, and in the future, you move to another active job backend, or if you simply move Solid Queue to its own database, and suddenly the behaviour changes under you. Because this can be quite tricky and many people shouldn't need to worry about it, by default Solid Queue is configured in a different database as the main app.
|
@@ -450,6 +640,8 @@ Solid Queue supports defining recurring tasks that run at specific times in the
|
|
450
640
|
bin/jobs --recurring_schedule_file=config/schedule.yml
|
451
641
|
```
|
452
642
|
|
643
|
+
You can completely disable recurring tasks by setting the environment variable `SOLID_QUEUE_SKIP_RECURRING=true` or by using the `--skip-recurring` option with `bin/jobs`.
|
644
|
+
|
453
645
|
The configuration itself looks like this:
|
454
646
|
|
455
647
|
```yml
|
@@ -477,9 +669,15 @@ MyJob.perform_later(42, status: "custom_status")
|
|
477
669
|
|
478
670
|
- `priority`: a numeric priority value to be used when enqueuing the job.
|
479
671
|
|
480
|
-
|
481
672
|
Tasks are enqueued at their corresponding times by the scheduler, and each task schedules the next one. This is pretty much [inspired by what GoodJob does](https://github.com/bensheldon/good_job/blob/994ecff5323bf0337e10464841128fda100750e6/lib/good_job/cron_manager.rb).
|
482
673
|
|
674
|
+
For recurring tasks defined as a `command`, you can also change the job class that runs them as follows:
|
675
|
+
```ruby
|
676
|
+
Rails.application.config.after_initialize do # or to_prepare
|
677
|
+
SolidQueue::RecurringTask.default_job_class = MyRecurringCommandJob
|
678
|
+
end
|
679
|
+
```
|
680
|
+
|
483
681
|
It's possible to run multiple schedulers with the same `recurring_tasks` configuration, for example, if you have multiple servers for redundancy, and you run the `scheduler` in more than one of them. To avoid enqueuing duplicate tasks at the same time, an entry in a new `solid_queue_recurring_executions` table is created in the same transaction as the job is enqueued. This table has a unique index on `task_key` and `run_at`, ensuring only one entry per task per time will be created. This only works if you have `preserve_finished_jobs` set to `true` (the default), and the guarantee applies as long as you keep the jobs around.
|
484
682
|
|
485
683
|
**Note**: a single recurring schedule is supported, so you can have multiple schedulers using the same schedule, but not multiple schedulers using different configurations.
|
data/Rakefile
CHANGED
@@ -8,14 +8,36 @@ load "rails/tasks/engine.rake"
|
|
8
8
|
load "rails/tasks/statistics.rake"
|
9
9
|
|
10
10
|
require "bundler/gem_tasks"
|
11
|
+
require "rake/tasklib"
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
class TestHelpers < Rake::TaskLib
|
14
|
+
def initialize(databases)
|
15
|
+
@databases = databases
|
16
|
+
define
|
17
|
+
end
|
15
18
|
|
16
|
-
|
17
|
-
|
19
|
+
def define
|
20
|
+
desc "Run tests for all databases (mysql, postgres, sqlite)"
|
21
|
+
task :test do
|
22
|
+
@databases.each { |database| run_test_for_database(database) }
|
23
|
+
end
|
24
|
+
|
25
|
+
namespace :test do
|
26
|
+
@databases.each do |database|
|
27
|
+
desc "Run tests for #{database} database"
|
28
|
+
task database do
|
29
|
+
run_test_for_database(database)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def run_test_for_database(database)
|
18
38
|
sh("TARGET_DB=#{database} bin/setup")
|
19
39
|
sh("TARGET_DB=#{database} bin/rails test")
|
20
40
|
end
|
21
41
|
end
|
42
|
+
|
43
|
+
TestHelpers.new(%w[ mysql postgres sqlite ])
|
@@ -12,7 +12,7 @@ module SolidQueue
|
|
12
12
|
class << self
|
13
13
|
def unblock(limit)
|
14
14
|
SolidQueue.instrument(:release_many_blocked, limit: limit) do |payload|
|
15
|
-
expired.distinct.limit(limit).pluck(:concurrency_key).then do |concurrency_keys|
|
15
|
+
expired.order(:concurrency_key).distinct.limit(limit).pluck(:concurrency_key).then do |concurrency_keys|
|
16
16
|
payload[:size] = release_many releasable(concurrency_keys)
|
17
17
|
end
|
18
18
|
end
|
@@ -39,7 +39,10 @@ class SolidQueue::ClaimedExecution < SolidQueue::Execution
|
|
39
39
|
def fail_all_with(error)
|
40
40
|
SolidQueue.instrument(:fail_many_claimed) do |payload|
|
41
41
|
includes(:job).tap do |executions|
|
42
|
-
executions.each
|
42
|
+
executions.each do |execution|
|
43
|
+
execution.failed_with(error)
|
44
|
+
execution.unblock_next_job
|
45
|
+
end
|
43
46
|
|
44
47
|
payload[:process_ids] = executions.map(&:process_id).uniq
|
45
48
|
payload[:job_ids] = executions.map(&:job_id).uniq
|
@@ -67,7 +70,7 @@ class SolidQueue::ClaimedExecution < SolidQueue::Execution
|
|
67
70
|
raise result.error
|
68
71
|
end
|
69
72
|
ensure
|
70
|
-
|
73
|
+
unblock_next_job
|
71
74
|
end
|
72
75
|
|
73
76
|
def release
|
@@ -90,9 +93,13 @@ class SolidQueue::ClaimedExecution < SolidQueue::Execution
|
|
90
93
|
end
|
91
94
|
end
|
92
95
|
|
96
|
+
def unblock_next_job
|
97
|
+
job.unblock_next_blocked_job
|
98
|
+
end
|
99
|
+
|
93
100
|
private
|
94
101
|
def execute
|
95
|
-
ActiveJob::Base.execute(job.arguments)
|
102
|
+
ActiveJob::Base.execute(job.arguments.merge("provider_job_id" => job.id))
|
96
103
|
Result.new(true, nil)
|
97
104
|
rescue Exception => e
|
98
105
|
Result.new(false, e)
|
@@ -10,9 +10,10 @@ module SolidQueue
|
|
10
10
|
end
|
11
11
|
|
12
12
|
class_methods do
|
13
|
-
def clear_finished_in_batches(batch_size: 500, finished_before: SolidQueue.clear_finished_jobs_after.ago, class_name: nil)
|
13
|
+
def clear_finished_in_batches(batch_size: 500, finished_before: SolidQueue.clear_finished_jobs_after.ago, class_name: nil, sleep_between_batches: 0)
|
14
14
|
loop do
|
15
15
|
records_deleted = clearable(finished_before: finished_before, class_name: class_name).limit(batch_size).delete_all
|
16
|
+
sleep(sleep_between_batches) if sleep_between_batches > 0
|
16
17
|
break if records_deleted == 0
|
17
18
|
end
|
18
19
|
end
|
@@ -34,6 +34,10 @@ module SolidQueue
|
|
34
34
|
end
|
35
35
|
|
36
36
|
private
|
37
|
+
def concurrency_on_conflict
|
38
|
+
job_class.concurrency_on_conflict.to_s.inquiry
|
39
|
+
end
|
40
|
+
|
37
41
|
def acquire_concurrency_lock
|
38
42
|
return true unless concurrency_limited?
|
39
43
|
|
@@ -46,6 +50,14 @@ module SolidQueue
|
|
46
50
|
Semaphore.signal(self)
|
47
51
|
end
|
48
52
|
|
53
|
+
def handle_concurrency_conflict
|
54
|
+
if concurrency_on_conflict.discard?
|
55
|
+
destroy
|
56
|
+
else
|
57
|
+
block
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
49
61
|
def block
|
50
62
|
BlockedExecution.create_or_find_by!(job_id: id)
|
51
63
|
end
|
@@ -29,7 +29,8 @@ module SolidQueue
|
|
29
29
|
active_job.scheduled_at = scheduled_at
|
30
30
|
|
31
31
|
create_from_active_job(active_job).tap do |enqueued_job|
|
32
|
-
active_job.provider_job_id = enqueued_job.id
|
32
|
+
active_job.provider_job_id = enqueued_job.id if enqueued_job.persisted?
|
33
|
+
active_job.successfully_enqueued = enqueued_job.persisted?
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
@@ -49,7 +50,7 @@ module SolidQueue
|
|
49
50
|
def create_all_from_active_jobs(active_jobs)
|
50
51
|
job_rows = active_jobs.map { |job| attributes_from_active_job(job) }
|
51
52
|
insert_all(job_rows)
|
52
|
-
where(active_job_id: active_jobs.map(&:job_id))
|
53
|
+
where(active_job_id: active_jobs.map(&:job_id)).order(id: :asc)
|
53
54
|
end
|
54
55
|
|
55
56
|
def attributes_from_active_job(active_job)
|
@@ -12,6 +12,8 @@ module SolidQueue
|
|
12
12
|
|
13
13
|
scope :static, -> { where(static: true) }
|
14
14
|
|
15
|
+
has_many :recurring_executions, foreign_key: :task_key, primary_key: :key
|
16
|
+
|
15
17
|
mattr_accessor :default_job_class
|
16
18
|
self.default_job_class = RecurringJob
|
17
19
|
|
@@ -53,6 +55,18 @@ module SolidQueue
|
|
53
55
|
parsed_schedule.next_time.utc
|
54
56
|
end
|
55
57
|
|
58
|
+
def previous_time
|
59
|
+
parsed_schedule.previous_time.utc
|
60
|
+
end
|
61
|
+
|
62
|
+
def last_enqueued_time
|
63
|
+
if recurring_executions.loaded?
|
64
|
+
recurring_executions.map(&:run_at).max
|
65
|
+
else
|
66
|
+
recurring_executions.maximum(:run_at)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
56
70
|
def enqueue(at:)
|
57
71
|
SolidQueue.instrument(:enqueue_recurring_task, task: key, at: at) do |payload|
|
58
72
|
active_job = if using_solid_queue_adapter?
|
@@ -116,7 +130,6 @@ module SolidQueue
|
|
116
130
|
active_job.run_callbacks(:enqueue) do
|
117
131
|
Job.enqueue(active_job)
|
118
132
|
end
|
119
|
-
active_job.successfully_enqueued = true
|
120
133
|
end
|
121
134
|
end
|
122
135
|
end
|
@@ -14,7 +14,7 @@ module SolidQueue
|
|
14
14
|
def dispatch_next_batch(batch_size)
|
15
15
|
transaction do
|
16
16
|
job_ids = next_batch(batch_size).non_blocking_lock.pluck(:job_id)
|
17
|
-
if job_ids.empty? then
|
17
|
+
if job_ids.empty? then 0
|
18
18
|
else
|
19
19
|
SolidQueue.instrument(:dispatch_scheduled, batch_size: batch_size) do |payload|
|
20
20
|
payload[:size] = dispatch_jobs(job_ids)
|