beanqueue 0.2.3__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {beanqueue-0.2.3 → beanqueue-1.0.0}/PKG-INFO +107 -26
- {beanqueue-0.2.3 → beanqueue-1.0.0}/README.md +105 -24
- beanqueue-1.0.0/bq/__init__.py +19 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/app.py +22 -3
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/config.py +3 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/models/__init__.py +6 -0
- beanqueue-1.0.0/bq/models/event.py +76 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/models/task.py +44 -2
- beanqueue-1.0.0/bq/processors/processor.py +120 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/processors/registry.py +14 -4
- beanqueue-1.0.0/bq/processors/retry_policies.py +55 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/services/dispatch.py +21 -3
- {beanqueue-0.2.3 → beanqueue-1.0.0}/pyproject.toml +2 -2
- beanqueue-0.2.3/bq/__init__.py +0 -10
- beanqueue-0.2.3/bq/processors/processor.py +0 -72
- {beanqueue-0.2.3 → beanqueue-1.0.0}/LICENSE +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/cmds/__init__.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/cmds/create_tables.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/cmds/process.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/cmds/submit.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/cmds/utils.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/constants.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/db/__init__.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/db/base.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/db/session.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/events.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/models/helpers.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/models/worker.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/processors/__init__.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/services/__init__.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/services/worker.py +0 -0
- {beanqueue-0.2.3 → beanqueue-1.0.0}/bq/utils.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: beanqueue
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Fang-Pen Lin
|
|
7
7
|
Author-email: fangpen@launchplatform.com
|
|
@@ -19,7 +19,7 @@ Requires-Dist: venusian (>=3.1.0,<4.0.0)
|
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
|
|
21
21
|
# BeanQueue [](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
|
|
22
|
-
BeanQueue, a lightweight
|
|
22
|
+
BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
|
|
23
23
|
|
|
24
24
|
**Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
|
|
25
25
|
|
|
@@ -29,8 +29,10 @@ BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://ww
|
|
|
29
29
|
- **Easy-to-deploy**: Only rely on PostgreSQL
|
|
30
30
|
- **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
|
|
31
31
|
- **Auto-notify**: Notify will automatically be generated and sent for inserted or updated tasks
|
|
32
|
+
- **Retry**: Built-in and customizable retry-policies
|
|
33
|
+
- **Schedule**: Schedule task to run later
|
|
32
34
|
- **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is found dead, the others will reschedule the tasks
|
|
33
|
-
- **Customizable**: Use it as an library and build your own
|
|
35
|
+
- **Customizable**: Use it as a library and build your own work queue
|
|
34
36
|
- **Native DB operations**: Commit your tasks with other db entries altogether without worrying about data inconsistent issue
|
|
35
37
|
|
|
36
38
|
## Install
|
|
@@ -127,6 +129,78 @@ To create tables for BeanQueue, you can run
|
|
|
127
129
|
python -m bq.cmds.create_tables
|
|
128
130
|
```
|
|
129
131
|
|
|
132
|
+
### Schedule
|
|
133
|
+
|
|
134
|
+
In most cases, a task will be executed as soon as possible after it is created.
|
|
135
|
+
To run a task later, you can set a datetime value to the `scheduled_at` attribute of the task model.
|
|
136
|
+
For example:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
import datetime
|
|
140
|
+
|
|
141
|
+
db = Session()
|
|
142
|
+
task = resize_image.run(width=200, height=300)
|
|
143
|
+
task.scheduled_at = func.now() + datetime.timedelta(minutes=3)
|
|
144
|
+
db.add(task)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Please note that currently, workers won't wake up at the next exact moment when the scheduled tasks are ready to run.
|
|
148
|
+
It has to wait until the polling times out, and eventually, it will see the task's scheduled_at time exceeds the current datetime.
|
|
149
|
+
Therefore, depending on your `POLL_TIMEOUT` setting and the number of your workers when they started processing, the actual execution may be inaccurate.
|
|
150
|
+
If you set the `POLL_TIMEOUT` to 60 seconds, please expect less than 60 seconds of delay.
|
|
151
|
+
|
|
152
|
+
### Retry
|
|
153
|
+
|
|
154
|
+
To automatically retry a task after failure, you can specify a retry policy to the processor.
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
import datetime
|
|
158
|
+
import bq
|
|
159
|
+
from sqlalchemy.orm import Session
|
|
160
|
+
|
|
161
|
+
app = bq.BeanQueue()
|
|
162
|
+
delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
|
|
163
|
+
|
|
164
|
+
@app.processor(channel="images", retry_policy=delay_retry)
|
|
165
|
+
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
166
|
+
# resize image here ...
|
|
167
|
+
pass
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Currently, we provide some simple common retry policies such as `DelayRetry` and `ExponentialBackoffRetry`.
|
|
171
|
+
Surely, you can define your retry policy easily by making a function that returns an optional object at the next scheduled time for retry.
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
def my_retry_policy(task: bq.Task) -> typing.Any:
|
|
175
|
+
# calculate delay based on task model ...
|
|
176
|
+
return func.now() + datetime.timedelta(seconds=delay)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
To cap how many attempts are allowed, you can also use `LimitAttempt` like this:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
|
|
183
|
+
capped_delay_retry = bq.LimitAttempt(3, delay_retry)
|
|
184
|
+
|
|
185
|
+
@app.processor(channel="images", retry_policy=capped_delay_retry)
|
|
186
|
+
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
187
|
+
# resize image here ...
|
|
188
|
+
pass
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
You can also retry only for specific exception classes with the `retry_exceptions` argument.
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
@app.processor(
|
|
195
|
+
channel="images",
|
|
196
|
+
retry_policy=delay_retry,
|
|
197
|
+
retry_exceptions=ValueError,
|
|
198
|
+
)
|
|
199
|
+
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
200
|
+
# resize image here ...
|
|
201
|
+
pass
|
|
202
|
+
```
|
|
203
|
+
|
|
130
204
|
### Configurations
|
|
131
205
|
|
|
132
206
|
Configurations can be modified by setting environment variables with `BQ_` prefix.
|
|
@@ -145,7 +219,7 @@ container = bq.Container()
|
|
|
145
219
|
container.wire(packages=[bq])
|
|
146
220
|
config = bq.Config(
|
|
147
221
|
PROCESSOR_PACKAGES=["my_pkgs.processors"],
|
|
148
|
-
DATABASE_URL=
|
|
222
|
+
DATABASE_URL=config.DATABASE_URL,
|
|
149
223
|
BATCH_SIZE=10,
|
|
150
224
|
)
|
|
151
225
|
app = bq.BeanQueue(config=config)
|
|
@@ -168,12 +242,16 @@ app.process_tasks(channels=("images",))
|
|
|
168
242
|
BeanQueue is designed to be as customizable as much as possible.
|
|
169
243
|
Of course, you can define your own SQLAlchemy model instead of using the ones we provided.
|
|
170
244
|
|
|
171
|
-
To make defining your own `Task`
|
|
245
|
+
To make defining your own `Task`, `Worker` or `Event` model much easier, you can use our mixin classes:
|
|
172
246
|
|
|
173
247
|
- `bq.TaskModelMixin`: provides task model columns
|
|
174
248
|
- `bq.TaskModelRefWorkerMixin`: provides foreign key column and relationship to `bq.Worker`
|
|
249
|
+
- `bq.TaskModelRefParentMixin`: provides foreign key column and relationship to children `bq.Task` created during processing
|
|
250
|
+
- `bq.TaskModelRefEventMixin`: provides foreign key column and relationship to `bq.Event`
|
|
175
251
|
- `bq.WorkerModelMixin`: provides worker model columns
|
|
176
252
|
- `bq.WorkerRefMixin`: provides relationship to `bq.Task`
|
|
253
|
+
- `bq.EventModelMixin`: provides event model columns
|
|
254
|
+
- `bq.EventModelRefTaskMixin`: provides foreign key column and relationship to `bq.Task`
|
|
177
255
|
|
|
178
256
|
Here's an example for defining your own Task model:
|
|
179
257
|
|
|
@@ -232,13 +310,14 @@ class Worker(bq.WorkerModelMixin, Base):
|
|
|
232
310
|
)
|
|
233
311
|
```
|
|
234
312
|
|
|
235
|
-
With the model class ready, you only need to change the `TASK_MODEL` and `
|
|
313
|
+
With the model class ready, you only need to change the `TASK_MODEL`, `WORKER_MODEL` and `EVENT_MODEL` of `Config` to the full Python module name plus the class name like this.
|
|
236
314
|
|
|
237
315
|
```python
|
|
238
316
|
import bq
|
|
239
317
|
config = bq.Config(
|
|
240
318
|
TASK_MODEL="my_pkgs.models.Task",
|
|
241
319
|
WORKER_MODEL="my_pkgs.models.Worker",
|
|
320
|
+
EVENT_MODEL="my_pkgs.models.Event",
|
|
242
321
|
# ... other configs
|
|
243
322
|
)
|
|
244
323
|
app = bq.BeanQueue(config)
|
|
@@ -246,21 +325,21 @@ app = bq.BeanQueue(config)
|
|
|
246
325
|
|
|
247
326
|
## Why?
|
|
248
327
|
|
|
249
|
-
There are countless
|
|
250
|
-
The primary issue with most
|
|
251
|
-
Our
|
|
252
|
-
However, integrating an external
|
|
253
|
-
The
|
|
328
|
+
There are countless work queue projects. Why make yet another one?
|
|
329
|
+
The primary issue with most work queue tools is their reliance on a standalone broker server.
|
|
330
|
+
Our work queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
|
|
331
|
+
However, integrating an external work queue into the system presents a risk.
|
|
332
|
+
The work queue and the database don't share the same data view, potentially compromising data integrity and reliability.
|
|
254
333
|
|
|
255
334
|
For example, you have a table of `images` to keep the user-uploaded images.
|
|
256
|
-
And you have a background
|
|
257
|
-
So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the
|
|
335
|
+
And you have a background work queue for resizing the uploaded images into different thumbnail sizes.
|
|
336
|
+
So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the work queue.
|
|
258
337
|
|
|
259
|
-
Say you push the task to the
|
|
338
|
+
Say you push the task to the work queue immediately after you insert the `images` table then commit like this:
|
|
260
339
|
|
|
261
340
|
```
|
|
262
341
|
1. Insert into the "images" table
|
|
263
|
-
2. Push resizing task to the
|
|
342
|
+
2. Push resizing task to the work queue
|
|
264
343
|
3. Commit db changes
|
|
265
344
|
```
|
|
266
345
|
|
|
@@ -268,21 +347,21 @@ While this might seem like the right way to do it, there's a hidden bug.
|
|
|
268
347
|
If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
|
|
269
348
|
One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
|
|
270
349
|
But this adds complexity to the system and also increases the latency if the first attempt fails.
|
|
271
|
-
Also, if the commit step fails, you will have a failed
|
|
350
|
+
Also, if the commit step fails, you will have a failed work queue job trying to fetch a row from the database that will never exist.
|
|
272
351
|
|
|
273
352
|
Another approach is to push the resize task after the database changes are committed. It works like this:
|
|
274
353
|
|
|
275
354
|
```
|
|
276
355
|
1. Insert into the "images" table
|
|
277
356
|
2. Commit db changes
|
|
278
|
-
3. Push resizing task to the
|
|
357
|
+
3. Push resizing task to the work queue
|
|
279
358
|
```
|
|
280
359
|
|
|
281
360
|
With this approach, we don't need to worry about workers picking up the task too early.
|
|
282
361
|
However, there's another drawback.
|
|
283
|
-
If step 3 for pushing a new task to the
|
|
284
|
-
There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the
|
|
285
|
-
Things will be much easier if we have a
|
|
362
|
+
If step 3 for pushing a new task to the work queue fails, the newly inserted `images` row will never be processed.
|
|
363
|
+
There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the work queue storage.
|
|
364
|
+
Things will be much easier if we have a work queue that shares the same consistent view with the database.
|
|
286
365
|
|
|
287
366
|
By using a database as the data storage, all the problems are gone.
|
|
288
367
|
You can simply do the following:
|
|
@@ -294,15 +373,15 @@ You can simply do the following:
|
|
|
294
373
|
```
|
|
295
374
|
|
|
296
375
|
It's all or nothing!
|
|
297
|
-
By doing so, you don't need to maintain another
|
|
298
|
-
You are probably using a database anyway, so this
|
|
376
|
+
By doing so, you don't need to maintain another work queue backend.
|
|
377
|
+
You are probably using a database anyway, so this work queue comes for free.
|
|
299
378
|
|
|
300
|
-
Usually, a database is inefficient as the
|
|
379
|
+
Usually, a database is inefficient as the work queue's data storage because of the potential lock contention and the need for constant querying.
|
|
301
380
|
However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
|
|
302
381
|
|
|
303
|
-
This project is inspired by many of the SKIP-LOCKED-based
|
|
382
|
+
This project is inspired by many of the SKIP-LOCKED-based work queue successors.
|
|
304
383
|
Why don't we just use those existing tools?
|
|
305
|
-
Well, because while they work great as
|
|
384
|
+
Well, because while they work great as work queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
|
|
306
385
|
Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
|
|
307
386
|
|
|
308
387
|
With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
|
|
@@ -319,6 +398,8 @@ A modern accounting book service based on the most popular open source version c
|
|
|
319
398
|
## Alternatives
|
|
320
399
|
|
|
321
400
|
- [solid_queue](https://github.com/rails/solid_queue)
|
|
401
|
+
- [good_job](https://github.com/bensheldon/good_job)
|
|
402
|
+
- [graphile-worker](https://github.com/graphile/worker)
|
|
322
403
|
- [postgres-tq](https://github.com/flix-tech/postgres-tq)
|
|
323
404
|
- [pq](https://github.com/malthe/pq/)
|
|
324
405
|
- [PgQueuer](https://github.com/janbjorge/PgQueuer)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# BeanQueue [](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
|
|
2
|
-
BeanQueue, a lightweight
|
|
2
|
+
BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
|
|
3
3
|
|
|
4
4
|
**Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
|
|
5
5
|
|
|
@@ -9,8 +9,10 @@ BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://ww
|
|
|
9
9
|
- **Easy-to-deploy**: Only rely on PostgreSQL
|
|
10
10
|
- **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
|
|
11
11
|
- **Auto-notify**: Notify will automatically be generated and sent for inserted or updated tasks
|
|
12
|
+
- **Retry**: Built-in and customizable retry-policies
|
|
13
|
+
- **Schedule**: Schedule task to run later
|
|
12
14
|
- **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is found dead, the others will reschedule the tasks
|
|
13
|
-
- **Customizable**: Use it as an library and build your own
|
|
15
|
+
- **Customizable**: Use it as a library and build your own work queue
|
|
14
16
|
- **Native DB operations**: Commit your tasks with other db entries altogether without worrying about data inconsistent issue
|
|
15
17
|
|
|
16
18
|
## Install
|
|
@@ -107,6 +109,78 @@ To create tables for BeanQueue, you can run
|
|
|
107
109
|
python -m bq.cmds.create_tables
|
|
108
110
|
```
|
|
109
111
|
|
|
112
|
+
### Schedule
|
|
113
|
+
|
|
114
|
+
In most cases, a task will be executed as soon as possible after it is created.
|
|
115
|
+
To run a task later, you can set a datetime value to the `scheduled_at` attribute of the task model.
|
|
116
|
+
For example:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import datetime
|
|
120
|
+
|
|
121
|
+
db = Session()
|
|
122
|
+
task = resize_image.run(width=200, height=300)
|
|
123
|
+
task.scheduled_at = func.now() + datetime.timedelta(minutes=3)
|
|
124
|
+
db.add(task)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Please note that currently, workers won't wake up at the next exact moment when the scheduled tasks are ready to run.
|
|
128
|
+
It has to wait until the polling times out, and eventually, it will see the task's scheduled_at time exceeds the current datetime.
|
|
129
|
+
Therefore, depending on your `POLL_TIMEOUT` setting and the number of your workers when they started processing, the actual execution may be inaccurate.
|
|
130
|
+
If you set the `POLL_TIMEOUT` to 60 seconds, please expect less than 60 seconds of delay.
|
|
131
|
+
|
|
132
|
+
### Retry
|
|
133
|
+
|
|
134
|
+
To automatically retry a task after failure, you can specify a retry policy to the processor.
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
import datetime
|
|
138
|
+
import bq
|
|
139
|
+
from sqlalchemy.orm import Session
|
|
140
|
+
|
|
141
|
+
app = bq.BeanQueue()
|
|
142
|
+
delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
|
|
143
|
+
|
|
144
|
+
@app.processor(channel="images", retry_policy=delay_retry)
|
|
145
|
+
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
146
|
+
# resize image here ...
|
|
147
|
+
pass
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Currently, we provide some simple common retry policies such as `DelayRetry` and `ExponentialBackoffRetry`.
|
|
151
|
+
Surely, you can define your retry policy easily by making a function that returns an optional object at the next scheduled time for retry.
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
def my_retry_policy(task: bq.Task) -> typing.Any:
|
|
155
|
+
# calculate delay based on task model ...
|
|
156
|
+
return func.now() + datetime.timedelta(seconds=delay)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
To cap how many attempts are allowed, you can also use `LimitAttempt` like this:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
|
|
163
|
+
capped_delay_retry = bq.LimitAttempt(3, delay_retry)
|
|
164
|
+
|
|
165
|
+
@app.processor(channel="images", retry_policy=capped_delay_retry)
|
|
166
|
+
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
167
|
+
# resize image here ...
|
|
168
|
+
pass
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
You can also retry only for specific exception classes with the `retry_exceptions` argument.
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
@app.processor(
|
|
175
|
+
channel="images",
|
|
176
|
+
retry_policy=delay_retry,
|
|
177
|
+
retry_exceptions=ValueError,
|
|
178
|
+
)
|
|
179
|
+
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
180
|
+
# resize image here ...
|
|
181
|
+
pass
|
|
182
|
+
```
|
|
183
|
+
|
|
110
184
|
### Configurations
|
|
111
185
|
|
|
112
186
|
Configurations can be modified by setting environment variables with `BQ_` prefix.
|
|
@@ -125,7 +199,7 @@ container = bq.Container()
|
|
|
125
199
|
container.wire(packages=[bq])
|
|
126
200
|
config = bq.Config(
|
|
127
201
|
PROCESSOR_PACKAGES=["my_pkgs.processors"],
|
|
128
|
-
DATABASE_URL=
|
|
202
|
+
DATABASE_URL=config.DATABASE_URL,
|
|
129
203
|
BATCH_SIZE=10,
|
|
130
204
|
)
|
|
131
205
|
app = bq.BeanQueue(config=config)
|
|
@@ -148,12 +222,16 @@ app.process_tasks(channels=("images",))
|
|
|
148
222
|
BeanQueue is designed to be as customizable as much as possible.
|
|
149
223
|
Of course, you can define your own SQLAlchemy model instead of using the ones we provided.
|
|
150
224
|
|
|
151
|
-
To make defining your own `Task`
|
|
225
|
+
To make defining your own `Task`, `Worker` or `Event` model much easier, you can use our mixin classes:
|
|
152
226
|
|
|
153
227
|
- `bq.TaskModelMixin`: provides task model columns
|
|
154
228
|
- `bq.TaskModelRefWorkerMixin`: provides foreign key column and relationship to `bq.Worker`
|
|
229
|
+
- `bq.TaskModelRefParentMixin`: provides foreign key column and relationship to children `bq.Task` created during processing
|
|
230
|
+
- `bq.TaskModelRefEventMixin`: provides foreign key column and relationship to `bq.Event`
|
|
155
231
|
- `bq.WorkerModelMixin`: provides worker model columns
|
|
156
232
|
- `bq.WorkerRefMixin`: provides relationship to `bq.Task`
|
|
233
|
+
- `bq.EventModelMixin`: provides event model columns
|
|
234
|
+
- `bq.EventModelRefTaskMixin`: provides foreign key column and relationship to `bq.Task`
|
|
157
235
|
|
|
158
236
|
Here's an example for defining your own Task model:
|
|
159
237
|
|
|
@@ -212,13 +290,14 @@ class Worker(bq.WorkerModelMixin, Base):
|
|
|
212
290
|
)
|
|
213
291
|
```
|
|
214
292
|
|
|
215
|
-
With the model class ready, you only need to change the `TASK_MODEL` and `
|
|
293
|
+
With the model class ready, you only need to change the `TASK_MODEL`, `WORKER_MODEL` and `EVENT_MODEL` of `Config` to the full Python module name plus the class name like this.
|
|
216
294
|
|
|
217
295
|
```python
|
|
218
296
|
import bq
|
|
219
297
|
config = bq.Config(
|
|
220
298
|
TASK_MODEL="my_pkgs.models.Task",
|
|
221
299
|
WORKER_MODEL="my_pkgs.models.Worker",
|
|
300
|
+
EVENT_MODEL="my_pkgs.models.Event",
|
|
222
301
|
# ... other configs
|
|
223
302
|
)
|
|
224
303
|
app = bq.BeanQueue(config)
|
|
@@ -226,21 +305,21 @@ app = bq.BeanQueue(config)
|
|
|
226
305
|
|
|
227
306
|
## Why?
|
|
228
307
|
|
|
229
|
-
There are countless
|
|
230
|
-
The primary issue with most
|
|
231
|
-
Our
|
|
232
|
-
However, integrating an external
|
|
233
|
-
The
|
|
308
|
+
There are countless work queue projects. Why make yet another one?
|
|
309
|
+
The primary issue with most work queue tools is their reliance on a standalone broker server.
|
|
310
|
+
Our work queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
|
|
311
|
+
However, integrating an external work queue into the system presents a risk.
|
|
312
|
+
The work queue and the database don't share the same data view, potentially compromising data integrity and reliability.
|
|
234
313
|
|
|
235
314
|
For example, you have a table of `images` to keep the user-uploaded images.
|
|
236
|
-
And you have a background
|
|
237
|
-
So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the
|
|
315
|
+
And you have a background work queue for resizing the uploaded images into different thumbnail sizes.
|
|
316
|
+
So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the work queue.
|
|
238
317
|
|
|
239
|
-
Say you push the task to the
|
|
318
|
+
Say you push the task to the work queue immediately after you insert the `images` table then commit like this:
|
|
240
319
|
|
|
241
320
|
```
|
|
242
321
|
1. Insert into the "images" table
|
|
243
|
-
2. Push resizing task to the
|
|
322
|
+
2. Push resizing task to the work queue
|
|
244
323
|
3. Commit db changes
|
|
245
324
|
```
|
|
246
325
|
|
|
@@ -248,21 +327,21 @@ While this might seem like the right way to do it, there's a hidden bug.
|
|
|
248
327
|
If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
|
|
249
328
|
One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
|
|
250
329
|
But this adds complexity to the system and also increases the latency if the first attempt fails.
|
|
251
|
-
Also, if the commit step fails, you will have a failed
|
|
330
|
+
Also, if the commit step fails, you will have a failed work queue job trying to fetch a row from the database that will never exist.
|
|
252
331
|
|
|
253
332
|
Another approach is to push the resize task after the database changes are committed. It works like this:
|
|
254
333
|
|
|
255
334
|
```
|
|
256
335
|
1. Insert into the "images" table
|
|
257
336
|
2. Commit db changes
|
|
258
|
-
3. Push resizing task to the
|
|
337
|
+
3. Push resizing task to the work queue
|
|
259
338
|
```
|
|
260
339
|
|
|
261
340
|
With this approach, we don't need to worry about workers picking up the task too early.
|
|
262
341
|
However, there's another drawback.
|
|
263
|
-
If step 3 for pushing a new task to the
|
|
264
|
-
There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the
|
|
265
|
-
Things will be much easier if we have a
|
|
342
|
+
If step 3 for pushing a new task to the work queue fails, the newly inserted `images` row will never be processed.
|
|
343
|
+
There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the work queue storage.
|
|
344
|
+
Things will be much easier if we have a work queue that shares the same consistent view with the database.
|
|
266
345
|
|
|
267
346
|
By using a database as the data storage, all the problems are gone.
|
|
268
347
|
You can simply do the following:
|
|
@@ -274,15 +353,15 @@ You can simply do the following:
|
|
|
274
353
|
```
|
|
275
354
|
|
|
276
355
|
It's all or nothing!
|
|
277
|
-
By doing so, you don't need to maintain another
|
|
278
|
-
You are probably using a database anyway, so this
|
|
356
|
+
By doing so, you don't need to maintain another work queue backend.
|
|
357
|
+
You are probably using a database anyway, so this work queue comes for free.
|
|
279
358
|
|
|
280
|
-
Usually, a database is inefficient as the
|
|
359
|
+
Usually, a database is inefficient as the work queue's data storage because of the potential lock contention and the need for constant querying.
|
|
281
360
|
However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
|
|
282
361
|
|
|
283
|
-
This project is inspired by many of the SKIP-LOCKED-based
|
|
362
|
+
This project is inspired by many of the SKIP-LOCKED-based work queue successors.
|
|
284
363
|
Why don't we just use those existing tools?
|
|
285
|
-
Well, because while they work great as
|
|
364
|
+
Well, because while they work great as work queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
|
|
286
365
|
Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
|
|
287
366
|
|
|
288
367
|
With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
|
|
@@ -299,6 +378,8 @@ A modern accounting book service based on the most popular open source version c
|
|
|
299
378
|
## Alternatives
|
|
300
379
|
|
|
301
380
|
- [solid_queue](https://github.com/rails/solid_queue)
|
|
381
|
+
- [good_job](https://github.com/bensheldon/good_job)
|
|
382
|
+
- [graphile-worker](https://github.com/graphile/worker)
|
|
302
383
|
- [postgres-tq](https://github.com/flix-tech/postgres-tq)
|
|
303
384
|
- [pq](https://github.com/malthe/pq/)
|
|
304
385
|
- [PgQueuer](https://github.com/janbjorge/PgQueuer)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .app import BeanQueue
|
|
2
|
+
from .config import Config # noqa
|
|
3
|
+
from .models import Event
|
|
4
|
+
from .models import EventModelMixin
|
|
5
|
+
from .models import EventModelRefTaskMixin
|
|
6
|
+
from .models import EventType
|
|
7
|
+
from .models import Task # noqa
|
|
8
|
+
from .models import TaskModelMixin
|
|
9
|
+
from .models import TaskModelRefEventMixin
|
|
10
|
+
from .models import TaskModelRefParentMixin
|
|
11
|
+
from .models import TaskModelRefWorkerMixin
|
|
12
|
+
from .models import TaskState # noqa
|
|
13
|
+
from .models import Worker # noqa
|
|
14
|
+
from .models import WorkerModelMixin # noqa
|
|
15
|
+
from .models import WorkerRefMixin # noqa
|
|
16
|
+
from .models import WorkerState # noqa
|
|
17
|
+
from .processors.retry_policies import DelayRetry
|
|
18
|
+
from .processors.retry_policies import ExponentialBackoffRetry
|
|
19
|
+
from .processors.retry_policies import LimitAttempt
|
|
@@ -7,6 +7,8 @@ import sys
|
|
|
7
7
|
import threading
|
|
8
8
|
import time
|
|
9
9
|
import typing
|
|
10
|
+
from importlib.metadata import PackageNotFoundError
|
|
11
|
+
from importlib.metadata import version
|
|
10
12
|
from wsgiref.simple_server import make_server
|
|
11
13
|
from wsgiref.simple_server import WSGIRequestHandler
|
|
12
14
|
|
|
@@ -84,6 +86,12 @@ class BeanQueue:
|
|
|
84
86
|
def worker_model(self) -> typing.Type[models.Worker]:
|
|
85
87
|
return load_module_var(self.config.WORKER_MODEL)
|
|
86
88
|
|
|
89
|
+
@property
|
|
90
|
+
def event_model(self) -> typing.Type[models.Event] | None:
|
|
91
|
+
if self.config.EVENT_MODEL is None:
|
|
92
|
+
return
|
|
93
|
+
return load_module_var(self.config.EVENT_MODEL)
|
|
94
|
+
|
|
87
95
|
def _make_worker_service(self, session: DBSession):
|
|
88
96
|
return self.worker_service_cls(
|
|
89
97
|
session=session, task_model=self.task_model, worker_model=self.worker_model
|
|
@@ -96,7 +104,8 @@ class BeanQueue:
|
|
|
96
104
|
self,
|
|
97
105
|
channel: str = constants.DEFAULT_CHANNEL,
|
|
98
106
|
auto_complete: bool = True,
|
|
99
|
-
|
|
107
|
+
retry_policy: typing.Callable | None = None,
|
|
108
|
+
retry_exceptions: typing.Type | typing.Tuple[typing.Type, ...] | None = None,
|
|
100
109
|
task_model: typing.Type | None = None,
|
|
101
110
|
) -> typing.Callable:
|
|
102
111
|
def decorator(wrapped: typing.Callable):
|
|
@@ -106,7 +115,8 @@ class BeanQueue:
|
|
|
106
115
|
channel=channel,
|
|
107
116
|
func=wrapped,
|
|
108
117
|
auto_complete=auto_complete,
|
|
109
|
-
|
|
118
|
+
retry_policy=retry_policy,
|
|
119
|
+
retry_exceptions=retry_exceptions,
|
|
110
120
|
)
|
|
111
121
|
helper_obj = ProcessorHelper(
|
|
112
122
|
processor,
|
|
@@ -241,6 +251,15 @@ class BeanQueue:
|
|
|
241
251
|
self,
|
|
242
252
|
channels: tuple[str, ...],
|
|
243
253
|
):
|
|
254
|
+
try:
|
|
255
|
+
bq_version = version(__name__.split(".")[0])
|
|
256
|
+
except PackageNotFoundError:
|
|
257
|
+
bq_version = "unknown"
|
|
258
|
+
|
|
259
|
+
logger.info(
|
|
260
|
+
"Starting processing tasks, bq_version=%s",
|
|
261
|
+
bq_version,
|
|
262
|
+
)
|
|
244
263
|
db = self.make_session()
|
|
245
264
|
if not channels:
|
|
246
265
|
channels = [constants.DEFAULT_CHANNEL]
|
|
@@ -318,7 +337,7 @@ class BeanQueue:
|
|
|
318
337
|
task.func_name,
|
|
319
338
|
)
|
|
320
339
|
# TODO: support processor pool and other approaches to dispatch the workload
|
|
321
|
-
registry.process(task)
|
|
340
|
+
registry.process(task, event_cls=self.event_model)
|
|
322
341
|
if not tasks:
|
|
323
342
|
# we should try to keep dispatching until we cannot find tasks
|
|
324
343
|
break
|
|
@@ -1,5 +1,11 @@
|
|
|
1
|
+
from .event import Event
|
|
2
|
+
from .event import EventModelMixin
|
|
3
|
+
from .event import EventModelRefTaskMixin
|
|
4
|
+
from .event import EventType
|
|
1
5
|
from .task import Task
|
|
2
6
|
from .task import TaskModelMixin
|
|
7
|
+
from .task import TaskModelRefEventMixin
|
|
8
|
+
from .task import TaskModelRefParentMixin
|
|
3
9
|
from .task import TaskModelRefWorkerMixin
|
|
4
10
|
from .task import TaskState
|
|
5
11
|
from .worker import Worker
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import enum
|
|
3
|
+
import typing
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import DateTime
|
|
7
|
+
from sqlalchemy import Enum
|
|
8
|
+
from sqlalchemy import ForeignKey
|
|
9
|
+
from sqlalchemy import func
|
|
10
|
+
from sqlalchemy import String
|
|
11
|
+
from sqlalchemy.dialects.postgresql import UUID
|
|
12
|
+
from sqlalchemy.orm import declared_attr
|
|
13
|
+
from sqlalchemy.orm import Mapped
|
|
14
|
+
from sqlalchemy.orm import mapped_column
|
|
15
|
+
from sqlalchemy.orm import relationship
|
|
16
|
+
|
|
17
|
+
from ..db.base import Base
|
|
18
|
+
from .helpers import make_repr_attrs
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class EventType(enum.Enum):
    """Kinds of lifecycle events recorded for a task."""

    # the task failed permanently (no retry scheduled)
    FAILED = "FAILED"
    # the task failed, but a retry has been scheduled
    FAILED_RETRY_SCHEDULED = "FAILED_RETRY_SCHEDULED"
    # the task finished successfully
    COMPLETE = "COMPLETE"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EventModelMixin:
    """Declarative mixin with the core columns shared by all event models."""

    # primary key, generated server-side by PostgreSQL's gen_random_uuid()
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()
    )
    # type of the event
    type: Mapped[EventType] = mapped_column(
        Enum(EventType),
        nullable=False,
        index=True,
    )
    # Error message (only set for failure events)
    error_message: Mapped[typing.Optional[str]] = mapped_column(String, nullable=True)
    # the scheduled at time for retry; only set for FAILED_RETRY_SCHEDULED events.
    # Annotated Optional to match nullable=True (was Mapped[datetime.datetime]).
    scheduled_at: Mapped[typing.Optional[datetime.datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    # created datetime of the event, assigned by the database
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class EventModelRefTaskMixin:
    """Declarative mixin linking an event to the task it belongs to."""

    # foreign key id of the task
    task_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("bq_tasks.id", name="fk_event_task_id"),
        nullable=True,
    )

    @declared_attr
    def task(cls) -> Mapped["Task"]:
        # many-to-one back to the owning task; pairs with Task.events
        return relationship("Task", back_populates="events", uselist=False)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class Event(EventModelMixin, EventModelRefTaskMixin, Base):
    """Concrete event model mapped to the bq_events table."""

    __tablename__ = "bq_events"

    def __repr__(self) -> str:
        attrs = make_repr_attrs(
            [
                ("id", self.id),
                ("type", self.type),
                ("created_at", self.created_at),
                ("scheduled_at", self.scheduled_at),
            ]
        )
        return f"<{self.__class__.__name__} {attrs}>"
|
|
@@ -24,7 +24,7 @@ from .helpers import make_repr_attrs
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class TaskState(enum.Enum):
|
|
27
|
-
# task just created, not
|
|
27
|
+
# task just created, not dispatched yet. or, the task failed and is waiting for a retry.
|
|
28
28
|
PENDING = "PENDING"
|
|
29
29
|
# a worker is processing the task right now
|
|
30
30
|
PROCESSING = "PROCESSING"
|
|
@@ -62,6 +62,11 @@ class TaskModelMixin:
|
|
|
62
62
|
created_at: Mapped[datetime.datetime] = mapped_column(
|
|
63
63
|
DateTime(timezone=True), nullable=False, server_default=func.now()
|
|
64
64
|
)
|
|
65
|
+
# scheduled to run at a specific time
|
|
66
|
+
scheduled_at: Mapped[datetime.datetime] = mapped_column(
|
|
67
|
+
DateTime(timezone=True),
|
|
68
|
+
nullable=True,
|
|
69
|
+
)
|
|
65
70
|
|
|
66
71
|
|
|
67
72
|
class TaskModelRefWorkerMixin:
|
|
@@ -77,7 +82,44 @@ class TaskModelRefWorkerMixin:
|
|
|
77
82
|
return relationship("Worker", back_populates="tasks", uselist=False)
|
|
78
83
|
|
|
79
84
|
|
|
80
|
-
class
|
|
85
|
+
class TaskModelRefParentMixin:
    """Declarative mixin adding a self-referential parent/child relation between tasks."""

    # foreign key id of the source task which created the current task while we are processing it
    parent_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("bq_tasks.id", name="fk_task_parent_task_id"),
        nullable=True,
    )

    @declared_attr
    def parent(cls) -> Mapped[typing.Optional["Task"]]:
        # many-to-one side of the adjacency list; remote_side=[cls.id] marks
        # the parent row as the "one" side of the self-referential join
        return relationship(
            "Task",
            back_populates="children",
            remote_side=[cls.id],
            foreign_keys=[cls.parent_id],
            uselist=False,
        )

    @declared_attr
    def children(cls) -> Mapped[list["Task"]]:
        # one-to-many: tasks created while this task was being processed
        return relationship(
            "Task", foreign_keys=[cls.parent_id], back_populates="parent"
        )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class TaskModelRefEventMixin:
    """Declarative mixin exposing the lifecycle events recorded for a task."""

    @declared_attr
    def events(cls) -> Mapped[list["Event"]]:
        # one-to-many; pairs with Event.task
        return relationship("Event", back_populates="task")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class Task(
|
|
117
|
+
TaskModelMixin,
|
|
118
|
+
TaskModelRefWorkerMixin,
|
|
119
|
+
TaskModelRefEventMixin,
|
|
120
|
+
TaskModelRefParentMixin,
|
|
121
|
+
Base,
|
|
122
|
+
):
|
|
81
123
|
__tablename__ = "bq_tasks"
|
|
82
124
|
|
|
83
125
|
def __repr__(self) -> str:
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import contextvars
|
|
2
|
+
import dataclasses
|
|
3
|
+
import datetime
|
|
4
|
+
import inspect
|
|
5
|
+
import logging
|
|
6
|
+
import typing
|
|
7
|
+
|
|
8
|
+
from sqlalchemy import select
|
|
9
|
+
from sqlalchemy.orm import object_session
|
|
10
|
+
|
|
11
|
+
from .. import events
|
|
12
|
+
from .. import models
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
current_task = contextvars.ContextVar("current_task")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclasses.dataclass(frozen=True)
class Processor:
    """A registered task-processing function plus its dispatch and retry settings.

    Instances are created by the ``BeanQueue.processor`` decorator and looked
    up by (channel, module, name) when a task is dispatched.
    """

    channel: str
    module: str
    name: str
    func: typing.Callable
    # should we auto complete the task or not
    auto_complete: bool = True
    # The retry policy function for returning a new scheduled time for next attempt
    retry_policy: typing.Callable | None = None
    # The exceptions we suppose to retry when encountered
    retry_exceptions: typing.Type | typing.Tuple[typing.Type, ...] | None = None

    def process(self, task: models.Task, event_cls: typing.Type | None = None):
        """Run ``self.func`` against *task* and record the outcome on the task.

        On success the task is optionally auto-completed; on an unhandled
        exception the task is marked FAILED, or re-scheduled as PENDING when
        the retry policy returns a new schedule time. When *event_cls* is
        given, an event row describing the outcome is added to the session.
        Returns the function's result, or None on failure.
        """
        # expose the task to code running inside the processor function;
        # ProcessorHelper.run reads this to link child tasks to their parent
        ctx_token = current_task.set(task)
        try:
            db = object_session(task)
            # pass only the reserved keyword arguments the processor declares
            func_signature = inspect.signature(self.func)
            base_kwargs = {}
            if "task" in func_signature.parameters:
                base_kwargs["task"] = task
            if "db" in func_signature.parameters:
                base_kwargs["db"] = db
            try:
                # run inside a SAVEPOINT so an unhandled failure rolls back only
                # the processor's own writes, not the surrounding transaction
                with db.begin_nested() as savepoint:
                    if "savepoint" in func_signature.parameters:
                        base_kwargs["savepoint"] = savepoint
                    result = self.func(**base_kwargs, **task.kwargs)
            except Exception as exc:
                logger.error("Unhandled exception for task %s", task.id, exc_info=True)
                events.task_failure.send(self, task=task, exception=exc)
                task.state = models.TaskState.FAILED
                task.error_message = str(exc)
                retry_scheduled_at = None
                # consult the retry policy when the exception is retryable; no
                # retry_exceptions filter means every exception is retryable
                if (
                    self.retry_exceptions is None
                    or isinstance(exc, self.retry_exceptions)
                ) and self.retry_policy is not None:
                    retry_scheduled_at = self.retry_policy(task)
                    if retry_scheduled_at is not None:
                        # policy returned a new schedule: put the task back in
                        # the queue instead of failing it permanently
                        task.state = models.TaskState.PENDING
                        task.scheduled_at = retry_scheduled_at
                        if isinstance(retry_scheduled_at, datetime.datetime):
                            retry_scheduled_at_value = retry_scheduled_at
                        else:
                            # the policy may return a SQL expression (e.g.
                            # func.now() + delta); evaluate it just for logging
                            retry_scheduled_at_value = db.scalar(
                                select(retry_scheduled_at)
                            )
                        logger.info(
                            "Schedule task %s for retry at %s",
                            task.id,
                            retry_scheduled_at_value,
                        )
                if event_cls is not None:
                    # record either a permanent failure or a scheduled retry
                    event = event_cls(
                        task=task,
                        type=models.EventType.FAILED
                        if retry_scheduled_at is None
                        else models.EventType.FAILED_RETRY_SCHEDULED,
                        error_message=task.error_message,
                        scheduled_at=retry_scheduled_at,
                    )
                    db.add(event)
                db.add(task)
                return
            if self.auto_complete:
                logger.info("Task %s auto complete", task.id)
                task.state = models.TaskState.DONE
                task.result = result
                if event_cls is not None:
                    event = event_cls(
                        task=task,
                        type=models.EventType.COMPLETE,
                    )
                    db.add(event)
                db.add(task)
            return result
        finally:
            # always restore the previous contextvar state
            current_task.reset(ctx_token)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ProcessorHelper:
    """Helper function to replace the decorated processor function and make creating Task model much easier"""

    def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
        self._processor = processor
        self._task_cls = task_cls

    def __call__(self, *args, **kwargs):
        # calling the helper directly invokes the wrapped function as-is
        return self._processor.func(*args, **kwargs)

    def run(self, **kwargs) -> models.Task:
        """Build (without persisting) a Task that will invoke this processor."""
        # ContextVar.get with a default replaces the try/except LookupError
        # dance; parent is None when no task is currently being processed
        parent = current_task.get(None)
        proc = self._processor
        return self._task_cls(
            channel=proc.channel,
            module=proc.module,
            func_name=proc.name,
            kwargs=kwargs,
            parent=parent,
        )
|
|
@@ -18,10 +18,14 @@ class Registry:
|
|
|
18
18
|
def add(self, processor: Processor):
|
|
19
19
|
self.processors[processor.channel][processor.module][processor.name] = processor
|
|
20
20
|
|
|
21
|
-
def process(
|
|
21
|
+
def process(
|
|
22
|
+
self,
|
|
23
|
+
task: models.Task,
|
|
24
|
+
event_cls: typing.Type | None = None,
|
|
25
|
+
) -> typing.Any:
|
|
22
26
|
modules = self.processors.get(task.channel, {})
|
|
23
27
|
functions = modules.get(task.module, {})
|
|
24
|
-
processor = functions.get(task.func_name)
|
|
28
|
+
processor: Processor = functions.get(task.func_name)
|
|
25
29
|
db = object_session(task)
|
|
26
30
|
if processor is None:
|
|
27
31
|
self.logger.error(
|
|
@@ -30,12 +34,18 @@ class Registry:
|
|
|
30
34
|
task.module,
|
|
31
35
|
task.func_name,
|
|
32
36
|
)
|
|
33
|
-
# TODO: add error event
|
|
34
37
|
task.state = models.TaskState.FAILED
|
|
35
38
|
task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
|
|
39
|
+
if event_cls is not None:
|
|
40
|
+
event = event_cls(
|
|
41
|
+
task=task,
|
|
42
|
+
type=models.EventType.FAILED,
|
|
43
|
+
error_message=task.error_message,
|
|
44
|
+
)
|
|
45
|
+
db.add(event)
|
|
36
46
|
db.add(task)
|
|
37
47
|
return
|
|
38
|
-
return processor.process(task)
|
|
48
|
+
return processor.process(task, event_cls=event_cls)
|
|
39
49
|
|
|
40
50
|
|
|
41
51
|
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import typing
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import func
|
|
5
|
+
from sqlalchemy import inspect
|
|
6
|
+
from sqlalchemy.orm import object_session
|
|
7
|
+
|
|
8
|
+
from .. import models
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_failure_times(task: models.Task) -> int:
    """Count how many retry-scheduled failures have been recorded for *task*."""
    session = object_session(task)
    # resolve the concrete event class from the task's "events" relationship,
    # so custom event models are supported
    mapper = inspect(task.__class__)
    event_cls = mapper.attrs["events"].entity.class_
    query = session.query(event_cls).filter(
        event_cls.task == task,
        event_cls.type == models.EventType.FAILED_RETRY_SCHEDULED,
    )
    return query.count()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DelayRetry:
    """Retry policy that schedules the next attempt a fixed delay from now."""

    def __init__(self, delay: datetime.timedelta):
        # constant delay applied after every failure
        self.delay = delay

    def __call__(self, task: models.Task) -> typing.Any:
        # server-side "now" plus the configured delay (evaluated by the DB)
        return func.now() + self.delay
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ExponentialBackoffRetry:
    """Retry policy whose delay grows exponentially with each failed attempt.

    delay_seconds = base ** (exponent_offset + exponent_scalar * attempt)
    """

    def __init__(
        self, base: float = 2, exponent_offset: float = 0, exponent_scalar: float = 1.0
    ):
        self.base = base
        self.exponent_offset = exponent_offset
        self.exponent_scalar = exponent_scalar

    def __call__(self, task: models.Task) -> typing.Any:
        # attempt number = prior recorded failures + the attempt that just failed
        attempt = get_failure_times(task) + 1
        exponent = self.exponent_offset + self.exponent_scalar * attempt
        delay_seconds = self.base**exponent
        # server-side "now" plus the computed backoff
        return func.now() + datetime.timedelta(seconds=delay_seconds)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class LimitAttempt:
    """Wrap another retry policy and stop retrying after a maximum number of attempts."""

    def __init__(self, maximum_attempt: int, retry_policy: typing.Callable):
        self.maximum_attempt = maximum_attempt
        self.retry_policy = retry_policy

    def __call__(self, task: models.Task) -> typing.Any:
        attempt = get_failure_times(task) + 1
        if attempt < self.maximum_attempt:
            # still under the cap: delegate scheduling to the wrapped policy
            return self.retry_policy(task)
        # give up; returning None marks the task as permanently failed
        return None
|
|
@@ -3,6 +3,9 @@ import select
|
|
|
3
3
|
import typing
|
|
4
4
|
import uuid
|
|
5
5
|
|
|
6
|
+
from sqlalchemy import func
|
|
7
|
+
from sqlalchemy import null
|
|
8
|
+
from sqlalchemy import or_
|
|
6
9
|
from sqlalchemy.orm import Query
|
|
7
10
|
|
|
8
11
|
from .. import models
|
|
@@ -21,11 +24,22 @@ class DispatchService:
|
|
|
21
24
|
self.session = session
|
|
22
25
|
self.task_model: typing.Type[models.Task] = task_model
|
|
23
26
|
|
|
24
|
-
def make_task_query(
|
|
27
|
+
def make_task_query(
|
|
28
|
+
self,
|
|
29
|
+
channels: typing.Sequence[str],
|
|
30
|
+
limit: int = 1,
|
|
31
|
+
now: typing.Any = func.now(),
|
|
32
|
+
) -> Query:
|
|
25
33
|
return (
|
|
26
34
|
self.session.query(self.task_model.id)
|
|
27
35
|
.filter(self.task_model.channel.in_(channels))
|
|
28
36
|
.filter(self.task_model.state == models.TaskState.PENDING)
|
|
37
|
+
.filter(
|
|
38
|
+
or_(
|
|
39
|
+
self.task_model.scheduled_at.is_(null()),
|
|
40
|
+
now >= self.task_model.scheduled_at,
|
|
41
|
+
)
|
|
42
|
+
)
|
|
29
43
|
.order_by(self.task_model.created_at)
|
|
30
44
|
.limit(limit)
|
|
31
45
|
.with_for_update(skip_locked=True)
|
|
@@ -43,9 +57,13 @@ class DispatchService:
|
|
|
43
57
|
)
|
|
44
58
|
|
|
45
59
|
def dispatch(
|
|
46
|
-
self,
|
|
60
|
+
self,
|
|
61
|
+
channels: typing.Sequence[str],
|
|
62
|
+
worker_id: uuid.UUID,
|
|
63
|
+
limit: int = 1,
|
|
64
|
+
now: typing.Any = func.now(),
|
|
47
65
|
) -> Query:
|
|
48
|
-
task_query = self.make_task_query(channels, limit=limit)
|
|
66
|
+
task_query = self.make_task_query(channels, limit=limit, now=now)
|
|
49
67
|
task_subquery = task_query.scalar_subquery()
|
|
50
68
|
task_ids = [
|
|
51
69
|
item[0]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "beanqueue"
|
|
3
|
-
version = "0.
|
|
4
|
-
description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library"
|
|
5
5
|
authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "README.md"
|
beanqueue-0.2.3/bq/__init__.py
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
from .app import BeanQueue
|
|
2
|
-
from .config import Config # noqa
|
|
3
|
-
from .models import Task # noqa
|
|
4
|
-
from .models import TaskModelMixin
|
|
5
|
-
from .models import TaskModelRefWorkerMixin
|
|
6
|
-
from .models import TaskState # noqa
|
|
7
|
-
from .models import Worker # noqa
|
|
8
|
-
from .models import WorkerModelMixin # noqa
|
|
9
|
-
from .models import WorkerRefMixin # noqa
|
|
10
|
-
from .models import WorkerState # noqa
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
import dataclasses
|
|
2
|
-
import inspect
|
|
3
|
-
import logging
|
|
4
|
-
import typing
|
|
5
|
-
|
|
6
|
-
from sqlalchemy.orm import object_session
|
|
7
|
-
|
|
8
|
-
from .. import events
|
|
9
|
-
from .. import models
|
|
10
|
-
|
|
11
|
-
logger = logging.getLogger(__name__)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclasses.dataclass(frozen=True)
|
|
15
|
-
class Processor:
|
|
16
|
-
channel: str
|
|
17
|
-
module: str
|
|
18
|
-
name: str
|
|
19
|
-
func: typing.Callable
|
|
20
|
-
# should we auto complete the task or not
|
|
21
|
-
auto_complete: bool = True
|
|
22
|
-
# should we auto rollback the transaction when encounter unhandled exception
|
|
23
|
-
auto_rollback_on_exc: bool = True
|
|
24
|
-
|
|
25
|
-
def process(self, task: models.Task):
|
|
26
|
-
db = object_session(task)
|
|
27
|
-
func_signature = inspect.signature(self.func)
|
|
28
|
-
base_kwargs = {}
|
|
29
|
-
if "task" in func_signature.parameters:
|
|
30
|
-
base_kwargs["task"] = task
|
|
31
|
-
if "db" in func_signature.parameters:
|
|
32
|
-
base_kwargs["db"] = db
|
|
33
|
-
with db.begin_nested() as savepoint:
|
|
34
|
-
if "savepoint" in func_signature.parameters:
|
|
35
|
-
base_kwargs["savepoint"] = savepoint
|
|
36
|
-
try:
|
|
37
|
-
result = self.func(**base_kwargs, **task.kwargs)
|
|
38
|
-
except Exception as exc:
|
|
39
|
-
logger.error("Unhandled exception for task %s", task.id, exc_info=True)
|
|
40
|
-
events.task_failure.send(self, task=task, exception=exc)
|
|
41
|
-
if self.auto_rollback_on_exc:
|
|
42
|
-
savepoint.rollback()
|
|
43
|
-
# TODO: add error event
|
|
44
|
-
task.state = models.TaskState.FAILED
|
|
45
|
-
task.error_message = str(exc)
|
|
46
|
-
db.add(task)
|
|
47
|
-
return
|
|
48
|
-
if self.auto_complete:
|
|
49
|
-
logger.info("Task %s auto complete", task.id)
|
|
50
|
-
task.state = models.TaskState.DONE
|
|
51
|
-
task.result = result
|
|
52
|
-
db.add(task)
|
|
53
|
-
return result
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class ProcessorHelper:
|
|
57
|
-
"""Helper function to replace the decorated processor function and make creating Task model much easier"""
|
|
58
|
-
|
|
59
|
-
def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
|
|
60
|
-
self._processor = processor
|
|
61
|
-
self._task_cls = task_cls
|
|
62
|
-
|
|
63
|
-
def __call__(self, *args, **kwargs):
|
|
64
|
-
return self._processor.func(*args, **kwargs)
|
|
65
|
-
|
|
66
|
-
def run(self, **kwargs) -> models.Task:
|
|
67
|
-
return self._task_cls(
|
|
68
|
-
channel=self._processor.channel,
|
|
69
|
-
module=self._processor.module,
|
|
70
|
-
func_name=self._processor.name,
|
|
71
|
-
kwargs=kwargs,
|
|
72
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|