beanqueue 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- beanqueue-0.1.2/PKG-INFO +235 -0
- beanqueue-0.1.2/README.md +214 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/db/base.py +5 -5
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/db/session.py +5 -5
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/models/__init__.py +4 -4
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/models/helpers.py +5 -5
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/models/worker.py +69 -69
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/processors/registry.py +136 -136
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/services/worker.py +69 -69
- {beanqueue-0.1.0 → beanqueue-0.1.2}/pyproject.toml +1 -1
- beanqueue-0.1.0/PKG-INFO +0 -23
- beanqueue-0.1.0/README.md +0 -2
- {beanqueue-0.1.0 → beanqueue-0.1.2}/LICENSE +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/__init__.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/cmds/__init__.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/cmds/create_tables.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/cmds/process.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/cmds/submit.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/config.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/container.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/db/__init__.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/models/task.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/processors/__init__.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/services/__init__.py +0 -0
- {beanqueue-0.1.0 → beanqueue-0.1.2}/bq/services/dispatch.py +0 -0
beanqueue-0.1.2/PKG-INFO
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: beanqueue
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: BeanQueue, or BQ for short, a PostgreSQL SKIP LOCKED based worker queue library
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Fang-Pen Lin
|
|
7
|
+
Author-email: fangpen@launchplatform.com
|
|
8
|
+
Requires-Python: >=3.11,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
14
|
+
Requires-Dist: dependency-injector (>=4.41.0,<5.0.0)
|
|
15
|
+
Requires-Dist: pg-activity (>=3.5.1,<4.0.0)
|
|
16
|
+
Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
|
|
17
|
+
Requires-Dist: sqlalchemy (>=2.0.30,<3.0.0)
|
|
18
|
+
Requires-Dist: venusian (>=3.1.0,<4.0.0)
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# BeanQueue [](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
|
|
22
|
+
BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), [PostgreSQL SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
|
|
23
|
+
|
|
24
|
+
**Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **Super lightweight**: Under 1K lines
|
|
29
|
+
- **Easy-to-deploy**: Only rely on PostgreSQL
|
|
30
|
+
- **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
|
|
31
|
+
- **Auto-notify**: NOTIFY statements are automatically generated and sent for inserted or updated tasks
|
|
32
|
+
- **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is dead, the others will reschedule the tasks
|
|
33
|
+
- **Customizable**: Use it as a library and build your own worker queue
|
|
34
|
+
- **Native DB operations**: Commit your tasks together with other db entries without worrying about data inconsistency issues
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install beanqueue
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
You can define a task processor like this
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from sqlalchemy.orm import Session
|
|
48
|
+
|
|
49
|
+
from bq.processors.registry import processor
|
|
50
|
+
from bq import models
|
|
51
|
+
from .. import my_models
|
|
52
|
+
from .. import image_utils
|
|
53
|
+
|
|
54
|
+
@processor(channel="images")
|
|
55
|
+
def resize_image(db: Session, task: models.Task, width: int, height: int):
|
|
56
|
+
image = db.query(my_models.Image).filter(my_models.Image.task == task).one()
|
|
57
|
+
image_utils.resize(image, size=(width, height))
|
|
58
|
+
db.add(image)
|
|
59
|
+
# by default the `processor` decorator has the `auto_complete` flag turned on,
|
|
60
|
+
# so it will commit the db changes for us automatically
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
The `db` and `task` keyword arguments are optional.
|
|
64
|
+
If you don't need to access the task object, you can simply define the function without these two parameters.
|
|
65
|
+
|
|
66
|
+
To submit a task, you can either use `bq.models.Task` model object to construct the task object, insert into the
|
|
67
|
+
database session and commit.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from bq import models
|
|
71
|
+
from .db import Session
|
|
72
|
+
from .. import my_models
|
|
73
|
+
|
|
74
|
+
db = Session()
|
|
75
|
+
task = models.Task(
|
|
76
|
+
channel="files",
|
|
77
|
+
module="my_pkgs.files.processors",
|
|
78
|
+
name="upload_to_s3_for_backup",
|
|
79
|
+
)
|
|
80
|
+
file = my_models.File(
|
|
81
|
+
task=task,
|
|
82
|
+
blob_name="...",
|
|
83
|
+
)
|
|
84
|
+
db.add(task)
|
|
85
|
+
db.add(file)
|
|
86
|
+
db.commit()
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Or, you can use the `run` helper like this:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from .processors import resize_image
|
|
93
|
+
from .db import Session
|
|
94
|
+
from .. import my_models
|
|
95
|
+
|
|
96
|
+
db = Session()
|
|
97
|
+
# a Task model generated for invoking resize_image function
|
|
98
|
+
task = resize_image.run(width=200, height=300)
|
|
99
|
+
# associate task with your own models
|
|
100
|
+
image = my_models.Image(task=task, blob_name="...")
|
|
101
|
+
db.add(image)
|
|
102
|
+
# we have a Task model SQLAlchemy event handler to send the NOTIFY "<channel>" statement for you,
|
|
103
|
+
# so that the workers will be woken up immediately
|
|
104
|
+
db.add(task)
|
|
105
|
+
# commit will make the task visible to worker immediately
|
|
106
|
+
db.commit()
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
To run the worker, you can do this:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
BQ_PROCESSOR_PACKAGES='["my_pkgs.processors"]' python -m bq.cmds.process images
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
To submit a task for testing purposes, you can do
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
python -m bq.cmds.submit images my_pkgs.processors resize_image -k '{"width": 200, "height": 300}'
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
To create tables for BeanQueue, you can run
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
python -m bq.cmds.create_tables
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Configurations
|
|
128
|
+
|
|
129
|
+
Configurations can be modified by setting environment variables with `BQ_` prefix.
|
|
130
|
+
For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
|
|
131
|
+
To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
|
|
132
|
+
The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
|
|
133
|
+
For now, the configurations only affect command line tools.
|
|
134
|
+
|
|
135
|
+
If you want to configure BeanQueue programmatically for the command lines, you can override our [dependency-injector](https://python-dependency-injector.ets-labs.org/)'s container defined at [bq/container.py](bq/container.py) and call the command function manually.
|
|
136
|
+
For example:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
import bq.cmds.process
|
|
140
|
+
from bq.container import Container
|
|
141
|
+
from bq.config import Config
|
|
142
|
+
|
|
143
|
+
container = Container()
|
|
144
|
+
container.wire(modules=[bq.cmds.process])
|
|
145
|
+
with container.config.override(
|
|
146
|
+
Config(
|
|
147
|
+
PROCESSOR_PACKAGES=["my_pkgs.processors"],
|
|
148
|
+
DATABASE_URL="postgresql://...",
|
|
149
|
+
BATCH_SIZE=10,
|
|
150
|
+
)
|
|
151
|
+
):
|
|
152
|
+
bq.cmds.process.process_tasks(channels=("images",))
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Many other behaviors of this framework can also be modified by overriding the container defined at [bq/container.py](bq/container.py).
|
|
156
|
+
|
|
157
|
+
## Why?
|
|
158
|
+
|
|
159
|
+
There are countless worker queue projects. Why make yet another one?
|
|
160
|
+
The primary issue with most worker queue tools is their reliance on a standalone broker server.
|
|
161
|
+
Our worker queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
|
|
162
|
+
However, integrating an external worker queue into the system presents a risk.
|
|
163
|
+
The worker queue and the database don't share the same data view, potentially compromising data integrity and reliability.
|
|
164
|
+
|
|
165
|
+
For example, you have a table of `images` to keep the user-uploaded images.
|
|
166
|
+
And you have a background worker queue for resizing the uploaded images into different thumbnail sizes.
|
|
167
|
+
So, you first need to insert a row describing the uploaded image into the database before you push the resizing task to the worker queue.
|
|
168
|
+
|
|
169
|
+
Say you push the task to the worker queue immediately after you insert into the `images` table and then commit, like this:
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
1. Insert into the "images" table
|
|
173
|
+
2. Push resizing task to the worker queue
|
|
174
|
+
3. Commit db changes
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
While this might seem like the right way to do it, there's a hidden bug.
|
|
178
|
+
If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
|
|
179
|
+
One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
|
|
180
|
+
But this adds complexity to the system and also increases the latency if the first attempt fails.
|
|
181
|
+
Also, if the commit step fails, you will have a failed worker queue job trying to fetch a row from the database that will never exist.
|
|
182
|
+
|
|
183
|
+
Another approach is to push the resize task after the database changes are committed. It works like this:
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
1. Insert into the "images" table
|
|
187
|
+
2. Commit db changes
|
|
188
|
+
3. Push resizing task to the worker queue
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
With this approach, we don't need to worry about workers picking up the task too early.
|
|
192
|
+
However, there's another drawback.
|
|
193
|
+
If step 3 for pushing a new task to the worker queue fails, the newly inserted `images` row will never be processed.
|
|
194
|
+
There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the worker queue storage.
|
|
195
|
+
Things will be much easier if we have a worker queue that shares the same consistent view with the database.
|
|
196
|
+
|
|
197
|
+
By using a database as the data storage, all the problems are gone.
|
|
198
|
+
You can simply do the following:
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
1. Insert into the "images" table
|
|
202
|
+
2. Insert the image resizing task into the `tasks` table
|
|
203
|
+
3. Commit db changes
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
It's all or nothing!
|
|
207
|
+
By doing so, you don't need to maintain another worker queue backend.
|
|
208
|
+
You are probably using a database anyway, so this worker queue comes for free.
|
|
209
|
+
|
|
210
|
+
Usually, a database is inefficient as a worker queue's data storage because of the potential lock contention and the need for constant querying.
|
|
211
|
+
However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
|
|
212
|
+
|
|
213
|
+
This project is inspired by many of the SKIP-LOCKED-based worker queue successors.
|
|
214
|
+
Why don't we just use those existing tools?
|
|
215
|
+
Well, because while they work great as worker queue solutions, they don't take advantage of writing tasks and their related data into the database in a transaction.
|
|
216
|
+
Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
|
|
217
|
+
|
|
218
|
+
With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
|
|
219
|
+
Instead, we open it up to let the user insert the row and choose when and what to commit along with the task.
|
|
220
|
+
|
|
221
|
+
## Sponsor
|
|
222
|
+
|
|
223
|
+
<p align="center">
|
|
224
|
+
<a href="https://beanhub.io"><img src="https://github.com/LaunchPlatform/bq/raw/master/assets/beanhub.svg?raw=true" alt="BeanHub logo" /></a>
|
|
225
|
+
</p>
|
|
226
|
+
|
|
227
|
+
A modern accounting book service based on the most popular open source version control system [Git](https://git-scm.com/) and text-based double entry accounting book software [Beancount](https://beancount.github.io/docs/index.html).
|
|
228
|
+
|
|
229
|
+
## Alternatives
|
|
230
|
+
|
|
231
|
+
- [solid_queue](https://github.com/rails/solid_queue)
|
|
232
|
+
- [postgres-tq](https://github.com/flix-tech/postgres-tq)
|
|
233
|
+
- [PgQueuer](https://github.com/janbjorge/PgQueuer)
|
|
234
|
+
- [hatchet](https://github.com/hatchet-dev/hatchet)
|
|
235
|
+
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# BeanQueue [](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
|
|
2
|
+
BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), [PostgreSQL SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
|
|
3
|
+
|
|
4
|
+
**Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
|
|
5
|
+
|
|
6
|
+
## Features
|
|
7
|
+
|
|
8
|
+
- **Super lightweight**: Under 1K lines
|
|
9
|
+
- **Easy-to-deploy**: Only rely on PostgreSQL
|
|
10
|
+
- **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
|
|
11
|
+
- **Auto-notify**: NOTIFY statements are automatically generated and sent for inserted or updated tasks
|
|
12
|
+
- **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is dead, the others will reschedule the tasks
|
|
13
|
+
- **Customizable**: Use it as a library and build your own worker queue
|
|
14
|
+
- **Native DB operations**: Commit your tasks together with other db entries without worrying about data inconsistency issues
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install beanqueue
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
You can define a task processor like this
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from sqlalchemy.orm import Session
|
|
28
|
+
|
|
29
|
+
from bq.processors.registry import processor
|
|
30
|
+
from bq import models
|
|
31
|
+
from .. import my_models
|
|
32
|
+
from .. import image_utils
|
|
33
|
+
|
|
34
|
+
@processor(channel="images")
|
|
35
|
+
def resize_image(db: Session, task: models.Task, width: int, height: int):
|
|
36
|
+
image = db.query(my_models.Image).filter(my_models.Image.task == task).one()
|
|
37
|
+
image_utils.resize(image, size=(width, height))
|
|
38
|
+
db.add(image)
|
|
39
|
+
# by default the `processor` decorator has the `auto_complete` flag turned on,
|
|
40
|
+
# so it will commit the db changes for us automatically
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
The `db` and `task` keyword arguments are optional.
|
|
44
|
+
If you don't need to access the task object, you can simply define the function without these two parameters.
|
|
45
|
+
|
|
46
|
+
To submit a task, you can either use `bq.models.Task` model object to construct the task object, insert into the
|
|
47
|
+
database session and commit.
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from bq import models
|
|
51
|
+
from .db import Session
|
|
52
|
+
from .. import my_models
|
|
53
|
+
|
|
54
|
+
db = Session()
|
|
55
|
+
task = models.Task(
|
|
56
|
+
channel="files",
|
|
57
|
+
module="my_pkgs.files.processors",
|
|
58
|
+
name="upload_to_s3_for_backup",
|
|
59
|
+
)
|
|
60
|
+
file = my_models.File(
|
|
61
|
+
task=task,
|
|
62
|
+
blob_name="...",
|
|
63
|
+
)
|
|
64
|
+
db.add(task)
|
|
65
|
+
db.add(file)
|
|
66
|
+
db.commit()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Or, you can use the `run` helper like this:
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from .processors import resize_image
|
|
73
|
+
from .db import Session
|
|
74
|
+
from .. import my_models
|
|
75
|
+
|
|
76
|
+
db = Session()
|
|
77
|
+
# a Task model generated for invoking resize_image function
|
|
78
|
+
task = resize_image.run(width=200, height=300)
|
|
79
|
+
# associate task with your own models
|
|
80
|
+
image = my_models.Image(task=task, blob_name="...")
|
|
81
|
+
db.add(image)
|
|
82
|
+
# we have a Task model SQLAlchemy event handler to send the NOTIFY "<channel>" statement for you,
|
|
83
|
+
# so that the workers will be woken up immediately
|
|
84
|
+
db.add(task)
|
|
85
|
+
# commit will make the task visible to worker immediately
|
|
86
|
+
db.commit()
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
To run the worker, you can do this:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
BQ_PROCESSOR_PACKAGES='["my_pkgs.processors"]' python -m bq.cmds.process images
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
To submit a task for testing purposes, you can do
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
python -m bq.cmds.submit images my_pkgs.processors resize_image -k '{"width": 200, "height": 300}'
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
To create tables for BeanQueue, you can run
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
python -m bq.cmds.create_tables
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Configurations
|
|
108
|
+
|
|
109
|
+
Configurations can be modified by setting environment variables with `BQ_` prefix.
|
|
110
|
+
For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
|
|
111
|
+
To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
|
|
112
|
+
The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
|
|
113
|
+
For now, the configurations only affect command line tools.
|
|
114
|
+
|
|
115
|
+
If you want to configure BeanQueue programmatically for the command lines, you can override our [dependency-injector](https://python-dependency-injector.ets-labs.org/)'s container defined at [bq/container.py](bq/container.py) and call the command function manually.
|
|
116
|
+
For example:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import bq.cmds.process
|
|
120
|
+
from bq.container import Container
|
|
121
|
+
from bq.config import Config
|
|
122
|
+
|
|
123
|
+
container = Container()
|
|
124
|
+
container.wire(modules=[bq.cmds.process])
|
|
125
|
+
with container.config.override(
|
|
126
|
+
Config(
|
|
127
|
+
PROCESSOR_PACKAGES=["my_pkgs.processors"],
|
|
128
|
+
DATABASE_URL="postgresql://...",
|
|
129
|
+
BATCH_SIZE=10,
|
|
130
|
+
)
|
|
131
|
+
):
|
|
132
|
+
bq.cmds.process.process_tasks(channels=("images",))
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Many other behaviors of this framework can also be modified by overriding the container defined at [bq/container.py](bq/container.py).
|
|
136
|
+
|
|
137
|
+
## Why?
|
|
138
|
+
|
|
139
|
+
There are countless worker queue projects. Why make yet another one?
|
|
140
|
+
The primary issue with most worker queue tools is their reliance on a standalone broker server.
|
|
141
|
+
Our worker queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
|
|
142
|
+
However, integrating an external worker queue into the system presents a risk.
|
|
143
|
+
The worker queue and the database don't share the same data view, potentially compromising data integrity and reliability.
|
|
144
|
+
|
|
145
|
+
For example, you have a table of `images` to keep the user-uploaded images.
|
|
146
|
+
And you have a background worker queue for resizing the uploaded images into different thumbnail sizes.
|
|
147
|
+
So, you first need to insert a row describing the uploaded image into the database before you push the resizing task to the worker queue.
|
|
148
|
+
|
|
149
|
+
Say you push the task to the worker queue immediately after you insert into the `images` table and then commit, like this:
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
1. Insert into the "images" table
|
|
153
|
+
2. Push resizing task to the worker queue
|
|
154
|
+
3. Commit db changes
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
While this might seem like the right way to do it, there's a hidden bug.
|
|
158
|
+
If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
|
|
159
|
+
One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
|
|
160
|
+
But this adds complexity to the system and also increases the latency if the first attempt fails.
|
|
161
|
+
Also, if the commit step fails, you will have a failed worker queue job trying to fetch a row from the database that will never exist.
|
|
162
|
+
|
|
163
|
+
Another approach is to push the resize task after the database changes are committed. It works like this:
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
1. Insert into the "images" table
|
|
167
|
+
2. Commit db changes
|
|
168
|
+
3. Push resizing task to the worker queue
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
With this approach, we don't need to worry about workers picking up the task too early.
|
|
172
|
+
However, there's another drawback.
|
|
173
|
+
If step 3 for pushing a new task to the worker queue fails, the newly inserted `images` row will never be processed.
|
|
174
|
+
There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the worker queue storage.
|
|
175
|
+
Things will be much easier if we have a worker queue that shares the same consistent view with the database.
|
|
176
|
+
|
|
177
|
+
By using a database as the data storage, all the problems are gone.
|
|
178
|
+
You can simply do the following:
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
1. Insert into the "images" table
|
|
182
|
+
2. Insert the image resizing task into the `tasks` table
|
|
183
|
+
3. Commit db changes
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
It's all or nothing!
|
|
187
|
+
By doing so, you don't need to maintain another worker queue backend.
|
|
188
|
+
You are probably using a database anyway, so this worker queue comes for free.
|
|
189
|
+
|
|
190
|
+
Usually, a database is inefficient as a worker queue's data storage because of the potential lock contention and the need for constant querying.
|
|
191
|
+
However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
|
|
192
|
+
|
|
193
|
+
This project is inspired by many of the SKIP-LOCKED-based worker queue successors.
|
|
194
|
+
Why don't we just use those existing tools?
|
|
195
|
+
Well, because while they work great as worker queue solutions, they don't take advantage of writing tasks and their related data into the database in a transaction.
|
|
196
|
+
Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
|
|
197
|
+
|
|
198
|
+
With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
|
|
199
|
+
Instead, we open it up to let the user insert the row and choose when and what to commit along with the task.
|
|
200
|
+
|
|
201
|
+
## Sponsor
|
|
202
|
+
|
|
203
|
+
<p align="center">
|
|
204
|
+
<a href="https://beanhub.io"><img src="https://github.com/LaunchPlatform/bq/raw/master/assets/beanhub.svg?raw=true" alt="BeanHub logo" /></a>
|
|
205
|
+
</p>
|
|
206
|
+
|
|
207
|
+
A modern accounting book service based on the most popular open source version control system [Git](https://git-scm.com/) and text-based double entry accounting book software [Beancount](https://beancount.github.io/docs/index.html).
|
|
208
|
+
|
|
209
|
+
## Alternatives
|
|
210
|
+
|
|
211
|
+
- [solid_queue](https://github.com/rails/solid_queue)
|
|
212
|
+
- [postgres-tq](https://github.com/flix-tech/postgres-tq)
|
|
213
|
+
- [PgQueuer](https://github.com/janbjorge/PgQueuer)
|
|
214
|
+
- [hatchet](https://github.com/hatchet-dev/hatchet)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from sqlalchemy.orm import DeclarativeBase
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Base(DeclarativeBase):
|
|
5
|
-
pass
|
|
1
|
+
from sqlalchemy.orm import DeclarativeBase
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Base(DeclarativeBase):
|
|
5
|
+
pass
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from sqlalchemy.orm import scoped_session
|
|
2
|
-
from sqlalchemy.orm import sessionmaker
|
|
3
|
-
|
|
4
|
-
SessionMaker = sessionmaker()
|
|
5
|
-
Session = scoped_session(SessionMaker)
|
|
1
|
+
from sqlalchemy.orm import scoped_session
|
|
2
|
+
from sqlalchemy.orm import sessionmaker
|
|
3
|
+
|
|
4
|
+
SessionMaker = sessionmaker()
|
|
5
|
+
Session = scoped_session(SessionMaker)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .task import Task
|
|
2
|
-
from .task import TaskState
|
|
3
|
-
from .worker import Worker
|
|
4
|
-
from .worker import WorkerState
|
|
1
|
+
from .task import Task
|
|
2
|
+
from .task import TaskState
|
|
3
|
+
from .worker import Worker
|
|
4
|
+
from .worker import WorkerState
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import typing
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def make_repr_attrs(items: typing.Sequence[typing.Tuple[str, typing.Any]]) -> str:
|
|
5
|
-
return " ".join(map(lambda item: "=".join([item[0], str(item[1])]), items))
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def make_repr_attrs(items: typing.Sequence[typing.Tuple[str, typing.Any]]) -> str:
|
|
5
|
+
return " ".join(map(lambda item: "=".join([item[0], str(item[1])]), items))
|
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
import enum
|
|
2
|
-
|
|
3
|
-
from sqlalchemy import Column
|
|
4
|
-
from sqlalchemy import DateTime
|
|
5
|
-
from sqlalchemy import Enum
|
|
6
|
-
from sqlalchemy import func
|
|
7
|
-
from sqlalchemy import String
|
|
8
|
-
from sqlalchemy.dialects.postgresql import ARRAY
|
|
9
|
-
from sqlalchemy.dialects.postgresql import UUID
|
|
10
|
-
from sqlalchemy.orm import relationship
|
|
11
|
-
|
|
12
|
-
from ..db.base import Base
|
|
13
|
-
from .helpers import make_repr_attrs
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class WorkerState(enum.Enum):
|
|
17
|
-
# the worker is running
|
|
18
|
-
RUNNING = "RUNNING"
|
|
19
|
-
# the worker shuts down normally
|
|
20
|
-
SHUTDOWN = "SHUTDOWN"
|
|
21
|
-
# The worker has no heartbeat for a while
|
|
22
|
-
NO_HEARTBEAT = "NO_HEARTBEAT"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class Worker(Base):
|
|
26
|
-
id = Column(
|
|
27
|
-
UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()
|
|
28
|
-
)
|
|
29
|
-
# current state of the worker
|
|
30
|
-
state = Column(
|
|
31
|
-
Enum(WorkerState),
|
|
32
|
-
nullable=False,
|
|
33
|
-
default=WorkerState.RUNNING,
|
|
34
|
-
server_default=WorkerState.RUNNING.value,
|
|
35
|
-
index=True,
|
|
36
|
-
)
|
|
37
|
-
# name of the worker
|
|
38
|
-
name = Column(String, nullable=False)
|
|
39
|
-
# the channels we are processing
|
|
40
|
-
channels = Column(ARRAY(String), nullable=False)
|
|
41
|
-
# last heartbeat of this worker
|
|
42
|
-
last_heartbeat = Column(
|
|
43
|
-
DateTime(timezone=True),
|
|
44
|
-
nullable=False,
|
|
45
|
-
server_default=func.now(),
|
|
46
|
-
index=True,
|
|
47
|
-
)
|
|
48
|
-
# created datetime of the worker
|
|
49
|
-
created_at = Column(
|
|
50
|
-
DateTime(timezone=True), nullable=False, server_default=func.now()
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
tasks = relationship(
|
|
54
|
-
"Task",
|
|
55
|
-
back_populates="worker",
|
|
56
|
-
cascade="all,delete",
|
|
57
|
-
order_by="Task.created_at",
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
__tablename__ = "bq_workers"
|
|
61
|
-
|
|
62
|
-
def __repr__(self) -> str:
|
|
63
|
-
items = [
|
|
64
|
-
("id", self.id),
|
|
65
|
-
("name", self.name),
|
|
66
|
-
("channels", self.channels),
|
|
67
|
-
("state", self.state),
|
|
68
|
-
]
|
|
69
|
-
return f"<{self.__class__.__name__} {make_repr_attrs(items)}>"
|
|
1
|
+
import enum
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import Column
|
|
4
|
+
from sqlalchemy import DateTime
|
|
5
|
+
from sqlalchemy import Enum
|
|
6
|
+
from sqlalchemy import func
|
|
7
|
+
from sqlalchemy import String
|
|
8
|
+
from sqlalchemy.dialects.postgresql import ARRAY
|
|
9
|
+
from sqlalchemy.dialects.postgresql import UUID
|
|
10
|
+
from sqlalchemy.orm import relationship
|
|
11
|
+
|
|
12
|
+
from ..db.base import Base
|
|
13
|
+
from .helpers import make_repr_attrs
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WorkerState(enum.Enum):
|
|
17
|
+
# the worker is running
|
|
18
|
+
RUNNING = "RUNNING"
|
|
19
|
+
# the worker shuts down normally
|
|
20
|
+
SHUTDOWN = "SHUTDOWN"
|
|
21
|
+
# The worker has no heartbeat for a while
|
|
22
|
+
NO_HEARTBEAT = "NO_HEARTBEAT"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Worker(Base):
|
|
26
|
+
id = Column(
|
|
27
|
+
UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()
|
|
28
|
+
)
|
|
29
|
+
# current state of the worker
|
|
30
|
+
state = Column(
|
|
31
|
+
Enum(WorkerState),
|
|
32
|
+
nullable=False,
|
|
33
|
+
default=WorkerState.RUNNING,
|
|
34
|
+
server_default=WorkerState.RUNNING.value,
|
|
35
|
+
index=True,
|
|
36
|
+
)
|
|
37
|
+
# name of the worker
|
|
38
|
+
name = Column(String, nullable=False)
|
|
39
|
+
# the channels we are processing
|
|
40
|
+
channels = Column(ARRAY(String), nullable=False)
|
|
41
|
+
# last heartbeat of this worker
|
|
42
|
+
last_heartbeat = Column(
|
|
43
|
+
DateTime(timezone=True),
|
|
44
|
+
nullable=False,
|
|
45
|
+
server_default=func.now(),
|
|
46
|
+
index=True,
|
|
47
|
+
)
|
|
48
|
+
# created datetime of the worker
|
|
49
|
+
created_at = Column(
|
|
50
|
+
DateTime(timezone=True), nullable=False, server_default=func.now()
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
tasks = relationship(
|
|
54
|
+
"Task",
|
|
55
|
+
back_populates="worker",
|
|
56
|
+
cascade="all,delete",
|
|
57
|
+
order_by="Task.created_at",
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
__tablename__ = "bq_workers"
|
|
61
|
+
|
|
62
|
+
def __repr__(self) -> str:
|
|
63
|
+
items = [
|
|
64
|
+
("id", self.id),
|
|
65
|
+
("name", self.name),
|
|
66
|
+
("channels", self.channels),
|
|
67
|
+
("state", self.state),
|
|
68
|
+
]
|
|
69
|
+
return f"<{self.__class__.__name__} {make_repr_attrs(items)}>"
|
|
@@ -1,136 +1,136 @@
|
|
|
1
|
-
import collections
|
|
2
|
-
import dataclasses
|
|
3
|
-
import inspect
|
|
4
|
-
import logging
|
|
5
|
-
import typing
|
|
6
|
-
|
|
7
|
-
import venusian
|
|
8
|
-
from sqlalchemy.orm import object_session
|
|
9
|
-
|
|
10
|
-
from bq import models
|
|
11
|
-
|
|
12
|
-
BQ_PROCESSOR_CATEGORY = "bq_processor"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclasses.dataclass(frozen=True)
|
|
16
|
-
class Processor:
|
|
17
|
-
channel: str
|
|
18
|
-
module: str
|
|
19
|
-
name: str
|
|
20
|
-
func: typing.Callable
|
|
21
|
-
# should we auto complete the task or not
|
|
22
|
-
auto_complete: bool = True
|
|
23
|
-
# should we auto rollback the transaction when encounter unhandled exception
|
|
24
|
-
auto_rollback_on_exc: bool = True
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class ProcessorHelper:
|
|
28
|
-
def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
|
|
29
|
-
self._processor = processor
|
|
30
|
-
self._task_cls = task_cls
|
|
31
|
-
|
|
32
|
-
def __call__(self, *args, **kwargs):
|
|
33
|
-
return self._processor.func(*args, **kwargs)
|
|
34
|
-
|
|
35
|
-
def run(self, **kwargs) -> models.Task:
|
|
36
|
-
return self._task_cls(
|
|
37
|
-
channel=self._processor.channel,
|
|
38
|
-
module=self._processor.module,
|
|
39
|
-
func_name=self._processor.name,
|
|
40
|
-
kwargs=kwargs,
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def process_task(task: models.Task, processor: Processor):
|
|
45
|
-
logger = logging.getLogger(__name__)
|
|
46
|
-
db = object_session(task)
|
|
47
|
-
func_signature = inspect.signature(processor.func)
|
|
48
|
-
base_kwargs = {}
|
|
49
|
-
if "task" in func_signature.parameters:
|
|
50
|
-
base_kwargs["task"] = task
|
|
51
|
-
if "db" in func_signature.parameters:
|
|
52
|
-
base_kwargs["db"] = db
|
|
53
|
-
with db.begin_nested() as savepoint:
|
|
54
|
-
try:
|
|
55
|
-
result = processor.func(**base_kwargs, **task.kwargs)
|
|
56
|
-
savepoint.commit()
|
|
57
|
-
except Exception as exc:
|
|
58
|
-
logger.error("Unhandled exception for task %s", task.id, exc_info=True)
|
|
59
|
-
if processor.auto_rollback_on_exc:
|
|
60
|
-
savepoint.rollback()
|
|
61
|
-
# TODO: add error event
|
|
62
|
-
task.state = models.TaskState.FAILED
|
|
63
|
-
task.error_message = str(exc)
|
|
64
|
-
db.add(task)
|
|
65
|
-
return
|
|
66
|
-
if processor.auto_complete:
|
|
67
|
-
logger.info("Task %s auto complete", task.id)
|
|
68
|
-
task.state = models.TaskState.DONE
|
|
69
|
-
task.result = result
|
|
70
|
-
db.add(task)
|
|
71
|
-
return result
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class Registry:
|
|
75
|
-
def __init__(self):
|
|
76
|
-
self.logger = logging.getLogger(__name__)
|
|
77
|
-
self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))
|
|
78
|
-
|
|
79
|
-
def add(self, processor: Processor):
|
|
80
|
-
self.processors[processor.channel][processor.module][processor.name] = processor
|
|
81
|
-
|
|
82
|
-
def process(self, task: models.Task) -> typing.Any:
|
|
83
|
-
modules = self.processors.get(task.channel, {})
|
|
84
|
-
functions = modules.get(task.module, {})
|
|
85
|
-
processor = functions.get(task.func_name)
|
|
86
|
-
db = object_session(task)
|
|
87
|
-
if processor is None:
|
|
88
|
-
self.logger.error(
|
|
89
|
-
"Cannot find processor for task %s with module=%s, func=%s",
|
|
90
|
-
task.id,
|
|
91
|
-
task.module,
|
|
92
|
-
task.func_name,
|
|
93
|
-
)
|
|
94
|
-
# TODO: add error event
|
|
95
|
-
task.state = models.TaskState.FAILED
|
|
96
|
-
task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
|
|
97
|
-
db.add(task)
|
|
98
|
-
return
|
|
99
|
-
return process_task(task, processor)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def processor(
|
|
103
|
-
channel: str,
|
|
104
|
-
auto_complete: bool = True,
|
|
105
|
-
auto_rollback_on_exc: bool = True,
|
|
106
|
-
task_cls: typing.Type = models.Task,
|
|
107
|
-
) -> typing.Callable:
|
|
108
|
-
def decorator(wrapped: typing.Callable):
|
|
109
|
-
processor = Processor(
|
|
110
|
-
module=wrapped.__module__,
|
|
111
|
-
name=wrapped.__name__,
|
|
112
|
-
channel=channel,
|
|
113
|
-
func=wrapped,
|
|
114
|
-
auto_complete=auto_complete,
|
|
115
|
-
auto_rollback_on_exc=auto_rollback_on_exc,
|
|
116
|
-
)
|
|
117
|
-
helper_obj = ProcessorHelper(processor, task_cls=task_cls)
|
|
118
|
-
|
|
119
|
-
def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
|
|
120
|
-
if processor.name != name:
|
|
121
|
-
raise ValueError("Name is not the same")
|
|
122
|
-
scanner.registry.add(processor)
|
|
123
|
-
|
|
124
|
-
venusian.attach(helper_obj, callback, category=BQ_PROCESSOR_CATEGORY)
|
|
125
|
-
return helper_obj
|
|
126
|
-
|
|
127
|
-
return decorator
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
|
|
131
|
-
if registry is None:
|
|
132
|
-
registry = Registry()
|
|
133
|
-
scanner = venusian.Scanner(registry=registry)
|
|
134
|
-
for package in packages:
|
|
135
|
-
scanner.scan(package, categories=(BQ_PROCESSOR_CATEGORY,))
|
|
136
|
-
return registry
|
|
1
|
+
import collections
|
|
2
|
+
import dataclasses
|
|
3
|
+
import inspect
|
|
4
|
+
import logging
|
|
5
|
+
import typing
|
|
6
|
+
|
|
7
|
+
import venusian
|
|
8
|
+
from sqlalchemy.orm import object_session
|
|
9
|
+
|
|
10
|
+
from bq import models
|
|
11
|
+
|
|
12
|
+
BQ_PROCESSOR_CATEGORY = "bq_processor"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclasses.dataclass(frozen=True)
|
|
16
|
+
class Processor:
|
|
17
|
+
channel: str
|
|
18
|
+
module: str
|
|
19
|
+
name: str
|
|
20
|
+
func: typing.Callable
|
|
21
|
+
# should we auto complete the task or not
|
|
22
|
+
auto_complete: bool = True
|
|
23
|
+
# should we auto rollback the transaction when encounter unhandled exception
|
|
24
|
+
auto_rollback_on_exc: bool = True
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ProcessorHelper:
|
|
28
|
+
def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
|
|
29
|
+
self._processor = processor
|
|
30
|
+
self._task_cls = task_cls
|
|
31
|
+
|
|
32
|
+
def __call__(self, *args, **kwargs):
|
|
33
|
+
return self._processor.func(*args, **kwargs)
|
|
34
|
+
|
|
35
|
+
def run(self, **kwargs) -> models.Task:
|
|
36
|
+
return self._task_cls(
|
|
37
|
+
channel=self._processor.channel,
|
|
38
|
+
module=self._processor.module,
|
|
39
|
+
func_name=self._processor.name,
|
|
40
|
+
kwargs=kwargs,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def process_task(task: models.Task, processor: Processor):
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
db = object_session(task)
|
|
47
|
+
func_signature = inspect.signature(processor.func)
|
|
48
|
+
base_kwargs = {}
|
|
49
|
+
if "task" in func_signature.parameters:
|
|
50
|
+
base_kwargs["task"] = task
|
|
51
|
+
if "db" in func_signature.parameters:
|
|
52
|
+
base_kwargs["db"] = db
|
|
53
|
+
with db.begin_nested() as savepoint:
|
|
54
|
+
try:
|
|
55
|
+
result = processor.func(**base_kwargs, **task.kwargs)
|
|
56
|
+
savepoint.commit()
|
|
57
|
+
except Exception as exc:
|
|
58
|
+
logger.error("Unhandled exception for task %s", task.id, exc_info=True)
|
|
59
|
+
if processor.auto_rollback_on_exc:
|
|
60
|
+
savepoint.rollback()
|
|
61
|
+
# TODO: add error event
|
|
62
|
+
task.state = models.TaskState.FAILED
|
|
63
|
+
task.error_message = str(exc)
|
|
64
|
+
db.add(task)
|
|
65
|
+
return
|
|
66
|
+
if processor.auto_complete:
|
|
67
|
+
logger.info("Task %s auto complete", task.id)
|
|
68
|
+
task.state = models.TaskState.DONE
|
|
69
|
+
task.result = result
|
|
70
|
+
db.add(task)
|
|
71
|
+
return result
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class Registry:
|
|
75
|
+
def __init__(self):
|
|
76
|
+
self.logger = logging.getLogger(__name__)
|
|
77
|
+
self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))
|
|
78
|
+
|
|
79
|
+
def add(self, processor: Processor):
|
|
80
|
+
self.processors[processor.channel][processor.module][processor.name] = processor
|
|
81
|
+
|
|
82
|
+
def process(self, task: models.Task) -> typing.Any:
|
|
83
|
+
modules = self.processors.get(task.channel, {})
|
|
84
|
+
functions = modules.get(task.module, {})
|
|
85
|
+
processor = functions.get(task.func_name)
|
|
86
|
+
db = object_session(task)
|
|
87
|
+
if processor is None:
|
|
88
|
+
self.logger.error(
|
|
89
|
+
"Cannot find processor for task %s with module=%s, func=%s",
|
|
90
|
+
task.id,
|
|
91
|
+
task.module,
|
|
92
|
+
task.func_name,
|
|
93
|
+
)
|
|
94
|
+
# TODO: add error event
|
|
95
|
+
task.state = models.TaskState.FAILED
|
|
96
|
+
task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
|
|
97
|
+
db.add(task)
|
|
98
|
+
return
|
|
99
|
+
return process_task(task, processor)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def processor(
|
|
103
|
+
channel: str,
|
|
104
|
+
auto_complete: bool = True,
|
|
105
|
+
auto_rollback_on_exc: bool = True,
|
|
106
|
+
task_cls: typing.Type = models.Task,
|
|
107
|
+
) -> typing.Callable:
|
|
108
|
+
def decorator(wrapped: typing.Callable):
|
|
109
|
+
processor = Processor(
|
|
110
|
+
module=wrapped.__module__,
|
|
111
|
+
name=wrapped.__name__,
|
|
112
|
+
channel=channel,
|
|
113
|
+
func=wrapped,
|
|
114
|
+
auto_complete=auto_complete,
|
|
115
|
+
auto_rollback_on_exc=auto_rollback_on_exc,
|
|
116
|
+
)
|
|
117
|
+
helper_obj = ProcessorHelper(processor, task_cls=task_cls)
|
|
118
|
+
|
|
119
|
+
def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
|
|
120
|
+
if processor.name != name:
|
|
121
|
+
raise ValueError("Name is not the same")
|
|
122
|
+
scanner.registry.add(processor)
|
|
123
|
+
|
|
124
|
+
venusian.attach(helper_obj, callback, category=BQ_PROCESSOR_CATEGORY)
|
|
125
|
+
return helper_obj
|
|
126
|
+
|
|
127
|
+
return decorator
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
|
|
131
|
+
if registry is None:
|
|
132
|
+
registry = Registry()
|
|
133
|
+
scanner = venusian.Scanner(registry=registry)
|
|
134
|
+
for package in packages:
|
|
135
|
+
scanner.scan(package, categories=(BQ_PROCESSOR_CATEGORY,))
|
|
136
|
+
return registry
|
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
import datetime
|
|
2
|
-
import typing
|
|
3
|
-
|
|
4
|
-
from sqlalchemy import func
|
|
5
|
-
from sqlalchemy.orm import Query
|
|
6
|
-
from sqlalchemy.orm import Session
|
|
7
|
-
|
|
8
|
-
from .. import models
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class WorkerService:
|
|
12
|
-
def __init__(self, session: Session):
|
|
13
|
-
self.session = session
|
|
14
|
-
|
|
15
|
-
def update_heartbeat(self, worker: models.Worker):
|
|
16
|
-
worker.last_heartbeat = func.now()
|
|
17
|
-
self.session.add(worker)
|
|
18
|
-
|
|
19
|
-
def make_dead_worker_query(self, timeout: int, limit: int = 5) -> Query:
|
|
20
|
-
return (
|
|
21
|
-
self.session.query(models.Worker.id)
|
|
22
|
-
.filter(
|
|
23
|
-
models.Worker.last_heartbeat
|
|
24
|
-
< (func.now() - datetime.timedelta(seconds=timeout))
|
|
25
|
-
)
|
|
26
|
-
.filter(models.Worker.state == models.WorkerState.RUNNING)
|
|
27
|
-
.limit(limit)
|
|
28
|
-
.with_for_update(skip_locked=True)
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
def make_update_dead_worker_query(self, worker_query: typing.Any):
|
|
32
|
-
return (
|
|
33
|
-
models.Worker.__table__.update()
|
|
34
|
-
.where(models.Worker.id.in_(worker_query))
|
|
35
|
-
.values(
|
|
36
|
-
state=models.WorkerState.NO_HEARTBEAT,
|
|
37
|
-
)
|
|
38
|
-
.returning(models.Worker.id)
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
def fetch_dead_workers(self, timeout: int, limit: int = 5) -> Query:
|
|
42
|
-
dead_worker_query = self.make_dead_worker_query(timeout=timeout, limit=limit)
|
|
43
|
-
dead_worker_subquery = dead_worker_query.scalar_subquery()
|
|
44
|
-
worker_ids = [
|
|
45
|
-
item[0]
|
|
46
|
-
for item in self.session.execute(
|
|
47
|
-
self.make_update_dead_worker_query(dead_worker_subquery)
|
|
48
|
-
)
|
|
49
|
-
]
|
|
50
|
-
# TODO: ideally returning with (models.Task) should return the whole model, but SQLAlchemy is returning
|
|
51
|
-
# it columns in rows. We can save a round trip if we can find out how to solve this
|
|
52
|
-
return self.session.query(models.Worker).filter(
|
|
53
|
-
models.Worker.id.in_(worker_ids)
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
def make_update_tasks_query(self, worker_query: typing.Any):
|
|
57
|
-
return (
|
|
58
|
-
models.Task.__table__.update()
|
|
59
|
-
.where(models.Task.worker_id.in_(worker_query))
|
|
60
|
-
.where(models.Task.state == models.TaskState.PROCESSING)
|
|
61
|
-
.values(
|
|
62
|
-
state=models.TaskState.PENDING,
|
|
63
|
-
)
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
def reschedule_dead_tasks(self, worker_query: typing.Any) -> int:
|
|
67
|
-
update_dead_task_query = self.make_update_tasks_query(worker_query=worker_query)
|
|
68
|
-
res = self.session.execute(update_dead_task_query)
|
|
69
|
-
return res.rowcount
|
|
1
|
+
import datetime
|
|
2
|
+
import typing
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import func
|
|
5
|
+
from sqlalchemy.orm import Query
|
|
6
|
+
from sqlalchemy.orm import Session
|
|
7
|
+
|
|
8
|
+
from .. import models
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class WorkerService:
|
|
12
|
+
def __init__(self, session: Session):
|
|
13
|
+
self.session = session
|
|
14
|
+
|
|
15
|
+
def update_heartbeat(self, worker: models.Worker):
|
|
16
|
+
worker.last_heartbeat = func.now()
|
|
17
|
+
self.session.add(worker)
|
|
18
|
+
|
|
19
|
+
def make_dead_worker_query(self, timeout: int, limit: int = 5) -> Query:
|
|
20
|
+
return (
|
|
21
|
+
self.session.query(models.Worker.id)
|
|
22
|
+
.filter(
|
|
23
|
+
models.Worker.last_heartbeat
|
|
24
|
+
< (func.now() - datetime.timedelta(seconds=timeout))
|
|
25
|
+
)
|
|
26
|
+
.filter(models.Worker.state == models.WorkerState.RUNNING)
|
|
27
|
+
.limit(limit)
|
|
28
|
+
.with_for_update(skip_locked=True)
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
def make_update_dead_worker_query(self, worker_query: typing.Any):
|
|
32
|
+
return (
|
|
33
|
+
models.Worker.__table__.update()
|
|
34
|
+
.where(models.Worker.id.in_(worker_query))
|
|
35
|
+
.values(
|
|
36
|
+
state=models.WorkerState.NO_HEARTBEAT,
|
|
37
|
+
)
|
|
38
|
+
.returning(models.Worker.id)
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def fetch_dead_workers(self, timeout: int, limit: int = 5) -> Query:
|
|
42
|
+
dead_worker_query = self.make_dead_worker_query(timeout=timeout, limit=limit)
|
|
43
|
+
dead_worker_subquery = dead_worker_query.scalar_subquery()
|
|
44
|
+
worker_ids = [
|
|
45
|
+
item[0]
|
|
46
|
+
for item in self.session.execute(
|
|
47
|
+
self.make_update_dead_worker_query(dead_worker_subquery)
|
|
48
|
+
)
|
|
49
|
+
]
|
|
50
|
+
# TODO: ideally returning with (models.Task) should return the whole model, but SQLAlchemy is returning
|
|
51
|
+
# it columns in rows. We can save a round trip if we can find out how to solve this
|
|
52
|
+
return self.session.query(models.Worker).filter(
|
|
53
|
+
models.Worker.id.in_(worker_ids)
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
def make_update_tasks_query(self, worker_query: typing.Any):
|
|
57
|
+
return (
|
|
58
|
+
models.Task.__table__.update()
|
|
59
|
+
.where(models.Task.worker_id.in_(worker_query))
|
|
60
|
+
.where(models.Task.state == models.TaskState.PROCESSING)
|
|
61
|
+
.values(
|
|
62
|
+
state=models.TaskState.PENDING,
|
|
63
|
+
)
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
def reschedule_dead_tasks(self, worker_query: typing.Any) -> int:
|
|
67
|
+
update_dead_task_query = self.make_update_tasks_query(worker_query=worker_query)
|
|
68
|
+
res = self.session.execute(update_dead_task_query)
|
|
69
|
+
return res.rowcount
|
beanqueue-0.1.0/PKG-INFO
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: beanqueue
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
|
|
5
|
-
License: MIT
|
|
6
|
-
Author: Fang-Pen Lin
|
|
7
|
-
Author-email: fangpen@launchplatform.com
|
|
8
|
-
Requires-Python: >=3.11,<4.0
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
-
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
14
|
-
Requires-Dist: dependency-injector (>=4.41.0,<5.0.0)
|
|
15
|
-
Requires-Dist: pg-activity (>=3.5.1,<4.0.0)
|
|
16
|
-
Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
|
|
17
|
-
Requires-Dist: sqlalchemy (>=2.0.30,<3.0.0)
|
|
18
|
-
Requires-Dist: venusian (>=3.1.0,<4.0.0)
|
|
19
|
-
Description-Content-Type: text/markdown
|
|
20
|
-
|
|
21
|
-
# bq
|
|
22
|
-
BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
|
|
23
|
-
|
beanqueue-0.1.0/README.md
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|