beanqueue 0.1.3__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {beanqueue-0.1.3 → beanqueue-0.2.1}/PKG-INFO +20 -10
- {beanqueue-0.1.3 → beanqueue-0.2.1}/README.md +18 -8
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/__init__.py +1 -2
- beanqueue-0.2.1/bq/app.py +330 -0
- beanqueue-0.2.1/bq/cmds/create_tables.py +26 -0
- beanqueue-0.2.1/bq/cmds/process.py +23 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/cmds/submit.py +9 -9
- beanqueue-0.2.1/bq/cmds/utils.py +14 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/config.py +9 -0
- beanqueue-0.2.1/bq/events.py +3 -0
- beanqueue-0.2.1/bq/processors/processor.py +70 -0
- beanqueue-0.2.1/bq/processors/registry.py +47 -0
- beanqueue-0.2.1/bq/utils.py +8 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/pyproject.toml +2 -2
- beanqueue-0.1.3/bq/cmds/create_tables.py +0 -25
- beanqueue-0.1.3/bq/cmds/process.py +0 -188
- beanqueue-0.1.3/bq/container.py +0 -77
- beanqueue-0.1.3/bq/processors/registry.py +0 -136
- {beanqueue-0.1.3 → beanqueue-0.2.1}/LICENSE +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/cmds/__init__.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/constants.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/db/__init__.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/db/base.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/db/session.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/__init__.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/helpers.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/task.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/worker.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/processors/__init__.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/services/__init__.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/services/dispatch.py +0 -0
- {beanqueue-0.1.3 → beanqueue-0.2.1}/bq/services/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: beanqueue
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Fang-Pen Lin
|
|
@@ -10,8 +10,8 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: blinker (>=1.8.2,<2.0.0)
|
|
13
14
|
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
14
|
-
Requires-Dist: dependency-injector (>=4.41.0,<5.0.0)
|
|
15
15
|
Requires-Dist: pg-activity (>=3.5.1,<4.0.0)
|
|
16
16
|
Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
|
|
17
17
|
Requires-Dist: sqlalchemy (>=2.0.30,<3.0.0)
|
|
@@ -50,7 +50,9 @@ import bq
|
|
|
50
50
|
from .. import models
|
|
51
51
|
from .. import image_utils
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
app = bq.BeanQueue()
|
|
54
|
+
|
|
55
|
+
@app.processor(channel="images")
|
|
54
56
|
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
55
57
|
image = db.query(models.Image).filter(models.Image.task == task).one()
|
|
56
58
|
image_utils.resize(image, size=(width, height))
|
|
@@ -61,6 +63,7 @@ def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
|
61
63
|
|
|
62
64
|
The `db` and `task` keyword arguments are optional.
|
|
63
65
|
If you don't need to access the task object, you can simply define the function without these two parameters.
|
|
66
|
+
We also provide an optional `savepoint` argument in case you want to roll back the database changes you made.
|
|
64
67
|
|
|
65
68
|
To submit a task, you can either use `bq.Task` model object to construct the task object, insert into the
|
|
66
69
|
database session and commit.
|
|
@@ -130,14 +133,12 @@ Configurations can be modified by setting environment variables with `BQ_` prefi
|
|
|
130
133
|
For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
|
|
131
134
|
To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
|
|
132
135
|
The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
|
|
133
|
-
For now, the configurations only affect command line tools.
|
|
134
136
|
|
|
135
|
-
If you want to configure BeanQueue programmatically
|
|
137
|
+
If you want to configure BeanQueue programmatically, you can pass a `Config` object to `bq.BeanQueue` when creating it.
|
|
136
138
|
For example:
|
|
137
139
|
|
|
138
140
|
```python
|
|
139
141
|
import bq
|
|
140
|
-
from bq.cmds.process import process_tasks
|
|
141
142
|
from .my_config import config
|
|
142
143
|
|
|
143
144
|
container = bq.Container()
|
|
@@ -147,11 +148,20 @@ config = bq.Config(
|
|
|
147
148
|
DATABASE_URL=str(config.DATABASE_URL),
|
|
148
149
|
BATCH_SIZE=10,
|
|
149
150
|
)
|
|
150
|
-
|
|
151
|
-
|
|
151
|
+
app = bq.BeanQueue(config=config)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Then you can pass the `--app` argument (or `-a` for short), pointing to the app object, to the process command like this:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
python -m bq.cmds.process -a my_pkgs.bq.app images
|
|
152
158
|
```
|
|
153
159
|
|
|
154
|
-
|
|
160
|
+
Or, if you prefer to define your own process command, you can call the `process_tasks` method of the `BeanQueue` object directly like this:
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
app.process_tasks(channels=("images",))
|
|
164
|
+
```
|
|
155
165
|
|
|
156
166
|
### Define your own tables
|
|
157
167
|
|
|
@@ -231,7 +241,7 @@ config = bq.Config(
|
|
|
231
241
|
WORKER_MODEL="my_pkgs.models.Worker",
|
|
232
242
|
# ... other configs
|
|
233
243
|
)
|
|
234
|
-
|
|
244
|
+
app = bq.BeanQueue(config)
|
|
235
245
|
```
|
|
236
246
|
|
|
237
247
|
## Why?
|
|
@@ -30,7 +30,9 @@ import bq
|
|
|
30
30
|
from .. import models
|
|
31
31
|
from .. import image_utils
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
app = bq.BeanQueue()
|
|
34
|
+
|
|
35
|
+
@app.processor(channel="images")
|
|
34
36
|
def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
35
37
|
image = db.query(models.Image).filter(models.Image.task == task).one()
|
|
36
38
|
image_utils.resize(image, size=(width, height))
|
|
@@ -41,6 +43,7 @@ def resize_image(db: Session, task: bq.Task, width: int, height: int):
|
|
|
41
43
|
|
|
42
44
|
The `db` and `task` keyword arguments are optional.
|
|
43
45
|
If you don't need to access the task object, you can simply define the function without these two parameters.
|
|
46
|
+
We also provide an optional `savepoint` argument in case you want to roll back the database changes you made.
|
|
44
47
|
|
|
45
48
|
To submit a task, you can either use `bq.Task` model object to construct the task object, insert into the
|
|
46
49
|
database session and commit.
|
|
@@ -110,14 +113,12 @@ Configurations can be modified by setting environment variables with `BQ_` prefi
|
|
|
110
113
|
For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
|
|
111
114
|
To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
|
|
112
115
|
The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
|
|
113
|
-
For now, the configurations only affect command line tools.
|
|
114
116
|
|
|
115
|
-
If you want to configure BeanQueue programmatically
|
|
117
|
+
If you want to configure BeanQueue programmatically, you can pass a `Config` object to `bq.BeanQueue` when creating it.
|
|
116
118
|
For example:
|
|
117
119
|
|
|
118
120
|
```python
|
|
119
121
|
import bq
|
|
120
|
-
from bq.cmds.process import process_tasks
|
|
121
122
|
from .my_config import config
|
|
122
123
|
|
|
123
124
|
container = bq.Container()
|
|
@@ -127,11 +128,20 @@ config = bq.Config(
|
|
|
127
128
|
DATABASE_URL=str(config.DATABASE_URL),
|
|
128
129
|
BATCH_SIZE=10,
|
|
129
130
|
)
|
|
130
|
-
|
|
131
|
-
|
|
131
|
+
app = bq.BeanQueue(config=config)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Then you can pass the `--app` argument (or `-a` for short), pointing to the app object, to the process command like this:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
python -m bq.cmds.process -a my_pkgs.bq.app images
|
|
132
138
|
```
|
|
133
139
|
|
|
134
|
-
|
|
140
|
+
Or, if you prefer to define your own process command, you can call the `process_tasks` method of the `BeanQueue` object directly like this:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
app.process_tasks(channels=("images",))
|
|
144
|
+
```
|
|
135
145
|
|
|
136
146
|
### Define your own tables
|
|
137
147
|
|
|
@@ -211,7 +221,7 @@ config = bq.Config(
|
|
|
211
221
|
WORKER_MODEL="my_pkgs.models.Worker",
|
|
212
222
|
# ... other configs
|
|
213
223
|
)
|
|
214
|
-
|
|
224
|
+
app = bq.BeanQueue(config)
|
|
215
225
|
```
|
|
216
226
|
|
|
217
227
|
## Why?
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
+
from .app import BeanQueue
|
|
1
2
|
from .config import Config # noqa
|
|
2
|
-
from .container import Container # noqa
|
|
3
3
|
from .models import Task # noqa
|
|
4
4
|
from .models import TaskModelMixin
|
|
5
5
|
from .models import TaskModelRefWorkerMixin
|
|
@@ -8,4 +8,3 @@ from .models import Worker # noqa
|
|
|
8
8
|
from .models import WorkerModelMixin # noqa
|
|
9
9
|
from .models import WorkerRefMixin # noqa
|
|
10
10
|
from .models import WorkerState # noqa
|
|
11
|
-
from .processors.registry import processor # noqa
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import importlib
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import platform
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
import typing
|
|
10
|
+
from wsgiref.simple_server import make_server
|
|
11
|
+
|
|
12
|
+
import venusian
|
|
13
|
+
from sqlalchemy import func
|
|
14
|
+
from sqlalchemy.engine import create_engine
|
|
15
|
+
from sqlalchemy.engine import Engine
|
|
16
|
+
from sqlalchemy.orm import Session as DBSession
|
|
17
|
+
from sqlalchemy.pool import SingletonThreadPool
|
|
18
|
+
|
|
19
|
+
from . import constants
|
|
20
|
+
from . import events
|
|
21
|
+
from . import models
|
|
22
|
+
from .config import Config
|
|
23
|
+
from .db.session import SessionMaker
|
|
24
|
+
from .processors.processor import Processor
|
|
25
|
+
from .processors.processor import ProcessorHelper
|
|
26
|
+
from .processors.registry import collect
|
|
27
|
+
from .services.dispatch import DispatchService
|
|
28
|
+
from .services.worker import WorkerService
|
|
29
|
+
from .utils import load_module_var
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BeanQueue:
|
|
35
|
+
def __init__(
|
|
36
|
+
self,
|
|
37
|
+
config: Config | None = None,
|
|
38
|
+
session_cls: DBSession = SessionMaker,
|
|
39
|
+
worker_service_cls: typing.Type[WorkerService] = WorkerService,
|
|
40
|
+
dispatch_service_cls: typing.Type[DispatchService] = DispatchService,
|
|
41
|
+
engine: Engine | None = None,
|
|
42
|
+
):
|
|
43
|
+
self.config = config if config is not None else Config()
|
|
44
|
+
self.session_cls = session_cls
|
|
45
|
+
self.worker_service_cls = worker_service_cls
|
|
46
|
+
self.dispatch_service_cls = dispatch_service_cls
|
|
47
|
+
self._engine = engine
|
|
48
|
+
|
|
49
|
+
def create_default_engine(self):
|
|
50
|
+
return create_engine(
|
|
51
|
+
str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
def make_session(self) -> DBSession:
|
|
55
|
+
return self.session_cls(bind=self.engine)
|
|
56
|
+
|
|
57
|
+
@property
|
|
58
|
+
def engine(self) -> Engine:
|
|
59
|
+
if self._engine is None:
|
|
60
|
+
self._engine = self.create_default_engine()
|
|
61
|
+
return self._engine
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def task_model(self) -> typing.Type[models.Task]:
|
|
65
|
+
return load_module_var(self.config.TASK_MODEL)
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def worker_model(self) -> typing.Type[models.Worker]:
|
|
69
|
+
return load_module_var(self.config.WORKER_MODEL)
|
|
70
|
+
|
|
71
|
+
def _make_worker_service(self, session: DBSession):
|
|
72
|
+
return self.worker_service_cls(
|
|
73
|
+
session=session, task_model=self.task_model, worker_model=self.worker_model
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
def _make_dispatch_service(self, session: DBSession):
|
|
77
|
+
return self.dispatch_service_cls(session=session, task_model=self.task_model)
|
|
78
|
+
|
|
79
|
+
def processor(
|
|
80
|
+
self,
|
|
81
|
+
channel: str = constants.DEFAULT_CHANNEL,
|
|
82
|
+
auto_complete: bool = True,
|
|
83
|
+
auto_rollback_on_exc: bool = True,
|
|
84
|
+
task_model: typing.Type | None = None,
|
|
85
|
+
) -> typing.Callable:
|
|
86
|
+
def decorator(wrapped: typing.Callable):
|
|
87
|
+
processor = Processor(
|
|
88
|
+
module=wrapped.__module__,
|
|
89
|
+
name=wrapped.__name__,
|
|
90
|
+
channel=channel,
|
|
91
|
+
func=wrapped,
|
|
92
|
+
auto_complete=auto_complete,
|
|
93
|
+
auto_rollback_on_exc=auto_rollback_on_exc,
|
|
94
|
+
)
|
|
95
|
+
helper_obj = ProcessorHelper(
|
|
96
|
+
processor,
|
|
97
|
+
task_cls=task_model if task_model is not None else self.task_model,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
|
|
101
|
+
if processor.name != name:
|
|
102
|
+
raise ValueError("Name is not the same")
|
|
103
|
+
scanner.registry.add(processor)
|
|
104
|
+
|
|
105
|
+
venusian.attach(
|
|
106
|
+
helper_obj, callback, category=constants.BQ_PROCESSOR_CATEGORY
|
|
107
|
+
)
|
|
108
|
+
return helper_obj
|
|
109
|
+
|
|
110
|
+
return decorator
|
|
111
|
+
|
|
112
|
+
def update_workers(
|
|
113
|
+
self,
|
|
114
|
+
worker_id: typing.Any,
|
|
115
|
+
):
|
|
116
|
+
db = self.make_session()
|
|
117
|
+
|
|
118
|
+
worker_service = self._make_worker_service(db)
|
|
119
|
+
dispatch_service = self._make_dispatch_service(db)
|
|
120
|
+
|
|
121
|
+
current_worker = worker_service.get_worker(worker_id)
|
|
122
|
+
logger.info(
|
|
123
|
+
"Updating worker %s with heartbeat_period=%s, heartbeat_timeout=%s",
|
|
124
|
+
current_worker.id,
|
|
125
|
+
self.config.WORKER_HEARTBEAT_PERIOD,
|
|
126
|
+
self.config.WORKER_HEARTBEAT_TIMEOUT,
|
|
127
|
+
)
|
|
128
|
+
while True:
|
|
129
|
+
dead_workers = worker_service.fetch_dead_workers(
|
|
130
|
+
timeout=self.config.WORKER_HEARTBEAT_TIMEOUT
|
|
131
|
+
)
|
|
132
|
+
task_count = worker_service.reschedule_dead_tasks(
|
|
133
|
+
# TODO: a better way to abstract this?
|
|
134
|
+
dead_workers.with_entities(current_worker.__class__.id)
|
|
135
|
+
)
|
|
136
|
+
found_dead_worker = False
|
|
137
|
+
for dead_worker in dead_workers:
|
|
138
|
+
found_dead_worker = True
|
|
139
|
+
logger.info(
|
|
140
|
+
"Found dead worker %s (name=%s), reschedule %s dead tasks in channels %s",
|
|
141
|
+
dead_worker.id,
|
|
142
|
+
dead_worker.name,
|
|
143
|
+
task_count,
|
|
144
|
+
dead_worker.channels,
|
|
145
|
+
)
|
|
146
|
+
dispatch_service.notify(dead_worker.channels)
|
|
147
|
+
if found_dead_worker:
|
|
148
|
+
db.commit()
|
|
149
|
+
|
|
150
|
+
if current_worker.state != models.WorkerState.RUNNING:
|
|
151
|
+
# This probably means we are somehow very slow to update the heartbeat in time, or the timeout window
|
|
152
|
+
# is set too short. It could also be the administrator update the worker state to something else than
|
|
153
|
+
# RUNNING. Regardless the reason, let's stop processing.
|
|
154
|
+
logger.warning(
|
|
155
|
+
"Current worker %s state is %s instead of running, quit processing",
|
|
156
|
+
current_worker.id,
|
|
157
|
+
current_worker.state,
|
|
158
|
+
)
|
|
159
|
+
sys.exit(0)
|
|
160
|
+
|
|
161
|
+
time.sleep(self.config.WORKER_HEARTBEAT_PERIOD)
|
|
162
|
+
current_worker.last_heartbeat = func.now()
|
|
163
|
+
db.add(current_worker)
|
|
164
|
+
db.commit()
|
|
165
|
+
|
|
166
|
+
def _serve_http_request(
|
|
167
|
+
self, worker_id: typing.Any, environ: dict, start_response: typing.Callable
|
|
168
|
+
) -> list[bytes]:
|
|
169
|
+
path = environ["PATH_INFO"]
|
|
170
|
+
if path == "/healthz":
|
|
171
|
+
db = self.make_session()
|
|
172
|
+
worker_service = self._make_worker_service(db)
|
|
173
|
+
worker = worker_service.get_worker(worker_id)
|
|
174
|
+
if worker is not None and worker.state == models.WorkerState.RUNNING:
|
|
175
|
+
start_response(
|
|
176
|
+
"200 OK",
|
|
177
|
+
[
|
|
178
|
+
("Content-Type", "application/json"),
|
|
179
|
+
],
|
|
180
|
+
)
|
|
181
|
+
return [
|
|
182
|
+
json.dumps(dict(status="ok", worker_id=str(worker_id))).encode(
|
|
183
|
+
"utf8"
|
|
184
|
+
)
|
|
185
|
+
]
|
|
186
|
+
else:
|
|
187
|
+
logger.warning("Bad worker %s state %s", worker_id, worker.state)
|
|
188
|
+
start_response(
|
|
189
|
+
"500 Internal Server Error",
|
|
190
|
+
[
|
|
191
|
+
("Content-Type", "application/json"),
|
|
192
|
+
],
|
|
193
|
+
)
|
|
194
|
+
return [
|
|
195
|
+
json.dumps(
|
|
196
|
+
dict(
|
|
197
|
+
status="internal error",
|
|
198
|
+
worker_id=str(worker_id),
|
|
199
|
+
state=str(worker.state),
|
|
200
|
+
)
|
|
201
|
+
).encode("utf8")
|
|
202
|
+
]
|
|
203
|
+
# TODO: add other metrics endpoints
|
|
204
|
+
start_response(
|
|
205
|
+
"404 NOT FOUND",
|
|
206
|
+
[
|
|
207
|
+
("Content-Type", "application/json"),
|
|
208
|
+
],
|
|
209
|
+
)
|
|
210
|
+
return [json.dumps(dict(status="not found")).encode("utf8")]
|
|
211
|
+
|
|
212
|
+
def run_metrics_http_server(self, worker_id: typing.Any):
|
|
213
|
+
host = self.config.METRICS_HTTP_SERVER_INTERFACE
|
|
214
|
+
port = self.config.METRICS_HTTP_SERVER_PORT
|
|
215
|
+
with make_server(
|
|
216
|
+
host, port, functools.partial(self._serve_http_request, worker_id)
|
|
217
|
+
) as httpd:
|
|
218
|
+
logger.info("Run metrics HTTP server on %s:%s", host, port)
|
|
219
|
+
httpd.serve_forever()
|
|
220
|
+
|
|
221
|
+
def process_tasks(
|
|
222
|
+
self,
|
|
223
|
+
channels: tuple[str, ...],
|
|
224
|
+
):
|
|
225
|
+
db = self.make_session()
|
|
226
|
+
if not channels:
|
|
227
|
+
channels = [constants.DEFAULT_CHANNEL]
|
|
228
|
+
|
|
229
|
+
if not self.config.PROCESSOR_PACKAGES:
|
|
230
|
+
logger.error("No PROCESSOR_PACKAGES provided")
|
|
231
|
+
raise ValueError("No PROCESSOR_PACKAGES provided")
|
|
232
|
+
|
|
233
|
+
logger.info("Scanning packages %s", self.config.PROCESSOR_PACKAGES)
|
|
234
|
+
pkgs = list(map(importlib.import_module, self.config.PROCESSOR_PACKAGES))
|
|
235
|
+
registry = collect(pkgs)
|
|
236
|
+
for channel, module_processors in registry.processors.items():
|
|
237
|
+
logger.info("Collected processors with channel %r", channel)
|
|
238
|
+
for module, func_processors in module_processors.items():
|
|
239
|
+
for processor in func_processors.values():
|
|
240
|
+
logger.info(
|
|
241
|
+
" Processor module=%r, name=%r", module, processor.name
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
dispatch_service = self.dispatch_service_cls(
|
|
245
|
+
session=db, task_model=self.task_model
|
|
246
|
+
)
|
|
247
|
+
work_service = self.worker_service_cls(
|
|
248
|
+
session=db, task_model=self.task_model, worker_model=self.worker_model
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
worker = work_service.make_worker(name=platform.node(), channels=channels)
|
|
252
|
+
db.add(worker)
|
|
253
|
+
dispatch_service.listen(channels)
|
|
254
|
+
db.commit()
|
|
255
|
+
|
|
256
|
+
metrics_server_thread = None
|
|
257
|
+
if self.config.METRICS_HTTP_SERVER_ENABLED:
|
|
258
|
+
metrics_server_thread = threading.Thread(
|
|
259
|
+
target=self.run_metrics_http_server,
|
|
260
|
+
args=(worker.id,),
|
|
261
|
+
)
|
|
262
|
+
metrics_server_thread.daemon = True
|
|
263
|
+
metrics_server_thread.start()
|
|
264
|
+
|
|
265
|
+
logger.info("Created worker %s, name=%s", worker.id, worker.name)
|
|
266
|
+
events.worker_init.send(self, worker=worker)
|
|
267
|
+
|
|
268
|
+
logger.info("Processing tasks in channels = %s ...", channels)
|
|
269
|
+
|
|
270
|
+
worker_update_thread = threading.Thread(
|
|
271
|
+
target=functools.partial(
|
|
272
|
+
self.update_workers,
|
|
273
|
+
worker_id=worker.id,
|
|
274
|
+
),
|
|
275
|
+
name="update_workers",
|
|
276
|
+
)
|
|
277
|
+
worker_update_thread.daemon = True
|
|
278
|
+
worker_update_thread.start()
|
|
279
|
+
|
|
280
|
+
worker_id = worker.id
|
|
281
|
+
|
|
282
|
+
try:
|
|
283
|
+
while True:
|
|
284
|
+
while True:
|
|
285
|
+
tasks = dispatch_service.dispatch(
|
|
286
|
+
channels,
|
|
287
|
+
worker_id=worker_id,
|
|
288
|
+
limit=self.config.BATCH_SIZE,
|
|
289
|
+
).all()
|
|
290
|
+
for task in tasks:
|
|
291
|
+
logger.info(
|
|
292
|
+
"Processing task %s, channel=%s, module=%s, func=%s",
|
|
293
|
+
task.id,
|
|
294
|
+
task.channel,
|
|
295
|
+
task.module,
|
|
296
|
+
task.func_name,
|
|
297
|
+
)
|
|
298
|
+
# TODO: support processor pool and other approaches to dispatch the workload
|
|
299
|
+
registry.process(task)
|
|
300
|
+
if not tasks:
|
|
301
|
+
# we should try to keep dispatching until we cannot find tasks
|
|
302
|
+
break
|
|
303
|
+
else:
|
|
304
|
+
db.commit()
|
|
305
|
+
# we will not see notifications in a transaction, need to close the transaction first before entering
|
|
306
|
+
# polling
|
|
307
|
+
db.close()
|
|
308
|
+
try:
|
|
309
|
+
for notification in dispatch_service.poll(
|
|
310
|
+
timeout=self.config.POLL_TIMEOUT
|
|
311
|
+
):
|
|
312
|
+
logger.debug("Receive notification %s", notification)
|
|
313
|
+
except TimeoutError:
|
|
314
|
+
logger.debug("Poll timeout, try again")
|
|
315
|
+
continue
|
|
316
|
+
except (SystemExit, KeyboardInterrupt):
|
|
317
|
+
db.rollback()
|
|
318
|
+
logger.info("Shutting down ...")
|
|
319
|
+
worker_update_thread.join(5)
|
|
320
|
+
if metrics_server_thread is not None:
|
|
321
|
+
metrics_server_thread.join(5)
|
|
322
|
+
|
|
323
|
+
worker.state = models.WorkerState.SHUTDOWN
|
|
324
|
+
db.add(worker)
|
|
325
|
+
task_count = self.worker_service_cls.reschedule_dead_tasks([worker.id])
|
|
326
|
+
logger.info("Reschedule %s tasks", task_count)
|
|
327
|
+
dispatch_service.notify(channels)
|
|
328
|
+
db.commit()
|
|
329
|
+
|
|
330
|
+
logger.info("Shutdown gracefully")
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from .. import models # noqa
|
|
6
|
+
from ..db.base import Base
|
|
7
|
+
from .utils import load_app
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
|
|
13
|
+
@click.option(
|
|
14
|
+
"-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
|
|
15
|
+
)
|
|
16
|
+
def main(
|
|
17
|
+
app: str | None = None,
|
|
18
|
+
):
|
|
19
|
+
app = load_app(app)
|
|
20
|
+
Base.metadata.create_all(bind=app.engine)
|
|
21
|
+
logger.info("Done, tables created")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
if __name__ == "__main__":
|
|
25
|
+
logging.basicConfig(level=logging.INFO)
|
|
26
|
+
main()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from .utils import load_app
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command()
|
|
9
|
+
@click.argument("channels", nargs=-1)
|
|
10
|
+
@click.option(
|
|
11
|
+
"-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
|
|
12
|
+
)
|
|
13
|
+
def main(
|
|
14
|
+
channels: tuple[str, ...],
|
|
15
|
+
app: str | None = None,
|
|
16
|
+
):
|
|
17
|
+
app = load_app(app)
|
|
18
|
+
app.process_tasks(channels)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
if __name__ == "__main__":
|
|
22
|
+
logging.basicConfig(level=logging.INFO)
|
|
23
|
+
main()
|
|
@@ -2,12 +2,11 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
import click
|
|
5
|
-
from dependency_injector.wiring import inject
|
|
6
|
-
from dependency_injector.wiring import Provide
|
|
7
5
|
|
|
8
6
|
from .. import models
|
|
9
|
-
from
|
|
10
|
-
|
|
7
|
+
from .utils import load_app
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
@click.command()
|
|
@@ -17,15 +16,18 @@ from ..db.session import Session
|
|
|
17
16
|
@click.option(
|
|
18
17
|
"-k", "--kwargs", type=str, help="Keyword arguments as JSON", default=None
|
|
19
18
|
)
|
|
20
|
-
@
|
|
19
|
+
@click.option(
|
|
20
|
+
"-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
|
|
21
|
+
)
|
|
21
22
|
def main(
|
|
22
23
|
channel: str,
|
|
23
24
|
module: str,
|
|
24
25
|
func: str,
|
|
25
26
|
kwargs: str | None,
|
|
26
|
-
|
|
27
|
+
app: str | None = None,
|
|
27
28
|
):
|
|
28
|
-
|
|
29
|
+
app = load_app(app)
|
|
30
|
+
db = app.session_cls(bind=app.create_default_engine())
|
|
29
31
|
|
|
30
32
|
logger.info(
|
|
31
33
|
"Submit task with channel=%s, module=%s, func=%s", channel, module, func
|
|
@@ -43,6 +45,4 @@ def main(
|
|
|
43
45
|
|
|
44
46
|
if __name__ == "__main__":
|
|
45
47
|
logging.basicConfig(level=logging.INFO)
|
|
46
|
-
container = Container()
|
|
47
|
-
container.wire(modules=[__name__])
|
|
48
48
|
main()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from ..app import BeanQueue
|
|
4
|
+
from ..utils import load_module_var
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_app(app: str | None) -> BeanQueue:
|
|
10
|
+
if app is None:
|
|
11
|
+
logger.info("No BeanQueue app provided, create default app")
|
|
12
|
+
return BeanQueue()
|
|
13
|
+
logger.info("Load BeanQueue app from %s", app)
|
|
14
|
+
return load_module_var(app)
|
|
@@ -30,6 +30,15 @@ class Config(BaseSettings):
|
|
|
30
30
|
# which worker model to use
|
|
31
31
|
WORKER_MODEL: str = "bq.Worker"
|
|
32
32
|
|
|
33
|
+
# Enable metrics HTTP server
|
|
34
|
+
METRICS_HTTP_SERVER_ENABLED: bool = True
|
|
35
|
+
|
|
36
|
+
# the metrics http server interface to listen
|
|
37
|
+
METRICS_HTTP_SERVER_INTERFACE: str = ""
|
|
38
|
+
|
|
39
|
+
# the metrics http server port to listen
|
|
40
|
+
METRICS_HTTP_SERVER_PORT: int = 8000
|
|
41
|
+
|
|
33
42
|
POSTGRES_SERVER: str = "localhost"
|
|
34
43
|
POSTGRES_USER: str = "bq"
|
|
35
44
|
POSTGRES_PASSWORD: str = ""
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import inspect
|
|
3
|
+
import logging
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
from sqlalchemy.orm import object_session
|
|
7
|
+
|
|
8
|
+
from .. import models
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclasses.dataclass(frozen=True)
|
|
14
|
+
class Processor:
|
|
15
|
+
channel: str
|
|
16
|
+
module: str
|
|
17
|
+
name: str
|
|
18
|
+
func: typing.Callable
|
|
19
|
+
# should we auto complete the task or not
|
|
20
|
+
auto_complete: bool = True
|
|
21
|
+
# should we auto rollback the transaction when encounter unhandled exception
|
|
22
|
+
auto_rollback_on_exc: bool = True
|
|
23
|
+
|
|
24
|
+
def process(self, task: models.Task):
|
|
25
|
+
db = object_session(task)
|
|
26
|
+
func_signature = inspect.signature(self.func)
|
|
27
|
+
base_kwargs = {}
|
|
28
|
+
if "task" in func_signature.parameters:
|
|
29
|
+
base_kwargs["task"] = task
|
|
30
|
+
if "db" in func_signature.parameters:
|
|
31
|
+
base_kwargs["db"] = db
|
|
32
|
+
with db.begin_nested() as savepoint:
|
|
33
|
+
if "savepoint" in func_signature.parameters:
|
|
34
|
+
base_kwargs["savepoint"] = savepoint
|
|
35
|
+
try:
|
|
36
|
+
result = self.func(**base_kwargs, **task.kwargs)
|
|
37
|
+
except Exception as exc:
|
|
38
|
+
logger.error("Unhandled exception for task %s", task.id, exc_info=True)
|
|
39
|
+
if self.auto_rollback_on_exc:
|
|
40
|
+
savepoint.rollback()
|
|
41
|
+
# TODO: add error event
|
|
42
|
+
task.state = models.TaskState.FAILED
|
|
43
|
+
task.error_message = str(exc)
|
|
44
|
+
db.add(task)
|
|
45
|
+
return
|
|
46
|
+
if self.auto_complete:
|
|
47
|
+
logger.info("Task %s auto complete", task.id)
|
|
48
|
+
task.state = models.TaskState.DONE
|
|
49
|
+
task.result = result
|
|
50
|
+
db.add(task)
|
|
51
|
+
return result
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ProcessorHelper:
|
|
55
|
+
"""Helper function to replace the decorated processor function and make creating Task model much easier"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
|
|
58
|
+
self._processor = processor
|
|
59
|
+
self._task_cls = task_cls
|
|
60
|
+
|
|
61
|
+
def __call__(self, *args, **kwargs):
|
|
62
|
+
return self._processor.func(*args, **kwargs)
|
|
63
|
+
|
|
64
|
+
def run(self, **kwargs) -> models.Task:
|
|
65
|
+
return self._task_cls(
|
|
66
|
+
channel=self._processor.channel,
|
|
67
|
+
module=self._processor.module,
|
|
68
|
+
func_name=self._processor.name,
|
|
69
|
+
kwargs=kwargs,
|
|
70
|
+
)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import logging
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import venusian
|
|
6
|
+
from sqlalchemy.orm import object_session
|
|
7
|
+
|
|
8
|
+
from .. import constants
|
|
9
|
+
from .. import models
|
|
10
|
+
from .processor import Processor
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Registry:
|
|
14
|
+
def __init__(self):
|
|
15
|
+
self.logger = logging.getLogger(__name__)
|
|
16
|
+
self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))
|
|
17
|
+
|
|
18
|
+
def add(self, processor: Processor):
|
|
19
|
+
self.processors[processor.channel][processor.module][processor.name] = processor
|
|
20
|
+
|
|
21
|
+
def process(self, task: models.Task) -> typing.Any:
|
|
22
|
+
modules = self.processors.get(task.channel, {})
|
|
23
|
+
functions = modules.get(task.module, {})
|
|
24
|
+
processor = functions.get(task.func_name)
|
|
25
|
+
db = object_session(task)
|
|
26
|
+
if processor is None:
|
|
27
|
+
self.logger.error(
|
|
28
|
+
"Cannot find processor for task %s with module=%s, func=%s",
|
|
29
|
+
task.id,
|
|
30
|
+
task.module,
|
|
31
|
+
task.func_name,
|
|
32
|
+
)
|
|
33
|
+
# TODO: add error event
|
|
34
|
+
task.state = models.TaskState.FAILED
|
|
35
|
+
task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
|
|
36
|
+
db.add(task)
|
|
37
|
+
return
|
|
38
|
+
return processor.process(task)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
|
|
42
|
+
if registry is None:
|
|
43
|
+
registry = Registry()
|
|
44
|
+
scanner = venusian.Scanner(registry=registry)
|
|
45
|
+
for package in packages:
|
|
46
|
+
scanner.scan(package, categories=(constants.BQ_PROCESSOR_CATEGORY,))
|
|
47
|
+
return registry
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "beanqueue"
|
|
3
|
-
version = "0.1
|
|
3
|
+
version = "0.2.1"
|
|
4
4
|
description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library"
|
|
5
5
|
authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -14,9 +14,9 @@ python = "^3.11"
|
|
|
14
14
|
sqlalchemy = "^2.0.30"
|
|
15
15
|
venusian = "^3.1.0"
|
|
16
16
|
click = "^8.1.7"
|
|
17
|
-
dependency-injector = "^4.41.0"
|
|
18
17
|
pydantic-settings = "^2.2.1"
|
|
19
18
|
pg-activity = "^3.5.1"
|
|
19
|
+
blinker = "^1.8.2"
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
[tool.poetry.group.dev.dependencies]
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import click
|
|
4
|
-
from dependency_injector.wiring import inject
|
|
5
|
-
from dependency_injector.wiring import Provide
|
|
6
|
-
from sqlalchemy.engine import Engine
|
|
7
|
-
|
|
8
|
-
from .. import models # noqa
|
|
9
|
-
from ..container import Container
|
|
10
|
-
from ..db.base import Base
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@click.command()
|
|
14
|
-
@inject
|
|
15
|
-
def main(engine: Engine = Provide[Container.db_engine]):
|
|
16
|
-
logger = logging.getLogger(__name__)
|
|
17
|
-
Base.metadata.create_all(bind=engine)
|
|
18
|
-
logger.info("Done, tables created")
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
if __name__ == "__main__":
|
|
22
|
-
logging.basicConfig(level=logging.INFO)
|
|
23
|
-
container = Container()
|
|
24
|
-
container.wire(modules=[__name__])
|
|
25
|
-
main()
|
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
import functools
|
|
2
|
-
import importlib
|
|
3
|
-
import logging
|
|
4
|
-
import platform
|
|
5
|
-
import sys
|
|
6
|
-
import threading
|
|
7
|
-
import time
|
|
8
|
-
import typing
|
|
9
|
-
|
|
10
|
-
import click
|
|
11
|
-
from dependency_injector.wiring import inject
|
|
12
|
-
from dependency_injector.wiring import Provide
|
|
13
|
-
from sqlalchemy import func
|
|
14
|
-
from sqlalchemy.orm import Session as DBSession
|
|
15
|
-
|
|
16
|
-
from .. import constants
|
|
17
|
-
from .. import models
|
|
18
|
-
from ..config import Config
|
|
19
|
-
from ..container import Container
|
|
20
|
-
from ..processors.registry import collect
|
|
21
|
-
from ..services.dispatch import DispatchService
|
|
22
|
-
from ..services.worker import WorkerService
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@inject
|
|
26
|
-
def update_workers(
|
|
27
|
-
worker_id: typing.Any,
|
|
28
|
-
config: Config = Provide[Container.config],
|
|
29
|
-
session_factory: typing.Callable = Provide[Container.session_factory],
|
|
30
|
-
make_dispatch_service: typing.Callable = Provide[Container.make_dispatch_service],
|
|
31
|
-
make_worker_service: typing.Callable = Provide[Container.make_worker_service],
|
|
32
|
-
):
|
|
33
|
-
db: DBSession = session_factory()
|
|
34
|
-
worker_service: WorkerService = make_worker_service(session=db)
|
|
35
|
-
dispatch_service: DispatchService = make_dispatch_service(session=db)
|
|
36
|
-
current_worker = worker_service.get_worker(worker_id)
|
|
37
|
-
logger = logging.getLogger(__name__)
|
|
38
|
-
logger.info(
|
|
39
|
-
"Updating worker %s with heartbeat_period=%s, heartbeat_timeout=%s",
|
|
40
|
-
current_worker.id,
|
|
41
|
-
config.WORKER_HEARTBEAT_PERIOD,
|
|
42
|
-
config.WORKER_HEARTBEAT_TIMEOUT,
|
|
43
|
-
)
|
|
44
|
-
while True:
|
|
45
|
-
dead_workers = worker_service.fetch_dead_workers(
|
|
46
|
-
timeout=config.WORKER_HEARTBEAT_TIMEOUT
|
|
47
|
-
)
|
|
48
|
-
task_count = worker_service.reschedule_dead_tasks(
|
|
49
|
-
# TODO: a better way to abstract this?
|
|
50
|
-
dead_workers.with_entities(current_worker.__class__.id)
|
|
51
|
-
)
|
|
52
|
-
found_dead_worker = False
|
|
53
|
-
for dead_worker in dead_workers:
|
|
54
|
-
found_dead_worker = True
|
|
55
|
-
logger.info(
|
|
56
|
-
"Found dead worker %s (name=%s), reschedule %s dead tasks in channels %s",
|
|
57
|
-
dead_worker.id,
|
|
58
|
-
dead_worker.name,
|
|
59
|
-
task_count,
|
|
60
|
-
dead_worker.channels,
|
|
61
|
-
)
|
|
62
|
-
dispatch_service.notify(dead_worker.channels)
|
|
63
|
-
if found_dead_worker:
|
|
64
|
-
db.commit()
|
|
65
|
-
|
|
66
|
-
if current_worker.state != models.WorkerState.RUNNING:
|
|
67
|
-
# This probably means we are somehow very slow to update the heartbeat in time, or the timeout window
|
|
68
|
-
# is set too short. It could also be the administrator update the worker state to something else than
|
|
69
|
-
# RUNNING. Regardless the reason, let's stop processing.
|
|
70
|
-
logger.warning(
|
|
71
|
-
"Current worker %s state is %s instead of running, quit processing"
|
|
72
|
-
)
|
|
73
|
-
sys.exit(0)
|
|
74
|
-
|
|
75
|
-
time.sleep(config.WORKER_HEARTBEAT_PERIOD)
|
|
76
|
-
current_worker.last_heartbeat = func.now()
|
|
77
|
-
db.add(current_worker)
|
|
78
|
-
db.commit()
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
@inject
|
|
82
|
-
def process_tasks(
|
|
83
|
-
channels: tuple[str, ...],
|
|
84
|
-
config: Config = Provide[Container.config],
|
|
85
|
-
db: DBSession = Provide[Container.session],
|
|
86
|
-
dispatch_service: DispatchService = Provide[Container.dispatch_service],
|
|
87
|
-
worker_service: WorkerService = Provide[Container.worker_service],
|
|
88
|
-
):
|
|
89
|
-
logger = logging.getLogger(__name__)
|
|
90
|
-
|
|
91
|
-
if not channels:
|
|
92
|
-
channels = [constants.DEFAULT_CHANNEL]
|
|
93
|
-
|
|
94
|
-
if not config.PROCESSOR_PACKAGES:
|
|
95
|
-
logger.error("No PROCESSOR_PACKAGES provided")
|
|
96
|
-
sys.exit(-1)
|
|
97
|
-
|
|
98
|
-
logger.info("Scanning packages %s", config.PROCESSOR_PACKAGES)
|
|
99
|
-
pkgs = list(map(importlib.import_module, config.PROCESSOR_PACKAGES))
|
|
100
|
-
registry = collect(pkgs)
|
|
101
|
-
for channel, module_processors in registry.processors.items():
|
|
102
|
-
logger.info("Collected processors with channel %r", channel)
|
|
103
|
-
for module, func_processors in module_processors.items():
|
|
104
|
-
for processor in func_processors.values():
|
|
105
|
-
logger.info(
|
|
106
|
-
" Processor module %r, processor %r", module, processor.name
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
worker = worker_service.make_worker(name=platform.node(), channels=channels)
|
|
110
|
-
db.add(worker)
|
|
111
|
-
dispatch_service.listen(channels)
|
|
112
|
-
db.commit()
|
|
113
|
-
|
|
114
|
-
logger.info("Created worker %s, name=%s", worker.id, worker.name)
|
|
115
|
-
logger.info("Processing tasks in channels = %s ...", channels)
|
|
116
|
-
|
|
117
|
-
worker_update_thread = threading.Thread(
|
|
118
|
-
target=functools.partial(
|
|
119
|
-
update_workers,
|
|
120
|
-
worker_id=worker.id,
|
|
121
|
-
),
|
|
122
|
-
name="update_workers",
|
|
123
|
-
)
|
|
124
|
-
worker_update_thread.daemon = True
|
|
125
|
-
worker_update_thread.start()
|
|
126
|
-
|
|
127
|
-
worker_id = worker.id
|
|
128
|
-
|
|
129
|
-
try:
|
|
130
|
-
while True:
|
|
131
|
-
while True:
|
|
132
|
-
tasks = dispatch_service.dispatch(
|
|
133
|
-
channels,
|
|
134
|
-
worker_id=worker_id,
|
|
135
|
-
limit=config.BATCH_SIZE,
|
|
136
|
-
).all()
|
|
137
|
-
for task in tasks:
|
|
138
|
-
logger.info(
|
|
139
|
-
"Processing task %s, channel=%s, module=%s, func=%s",
|
|
140
|
-
task.id,
|
|
141
|
-
task.channel,
|
|
142
|
-
task.module,
|
|
143
|
-
task.func_name,
|
|
144
|
-
)
|
|
145
|
-
# TODO: support processor pool and other approaches to dispatch the workload
|
|
146
|
-
registry.process(task)
|
|
147
|
-
if not tasks:
|
|
148
|
-
# we should try to keep dispatching until we cannot find tasks
|
|
149
|
-
break
|
|
150
|
-
else:
|
|
151
|
-
db.commit()
|
|
152
|
-
# we will not see notifications in a transaction, need to close the transaction first before entering
|
|
153
|
-
# polling
|
|
154
|
-
db.close()
|
|
155
|
-
try:
|
|
156
|
-
for notification in dispatch_service.poll(timeout=config.POLL_TIMEOUT):
|
|
157
|
-
logger.debug("Receive notification %s", notification)
|
|
158
|
-
except TimeoutError:
|
|
159
|
-
logger.debug("Poll timeout, try again")
|
|
160
|
-
continue
|
|
161
|
-
except (SystemExit, KeyboardInterrupt):
|
|
162
|
-
db.rollback()
|
|
163
|
-
logger.info("Shutting down ...")
|
|
164
|
-
worker_update_thread.join(5)
|
|
165
|
-
|
|
166
|
-
worker.state = models.WorkerState.SHUTDOWN
|
|
167
|
-
db.add(worker)
|
|
168
|
-
task_count = worker_service.reschedule_dead_tasks([worker.id])
|
|
169
|
-
logger.info("Reschedule %s tasks", task_count)
|
|
170
|
-
dispatch_service.notify(channels)
|
|
171
|
-
db.commit()
|
|
172
|
-
|
|
173
|
-
logger.info("Shutdown gracefully")
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
@click.command()
|
|
177
|
-
@click.argument("channels", nargs=-1)
|
|
178
|
-
def main(
|
|
179
|
-
channels: tuple[str, ...],
|
|
180
|
-
):
|
|
181
|
-
process_tasks(channels)
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
if __name__ == "__main__":
|
|
185
|
-
logging.basicConfig(level=logging.INFO)
|
|
186
|
-
container = Container()
|
|
187
|
-
container.wire(modules=[__name__])
|
|
188
|
-
main()
|
beanqueue-0.1.3/bq/container.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
import functools
|
|
2
|
-
import importlib
|
|
3
|
-
import typing
|
|
4
|
-
|
|
5
|
-
from dependency_injector import containers
|
|
6
|
-
from dependency_injector import providers
|
|
7
|
-
from sqlalchemy import create_engine
|
|
8
|
-
from sqlalchemy import Engine
|
|
9
|
-
from sqlalchemy.orm import Session as DBSession
|
|
10
|
-
from sqlalchemy.pool import SingletonThreadPool
|
|
11
|
-
|
|
12
|
-
from .config import Config
|
|
13
|
-
from .db.session import SessionMaker
|
|
14
|
-
from .services.dispatch import DispatchService
|
|
15
|
-
from .services.worker import WorkerService
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def get_model_class(name: str) -> typing.Type:
|
|
19
|
-
module_name, model_name = name.rsplit(".", 1)
|
|
20
|
-
module = importlib.import_module(module_name)
|
|
21
|
-
return getattr(module, model_name)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def make_db_engine(config: Config) -> Engine:
|
|
25
|
-
return create_engine(str(config.DATABASE_URL), poolclass=SingletonThreadPool)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def make_session_factory(engine: Engine) -> typing.Callable:
|
|
29
|
-
return functools.partial(SessionMaker, bind=engine)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def make_session(factory: typing.Callable) -> DBSession:
|
|
33
|
-
return factory()
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def make_dispatch_service(config: Config, session: DBSession) -> DispatchService:
|
|
37
|
-
return DispatchService(session, task_model=get_model_class(config.TASK_MODEL))
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def make_worker_service(config: Config, session: DBSession) -> WorkerService:
|
|
41
|
-
return WorkerService(
|
|
42
|
-
session,
|
|
43
|
-
task_model=get_model_class(config.TASK_MODEL),
|
|
44
|
-
worker_model=get_model_class(config.WORKER_MODEL),
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class Container(containers.DeclarativeContainer):
|
|
49
|
-
config = providers.Singleton(Config)
|
|
50
|
-
|
|
51
|
-
db_engine: Engine = providers.Singleton(make_db_engine, config=config)
|
|
52
|
-
|
|
53
|
-
session_factory: typing.Callable = providers.Singleton(
|
|
54
|
-
make_session_factory, engine=db_engine
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
session: DBSession = providers.Singleton(make_session, factory=session_factory)
|
|
58
|
-
|
|
59
|
-
dispatch_service: DispatchService = providers.Singleton(
|
|
60
|
-
make_dispatch_service,
|
|
61
|
-
config=config,
|
|
62
|
-
session=session,
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
worker_service: WorkerService = providers.Singleton(
|
|
66
|
-
make_worker_service, config=config, session=session
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
make_dispatch_service = providers.Singleton(
|
|
70
|
-
lambda config: functools.partial(make_dispatch_service, config=config),
|
|
71
|
-
config=config,
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
make_worker_service = providers.Singleton(
|
|
75
|
-
lambda config: functools.partial(make_worker_service, config=config),
|
|
76
|
-
config=config,
|
|
77
|
-
)
|
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
import collections
|
|
2
|
-
import dataclasses
|
|
3
|
-
import inspect
|
|
4
|
-
import logging
|
|
5
|
-
import typing
|
|
6
|
-
|
|
7
|
-
import venusian
|
|
8
|
-
from sqlalchemy.orm import object_session
|
|
9
|
-
|
|
10
|
-
from .. import constants
|
|
11
|
-
from .. import models
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclasses.dataclass(frozen=True)
|
|
15
|
-
class Processor:
|
|
16
|
-
channel: str
|
|
17
|
-
module: str
|
|
18
|
-
name: str
|
|
19
|
-
func: typing.Callable
|
|
20
|
-
# should we auto complete the task or not
|
|
21
|
-
auto_complete: bool = True
|
|
22
|
-
# should we auto rollback the transaction when encounter unhandled exception
|
|
23
|
-
auto_rollback_on_exc: bool = True
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class ProcessorHelper:
|
|
27
|
-
def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
|
|
28
|
-
self._processor = processor
|
|
29
|
-
self._task_cls = task_cls
|
|
30
|
-
|
|
31
|
-
def __call__(self, *args, **kwargs):
|
|
32
|
-
return self._processor.func(*args, **kwargs)
|
|
33
|
-
|
|
34
|
-
def run(self, **kwargs) -> models.Task:
|
|
35
|
-
return self._task_cls(
|
|
36
|
-
channel=self._processor.channel,
|
|
37
|
-
module=self._processor.module,
|
|
38
|
-
func_name=self._processor.name,
|
|
39
|
-
kwargs=kwargs,
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def process_task(task: models.Task, processor: Processor):
|
|
44
|
-
logger = logging.getLogger(__name__)
|
|
45
|
-
db = object_session(task)
|
|
46
|
-
func_signature = inspect.signature(processor.func)
|
|
47
|
-
base_kwargs = {}
|
|
48
|
-
if "task" in func_signature.parameters:
|
|
49
|
-
base_kwargs["task"] = task
|
|
50
|
-
if "db" in func_signature.parameters:
|
|
51
|
-
base_kwargs["db"] = db
|
|
52
|
-
with db.begin_nested() as savepoint:
|
|
53
|
-
if "savepoint" in func_signature.parameters:
|
|
54
|
-
base_kwargs["savepoint"] = savepoint
|
|
55
|
-
try:
|
|
56
|
-
result = processor.func(**base_kwargs, **task.kwargs)
|
|
57
|
-
except Exception as exc:
|
|
58
|
-
logger.error("Unhandled exception for task %s", task.id, exc_info=True)
|
|
59
|
-
if processor.auto_rollback_on_exc:
|
|
60
|
-
savepoint.rollback()
|
|
61
|
-
# TODO: add error event
|
|
62
|
-
task.state = models.TaskState.FAILED
|
|
63
|
-
task.error_message = str(exc)
|
|
64
|
-
db.add(task)
|
|
65
|
-
return
|
|
66
|
-
if processor.auto_complete:
|
|
67
|
-
logger.info("Task %s auto complete", task.id)
|
|
68
|
-
task.state = models.TaskState.DONE
|
|
69
|
-
task.result = result
|
|
70
|
-
db.add(task)
|
|
71
|
-
return result
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class Registry:
|
|
75
|
-
def __init__(self):
|
|
76
|
-
self.logger = logging.getLogger(__name__)
|
|
77
|
-
self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))
|
|
78
|
-
|
|
79
|
-
def add(self, processor: Processor):
|
|
80
|
-
self.processors[processor.channel][processor.module][processor.name] = processor
|
|
81
|
-
|
|
82
|
-
def process(self, task: models.Task) -> typing.Any:
|
|
83
|
-
modules = self.processors.get(task.channel, {})
|
|
84
|
-
functions = modules.get(task.module, {})
|
|
85
|
-
processor = functions.get(task.func_name)
|
|
86
|
-
db = object_session(task)
|
|
87
|
-
if processor is None:
|
|
88
|
-
self.logger.error(
|
|
89
|
-
"Cannot find processor for task %s with module=%s, func=%s",
|
|
90
|
-
task.id,
|
|
91
|
-
task.module,
|
|
92
|
-
task.func_name,
|
|
93
|
-
)
|
|
94
|
-
# TODO: add error event
|
|
95
|
-
task.state = models.TaskState.FAILED
|
|
96
|
-
task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
|
|
97
|
-
db.add(task)
|
|
98
|
-
return
|
|
99
|
-
return process_task(task, processor)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def processor(
|
|
103
|
-
channel: str = constants.DEFAULT_CHANNEL,
|
|
104
|
-
auto_complete: bool = True,
|
|
105
|
-
auto_rollback_on_exc: bool = True,
|
|
106
|
-
task_cls: typing.Type = models.Task,
|
|
107
|
-
) -> typing.Callable:
|
|
108
|
-
def decorator(wrapped: typing.Callable):
|
|
109
|
-
processor = Processor(
|
|
110
|
-
module=wrapped.__module__,
|
|
111
|
-
name=wrapped.__name__,
|
|
112
|
-
channel=channel,
|
|
113
|
-
func=wrapped,
|
|
114
|
-
auto_complete=auto_complete,
|
|
115
|
-
auto_rollback_on_exc=auto_rollback_on_exc,
|
|
116
|
-
)
|
|
117
|
-
helper_obj = ProcessorHelper(processor, task_cls=task_cls)
|
|
118
|
-
|
|
119
|
-
def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
|
|
120
|
-
if processor.name != name:
|
|
121
|
-
raise ValueError("Name is not the same")
|
|
122
|
-
scanner.registry.add(processor)
|
|
123
|
-
|
|
124
|
-
venusian.attach(helper_obj, callback, category=constants.BQ_PROCESSOR_CATEGORY)
|
|
125
|
-
return helper_obj
|
|
126
|
-
|
|
127
|
-
return decorator
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
|
|
131
|
-
if registry is None:
|
|
132
|
-
registry = Registry()
|
|
133
|
-
scanner = venusian.Scanner(registry=registry)
|
|
134
|
-
for package in packages:
|
|
135
|
-
scanner.scan(package, categories=(constants.BQ_PROCESSOR_CATEGORY,))
|
|
136
|
-
return registry
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|