beanqueue 0.1.3__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {beanqueue-0.1.3 → beanqueue-0.2.0}/PKG-INFO +20 -10
  2. {beanqueue-0.1.3 → beanqueue-0.2.0}/README.md +18 -8
  3. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/__init__.py +1 -2
  4. beanqueue-0.2.0/bq/app.py +260 -0
  5. beanqueue-0.2.0/bq/cmds/create_tables.py +26 -0
  6. beanqueue-0.2.0/bq/cmds/process.py +23 -0
  7. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/cmds/submit.py +9 -9
  8. beanqueue-0.2.0/bq/cmds/utils.py +14 -0
  9. beanqueue-0.2.0/bq/events.py +3 -0
  10. beanqueue-0.2.0/bq/processors/processor.py +70 -0
  11. beanqueue-0.2.0/bq/processors/registry.py +47 -0
  12. beanqueue-0.2.0/bq/utils.py +8 -0
  13. {beanqueue-0.1.3 → beanqueue-0.2.0}/pyproject.toml +2 -2
  14. beanqueue-0.1.3/bq/cmds/create_tables.py +0 -25
  15. beanqueue-0.1.3/bq/cmds/process.py +0 -188
  16. beanqueue-0.1.3/bq/container.py +0 -77
  17. beanqueue-0.1.3/bq/processors/registry.py +0 -136
  18. {beanqueue-0.1.3 → beanqueue-0.2.0}/LICENSE +0 -0
  19. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/cmds/__init__.py +0 -0
  20. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/config.py +0 -0
  21. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/constants.py +0 -0
  22. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/db/__init__.py +0 -0
  23. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/db/base.py +0 -0
  24. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/db/session.py +0 -0
  25. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/models/__init__.py +0 -0
  26. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/models/helpers.py +0 -0
  27. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/models/task.py +0 -0
  28. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/models/worker.py +0 -0
  29. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/processors/__init__.py +0 -0
  30. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/services/__init__.py +0 -0
  31. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/services/dispatch.py +0 -0
  32. {beanqueue-0.1.3 → beanqueue-0.2.0}/bq/services/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: beanqueue
3
- Version: 0.1.3
3
+ Version: 0.2.0
4
4
  Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
5
5
  License: MIT
6
6
  Author: Fang-Pen Lin
@@ -10,8 +10,8 @@ Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: blinker (>=1.8.2,<2.0.0)
13
14
  Requires-Dist: click (>=8.1.7,<9.0.0)
14
- Requires-Dist: dependency-injector (>=4.41.0,<5.0.0)
15
15
  Requires-Dist: pg-activity (>=3.5.1,<4.0.0)
16
16
  Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
17
17
  Requires-Dist: sqlalchemy (>=2.0.30,<3.0.0)
@@ -50,7 +50,9 @@ import bq
50
50
  from .. import models
51
51
  from .. import image_utils
52
52
 
53
- @bq.processor(channel="images")
53
+ app = bq.BeanQueue()
54
+
55
+ @app.processor(channel="images")
54
56
  def resize_image(db: Session, task: bq.Task, width: int, height: int):
55
57
  image = db.query(models.Image).filter(models.Image.task == task).one()
56
58
  image_utils.resize(image, size=(width, height))
@@ -61,6 +63,7 @@ def resize_image(db: Session, task: bq.Task, width: int, height: int):
61
63
 
62
64
  The `db` and `task` keyword arguments are optional.
63
65
  If you don't need to access the task object, you can simply define the function without these two parameters.
66
+ We also provide an optional `savepoint` argument in case you want to roll back the database changes you made.
64
67
 
65
68
  To submit a task, you can either use `bq.Task` model object to construct the task object, insert into the
66
69
  database session and commit.
@@ -130,14 +133,12 @@ Configurations can be modified by setting environment variables with `BQ_` prefi
130
133
  For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
131
134
  To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
132
135
  The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
133
- For now, the configurations only affect command line tools.
134
136
 
135
- If you want to configure BeanQueue programmatically for the command lines, you can override our [dependency-injector](https://python-dependency-injector.ets-labs.org/)'s container defined at [bq/container.py](bq/container.py) and call the command function manually.
137
+ If you want to configure BeanQueue programmatically, you can pass a `Config` object to `bq.BeanQueue` when creating it.
136
138
  For example:
137
139
 
138
140
  ```python
139
141
  import bq
140
- from bq.cmds.process import process_tasks
141
142
  from .my_config import config
142
143
 
143
144
  container = bq.Container()
@@ -147,11 +148,20 @@ config = bq.Config(
147
148
  DATABASE_URL=str(config.DATABASE_URL),
148
149
  BATCH_SIZE=10,
149
150
  )
150
- with container.config.override(config):
151
- process_tasks(channels=("images",))
151
+ app = bq.BeanQueue(config=config)
152
+ ```
153
+
154
+ Then you can pass `--app` argument pointing to the app object to the process command like this:
155
+
156
+ ```bash
157
+ python -m bq.cmds.process -a my_pkgs.bq.app images
152
158
  ```
153
159
 
154
- Many other behaviors of this framework can also be modified by overriding the container defined at [bq/container.py](bq/container.py).
160
+ Or if you prefer to define your own process command, you can also call `process_tasks` of the `BeanQueue` object directly like this:
161
+
162
+ ```python
163
+ app.process_tasks(channels=("images",))
164
+ ```
155
165
 
156
166
  ### Define your own tables
157
167
 
@@ -231,7 +241,7 @@ config = bq.Config(
231
241
  WORKER_MODEL="my_pkgs.models.Worker",
232
242
  # ... other configs
233
243
  )
234
- # Override container...
244
+ app = bq.BeanQueue(config)
235
245
  ```
236
246
 
237
247
  ## Why?
@@ -30,7 +30,9 @@ import bq
30
30
  from .. import models
31
31
  from .. import image_utils
32
32
 
33
- @bq.processor(channel="images")
33
+ app = bq.BeanQueue()
34
+
35
+ @app.processor(channel="images")
34
36
  def resize_image(db: Session, task: bq.Task, width: int, height: int):
35
37
  image = db.query(models.Image).filter(models.Image.task == task).one()
36
38
  image_utils.resize(image, size=(width, height))
@@ -41,6 +43,7 @@ def resize_image(db: Session, task: bq.Task, width: int, height: int):
41
43
 
42
44
  The `db` and `task` keyword arguments are optional.
43
45
  If you don't need to access the task object, you can simply define the function without these two parameters.
46
+ We also provide an optional `savepoint` argument in case you want to roll back the database changes you made.
44
47
 
45
48
  To submit a task, you can either use `bq.Task` model object to construct the task object, insert into the
46
49
  database session and commit.
@@ -110,14 +113,12 @@ Configurations can be modified by setting environment variables with `BQ_` prefi
110
113
  For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
111
114
  To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
112
115
  The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
113
- For now, the configurations only affect command line tools.
114
116
 
115
- If you want to configure BeanQueue programmatically for the command lines, you can override our [dependency-injector](https://python-dependency-injector.ets-labs.org/)'s container defined at [bq/container.py](bq/container.py) and call the command function manually.
117
+ If you want to configure BeanQueue programmatically, you can pass a `Config` object to `bq.BeanQueue` when creating it.
116
118
  For example:
117
119
 
118
120
  ```python
119
121
  import bq
120
- from bq.cmds.process import process_tasks
121
122
  from .my_config import config
122
123
 
123
124
  container = bq.Container()
@@ -127,11 +128,20 @@ config = bq.Config(
127
128
  DATABASE_URL=str(config.DATABASE_URL),
128
129
  BATCH_SIZE=10,
129
130
  )
130
- with container.config.override(config):
131
- process_tasks(channels=("images",))
131
+ app = bq.BeanQueue(config=config)
132
+ ```
133
+
134
+ Then you can pass `--app` argument pointing to the app object to the process command like this:
135
+
136
+ ```bash
137
+ python -m bq.cmds.process -a my_pkgs.bq.app images
132
138
  ```
133
139
 
134
- Many other behaviors of this framework can also be modified by overriding the container defined at [bq/container.py](bq/container.py).
140
+ Or if you prefer to define your own process command, you can also call `process_tasks` of the `BeanQueue` object directly like this:
141
+
142
+ ```python
143
+ app.process_tasks(channels=("images",))
144
+ ```
135
145
 
136
146
  ### Define your own tables
137
147
 
@@ -211,7 +221,7 @@ config = bq.Config(
211
221
  WORKER_MODEL="my_pkgs.models.Worker",
212
222
  # ... other configs
213
223
  )
214
- # Override container...
224
+ app = bq.BeanQueue(config)
215
225
  ```
216
226
 
217
227
  ## Why?
@@ -1,5 +1,5 @@
1
+ from .app import BeanQueue
1
2
  from .config import Config # noqa
2
- from .container import Container # noqa
3
3
  from .models import Task # noqa
4
4
  from .models import TaskModelMixin
5
5
  from .models import TaskModelRefWorkerMixin
@@ -8,4 +8,3 @@ from .models import Worker # noqa
8
8
  from .models import WorkerModelMixin # noqa
9
9
  from .models import WorkerRefMixin # noqa
10
10
  from .models import WorkerState # noqa
11
- from .processors.registry import processor # noqa
@@ -0,0 +1,260 @@
1
+ import functools
2
+ import importlib
3
+ import logging
4
+ import platform
5
+ import sys
6
+ import threading
7
+ import time
8
+ import typing
9
+
10
+ import venusian
11
+ from sqlalchemy import func
12
+ from sqlalchemy.engine import create_engine
13
+ from sqlalchemy.engine import Engine
14
+ from sqlalchemy.orm import Session as DBSession
15
+ from sqlalchemy.pool import SingletonThreadPool
16
+
17
+ from . import constants
18
+ from . import events
19
+ from . import models
20
+ from .config import Config
21
+ from .db.session import SessionMaker
22
+ from .processors.processor import Processor
23
+ from .processors.processor import ProcessorHelper
24
+ from .processors.registry import collect
25
+ from .services.dispatch import DispatchService
26
+ from .services.worker import WorkerService
27
+ from .utils import load_module_var
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
class BeanQueue:
    """Application object bundling configuration, database access and task processing.

    An instance holds the `Config`, the session factory, the service classes
    and (optionally) a pre-built engine. It offers the `processor` decorator
    for registering task handlers and the `process_tasks` worker loop.
    """

    def __init__(
        self,
        config: Config | None = None,
        session_cls: typing.Callable[..., DBSession] = SessionMaker,
        worker_service_cls: typing.Type[WorkerService] = WorkerService,
        dispatch_service_cls: typing.Type[DispatchService] = DispatchService,
        engine: Engine | None = None,
    ):
        """Store the collaborators used by the app.

        :param config: configuration to use; a default `Config()` is created
            when omitted.
        :param session_cls: callable invoked as `session_cls(bind=engine)` to
            build a database session.
        :param worker_service_cls: class used to build the worker service.
        :param dispatch_service_cls: class used to build the dispatch service.
        :param engine: engine to use; when omitted, one is created lazily from
            `config.DATABASE_URL` on first access of `engine`.
        """
        self.config = config if config is not None else Config()
        self.session_cls = session_cls
        self.worker_service_cls = worker_service_cls
        self.dispatch_service_cls = dispatch_service_cls
        self._engine = engine

    def create_default_engine(self):
        """Create a new engine for `config.DATABASE_URL` using `SingletonThreadPool`."""
        return create_engine(
            str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
        )

    def make_session(self) -> DBSession:
        """Return a new session bound to this app's engine."""
        return self.session_cls(bind=self.engine)

    @property
    def engine(self) -> Engine:
        """The engine passed to the constructor, or a lazily created default one."""
        if self._engine is None:
            self._engine = self.create_default_engine()
        return self._engine

    @property
    def task_model(self) -> typing.Type[models.Task]:
        """The task model class referenced by `config.TASK_MODEL`."""
        return load_module_var(self.config.TASK_MODEL)

    @property
    def worker_model(self) -> typing.Type[models.Worker]:
        """The worker model class referenced by `config.WORKER_MODEL`."""
        return load_module_var(self.config.WORKER_MODEL)

    def _make_worker_service(self, session: DBSession):
        """Build a worker service bound to `session` and the configured models."""
        return self.worker_service_cls(
            session=session, task_model=self.task_model, worker_model=self.worker_model
        )

    def _make_dispatch_service(self, session: DBSession):
        """Build a dispatch service bound to `session` and the configured task model."""
        return self.dispatch_service_cls(session=session, task_model=self.task_model)

    def processor(
        self,
        channel: str = constants.DEFAULT_CHANNEL,
        auto_complete: bool = True,
        auto_rollback_on_exc: bool = True,
        task_model: typing.Type | None = None,
    ) -> typing.Callable:
        """Return a decorator that turns a function into a task processor.

        The decorated function is replaced by a `ProcessorHelper`. A venusian
        callback is attached so that scanning the module adds the `Processor`
        to the scanner's registry.

        :param channel: channel the processor handles.
        :param auto_complete: forwarded to `Processor.auto_complete`.
        :param auto_rollback_on_exc: forwarded to `Processor.auto_rollback_on_exc`.
        :param task_model: task class handed to the helper; defaults to this
            app's `task_model`.
        """

        def decorator(wrapped: typing.Callable):
            processor = Processor(
                module=wrapped.__module__,
                name=wrapped.__name__,
                channel=channel,
                func=wrapped,
                auto_complete=auto_complete,
                auto_rollback_on_exc=auto_rollback_on_exc,
            )
            helper_obj = ProcessorHelper(
                processor,
                task_cls=task_model if task_model is not None else self.task_model,
            )

            def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
                # The registry is keyed by function name, so the scanned name
                # has to agree with the one recorded at decoration time.
                if processor.name != name:
                    raise ValueError("Name is not the same")
                scanner.registry.add(processor)

            venusian.attach(
                helper_obj, callback, category=constants.BQ_PROCESSOR_CATEGORY
            )
            return helper_obj

        return decorator

    def update_workers(
        self,
        worker_id: typing.Any,
    ):
        """Heartbeat loop: reschedule tasks of dead workers and refresh our heartbeat.

        Runs forever on its own session. `process_tasks` starts it on a daemon
        thread.

        :param worker_id: id of the worker whose heartbeat is maintained.
        """
        db = self.make_session()

        worker_service = self._make_worker_service(db)
        dispatch_service = self._make_dispatch_service(db)

        current_worker = worker_service.get_worker(worker_id)
        logger.info(
            "Updating worker %s with heartbeat_period=%s, heartbeat_timeout=%s",
            current_worker.id,
            self.config.WORKER_HEARTBEAT_PERIOD,
            self.config.WORKER_HEARTBEAT_TIMEOUT,
        )
        while True:
            dead_workers = worker_service.fetch_dead_workers(
                timeout=self.config.WORKER_HEARTBEAT_TIMEOUT
            )
            task_count = worker_service.reschedule_dead_tasks(
                # TODO: a better way to abstract this?
                dead_workers.with_entities(current_worker.__class__.id)
            )
            found_dead_worker = False
            for dead_worker in dead_workers:
                found_dead_worker = True
                logger.info(
                    "Found dead worker %s (name=%s), reschedule %s dead tasks in channels %s",
                    dead_worker.id,
                    dead_worker.name,
                    task_count,
                    dead_worker.channels,
                )
                dispatch_service.notify(dead_worker.channels)
            if found_dead_worker:
                db.commit()

            if current_worker.state != models.WorkerState.RUNNING:
                # This probably means we are somehow very slow to update the heartbeat in time, or the timeout window
                # is set too short. It could also be the administrator update the worker state to something else than
                # RUNNING. Regardless the reason, let's stop processing.
                logger.warning(
                    "Current worker %s state is %s instead of running, quit processing",
                    current_worker.id,
                    current_worker.state,
                )
                # NOTE(review): this runs on the heartbeat thread, so sys.exit()
                # presumably ends only this thread rather than the whole worker
                # process - confirm whether the main loop should be stopped too.
                sys.exit(0)

            time.sleep(self.config.WORKER_HEARTBEAT_PERIOD)
            current_worker.last_heartbeat = func.now()
            db.add(current_worker)
            db.commit()

    def process_tasks(
        self,
        channels: tuple[str, ...],
    ):
        """Register a worker and process tasks from `channels` until interrupted.

        Scans `config.PROCESSOR_PACKAGES` for processors, creates a worker row,
        starts the heartbeat thread, then alternates between dispatching
        batches of tasks and polling for notifications. On `SystemExit` or
        `KeyboardInterrupt` the worker is marked as shut down and its tasks are
        rescheduled.

        :param channels: channels to listen on; the default channel is used
            when empty.
        :raises ValueError: if `config.PROCESSOR_PACKAGES` is empty.
        """
        db = self.make_session()
        if not channels:
            channels = [constants.DEFAULT_CHANNEL]

        if not self.config.PROCESSOR_PACKAGES:
            logger.error("No PROCESSOR_PACKAGES provided")
            raise ValueError("No PROCESSOR_PACKAGES provided")

        logger.info("Scanning packages %s", self.config.PROCESSOR_PACKAGES)
        pkgs = list(map(importlib.import_module, self.config.PROCESSOR_PACKAGES))
        registry = collect(pkgs)
        for channel, module_processors in registry.processors.items():
            logger.info("Collected processors with channel %r", channel)
            for module, func_processors in module_processors.items():
                for processor in func_processors.values():
                    logger.info(
                        " Processor module=%r, name=%r", module, processor.name
                    )

        dispatch_service = self._make_dispatch_service(db)
        work_service = self._make_worker_service(db)

        worker = work_service.make_worker(name=platform.node(), channels=channels)
        db.add(worker)
        dispatch_service.listen(channels)
        db.commit()

        logger.info("Created worker %s, name=%s", worker.id, worker.name)
        events.worker_init.send(self, worker=worker)

        logger.info("Processing tasks in channels = %s ...", channels)

        worker_update_thread = threading.Thread(
            target=functools.partial(
                self.update_workers,
                worker_id=worker.id,
            ),
            name="update_workers",
        )
        worker_update_thread.daemon = True
        worker_update_thread.start()

        worker_id = worker.id

        try:
            while True:
                while True:
                    tasks = dispatch_service.dispatch(
                        channels,
                        worker_id=worker_id,
                        limit=self.config.BATCH_SIZE,
                    ).all()
                    for task in tasks:
                        logger.info(
                            "Processing task %s, channel=%s, module=%s, func=%s",
                            task.id,
                            task.channel,
                            task.module,
                            task.func_name,
                        )
                        # TODO: support processor pool and other approaches to dispatch the workload
                        registry.process(task)
                    if not tasks:
                        # we should try to keep dispatching until we cannot find tasks
                        break
                    else:
                        db.commit()
                # we will not see notifications in a transaction, need to close the transaction first before entering
                # polling
                db.close()
                try:
                    for notification in dispatch_service.poll(
                        timeout=self.config.POLL_TIMEOUT
                    ):
                        logger.debug("Receive notification %s", notification)
                except TimeoutError:
                    logger.debug("Poll timeout, try again")
                    continue
        except (SystemExit, KeyboardInterrupt):
            db.rollback()
            logger.info("Shutting down ...")
            worker_update_thread.join(5)

        worker.state = models.WorkerState.SHUTDOWN
        db.add(worker)
        # Must go through the service *instance*: calling the method on the
        # class would bind the id list as `self` and fail during shutdown.
        task_count = work_service.reschedule_dead_tasks([worker.id])
        logger.info("Reschedule %s tasks", task_count)
        dispatch_service.notify(channels)
        db.commit()

        logger.info("Shutdown gracefully")
@@ -0,0 +1,26 @@
1
+ import logging
2
+
3
+ import click
4
+
5
+ from .. import models # noqa
6
+ from ..db.base import Base
7
+ from .utils import load_app
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
@click.command()
@click.option(
    "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
)
def main(
    app: str | None = None,
):
    """Create all tables known to `Base.metadata` on the selected app's engine."""
    bq_app = load_app(app)
    target_engine = bq_app.engine
    Base.metadata.create_all(bind=target_engine)
    logger.info("Done, tables created")


if __name__ == "__main__":
    # Running as a script: enable INFO logging before invoking the command.
    logging.basicConfig(level=logging.INFO)
    main()
@@ -0,0 +1,23 @@
1
+ import logging
2
+
3
+ import click
4
+
5
+ from .utils import load_app
6
+
7
+
8
@click.command()
@click.argument("channels", nargs=-1)
@click.option(
    "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
)
def main(
    channels: tuple[str, ...],
    app: str | None = None,
):
    """Load the selected app and run its task-processing loop on `channels`."""
    bq_app = load_app(app)
    bq_app.process_tasks(channels)


if __name__ == "__main__":
    # Running as a script: enable INFO logging before invoking the command.
    logging.basicConfig(level=logging.INFO)
    main()
@@ -2,12 +2,11 @@ import json
2
2
  import logging
3
3
 
4
4
  import click
5
- from dependency_injector.wiring import inject
6
- from dependency_injector.wiring import Provide
7
5
 
8
6
  from .. import models
9
- from ..container import Container
10
- from ..db.session import Session
7
+ from .utils import load_app
8
+
9
+ logger = logging.getLogger(__name__)
11
10
 
12
11
 
13
12
  @click.command()
@@ -17,15 +16,18 @@ from ..db.session import Session
17
16
  @click.option(
18
17
  "-k", "--kwargs", type=str, help="Keyword arguments as JSON", default=None
19
18
  )
20
- @inject
19
+ @click.option(
20
+ "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
21
+ )
21
22
  def main(
22
23
  channel: str,
23
24
  module: str,
24
25
  func: str,
25
26
  kwargs: str | None,
26
- db: Session = Provide[Container.session],
27
+ app: str | None = None,
27
28
  ):
28
- logger = logging.getLogger(__name__)
29
+ app = load_app(app)
30
+ db = app.session_cls(bind=app.create_default_engine())
29
31
 
30
32
  logger.info(
31
33
  "Submit task with channel=%s, module=%s, func=%s", channel, module, func
@@ -43,6 +45,4 @@ def main(
43
45
 
44
46
  if __name__ == "__main__":
45
47
  logging.basicConfig(level=logging.INFO)
46
- container = Container()
47
- container.wire(modules=[__name__])
48
48
  main()
@@ -0,0 +1,14 @@
1
+ import logging
2
+
3
+ from ..app import BeanQueue
4
+ from ..utils import load_module_var
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
def load_app(app: str | None) -> BeanQueue:
    """Resolve the app for a command-line tool.

    :param app: dotted path of the app object (e.g. "my_pkgs.bq.app"), or
        `None` to use a default `BeanQueue()`.
    :return: the object found at the dotted path, or a fresh default app.
    """
    if app is not None:
        logger.info("Load BeanQueue app from %s", app)
        return load_module_var(app)
    logger.info("No BeanQueue app provided, create default app")
    return BeanQueue()
@@ -0,0 +1,3 @@
1
+ import blinker
2
+
3
# Named blinker signal. `BeanQueue.process_tasks` sends it right after the new
# worker row has been committed, with the app as sender and the worker passed
# as the `worker` keyword argument.
+ worker_init = blinker.signal("worker-init")
@@ -0,0 +1,70 @@
1
+ import dataclasses
2
+ import inspect
3
+ import logging
4
+ import typing
5
+
6
+ from sqlalchemy.orm import object_session
7
+
8
+ from .. import models
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
@dataclasses.dataclass(frozen=True)
class Processor:
    """Immutable description of one registered task handler."""

    # channel the handler is registered under
    channel: str
    # module name of the handler function
    module: str
    # name of the handler function
    name: str
    # the handler function itself
    func: typing.Callable
    # should we auto complete the task or not
    auto_complete: bool = True
    # should we auto rollback the transaction when encounter unhandled exception
    auto_rollback_on_exc: bool = True

    def process(self, task: models.Task):
        """Run the handler for `task` inside a nested transaction.

        `task`, `db` and `savepoint` are injected only when the handler
        declares parameters with those names; the task's own `kwargs` are
        always passed. An unhandled exception marks the task as failed and
        yields `None`; otherwise the handler's result is returned.
        """
        session = object_session(task)
        accepted = inspect.signature(self.func).parameters
        injected = {
            key: value
            for key, value in (("task", task), ("db", session))
            if key in accepted
        }
        with session.begin_nested() as savepoint:
            if "savepoint" in accepted:
                injected["savepoint"] = savepoint
            try:
                outcome = self.func(**injected, **task.kwargs)
            except Exception as exc:
                logger.error("Unhandled exception for task %s", task.id, exc_info=True)
                if self.auto_rollback_on_exc:
                    savepoint.rollback()
                # TODO: add error event
                task.state = models.TaskState.FAILED
                task.error_message = str(exc)
                session.add(task)
                return
        if not self.auto_complete:
            return outcome
        logger.info("Task %s auto complete", task.id)
        task.state = models.TaskState.DONE
        task.result = outcome
        session.add(task)
        return outcome
52
+
53
+
54
class ProcessorHelper:
    """Callable stand-in for a decorated processor function.

    Calling the helper forwards to the wrapped function, while `run` builds a
    task object addressed to that processor.
    """

    def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
        self._processor = processor
        self._task_cls = task_cls

    def __call__(self, *args, **kwargs):
        """Invoke the wrapped function directly with the given arguments."""
        target = self._processor.func
        return target(*args, **kwargs)

    def run(self, **kwargs) -> models.Task:
        """Build a task for this processor carrying `kwargs` as its arguments."""
        proc = self._processor
        task_fields = dict(
            channel=proc.channel,
            module=proc.module,
            func_name=proc.name,
            kwargs=kwargs,
        )
        return self._task_cls(**task_fields)
@@ -0,0 +1,47 @@
1
+ import collections
2
+ import logging
3
+ import typing
4
+
5
+ import venusian
6
+ from sqlalchemy.orm import object_session
7
+
8
+ from .. import constants
9
+ from .. import models
10
+ from .processor import Processor
11
+
12
+
13
class Registry:
    """Lookup table of processors keyed by channel, then module, then function name."""

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))

    def add(self, processor: Processor):
        """Register `processor` under its channel, module and name."""
        by_module = self.processors[processor.channel]
        by_module[processor.module][processor.name] = processor

    def process(self, task: models.Task) -> typing.Any:
        """Dispatch `task` to its processor and return that processor's result.

        When no processor matches, the task is marked as failed with an
        explanatory message and `None` is returned.
        """
        processor = (
            self.processors.get(task.channel, {})
            .get(task.module, {})
            .get(task.func_name)
        )
        db = object_session(task)
        if processor is not None:
            return processor.process(task)
        self.logger.error(
            "Cannot find processor for task %s with module=%s, func=%s",
            task.id,
            task.module,
            task.func_name,
        )
        # TODO: add error event
        task.state = models.TaskState.FAILED
        task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
        db.add(task)
        return None
39
+
40
+
41
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
    """Scan `packages` for processors and gather them into a registry.

    :param packages: imported packages to scan.
    :param registry: registry to fill; a new one is created when omitted.
    :return: the registry that was filled.
    """
    target = Registry() if registry is None else registry
    scanner = venusian.Scanner(registry=target)
    for pkg in packages:
        scanner.scan(pkg, categories=(constants.BQ_PROCESSOR_CATEGORY,))
    return target
@@ -0,0 +1,8 @@
1
+ import importlib
2
+ import typing
3
+
4
+
5
def load_module_var(name: str) -> typing.Any:
    """Import a module and return one of its attributes, given a dotted path.

    The text after the last dot is the attribute name; everything before it is
    the module to import. The attribute may be any object (a model class, an
    app instance, ...).

    :param name: dotted path such as "package.module.attr".
    :return: the attribute found on the imported module.
    :raises ValueError: if `name` has no module part (e.g. it contains no dot).
    :raises ImportError: if the module cannot be imported.
    :raises AttributeError: if the module has no such attribute.
    """
    module_name, _, var_name = name.rpartition(".")
    if not module_name:
        # A bare name would otherwise fail with an opaque tuple-unpacking
        # error; say what was expected instead.
        raise ValueError(
            f"Expected a dotted path like 'package.module.attr', got {name!r}"
        )
    module = importlib.import_module(module_name)
    return getattr(module, var_name)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "beanqueue"
3
- version = "0.1.3"
3
+ version = "0.2.0"
4
4
  description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library"
5
5
  authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
6
6
  license = "MIT"
@@ -14,9 +14,9 @@ python = "^3.11"
14
14
  sqlalchemy = "^2.0.30"
15
15
  venusian = "^3.1.0"
16
16
  click = "^8.1.7"
17
- dependency-injector = "^4.41.0"
18
17
  pydantic-settings = "^2.2.1"
19
18
  pg-activity = "^3.5.1"
19
+ blinker = "^1.8.2"
20
20
 
21
21
 
22
22
  [tool.poetry.group.dev.dependencies]
@@ -1,25 +0,0 @@
1
- import logging
2
-
3
- import click
4
- from dependency_injector.wiring import inject
5
- from dependency_injector.wiring import Provide
6
- from sqlalchemy.engine import Engine
7
-
8
- from .. import models # noqa
9
- from ..container import Container
10
- from ..db.base import Base
11
-
12
-
13
- @click.command()
14
- @inject
15
- def main(engine: Engine = Provide[Container.db_engine]):
16
- logger = logging.getLogger(__name__)
17
- Base.metadata.create_all(bind=engine)
18
- logger.info("Done, tables created")
19
-
20
-
21
- if __name__ == "__main__":
22
- logging.basicConfig(level=logging.INFO)
23
- container = Container()
24
- container.wire(modules=[__name__])
25
- main()
@@ -1,188 +0,0 @@
1
- import functools
2
- import importlib
3
- import logging
4
- import platform
5
- import sys
6
- import threading
7
- import time
8
- import typing
9
-
10
- import click
11
- from dependency_injector.wiring import inject
12
- from dependency_injector.wiring import Provide
13
- from sqlalchemy import func
14
- from sqlalchemy.orm import Session as DBSession
15
-
16
- from .. import constants
17
- from .. import models
18
- from ..config import Config
19
- from ..container import Container
20
- from ..processors.registry import collect
21
- from ..services.dispatch import DispatchService
22
- from ..services.worker import WorkerService
23
-
24
-
25
- @inject
26
- def update_workers(
27
- worker_id: typing.Any,
28
- config: Config = Provide[Container.config],
29
- session_factory: typing.Callable = Provide[Container.session_factory],
30
- make_dispatch_service: typing.Callable = Provide[Container.make_dispatch_service],
31
- make_worker_service: typing.Callable = Provide[Container.make_worker_service],
32
- ):
33
- db: DBSession = session_factory()
34
- worker_service: WorkerService = make_worker_service(session=db)
35
- dispatch_service: DispatchService = make_dispatch_service(session=db)
36
- current_worker = worker_service.get_worker(worker_id)
37
- logger = logging.getLogger(__name__)
38
- logger.info(
39
- "Updating worker %s with heartbeat_period=%s, heartbeat_timeout=%s",
40
- current_worker.id,
41
- config.WORKER_HEARTBEAT_PERIOD,
42
- config.WORKER_HEARTBEAT_TIMEOUT,
43
- )
44
- while True:
45
- dead_workers = worker_service.fetch_dead_workers(
46
- timeout=config.WORKER_HEARTBEAT_TIMEOUT
47
- )
48
- task_count = worker_service.reschedule_dead_tasks(
49
- # TODO: a better way to abstract this?
50
- dead_workers.with_entities(current_worker.__class__.id)
51
- )
52
- found_dead_worker = False
53
- for dead_worker in dead_workers:
54
- found_dead_worker = True
55
- logger.info(
56
- "Found dead worker %s (name=%s), reschedule %s dead tasks in channels %s",
57
- dead_worker.id,
58
- dead_worker.name,
59
- task_count,
60
- dead_worker.channels,
61
- )
62
- dispatch_service.notify(dead_worker.channels)
63
- if found_dead_worker:
64
- db.commit()
65
-
66
- if current_worker.state != models.WorkerState.RUNNING:
67
- # This probably means we are somehow very slow to update the heartbeat in time, or the timeout window
68
- # is set too short. It could also be the administrator update the worker state to something else than
69
- # RUNNING. Regardless the reason, let's stop processing.
70
- logger.warning(
71
- "Current worker %s state is %s instead of running, quit processing"
72
- )
73
- sys.exit(0)
74
-
75
- time.sleep(config.WORKER_HEARTBEAT_PERIOD)
76
- current_worker.last_heartbeat = func.now()
77
- db.add(current_worker)
78
- db.commit()
79
-
80
-
81
- @inject
82
- def process_tasks(
83
- channels: tuple[str, ...],
84
- config: Config = Provide[Container.config],
85
- db: DBSession = Provide[Container.session],
86
- dispatch_service: DispatchService = Provide[Container.dispatch_service],
87
- worker_service: WorkerService = Provide[Container.worker_service],
88
- ):
89
- logger = logging.getLogger(__name__)
90
-
91
- if not channels:
92
- channels = [constants.DEFAULT_CHANNEL]
93
-
94
- if not config.PROCESSOR_PACKAGES:
95
- logger.error("No PROCESSOR_PACKAGES provided")
96
- sys.exit(-1)
97
-
98
- logger.info("Scanning packages %s", config.PROCESSOR_PACKAGES)
99
- pkgs = list(map(importlib.import_module, config.PROCESSOR_PACKAGES))
100
- registry = collect(pkgs)
101
- for channel, module_processors in registry.processors.items():
102
- logger.info("Collected processors with channel %r", channel)
103
- for module, func_processors in module_processors.items():
104
- for processor in func_processors.values():
105
- logger.info(
106
- " Processor module %r, processor %r", module, processor.name
107
- )
108
-
109
- worker = worker_service.make_worker(name=platform.node(), channels=channels)
110
- db.add(worker)
111
- dispatch_service.listen(channels)
112
- db.commit()
113
-
114
- logger.info("Created worker %s, name=%s", worker.id, worker.name)
115
- logger.info("Processing tasks in channels = %s ...", channels)
116
-
117
- worker_update_thread = threading.Thread(
118
- target=functools.partial(
119
- update_workers,
120
- worker_id=worker.id,
121
- ),
122
- name="update_workers",
123
- )
124
- worker_update_thread.daemon = True
125
- worker_update_thread.start()
126
-
127
- worker_id = worker.id
128
-
129
- try:
130
- while True:
131
- while True:
132
- tasks = dispatch_service.dispatch(
133
- channels,
134
- worker_id=worker_id,
135
- limit=config.BATCH_SIZE,
136
- ).all()
137
- for task in tasks:
138
- logger.info(
139
- "Processing task %s, channel=%s, module=%s, func=%s",
140
- task.id,
141
- task.channel,
142
- task.module,
143
- task.func_name,
144
- )
145
- # TODO: support processor pool and other approaches to dispatch the workload
146
- registry.process(task)
147
- if not tasks:
148
- # we should try to keep dispatching until we cannot find tasks
149
- break
150
- else:
151
- db.commit()
152
- # we will not see notifications in a transaction, need to close the transaction first before entering
153
- # polling
154
- db.close()
155
- try:
156
- for notification in dispatch_service.poll(timeout=config.POLL_TIMEOUT):
157
- logger.debug("Receive notification %s", notification)
158
- except TimeoutError:
159
- logger.debug("Poll timeout, try again")
160
- continue
161
- except (SystemExit, KeyboardInterrupt):
162
- db.rollback()
163
- logger.info("Shutting down ...")
164
- worker_update_thread.join(5)
165
-
166
- worker.state = models.WorkerState.SHUTDOWN
167
- db.add(worker)
168
- task_count = worker_service.reschedule_dead_tasks([worker.id])
169
- logger.info("Reschedule %s tasks", task_count)
170
- dispatch_service.notify(channels)
171
- db.commit()
172
-
173
- logger.info("Shutdown gracefully")
174
-
175
-
176
- @click.command()
177
- @click.argument("channels", nargs=-1)
178
- def main(
179
- channels: tuple[str, ...],
180
- ):
181
- process_tasks(channels)
182
-
183
-
184
- if __name__ == "__main__":
185
- logging.basicConfig(level=logging.INFO)
186
- container = Container()
187
- container.wire(modules=[__name__])
188
- main()
@@ -1,77 +0,0 @@
1
- import functools
2
- import importlib
3
- import typing
4
-
5
- from dependency_injector import containers
6
- from dependency_injector import providers
7
- from sqlalchemy import create_engine
8
- from sqlalchemy import Engine
9
- from sqlalchemy.orm import Session as DBSession
10
- from sqlalchemy.pool import SingletonThreadPool
11
-
12
- from .config import Config
13
- from .db.session import SessionMaker
14
- from .services.dispatch import DispatchService
15
- from .services.worker import WorkerService
16
-
17
-
18
def get_model_class(name: str) -> typing.Type:
    """Resolve a dotted ``package.module.ClassName`` path to the object it names.

    The text before the last dot is imported as a module and the text after
    it is looked up as an attribute of that module.

    Args:
        name: Fully qualified dotted path of the model class.

    Returns:
        The attribute found on the imported module.

    Raises:
        ValueError: If *name* has no module part (no dot, or a leading dot).
        ImportError: If the module part cannot be imported.
        AttributeError: If the module does not define the requested attribute.
    """
    module_name, _, model_name = name.rpartition(".")
    if not module_name:
        # Without a module part there is nothing to import. Fail with a
        # message that names the offending value instead of an opaque
        # tuple-unpacking error.
        raise ValueError(
            f"Expected a dotted path like 'package.module.ClassName', got {name!r}"
        )
    module = importlib.import_module(module_name)
    return getattr(module, model_name)
22
-
23
-
24
def make_db_engine(config: Config) -> Engine:
    """Create the database engine for ``config.DATABASE_URL``.

    The URL is converted with ``str()`` before use and the engine is created
    with ``SingletonThreadPool`` as its pool class.
    """
    database_url = str(config.DATABASE_URL)
    return create_engine(database_url, poolclass=SingletonThreadPool)
26
-
27
-
28
def make_session_factory(engine: Engine) -> typing.Callable:
    """Return a callable that invokes ``SessionMaker`` with ``bind=engine``."""
    bound_factory = functools.partial(SessionMaker, bind=engine)
    return bound_factory
30
-
31
-
32
def make_session(factory: typing.Callable) -> DBSession:
    """Produce a session by calling *factory* with no arguments."""
    session = factory()
    return session
34
-
35
-
36
def make_dispatch_service(config: Config, session: DBSession) -> DispatchService:
    """Build a ``DispatchService`` on *session* for the configured task model.

    The task model class is resolved from the dotted path in
    ``config.TASK_MODEL``.
    """
    task_model = get_model_class(config.TASK_MODEL)
    return DispatchService(session, task_model=task_model)
38
-
39
-
40
def make_worker_service(config: Config, session: DBSession) -> WorkerService:
    """Build a ``WorkerService`` on *session* for the configured models.

    The task and worker model classes are resolved, in that order, from the
    dotted paths in ``config.TASK_MODEL`` and ``config.WORKER_MODEL``.
    """
    task_model = get_model_class(config.TASK_MODEL)
    worker_model = get_model_class(config.WORKER_MODEL)
    return WorkerService(session, task_model=task_model, worker_model=worker_model)
46
-
47
-
48
class Container(containers.DeclarativeContainer):
    """Dependency-injection container declaring the providers used by the package.

    Every attribute is a ``providers.Singleton``. The annotations name the
    type of the object each provider is built from, not the provider itself.

    Statement order matters here: ``dispatch_service`` and ``worker_service``
    must be declared before the class attributes ``make_dispatch_service`` and
    ``make_worker_service``, so that the names they pass to ``Singleton`` still
    resolve to the module-level factory functions.
    """

    # Configuration object, constructed with no arguments.
    config = providers.Singleton(Config)

    # Database engine built from the configuration.
    db_engine: Engine = providers.Singleton(make_db_engine, config=config)

    # Callable that creates sessions bound to the engine.
    session_factory: typing.Callable = providers.Singleton(
        make_session_factory, engine=db_engine
    )

    # Session obtained from the session factory.
    session: DBSession = providers.Singleton(make_session, factory=session_factory)

    # Dispatch service built from the configuration and the session above.
    dispatch_service: DispatchService = providers.Singleton(
        make_dispatch_service,
        config=config,
        session=session,
    )

    # Worker service built from the configuration and the session above.
    worker_service: WorkerService = providers.Singleton(
        make_worker_service, config=config, session=session
    )

    # Provider of a partial of the module-level make_dispatch_service with
    # ``config`` pre-bound. Inside the lambda the name is looked up in module
    # globals (class scope is not visible to nested functions), so it refers to
    # the factory function, not to this class attribute.
    make_dispatch_service = providers.Singleton(
        lambda config: functools.partial(make_dispatch_service, config=config),
        config=config,
    )

    # Same pattern for the worker service factory.
    make_worker_service = providers.Singleton(
        lambda config: functools.partial(make_worker_service, config=config),
        config=config,
    )
@@ -1,136 +0,0 @@
1
- import collections
2
- import dataclasses
3
- import inspect
4
- import logging
5
- import typing
6
-
7
- import venusian
8
- from sqlalchemy.orm import object_session
9
-
10
- from .. import constants
11
- from .. import models
12
-
13
-
14
@dataclasses.dataclass(frozen=True)
class Processor:
    """Immutable record describing one registered task-processing function.

    Attributes:
        channel: Channel the processor is registered under.
        module: Module name of the wrapped function.
        name: Name of the wrapped function.
        func: The callable invoked to process a task.
        auto_complete: Whether the task should be completed automatically.
        auto_rollback_on_exc: Whether the transaction should be rolled back
            automatically when an unhandled exception is encountered.
    """

    channel: str
    module: str
    name: str
    func: typing.Callable
    auto_complete: bool = True
    auto_rollback_on_exc: bool = True
24
-
25
-
26
class ProcessorHelper:
    """Callable wrapper around a ``Processor`` that can also build task objects.

    Calling the helper calls the wrapped function directly; ``run`` instead
    constructs a ``task_cls`` instance describing a call with the given
    keyword arguments.
    """

    def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
        self._task_cls = task_cls
        self._processor = processor

    def __call__(self, *args, **kwargs):
        """Invoke the wrapped function with the given arguments."""
        func = self._processor.func
        return func(*args, **kwargs)

    def run(self, **kwargs) -> models.Task:
        """Build a task object targeting the wrapped processor with *kwargs*."""
        spec = self._processor
        task_fields = dict(
            channel=spec.channel,
            module=spec.module,
            func_name=spec.name,
            kwargs=kwargs,
        )
        return self._task_cls(**task_fields)
41
-
42
-
43
def process_task(task: models.Task, processor: Processor):
    """Run ``processor.func`` for *task* inside a nested transaction.

    ``task``, ``db`` and ``savepoint`` are passed as keyword arguments only
    when the function's signature declares parameters with those names. The
    task's stored ``kwargs`` are always passed as well.

    On an unhandled exception the error is logged, the savepoint is rolled
    back when ``processor.auto_rollback_on_exc`` is set, and the task is marked
    ``FAILED`` with the exception text as its error message. On success, when
    ``processor.auto_complete`` is set, the task is marked ``DONE`` and the
    return value is stored as its result.

    Args:
        task: The task to process; must be attached to a session.
        processor: The registered processor whose function handles the task.

    Returns:
        The function's return value, or ``None`` if it raised.
    """
    logger = logging.getLogger(__name__)
    db = object_session(task)
    func_signature = inspect.signature(processor.func)
    base_kwargs = {}
    if "task" in func_signature.parameters:
        base_kwargs["task"] = task
    if "db" in func_signature.parameters:
        base_kwargs["db"] = db
    with db.begin_nested() as savepoint:
        if "savepoint" in func_signature.parameters:
            base_kwargs["savepoint"] = savepoint
        try:
            # A task stored without arguments may carry kwargs=None; unpacking
            # None would raise TypeError and wrongly mark the task as failed.
            # NOTE(review): confirm the kwargs column is nullable.
            result = processor.func(**base_kwargs, **(task.kwargs or {}))
        except Exception as exc:
            # logger.exception logs at ERROR level and attaches the traceback.
            logger.exception("Unhandled exception for task %s", task.id)
            if processor.auto_rollback_on_exc:
                savepoint.rollback()
            # TODO: add error event
            task.state = models.TaskState.FAILED
            task.error_message = str(exc)
            db.add(task)
            return
        if processor.auto_complete:
            logger.info("Task %s auto complete", task.id)
            task.state = models.TaskState.DONE
            task.result = result
            db.add(task)
        return result
72
-
73
-
74
class Registry:
    """Lookup table of processors keyed by channel, then module, then name."""

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # channel -> module -> function name -> Processor
        self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))

    def add(self, processor: Processor):
        """Register *processor* under its channel, module and name."""
        by_module = self.processors[processor.channel]
        by_name = by_module[processor.module]
        by_name[processor.name] = processor

    def process(self, task: models.Task) -> typing.Any:
        """Dispatch *task* to its processor, or mark it failed if none exists.

        Returns whatever ``process_task`` returns, or ``None`` when no
        processor is registered for the task.
        """
        # .get() is used at each level so lookups never create empty entries.
        by_module = self.processors.get(task.channel, {})
        by_name = by_module.get(task.module, {})
        found = by_name.get(task.func_name)
        db = object_session(task)
        if found is not None:
            return process_task(task, found)

        self.logger.error(
            "Cannot find processor for task %s with module=%s, func=%s",
            task.id,
            task.module,
            task.func_name,
        )
        # TODO: add error event
        task.state = models.TaskState.FAILED
        task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
        db.add(task)
        return None
100
-
101
-
102
def processor(
    channel: str = constants.DEFAULT_CHANNEL,
    auto_complete: bool = True,
    auto_rollback_on_exc: bool = True,
    task_cls: typing.Type = models.Task,
) -> typing.Callable:
    """Decorator factory that marks a function as a task processor.

    The decorated function is replaced by a ``ProcessorHelper``. A venusian
    callback is attached to that helper so that a later scan adds the
    corresponding ``Processor`` to the scanner's registry.
    """

    def decorator(wrapped: typing.Callable):
        spec = Processor(
            channel=channel,
            module=wrapped.__module__,
            name=wrapped.__name__,
            func=wrapped,
            auto_complete=auto_complete,
            auto_rollback_on_exc=auto_rollback_on_exc,
        )
        helper = ProcessorHelper(spec, task_cls=task_cls)

        def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
            # The scanned attribute name must match the wrapped function's name.
            if name != spec.name:
                raise ValueError("Name is not the same")
            scanner.registry.add(spec)

        venusian.attach(helper, callback, category=constants.BQ_PROCESSOR_CATEGORY)
        return helper

    return decorator
128
-
129
-
130
def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
    """Scan *packages* for processors and add them to a registry.

    A new ``Registry`` is created when none is supplied. The registry that
    was filled is returned.
    """
    target = Registry() if registry is None else registry
    scanner = venusian.Scanner(registry=target)
    for package in packages:
        # Restrict the scan to callbacks attached under the processor category.
        scanner.scan(package, categories=(constants.BQ_PROCESSOR_CATEGORY,))
    return target
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes