beanqueue 1.1.9__tar.gz → 2.0.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. beanqueue-1.1.9/README.md → beanqueue-2.0.0rc0/PKG-INFO +119 -0
  2. beanqueue-1.1.9/PKG-INFO → beanqueue-2.0.0rc0/README.md +100 -16
  3. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/app.py +227 -127
  4. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/config.py +21 -1
  5. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/events.py +2 -0
  6. beanqueue-2.0.0rc0/bq/metrics.py +171 -0
  7. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/pyproject.toml +15 -3
  8. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/.gitignore +0 -0
  9. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/LICENSE +0 -0
  10. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/__init__.py +0 -0
  11. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/__init__.py +0 -0
  12. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/cli.py +0 -0
  13. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/create_tables.py +0 -0
  14. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/environment.py +0 -0
  15. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/main.py +0 -0
  16. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/process.py +0 -0
  17. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/submit.py +0 -0
  18. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/cmds/utils.py +0 -0
  19. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/constants.py +0 -0
  20. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/db/__init__.py +0 -0
  21. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/db/base.py +0 -0
  22. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/db/session.py +0 -0
  23. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/models/__init__.py +0 -0
  24. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/models/event.py +0 -0
  25. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/models/helpers.py +0 -0
  26. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/models/task.py +0 -0
  27. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/models/worker.py +0 -0
  28. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/processors/__init__.py +0 -0
  29. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/processors/processor.py +0 -0
  30. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/processors/registry.py +0 -0
  31. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/processors/retry_policies.py +0 -0
  32. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/services/__init__.py +0 -0
  33. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/services/dispatch.py +0 -0
  34. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/services/worker.py +0 -0
  35. {beanqueue-1.1.9 → beanqueue-2.0.0rc0}/bq/utils.py +0 -0
@@ -1,3 +1,22 @@
1
+ Metadata-Version: 2.4
2
+ Name: beanqueue
3
+ Version: 2.0.0rc0
4
+ Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library
5
+ Author-email: Fang-Pen Lin <fangpen@launchplatform.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: <4,>=3.11
9
+ Requires-Dist: blinker<2,>=1.8.2
10
+ Requires-Dist: click<9,>=8.1.7
11
+ Requires-Dist: pydantic-settings<3,>=2.2.1
12
+ Requires-Dist: rich<14,>=13.7.1
13
+ Requires-Dist: sqlalchemy<3,>=2.0.30
14
+ Requires-Dist: venusian<4,>=3.1.0
15
+ Provides-Extra: metrics
16
+ Requires-Dist: starlette<2,>=0.27; extra == 'metrics'
17
+ Requires-Dist: uvicorn<1,>=0.30.0; extra == 'metrics'
18
+ Description-Content-Type: text/markdown
19
+
1
20
  # BeanQueue [![CircleCI](https://dl.circleci.com/status-badge/img/gh/LaunchPlatform/bq/tree/master.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/bq/tree/master)
2
21
 
3
22
  BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
@@ -14,6 +33,7 @@ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https
14
33
  - **Retry**: Built-in and customizable retry policies
15
34
  - **Schedule**: Schedule tasks to run later
16
35
  - **Worker heartbeat and auto-reschedule**: Each worker keeps updating its heartbeat; if one is found dead, the others will reschedule its tasks
36
+ - **Custom health checks**: Optional HTTP `/healthz` endpoint with pluggable checks via Blinker signals
17
37
  - **Customizable**: Custom Task, Worker and Event models. Use it as a library and build your own work queue
18
38
 
19
39
  ## Install
@@ -22,6 +42,20 @@ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https
22
42
  pip install beanqueue
23
43
  ```
24
44
 
45
+ To enable the optional metrics HTTP server (currently `/healthz` only), install with the `metrics` extra:
46
+
47
+ ```bash
48
+ pip install "beanqueue[metrics]"
49
+ ```
50
+
51
+ ## Upgrading to 2.0
52
+
53
+ BeanQueue 2.0 includes breaking changes around the metrics HTTP server and custom health checks:
54
+
55
+ - **`METRICS_HTTP_SERVER_ENABLED` defaults to `False`** (it was `True` in 1.x). Set `BQ_METRICS_HTTP_SERVER_ENABLED=true` to turn the server back on.
56
+ - **The metrics server requires optional dependencies.** Install `beanqueue[metrics]` (`starlette` and `uvicorn`). Without them, enabling the server raises an error at startup.
57
+ - **Custom health checks use the `healthz_check` event** (`bq.events.healthz_check`) instead of a `healthz_check` argument on `bq.BeanQueue`. Connect sync or async receivers to the signal.
58
+
25
59
  ## Usage
26
60
 
27
61
  You can define a basic task processor like this
@@ -216,6 +250,91 @@ Or if you prefer to define your own process command, you can also call `process_
216
250
  app.process_tasks(channels=("images",))
217
251
  ```
218
252
 
253
+ ### Health check and metrics HTTP server
254
+
255
+ When enabled, each worker starts a small HTTP server (Starlette + Uvicorn) for operational endpoints.
256
+ Today this only exposes `GET /healthz`, which returns `{"status": "ok"}` by default.
257
+
258
+ To enable it, install the `metrics` extra and turn the server on through configuration:
259
+
260
+ ```bash
261
+ pip install "beanqueue[metrics]"
262
+ BQ_METRICS_HTTP_SERVER_ENABLED=true bq process images
263
+ ```
264
+
265
+ Relevant settings (see [bq/config.py](bq/config.py)):
266
+
267
+ | Setting | Env var | Default |
268
+ | --- | --- | --- |
269
+ | `METRICS_HTTP_SERVER_ENABLED` | `BQ_METRICS_HTTP_SERVER_ENABLED` | `False` |
270
+ | `METRICS_HTTP_SERVER_INTERFACE` | `BQ_METRICS_HTTP_SERVER_INTERFACE` | `""` (all interfaces) |
271
+ | `METRICS_HTTP_SERVER_PORT` | `BQ_METRICS_HTTP_SERVER_PORT` | `8000` |
272
+ | `METRICS_HTTP_SERVER_LOG_LEVEL` | `BQ_METRICS_HTTP_SERVER_LOG_LEVEL` | `30` (`WARNING`) |
273
+
274
+ Access requests are logged at INFO via `uvicorn.access` (visible even when `METRICS_HTTP_SERVER_LOG_LEVEL` is `WARNING`).
275
+ BeanQueue also uses a `metrics_server` logger for its own messages.
276
+ Override the entire logging setup by passing a [logging.config](https://docs.python.org/3/library/logging.config.html#logging-config-dictschema) dict via `METRICS_HTTP_SERVER_LOG_CONFIG` (or `BQ_METRICS_HTTP_SERVER_LOG_CONFIG` as JSON):
277
+
278
+ ```python
279
+ import bq
280
+
281
+ config = bq.Config(
282
+ METRICS_HTTP_SERVER_ENABLED=True,
283
+ METRICS_HTTP_SERVER_LOG_CONFIG={
284
+ "version": 1,
285
+ "disable_existing_loggers": False,
286
+ "handlers": {
287
+ "default": {
288
+ "class": "logging.StreamHandler",
289
+ "formatter": "default",
290
+ }
291
+ },
292
+ "formatters": {
293
+ "default": {
294
+ "format": "%(asctime)s %(name)s %(levelname)s %(message)s",
295
+ }
296
+ },
297
+ "loggers": {
298
+ "metrics_server": {"handlers": ["default"], "level": "INFO"},
299
+ "uvicorn.access": {"handlers": ["default"], "level": "INFO"},
300
+ },
301
+ },
302
+ )
303
+ app = bq.BeanQueue(config=config)
304
+ ```
305
+
306
+ #### Custom health checks
307
+
308
+ Register additional checks by connecting receivers to `bq.events.healthz_check`.
309
+ If no receivers are connected, `/healthz` returns OK without touching the database.
310
+
311
+ With receivers connected, BeanQueue loads the current worker and passes a database `session` to each check.
312
+ Receivers may be synchronous or asynchronous; both can be mixed on the same signal.
313
+
314
+ ```python
315
+ from sqlalchemy import text
316
+ from sqlalchemy.orm import Session
317
+
318
+ import bq
319
+ from bq import events
320
+
321
+ app = bq.BeanQueue()
322
+
323
+
324
+ @events.healthz_check.connect
325
+ def check_database(sender: bq.BeanQueue, worker, session: Session):
326
+ session.execute(text("SELECT 1"))
327
+
328
+
329
+ @events.healthz_check.connect
330
+ async def check_external_service(sender: bq.BeanQueue, worker, session: Session):
331
+ # async HTTP call, etc.
332
+ ...
333
+ ```
334
+
335
+ A receiver's signature must accept every keyword argument BeanQueue sends, for example `(sender, worker, session)`, or absorb them with `(sender, **kwargs)`.
336
+ If a check raises an exception, `/healthz` responds with HTTP 500 and a JSON body containing the error message.
337
+
219
338
  ### Define your own tables
220
339
 
221
340
  BeanQueue is designed to be as customizable as possible.
@@ -1,19 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: beanqueue
3
- Version: 1.1.9
4
- Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library
5
- Author-email: Fang-Pen Lin <fangpen@launchplatform.com>
6
- License-Expression: MIT
7
- License-File: LICENSE
8
- Requires-Python: ~=3.11
9
- Requires-Dist: blinker<2,>=1.8.2
10
- Requires-Dist: click<9,>=8.1.7
11
- Requires-Dist: pydantic-settings<3,>=2.2.1
12
- Requires-Dist: rich<14,>=13.7.1
13
- Requires-Dist: sqlalchemy<3,>=2.0.30
14
- Requires-Dist: venusian<4,>=3.1.0
15
- Description-Content-Type: text/markdown
16
-
17
1
  # BeanQueue [![CircleCI](https://dl.circleci.com/status-badge/img/gh/LaunchPlatform/bq/tree/master.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/bq/tree/master)
18
2
 
19
3
  BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
@@ -30,6 +14,7 @@ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https
30
14
  - **Retry**: Built-in and customizable retry policies
31
15
  - **Schedule**: Schedule tasks to run later
32
16
  - **Worker heartbeat and auto-reschedule**: Each worker keeps updating its heartbeat; if one is found dead, the others will reschedule its tasks
17
+ - **Custom health checks**: Optional HTTP `/healthz` endpoint with pluggable checks via Blinker signals
33
18
  - **Customizable**: Custom Task, Worker and Event models. Use it as a library and build your own work queue
34
19
 
35
20
  ## Install
@@ -38,6 +23,20 @@ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https
38
23
  pip install beanqueue
39
24
  ```
40
25
 
26
+ To enable the optional metrics HTTP server (currently `/healthz` only), install with the `metrics` extra:
27
+
28
+ ```bash
29
+ pip install "beanqueue[metrics]"
30
+ ```
31
+
32
+ ## Upgrading to 2.0
33
+
34
+ BeanQueue 2.0 includes breaking changes around the metrics HTTP server and custom health checks:
35
+
36
+ - **`METRICS_HTTP_SERVER_ENABLED` defaults to `False`** (it was `True` in 1.x). Set `BQ_METRICS_HTTP_SERVER_ENABLED=true` to turn the server back on.
37
+ - **The metrics server requires optional dependencies.** Install `beanqueue[metrics]` (`starlette` and `uvicorn`). Without them, enabling the server raises an error at startup.
38
+ - **Custom health checks use the `healthz_check` event** (`bq.events.healthz_check`) instead of a `healthz_check` argument on `bq.BeanQueue`. Connect sync or async receivers to the signal.
39
+
41
40
  ## Usage
42
41
 
43
42
  You can define a basic task processor like this
@@ -232,6 +231,91 @@ Or if you prefer to define your own process command, you can also call `process_
232
231
  app.process_tasks(channels=("images",))
233
232
  ```
234
233
 
234
+ ### Health check and metrics HTTP server
235
+
236
+ When enabled, each worker starts a small HTTP server (Starlette + Uvicorn) for operational endpoints.
237
+ Today this only exposes `GET /healthz`, which returns `{"status": "ok"}` by default.
238
+
239
+ To enable it, install the `metrics` extra and turn the server on through configuration:
240
+
241
+ ```bash
242
+ pip install "beanqueue[metrics]"
243
+ BQ_METRICS_HTTP_SERVER_ENABLED=true bq process images
244
+ ```
245
+
246
+ Relevant settings (see [bq/config.py](bq/config.py)):
247
+
248
+ | Setting | Env var | Default |
249
+ | --- | --- | --- |
250
+ | `METRICS_HTTP_SERVER_ENABLED` | `BQ_METRICS_HTTP_SERVER_ENABLED` | `False` |
251
+ | `METRICS_HTTP_SERVER_INTERFACE` | `BQ_METRICS_HTTP_SERVER_INTERFACE` | `""` (all interfaces) |
252
+ | `METRICS_HTTP_SERVER_PORT` | `BQ_METRICS_HTTP_SERVER_PORT` | `8000` |
253
+ | `METRICS_HTTP_SERVER_LOG_LEVEL` | `BQ_METRICS_HTTP_SERVER_LOG_LEVEL` | `30` (`WARNING`) |
254
+
255
+ Access requests are logged at INFO via `uvicorn.access` (visible even when `METRICS_HTTP_SERVER_LOG_LEVEL` is `WARNING`).
256
+ BeanQueue also uses a `metrics_server` logger for its own messages.
257
+ Override the entire logging setup by passing a [logging.config](https://docs.python.org/3/library/logging.config.html#logging-config-dictschema) dict via `METRICS_HTTP_SERVER_LOG_CONFIG` (or `BQ_METRICS_HTTP_SERVER_LOG_CONFIG` as JSON):
258
+
259
+ ```python
260
+ import bq
261
+
262
+ config = bq.Config(
263
+ METRICS_HTTP_SERVER_ENABLED=True,
264
+ METRICS_HTTP_SERVER_LOG_CONFIG={
265
+ "version": 1,
266
+ "disable_existing_loggers": False,
267
+ "handlers": {
268
+ "default": {
269
+ "class": "logging.StreamHandler",
270
+ "formatter": "default",
271
+ }
272
+ },
273
+ "formatters": {
274
+ "default": {
275
+ "format": "%(asctime)s %(name)s %(levelname)s %(message)s",
276
+ }
277
+ },
278
+ "loggers": {
279
+ "metrics_server": {"handlers": ["default"], "level": "INFO"},
280
+ "uvicorn.access": {"handlers": ["default"], "level": "INFO"},
281
+ },
282
+ },
283
+ )
284
+ app = bq.BeanQueue(config=config)
285
+ ```
286
+
287
+ #### Custom health checks
288
+
289
+ Register additional checks by connecting receivers to `bq.events.healthz_check`.
290
+ If no receivers are connected, `/healthz` returns OK without touching the database.
291
+
292
+ With receivers connected, BeanQueue loads the current worker and passes a database `session` to each check.
293
+ Receivers may be synchronous or asynchronous; both can be mixed on the same signal.
294
+
295
+ ```python
296
+ from sqlalchemy import text
297
+ from sqlalchemy.orm import Session
298
+
299
+ import bq
300
+ from bq import events
301
+
302
+ app = bq.BeanQueue()
303
+
304
+
305
+ @events.healthz_check.connect
306
+ def check_database(sender: bq.BeanQueue, worker, session: Session):
307
+ session.execute(text("SELECT 1"))
308
+
309
+
310
+ @events.healthz_check.connect
311
+ async def check_external_service(sender: bq.BeanQueue, worker, session: Session):
312
+ # async HTTP call, etc.
313
+ ...
314
+ ```
315
+
316
+ A receiver's signature must accept every keyword argument BeanQueue sends, for example `(sender, worker, session)`, or absorb them with `(sender, **kwargs)`.
317
+ If a check raises an exception, `/healthz` responds with HTTP 500 and a JSON body containing the error message.
318
+
235
319
  ### Define your own tables
236
320
 
237
321
  BeanQueue is designed to be as customizable as possible.
@@ -1,21 +1,22 @@
1
1
  import functools
2
2
  import importlib
3
- import json
4
3
  import logging
5
4
  import platform
6
5
  import sys
7
6
  import threading
8
7
  import typing
8
+ from concurrent.futures import FIRST_COMPLETED
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from concurrent.futures import wait as futures_wait
9
11
  from importlib.metadata import PackageNotFoundError
10
12
  from importlib.metadata import version
11
- from wsgiref.simple_server import make_server
12
- from wsgiref.simple_server import WSGIRequestHandler
13
13
 
14
14
  import venusian
15
15
  from sqlalchemy import func
16
16
  from sqlalchemy.engine import create_engine
17
17
  from sqlalchemy.engine import Engine
18
18
  from sqlalchemy.orm import Session as DBSession
19
+ from sqlalchemy.pool import QueuePool
19
20
  from sqlalchemy.pool import SingletonThreadPool
20
21
 
21
22
  from . import constants
@@ -23,6 +24,7 @@ from . import events
23
24
  from . import models
24
25
  from .config import Config
25
26
  from .db.session import SessionMaker
27
+ from .metrics import MetricsServer
26
28
  from .processors.processor import Processor
27
29
  from .processors.processor import ProcessorHelper
28
30
  from .processors.registry import collect
@@ -33,21 +35,6 @@ from .utils import load_module_var
33
35
  logger = logging.getLogger(__name__)
34
36
 
35
37
 
36
- class WSGIRequestHandlerWithLogger(WSGIRequestHandler):
37
- logger = logging.getLogger("metrics_server")
38
-
39
- def log_message(self, format, *args):
40
- message = format % args
41
- self.logger.info(
42
- "%s - - [%s] %s\n"
43
- % (
44
- self.address_string(),
45
- self.log_date_time_string(),
46
- message.translate(self._control_char_table),
47
- )
48
- )
49
-
50
-
51
38
  class BeanQueue:
52
39
  def __init__(
53
40
  self,
@@ -63,13 +50,32 @@ class BeanQueue:
63
50
  self.dispatch_service_cls = dispatch_service_cls
64
51
  self._engine = engine
65
52
  self._worker_update_shutdown_event: threading.Event = threading.Event()
66
- # noop if metrics thread is not started yet, shutdown if it is started
67
- self._metrics_server_shutdown: typing.Callable[[], None] = lambda: None
53
+ self._metrics_server: MetricsServer | None = None
68
54
 
69
55
  def create_default_engine(self):
70
- return create_engine(
71
- str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
72
- )
56
+ # Use thread-safe connection pool when thread pool executor is enabled
57
+ if self.config.MAX_WORKER_THREADS != 1:
58
+ # QueuePool is thread-safe and suitable for multi-threaded usage
59
+ # Configure pool size based on number of worker threads
60
+ max_workers = (
61
+ self.config.MAX_WORKER_THREADS
62
+ if self.config.MAX_WORKER_THREADS > 0
63
+ else 10
64
+ )
65
+ pool_size = (
66
+ max_workers + 5
67
+ ) # Extra connections for main thread and worker update thread
68
+ return create_engine(
69
+ str(self.config.DATABASE_URL),
70
+ poolclass=QueuePool,
71
+ pool_size=pool_size,
72
+ max_overflow=10,
73
+ )
74
+ else:
75
+ # SingletonThreadPool for single-threaded sequential processing
76
+ return create_engine(
77
+ str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
78
+ )
73
79
 
74
80
  def make_session(self) -> DBSession:
75
81
  return self.session_cls(bind=self.engine)
@@ -196,65 +202,164 @@ class BeanQueue:
196
202
  db.add(current_worker)
197
203
  db.commit()
198
204
 
199
- def _serve_http_request(
200
- self, worker_id: typing.Any, environ: dict, start_response: typing.Callable
201
- ) -> list[bytes]:
202
- path = environ["PATH_INFO"]
203
- if path == "/healthz":
204
- db = self.make_session()
205
- worker_service = self._make_worker_service(db)
206
- worker = worker_service.get_worker(worker_id)
207
- if worker is not None and worker.state == models.WorkerState.RUNNING:
208
- start_response(
209
- "200 OK",
210
- [
211
- ("Content-Type", "application/json"),
212
- ],
213
- )
214
- return [
215
- json.dumps(dict(status="ok", worker_id=str(worker_id))).encode(
216
- "utf8"
205
+ def _process_task_in_thread(
206
+ self,
207
+ task_id: typing.Any,
208
+ registry: typing.Any,
209
+ ):
210
+ """Process a single task in a thread-safe manner with its own database session.
211
+
212
+ This method is called from worker threads in the thread pool. It creates its own
213
+ database session to avoid SQLAlchemy session conflicts between threads.
214
+ """
215
+ db = self.make_session()
216
+ try:
217
+ # Reload the task in this thread's session to avoid SQLAlchemy context issues
218
+ task = db.query(self.task_model).filter(self.task_model.id == task_id).one()
219
+
220
+ logger.info(
221
+ "Processing task %s, channel=%s, module=%s, func=%s",
222
+ task.id,
223
+ task.channel,
224
+ task.module,
225
+ task.func_name,
226
+ )
227
+ registry.process(task, event_cls=self.event_model)
228
+ db.commit()
229
+ except Exception as e:
230
+ logger.exception("Error processing task %s: %s", task_id, e)
231
+ db.rollback()
232
+ raise
233
+ finally:
234
+ db.close()
235
+
236
+ def _process_tasks_sequential(
237
+ self,
238
+ db: DBSession,
239
+ dispatch_service: DispatchService,
240
+ registry: typing.Any,
241
+ channels: tuple[str, ...],
242
+ worker_id: typing.Any,
243
+ ):
244
+ """Process tasks sequentially (original behavior for MAX_WORKER_THREADS=1)."""
245
+ while True:
246
+ while True:
247
+ tasks = dispatch_service.dispatch(
248
+ channels,
249
+ worker_id=worker_id,
250
+ limit=self.config.BATCH_SIZE,
251
+ ).all()
252
+
253
+ for task in tasks:
254
+ logger.info(
255
+ "Processing task %s, channel=%s, module=%s, func=%s",
256
+ task.id,
257
+ task.channel,
258
+ task.module,
259
+ task.func_name,
217
260
  )
218
- ]
219
- else:
220
- logger.warning("Bad worker %s state %s", worker_id, worker.state)
221
- start_response(
222
- "500 Internal Server Error",
223
- [
224
- ("Content-Type", "application/json"),
225
- ],
261
+ registry.process(task, event_cls=self.event_model)
262
+ if tasks:
263
+ db.commit()
264
+
265
+ if not tasks:
266
+ break
267
+
268
+ db.close()
269
+ try:
270
+ for notification in dispatch_service.poll(
271
+ timeout=self.config.POLL_TIMEOUT
272
+ ):
273
+ logger.debug("Receive notification %s", notification)
274
+ except TimeoutError:
275
+ logger.debug("Poll timeout, try again")
276
+ continue
277
+
278
+ def _process_tasks_threaded(
279
+ self,
280
+ db: DBSession,
281
+ executor: ThreadPoolExecutor,
282
+ dispatch_service: DispatchService,
283
+ registry: typing.Any,
284
+ channels: tuple[str, ...],
285
+ worker_id: typing.Any,
286
+ ):
287
+ """Process tasks using thread pool with continuous task feeding.
288
+
289
+ This implementation continuously checks for completed futures and fetches new tasks
290
+ when there's capacity in the thread pool. It uses concurrent.futures.wait() to
291
+ properly detect ANY completed future, not just the first one submitted.
292
+ """
293
+ max_workers = self.config.MAX_WORKER_THREADS
294
+ if max_workers == 0:
295
+ max_workers = 10 # Default when set to auto
296
+
297
+ running_futures: set = set()
298
+
299
+ while True:
300
+ # Clean up ANY completed futures using wait() with zero timeout
301
+ if running_futures:
302
+ done, running_futures = futures_wait(
303
+ running_futures, timeout=0, return_when=FIRST_COMPLETED
226
304
  )
227
- return [
228
- json.dumps(
229
- dict(
230
- status="internal error",
231
- worker_id=str(worker_id),
232
- state=str(worker.state),
305
+ for f in done:
306
+ try:
307
+ f.result()
308
+ except Exception as e:
309
+ logger.error("Task processing failed: %s", e)
310
+
311
+ # If we have capacity, fetch and submit more tasks
312
+ capacity = max_workers - len(running_futures)
313
+ if capacity > 0:
314
+ tasks = dispatch_service.dispatch(
315
+ channels,
316
+ worker_id=worker_id,
317
+ limit=min(capacity, self.config.BATCH_SIZE),
318
+ ).all()
319
+
320
+ # Always commit to close the transaction and refresh the snapshot,
321
+ # so subsequent dispatch calls can see newly committed tasks
322
+ db.commit()
323
+
324
+ if tasks:
325
+ logger.debug(
326
+ "Dispatching %d tasks (running=%d, capacity=%d)",
327
+ len(tasks),
328
+ len(running_futures),
329
+ capacity,
330
+ )
331
+
332
+ for task in tasks:
333
+ future = executor.submit(
334
+ self._process_task_in_thread,
335
+ task.id,
336
+ registry,
233
337
  )
234
- ).encode("utf8")
235
- ]
236
- # TODO: add other metrics endpoints
237
- start_response(
238
- "404 NOT FOUND",
239
- [
240
- ("Content-Type", "application/json"),
241
- ],
242
- )
243
- return [json.dumps(dict(status="not found")).encode("utf8")]
244
-
245
- def run_metrics_http_server(self, worker_id: typing.Any):
246
- host = self.config.METRICS_HTTP_SERVER_INTERFACE
247
- port = self.config.METRICS_HTTP_SERVER_PORT
248
- with make_server(
249
- host,
250
- port,
251
- functools.partial(self._serve_http_request, worker_id),
252
- handler_class=WSGIRequestHandlerWithLogger,
253
- ) as httpd:
254
- # expose graceful shutdown to the main thread
255
- self._metrics_server_shutdown = httpd.shutdown
256
- logger.info("Run metrics HTTP server on %s:%s", host, port)
257
- httpd.serve_forever()
338
+ running_futures.add(future)
339
+
340
+ # If we have running tasks, wait briefly for any to complete then check for new tasks
341
+ if running_futures:
342
+ # Short wait - allows checking for new tasks frequently
343
+ done, running_futures = futures_wait(
344
+ running_futures, timeout=0.05, return_when=FIRST_COMPLETED
345
+ )
346
+ for f in done:
347
+ try:
348
+ f.result()
349
+ except Exception as e:
350
+ logger.error("Task processing failed: %s", e)
351
+ continue
352
+
353
+ # No running tasks and no new tasks found - poll for notifications
354
+ db.close()
355
+ try:
356
+ for notification in dispatch_service.poll(
357
+ timeout=self.config.POLL_TIMEOUT
358
+ ):
359
+ logger.debug("Receive notification %s", notification)
360
+ except TimeoutError:
361
+ logger.debug("Poll timeout, try again")
362
+ continue
258
363
 
259
364
  def process_tasks(
260
365
  self,
@@ -300,17 +405,9 @@ class BeanQueue:
300
405
  dispatch_service.listen(channels)
301
406
  db.commit()
302
407
 
303
- metrics_server_thread = None
304
408
  if self.config.METRICS_HTTP_SERVER_ENABLED:
305
- WSGIRequestHandlerWithLogger.logger.setLevel(
306
- self.config.METRICS_HTTP_SERVER_LOG_LEVEL
307
- )
308
- metrics_server_thread = threading.Thread(
309
- target=self.run_metrics_http_server,
310
- args=(worker.id,),
311
- )
312
- metrics_server_thread.daemon = True
313
- metrics_server_thread.start()
409
+ self._metrics_server = MetricsServer(self, worker.id)
410
+ self._metrics_server.start()
314
411
 
315
412
  logger.info("Created worker %s, name=%s", worker.id, worker.name)
316
413
  events.worker_init.send(self, worker=worker)
@@ -329,50 +426,53 @@ class BeanQueue:
329
426
 
330
427
  worker_id = worker.id
331
428
 
429
+ # Determine the number of worker threads
430
+ max_workers = self.config.MAX_WORKER_THREADS
431
+ if max_workers == 0:
432
+ max_workers = None # Default to (num_cpus * 5)
433
+
434
+ # Create thread pool executor for concurrent task processing
435
+ executor = None
436
+ if max_workers != 1:
437
+ executor = ThreadPoolExecutor(
438
+ max_workers=max_workers, thread_name_prefix="task_worker"
439
+ )
440
+ logger.info("Created thread pool executor with max_workers=%s", max_workers)
441
+
332
442
  try:
333
- while True:
334
- while True:
335
- tasks = dispatch_service.dispatch(
336
- channels,
337
- worker_id=worker_id,
338
- limit=self.config.BATCH_SIZE,
339
- ).all()
340
- for task in tasks:
341
- logger.info(
342
- "Processing task %s, channel=%s, module=%s, func=%s",
343
- task.id,
344
- task.channel,
345
- task.module,
346
- task.func_name,
347
- )
348
- # TODO: support processor pool and other approaches to dispatch the workload
349
- registry.process(task, event_cls=self.event_model)
350
- if not tasks:
351
- # we should try to keep dispatching until we cannot find tasks
352
- break
353
- else:
354
- db.commit()
355
- # we will not see notifications in a transaction, need to close the transaction first before entering
356
- # polling
357
- db.close()
358
- try:
359
- for notification in dispatch_service.poll(
360
- timeout=self.config.POLL_TIMEOUT
361
- ):
362
- logger.debug("Receive notification %s", notification)
363
- except TimeoutError:
364
- logger.debug("Poll timeout, try again")
365
- continue
443
+ if executor is not None:
444
+ # Threaded processing with continuous task feeding
445
+ self._process_tasks_threaded(
446
+ db=db,
447
+ executor=executor,
448
+ dispatch_service=dispatch_service,
449
+ registry=registry,
450
+ channels=channels,
451
+ worker_id=worker_id,
452
+ )
453
+ else:
454
+ # Sequential processing (original behavior)
455
+ self._process_tasks_sequential(
456
+ db=db,
457
+ dispatch_service=dispatch_service,
458
+ registry=registry,
459
+ channels=channels,
460
+ worker_id=worker_id,
461
+ )
366
462
  except (SystemExit, KeyboardInterrupt):
367
463
  db.rollback()
368
464
  logger.info("Shutting down ...")
465
+
466
+ # Shutdown the executor if it was created
467
+ if executor is not None:
468
+ logger.info("Shutting down thread pool executor...")
469
+ executor.shutdown(wait=True, cancel_futures=False)
470
+ logger.info("Thread pool executor shutdown complete")
471
+
369
472
  self._worker_update_shutdown_event.set()
370
473
  worker_update_thread.join(5)
371
- if metrics_server_thread is not None:
372
- # set a threading event, waits until server is shutdown
373
- # serve the ongoing requests
374
- self._metrics_server_shutdown()
375
- metrics_server_thread.join(1)
474
+ if self._metrics_server is not None:
475
+ self._metrics_server.shutdown()
376
476
 
377
477
  worker.state = models.WorkerState.SHUTDOWN
378
478
  db.add(worker)
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import typing
2
3
 
3
4
  from pydantic import Field
@@ -16,6 +17,11 @@ class Config(BaseSettings):
16
17
  # Size of tasks batch to fetch each time from the database
17
18
  BATCH_SIZE: int = 1
18
19
 
20
+ # Maximum number of worker threads for concurrent task processing
21
+ # Set to 1 to disable thread pool and process tasks sequentially
22
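+ # Set to 0 for automatic sizing: the executor falls back to ThreadPoolExecutor's own default thread count, while bq/app.py caps in-flight tasks and sizes the connection pool as if the value were 10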
23
+ MAX_WORKER_THREADS: int = 1
24
+
19
25
  # How long we should poll before timeout in seconds
20
26
  POLL_TIMEOUT: int = 60
21
27
 
@@ -35,7 +41,7 @@ class Config(BaseSettings):
35
41
  EVENT_MODEL: str | None = "bq.Event"
36
42
 
37
43
  # Enable metrics HTTP server
38
- METRICS_HTTP_SERVER_ENABLED: bool = True
44
+ METRICS_HTTP_SERVER_ENABLED: bool = False
39
45
 
40
46
  # the metrics http server interface to listen
41
47
  METRICS_HTTP_SERVER_INTERFACE: str = ""
@@ -46,6 +52,11 @@ class Config(BaseSettings):
46
52
  # default log level for metrics http server
47
53
  METRICS_HTTP_SERVER_LOG_LEVEL: int = 30
48
54
 
55
+ # Optional logging.config dict for the metrics HTTP server (uvicorn).
56
+ # When unset, a default config is used. Pass a dict programmatically or
57
+ # JSON via BQ_METRICS_HTTP_SERVER_LOG_CONFIG.
58
+ METRICS_HTTP_SERVER_LOG_CONFIG: dict[str, typing.Any] | None = None
59
+
49
60
  POSTGRES_SERVER: str = "localhost"
50
61
  POSTGRES_USER: str = "bq"
51
62
  POSTGRES_PASSWORD: str = ""
@@ -53,6 +64,15 @@ class Config(BaseSettings):
53
64
  # The URL of postgresql database to connect
54
65
  DATABASE_URL: typing.Optional[PostgresDsn] = None
55
66
 
67
+ @field_validator("METRICS_HTTP_SERVER_LOG_CONFIG", mode="before")
68
+ @classmethod
69
+ def parse_metrics_log_config(cls, v: typing.Any) -> typing.Any:
70
+ if v is None or isinstance(v, dict):
71
+ return v
72
+ if isinstance(v, str):
73
+ return json.loads(v)
74
+ raise ValueError("Unexpected METRICS_HTTP_SERVER_LOG_CONFIG type")
75
+
56
76
  @field_validator("DATABASE_URL", mode="before")
57
77
  def assemble_db_connection(
58
78
  cls, v: typing.Optional[str], info: ValidationInfo
@@ -3,3 +3,5 @@ import blinker
3
3
  worker_init = blinker.signal("worker-init")
4
4
 
5
5
  task_failure = blinker.signal("task-failure")
6
+
7
+ healthz_check = blinker.signal("healthz-check")
@@ -0,0 +1,171 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import logging.config
5
+ import threading
6
+ import typing
7
+ from collections.abc import Callable
8
+ from collections.abc import Coroutine
9
+ from importlib.util import find_spec
10
+
11
+ from sqlalchemy.orm import Session as DBSession
12
+
13
+ from . import events
14
+ from . import models
15
+
16
+ if typing.TYPE_CHECKING:
17
+ from .app import BeanQueue
18
+ from .config import Config
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ METRICS_EXTRA = "metrics"
23
+ METRICS_SERVER_LOGGER = "metrics_server"
24
+
25
+
26
+ def _healthz_sync_wrapper(
27
+ func: Callable[..., typing.Any],
28
+ ) -> Callable[..., Coroutine[typing.Any, typing.Any, typing.Any]]:
29
+ async def wrapper(sender: typing.Any, **kwargs: typing.Any) -> typing.Any:
30
+ return func(sender, **kwargs)
31
+
32
+ return wrapper
33
+
34
+
35
class MetricsExtrasNotInstalledError(ImportError):
    """Signal that the optional metrics dependencies are not installed.

    Subclasses ``ImportError`` so that callers already handling import
    failures also catch this error.
    """
37
+
38
+
39
def require_metrics_extras() -> None:
    """Verify that the packages backing the metrics HTTP server can be found.

    Raises:
        MetricsExtrasNotInstalledError: If ``starlette`` and/or ``uvicorn``
            cannot be located. The message lists the missing packages and
            the pip extra that installs them.
    """
    absent = [
        package
        for package in ("starlette", "uvicorn")
        if find_spec(package) is None
    ]
    if not absent:
        return

    package_list = ", ".join(absent)
    raise MetricsExtrasNotInstalledError(
        "Health check and metrics HTTP server require optional dependencies "
        f"({package_list}). "
        f"Install them with: pip install beanqueue[{METRICS_EXTRA}]"
    )
51
+
52
+
53
def default_metrics_log_config(log_level: int) -> dict[str, typing.Any]:
    """Build the default logging ``dictConfig`` for the metrics HTTP server.

    The result is a deep copy of uvicorn's ``LOGGING_CONFIG`` (so uvicorn's
    module-level dict is never mutated) with these adjustments:

    * the ``access`` handler writes to ``ext://sys.stderr``;
    * the ``uvicorn`` and ``uvicorn.error`` loggers use ``log_level``;
    * the ``uvicorn.access`` logger stays at INFO;
    * a non-propagating ``METRICS_SERVER_LOGGER`` logger is registered on the
      ``default`` handler at INFO.

    Args:
        log_level: Numeric logging level for the uvicorn server loggers.

    Returns:
        A new dict suitable for ``logging.config.dictConfig``.
    """
    from uvicorn.config import LOGGING_CONFIG

    level_name = logging.getLevelName(log_level)
    # For a number with no registered name, getLevelName() returns a
    # placeholder such as "Level 25", which dictConfig rejects as an unknown
    # level. Fall back to the raw number, which dictConfig accepts as-is.
    # Registered levels keep their name, so the output is unchanged for them.
    if level_name in logging.getLevelNamesMapping():
        server_level: typing.Any = level_name
    else:
        server_level = log_level
    info_level = logging.getLevelName(logging.INFO)

    log_config = copy.deepcopy(LOGGING_CONFIG)
    log_config["handlers"]["access"]["stream"] = "ext://sys.stderr"
    log_config["loggers"]["uvicorn"]["level"] = server_level
    log_config["loggers"]["uvicorn.error"]["level"] = server_level
    # Access lines are logged at INFO by uvicorn regardless of server log level.
    log_config["loggers"]["uvicorn.access"]["level"] = info_level
    log_config["loggers"][METRICS_SERVER_LOGGER] = {
        "handlers": ["default"],
        "level": info_level,
        "propagate": False,
    }
    return log_config
70
+
71
+
72
def resolve_metrics_log_config(config: Config) -> dict[str, typing.Any]:
    """Pick the logging config dict used by the metrics HTTP server.

    Args:
        config: Settings object providing ``METRICS_HTTP_SERVER_LOG_CONFIG``
            and ``METRICS_HTTP_SERVER_LOG_LEVEL``.

    Returns:
        The configured dict itself (not a copy) when one is set, otherwise
        the default config built for the configured log level.
    """
    custom = config.METRICS_HTTP_SERVER_LOG_CONFIG
    if custom is None:
        return default_metrics_log_config(config.METRICS_HTTP_SERVER_LOG_LEVEL)
    return custom
76
+
77
+
78
class MetricsServer:
    """Serve a ``/healthz`` HTTP endpoint for a worker on a background thread.

    The endpoint is a Starlette app run by uvicorn inside a daemon thread
    named ``metrics_server``. Receivers connected to ``events.healthz_check``
    decide whether the worker is reported as healthy.
    """

    def __init__(self, bq: BeanQueue, worker_id: typing.Any):
        """Remember the queue app and the worker this server reports on.

        Args:
            bq: Application object supplying config, sessions and services.
            worker_id: Identifier of the worker looked up on health checks.

        Raises:
            MetricsExtrasNotInstalledError: If starlette or uvicorn is missing.
        """
        require_metrics_extras()
        self._bq = bq
        self._worker_id = worker_id
        # Both stay None until start() has been called.
        self._server = None
        self._thread: threading.Thread | None = None

    def _has_custom_health_checks(self) -> bool:
        """Return True when at least one ``healthz_check`` receiver is connected."""
        return bool(events.healthz_check.receivers)

    async def _run_healthz_checks(
        self, worker: models.Worker, session: DBSession, body: dict[str, typing.Any]
    ) -> bool:
        """Send the ``healthz_check`` signal and report whether it succeeded.

        Synchronous receivers are adapted through ``_healthz_sync_wrapper``.

        Args:
            worker: Worker object handed to the receivers.
            session: Database session handed to the receivers.
            body: Response payload; gains an ``"error"`` entry on failure.

        Returns:
            True if sending the signal raised nothing, False otherwise.
        """
        try:
            await events.healthz_check.send_async(
                self._bq,
                _sync_wrapper=_healthz_sync_wrapper,
                worker=worker,
                session=session,
            )
        except Exception as exc:
            # Any failure marks the worker unhealthy; the exception text is
            # exposed to the HTTP client through the response body.
            logger.exception("Custom healthz check failed")
            body["error"] = str(exc)
            return False
        return True

    async def check_healthz(self) -> tuple[bool, dict[str, typing.Any]]:
        """Evaluate the worker's health.

        Returns:
            A ``(ok, body)`` pair. Without connected receivers the result is
            ``(True, {"status": "ok"})`` and no database session is opened.
            Otherwise ``body`` also carries ``worker_id``; on failure it
            carries ``error`` and its status becomes ``"internal error"``.
        """
        body: dict[str, typing.Any] = {"status": "ok"}

        if not self._has_custom_health_checks():
            return True, body

        with self._bq.make_session() as db:
            worker_service = self._bq._make_worker_service(db)
            worker = worker_service.get_worker(self._worker_id)
            body["worker_id"] = str(self._worker_id)

            if not await self._run_healthz_checks(worker, db, body):
                body["status"] = "internal error"
                return False, body
        return True, body

    def create_app(self):
        """Build the Starlette application exposing the ``/healthz`` route.

        The route answers with the health body as JSON, using status 200 when
        healthy and 500 otherwise.
        """
        from starlette.applications import Starlette
        from starlette.responses import JSONResponse
        from starlette.routing import Route

        async def healthz(_request):
            ok, body = await self.check_healthz()
            return JSONResponse(body, status_code=200 if ok else 500)

        return Starlette(
            routes=[
                Route("/healthz", healthz),
            ]
        )

    def start(self) -> None:
        """Configure logging and launch uvicorn on a daemon thread.

        Raises:
            MetricsExtrasNotInstalledError: If starlette or uvicorn is missing.
        """
        # Check the extras before importing uvicorn, so a missing package is
        # reported with the install hint instead of a bare ModuleNotFoundError.
        require_metrics_extras()

        import uvicorn

        host = self._bq.config.METRICS_HTTP_SERVER_INTERFACE
        port = self._bq.config.METRICS_HTTP_SERVER_PORT
        log_config = resolve_metrics_log_config(self._bq.config)
        logging.config.dictConfig(log_config)

        app = self.create_app()
        # log_level is intentionally omitted: uvicorn would override logger levels
        # from log_config (including uvicorn.access) after configure_logging().
        config = uvicorn.Config(
            app,
            host=host,
            port=port,
            log_config=log_config,
            access_log=True,
        )
        self._server = uvicorn.Server(config)

        def run() -> None:
            logging.getLogger(METRICS_SERVER_LOGGER).info(
                "Run metrics HTTP server on %s:%s", host, port
            )
            self._server.run()

        self._thread = threading.Thread(target=run, name="metrics_server")
        self._thread.daemon = True
        self._thread.start()

    def shutdown(self) -> None:
        """Ask the server to exit and wait up to one second for its thread.

        Safe to call when start() was never called: both steps are skipped.
        """
        if self._server is not None:
            self._server.should_exit = True
        if self._thread is not None:
            self._thread.join(1)
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "beanqueue"
3
- version = "1.1.9"
3
+ version = "2.0.0rc0"
4
4
  description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library"
5
5
  authors = [{ name = "Fang-Pen Lin", email = "fangpen@launchplatform.com" }]
6
- requires-python = "~=3.11"
6
+ requires-python = ">=3.11,<4"
7
7
  readme = "README.md"
8
8
  license = "MIT"
9
9
  dependencies = [
@@ -18,10 +18,19 @@ dependencies = [
18
18
  [project.scripts]
19
19
  bq = "bq.cmds.main:cli"
20
20
 
21
+ [project.optional-dependencies]
22
+ metrics = [
23
+ "starlette>=0.27,<2",
24
+ "uvicorn>=0.30.0,<1",
25
+ ]
26
+
21
27
  [dependency-groups]
22
28
  dev = [
23
- "psycopg2-binary>=2.9.9,<3",
29
+ "psycopg2-binary>=2.9.10,<3",
24
30
  "pytest-factoryboy>=2.7.0,<3",
31
+ "starlette>=0.27,<2",
32
+ "uvicorn>=0.30.0,<1",
33
+ "httpx>=0.27.0,<1",
25
34
  ]
26
35
 
27
36
  [tool.hatch.build.targets.sdist]
@@ -33,3 +42,6 @@ include = ["bq"]
33
42
  [build-system]
34
43
  requires = ["hatchling"]
35
44
  build-backend = "hatchling.build"
45
+
46
+ [tool.pytest.ini_options]
47
+ testpaths = ["tests"]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes