PyPI - beanqueue - Versions diffs - 0.1.3__tar.gz → 0.2.1__tar.gz - Mend

beanqueue 0.1.3tar.gz → 0.2.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{beanqueue-0.1.3 → beanqueue-0.2.1}/PKG-INFO +20 -10
{beanqueue-0.1.3 → beanqueue-0.2.1}/README.md +18 -8
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/__init__.py +1 -2
beanqueue-0.2.1/bq/app.py +330 -0
beanqueue-0.2.1/bq/cmds/create_tables.py +26 -0
beanqueue-0.2.1/bq/cmds/process.py +23 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/cmds/submit.py +9 -9
beanqueue-0.2.1/bq/cmds/utils.py +14 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/config.py +9 -0
beanqueue-0.2.1/bq/events.py +3 -0
beanqueue-0.2.1/bq/processors/processor.py +70 -0
beanqueue-0.2.1/bq/processors/registry.py +47 -0
beanqueue-0.2.1/bq/utils.py +8 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/pyproject.toml +2 -2
beanqueue-0.1.3/bq/cmds/create_tables.py +0 -25
beanqueue-0.1.3/bq/cmds/process.py +0 -188
beanqueue-0.1.3/bq/container.py +0 -77
beanqueue-0.1.3/bq/processors/registry.py +0 -136
{beanqueue-0.1.3 → beanqueue-0.2.1}/LICENSE +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/cmds/__init__.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/constants.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/db/__init__.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/db/base.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/db/session.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/__init__.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/helpers.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/task.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/models/worker.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/processors/__init__.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/services/__init__.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/services/dispatch.py +0 -0
{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/services/worker.py +0 -0

{beanqueue-0.1.3 → beanqueue-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: beanqueue
-Version: 0.1.3
+Version: 0.2.1
 Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
 License: MIT
 Author: Fang-Pen Lin
@@ -10,8 +10,8 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: blinker (>=1.8.2,<2.0.0)
 Requires-Dist: click (>=8.1.7,<9.0.0)
-Requires-Dist: dependency-injector (>=4.41.0,<5.0.0)
 Requires-Dist: pg-activity (>=3.5.1,<4.0.0)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.30,<3.0.0)
@@ -50,7 +50,9 @@ import bq
 from .. import models
 from .. import image_utils
-@bq.processor(channel="images")
+app = bq.BeanQueue()
+@app.processor(channel="images")
 def resize_image(db: Session, task: bq.Task, width: int, height: int):
     image = db.query(models.Image).filter(models.Image.task == task).one()
     image_utils.resize(image, size=(width, height))
@@ -61,6 +63,7 @@ def resize_image(db: Session, task: bq.Task, width: int, height: int):
 The `db` and `task` keyword arguments are optional.
 If you don't need to access the task object, you can simply define the function without these two parameters.
+We also provide an optional `savepoint` argument in case you want to roll back the database changes you made.
 To submit a task, you can either use `bq.Task` model object to construct the task object, insert into the
 database session and commit.
@@ -130,14 +133,12 @@ Configurations can be modified by setting environment variables with `BQ_` prefi
 For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
 To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
 The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
-For now, the configurations only affect command line tools.
-If you want to configure BeanQueue programmatically for the command lines, you can override our [dependency-injector](https://python-dependency-injector.ets-labs.org/)'s container defined at [bq/container.py](bq/container.py) and call the command function manually.
+If you want to configure BeanQueue programmatically, you can pass a `Config` object to `bq.BeanQueue` when creating it.
 For example:
 ```python
 import bq
-from bq.cmds.process import process_tasks
 from .my_config import config
 container = bq.Container()
@@ -147,11 +148,20 @@ config = bq.Config(
     DATABASE_URL=str(config.DATABASE_URL),
     BATCH_SIZE=10,
 )
-with container.config.override(config):
-    process_tasks(channels=("images",))
+app = bq.BeanQueue(config=config)
+```
+Then you can pass the `--app` argument (or `-a` for short), pointing to the app object, to the process command like this:
+```bash
+python -m bq.cmds.process -a my_pkgs.bq.app images
 ```
-Many other behaviors of this framework can also be modified by overriding the container defined at [bq/container.py](bq/container.py).
+Or if you prefer to define your own process command, you can also call `process_tasks` of the `BeanQueue` object directly like this:
+```python
+app.process_tasks(channels=("images",))
+```
 ### Define your own tables
@@ -231,7 +241,7 @@ config = bq.Config(
     WORKER_MODEL="my_pkgs.models.Worker",
     # ... other configs
 )
-# Override container...
+app = bq.BeanQueue(config)
 ```
 ## Why?

{beanqueue-0.1.3 → beanqueue-0.2.1}/README.md RENAMED Viewed

@@ -30,7 +30,9 @@ import bq
 from .. import models
 from .. import image_utils
-@bq.processor(channel="images")
+app = bq.BeanQueue()
+@app.processor(channel="images")
 def resize_image(db: Session, task: bq.Task, width: int, height: int):
     image = db.query(models.Image).filter(models.Image.task == task).one()
     image_utils.resize(image, size=(width, height))
@@ -41,6 +43,7 @@ def resize_image(db: Session, task: bq.Task, width: int, height: int):
 The `db` and `task` keyword arguments are optional.
 If you don't need to access the task object, you can simply define the function without these two parameters.
+We also provide an optional `savepoint` argument in case you want to roll back the database changes you made.
 To submit a task, you can either use `bq.Task` model object to construct the task object, insert into the
 database session and commit.
@@ -110,14 +113,12 @@ Configurations can be modified by setting environment variables with `BQ_` prefi
 For example, to set the python packages to scan for processors, you can set `BQ_PROCESSOR_PACKAGES`.
 To change the PostgreSQL database to connect to, you can set `BQ_DATABASE_URL`.
 The complete definition of configurations can be found at the [bq/config.py](bq/config.py) module.
-For now, the configurations only affect command line tools.
-If you want to configure BeanQueue programmatically for the command lines, you can override our [dependency-injector](https://python-dependency-injector.ets-labs.org/)'s container defined at [bq/container.py](bq/container.py) and call the command function manually.
+If you want to configure BeanQueue programmatically, you can pass a `Config` object to `bq.BeanQueue` when creating it.
 For example:
 ```python
 import bq
-from bq.cmds.process import process_tasks
 from .my_config import config
 container = bq.Container()
@@ -127,11 +128,20 @@ config = bq.Config(
     DATABASE_URL=str(config.DATABASE_URL),
     BATCH_SIZE=10,
 )
-with container.config.override(config):
-    process_tasks(channels=("images",))
+app = bq.BeanQueue(config=config)
+```
+Then you can pass the `--app` argument (or `-a` for short), pointing to the app object, to the process command like this:
+```bash
+python -m bq.cmds.process -a my_pkgs.bq.app images
 ```
-Many other behaviors of this framework can also be modified by overriding the container defined at [bq/container.py](bq/container.py).
+Or if you prefer to define your own process command, you can also call `process_tasks` of the `BeanQueue` object directly like this:
+```python
+app.process_tasks(channels=("images",))
+```
 ### Define your own tables
@@ -211,7 +221,7 @@ config = bq.Config(
     WORKER_MODEL="my_pkgs.models.Worker",
     # ... other configs
 )
-# Override container...
+app = bq.BeanQueue(config)
 ```
 ## Why?

{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
+from .app import BeanQueue
 from .config import Config  # noqa
-from .container import Container  # noqa
 from .models import Task  # noqa
 from .models import TaskModelMixin
 from .models import TaskModelRefWorkerMixin
@@ -8,4 +8,3 @@ from .models import Worker  # noqa
 from .models import WorkerModelMixin  # noqa
 from .models import WorkerRefMixin  # noqa
 from .models import WorkerState  # noqa
-from .processors.registry import processor  # noqa

beanqueue-0.2.1/bq/app.py ADDED Viewed

@@ -0,0 +1,330 @@
+import functools
+import importlib
+import json
+import logging
+import platform
+import sys
+import threading
+import time
+import typing
+from wsgiref.simple_server import make_server
+import venusian
+from sqlalchemy import func
+from sqlalchemy.engine import create_engine
+from sqlalchemy.engine import Engine
+from sqlalchemy.orm import Session as DBSession
+from sqlalchemy.pool import SingletonThreadPool
+from . import constants
+from . import events
+from . import models
+from .config import Config
+from .db.session import SessionMaker
+from .processors.processor import Processor
+from .processors.processor import ProcessorHelper
+from .processors.registry import collect
+from .services.dispatch import DispatchService
+from .services.worker import WorkerService
+from .utils import load_module_var
+logger = logging.getLogger(__name__)
+class BeanQueue:
+    def __init__(
+        self,
+        config: Config | None = None,
+        session_cls: DBSession = SessionMaker,
+        worker_service_cls: typing.Type[WorkerService] = WorkerService,
+        dispatch_service_cls: typing.Type[DispatchService] = DispatchService,
+        engine: Engine | None = None,
+    ):
+        self.config = config if config is not None else Config()
+        self.session_cls = session_cls
+        self.worker_service_cls = worker_service_cls
+        self.dispatch_service_cls = dispatch_service_cls
+        self._engine = engine
+    def create_default_engine(self):
+        return create_engine(
+            str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
+        )
+    def make_session(self) -> DBSession:
+        return self.session_cls(bind=self.engine)
+    @property
+    def engine(self) -> Engine:
+        if self._engine is None:
+            self._engine = self.create_default_engine()
+        return self._engine
+    @property
+    def task_model(self) -> typing.Type[models.Task]:
+        return load_module_var(self.config.TASK_MODEL)
+    @property
+    def worker_model(self) -> typing.Type[models.Worker]:
+        return load_module_var(self.config.WORKER_MODEL)
+    def _make_worker_service(self, session: DBSession):
+        return self.worker_service_cls(
+            session=session, task_model=self.task_model, worker_model=self.worker_model
+        )
+    def _make_dispatch_service(self, session: DBSession):
+        return self.dispatch_service_cls(session=session, task_model=self.task_model)
+    def processor(
+        self,
+        channel: str = constants.DEFAULT_CHANNEL,
+        auto_complete: bool = True,
+        auto_rollback_on_exc: bool = True,
+        task_model: typing.Type | None = None,
+    ) -> typing.Callable:
+        def decorator(wrapped: typing.Callable):
+            processor = Processor(
+                module=wrapped.__module__,
+                name=wrapped.__name__,
+                channel=channel,
+                func=wrapped,
+                auto_complete=auto_complete,
+                auto_rollback_on_exc=auto_rollback_on_exc,
+            )
+            helper_obj = ProcessorHelper(
+                processor,
+                task_cls=task_model if task_model is not None else self.task_model,
+            )
+            def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
+                if processor.name != name:
+                    raise ValueError("Name is not the same")
+                scanner.registry.add(processor)
+            venusian.attach(
+                helper_obj, callback, category=constants.BQ_PROCESSOR_CATEGORY
+            )
+            return helper_obj
+        return decorator
+    def update_workers(
+        self,
+        worker_id: typing.Any,
+    ):
+        db = self.make_session()
+        worker_service = self._make_worker_service(db)
+        dispatch_service = self._make_dispatch_service(db)
+        current_worker = worker_service.get_worker(worker_id)
+        logger.info(
+            "Updating worker %s with heartbeat_period=%s, heartbeat_timeout=%s",
+            current_worker.id,
+            self.config.WORKER_HEARTBEAT_PERIOD,
+            self.config.WORKER_HEARTBEAT_TIMEOUT,
+        )
+        while True:
+            dead_workers = worker_service.fetch_dead_workers(
+                timeout=self.config.WORKER_HEARTBEAT_TIMEOUT
+            )
+            task_count = worker_service.reschedule_dead_tasks(
+                # TODO: a better way to abstract this?
+                dead_workers.with_entities(current_worker.__class__.id)
+            )
+            found_dead_worker = False
+            for dead_worker in dead_workers:
+                found_dead_worker = True
+                logger.info(
+                    "Found dead worker %s (name=%s), reschedule %s dead tasks in channels %s",
+                    dead_worker.id,
+                    dead_worker.name,
+                    task_count,
+                    dead_worker.channels,
+                )
+                dispatch_service.notify(dead_worker.channels)
+            if found_dead_worker:
+                db.commit()
+            if current_worker.state != models.WorkerState.RUNNING:
+                # This probably means we are somehow very slow to update the heartbeat in time, or the timeout window
+                # is set too short. It could also be the administrator update the worker state to something else than
+                # RUNNING. Regardless the reason, let's stop processing.
+                logger.warning(
+                    "Current worker %s state is %s instead of running, quit processing",
+                    current_worker.id,
+                    current_worker.state,
+                )
+                sys.exit(0)
+            time.sleep(self.config.WORKER_HEARTBEAT_PERIOD)
+            current_worker.last_heartbeat = func.now()
+            db.add(current_worker)
+            db.commit()
+    def _serve_http_request(
+        self, worker_id: typing.Any, environ: dict, start_response: typing.Callable
+    ) -> list[bytes]:
+        """WSGI application backing the metrics HTTP server.
+
+        A request to ``/healthz`` answers 200 when the worker row exists and its
+        state is ``RUNNING``, and 500 otherwise; every other path answers 404.
+        All bodies are JSON.
+
+        :param worker_id: id of the worker whose state is reported
+        :param environ: WSGI environment; only ``PATH_INFO`` is read
+        :param start_response: WSGI ``start_response`` callable
+        :return: the response body as a list of UTF-8 encoded chunks
+        """
+        json_headers = [("Content-Type", "application/json")]
+        path = environ["PATH_INFO"]
+        if path == "/healthz":
+            db = self.make_session()
+            try:
+                worker = self._make_worker_service(db).get_worker(worker_id)
+                # The worker row can be missing, so never dereference it blindly;
+                # a missing worker is reported as state ``None``.
+                state = worker.state if worker is not None else None
+            finally:
+                # A session is created per request; release it instead of leaking it
+                db.close()
+            if state == models.WorkerState.RUNNING:
+                start_response("200 OK", json_headers)
+                return [
+                    json.dumps(dict(status="ok", worker_id=str(worker_id))).encode(
+                        "utf8"
+                    )
+                ]
+            logger.warning("Bad worker %s state %s", worker_id, state)
+            start_response("500 Internal Server Error", json_headers)
+            return [
+                json.dumps(
+                    dict(
+                        status="internal error",
+                        worker_id=str(worker_id),
+                        state=str(state),
+                    )
+                ).encode("utf8")
+            ]
+        # TODO: add other metrics endpoints
+        start_response("404 NOT FOUND", json_headers)
+        return [json.dumps(dict(status="not found")).encode("utf8")]
+    def run_metrics_http_server(self, worker_id: typing.Any):
+        host = self.config.METRICS_HTTP_SERVER_INTERFACE
+        port = self.config.METRICS_HTTP_SERVER_PORT
+        with make_server(
+            host, port, functools.partial(self._serve_http_request, worker_id)
+        ) as httpd:
+            logger.info("Run metrics HTTP server on %s:%s", host, port)
+            httpd.serve_forever()
+    def process_tasks(
+        self,
+        channels: tuple[str, ...],
+    ):
+        """Run the worker loop for the given channels until interrupted.
+
+        Imports and scans ``config.PROCESSOR_PACKAGES`` for processors, creates a
+        worker row, starts the heartbeat thread (and the metrics HTTP server when
+        enabled), then alternates between dispatching task batches and polling for
+        notifications. On ``SystemExit``/``KeyboardInterrupt`` the worker is marked
+        ``SHUTDOWN``, ``reschedule_dead_tasks`` is run for it and the channels are
+        notified.
+
+        :param channels: channels to process; the default channel is used when empty
+        :raises ValueError: if ``config.PROCESSOR_PACKAGES`` is empty
+        """
+        db = self.make_session()
+        if not channels:
+            channels = [constants.DEFAULT_CHANNEL]
+        if not self.config.PROCESSOR_PACKAGES:
+            logger.error("No PROCESSOR_PACKAGES provided")
+            raise ValueError("No PROCESSOR_PACKAGES provided")
+        logger.info("Scanning packages %s", self.config.PROCESSOR_PACKAGES)
+        pkgs = list(map(importlib.import_module, self.config.PROCESSOR_PACKAGES))
+        registry = collect(pkgs)
+        for channel, module_processors in registry.processors.items():
+            logger.info("Collected processors with channel %r", channel)
+            for module, func_processors in module_processors.items():
+                for processor in func_processors.values():
+                    logger.info(
+                        "  Processor module=%r, name=%r", module, processor.name
+                    )
+        dispatch_service = self.dispatch_service_cls(
+            session=db, task_model=self.task_model
+        )
+        work_service = self.worker_service_cls(
+            session=db, task_model=self.task_model, worker_model=self.worker_model
+        )
+        worker = work_service.make_worker(name=platform.node(), channels=channels)
+        db.add(worker)
+        dispatch_service.listen(channels)
+        db.commit()
+        metrics_server_thread = None
+        if self.config.METRICS_HTTP_SERVER_ENABLED:
+            metrics_server_thread = threading.Thread(
+                target=self.run_metrics_http_server,
+                args=(worker.id,),
+            )
+            metrics_server_thread.daemon = True
+            metrics_server_thread.start()
+        logger.info("Created worker %s, name=%s", worker.id, worker.name)
+        events.worker_init.send(self, worker=worker)
+        logger.info("Processing tasks in channels = %s ...", channels)
+        worker_update_thread = threading.Thread(
+            target=functools.partial(
+                self.update_workers,
+                worker_id=worker.id,
+            ),
+            name="update_workers",
+        )
+        worker_update_thread.daemon = True
+        worker_update_thread.start()
+        worker_id = worker.id
+        try:
+            while True:
+                while True:
+                    tasks = dispatch_service.dispatch(
+                        channels,
+                        worker_id=worker_id,
+                        limit=self.config.BATCH_SIZE,
+                    ).all()
+                    for task in tasks:
+                        logger.info(
+                            "Processing task %s, channel=%s, module=%s, func=%s",
+                            task.id,
+                            task.channel,
+                            task.module,
+                            task.func_name,
+                        )
+                        # TODO: support processor pool and other approaches to dispatch the workload
+                        registry.process(task)
+                    if not tasks:
+                        # we should try to keep dispatching until we cannot find tasks
+                        break
+                    else:
+                        db.commit()
+                # we will not see notifications in a transaction, need to close the transaction first before entering
+                # polling
+                db.close()
+                try:
+                    for notification in dispatch_service.poll(
+                        timeout=self.config.POLL_TIMEOUT
+                    ):
+                        logger.debug("Receive notification %s", notification)
+                except TimeoutError:
+                    logger.debug("Poll timeout, try again")
+                    continue
+        except (SystemExit, KeyboardInterrupt):
+            db.rollback()
+            logger.info("Shutting down ...")
+            worker_update_thread.join(5)
+            if metrics_server_thread is not None:
+                metrics_server_thread.join(5)
+        worker.state = models.WorkerState.SHUTDOWN
+        db.add(worker)
+        # Call the service instance bound to ``db`` (as ``update_workers`` does);
+        # calling the method on the class has no session/model to work with and
+        # breaks the graceful shutdown.
+        task_count = work_service.reschedule_dead_tasks([worker.id])
+        logger.info("Reschedule %s tasks", task_count)
+        dispatch_service.notify(channels)
+        db.commit()
+        logger.info("Shutdown gracefully")

beanqueue-0.2.1/bq/cmds/create_tables.py ADDED Viewed

@@ -0,0 +1,26 @@
+import logging
+import click
+from .. import models  # noqa
+from ..db.base import Base
+from .utils import load_app
+logger = logging.getLogger(__name__)
+@click.command()
+@click.option(
+    "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
+)
+def main(
+    app: str | None = None,
+):
+    app = load_app(app)
+    Base.metadata.create_all(bind=app.engine)
+    logger.info("Done, tables created")
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    main()

beanqueue-0.2.1/bq/cmds/process.py ADDED Viewed

@@ -0,0 +1,23 @@
+import logging
+import click
+from .utils import load_app
+@click.command()
+@click.argument("channels", nargs=-1)
+@click.option(
+    "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
+)
+def main(
+    channels: tuple[str, ...],
+    app: str | None = None,
+):
+    app = load_app(app)
+    app.process_tasks(channels)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    main()

{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/cmds/submit.py RENAMED Viewed

@@ -2,12 +2,11 @@ import json
 import logging
 import click
-from dependency_injector.wiring import inject
-from dependency_injector.wiring import Provide
 from .. import models
-from ..container import Container
-from ..db.session import Session
+from .utils import load_app
+logger = logging.getLogger(__name__)
 @click.command()
@@ -17,15 +16,18 @@ from ..db.session import Session
 @click.option(
     "-k", "--kwargs", type=str, help="Keyword arguments as JSON", default=None
 )
-@inject
+@click.option(
+    "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
+)
 def main(
     channel: str,
     module: str,
     func: str,
     kwargs: str | None,
-    db: Session = Provide[Container.session],
+    app: str | None = None,
 ):
-    logger = logging.getLogger(__name__)
+    app = load_app(app)
+    db = app.session_cls(bind=app.create_default_engine())
     logger.info(
         "Submit task with channel=%s, module=%s, func=%s", channel, module, func
@@ -43,6 +45,4 @@ def main(
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
-    container = Container()
-    container.wire(modules=[__name__])
     main()

beanqueue-0.2.1/bq/cmds/utils.py ADDED Viewed

@@ -0,0 +1,14 @@
+import logging
+from ..app import BeanQueue
+from ..utils import load_module_var
+logger = logging.getLogger(__name__)
+def load_app(app: str | None) -> BeanQueue:
+    """Resolve the ``--app`` command line value into an app object.
+
+    :param app: dotted path to an app object (e.g. "my_pkgs.bq.app"), or ``None``
+    :return: a default ``BeanQueue()`` when ``app`` is ``None``; otherwise whatever
+        ``load_module_var`` returns for the path (its type is not checked here)
+    """
+    if app is None:
+        logger.info("No BeanQueue app provided, create default app")
+        return BeanQueue()
+    logger.info("Load BeanQueue app from %s", app)
+    return load_module_var(app)

{beanqueue-0.1.3 → beanqueue-0.2.1}/bq/config.py RENAMED Viewed

@@ -30,6 +30,15 @@ class Config(BaseSettings):
     # which worker model to use
     WORKER_MODEL: str = "bq.Worker"
+    # Enable metrics HTTP server
+    METRICS_HTTP_SERVER_ENABLED: bool = True
+    # the metrics http server interface to listen
+    METRICS_HTTP_SERVER_INTERFACE: str = ""
+    # the metrics http server port to listen
+    METRICS_HTTP_SERVER_PORT: int = 8000
     POSTGRES_SERVER: str = "localhost"
     POSTGRES_USER: str = "bq"
     POSTGRES_PASSWORD: str = ""

beanqueue-0.2.1/bq/events.py ADDED Viewed

@@ -0,0 +1,3 @@
+import blinker
+worker_init = blinker.signal("worker-init")

beanqueue-0.2.1/bq/processors/processor.py ADDED Viewed

@@ -0,0 +1,70 @@
+import dataclasses
+import inspect
+import logging
+import typing
+from sqlalchemy.orm import object_session
+from .. import models
+logger = logging.getLogger(__name__)
+@dataclasses.dataclass(frozen=True)
+class Processor:
+    channel: str
+    module: str
+    name: str
+    func: typing.Callable
+    # should we auto complete the task or not
+    auto_complete: bool = True
+    # should we auto rollback the transaction when encounter unhandled exception
+    auto_rollback_on_exc: bool = True
+    def process(self, task: models.Task):
+        db = object_session(task)
+        func_signature = inspect.signature(self.func)
+        base_kwargs = {}
+        if "task" in func_signature.parameters:
+            base_kwargs["task"] = task
+        if "db" in func_signature.parameters:
+            base_kwargs["db"] = db
+        with db.begin_nested() as savepoint:
+            if "savepoint" in func_signature.parameters:
+                base_kwargs["savepoint"] = savepoint
+            try:
+                result = self.func(**base_kwargs, **task.kwargs)
+            except Exception as exc:
+                logger.error("Unhandled exception for task %s", task.id, exc_info=True)
+                if self.auto_rollback_on_exc:
+                    savepoint.rollback()
+                # TODO: add error event
+                task.state = models.TaskState.FAILED
+                task.error_message = str(exc)
+                db.add(task)
+                return
+        if self.auto_complete:
+            logger.info("Task %s auto complete", task.id)
+            task.state = models.TaskState.DONE
+            task.result = result
+            db.add(task)
+        return result
+class ProcessorHelper:
+    """Helper function to replace the decorated processor function and make creating Task model much easier"""
+    def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
+        self._processor = processor
+        self._task_cls = task_cls
+    def __call__(self, *args, **kwargs):
+        return self._processor.func(*args, **kwargs)
+    def run(self, **kwargs) -> models.Task:
+        return self._task_cls(
+            channel=self._processor.channel,
+            module=self._processor.module,
+            func_name=self._processor.name,
+            kwargs=kwargs,
+        )

beanqueue-0.2.1/bq/processors/registry.py ADDED Viewed

@@ -0,0 +1,47 @@
+import collections
+import logging
+import typing
+import venusian
+from sqlalchemy.orm import object_session
+from .. import constants
+from .. import models
+from .processor import Processor
+class Registry:
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))
+    def add(self, processor: Processor):
+        self.processors[processor.channel][processor.module][processor.name] = processor
+    def process(self, task: models.Task) -> typing.Any:
+        modules = self.processors.get(task.channel, {})
+        functions = modules.get(task.module, {})
+        processor = functions.get(task.func_name)
+        db = object_session(task)
+        if processor is None:
+            self.logger.error(
+                "Cannot find processor for task %s with module=%s, func=%s",
+                task.id,
+                task.module,
+                task.func_name,
+            )
+            # TODO: add error event
+            task.state = models.TaskState.FAILED
+            task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
+            db.add(task)
+            return
+        return processor.process(task)
+def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
+    """Scan packages with venusian and return the registry given to the scanner.
+
+    :param packages: objects handed to ``venusian.Scanner.scan`` one by one
+    :param registry: registry to pass to the scanner; a new ``Registry`` is
+        created when ``None``
+    :return: the registry that was used for the scan
+    """
+    if registry is None:
+        registry = Registry()
+    scanner = venusian.Scanner(registry=registry)
+    for package in packages:
+        # Only callbacks attached under the BQ processor category are invoked
+        scanner.scan(package, categories=(constants.BQ_PROCESSOR_CATEGORY,))
+    return registry

beanqueue-0.2.1/bq/utils.py ADDED Viewed

@@ -0,0 +1,8 @@
+import importlib
+import typing
+def load_module_var(name: str) -> typing.Type:
+    """Import and return the object addressed by a dotted path.
+
+    The path is split at its last dot: the left part is imported as a module and
+    the right part is looked up as an attribute of that module.
+
+    :param name: dotted path such as "my_pkgs.models.Task"
+    :return: the attribute found on the imported module
+    :raises ValueError: if ``name`` has no module part (no dot, or a leading dot only)
+    :raises ImportError: if the module cannot be imported
+    :raises AttributeError: if the module lacks the attribute
+    """
+    module_name, sep, var_name = name.rpartition(".")
+    if not sep or not module_name:
+        # Fail with a message naming the bad value instead of an unpacking error
+        raise ValueError(
+            f"Expected a dotted path like 'package.module.name', got {name!r}"
+        )
+    module = importlib.import_module(module_name)
+    return getattr(module, var_name)

{beanqueue-0.1.3 → beanqueue-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "beanqueue"
-version = "0.1.3"
+version = "0.2.1"
 description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library"
 authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
 license = "MIT"
@@ -14,9 +14,9 @@ python = "^3.11"
 sqlalchemy = "^2.0.30"
 venusian = "^3.1.0"
 click = "^8.1.7"
-dependency-injector = "^4.41.0"
 pydantic-settings = "^2.2.1"
 pg-activity = "^3.5.1"
+blinker = "^1.8.2"
 [tool.poetry.group.dev.dependencies]

beanqueue-0.1.3/bq/cmds/create_tables.py DELETED Viewed

@@ -1,25 +0,0 @@
-import logging
-import click
-from dependency_injector.wiring import inject
-from dependency_injector.wiring import Provide
-from sqlalchemy.engine import Engine
-from .. import models  # noqa
-from ..container import Container
-from ..db.base import Base
-@click.command()
-@inject
-def main(engine: Engine = Provide[Container.db_engine]):
-    logger = logging.getLogger(__name__)
-    Base.metadata.create_all(bind=engine)
-    logger.info("Done, tables created")
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    container = Container()
-    container.wire(modules=[__name__])
-    main()

beanqueue-0.1.3/bq/cmds/process.py DELETED Viewed

@@ -1,188 +0,0 @@
-import functools
-import importlib
-import logging
-import platform
-import sys
-import threading
-import time
-import typing
-import click
-from dependency_injector.wiring import inject
-from dependency_injector.wiring import Provide
-from sqlalchemy import func
-from sqlalchemy.orm import Session as DBSession
-from .. import constants
-from .. import models
-from ..config import Config
-from ..container import Container
-from ..processors.registry import collect
-from ..services.dispatch import DispatchService
-from ..services.worker import WorkerService
-@inject
-def update_workers(
-    worker_id: typing.Any,
-    config: Config = Provide[Container.config],
-    session_factory: typing.Callable = Provide[Container.session_factory],
-    make_dispatch_service: typing.Callable = Provide[Container.make_dispatch_service],
-    make_worker_service: typing.Callable = Provide[Container.make_worker_service],
-):
-    """Heartbeat loop for one worker that also reschedules tasks of dead workers.
-    Loops forever on its own DB session obtained from ``session_factory``. Each round
-    it fetches dead workers, reschedules their tasks, notifies the affected channels,
-    checks that this worker is still RUNNING, sleeps for the heartbeat period and
-    finally stamps a new ``last_heartbeat``.
-    :param worker_id: id of the worker whose heartbeat this loop maintains
-    :param config: settings providing WORKER_HEARTBEAT_PERIOD and WORKER_HEARTBEAT_TIMEOUT
-    :param session_factory: zero-argument callable returning a new DB session
-    :param make_dispatch_service: factory called with ``session=`` to build a DispatchService
-    :param make_worker_service: factory called with ``session=`` to build a WorkerService
-    :raises SystemExit: when the worker's state is no longer RUNNING
-    """
-    db: DBSession = session_factory()
-    worker_service: WorkerService = make_worker_service(session=db)
-    dispatch_service: DispatchService = make_dispatch_service(session=db)
-    current_worker = worker_service.get_worker(worker_id)
-    logger = logging.getLogger(__name__)
-    logger.info(
-        "Updating worker %s with heartbeat_period=%s, heartbeat_timeout=%s",
-        current_worker.id,
-        config.WORKER_HEARTBEAT_PERIOD,
-        config.WORKER_HEARTBEAT_TIMEOUT,
-    )
-    while True:
-        dead_workers = worker_service.fetch_dead_workers(
-            timeout=config.WORKER_HEARTBEAT_TIMEOUT
-        )
-        task_count = worker_service.reschedule_dead_tasks(
-            # TODO: a better way to abstract this?
-            dead_workers.with_entities(current_worker.__class__.id)
-        )
-        found_dead_worker = False
-        for dead_worker in dead_workers:
-            found_dead_worker = True
-            logger.info(
-                "Found dead worker %s (name=%s), reschedule %s dead tasks in channels %s",
-                dead_worker.id,
-                dead_worker.name,
-                task_count,
-                dead_worker.channels,
-            )
-            dispatch_service.notify(dead_worker.channels)
-        # Only commit when there was something to reschedule and notify about.
-        if found_dead_worker:
-            db.commit()
-        if current_worker.state != models.WorkerState.RUNNING:
-            # This probably means we are somehow very slow to update the heartbeat in time, or the timeout window
-            # is set too short. It could also be the administrator update the worker state to something else than
-            # RUNNING. Regardless the reason, let's stop processing.
-            # The format arguments were previously missing, so the id and state
-            # never made it into the log record.
-            logger.warning(
-                "Current worker %s state is %s instead of running, quit processing",
-                current_worker.id,
-                current_worker.state,
-            )
-            # NOTE(review): process_tasks runs this function in a separate thread;
-            # sys.exit() there ends only that thread - confirm the main loop is
-            # meant to keep running in that case.
-            sys.exit(0)
-        time.sleep(config.WORKER_HEARTBEAT_PERIOD)
-        # func.now() makes the database supply the timestamp.
-        current_worker.last_heartbeat = func.now()
-        db.add(current_worker)
-        db.commit()
-@inject
-def process_tasks(
-    channels: tuple[str, ...],
-    config: Config = Provide[Container.config],
-    db: DBSession = Provide[Container.session],
-    dispatch_service: DispatchService = Provide[Container.dispatch_service],
-    worker_service: WorkerService = Provide[Container.worker_service],
-):
-    """Register a worker, then dispatch and process tasks until interrupted.
-    Steps: collect processors from ``config.PROCESSOR_PACKAGES``, create a worker row
-    and listen on the channels, start the ``update_workers`` heartbeat thread, then
-    alternate between draining dispatched tasks and polling for notifications. On
-    SystemExit or KeyboardInterrupt the worker is marked SHUTDOWN and its tasks are
-    rescheduled.
-    :param channels: channel names to serve; the default channel is used when empty
-    :param config: settings providing PROCESSOR_PACKAGES, BATCH_SIZE and POLL_TIMEOUT
-    :param db: session shared with ``dispatch_service`` and ``worker_service``
-        (all three are container singletons)
-    :param dispatch_service: used to listen, dispatch, poll and notify
-    :param worker_service: used to create the worker and reschedule its tasks
-    """
-    logger = logging.getLogger(__name__)
-    if not channels:
-        channels = [constants.DEFAULT_CHANNEL]
-    if not config.PROCESSOR_PACKAGES:
-        logger.error("No PROCESSOR_PACKAGES provided")
-        sys.exit(-1)
-    logger.info("Scanning packages %s", config.PROCESSOR_PACKAGES)
-    # Import every configured package so its processors can be scanned.
-    pkgs = list(map(importlib.import_module, config.PROCESSOR_PACKAGES))
-    registry = collect(pkgs)
-    # registry.processors is nested as channel -> module -> function name -> processor.
-    for channel, module_processors in registry.processors.items():
-        logger.info("Collected processors with channel %r", channel)
-        for module, func_processors in module_processors.items():
-            for processor in func_processors.values():
-                logger.info(
-                    "  Processor module %r, processor %r", module, processor.name
-                )
-    worker = worker_service.make_worker(name=platform.node(), channels=channels)
-    db.add(worker)
-    dispatch_service.listen(channels)
-    db.commit()
-    logger.info("Created worker %s, name=%s", worker.id, worker.name)
-    logger.info("Processing tasks in channels = %s ...", channels)
-    # Heartbeat / dead-worker cleanup runs in a daemon thread next to the main loop.
-    worker_update_thread = threading.Thread(
-        target=functools.partial(
-            update_workers,
-            worker_id=worker.id,
-        ),
-        name="update_workers",
-    )
-    worker_update_thread.daemon = True
-    worker_update_thread.start()
-    # Read the id once up front; presumably this avoids reloading the attribute
-    # after the commits and close below - TODO confirm.
-    worker_id = worker.id
-    try:
-        while True:
-            # Inner loop: keep dispatching batches until one comes back empty.
-            while True:
-                tasks = dispatch_service.dispatch(
-                    channels,
-                    worker_id=worker_id,
-                    limit=config.BATCH_SIZE,
-                ).all()
-                for task in tasks:
-                    logger.info(
-                        "Processing task %s, channel=%s, module=%s, func=%s",
-                        task.id,
-                        task.channel,
-                        task.module,
-                        task.func_name,
-                    )
-                    # TODO: support processor pool and other approaches to dispatch the workload
-                    registry.process(task)
-                if not tasks:
-                    # we should try to keep dispatching until we cannot find tasks
-                    break
-                else:
-                    # Persist the outcome of the whole batch in one commit.
-                    db.commit()
-            # we will not see notifications in a transaction, need to close the transaction first before entering
-            # polling
-            db.close()
-            try:
-                for notification in dispatch_service.poll(timeout=config.POLL_TIMEOUT):
-                    logger.debug("Receive notification %s", notification)
-            except TimeoutError:
-                logger.debug("Poll timeout, try again")
-                continue
-    except (SystemExit, KeyboardInterrupt):
-        db.rollback()
-        logger.info("Shutting down ...")
-        # Wait at most 5 seconds; the heartbeat loop has no stop condition of
-        # its own besides the worker state check.
-        worker_update_thread.join(5)
-    # Graceful shutdown: mark the worker, hand its tasks back and wake other workers.
-    worker.state = models.WorkerState.SHUTDOWN
-    db.add(worker)
-    task_count = worker_service.reschedule_dead_tasks([worker.id])
-    logger.info("Reschedule %s tasks", task_count)
-    dispatch_service.notify(channels)
-    db.commit()
-    logger.info("Shutdown gracefully")
-# Click command: CHANNELS is a variadic positional argument (nargs=-1), so it
-# arrives as a possibly empty tuple and is handed straight to process_tasks.
-# Documented with comments rather than a docstring so click's --help output,
-# which is built from the docstring, stays unchanged.
-@click.command()
-@click.argument("channels", nargs=-1)
-def main(
-    channels: tuple[str, ...],
-):
-    process_tasks(channels)
-# Script entry point: configure INFO logging, build the DI container, wire it
-# into this module, then run the click command.
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    container = Container()
-    # Wire first: the @inject functions above rely on their Provide[...] defaults.
-    container.wire(modules=[__name__])
-    main()

beanqueue-0.1.3/bq/container.py DELETED Viewed

@@ -1,77 +0,0 @@
-import functools
-import importlib
-import typing
-from dependency_injector import containers
-from dependency_injector import providers
-from sqlalchemy import create_engine
-from sqlalchemy import Engine
-from sqlalchemy.orm import Session as DBSession
-from sqlalchemy.pool import SingletonThreadPool
-from .config import Config
-from .db.session import SessionMaker
-from .services.dispatch import DispatchService
-from .services.worker import WorkerService
-def get_model_class(name: str) -> typing.Type:
-    """Resolve a dotted path to an attribute of an importable module.
-    :param name: ``module.path.Attribute``; split at the last dot
-    :return: the attribute found on the imported module
-    """
-    module_path, attribute = name.rsplit(".", 1)
-    return getattr(importlib.import_module(module_path), attribute)
-def make_db_engine(config: Config) -> Engine:
-    """Create the SQLAlchemy engine for ``config.DATABASE_URL``.
-    The URL is converted with ``str()`` and the engine uses ``SingletonThreadPool``.
-    """
-    url = str(config.DATABASE_URL)
-    return create_engine(url, poolclass=SingletonThreadPool)
-def make_session_factory(engine: Engine) -> typing.Callable:
-    """Return a callable that builds ``SessionMaker`` sessions bound to ``engine``."""
-    bound_factory = functools.partial(SessionMaker, bind=engine)
-    return bound_factory
-def make_session(factory: typing.Callable) -> DBSession:
-    """Call ``factory`` with no arguments and return what it produces."""
-    session = factory()
-    return session
-def make_dispatch_service(config: Config, session: DBSession) -> DispatchService:
-    """Build a DispatchService on ``session`` using the model named by ``config.TASK_MODEL``."""
-    task_model = get_model_class(config.TASK_MODEL)
-    return DispatchService(session, task_model=task_model)
-def make_worker_service(config: Config, session: DBSession) -> WorkerService:
-    """Build a WorkerService on ``session``.
-    The task and worker models are resolved from the dotted paths in
-    ``config.TASK_MODEL`` and ``config.WORKER_MODEL``.
-    """
-    task_model = get_model_class(config.TASK_MODEL)
-    worker_model = get_model_class(config.WORKER_MODEL)
-    return WorkerService(session, task_model=task_model, worker_model=worker_model)
-class Container(containers.DeclarativeContainer):
-    """Dependency-injection container wiring config, engine, session and services.
-    Every provider is a ``providers.Singleton``. The order of the statements matters:
-    the first use of ``make_dispatch_service`` / ``make_worker_service`` below must come
-    before the class attributes of the same names are assigned, so that it still
-    resolves to the module-level factory functions.
-    """
-    config = providers.Singleton(Config)
-    db_engine: Engine = providers.Singleton(make_db_engine, config=config)
-    session_factory: typing.Callable = providers.Singleton(
-        make_session_factory, engine=db_engine
-    )
-    # One shared session; the two service singletons below are built on it.
-    session: DBSession = providers.Singleton(make_session, factory=session_factory)
-    dispatch_service: DispatchService = providers.Singleton(
-        make_dispatch_service,
-        config=config,
-        session=session,
-    )
-    worker_service: WorkerService = providers.Singleton(
-        make_worker_service, config=config, session=session
-    )
-    # Factory providers: each yields a partial with ``config`` pre-bound, so the
-    # caller only supplies ``session=``. Inside the lambdas the names refer to the
-    # module-level functions, because class scope is not visible from a nested function.
-    make_dispatch_service = providers.Singleton(
-        lambda config: functools.partial(make_dispatch_service, config=config),
-        config=config,
-    )
-    make_worker_service = providers.Singleton(
-        lambda config: functools.partial(make_worker_service, config=config),
-        config=config,
-    )

beanqueue-0.1.3/bq/processors/registry.py DELETED Viewed

@@ -1,136 +0,0 @@
-import collections
-import dataclasses
-import inspect
-import logging
-import typing
-import venusian
-from sqlalchemy.orm import object_session
-from .. import constants
-from .. import models
-@dataclasses.dataclass(frozen=True)
-class Processor:
-    """Immutable description of one registered task-processing function."""
-    # channel the processor is registered under
-    channel: str
-    # module name recorded for the function (the decorator passes ``__module__``)
-    module: str
-    # function name recorded for the function (the decorator passes ``__name__``)
-    name: str
-    # the callable invoked to process a task
-    func: typing.Callable
-    # should we auto complete the task or not
-    auto_complete: bool = True
-    # should we auto rollback the transaction when encounter unhandled exception
-    auto_rollback_on_exc: bool = True
-class ProcessorHelper:
-    """Callable stand-in for a decorated function that can also build task objects."""
-    def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
-        self._task_cls = task_cls
-        self._processor = processor
-    def __call__(self, *args, **kwargs):
-        """Call the wrapped function directly with the given arguments."""
-        wrapped = self._processor.func
-        return wrapped(*args, **kwargs)
-    def run(self, **kwargs) -> models.Task:
-        """Return a new ``task_cls`` instance addressed to this processor.
-        The keyword arguments are stored as the task's ``kwargs``. Nothing here
-        adds the object to a session.
-        """
-        target = self._processor
-        task_fields = {
-            "channel": target.channel,
-            "module": target.module,
-            "func_name": target.name,
-            "kwargs": kwargs,
-        }
-        return self._task_cls(**task_fields)
-def process_task(task: models.Task, processor: Processor):
-    """Run ``processor.func`` for ``task`` inside a savepoint and record the outcome.
-    The function receives ``task.kwargs`` plus any of ``task``, ``db`` and ``savepoint``
-    that appear as parameter names in its signature.
-    :param task: task to process; its session is looked up with ``object_session``
-    :param processor: processor whose ``func`` is invoked
-    :return: the function's result, or ``None`` when it raised
-    """
-    logger = logging.getLogger(__name__)
-    db = object_session(task)
-    func_signature = inspect.signature(processor.func)
-    # Inject optional context arguments only if the function declares them by name.
-    base_kwargs = {}
-    if "task" in func_signature.parameters:
-        base_kwargs["task"] = task
-    if "db" in func_signature.parameters:
-        base_kwargs["db"] = db
-    with db.begin_nested() as savepoint:
-        if "savepoint" in func_signature.parameters:
-            base_kwargs["savepoint"] = savepoint
-        try:
-            result = processor.func(**base_kwargs, **task.kwargs)
-        except Exception as exc:
-            logger.error("Unhandled exception for task %s", task.id, exc_info=True)
-            # Undo the function's changes first so only the failure marker below remains.
-            if processor.auto_rollback_on_exc:
-                savepoint.rollback()
-            # TODO: add error event
-            task.state = models.TaskState.FAILED
-            task.error_message = str(exc)
-            db.add(task)
-            return
-    # Reached only on success; with auto_complete off the task state is left as is.
-    if processor.auto_complete:
-        logger.info("Task %s auto complete", task.id)
-        task.state = models.TaskState.DONE
-        task.result = result
-        db.add(task)
-    return result
-class Registry:
-    """Lookup table of processors keyed by channel, module and function name."""
-    def __init__(self):
-        self.logger = logging.getLogger(__name__)
-        # channel -> module -> function name -> Processor
-        self.processors = collections.defaultdict(lambda: collections.defaultdict(dict))
-    def add(self, processor: Processor):
-        """Store ``processor`` under its channel, module and name."""
-        by_module = self.processors[processor.channel]
-        by_module[processor.module][processor.name] = processor
-    def process(self, task: models.Task) -> typing.Any:
-        """Run the processor registered for ``task`` and return its result.
-        When no processor matches the task's channel, module and function name, the
-        task is marked FAILED with an error message, added to its session, and
-        ``None`` is returned.
-        """
-        # .get() is used throughout so a miss does not create defaultdict entries.
-        processor = (
-            self.processors.get(task.channel, {})
-            .get(task.module, {})
-            .get(task.func_name)
-        )
-        if processor is not None:
-            return process_task(task, processor)
-        self.logger.error(
-            "Cannot find processor for task %s with module=%s, func=%s",
-            task.id,
-            task.module,
-            task.func_name,
-        )
-        # TODO: add error event
-        task.state = models.TaskState.FAILED
-        task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
-        object_session(task).add(task)
-        return None
-def processor(
-    channel: str = constants.DEFAULT_CHANNEL,
-    auto_complete: bool = True,
-    auto_rollback_on_exc: bool = True,
-    task_cls: typing.Type = models.Task,
-) -> typing.Callable:
-    """Decorator factory that marks a function as a task processor.
-    The decorated function is replaced by a ``ProcessorHelper``. A venusian callback
-    is attached to it so that a later scan adds the processor to the scanner's registry.
-    :param channel: channel the processor is registered under
-    :param auto_complete: stored on the ``Processor`` as ``auto_complete``
-    :param auto_rollback_on_exc: stored on the ``Processor`` as ``auto_rollback_on_exc``
-    :param task_cls: class the helper instantiates in ``run``
-    :return: the decorator to apply to the function
-    """
-    def decorator(wrapped: typing.Callable):
-        spec = Processor(
-            channel=channel,
-            module=wrapped.__module__,
-            name=wrapped.__name__,
-            func=wrapped,
-            auto_complete=auto_complete,
-            auto_rollback_on_exc=auto_rollback_on_exc,
-        )
-        def callback(scanner: venusian.Scanner, name: str, ob: typing.Callable):
-            # Refuse to register under a name other than the function's own.
-            if name != spec.name:
-                raise ValueError("Name is not the same")
-            scanner.registry.add(spec)
-        helper = ProcessorHelper(spec, task_cls=task_cls)
-        venusian.attach(helper, callback, category=constants.BQ_PROCESSOR_CATEGORY)
-        return helper
-    return decorator
-def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
-    """Scan packages for processor registrations and gather them in a registry.
-    :param packages: objects handed one at a time to ``venusian.Scanner.scan``
-    :param registry: registry to fill; a fresh ``Registry`` is created when omitted
-    :return: the registry that was given to the scanner
-    """
-    target = Registry() if registry is None else registry
-    scanner = venusian.Scanner(registry=target)
-    # Only callbacks attached under the BeanQueue processor category are run.
-    wanted_categories = (constants.BQ_PROCESSOR_CATEGORY,)
-    for package in packages:
-        scanner.scan(package, categories=wanted_categories)
-    return target