metaspn-ops 0.1.0 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaspn_ops-0.1.0/LICENSE +21 -0
- metaspn_ops-0.1.0/PKG-INFO +107 -0
- metaspn_ops-0.1.0/README.md +81 -0
- metaspn_ops-0.1.0/pyproject.toml +47 -0
- metaspn_ops-0.1.0/setup.cfg +4 -0
- metaspn_ops-0.1.0/src/metaspn_ops/__init__.py +20 -0
- metaspn_ops-0.1.0/src/metaspn_ops/backends.py +63 -0
- metaspn_ops-0.1.0/src/metaspn_ops/cli.py +118 -0
- metaspn_ops-0.1.0/src/metaspn_ops/fs_queue.py +156 -0
- metaspn_ops-0.1.0/src/metaspn_ops/lease.py +82 -0
- metaspn_ops-0.1.0/src/metaspn_ops/runner.py +112 -0
- metaspn_ops-0.1.0/src/metaspn_ops/scheduler.py +14 -0
- metaspn_ops-0.1.0/src/metaspn_ops/types.py +103 -0
- metaspn_ops-0.1.0/src/metaspn_ops.egg-info/PKG-INFO +107 -0
- metaspn_ops-0.1.0/src/metaspn_ops.egg-info/SOURCES.txt +18 -0
- metaspn_ops-0.1.0/src/metaspn_ops.egg-info/dependency_links.txt +1 -0
- metaspn_ops-0.1.0/src/metaspn_ops.egg-info/entry_points.txt +2 -0
- metaspn_ops-0.1.0/src/metaspn_ops.egg-info/requires.txt +4 -0
- metaspn_ops-0.1.0/src/metaspn_ops.egg-info/top_level.txt +1 -0
- metaspn_ops-0.1.0/tests/test_ops_queue.py +108 -0
metaspn_ops-0.1.0/LICENSE

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 MetaSPN Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
metaspn_ops-0.1.0/PKG-INFO

@@ -0,0 +1,107 @@
Metadata-Version: 2.4
Name: metaspn-ops
Version: 0.1.0
Summary: Standard inbox/outbox worker runtime for MetaSPN agent systems
Author: MetaSPN Contributors
License-Expression: MIT
Project-URL: Homepage, https://github.com/metaspn/metaspn-ops
Project-URL: Repository, https://github.com/metaspn/metaspn-ops
Project-URL: Issues, https://github.com/metaspn/metaspn-ops/issues
Keywords: metaspn,agents,worker,queue,scheduler
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: System :: Distributed Computing
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: dev
Requires-Dist: build>=1.2.2; extra == "dev"
Requires-Dist: twine>=5.1.1; extra == "dev"
Dynamic: license-file

# metaspn-ops

Standard inbox/outbox worker runtime for MetaSPN agent systems.

## Features

- Filesystem queue backend with inbox/outbox semantics
- Task leasing with lock files and lease expiration
- Retries with exponential backoff and a dead-letter queue
- Worker runner with polling and parallel execution
- CLI for worker runs and queue operations

## Installation

```bash
pip install metaspn-ops
```

## Quickstart

### 1) Define a worker

```python
# example_worker.py
from metaspn_ops import Result, Task


class EnrichWorker:
    name = "enrich"

    def handle(self, task: Task) -> Result:
        payload = {"seen": task.payload}
        return Result(task_id=task.task_id, status="ok", payload=payload)
```

### 2) Run a worker once

```bash
metaspn worker run example_worker:EnrichWorker --workspace . --once --max-tasks 10
```

### 3) Inspect the queue

```bash
metaspn queue stats enrich --workspace .
metaspn queue deadletter list enrich --workspace .
metaspn queue retry enrich --workspace .
```

## Queue layout

```text
workspace/
  inbox/{worker_name}/
  outbox/{worker_name}/
  runs/{worker_name}/
  deadletter/{worker_name}/
  locks/{worker_name}/
```

## Development

```bash
python -m venv .venv
source .venv/bin/activate
pip install -e ".[dev]"
PYTHONPATH=src python -m unittest discover -s tests -v
python -m build
python -m twine check dist/*
```

## Release

- Tag a release in GitHub (for example `v0.1.0`).
- GitHub Actions builds and publishes to PyPI using trusted publishing.
- Configure a PyPI Trusted Publisher for this repository before the first release.
- See `PUBLISHING.md` in the repository root for the full flow.

## License

MIT
metaspn_ops-0.1.0/README.md

@@ -0,0 +1,81 @@
# metaspn-ops

Standard inbox/outbox worker runtime for MetaSPN agent systems.

## Features

- Filesystem queue backend with inbox/outbox semantics
- Task leasing with lock files and lease expiration
- Retries with exponential backoff and a dead-letter queue
- Worker runner with polling and parallel execution
- CLI for worker runs and queue operations

## Installation

```bash
pip install metaspn-ops
```

## Quickstart

### 1) Define a worker

```python
# example_worker.py
from metaspn_ops import Result, Task


class EnrichWorker:
    name = "enrich"

    def handle(self, task: Task) -> Result:
        payload = {"seen": task.payload}
        return Result(task_id=task.task_id, status="ok", payload=payload)
```

### 2) Run a worker once

```bash
metaspn worker run example_worker:EnrichWorker --workspace . --once --max-tasks 10
```

### 3) Inspect the queue

```bash
metaspn queue stats enrich --workspace .
metaspn queue deadletter list enrich --workspace .
metaspn queue retry enrich --workspace .
```

## Queue layout

```text
workspace/
  inbox/{worker_name}/
  outbox/{worker_name}/
  runs/{worker_name}/
  deadletter/{worker_name}/
  locks/{worker_name}/
```

## Development

```bash
python -m venv .venv
source .venv/bin/activate
pip install -e ".[dev]"
PYTHONPATH=src python -m unittest discover -s tests -v
python -m build
python -m twine check dist/*
```

## Release

- Tag a release in GitHub (for example `v0.1.0`).
- GitHub Actions builds and publishes to PyPI using trusted publishing.
- Configure a PyPI Trusted Publisher for this repository before the first release.
- See `PUBLISHING.md` in the repository root for the full flow.

## License

MIT
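Beyond the CLI, tasks can be enqueued programmatically through the same API the package exports. A minimal sketch (not part of the diff; the workspace path and payload are illustrative):

```python
# Enqueue work for the EnrichWorker from the quickstart.
from metaspn_ops import FilesystemQueue, Task

queue = FilesystemQueue(workspace=".", worker_name="enrich")
queue.enqueue_task(Task(task_id="t1", task_type="enrich", payload={"url": "https://example.com"}))
# The task file lands in inbox/enrich/ and is picked up by `metaspn worker run ...`.
```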
metaspn_ops-0.1.0/pyproject.toml

@@ -0,0 +1,47 @@
[build-system]
requires = ["setuptools>=69"]
build-backend = "setuptools.build_meta"

[project]
name = "metaspn-ops"
version = "0.1.0"
description = "Standard inbox/outbox worker runtime for MetaSPN agent systems"
readme = "README.md"
requires-python = ">=3.11"
license = "MIT"
license-files = ["LICENSE"]
authors = [
    { name = "MetaSPN Contributors" }
]
keywords = ["metaspn", "agents", "worker", "queue", "scheduler"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Libraries",
    "Topic :: System :: Distributed Computing",
]
dependencies = []

[project.optional-dependencies]
dev = [
    "build>=1.2.2",
    "twine>=5.1.1",
]

[project.urls]
Homepage = "https://github.com/metaspn/metaspn-ops"
Repository = "https://github.com/metaspn/metaspn-ops"
Issues = "https://github.com/metaspn/metaspn-ops/issues"

[project.scripts]
metaspn = "metaspn_ops.cli:main"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]
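The `[project.scripts]` table maps the `metaspn` command to `metaspn_ops.cli:main`. Since `main` accepts an explicit argv list (see `cli.py` below), the same entry point can also be exercised in-process; a sketch, with the argv list mirroring the shell command:

```python
from metaspn_ops.cli import main

# Equivalent to: metaspn queue stats enrich --workspace .
exit_code = main(["queue", "stats", "enrich", "--workspace", "."])
```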
metaspn_ops-0.1.0/src/metaspn_ops/__init__.py

@@ -0,0 +1,20 @@
"""metaspn-ops public API."""

from .backends import QueueBackend, SQLiteQueueStub
from .fs_queue import FilesystemQueue
from .lease import LeaseManager
from .runner import Worker, WorkerRunner
from .scheduler import TaskScheduler
from .types import Result, Task

__all__ = [
    "FilesystemQueue",
    "LeaseManager",
    "QueueBackend",
    "Result",
    "SQLiteQueueStub",
    "Task",
    "TaskScheduler",
    "Worker",
    "WorkerRunner",
]
metaspn_ops-0.1.0/src/metaspn_ops/backends.py

@@ -0,0 +1,63 @@
from __future__ import annotations

from datetime import datetime
from pathlib import Path
from typing import Protocol

from .types import Result, RunRecord, Task


class QueueBackend(Protocol):
    worker_name: str

    def enqueue_task(self, task: Task, *, scheduled_for: datetime | None = None) -> Path:
        ...

    def lease_next_task(self, *, owner: str, lease_seconds: int) -> tuple[Task, Path] | None:
        ...

    def ack_task(self, leased_path: Path) -> None:
        ...

    def fail_task(self, leased_path: Path, task: Task, error: str) -> None:
        ...

    def write_result(self, result: Result) -> Path:
        ...

    def write_run_record(self, record: RunRecord) -> Path:
        ...

    def stats(self) -> dict[str, int]:
        ...


class SQLiteQueueStub:
    """Interface placeholder for PRD milestone M3."""

    def __init__(self, worker_name: str, db_path: str | Path):
        self.worker_name = worker_name
        self.db_path = Path(db_path)

    def _not_implemented(self):
        raise NotImplementedError("SQLite backend is a v0.1 stub; implement in M3")

    def enqueue_task(self, task: Task, *, scheduled_for: datetime | None = None) -> Path:
        self._not_implemented()

    def lease_next_task(self, *, owner: str, lease_seconds: int) -> tuple[Task, Path] | None:
        self._not_implemented()

    def ack_task(self, leased_path: Path) -> None:
        self._not_implemented()

    def fail_task(self, leased_path: Path, task: Task, error: str) -> None:
        self._not_implemented()

    def write_result(self, result: Result) -> Path:
        self._not_implemented()

    def write_run_record(self, record: RunRecord) -> Path:
        self._not_implemented()

    def stats(self) -> dict[str, int]:
        self._not_implemented()
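`QueueBackend` is a structural `typing.Protocol`: anything exposing these seven members satisfies it, with no inheritance required. A hypothetical in-memory backend, not part of this package, sketched purely to show the required surface (leasing and retry scheduling are deliberately elided):

```python
# Hypothetical in-memory backend satisfying QueueBackend structurally.
from collections import deque
from pathlib import Path

from metaspn_ops.types import Result, RunRecord, Task


class InMemoryQueue:
    def __init__(self, worker_name: str):
        self.worker_name = worker_name
        self._pending: deque[Task] = deque()
        self._results: list[Result] = []
        self._records: list[RunRecord] = []

    def enqueue_task(self, task: Task, *, scheduled_for=None) -> Path:
        self._pending.append(task)
        return Path(f"mem/{task.task_id}")  # synthetic path; FS backends return real files

    def lease_next_task(self, *, owner: str, lease_seconds: int) -> tuple[Task, Path] | None:
        if not self._pending:
            return None
        task = self._pending.popleft()
        return task, Path(f"mem/{task.task_id}")

    def ack_task(self, leased_path: Path) -> None:
        pass  # popleft already removed the task

    def fail_task(self, leased_path: Path, task: Task, error: str) -> None:
        task.attempt_count += 1
        if task.attempt_count < task.max_attempts:
            self._pending.append(task)  # immediate requeue; no backoff in this sketch

    def write_result(self, result: Result) -> Path:
        self._results.append(result)
        return Path(f"mem/result/{result.task_id}")

    def write_run_record(self, record: RunRecord) -> Path:
        self._records.append(record)
        return Path(f"mem/run/{record.run_id}")

    def stats(self) -> dict[str, int]:
        return {"inbox": len(self._pending), "outbox": len(self._results)}
```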
metaspn_ops-0.1.0/src/metaspn_ops/cli.py

@@ -0,0 +1,118 @@
from __future__ import annotations

import argparse
import importlib
import json
import sys
from pathlib import Path

from .fs_queue import FilesystemQueue
from .runner import RunnerConfig, WorkerRunner


def _load_worker(spec: str):
    if ":" not in spec:
        raise ValueError("Worker must be an import path in the form module:attr")
    module_name, attr = spec.split(":", 1)
    module = importlib.import_module(module_name)
    worker = getattr(module, attr)
    if isinstance(worker, type):
        # A class was given rather than an instance; instantiate it.
        worker = worker()
    if not hasattr(worker, "name") or not hasattr(worker, "handle"):
        raise ValueError("Loaded worker must expose name and handle(task)")
    return worker


def _parse_every(raw: str | None) -> int | None:
    if raw is None:
        return None
    raw = raw.strip().lower()
    if raw.endswith("ms"):
        # Millisecond values are floored to whole seconds, with a one-second minimum.
        return max(1, int(raw[:-2]) // 1000)
    if raw.endswith("s"):
        return int(raw[:-1])
    if raw.endswith("m"):
        return int(raw[:-1]) * 60
    if raw.endswith("h"):
        return int(raw[:-1]) * 3600
    return int(raw)


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog="metaspn")
    sub = parser.add_subparsers(dest="command", required=True)

    worker_p = sub.add_parser("worker")
    worker_sub = worker_p.add_subparsers(dest="worker_cmd", required=True)
    run_p = worker_sub.add_parser("run")
    run_p.add_argument("worker", help="Worker import path module:attr")
    run_p.add_argument("--workspace", default=".")
    run_p.add_argument("--every", default=None)
    run_p.add_argument("--max-tasks", type=int, default=None)
    run_p.add_argument("--parallel", type=int, default=1)
    run_p.add_argument("--lease-seconds", type=int, default=120)
    run_p.add_argument("--once", action="store_true")

    queue_p = sub.add_parser("queue")
    queue_sub = queue_p.add_subparsers(dest="queue_cmd", required=True)

    stats_p = queue_sub.add_parser("stats")
    stats_p.add_argument("worker")
    stats_p.add_argument("--workspace", default=".")

    retry_p = queue_sub.add_parser("retry")
    retry_p.add_argument("worker")
    retry_p.add_argument("--workspace", default=".")
    retry_p.add_argument("--task-id", default=None)

    deadletter_p = queue_sub.add_parser("deadletter")
    deadletter_sub = deadletter_p.add_subparsers(dest="deadletter_cmd", required=True)
    dl_list_p = deadletter_sub.add_parser("list")
    dl_list_p.add_argument("worker")
    dl_list_p.add_argument("--workspace", default=".")

    return parser


def main(argv: list[str] | None = None) -> int:
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.command == "worker" and args.worker_cmd == "run":
        worker = _load_worker(args.worker)
        queue = FilesystemQueue(workspace=Path(args.workspace), worker_name=worker.name)
        cfg = RunnerConfig(
            every_seconds=_parse_every(args.every),
            max_tasks=args.max_tasks,
            parallel=args.parallel,
            lease_seconds=args.lease_seconds,
            once=args.once,
        )
        runner = WorkerRunner(queue=queue, worker=worker, config=cfg)
        processed = runner.run()
        print(json.dumps({"processed": processed}))
        return 0

    if args.command == "queue" and args.queue_cmd == "stats":
        queue = FilesystemQueue(workspace=Path(args.workspace), worker_name=args.worker)
        print(json.dumps(queue.stats()))
        return 0

    if args.command == "queue" and args.queue_cmd == "retry":
        queue = FilesystemQueue(workspace=Path(args.workspace), worker_name=args.worker)
        retried = queue.retry_deadletter(task_id=args.task_id)
        print(json.dumps({"retried": retried}))
        return 0

    if args.command == "queue" and args.queue_cmd == "deadletter" and args.deadletter_cmd == "list":
        queue = FilesystemQueue(workspace=Path(args.workspace), worker_name=args.worker)
        items = [str(p) for p in queue.deadletter_items()]
        print(json.dumps({"items": items}))
        return 0

    parser.print_help()
    return 1


if __name__ == "__main__":
    sys.exit(main())
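For reference, how `--every` duration strings are interpreted. `_parse_every` is a private helper, imported here only to illustrate the accepted suffixes:

```python
from metaspn_ops.cli import _parse_every

assert _parse_every("45") == 45      # bare integers are seconds
assert _parse_every("90s") == 90
assert _parse_every("2m") == 120
assert _parse_every("1h") == 3600
assert _parse_every("500ms") == 1    # floored to whole seconds, minimum 1
assert _parse_every(None) is None    # no cadence: the runner falls back to polling
```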
metaspn_ops-0.1.0/src/metaspn_ops/fs_queue.py

@@ -0,0 +1,156 @@
from __future__ import annotations

import json
import threading
from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4

from .lease import LeaseManager
from .scheduler import TaskScheduler
from .types import Result, RunRecord, Task


def _ts_for_name(dt: datetime) -> str:
    # Sortable UTC timestamp for filenames: lexicographic order matches chronological order.
    dt = dt.astimezone(timezone.utc).replace(microsecond=0)
    return dt.strftime("%Y-%m-%dT%H%M%SZ")


def _safe_task_id(task_id: str) -> str:
    return "".join(ch if ch.isalnum() or ch in {"-", "_"} else "_" for ch in task_id)


class FilesystemQueue:
    def __init__(
        self,
        *,
        workspace: str | Path,
        worker_name: str,
        scheduler: TaskScheduler | None = None,
    ):
        self.workspace = Path(workspace)
        self.worker_name = worker_name
        self.scheduler = scheduler or TaskScheduler()

        self.inbox_dir = self.workspace / "inbox" / worker_name
        self.outbox_dir = self.workspace / "outbox" / worker_name
        self.runs_dir = self.workspace / "runs" / worker_name
        self.deadletter_dir = self.workspace / "deadletter" / worker_name
        self.lock_dir = self.workspace / "locks" / worker_name

        for folder in [
            self.inbox_dir,
            self.outbox_dir,
            self.runs_dir,
            self.deadletter_dir,
            self.lock_dir,
        ]:
            folder.mkdir(parents=True, exist_ok=True)

        self.leases = LeaseManager(self.lock_dir)
        self._io_lock = threading.RLock()

    def enqueue_task(self, task: Task, *, scheduled_for: datetime | None = None) -> Path:
        ts = scheduled_for or datetime.now(timezone.utc)
        name = f"{_ts_for_name(ts)}__t_{_safe_task_id(task.task_id)}.json"
        path = self.inbox_dir / name
        with self._io_lock:
            self._write_json(path, task.to_dict())
        return path

    def lease_next_task(self, *, owner: str, lease_seconds: int) -> tuple[Task, Path] | None:
        now_name = _ts_for_name(datetime.now(timezone.utc))
        for path in sorted(self.inbox_dir.glob("*.json")):
            # Filenames start with the scheduled timestamp; skip tasks scheduled in the future.
            scheduled_name = path.name.split("__", 1)[0]
            if scheduled_name > now_name:
                continue
            raw = self._read_json(path)
            task = Task.from_dict(raw)
            lease = self.leases.try_acquire(
                task_id=task.task_id,
                worker_name=self.worker_name,
                owner=owner,
                lease_seconds=lease_seconds,
            )
            if lease is not None:
                return task, path
        return None

    def ack_task(self, leased_path: Path) -> None:
        with self._io_lock:
            if leased_path.exists():
                raw = self._read_json(leased_path)
                task_id = raw.get("task_id")
                leased_path.unlink()
                if task_id:
                    self.leases.release(str(task_id))

    def fail_task(self, leased_path: Path, task: Task, error: str) -> None:
        task.attempt_count += 1
        can_retry = task.attempt_count < task.max_attempts
        with self._io_lock:
            if leased_path.exists():
                leased_path.unlink()
            if can_retry:
                retry_at = self.scheduler.next_retry_at(attempt_count=task.attempt_count)
                self.enqueue_task(task, scheduled_for=retry_at)
            else:
                deadletter_name = f"{_ts_for_name(datetime.now(timezone.utc))}__t_{_safe_task_id(task.task_id)}.json"
                self._write_json(
                    self.deadletter_dir / deadletter_name,
                    {
                        "task": task.to_dict(),
                        "final_error": error,
                        "deadlettered_at": datetime.now(timezone.utc).isoformat(),
                    },
                )
            self.leases.release(task.task_id)

    def write_result(self, result: Result) -> Path:
        name = f"{_ts_for_name(datetime.now(timezone.utc))}__r_{_safe_task_id(result.task_id)}__{uuid4().hex[:8]}.json"
        path = self.outbox_dir / name
        with self._io_lock:
            self._write_json(path, result.to_dict())
        return path

    def write_run_record(self, record: RunRecord) -> Path:
        name = f"{_ts_for_name(datetime.now(timezone.utc))}__run_{record.run_id}.json"
        path = self.runs_dir / name
        with self._io_lock:
            self._write_json(path, record.to_dict())
        return path

    def stats(self) -> dict[str, int]:
        return {
            "inbox": len(list(self.inbox_dir.glob("*.json"))),
            "outbox": len(list(self.outbox_dir.glob("*.json"))),
            "runs": len(list(self.runs_dir.glob("*.json"))),
            "deadletter": len(list(self.deadletter_dir.glob("*.json"))),
            "locks": len(list(self.lock_dir.glob("*.lock"))),
        }

    def deadletter_items(self) -> list[Path]:
        return sorted(self.deadletter_dir.glob("*.json"))

    def retry_deadletter(self, *, task_id: str | None = None) -> int:
        retried = 0
        for item in self.deadletter_items():
            raw = self._read_json(item)
            task = Task.from_dict(raw["task"])
            if task_id and task.task_id != task_id:
                continue
            task.attempt_count = 0
            self.enqueue_task(task)
            item.unlink()
            retried += 1
        return retried

    @staticmethod
    def _read_json(path: Path) -> dict:
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _write_json(path: Path, payload: dict) -> None:
        with path.open("w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=True)
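Putting the pieces together, the full enqueue, lease, and ack lifecycle against a throwaway workspace looks like this (illustrative sketch; any directory works as a workspace):

```python
import tempfile

from metaspn_ops.fs_queue import FilesystemQueue
from metaspn_ops.types import Task

with tempfile.TemporaryDirectory() as ws:
    q = FilesystemQueue(workspace=ws, worker_name="enrich")
    q.enqueue_task(Task(task_id="t1", task_type="enrich", payload={"x": 1}))

    leased = q.lease_next_task(owner="demo", lease_seconds=30)
    assert leased is not None
    task, path = leased

    q.ack_task(path)  # removes the inbox file and releases the lock
    print(q.stats())  # {'inbox': 0, 'outbox': 0, 'runs': 0, 'deadletter': 0, 'locks': 0}
```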
metaspn_ops-0.1.0/src/metaspn_ops/lease.py

@@ -0,0 +1,82 @@
from __future__ import annotations

import json
import os
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path


@dataclass(slots=True)
class Lease:
    task_id: str
    worker_name: str
    owner: str
    acquired_at: datetime
    expires_at: datetime

    def to_dict(self) -> dict[str, str]:
        return {
            "task_id": self.task_id,
            "worker_name": self.worker_name,
            "owner": self.owner,
            "acquired_at": self.acquired_at.isoformat(),
            "expires_at": self.expires_at.isoformat(),
        }


class LeaseManager:
    def __init__(self, lock_dir: Path):
        self.lock_dir = lock_dir
        self.lock_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _now() -> datetime:
        return datetime.now(timezone.utc)

    def _lock_path(self, task_id: str) -> Path:
        return self.lock_dir / f"{task_id}.lock"

    def try_acquire(self, *, task_id: str, worker_name: str, owner: str, lease_seconds: int) -> Lease | None:
        now = self._now()
        expires_at = now + timedelta(seconds=max(1, lease_seconds))
        lock_path = self._lock_path(task_id)

        if lock_path.exists() and not self._is_expired(lock_path, now):
            return None

        lease = Lease(task_id, worker_name, owner, now, expires_at)

        try:
            # O_CREAT | O_EXCL makes lock-file creation atomic: exactly one caller wins.
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(lease.to_dict(), f)
            return lease
        except FileExistsError:
            if self._is_expired(lock_path, now):
                # Stale lock from a crashed or slow owner: break it and retry.
                self.break_lease(task_id)
                return self.try_acquire(
                    task_id=task_id,
                    worker_name=worker_name,
                    owner=owner,
                    lease_seconds=lease_seconds,
                )
            return None

    def release(self, task_id: str) -> None:
        lock_path = self._lock_path(task_id)
        if lock_path.exists():
            lock_path.unlink()

    def break_lease(self, task_id: str) -> None:
        self.release(task_id)

    def _is_expired(self, lock_path: Path, now: datetime) -> bool:
        try:
            raw = json.loads(lock_path.read_text(encoding="utf-8"))
            expires_at = datetime.fromisoformat(raw["expires_at"])
            if expires_at.tzinfo is None:
                expires_at = expires_at.replace(tzinfo=timezone.utc)
            return expires_at <= now
        except Exception:
            # Unreadable or corrupt lock files are treated as expired.
            return True
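Locks are plain JSON files created with `O_CREAT | O_EXCL`, so acquisition is atomic per task id. An illustrative sequence (not from the package; timings are examples) showing refusal while a lease is live and takeover after it expires:

```python
import tempfile
import time
from pathlib import Path

from metaspn_ops.lease import LeaseManager

with tempfile.TemporaryDirectory() as d:
    leases = LeaseManager(Path(d))

    first = leases.try_acquire(task_id="t1", worker_name="enrich", owner="a", lease_seconds=1)
    assert first is not None

    # A second owner is refused while the lock file is live...
    assert leases.try_acquire(task_id="t1", worker_name="enrich", owner="b", lease_seconds=1) is None

    time.sleep(1.1)
    # ...but succeeds once expires_at has passed: the stale lock is broken and re-acquired.
    stolen = leases.try_acquire(task_id="t1", worker_name="enrich", owner="b", lease_seconds=30)
    assert stolen is not None and stolen.owner == "b"
```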
metaspn_ops-0.1.0/src/metaspn_ops/runner.py

@@ -0,0 +1,112 @@
from __future__ import annotations

import concurrent.futures
import socket
import time
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Protocol
from uuid import uuid4

from .backends import QueueBackend
from .types import Result, RunRecord, Task


class Worker(Protocol):
    name: str

    def handle(self, task: Task) -> Result:
        ...


@dataclass(slots=True)
class RunnerConfig:
    every_seconds: int | None = None
    max_tasks: int | None = None
    parallel: int = 1
    lease_seconds: int = 120
    once: bool = False
    poll_interval_seconds: float = 0.5


class WorkerRunner:
    def __init__(self, *, queue: QueueBackend, worker: Worker, config: RunnerConfig | None = None):
        self.queue = queue
        self.worker = worker
        self.config = config or RunnerConfig()
        self.owner = f"{socket.gethostname()}:{uuid4().hex[:8]}"

    def run(self) -> int:
        if self.config.once:
            return self._run_batch()

        # Continuous mode: run batches until interrupted, either polling or on a fixed cadence.
        processed = 0
        while True:
            start = time.monotonic()
            processed += self._run_batch()
            if self.config.every_seconds is None:
                time.sleep(self.config.poll_interval_seconds)
                continue
            elapsed = time.monotonic() - start
            sleep_for = max(0.0, self.config.every_seconds - elapsed)
            time.sleep(sleep_for)
        return processed

    def _run_batch(self) -> int:
        # Lease up to max_tasks tasks, then process them on the thread pool.
        target = self.config.max_tasks or 1
        processed = 0

        with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, self.config.parallel)) as pool:
            futures = []
            for _ in range(target):
                leased = self.queue.lease_next_task(owner=self.owner, lease_seconds=self.config.lease_seconds)
                if leased is None:
                    break
                task, path = leased
                futures.append(pool.submit(self._process_one, task, path))

            for future in concurrent.futures.as_completed(futures):
                processed += 1
                future.result()

        return processed

    def _process_one(self, task: Task, path: Path):
        started = datetime.now(timezone.utc)
        error = None
        status = "ok"
        try:
            result = self.worker.handle(task)
            if result.task_id != task.task_id:
                result.task_id = task.task_id
            self.queue.write_result(result)
            self.queue.ack_task(path)
        except Exception as exc:
            status = "error"
            error = str(exc)
            self.queue.fail_task(path, task, error)
            self.queue.write_result(
                Result(
                    task_id=task.task_id,
                    status="error",
                    payload={},
                    error=error,
                    trace_context=task.trace_context,
                )
            )
        finally:
            finished = datetime.now(timezone.utc)
            duration_ms = int((finished - started).total_seconds() * 1000)
            self.queue.write_run_record(
                RunRecord(
                    run_id=uuid4().hex,
                    worker_name=self.worker.name,
                    task_id=task.task_id,
                    started_at=started.isoformat(),
                    finished_at=finished.isoformat(),
                    duration_ms=duration_ms,
                    status=status,
                    error=error,
                    trace_context=task.trace_context,
                )
            )
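The runner is what wires a backend and a worker together; with `once=True` it processes a single batch and returns the count, which is what the CLI's `--once` flag uses. A self-contained sketch against a throwaway workspace (the `Upper` worker is illustrative):

```python
import tempfile

from metaspn_ops import FilesystemQueue, Result, Task, WorkerRunner
from metaspn_ops.runner import RunnerConfig


class Upper:
    name = "upper"

    def handle(self, task: Task) -> Result:
        return Result(task_id=task.task_id, status="ok",
                      payload={"text": task.payload["text"].upper()})


with tempfile.TemporaryDirectory() as ws:
    queue = FilesystemQueue(workspace=ws, worker_name="upper")
    queue.enqueue_task(Task(task_id="t1", task_type="upper", payload={"text": "hi"}))

    runner = WorkerRunner(queue=queue, worker=Upper(),
                          config=RunnerConfig(once=True, max_tasks=5))
    print(runner.run())  # 1 -- one task processed; the result lands in outbox/upper/
```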
metaspn_ops-0.1.0/src/metaspn_ops/scheduler.py

@@ -0,0 +1,14 @@
from __future__ import annotations

from datetime import datetime, timedelta, timezone


class TaskScheduler:
    def __init__(self, *, base_delay_seconds: int = 5, max_delay_seconds: int = 3600):
        self.base_delay_seconds = max(1, base_delay_seconds)
        self.max_delay_seconds = max(self.base_delay_seconds, max_delay_seconds)

    def next_retry_at(self, *, attempt_count: int, now: datetime | None = None) -> datetime:
        now = now or datetime.now(timezone.utc)
        # Exponential backoff: base * 2^(attempt_count - 1), capped at max_delay_seconds.
        delay = min(self.max_delay_seconds, self.base_delay_seconds * (2 ** max(0, attempt_count - 1)))
        return now + timedelta(seconds=delay)
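Worked numbers for the backoff under the defaults (base 5s, cap 3600s), as an illustration:

```python
from datetime import datetime, timezone

from metaspn_ops.scheduler import TaskScheduler

s = TaskScheduler(base_delay_seconds=5, max_delay_seconds=3600)
now = datetime(2026, 1, 1, tzinfo=timezone.utc)
for attempt in (1, 2, 3, 11):
    delay = (s.next_retry_at(attempt_count=attempt, now=now) - now).total_seconds()
    print(attempt, delay)
# 1 -> 5.0, 2 -> 10.0, 3 -> 20.0, 11 -> 3600.0 (5 * 2**10 = 5120 hits the cap)
```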
metaspn_ops-0.1.0/src/metaspn_ops/types.py

@@ -0,0 +1,103 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any


UTC = timezone.utc


def utc_now() -> datetime:
    return datetime.now(tz=UTC)


@dataclass(slots=True)
class Task:
    task_id: str
    task_type: str
    payload: dict[str, Any]
    trace_context: dict[str, Any] = field(default_factory=dict)
    attempt_count: int = 0
    max_attempts: int = 3
    created_at: str = field(default_factory=lambda: utc_now().isoformat())

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> "Task":
        return cls(
            task_id=str(raw["task_id"]),
            task_type=str(raw.get("task_type", "unknown")),
            payload=dict(raw.get("payload", {})),
            trace_context=dict(raw.get("trace_context", {})),
            attempt_count=int(raw.get("attempt_count", 0)),
            max_attempts=int(raw.get("max_attempts", 3)),
            created_at=str(raw.get("created_at", utc_now().isoformat())),
        )

    def to_dict(self) -> dict[str, Any]:
        return {
            "task_id": self.task_id,
            "task_type": self.task_type,
            "payload": self.payload,
            "trace_context": self.trace_context,
            "attempt_count": self.attempt_count,
            "max_attempts": self.max_attempts,
            "created_at": self.created_at,
        }


@dataclass(slots=True)
class Result:
    task_id: str
    status: str
    payload: dict[str, Any] = field(default_factory=dict)
    error: str | None = None
    trace_context: dict[str, Any] = field(default_factory=dict)
    produced_at: str = field(default_factory=lambda: utc_now().isoformat())

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> "Result":
        return cls(
            task_id=str(raw["task_id"]),
            status=str(raw.get("status", "ok")),
            payload=dict(raw.get("payload", {})),
            error=raw.get("error"),
            trace_context=dict(raw.get("trace_context", {})),
            produced_at=str(raw.get("produced_at", utc_now().isoformat())),
        )

    def to_dict(self) -> dict[str, Any]:
        return {
            "task_id": self.task_id,
            "status": self.status,
            "payload": self.payload,
            "error": self.error,
            "trace_context": self.trace_context,
            "produced_at": self.produced_at,
        }


@dataclass(slots=True)
class RunRecord:
    run_id: str
    worker_name: str
    task_id: str
    started_at: str
    finished_at: str
    duration_ms: int
    status: str
    error: str | None
    trace_context: dict[str, Any]

    def to_dict(self) -> dict[str, Any]:
        return {
            "run_id": self.run_id,
            "worker_name": self.worker_name,
            "task_id": self.task_id,
            "started_at": self.started_at,
            "finished_at": self.finished_at,
            "duration_ms": self.duration_ms,
            "status": self.status,
            "error": self.error,
            "trace_context": self.trace_context,
        }
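`Task.from_dict` is deliberately lenient about missing keys, which keeps hand-written or older inbox files loadable. A small round-trip check, illustrative only:

```python
from metaspn_ops.types import Task

t = Task(task_id="t1", task_type="enrich", payload={"x": 1})
assert Task.from_dict(t.to_dict()) == t          # serialization round-trips losslessly

minimal = Task.from_dict({"task_id": "t2"})      # only task_id is required
assert minimal.task_type == "unknown"
assert minimal.attempt_count == 0 and minimal.max_attempts == 3
```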
metaspn_ops-0.1.0/src/metaspn_ops.egg-info/PKG-INFO

@@ -0,0 +1,107 @@
(identical to metaspn_ops-0.1.0/PKG-INFO above; setuptools copies the package
metadata into the egg-info directory when building the sdist)
metaspn_ops-0.1.0/src/metaspn_ops.egg-info/SOURCES.txt

@@ -0,0 +1,18 @@
LICENSE
README.md
pyproject.toml
src/metaspn_ops/__init__.py
src/metaspn_ops/backends.py
src/metaspn_ops/cli.py
src/metaspn_ops/fs_queue.py
src/metaspn_ops/lease.py
src/metaspn_ops/runner.py
src/metaspn_ops/scheduler.py
src/metaspn_ops/types.py
src/metaspn_ops.egg-info/PKG-INFO
src/metaspn_ops.egg-info/SOURCES.txt
src/metaspn_ops.egg-info/dependency_links.txt
src/metaspn_ops.egg-info/entry_points.txt
src/metaspn_ops.egg-info/requires.txt
src/metaspn_ops.egg-info/top_level.txt
tests/test_ops_queue.py
metaspn_ops-0.1.0/src/metaspn_ops.egg-info/dependency_links.txt

@@ -0,0 +1 @@
(a single blank line; the package declares no dependency links)
metaspn_ops-0.1.0/src/metaspn_ops.egg-info/top_level.txt

@@ -0,0 +1 @@
metaspn_ops
metaspn_ops-0.1.0/tests/test_ops_queue.py

@@ -0,0 +1,108 @@
from __future__ import annotations

import json
import tempfile
import threading
import time
import unittest
from datetime import datetime, timedelta, timezone

from metaspn_ops.fs_queue import FilesystemQueue
from metaspn_ops.types import Result, Task


class FilesystemQueueTests(unittest.TestCase):
    def setUp(self):
        self.tmp = tempfile.TemporaryDirectory()
        self.queue = FilesystemQueue(workspace=self.tmp.name, worker_name="enrich")

    def tearDown(self):
        self.tmp.cleanup()

    def test_lock_correctness_under_concurrency(self):
        task = Task(task_id="t1", task_type="enrich", payload={"x": 1})
        self.queue.enqueue_task(task)

        got = []
        lock = threading.Lock()

        def attempt():
            leased = self.queue.lease_next_task(owner=threading.current_thread().name, lease_seconds=30)
            with lock:
                got.append(leased is not None)

        threads = [threading.Thread(target=attempt) for _ in range(8)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        self.assertEqual(sum(1 for x in got if x), 1)

    def test_lease_expiration_behavior(self):
        task = Task(task_id="t-lease", task_type="enrich", payload={})
        self.queue.enqueue_task(task)

        first = self.queue.lease_next_task(owner="a", lease_seconds=1)
        self.assertIsNotNone(first)

        time.sleep(1.2)
        second = self.queue.lease_next_task(owner="b", lease_seconds=10)
        self.assertIsNotNone(second)

    def test_retry_scheduling(self):
        task = Task(task_id="t-retry", task_type="enrich", payload={}, max_attempts=3)
        self.queue.enqueue_task(task)
        leased = self.queue.lease_next_task(owner="runner", lease_seconds=10)
        self.assertIsNotNone(leased)
        leased_task, leased_path = leased

        before = datetime.now(timezone.utc)
        self.queue.fail_task(leased_path, leased_task, "boom")

        inbox_files = sorted(self.queue.inbox_dir.glob("*.json"))
        self.assertEqual(len(inbox_files), 1)
        scheduled = inbox_files[0].name.split("__", 1)[0]
        self.assertGreaterEqual(scheduled, before.strftime("%Y-%m-%dT%H%M%SZ"))

        raw = json.loads(inbox_files[0].read_text())
        self.assertEqual(raw["attempt_count"], 1)

    def test_deadletter_handling(self):
        task = Task(task_id="t-dead", task_type="enrich", payload={}, max_attempts=1)
        self.queue.enqueue_task(task)
        leased = self.queue.lease_next_task(owner="runner", lease_seconds=10)
        self.assertIsNotNone(leased)
        leased_task, leased_path = leased

        self.queue.fail_task(leased_path, leased_task, "permanent")

        self.assertEqual(len(list(self.queue.inbox_dir.glob("*.json"))), 0)
        dead = list(self.queue.deadletter_dir.glob("*.json"))
        self.assertEqual(len(dead), 1)

    def test_deterministic_ordering(self):
        base = datetime.now(timezone.utc) - timedelta(minutes=1)
        self.queue.enqueue_task(Task(task_id="c", task_type="x", payload={}), scheduled_for=base + timedelta(seconds=3))
        self.queue.enqueue_task(Task(task_id="a", task_type="x", payload={}), scheduled_for=base + timedelta(seconds=1))
        self.queue.enqueue_task(Task(task_id="b", task_type="x", payload={}), scheduled_for=base + timedelta(seconds=2))

        leased_ids = []
        for _ in range(3):
            leased = self.queue.lease_next_task(owner="runner", lease_seconds=20)
            self.assertIsNotNone(leased)
            t, p = leased
            leased_ids.append(t.task_id)
            self.queue.ack_task(p)

        self.assertEqual(leased_ids, sorted(leased_ids))

    def test_results_roundtrip_json(self):
        path = self.queue.write_result(Result(task_id="t1", status="ok", payload={"x": 1}))
        data = json.loads(path.read_text())
        self.assertEqual(data["task_id"], "t1")
        self.assertEqual(data["payload"]["x"], 1)


if __name__ == "__main__":
    unittest.main()