khnm 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khnm-1.0.0/LICENSE +21 -0
- khnm-1.0.0/PKG-INFO +9 -0
- khnm-1.0.0/README.md +163 -0
- khnm-1.0.0/pyproject.toml +37 -0
- khnm-1.0.0/setup.cfg +4 -0
- khnm-1.0.0/src/khnm/__init__.py +5 -0
- khnm-1.0.0/src/khnm/consumers.py +79 -0
- khnm-1.0.0/src/khnm/exceptions.py +10 -0
- khnm-1.0.0/src/khnm/main.py +61 -0
- khnm-1.0.0/src/khnm/pipelines.py +536 -0
- khnm-1.0.0/src/khnm/pipes.py +171 -0
- khnm-1.0.0/src/khnm/producers.py +94 -0
- khnm-1.0.0/src/khnm/serialization.py +22 -0
- khnm-1.0.0/src/khnm/time.py +17 -0
- khnm-1.0.0/src/khnm/types.py +24 -0
- khnm-1.0.0/src/khnm.egg-info/PKG-INFO +9 -0
- khnm-1.0.0/src/khnm.egg-info/SOURCES.txt +24 -0
- khnm-1.0.0/src/khnm.egg-info/dependency_links.txt +1 -0
- khnm-1.0.0/src/khnm.egg-info/entry_points.txt +2 -0
- khnm-1.0.0/src/khnm.egg-info/requires.txt +2 -0
- khnm-1.0.0/src/khnm.egg-info/top_level.txt +1 -0
- khnm-1.0.0/tests/test_consumers.py +43 -0
- khnm-1.0.0/tests/test_pipelines.py +394 -0
- khnm-1.0.0/tests/test_pipes.py +423 -0
- khnm-1.0.0/tests/test_producers.py +34 -0
- khnm-1.0.0/tests/test_serialization.py +58 -0
khnm-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Marcin Sawicki
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
khnm-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: khnm
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Micro-framework for implementing data pipelines over RabbitMQ
|
|
5
|
+
Requires-Python: >=3.14
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Dist: aio-pika>=9.5.8
|
|
8
|
+
Requires-Dist: pydantic>=2.12.5
|
|
9
|
+
Dynamic: license-file
|
khnm-1.0.0/README.md
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# khnm
|
|
2
|
+
|
|
3
|
+
A pipeline framework for building message-processing workflows on top of RabbitMQ. Define pipelines as chains of callbacks connected by queues, and run each stage independently with configurable concurrency.
|
|
4
|
+
|
|
5
|
+
## Requirements
|
|
6
|
+
|
|
7
|
+
- Python >= 3.14
|
|
8
|
+
- RabbitMQ
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install khnm
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quick start
|
|
17
|
+
|
|
18
|
+
Define your data models and callbacks, then chain them into a pipeline:
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import khnm
|
|
22
|
+
import pydantic
|
|
23
|
+
from typing import AsyncGenerator, List
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Task(pydantic.BaseModel):
|
|
27
|
+
data: List[int]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Result(pydantic.BaseModel):
|
|
31
|
+
data: int
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def generate() -> AsyncGenerator[Task, None]:
|
|
35
|
+
for i in range(100):
|
|
36
|
+
yield Task(data=[i, i + 1, i + 2])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def split(task: Task) -> List[Result]:
|
|
40
|
+
return [Result(data=n) for n in task.data]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def print_result(result: Result) -> None:
|
|
44
|
+
print(result.data)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
pipeline = (
|
|
48
|
+
khnm.make_pipeline()
|
|
49
|
+
.add("generator", generate, pipe_length=128)
|
|
50
|
+
.add("splitter", split, pipe_length=128, prefetch_count=1)
|
|
51
|
+
.add("printer", print_result, prefetch_count=1)
|
|
52
|
+
.build()
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Run each stage as a separate process:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
export RABBITMQ_URL="amqp://guest:guest@localhost/"
|
|
60
|
+
|
|
61
|
+
khnm my_app:pipeline generator
|
|
62
|
+
khnm my_app:pipeline splitter
|
|
63
|
+
khnm my_app:pipeline printer
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Pipeline structure
|
|
67
|
+
|
|
68
|
+
A pipeline is an ordered chain of stages:
|
|
69
|
+
|
|
70
|
+
- **Source** (first stage) -- a sync or async generator that produces messages. Runs until the generator is exhausted, then exits. It does not restart or reconnect; when the source process finishes, no further messages will be produced.
|
|
71
|
+
- **Node** (middle stages) -- a sync or async function that consumes a message and returns one, many, or no output messages
|
|
72
|
+
- **Sink** (last stage) -- a sync or async function that consumes messages without forwarding
|
|
73
|
+
|
|
74
|
+
All messages are [Pydantic](https://docs.pydantic.dev/) `BaseModel` instances, serialized automatically between stages via RabbitMQ queues.
|
|
75
|
+
|
|
76
|
+
## Callbacks
|
|
77
|
+
|
|
78
|
+
Callbacks are automatically detected as sync or async. Both styles work interchangeably:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# Sync
|
|
82
|
+
def process(task: Task) -> Result:
|
|
83
|
+
return Result(data=task.data * 2)
|
|
84
|
+
|
|
85
|
+
# Async
|
|
86
|
+
async def process(task: Task) -> Result:
|
|
87
|
+
await some_io()
|
|
88
|
+
return Result(data=task.data * 2)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
A callback can return:
|
|
92
|
+
- A single `BaseModel` -- published as one message
|
|
93
|
+
- A list of `BaseModel` -- each item published as a separate message
|
|
94
|
+
- `None` -- nothing is published (filtering)
|
|
95
|
+
|
|
96
|
+
## Concurrency
|
|
97
|
+
|
|
98
|
+
### Async tasks
|
|
99
|
+
|
|
100
|
+
Run multiple concurrent asyncio tasks per process with `--tasks`:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
khnm my_app:pipeline splitter --tasks 4
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Each task gets its own consumer loop. This is the primary concurrency lever for async callbacks.
|
|
107
|
+
|
|
108
|
+
### Threads
|
|
109
|
+
|
|
110
|
+
Run sync callbacks on a thread pool with `--threads`:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
khnm my_app:pipeline splitter --threads 4
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
This offloads blocking sync callbacks to a `ThreadPoolExecutor`, preventing them from blocking the event loop. Has no effect on async callbacks.
|
|
117
|
+
|
|
118
|
+
## Per-stage options
|
|
119
|
+
|
|
120
|
+
Options are passed as keyword arguments to `.add()`:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
khnm.make_pipeline()
|
|
124
|
+
.add("source", generate, pipe_length=256, durable=True)
|
|
125
|
+
.add("worker", process,
|
|
126
|
+
pipe_length=128,
|
|
127
|
+
prefetch_count=10,
|
|
128
|
+
backoff_seconds=0.5,
|
|
129
|
+
max_retries=5,
|
|
130
|
+
exponential_backoff=True,
|
|
131
|
+
max_backoff_seconds=30.0,
|
|
132
|
+
apply_jitter=True,
|
|
133
|
+
connection_max_retries=10,
|
|
134
|
+
connection_backoff_seconds=2.0)
|
|
135
|
+
.add("sink", output, prefetch_count=1)
|
|
136
|
+
.build()
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
| Option | Applies to | Description |
|
|
140
|
+
|---|---|--------------------------------------------------------------------------------------------------------------------------------------------|
|
|
141
|
+
| `pipe_length` | Source, Node | Max queue length |
|
|
142
|
+
| `durable` | Source, Node | Persist messages to disk |
|
|
143
|
+
| `prefetch_count` | Node, Sink | RabbitMQ QoS -- max unacknowledged messages per consumer. Important: if you use multiple threads, set this to at least the thread count. |
|
|
144
|
+
| `backoff_seconds` | Source, Node | Initial backoff for publish retries |
|
|
145
|
+
| `max_retries` | Source, Node | Max publish retry attempts |
|
|
146
|
+
| `exponential_backoff` | Source, Node | Enable exponential backoff |
|
|
147
|
+
| `max_backoff_seconds` | Source, Node | Cap on backoff duration |
|
|
148
|
+
| `apply_jitter` | Source, Node | Add randomness to backoff |
|
|
149
|
+
| `connection_max_retries` | Node, Sink | Max connection retry attempts |
|
|
150
|
+
| `connection_backoff_seconds` | Node, Sink | Backoff between connection retries |
|
|
151
|
+
|
|
152
|
+
## CLI reference
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
khnm <module:pipeline> <node> [--tasks N] [--threads N]
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
- `module:pipeline` -- import path to the built `Pipeline` object (e.g. `my_app:pipeline`)
|
|
159
|
+
- `node` -- name of the stage to run
|
|
160
|
+
- `--tasks N` -- number of concurrent async tasks (default: 1)
|
|
161
|
+
- `--threads N` -- number of threads for sync callbacks (default: 1)
|
|
162
|
+
|
|
163
|
+
The `RABBITMQ_URL` environment variable must be set.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "khnm"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Micro-framework for implementing data pipelines over RabbitMQ"
|
|
5
|
+
requires-python = ">=3.14"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"aio-pika>=9.5.8",
|
|
8
|
+
"pydantic>=2.12.5",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.scripts]
|
|
12
|
+
khnm = "khnm.main:main"
|
|
13
|
+
|
|
14
|
+
[dependency-groups]
|
|
15
|
+
dev = [
|
|
16
|
+
"httpx>=0.28.1",
|
|
17
|
+
"pika>=1.3.2",
|
|
18
|
+
"pre-commit>=4.5.1",
|
|
19
|
+
"pytest>=9.0.2",
|
|
20
|
+
"pytest-asyncio>=1.3.0",
|
|
21
|
+
"ruff>=0.14.14",
|
|
22
|
+
"testcontainers>=4.14.0",
|
|
23
|
+
"ty>=0.0.13",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[tool.pytest.ini_options]
|
|
27
|
+
asyncio_mode = "auto"
|
|
28
|
+
asyncio_default_fixture_loop_scope = "session"
|
|
29
|
+
asyncio_default_test_loop_scope = "session"
|
|
30
|
+
filterwarnings = [
|
|
31
|
+
"ignore::DeprecationWarning:testcontainers.*",
|
|
32
|
+
]
|
|
33
|
+
pythonpath = ["src"]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
[tool.hatch.build.targets.wheel]
|
|
37
|
+
packages = ["src/khnm"]
|
khnm-1.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from collections.abc import AsyncGenerator
|
|
2
|
+
from contextlib import asynccontextmanager, AbstractAsyncContextManager
|
|
3
|
+
from typing import Optional, AsyncIterator
|
|
4
|
+
|
|
5
|
+
from aio_pika.abc import (
|
|
6
|
+
AbstractRobustConnection,
|
|
7
|
+
AbstractChannel,
|
|
8
|
+
AbstractIncomingMessage,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from khnm.exceptions import UpstreamPipeUnavailable
|
|
12
|
+
from khnm.pipes import get_queue, wait_for_pipe, get_queue_name
|
|
13
|
+
from khnm.time import Clock, UtcClock
|
|
14
|
+
from khnm.types import QueueGetterT
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def consume(
    amqp_connection: AbstractRobustConnection,
    upstream_pipe: str,
    upstream_connection_max_retries: Optional[int] = None,
    upstream_connection_backoff_seconds: float = 1.0,
    upstream_queue_getter: QueueGetterT = get_queue,
    prefetch_count: Optional[int] = None,
    clock: Clock = UtcClock(),
) -> AsyncGenerator[AbstractAsyncContextManager[AbstractIncomingMessage], None]:
    """Stream messages from *upstream_pipe*, one ack-managing context manager each.

    Opens a dedicated channel on *amqp_connection*, waits for the upstream
    pipe to become available (see ``_connect``), then iterates its queue.
    Every yielded item is an async context manager wrapping one incoming
    message: it acks on clean exit and nacks (with requeue) if the consumer
    body raises.  The channel is closed when the generator exits, whether
    normally or via an exception.

    Raises:
        UpstreamPipeUnavailable: if the upstream pipe never appeared within
            the configured retry budget.
    """
    chan = await _connect(
        connection=amqp_connection,
        upstream_pipe=upstream_pipe,
        upstream_connection_backoff_seconds=upstream_connection_backoff_seconds,
        upstream_connection_max_retries=upstream_connection_max_retries,
        prefetch_count=prefetch_count,
        queue_getter=upstream_queue_getter,
        clock=clock,
    )
    try:
        source = await upstream_queue_getter(chan, get_queue_name(upstream_pipe))
        async with source.iterator() as incoming:
            async for raw_message in incoming:
                yield _handle_message(raw_message)
    finally:
        # Always release the channel, even if iteration is abandoned early.
        await chan.close()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def _connect(
    connection: AbstractRobustConnection,
    upstream_pipe: str,
    upstream_connection_backoff_seconds: float = 1.0,
    upstream_connection_max_retries: Optional[int] = None,
    prefetch_count: Optional[int] = None,
    queue_getter: QueueGetterT = get_queue,
    clock: Clock = UtcClock(),
) -> AbstractChannel:
    """Open a channel, optionally apply QoS, and wait for the upstream pipe.

    Returns the channel once the pipe is reachable.

    Raises:
        UpstreamPipeUnavailable: when the pipe did not become available
            within the configured retries.
    """
    new_channel = await connection.channel()
    # QoS is opt-in: only cap unacked deliveries when a limit was requested.
    if prefetch_count is not None:
        await new_channel.set_qos(prefetch_count=prefetch_count)
    pipe_ready = await wait_for_pipe(
        new_channel,
        upstream_pipe,
        upstream_connection_backoff_seconds,
        upstream_connection_max_retries,
        clock,
        queue_getter,
    )
    if pipe_ready:
        return new_channel
    raise UpstreamPipeUnavailable(f"Upstream unavailable: {upstream_pipe}")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@asynccontextmanager
async def _handle_message(
    message: AbstractIncomingMessage,
) -> AsyncIterator[AbstractIncomingMessage]:
    """Tie message acknowledgement to the outcome of the consumer body.

    Acks *message* when the ``with`` body completes normally; if the body
    raises, nacks it with requeue so another consumer can retry, then
    re-raises the original exception.
    """
    try:
        yield message
    except Exception:
        # Processing failed -- hand the message back to the queue for retry.
        await message.nack(requeue=True)
        raise
    else:
        await message.ack()
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
import importlib
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from aio_pika import connect_robust
|
|
7
|
+
|
|
8
|
+
from khnm.pipelines import Pipeline
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_pipeline_object(import_string: str) -> Pipeline:
    """Resolve a ``module:attribute`` import string to a pipeline object.

    Args:
        import_string: Import path in the form ``"my_app:pipeline"``.

    Returns:
        The attribute found at the import path (expected to be a built
        ``Pipeline``; the lookup itself is not type-checked).

    Raises:
        ValueError: if the string lacks a ``:``-separated attribute part.
        ModuleNotFoundError: if the module cannot be imported.
        AttributeError: if the module has no such attribute.
    """
    module_name, _, attribute_name = import_string.partition(":")
    if not attribute_name:
        # Fail early with a clear message instead of an opaque unpacking
        # error (or an empty-name getattr failure) deep in the traceback.
        raise ValueError(
            f"Invalid pipeline path {import_string!r}; expected 'module:attribute'"
        )
    module = importlib.import_module(module_name)
    return getattr(module, attribute_name)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def run_node(
    pipeline: Pipeline,
    node_name: str,
    connection_string: str,
    tasks: int = 1,
    threads: int = 1,
) -> None:
    """Connect to RabbitMQ and run *tasks* concurrent consumer loops for a node.

    Args:
        pipeline: Built ``Pipeline`` object.
        node_name: Name of the stage to run.
        connection_string: AMQP URL to connect to.
        tasks: Number of concurrent asyncio tasks, each running its own
            consumer loop for the node.
        threads: Thread-pool size forwarded to each ``pipeline.run`` call
            (used for sync callbacks).
    """
    connection = await connect_robust(connection_string)
    try:
        await asyncio.gather(
            *(pipeline.run(connection, node_name, threads) for _ in range(tasks))
        )
    finally:
        # Close the AMQP connection on exit (normal or failed) so the
        # process does not leak the socket and its heartbeat machinery.
        await connection.close()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def main() -> None:
    """CLI entry point: parse arguments, validate them, run one pipeline node."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "pipeline",
        help="Where to look for pipeline object, in format 'module:attribute' (e.g. 'main:pipeline')",
    )
    parser.add_argument("node", help="Node name to run")
    parser.add_argument(
        "--tasks",
        type=int,
        default=1,
        help="Number of concurrent async tasks to run",
    )
    parser.add_argument(
        "--threads",
        type=int,
        default=1,
        help="Number of concurrent threads to run (sync callbacks only)",
    )
    options = parser.parse_args()

    # Validate environment and flags before importing the user's module.
    amqp_url = os.getenv("RABBITMQ_URL")
    if not amqp_url:
        raise SystemExit("RABBITMQ_URL not set")
    if options.tasks < 1:
        raise SystemExit("Number of tasks must be greater than 0")
    if options.threads < 1:
        raise SystemExit("Number of threads must be greater than 0")

    target = load_pipeline_object(options.pipeline)
    asyncio.run(
        run_node(target, options.node, amqp_url, options.tasks, options.threads)
    )
|