khnm 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
khnm-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Marcin Sawicki
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
khnm-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: khnm
3
+ Version: 1.0.0
4
+ Summary: Micro-framework for implementing data pipelines over RabbitMQ
5
+ Requires-Python: >=3.14
6
+ License-File: LICENSE
7
+ Requires-Dist: aio-pika>=9.5.8
8
+ Requires-Dist: pydantic>=2.12.5
9
+ Dynamic: license-file
khnm-1.0.0/README.md ADDED
@@ -0,0 +1,163 @@
1
+ # khnm
2
+
3
+ A pipeline framework for building message-processing workflows on top of RabbitMQ. Define pipelines as chains of callbacks connected by queues, and run each stage independently with configurable concurrency.
4
+
5
+ ## Requirements
6
+
7
+ - Python >= 3.14
8
+ - RabbitMQ
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ pip install khnm
14
+ ```
15
+
16
+ ## Quick start
17
+
18
+ Define your data models and callbacks, then chain them into a pipeline:
19
+
20
+ ```python
21
+ import khnm
22
+ import pydantic
23
+ from typing import AsyncGenerator, List
24
+
25
+
26
+ class Task(pydantic.BaseModel):
27
+ data: List[int]
28
+
29
+
30
+ class Result(pydantic.BaseModel):
31
+ data: int
32
+
33
+
34
+ async def generate() -> AsyncGenerator[Task, None]:
35
+ for i in range(100):
36
+ yield Task(data=[i, i + 1, i + 2])
37
+
38
+
39
+ def split(task: Task) -> List[Result]:
40
+ return [Result(data=n) for n in task.data]
41
+
42
+
43
+ def print_result(result: Result) -> None:
44
+ print(result.data)
45
+
46
+
47
+ pipeline = (
48
+ khnm.make_pipeline()
49
+ .add("generator", generate, pipe_length=128)
50
+ .add("splitter", split, pipe_length=128, prefetch_count=1)
51
+ .add("printer", print_result, prefetch_count=1)
52
+ .build()
53
+ )
54
+ ```
55
+
56
+ Run each stage as a separate process:
57
+
58
+ ```bash
59
+ export RABBITMQ_URL="amqp://guest:guest@localhost/"
60
+
61
+ khnm my_app:pipeline generator
62
+ khnm my_app:pipeline splitter
63
+ khnm my_app:pipeline printer
64
+ ```
65
+
66
+ ## Pipeline structure
67
+
68
+ A pipeline is an ordered chain of stages:
69
+
70
+ - **Source** (first stage) -- a sync or async generator that produces messages. Runs until the generator is exhausted, then exits. It does not restart or reconnect; when the source process finishes, no further messages will be produced.
71
+ - **Node** (middle stages) -- a sync or async function that consumes a message and returns one, many, or no output messages
72
+ - **Sink** (last stage) -- a sync or async function that consumes messages without forwarding
73
+
74
+ All messages are [Pydantic](https://docs.pydantic.dev/) `BaseModel` instances, serialized automatically between stages via RabbitMQ queues.
75
+
76
+ ## Callbacks
77
+
78
+ Callbacks are automatically detected as sync or async. Both styles work interchangeably:
79
+
80
+ ```python
81
+ # Sync
82
+ def process(task: Task) -> Result:
83
+ return Result(data=task.data * 2)
84
+
85
+ # Async
86
+ async def process(task: Task) -> Result:
87
+ await some_io()
88
+ return Result(data=task.data * 2)
89
+ ```
90
+
91
+ A callback can return:
92
+ - A single `BaseModel` -- published as one message
93
+ - A list of `BaseModel` -- each item published as a separate message
94
+ - `None` -- nothing is published (filtering)
95
+
96
+ ## Concurrency
97
+
98
+ ### Async tasks
99
+
100
+ Run multiple concurrent asyncio tasks per process with `--tasks`:
101
+
102
+ ```bash
103
+ khnm my_app:pipeline splitter --tasks 4
104
+ ```
105
+
106
+ Each task gets its own consumer loop. This is the primary concurrency lever for async callbacks.
107
+
108
+ ### Threads
109
+
110
+ Run sync callbacks on a thread pool with `--threads`:
111
+
112
+ ```bash
113
+ khnm my_app:pipeline splitter --threads 4
114
+ ```
115
+
116
+ This offloads blocking sync callbacks to a `ThreadPoolExecutor`, preventing them from blocking the event loop. Has no effect on async callbacks.
117
+
118
+ ## Per-stage options
119
+
120
+ Options are passed as keyword arguments to `.add()`:
121
+
122
+ ```python
123
+ khnm.make_pipeline()
124
+ .add("source", generate, pipe_length=256, durable=True)
125
+ .add("worker", process,
126
+ pipe_length=128,
127
+ prefetch_count=10,
128
+ backoff_seconds=0.5,
129
+ max_retries=5,
130
+ exponential_backoff=True,
131
+ max_backoff_seconds=30.0,
132
+ apply_jitter=True,
133
+ connection_max_retries=10,
134
+ connection_backoff_seconds=2.0)
135
+ .add("sink", output, prefetch_count=1)
136
+ .build()
137
+ ```
138
+
139
+ | Option | Applies to | Description |
140
+ |---|---|--------------------------------------------------------------------------------------------------------------------------------------------|
141
+ | `pipe_length` | Source, Node | Max queue length |
142
+ | `durable` | Source, Node | Persist messages to disk |
143
+ | `prefetch_count` | Node, Sink | RabbitMQ QoS -- max unacknowledged messages per consumer. Important: when running multiple threads, set this to at least the thread count. |
144
+ | `backoff_seconds` | Source, Node | Initial backoff for publish retries |
145
+ | `max_retries` | Source, Node | Max publish retry attempts |
146
+ | `exponential_backoff` | Source, Node | Enable exponential backoff |
147
+ | `max_backoff_seconds` | Source, Node | Cap on backoff duration |
148
+ | `apply_jitter` | Source, Node | Add randomness to backoff |
149
+ | `connection_max_retries` | Node, Sink | Max connection retry attempts |
150
+ | `connection_backoff_seconds` | Node, Sink | Backoff between connection retries |
151
+
152
+ ## CLI reference
153
+
154
+ ```
155
+ khnm <module:pipeline> <node> [--tasks N] [--threads N]
156
+ ```
157
+
158
+ - `module:pipeline` -- import path to the built `Pipeline` object (e.g. `my_app:pipeline`)
159
+ - `node` -- name of the stage to run
160
+ - `--tasks N` -- number of concurrent async tasks (default: 1)
161
+ - `--threads N` -- number of threads for sync callbacks (default: 1)
162
+
163
+ The `RABBITMQ_URL` environment variable must be set.
@@ -0,0 +1,37 @@
1
+ [project]
2
+ name = "khnm"
3
+ version = "1.0.0"
4
+ description = "Micro-framework for implementing data pipelines over RabbitMQ"
5
+ requires-python = ">=3.14"
6
+ dependencies = [
7
+ "aio-pika>=9.5.8",
8
+ "pydantic>=2.12.5",
9
+ ]
10
+
11
+ [project.scripts]
12
+ khnm = "khnm.main:main"
13
+
14
+ [dependency-groups]
15
+ dev = [
16
+ "httpx>=0.28.1",
17
+ "pika>=1.3.2",
18
+ "pre-commit>=4.5.1",
19
+ "pytest>=9.0.2",
20
+ "pytest-asyncio>=1.3.0",
21
+ "ruff>=0.14.14",
22
+ "testcontainers>=4.14.0",
23
+ "ty>=0.0.13",
24
+ ]
25
+
26
+ [tool.pytest.ini_options]
27
+ asyncio_mode = "auto"
28
+ asyncio_default_fixture_loop_scope = "session"
29
+ asyncio_default_test_loop_scope = "session"
30
+ filterwarnings = [
31
+ "ignore::DeprecationWarning:testcontainers.*",
32
+ ]
33
+ pythonpath = ["src"]
34
+
35
+
36
+ [tool.hatch.build.targets.wheel]
37
+ packages = ["src/khnm"]
khnm-1.0.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
"""Public API of the khnm package.

Re-exports the pipeline builder entry point; everything else lives in
submodules (khnm.pipelines, khnm.main, ...).
"""

from khnm.pipelines import make_pipeline

__all__ = [
    "make_pipeline",
]
@@ -0,0 +1,79 @@
1
+ from collections.abc import AsyncGenerator
2
+ from contextlib import asynccontextmanager, AbstractAsyncContextManager
3
+ from typing import Optional, AsyncIterator
4
+
5
+ from aio_pika.abc import (
6
+ AbstractRobustConnection,
7
+ AbstractChannel,
8
+ AbstractIncomingMessage,
9
+ )
10
+
11
+ from khnm.exceptions import UpstreamPipeUnavailable
12
+ from khnm.pipes import get_queue, wait_for_pipe, get_queue_name
13
+ from khnm.time import Clock, UtcClock
14
+ from khnm.types import QueueGetterT
15
+
16
+
17
async def consume(
    amqp_connection: AbstractRobustConnection,
    upstream_pipe: str,
    upstream_connection_max_retries: Optional[int] = None,
    upstream_connection_backoff_seconds: float = 1.0,
    upstream_queue_getter: QueueGetterT = get_queue,
    prefetch_count: Optional[int] = None,
    clock: Clock = UtcClock(),
) -> AsyncGenerator[AbstractAsyncContextManager[AbstractIncomingMessage], None]:
    """Stream messages from the upstream pipe's queue.

    Opens a dedicated channel, waits until the upstream pipe is available,
    then yields each incoming message wrapped in an async context manager:
    the message is acked when the caller's ``async with`` block exits
    cleanly, and nacked (requeued) if the block raises.

    Args:
        amqp_connection: Established robust AMQP connection to open a
            channel on.
        upstream_pipe: Name of the upstream pipe to consume from.
        upstream_connection_max_retries: Max attempts while waiting for the
            upstream pipe; ``None`` presumably means retry forever — confirm
            against ``wait_for_pipe``.
        upstream_connection_backoff_seconds: Delay between availability checks.
        upstream_queue_getter: Callable resolving a queue object for a
            channel and queue name (injectable for tests).
        prefetch_count: Optional per-channel QoS cap on unacked messages.
        clock: Time source used for backoff waits (injectable for tests).

    Raises:
        UpstreamPipeUnavailable: If the upstream pipe never became available
            within the retry budget (raised from ``_connect``).
    """
    channel = await _connect(
        connection=amqp_connection,
        upstream_pipe=upstream_pipe,
        upstream_connection_backoff_seconds=upstream_connection_backoff_seconds,
        upstream_connection_max_retries=upstream_connection_max_retries,
        prefetch_count=prefetch_count,
        queue_getter=upstream_queue_getter,
        clock=clock,
    )
    try:
        queue = await upstream_queue_getter(channel, get_queue_name(upstream_pipe))
        async with queue.iterator() as queue_iter:
            async for message in queue_iter:
                # Yield the ack/nack wrapper, not the raw message: the
                # consumer decides the message's fate via its with-block.
                yield _handle_message(message)
    finally:
        # Runs also when the generator is closed early (GeneratorExit) or
        # iteration raises — the channel must not outlive this generator.
        await channel.close()
+
43
+
44
async def _connect(
    connection: AbstractRobustConnection,
    upstream_pipe: str,
    upstream_connection_backoff_seconds: float = 1.0,
    upstream_connection_max_retries: Optional[int] = None,
    prefetch_count: Optional[int] = None,
    queue_getter: QueueGetterT = get_queue,
    clock: Clock = UtcClock(),
) -> AbstractChannel:
    """Open a channel on *connection*, optionally apply QoS, and wait for the
    upstream pipe to become available.

    Returns the ready channel, or raises UpstreamPipeUnavailable when the
    pipe does not appear within the configured retry budget.
    """
    chan = await connection.channel()
    # QoS is opt-in: only set it when a prefetch limit was requested.
    if prefetch_count is not None:
        await chan.set_qos(prefetch_count=prefetch_count)
    pipe_ready = await wait_for_pipe(
        chan,
        upstream_pipe,
        upstream_connection_backoff_seconds,
        upstream_connection_max_retries,
        clock,
        queue_getter,
    )
    if pipe_ready:
        return chan
    raise UpstreamPipeUnavailable(f"Upstream unavailable: {upstream_pipe}")
67
+
68
+
69
@asynccontextmanager
async def _handle_message(
    message: AbstractIncomingMessage,
) -> AsyncIterator[AbstractIncomingMessage]:
    """Tie *message*'s fate to the caller's block: ack on clean exit,
    nack-with-requeue and re-raise if the block throws."""
    try:
        yield message
    except Exception:
        # Processing failed — hand the message back to the queue for a
        # retry, then propagate the failure to the consumer loop.
        await message.nack(requeue=True)
        raise
    # Reached only when no exception escaped the caller's block.
    await message.ack()
@@ -0,0 +1,10 @@
1
class UpstreamPipeUnavailable(Exception):
    """Raised when the upstream pipe's queue cannot be reached within the
    configured connection retry budget (see consumers._connect)."""

    pass
3
+
4
+
5
class NodeKwargsInvalid(Exception):
    """Raised for invalid node keyword options — presumably during pipeline
    construction via ``.add()``; raise site is outside this module, confirm
    in khnm.pipelines."""

    pass
7
+
8
+
9
class PipelineDefinitionInvalid(Exception):
    """Raised for a structurally invalid pipeline — presumably at
    ``.build()`` time; raise site is outside this module, confirm in
    khnm.pipelines."""

    pass
@@ -0,0 +1,61 @@
1
+ import argparse
2
+ import asyncio
3
+ import importlib
4
+ import os
5
+
6
+ from aio_pika import connect_robust
7
+
8
+ from khnm.pipelines import Pipeline
9
+
10
+
11
def load_pipeline_object(import_string: str) -> "Pipeline":
    """Resolve a ``module:attribute`` reference to the pipeline object.

    Args:
        import_string: Import path in the form ``module:attribute``,
            e.g. ``my_app:pipeline``.

    Returns:
        The object bound to ``attribute`` in the imported module.

    Raises:
        ValueError: If *import_string* contains no ``:`` separator.
        ModuleNotFoundError: If the module cannot be imported.
        AttributeError: If the module lacks the named attribute.
    """
    module_name, sep, attribute_name = import_string.partition(":")
    # Fail fast with a CLI-friendly message instead of the opaque
    # "not enough values to unpack" the old split() produced.
    if not sep:
        raise ValueError(
            f"Invalid pipeline reference {import_string!r}; expected 'module:attribute'"
        )
    module = importlib.import_module(module_name)
    pipeline = getattr(module, attribute_name)
    return pipeline
16
+
17
+
18
async def run_node(
    pipeline: "Pipeline",
    node_name: str,
    connection_string: str,
    tasks: int = 1,
    threads: int = 1,
) -> None:
    """Run *tasks* concurrent consumer loops for one pipeline stage.

    Opens a single robust AMQP connection shared by all tasks and — fixing a
    leak in the previous version — closes it when the tasks finish or any of
    them fails.

    Args:
        pipeline: Built Pipeline object to run a stage of.
        node_name: Name of the stage to run.
        connection_string: AMQP URL to connect to.
        tasks: Number of concurrent asyncio tasks to spawn.
        threads: Thread-pool size forwarded to ``pipeline.run``.
    """
    connection = await connect_robust(connection_string)
    try:
        await asyncio.gather(
            *(pipeline.run(connection, node_name, threads) for _ in range(tasks))
        )
    finally:
        # Ensure a clean AMQP shutdown even when gather() raises.
        await connection.close()
29
+
30
+
31
def _build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the khnm node runner."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "pipeline",
        help="Where to look for pipeline object, in format 'module:attribute' (e.g. 'main:pipeline')",
    )
    parser.add_argument("node", help="Node name to run")
    parser.add_argument(
        "--tasks",
        type=int,
        default=1,
        help="Number of concurrent async tasks to run",
    )
    parser.add_argument(
        "--threads",
        type=int,
        default=1,
        help="Number of concurrent threads to run (sync callbacks only)",
    )
    return parser


def main() -> None:
    """CLI entry point: parse arguments, validate environment/limits, and
    run the requested pipeline node."""
    cli_args = _build_parser().parse_args()

    amqp_url = os.getenv("RABBITMQ_URL")
    if not amqp_url:
        raise SystemExit("RABBITMQ_URL not set")
    if cli_args.tasks < 1:
        raise SystemExit("Number of tasks must be greater than 0")
    if cli_args.threads < 1:
        raise SystemExit("Number of threads must be greater than 0")

    pipeline = load_pipeline_object(cli_args.pipeline)
    asyncio.run(
        run_node(pipeline, cli_args.node, amqp_url, cli_args.tasks, cli_args.threads)
    )