PyPI - senpuki - Versions diffs - 0.1.0__tar.gz - Mend

senpuki 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

senpuki-0.1.0/PKG-INFO +258 -0
senpuki-0.1.0/README.md +247 -0
senpuki-0.1.0/examples/batch_processing.py +114 -0
senpuki-0.1.0/examples/complex_workflow.py +151 -0
senpuki-0.1.0/examples/failing_flow.py +49 -0
senpuki-0.1.0/examples/media_pipeline.py +312 -0
senpuki-0.1.0/examples/parallel_scraper.py +141 -0
senpuki-0.1.0/examples/saga_trip_booking.py +131 -0
senpuki-0.1.0/examples/simple_flow.py +65 -0
senpuki-0.1.0/pyproject.toml +21 -0
senpuki-0.1.0/senpuki/__init__.py +5 -0
senpuki-0.1.0/senpuki/backend/__init__.py +0 -0
senpuki-0.1.0/senpuki/backend/base.py +55 -0
senpuki-0.1.0/senpuki/backend/postgres.py +476 -0
senpuki-0.1.0/senpuki/backend/sqlite.py +511 -0
senpuki-0.1.0/senpuki/core.py +135 -0
senpuki-0.1.0/senpuki/executor.py +1047 -0
senpuki-0.1.0/senpuki/notifications/__init__.py +0 -0
senpuki-0.1.0/senpuki/notifications/base.py +19 -0
senpuki-0.1.0/senpuki/notifications/redis.py +93 -0
senpuki-0.1.0/senpuki/registry.py +33 -0
senpuki-0.1.0/senpuki/utils/__init__.py +0 -0
senpuki-0.1.0/senpuki/utils/async_sqlite.py +91 -0
senpuki-0.1.0/senpuki/utils/idempotency.py +35 -0
senpuki-0.1.0/senpuki/utils/serialization.py +75 -0
senpuki-0.1.0/senpuki/utils/time.py +38 -0
senpuki-0.1.0/senpuki.egg-info/PKG-INFO +258 -0
senpuki-0.1.0/senpuki.egg-info/SOURCES.txt +43 -0
senpuki-0.1.0/senpuki.egg-info/dependency_links.txt +1 -0
senpuki-0.1.0/senpuki.egg-info/requires.txt +4 -0
senpuki-0.1.0/senpuki.egg-info/top_level.txt +4 -0
senpuki-0.1.0/setup.cfg +4 -0
senpuki-0.1.0/tests/__init__.py +0 -0
senpuki-0.1.0/tests/test_core.py +53 -0
senpuki-0.1.0/tests/test_deadlock.py +49 -0
senpuki-0.1.0/tests/test_execution.py +266 -0
senpuki-0.1.0/tests/test_helpers.py +68 -0
senpuki-0.1.0/tests/test_map.py +80 -0
senpuki-0.1.0/tests/test_max_duration.py +56 -0
senpuki-0.1.0/tests/test_parallel.py +79 -0
senpuki-0.1.0/tests/test_rate_limit.py +110 -0
senpuki-0.1.0/tests/test_scenarios.py +110 -0
senpuki-0.1.0/tests/test_scheduling.py +98 -0
senpuki-0.1.0/tests/test_wait_for.py +72 -0
senpuki-0.1.0/tests/utils.py +30 -0

senpuki-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,258 @@
+Metadata-Version: 2.4
+Name: senpuki
+Version: 0.1.0
+Summary: Distributed Durable Functions in Python
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+Requires-Dist: aiosqlite>=0.21.0
+Requires-Dist: asyncpg>=0.31.0
+Requires-Dist: pyrefly>=0.43.1
+Requires-Dist: redis>=7.1.0
+# Senpuki: Distributed Durable Functions for Python
+Senpuki is a lightweight, asynchronous, distributed task orchestration library for Python. It allows you to write stateful, reliable workflows ("durable functions") using standard Python async/await syntax. Senpuki handles the complexity of persisting state, retrying failures, and distributing work across a pool of workers.
+## Table of Contents
+- [Core Concepts](#core-concepts)
+- [Installation](#installation)
+- [Quick Start](#quick-start)
+- [Features Guide](#features-guide)
+    - [Defining Durable Functions](#defining-durable-functions)
+    - [Orchestration & Activities](#orchestration--activities)
+    - [Retries & Error Handling](#retries--error-handling)
+    - [Idempotency & Caching](#idempotency--caching)
+    - [Parallel Execution (Fan-out/Fan-in)](#parallel-execution-fan-outfan-in)
+    - [Timeouts & Expirys](#timeouts--expirys)
+- [Architecture & Backends](#architecture--backends)
+- [Running Workers](#running-workers)
+- [Examples](#examples)
+---
+## Core Concepts
+*   **Durable Functions**: Python async functions decorated with `@Senpuki.durable()`. They can be orchestrators (calling other functions) or activities (doing work).
+*   **Orchestrator**: A durable function that schedules other durable functions. It sleeps while waiting for sub-tasks to complete, freeing up worker resources.
+*   **Activity**: A leaf-node durable function that performs a specific action (e.g., API call, DB operation).
+*   **Execution**: A single run of a workflow. It has a unique ID and persistent state.
+*   **Worker**: A process that polls the backend storage for pending tasks and executes them.
+---
+## Installation
+```bash
+pip install senpuki
+```
+**Requirements:**
+*   Python 3.12+
+*   `aiosqlite` (installed as a dependency; provides async support for the SQLite backend)
+*   `asyncpg` (installed as a dependency; provides async support for the PostgreSQL backend)
+*   `redis` (installed as a dependency; only used when Redis notifications are enabled)
+---
+## Quick Start
+1.  **Define your workflow**:
+    ```python
+    import asyncio
+    from senpuki import Senpuki, Result
+    # 1. Define an activity
+    @Senpuki.durable()
+    async def greet(name: str) -> str:
+        await asyncio.sleep(0.1) # Simulate work
+        return f"Hello, {name}!"
+    # 2. Define an orchestrator
+    @Senpuki.durable()
+    async def workflow(names: list[str]) -> Result[list[str], Exception]:
+        results = []
+        for name in names:
+            # Call activity (awaiting it schedules it and waits for result)
+            res = await greet(name)
+            results.append(res)
+        return Result.Ok(results)
+    ```
+2.  **Run the system**:
+    ```python
+    async def main():
+        # Setup Backend
+        backend = Senpuki.backends.SQLiteBackend("senpuki.sqlite")
+        await backend.init_db()
+        executor = Senpuki(backend=backend)
+        # Start a Worker (in background)
+        worker = asyncio.create_task(executor.serve())
+        # Dispatch Workflow
+        exec_id = await executor.dispatch(workflow, ["Alice", "Bob"])
+        print(f"Started execution: {exec_id}")
+        # Wait for Result
+        while True:
+            state = await executor.state_of(exec_id)
+            if state.state in ("completed", "failed"):
+                break
+            await asyncio.sleep(0.5)
+        result = await executor.result_of(exec_id)
+        print(result.value) # ['Hello, Alice!', 'Hello, Bob!']
+    if __name__ == "__main__":
+        asyncio.run(main())
+    ```
+---
+## Features Guide
+### Defining Durable Functions
+Use the `@Senpuki.durable` decorator. You can configure retry policies, caching, and queues here.
+```python
+from senpuki import Senpuki, RetryPolicy
+@Senpuki.durable(
+    retry_policy=RetryPolicy(max_attempts=3, initial_delay=1.0),
+    queue="high_priority",
+    tags=["billing"]
+)
+async def charge_card(amount: int):
+    ...
+```
+### Orchestration & Activities
+When a durable function calls another durable function (e.g., `await other_func()`), Senpuki intercepts this call.
+*   It persists a **Task** record for the child function.
+*   The parent function "sleeps" (suspends) until the child task is completed by a worker.
+*   This allows workflows to run over days or weeks without consuming memory while waiting.
+### Retries & Error Handling
+Failures happen. Senpuki allows declarative retry policies.
+```python
+policy = RetryPolicy(
+    max_attempts=5,
+    backoff_factor=2.0, # Exponential backoff
+    jitter=0.1,         # Add randomness to prevent thundering herd
+    retry_for=(ConnectionError, ExpiryError) # Only retry these exceptions
+)
+@Senpuki.durable(retry_policy=policy)
+async def unstable_api_call():
+    ...
+```
+If the function fails after all retries, the Execution is marked as `failed`, and the error is propagated to the parent orchestrator (if any), which can catch it using standard `try/except`.
+### Idempotency & Caching
+To prevent duplicate side-effects (like charging a card twice) or re-doing expensive work:
+1.  **Idempotency**: Results are stored permanently. If a task is scheduled again with the same arguments (and version), the stored result is returned immediately without running the function.
+2.  **Caching**: Similar to idempotency but implies the result can be reused across different executions if the key matches.
+```python
+@Senpuki.durable(idempotent=True)
+async def send_email(user_id: str, subject: str):
+    # Safe to call multiple times; will only execute once per unique arguments
+    ...
+@Senpuki.durable(cached=True, version="v1")
+async def heavy_compute(data_hash: str):
+    # Result stored in cache table; subsequent calls return immediately
+    ...
+```
+### Parallel Execution (Fan-out/Fan-in)
+Use standard `asyncio.gather` to run tasks in parallel. Senpuki schedules them all, and the worker pool executes them concurrently.
+```python
+@Senpuki.durable()
+async def batch_processor(items: list[int]):
+    tasks = []
+    for item in items:
+        # Schedule all tasks
+        tasks.append(process_item(item))
+    # Wait for all to complete
+    results = await asyncio.gather(*tasks)
+    return sum(results)
+```
+### Timeouts & Expirys
+You can set an expiry for the entire execution. If the execution runs longer than this duration, it is cancelled.
+```python
+exec_id = await executor.dispatch(long_workflow, expiry="1h 30m")
+```
+---
+## Architecture & Backends
+Senpuki is backend-agnostic.
+### SQLite Backend
+Included by default. Stores state in a local SQLite file.
+*   **Best for**: Development, testing, single-node deployments, embedded workflows.
+*   **Features**: Full persistence, async support.
+### Postgres Backend
+*   **Best for**: Production environments, concurrent access, high reliability.
+*   **Features**: Uses `asyncpg` for high performance.
+### Mongo Backend (Planned)
+*   **Best for**: Distributed production clusters, high availability.
+### Redis (Notifications)
+Optional. Uses Redis Pub/Sub to notify orchestrators immediately when a task finishes, reducing polling latency.
+---
+## Running Workers
+The `executor.serve()` method runs the worker loop. In production, you typically run this in a separate process or container.
+```python
+# worker.py
+async def run_worker():
+    backend = Senpuki.backends.SQLiteBackend("prod.db")
+    executor = Senpuki(backend=backend)
+    # Consume only specific queues
+    await executor.serve(
+        queues=["default", "high_priority"],
+        max_concurrency=50
+    )
+```
+You can scale horizontally by running multiple worker instances pointing to the same database.
+---
+## Examples
+See the `examples/` folder for complete code:
+1.  **`simple_flow.py`**: Basic parent-child function calls.
+2.  **`failing_flow.py`**: Demonstrates automatic retries and Dead Letter Queue (DLQ) behavior.
+3.  **`complex_workflow.py`**: A data pipeline showcasing caching, retries, and expiries.
+4.  **`batch_processing.py`**: Fan-out/fan-in pattern (processing multiple items in parallel).
+5.  **`saga_trip_booking.py`**: Saga pattern with compensation (rollback) logic.
+6.  **`media_pipeline.py`**: A complex 5-minute simulation of a media processing pipeline (Validation -> Safety -> Transcode/AI -> Package) with a live progress dashboard.
+## Requirements

senpuki-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,247 @@
+# Senpuki: Distributed Durable Functions for Python
+Senpuki is a lightweight, asynchronous, distributed task orchestration library for Python. It allows you to write stateful, reliable workflows ("durable functions") using standard Python async/await syntax. Senpuki handles the complexity of persisting state, retrying failures, and distributing work across a pool of workers.
+## Table of Contents
+- [Core Concepts](#core-concepts)
+- [Installation](#installation)
+- [Quick Start](#quick-start)
+- [Features Guide](#features-guide)
+    - [Defining Durable Functions](#defining-durable-functions)
+    - [Orchestration & Activities](#orchestration--activities)
+    - [Retries & Error Handling](#retries--error-handling)
+    - [Idempotency & Caching](#idempotency--caching)
+    - [Parallel Execution (Fan-out/Fan-in)](#parallel-execution-fan-outfan-in)
+    - [Timeouts & Expirys](#timeouts--expirys)
+- [Architecture & Backends](#architecture--backends)
+- [Running Workers](#running-workers)
+- [Examples](#examples)
+---
+## Core Concepts
+*   **Durable Functions**: Python async functions decorated with `@Senpuki.durable()`. They can be orchestrators (calling other functions) or activities (doing work).
+*   **Orchestrator**: A durable function that schedules other durable functions. It sleeps while waiting for sub-tasks to complete, freeing up worker resources.
+*   **Activity**: A leaf-node durable function that performs a specific action (e.g., API call, DB operation).
+*   **Execution**: A single run of a workflow. It has a unique ID and persistent state.
+*   **Worker**: A process that polls the backend storage for pending tasks and executes them.
+---
+## Installation
+```bash
+pip install senpuki
+```
+**Requirements:**
+*   Python 3.12+
+*   `aiosqlite` (installed as a dependency; provides async support for the SQLite backend)
+*   `asyncpg` (installed as a dependency; provides async support for the PostgreSQL backend)
+*   `redis` (installed as a dependency; only used when Redis notifications are enabled)
+---
+## Quick Start
+1.  **Define your workflow**:
+    ```python
+    import asyncio
+    from senpuki import Senpuki, Result
+    # 1. Define an activity
+    @Senpuki.durable()
+    async def greet(name: str) -> str:
+        await asyncio.sleep(0.1) # Simulate work
+        return f"Hello, {name}!"
+    # 2. Define an orchestrator
+    @Senpuki.durable()
+    async def workflow(names: list[str]) -> Result[list[str], Exception]:
+        results = []
+        for name in names:
+            # Call activity (awaiting it schedules it and waits for result)
+            res = await greet(name)
+            results.append(res)
+        return Result.Ok(results)
+    ```
+2.  **Run the system**:
+    ```python
+    async def main():
+        # Setup Backend
+        backend = Senpuki.backends.SQLiteBackend("senpuki.sqlite")
+        await backend.init_db()
+        executor = Senpuki(backend=backend)
+        # Start a Worker (in background)
+        worker = asyncio.create_task(executor.serve())
+        # Dispatch Workflow
+        exec_id = await executor.dispatch(workflow, ["Alice", "Bob"])
+        print(f"Started execution: {exec_id}")
+        # Wait for Result
+        while True:
+            state = await executor.state_of(exec_id)
+            if state.state in ("completed", "failed"):
+                break
+            await asyncio.sleep(0.5)
+        result = await executor.result_of(exec_id)
+        print(result.value) # ['Hello, Alice!', 'Hello, Bob!']
+    if __name__ == "__main__":
+        asyncio.run(main())
+    ```
+---
+## Features Guide
+### Defining Durable Functions
+Use the `@Senpuki.durable` decorator. You can configure retry policies, caching, and queues here.
+```python
+from senpuki import Senpuki, RetryPolicy
+@Senpuki.durable(
+    retry_policy=RetryPolicy(max_attempts=3, initial_delay=1.0),
+    queue="high_priority",
+    tags=["billing"]
+)
+async def charge_card(amount: int):
+    ...
+```
+### Orchestration & Activities
+When a durable function calls another durable function (e.g., `await other_func()`), Senpuki intercepts this call.
+*   It persists a **Task** record for the child function.
+*   The parent function "sleeps" (suspends) until the child task is completed by a worker.
+*   This allows workflows to run over days or weeks without consuming memory while waiting.
+### Retries & Error Handling
+Failures happen. Senpuki allows declarative retry policies.
+```python
+policy = RetryPolicy(
+    max_attempts=5,
+    backoff_factor=2.0, # Exponential backoff
+    jitter=0.1,         # Add randomness to prevent thundering herd
+    retry_for=(ConnectionError, ExpiryError) # Only retry these exceptions
+)
+@Senpuki.durable(retry_policy=policy)
+async def unstable_api_call():
+    ...
+```
+If the function fails after all retries, the Execution is marked as `failed`, and the error is propagated to the parent orchestrator (if any), which can catch it using standard `try/except`.
+### Idempotency & Caching
+To prevent duplicate side-effects (like charging a card twice) or re-doing expensive work:
+1.  **Idempotency**: Results are stored permanently. If a task is scheduled again with the same arguments (and version), the stored result is returned immediately without running the function.
+2.  **Caching**: Similar to idempotency but implies the result can be reused across different executions if the key matches.
+```python
+@Senpuki.durable(idempotent=True)
+async def send_email(user_id: str, subject: str):
+    # Safe to call multiple times; will only execute once per unique arguments
+    ...
+@Senpuki.durable(cached=True, version="v1")
+async def heavy_compute(data_hash: str):
+    # Result stored in cache table; subsequent calls return immediately
+    ...
+```
+### Parallel Execution (Fan-out/Fan-in)
+Use standard `asyncio.gather` to run tasks in parallel. Senpuki schedules them all, and the worker pool executes them concurrently.
+```python
+@Senpuki.durable()
+async def batch_processor(items: list[int]):
+    tasks = []
+    for item in items:
+        # Schedule all tasks
+        tasks.append(process_item(item))
+    # Wait for all to complete
+    results = await asyncio.gather(*tasks)
+    return sum(results)
+```
+### Timeouts & Expirys
+You can set an expiry for the entire execution. If the execution runs longer than this duration, it is cancelled.
+```python
+exec_id = await executor.dispatch(long_workflow, expiry="1h 30m")
+```
+---
+## Architecture & Backends
+Senpuki is backend-agnostic.
+### SQLite Backend
+Included by default. Stores state in a local SQLite file.
+*   **Best for**: Development, testing, single-node deployments, embedded workflows.
+*   **Features**: Full persistence, async support.
+### Postgres Backend
+*   **Best for**: Production environments, concurrent access, high reliability.
+*   **Features**: Uses `asyncpg` for high performance.
+### Mongo Backend (Planned)
+*   **Best for**: Distributed production clusters, high availability.
+### Redis (Notifications)
+Optional. Uses Redis Pub/Sub to notify orchestrators immediately when a task finishes, reducing polling latency.
+---
+## Running Workers
+The `executor.serve()` method runs the worker loop. In production, you typically run this in a separate process or container.
+```python
+# worker.py
+async def run_worker():
+    backend = Senpuki.backends.SQLiteBackend("prod.db")
+    executor = Senpuki(backend=backend)
+    # Consume only specific queues
+    await executor.serve(
+        queues=["default", "high_priority"],
+        max_concurrency=50
+    )
+```
+You can scale horizontally by running multiple worker instances pointing to the same database.
+---
+## Examples
+See the `examples/` folder for complete code:
+1.  **`simple_flow.py`**: Basic parent-child function calls.
+2.  **`failing_flow.py`**: Demonstrates automatic retries and Dead Letter Queue (DLQ) behavior.
+3.  **`complex_workflow.py`**: A data pipeline showcasing caching, retries, and expiries.
+4.  **`batch_processing.py`**: Fan-out/fan-in pattern (processing multiple items in parallel).
+5.  **`saga_trip_booking.py`**: Saga pattern with compensation (rollback) logic.
+6.  **`media_pipeline.py`**: A complex 5-minute simulation of a media processing pipeline (Validation -> Safety -> Transcode/AI -> Package) with a live progress dashboard.
+## Requirements

senpuki-0.1.0/examples/batch_processing.py ADDED Viewed

@@ -0,0 +1,114 @@
+import asyncio
+import os
+import logging
+import random
+from senpuki import Senpuki, Result
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%H:%M:%S')  # INFO and above, timestamps shown as HH:MM:SS
+logger = logging.getLogger("BatchExample")  # logger used by the activities and the orchestrator in this example
+# --- Activities ---
+@Senpuki.durable()
+async def download_image(image_id: int) -> str:
+    # Simulate network IO
+    delay = random.uniform(0.1, 0.5)
+    await asyncio.sleep(delay)
+    # Simulate occasional network failure
+    if random.random() < 0.1:
+        raise ConnectionError(f"Network error downloading image {image_id}")
+    path = f"/tmp/img_{image_id}.jpg"
+    logger.info(f"Downloaded image {image_id} to {path} ({delay:.2f}s)")
+    return path
+@Senpuki.durable()
+async def process_image(path: str) -> str:
+    # Simulate CPU intensive work
+    await asyncio.sleep(0.2)
+    processed_path = path.replace(".jpg", "_bw.jpg")
+    logger.info(f"Processed {path} -> {processed_path}")
+    return processed_path
+@Senpuki.durable()
+async def create_gallery(image_paths: list[str]) -> str:
+    await asyncio.sleep(0.5)
+    logger.info(f"Creating gallery from {len(image_paths)} images")
+    return f"http://example.com/gallery/{len(image_paths)}_images"
+# --- Orchestrator ---
+@Senpuki.durable()
+async def batch_image_workflow(image_ids: list[int]) -> Result[str, Exception]:
+    logger.info(f"Starting batch workflow for {len(image_ids)} images")
+    # Fan-out: Download all images in parallel
+    download_tasks = []
+    for img_id in image_ids:
+        # We can fire off tasks. If one fails, we might want to handle it.
+        # asyncio.gather will raise the first exception by default.
+        # In a real batch, maybe we want return_exceptions=True to process partials.
+        download_tasks.append(download_image(img_id))
+    # Wait for downloads
+    # To handle partial failures, we would wrap download_image in a safe version or use return_exceptions.
+    # Here we assume we want all or nothing for simplicity, but let's use return_exceptions=True to show robustness.
+    download_results = await asyncio.gather(*download_tasks, return_exceptions=True)
+    successful_downloads = []
+    for i, res in enumerate(download_results):
+        if isinstance(res, Exception):
+            logger.warning(f"Failed to download image {image_ids[i]}: {res}")
+        else:
+            successful_downloads.append(res)
+    if not successful_downloads:
+        return Result.Error(Exception("No images downloaded successfully"))
+    # Fan-out: Process downloaded images
+    process_tasks = [process_image(path) for path in successful_downloads]
+    processed_paths = await asyncio.gather(*process_tasks)
+    # Fan-in: Create gallery
+    gallery_url = await create_gallery(processed_paths)
+    return Result.Ok(gallery_url)
+# --- Runner ---
+async def main():
+    db_path = "batch_example.sqlite"
+    if os.path.exists(db_path):
+        os.remove(db_path)
+    backend = Senpuki.backends.SQLiteBackend(db_path)
+    await backend.init_db()
+    executor = Senpuki(backend=backend)
+    # Start worker with concurrency
+    worker_task = asyncio.create_task(executor.serve(poll_interval=0.1, max_concurrency=20))
+    logger.info("Dispatching workflow...")
+    # Process 10 images
+    ids = list(range(1, 11))
+    exec_id = await executor.dispatch(batch_image_workflow, ids)
+    # Monitor
+    while True:
+        state = await executor.state_of(exec_id)
+        if state.state in ("completed", "failed", "timed_out"):
+            break
+        await asyncio.sleep(0.5)
+    result = await executor.result_of(exec_id)
+    logger.info(f"Final Result: {result}")
+    worker_task.cancel()
+    try:
+        await worker_task
+    except asyncio.CancelledError:
+        pass
+if __name__ == "__main__":  # run the demo only when executed as a script
+    asyncio.run(main())