PyPI - ai-pipeline-core - Versions diffs - 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl - Mend

ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

ai_pipeline_core/__init__.py +84 -4
ai_pipeline_core/documents/__init__.py +9 -0
ai_pipeline_core/documents/document.py +1044 -152
ai_pipeline_core/documents/document_list.py +147 -38
ai_pipeline_core/documents/flow_document.py +112 -11
ai_pipeline_core/documents/mime_type.py +173 -15
ai_pipeline_core/documents/task_document.py +117 -12
ai_pipeline_core/documents/temporary_document.py +84 -5
ai_pipeline_core/documents/utils.py +41 -9
ai_pipeline_core/exceptions.py +47 -11
ai_pipeline_core/flow/__init__.py +2 -0
ai_pipeline_core/flow/config.py +236 -27
ai_pipeline_core/flow/options.py +50 -1
ai_pipeline_core/llm/__init__.py +6 -0
ai_pipeline_core/llm/ai_messages.py +125 -27
ai_pipeline_core/llm/client.py +278 -26
ai_pipeline_core/llm/model_options.py +130 -1
ai_pipeline_core/llm/model_response.py +239 -35
ai_pipeline_core/llm/model_types.py +67 -0
ai_pipeline_core/logging/__init__.py +13 -0
ai_pipeline_core/logging/logging_config.py +72 -20
ai_pipeline_core/logging/logging_mixin.py +38 -32
ai_pipeline_core/pipeline.py +363 -60
ai_pipeline_core/prefect.py +48 -1
ai_pipeline_core/prompt_manager.py +209 -24
ai_pipeline_core/settings.py +108 -4
ai_pipeline_core/simple_runner/__init__.py +5 -0
ai_pipeline_core/simple_runner/cli.py +96 -11
ai_pipeline_core/simple_runner/simple_runner.py +237 -4
ai_pipeline_core/tracing.py +253 -30
ai_pipeline_core-0.1.12.dist-info/METADATA +450 -0
ai_pipeline_core-0.1.12.dist-info/RECORD +36 -0
ai_pipeline_core-0.1.10.dist-info/METADATA +0 -538
ai_pipeline_core-0.1.10.dist-info/RECORD +0 -36
{ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/WHEEL +0 -0
{ai_pipeline_core-0.1.10.dist-info → ai_pipeline_core-0.1.12.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/pipeline.py CHANGED Viewed

@@ -1,51 +1,26 @@
-"""
-ai_pipeline_core.pipeline
-=========================
-Tiny wrappers around Prefect's public ``@task`` and ``@flow`` that add our
-``trace`` decorator and **require async functions**.
-Why this exists
----------------
-Prefect tasks/flows are awaitable at runtime, but their public type stubs
-don’t declare that clearly. We therefore:
-1) Return the **real Prefect objects** (so you keep every Prefect method).
-2) Type them as small Protocols that say “this is awaitable and has common
-   helpers like `.submit`/`.map`”.
+"""Pipeline decorators with Prefect integration and tracing.
-This keeps Pyright happy without altering runtime behavior and avoids
-leaking advanced typing constructs (like ``ParamSpec``) that confuse tools
-that introspect callables (e.g., Pydantic).
+@public
-Quick start
------------
-from ai_pipeline_core.pipeline import pipeline_task, pipeline_flow
-from ai_pipeline_core.documents import DocumentList
-from ai_pipeline_core.flow.options import FlowOptions
-@pipeline_task
-async def add(x: int, y: int) -> int:
-    return x + y
-@pipeline_flow
-async def my_flow(project_name: str, docs: DocumentList, opts: FlowOptions) -> DocumentList:
-    await add(1, 2)  # awaitable and typed
-    return docs
-Rules
------
-• Your decorated function **must** be ``async def``.
-• ``@pipeline_flow`` functions must accept at least:
-  (project_name: str, documents: DocumentList, flow_options: FlowOptions | subclass).
-• Both wrappers return the same Prefect objects you’d get from Prefect directly.
+Wrappers around Prefect's @task and @flow that add Laminar tracing
+and enforce async-only execution for consistency.
 """
 from __future__ import annotations
 import datetime
 import inspect
-from typing import Any, Callable, Coroutine, Iterable, Protocol, TypeVar, Union, cast, overload
+from typing import (
+    Any,
+    Callable,
+    Coroutine,
+    Iterable,
+    Protocol,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+)
 from prefect.assets import Asset
 from prefect.cache_policies import CachePolicy
@@ -79,7 +54,27 @@ FO_contra = TypeVar("FO_contra", bound=FlowOptions, contravariant=True)
 class _TaskLike(Protocol[R_co]):
-    """Minimal 'task-like' view: awaitable call + common helpers."""
+    """Protocol for type-safe Prefect task representation.
+    Defines the minimal interface for a Prefect task as seen by
+    type checkers. Ensures tasks are awaitable and have common
+    Prefect task methods.
+    Type Parameter:
+        R_co: Covariant return type of the task.
+    Methods:
+        __call__: Makes the task awaitable.
+        submit: Submit task for asynchronous execution.
+        map: Map task over multiple inputs.
+    Attributes:
+        name: Optional task name.
+    Note:
+        This is a typing Protocol, not a runtime class.
+        __getattr__ allows accessing Prefect-specific helpers.
+    """
     def __call__(self, *args: Any, **kwargs: Any) -> Coroutine[Any, Any, R_co]: ...
@@ -91,7 +86,27 @@ class _TaskLike(Protocol[R_co]):
 class _DocumentsFlowCallable(Protocol[FO_contra]):
-    """User async flow signature (first three params fixed)."""
+    """Protocol for user-defined flow functions.
+    Defines the required signature for functions that will be
+    decorated with @pipeline_flow. Enforces the standard parameters
+    for document processing flows.
+    Type Parameter:
+        FO_contra: Contravariant FlowOptions type (or subclass).
+    Required Parameters:
+        project_name: Name of the project/pipeline.
+        documents: Input DocumentList to process.
+        flow_options: Configuration options (FlowOptions or subclass).
+        *args, **kwargs: Additional flow-specific parameters.
+    Returns:
+        DocumentList: Processed documents.
+    Note:
+        Functions must be async and return DocumentList.
+    """
     def __call__(
         self,
@@ -104,7 +119,26 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
 class _FlowLike(Protocol[FO_contra]):
-    """Callable returned by Prefect ``@flow`` wrapper that we expose to users."""
+    """Protocol for decorated flow objects returned to users.
+    Represents the callable object returned by @pipeline_flow,
+    which wraps the original flow function with Prefect and
+    tracing capabilities.
+    Type Parameter:
+        FO_contra: Contravariant FlowOptions type.
+    Callable Signature:
+        Same as _DocumentsFlowCallable - accepts project_name,
+        documents, flow_options, plus additional arguments.
+    Attributes:
+        name: Optional flow name from decorator.
+    Note:
+        __getattr__ provides access to all Prefect flow methods
+        without explicit typing (e.g., .serve(), .deploy()).
+    """
     def __call__(
         self,
@@ -124,6 +158,18 @@ class _FlowLike(Protocol[FO_contra]):
 # Small helper: safely get a callable's name without upsetting the type checker
 # --------------------------------------------------------------------------- #
 def _callable_name(obj: Any, fallback: str) -> str:
+    """Safely extract callable's name for error messages.
+    Args:
+        obj: Any object that might have a __name__ attribute.
+        fallback: Default name if extraction fails.
+    Returns:
+        The callable's __name__ if available, fallback otherwise.
+    Note:
+        Internal helper that never raises exceptions.
+    """
     try:
         n = getattr(obj, "__name__", None)
         return n if isinstance(n, str) else fallback
@@ -131,6 +177,38 @@ def _callable_name(obj: Any, fallback: str) -> str:
         return fallback
+def _is_already_traced(func: Callable[..., Any]) -> bool:
+    """Report whether ``func`` carries a truthy ``__is_traced__`` marker.
+    The marker is looked up on ``func`` itself first, then on each object
+    reached by following ``__wrapped__`` links (the attribute that
+    ``functools.wraps`` sets), so a marker hidden underneath other
+    decorators is still found.
+    Args:
+        func: Callable to inspect for an existing trace marker.
+    Returns:
+        True if ``func``, or any object within the first 10 ``__wrapped__``
+        hops below it, has a truthy ``__is_traced__`` attribute;
+        False otherwise.
+    Note:
+        The walk gives up after ``max_depth`` (10) hops, so a marker buried
+        deeper than that is reported as not traced.
+        NOTE(review): ``__is_traced__`` is presumably set by the ``trace``
+        decorator - confirm in tracing.py.
+    """
+    # Fast path: the marker sits directly on the callable we were handed
+    if hasattr(func, "__is_traced__") and func.__is_traced__:  # type: ignore[attr-defined]
+        return True
+    # Otherwise follow __wrapped__ links down through any outer decorators
+    current = func
+    depth = 0
+    max_depth = 10  # Bound the walk so a cyclic __wrapped__ chain cannot loop forever
+    while hasattr(current, "__wrapped__") and depth < max_depth:
+        wrapped = current.__wrapped__  # type: ignore[attr-defined]
+        # A truthy marker on any inner callable means tracing was already applied
+        if hasattr(wrapped, "__is_traced__") and wrapped.__is_traced__:  # type: ignore[attr-defined]
+            return True
+        current = wrapped
+        depth += 1
+    return False
 # --------------------------------------------------------------------------- #
 # @pipeline_task — async-only, traced, returns Prefect's Task object
 # --------------------------------------------------------------------------- #
@@ -211,18 +289,125 @@ def pipeline_task(
     viz_return_value: bool | None = None,
     asset_deps: list[str | Asset] | None = None,
 ) -> _TaskLike[R_co] | Callable[[Callable[..., Coroutine[Any, Any, R_co]]], _TaskLike[R_co]]:
-    """Decorate an **async** function as a traced Prefect task."""
+    """Decorate an async function as a traced Prefect task.
+    @public
+    Wraps an async function with both Prefect task functionality and
+    LMNR tracing. The function MUST be async (declared with 'async def').
+    IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
+    The framework will raise TypeError if you try to use both decorators together.
+    Best Practice - Use Defaults:
+        For 90% of use cases, use this decorator WITHOUT any parameters.
+        Only specify parameters when you have EXPLICIT requirements.
+    Args:
+        __fn: Function to decorate (when used without parentheses).
+        Tracing parameters:
+        trace_level: When to trace ("always", "debug", "off").
+                    - "always": Always trace (default)
+                    - "debug": Only trace when LMNR_DEBUG="true"
+                    - "off": Disable tracing
+        trace_ignore_input: Don't trace input arguments.
+        trace_ignore_output: Don't trace return value.
+        trace_ignore_inputs: List of parameter names to exclude from tracing.
+        trace_input_formatter: Custom formatter for input tracing.
+        trace_output_formatter: Custom formatter for output tracing.
+        Prefect task parameters:
+        name: Task name (defaults to function name).
+        description: Human-readable task description.
+        tags: Tags for organization and filtering.
+        version: Task version string.
+        cache_policy: Caching policy for task results.
+        cache_key_fn: Custom cache key generation.
+        cache_expiration: How long to cache results.
+        task_run_name: Dynamic or static run name.
+        retries: Number of retry attempts (default 0).
+        retry_delay_seconds: Delay between retries.
+        retry_jitter_factor: Random jitter for retry delays.
+        persist_result: Whether to persist results.
+        result_storage: Where to store results.
+        result_serializer: How to serialize results.
+        result_storage_key: Custom storage key.
+        cache_result_in_memory: Keep results in memory.
+        timeout_seconds: Task execution timeout.
+        log_prints: Capture print() statements.
+        refresh_cache: Force cache refresh.
+        on_completion: Hooks for successful completion.
+        on_failure: Hooks for task failure.
+        retry_condition_fn: Custom retry condition.
+        viz_return_value: Include return value in visualization.
+        asset_deps: Upstream asset dependencies.
+    Returns:
+        Decorated task callable that is awaitable and has Prefect
+        task methods (submit, map, etc.).
+    Example:
+        >>> # RECOMMENDED - No parameters needed!
+        >>> @pipeline_task
+        >>> async def process_document(doc: Document) -> Document:
+        ...     result = await analyze(doc)
+        ...     return result
+        >>>
+        >>> # With parameters (only when necessary):
+        >>> @pipeline_task(retries=5)  # Only for known flaky operations
+        >>> async def unreliable_api_call(url: str) -> dict:
+        ...     # This API fails often, needs extra retries
+        ...     return await fetch_with_retry(url)
+        >>>
+        >>> # AVOID specifying defaults - they're already optimal:
+        >>> # - Automatic task naming
+        >>> # - Standard retry policy
+        >>> # - Sensible timeout
+        >>> # - Full observability
+    Performance:
+        - Task decoration overhead: ~1-2ms
+        - Tracing overhead: ~1-2ms per call
+        - Prefect state tracking: ~5-10ms
+    Note:
+        Tasks are automatically traced with LMNR and appear in
+        both Prefect and LMNR dashboards.
+    See Also:
+        - pipeline_flow: For flow-level decoration
+        - trace: Lower-level tracing decorator
+        - prefect.task: Standard Prefect task (no tracing)
+    """
     task_decorator: Callable[..., Any] = _prefect_task  # helps the type checker
     def _apply(fn: Callable[..., Coroutine[Any, Any, R_co]]) -> _TaskLike[R_co]:
+        """Apply pipeline_task decorator to async function.
+        Returns:
+            Wrapped task with tracing and Prefect functionality.
+        Raises:
+            TypeError: If function is not async or already traced.
+        """
         if not inspect.iscoroutinefunction(fn):
             raise TypeError(
                 f"@pipeline_task target '{_callable_name(fn, 'task')}' must be 'async def'"
             )
+        # Check if function is already traced
+        if _is_already_traced(fn):
+            raise TypeError(
+                f"@pipeline_task target '{_callable_name(fn, 'task')}' is already decorated "
+                f"with @trace. Remove the @trace decorator - @pipeline_task includes "
+                f"tracing automatically."
+            )
+        fname = _callable_name(fn, "task")
         traced_fn = trace(
             level=trace_level,
-            name=name or _callable_name(fn, "task"),
+            name=name or fname,
             ignore_input=trace_ignore_input,
             ignore_output=trace_ignore_output,
             ignore_inputs=trace_ignore_inputs,
@@ -233,14 +418,14 @@ def pipeline_task(
         return cast(
             _TaskLike[R_co],
             task_decorator(
-                name=name,
+                name=name or fname,
                 description=description,
                 tags=tags,
                 version=version,
                 cache_policy=cache_policy,
                 cache_key_fn=cache_key_fn,
                 cache_expiration=cache_expiration,
-                task_run_name=task_run_name,
+                task_run_name=task_run_name or name or fname,
                 retries=0 if retries is None else retries,
                 retry_delay_seconds=retry_delay_seconds,
                 retry_jitter_factor=retry_jitter_factor,
@@ -264,7 +449,7 @@ def pipeline_task(
 # --------------------------------------------------------------------------- #
-# @pipeline_flow — async-only, traced, returns Prefect’s flow wrapper
+# @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
 # --------------------------------------------------------------------------- #
 @overload
 def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
@@ -333,26 +518,140 @@ def pipeline_flow(
     on_crashed: list[FlowStateHook[Any, Any]] | None = None,
     on_running: list[FlowStateHook[Any, Any]] | None = None,
 ) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
-    """Decorate an **async** flow.
+    """Decorate an async flow for document processing.
-    Required signature:
-        async def flow_fn(
-            project_name: str,
-            documents: DocumentList,
-            flow_options: FlowOptions,  # or any subclass
-            *args,
-            **kwargs
-        ) -> DocumentList
+    @public
+    Wraps an async function as a Prefect flow with tracing and type safety.
+    The decorated function MUST be async and follow the required signature.
-    Returns the same callable object Prefect’s ``@flow`` would return.
+    IMPORTANT: Never combine with @trace decorator - this includes tracing automatically.
+    The framework will raise TypeError if you try to use both decorators together.
+    Best Practice - Use Defaults:
+        For 90% of use cases, use this decorator WITHOUT any parameters.
+        Only specify parameters when you have EXPLICIT requirements.
+    Required function signature:
+        async def flow_fn(
+            project_name: str,         # Project/pipeline identifier
+            documents: DocumentList,   # Input documents to process
+            flow_options: FlowOptions, # Configuration (or subclass)
+            *args,                     # Additional positional args for custom parameters
+            **kwargs                   # Additional keyword args for custom parameters
+        ) -> DocumentList             # Must return DocumentList
+    Note: *args and **kwargs allow for defining custom parameters on your flow
+    function, which can be passed during execution for flow-specific needs.
+    Args:
+        __fn: Function to decorate (when used without parentheses).
+        Tracing parameters:
+        trace_level: When to trace ("always", "debug", "off").
+                    - "always": Always trace (default)
+                    - "debug": Only trace when LMNR_DEBUG="true"
+                    - "off": Disable tracing
+        trace_ignore_input: Don't trace input arguments.
+        trace_ignore_output: Don't trace return value.
+        trace_ignore_inputs: Parameter names to exclude from tracing.
+        trace_input_formatter: Custom input formatter.
+        trace_output_formatter: Custom output formatter.
+        Prefect flow parameters:
+        name: Flow name (defaults to function name).
+        version: Flow version identifier.
+        flow_run_name: Static or dynamic run name.
+        retries: Number of flow retry attempts (default 0).
+        retry_delay_seconds: Delay between flow retries.
+        task_runner: Task execution strategy (sequential/concurrent).
+        description: Human-readable flow description.
+        timeout_seconds: Flow execution timeout.
+        validate_parameters: Validate input parameters.
+        persist_result: Persist flow results.
+        result_storage: Where to store results.
+        result_serializer: How to serialize results.
+        cache_result_in_memory: Keep results in memory.
+        log_prints: Capture print() statements.
+        on_completion: Hooks for successful completion.
+        on_failure: Hooks for flow failure.
+        on_cancellation: Hooks for flow cancellation.
+        on_crashed: Hooks for flow crashes.
+        on_running: Hooks for flow start.
+    Returns:
+        Decorated flow callable that maintains Prefect flow interface
+        while enforcing document processing conventions.
+    Example:
+        >>> from ai_pipeline_core import FlowOptions
+        >>>
+        >>> # RECOMMENDED - No parameters needed!
+        >>> @pipeline_flow
+        >>> async def analyze_documents(
+        ...     project_name: str,
+        ...     documents: DocumentList,
+        ...     flow_options: FlowOptions
+        >>> ) -> DocumentList:
+        ...     # Process each document
+        ...     results = []
+        ...     for doc in documents:
+        ...         result = await process(doc)
+        ...         results.append(result)
+        ...     return DocumentList(results)
+        >>>
+        >>> # With parameters (only when necessary):
+        >>> @pipeline_flow(retries=2)  # Only for flows that need retry logic
+        >>> async def critical_flow(
+        ...     project_name: str,
+        ...     documents: DocumentList,
+        ...     flow_options: FlowOptions
+        >>> ) -> DocumentList:
+        ...     # Critical processing that might fail
+        ...     return await process_critical(documents)
+        >>>
+        >>> # AVOID specifying defaults - they're already optimal:
+        >>> # - Automatic flow naming
+        >>> # - Standard retry policy
+        >>> # - Full observability
+    Note:
+        - Flow is wrapped with both Prefect and LMNR tracing
+        - Return type is validated at runtime
+        - FlowOptions can be subclassed for custom configuration
+        - All Prefect flow methods (.serve(), .deploy()) are available
+    See Also:
+        - pipeline_task: For task-level decoration
+        - FlowConfig: Type-safe flow configuration
+        - FlowOptions: Base class for flow options
+        - simple_runner.run_pipeline: Execute flows locally
     """
     flow_decorator: Callable[..., Any] = _prefect_flow
     def _apply(fn: _DocumentsFlowCallable[FO_contra]) -> _FlowLike[FO_contra]:
+        """Apply pipeline_flow decorator to flow function.
+        Returns:
+            Wrapped flow with tracing and Prefect functionality.
+        Raises:
+            TypeError: If function is not async, already traced, doesn't have
+                      required parameters, or doesn't return DocumentList.
+        """
         fname = _callable_name(fn, "flow")
         if not inspect.iscoroutinefunction(fn):
             raise TypeError(f"@pipeline_flow '{fname}' must be declared with 'async def'")
+        # Check if function is already traced
+        if _is_already_traced(fn):
+            raise TypeError(
+                f"@pipeline_flow target '{fname}' is already decorated "
+                f"with @trace. Remove the @trace decorator - @pipeline_flow includes "
+                f"tracing automatically."
+            )
         if len(inspect.signature(fn).parameters) < 3:
             raise TypeError(
                 f"@pipeline_flow '{fname}' must accept "
@@ -373,6 +672,10 @@ def pipeline_flow(
                 )
             return result
+        # Preserve the original function name for Prefect
+        _wrapper.__name__ = fname
+        _wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
         traced = trace(
             level=trace_level,
             name=name or fname,
@@ -386,9 +689,9 @@ def pipeline_flow(
         return cast(
             _FlowLike[FO_contra],
             flow_decorator(
-                name=name,
+                name=name or fname,
                 version=version,
-                flow_run_name=flow_run_name,
+                flow_run_name=flow_run_name or name or fname,
                 retries=0 if retries is None else retries,
                 retry_delay_seconds=retry_delay_seconds,
                 task_runner=task_runner,

ai_pipeline_core/prefect.py CHANGED Viewed

@@ -1,4 +1,51 @@
-"""Prefect core features."""
+"""Prefect core features for pipeline orchestration.
+This module provides clean re-exports of essential Prefect functionality.
+IMPORTANT: You should NEVER use the `task` and `flow` decorators directly
+unless it is 100% impossible to use `pipeline_task` and `pipeline_flow`.
+The standard Prefect decorators are exported here only for extremely
+limited edge cases where the pipeline decorators cannot be used.
+Always prefer:
+    >>> from ai_pipeline_core import pipeline_task, pipeline_flow
+    >>>
+    >>> @pipeline_task
+    ... async def my_task(...): ...
+    >>>
+    >>> @pipeline_flow
+    ... async def my_flow(...): ...
+The `task` and `flow` decorators should only be used when:
+- You absolutely cannot convert to async (pipeline decorators require async)
+- You have a very specific Prefect integration that conflicts with tracing
+- You are writing test utilities or infrastructure code
+Exported components:
+    task: Prefect task decorator (AVOID - use pipeline_task instead).
+    flow: Prefect flow decorator (AVOID - use pipeline_flow instead).
+    disable_run_logger: Context manager to suppress Prefect logging.
+    prefect_test_harness: Test harness for unit testing flows/tasks.
+Testing utilities (use as fixtures):
+    The disable_run_logger and prefect_test_harness should be used as
+    pytest fixtures as shown in tests/conftest.py:
+    >>> @pytest.fixture(autouse=True, scope="session")
+    ... def prefect_test_fixture():
+    ...     with prefect_test_harness():
+    ...         yield
+    >>>
+    >>> @pytest.fixture(autouse=True)
+    ... def disable_prefect_logging():
+    ...     with disable_run_logger():
+    ...         yield
+Note:
+    The pipeline_task and pipeline_flow decorators from
+    ai_pipeline_core.pipeline provide async-only execution with
+    integrated LMNR tracing and are the standard for this library.
+"""
 from prefect import flow, task
 from prefect.logging import disable_run_logger

ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

ai-pipeline-core 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl