PyPI - ai-pipeline-core - Versions diffs - 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

ai_pipeline_core/__init__.py +25 -14
ai_pipeline_core/documents/__init__.py +2 -1
ai_pipeline_core/documents/document.py +317 -49
ai_pipeline_core/documents/document_list.py +136 -33
ai_pipeline_core/documents/flow_document.py +8 -29
ai_pipeline_core/documents/task_document.py +6 -27
ai_pipeline_core/documents/temporary_document.py +6 -27
ai_pipeline_core/documents/utils.py +64 -1
ai_pipeline_core/flow/config.py +174 -5
ai_pipeline_core/flow/options.py +2 -2
ai_pipeline_core/llm/__init__.py +6 -1
ai_pipeline_core/llm/ai_messages.py +14 -7
ai_pipeline_core/llm/client.py +143 -55
ai_pipeline_core/llm/model_options.py +20 -5
ai_pipeline_core/llm/model_response.py +77 -29
ai_pipeline_core/llm/model_types.py +38 -40
ai_pipeline_core/logging/__init__.py +0 -2
ai_pipeline_core/logging/logging_config.py +0 -6
ai_pipeline_core/logging/logging_mixin.py +2 -10
ai_pipeline_core/pipeline.py +68 -65
ai_pipeline_core/prefect.py +12 -3
ai_pipeline_core/prompt_manager.py +6 -7
ai_pipeline_core/settings.py +13 -5
ai_pipeline_core/simple_runner/__init__.py +1 -11
ai_pipeline_core/simple_runner/cli.py +13 -12
ai_pipeline_core/simple_runner/simple_runner.py +34 -172
ai_pipeline_core/storage/__init__.py +8 -0
ai_pipeline_core/storage/storage.py +628 -0
ai_pipeline_core/tracing.py +110 -26
{ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +60 -23
ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
ai_pipeline_core-0.1.13.dist-info/RECORD +0 -36
{ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
{ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/pipeline.py CHANGED Viewed

@@ -10,6 +10,7 @@ from __future__ import annotations
 import datetime
 import inspect
+from functools import wraps
 from typing import (
     Any,
     Callable,
@@ -35,8 +36,9 @@ from prefect.utilities.annotations import NotSet
 from typing_extensions import TypeAlias
 from ai_pipeline_core.documents import DocumentList
+from ai_pipeline_core.flow.config import FlowConfig
 from ai_pipeline_core.flow.options import FlowOptions
-from ai_pipeline_core.tracing import TraceLevel, trace
+from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
 # --------------------------------------------------------------------------- #
 # Public callback aliases (Prefect stubs omit these exact types)
@@ -99,7 +101,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
         project_name: Name of the project/pipeline.
         documents: Input DocumentList to process.
         flow_options: Configuration options (FlowOptions or subclass).
-        *args, **kwargs: Additional flow-specific parameters.
     Returns:
         DocumentList: Processed documents.
@@ -113,8 +114,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
         project_name: str,
         documents: DocumentList,
         flow_options: FO_contra,
-        *args: Any,
-        **kwargs: Any,
     ) -> Coroutine[Any, Any, DocumentList]: ...
@@ -145,8 +144,6 @@ class _FlowLike(Protocol[FO_contra]):
         project_name: str,
         documents: DocumentList,
         flow_options: FO_contra,
-        *args: Any,
-        **kwargs: Any,
     ) -> Coroutine[Any, Any, DocumentList]: ...
     name: str | None
@@ -224,6 +221,7 @@ def pipeline_task(
     trace_ignore_inputs: list[str] | None = None,
     trace_input_formatter: Callable[..., str] | None = None,
     trace_output_formatter: Callable[..., str] | None = None,
+    trace_cost: float | None = None,
     # prefect passthrough
     name: str | None = None,
     description: str | None = None,
@@ -263,6 +261,7 @@ def pipeline_task(
     trace_ignore_inputs: list[str] | None = None,
     trace_input_formatter: Callable[..., str] | None = None,
     trace_output_formatter: Callable[..., str] | None = None,
+    trace_cost: float | None = None,
     # prefect passthrough
     name: str | None = None,
     description: str | None = None,
@@ -316,6 +315,9 @@ def pipeline_task(
         trace_ignore_inputs: List of parameter names to exclude from tracing.
         trace_input_formatter: Custom formatter for input tracing.
         trace_output_formatter: Custom formatter for output tracing.
+        trace_cost: Optional cost value to track in metadata. When provided and > 0,
+             sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
+             Also forces trace level to "always" if not already set.
         Prefect task parameters:
         name: Task name (defaults to function name).
@@ -405,6 +407,15 @@ def pipeline_task(
             )
         fname = _callable_name(fn, "task")
+        # Create wrapper to handle trace_cost if provided
+        @wraps(fn)
+        async def _wrapper(*args: Any, **kwargs: Any) -> R_co:
+            result = await fn(*args, **kwargs)
+            if trace_cost is not None and trace_cost > 0:
+                set_trace_cost(trace_cost)
+            return result
         traced_fn = trace(
             level=trace_level,
             name=name or fname,
@@ -413,7 +424,7 @@ def pipeline_task(
             ignore_inputs=trace_ignore_inputs,
             input_formatter=trace_input_formatter,
             output_formatter=trace_output_formatter,
-        )(fn)
+        )(_wrapper)
         return cast(
             _TaskLike[R_co],
@@ -451,45 +462,10 @@ def pipeline_task(
 # --------------------------------------------------------------------------- #
 # @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
 # --------------------------------------------------------------------------- #
-@overload
-def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
-@overload
-def pipeline_flow(
-    *,
-    # tracing
-    trace_level: TraceLevel = "always",
-    trace_ignore_input: bool = False,
-    trace_ignore_output: bool = False,
-    trace_ignore_inputs: list[str] | None = None,
-    trace_input_formatter: Callable[..., str] | None = None,
-    trace_output_formatter: Callable[..., str] | None = None,
-    # prefect passthrough
-    name: str | None = None,
-    version: str | None = None,
-    flow_run_name: Union[Callable[[], str], str] | None = None,
-    retries: int | None = None,
-    retry_delay_seconds: int | float | None = None,
-    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
-    description: str | None = None,
-    timeout_seconds: int | float | None = None,
-    validate_parameters: bool = True,
-    persist_result: bool | None = None,
-    result_storage: ResultStorage | str | None = None,
-    result_serializer: ResultSerializer | str | None = None,
-    cache_result_in_memory: bool = True,
-    log_prints: bool | None = None,
-    on_completion: list[FlowStateHook[Any, Any]] | None = None,
-    on_failure: list[FlowStateHook[Any, Any]] | None = None,
-    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
-    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
-    on_running: list[FlowStateHook[Any, Any]] | None = None,
-) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]: ...
 def pipeline_flow(
-    __fn: _DocumentsFlowCallable[FO_contra] | None = None,
-    /,
     *,
+    # config
+    config: type[FlowConfig],
     # tracing
     trace_level: TraceLevel = "always",
     trace_ignore_input: bool = False,
@@ -497,6 +473,7 @@ def pipeline_flow(
     trace_ignore_inputs: list[str] | None = None,
     trace_input_formatter: Callable[..., str] | None = None,
     trace_output_formatter: Callable[..., str] | None = None,
+    trace_cost: float | None = None,
     # prefect passthrough
     name: str | None = None,
     version: str | None = None,
@@ -517,7 +494,7 @@ def pipeline_flow(
     on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
     on_crashed: list[FlowStateHook[Any, Any]] | None = None,
     on_running: list[FlowStateHook[Any, Any]] | None = None,
-) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
+) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
     """Decorate an async flow for document processing.
     @public
@@ -537,16 +514,15 @@ def pipeline_flow(
             project_name: str,         # Project/pipeline identifier
             documents: DocumentList,   # Input documents to process
             flow_options: FlowOptions, # Configuration (or subclass)
-            *args,                     # Additional positional args for custom parameters
-            **kwargs                   # Additional keyword args for custom parameters
         ) -> DocumentList             # Must return DocumentList
-    Note: *args and **kwargs allow for defining custom parameters on your flow
-    function, which can be passed during execution for flow-specific needs.
     Args:
         __fn: Function to decorate (when used without parentheses).
+        Config parameter:
+        config: Required FlowConfig class for document loading/saving. Enables
+                automatic loading from string paths and saving outputs.
         Tracing parameters:
         trace_level: When to trace ("always", "debug", "off").
                     - "always": Always trace (default)
@@ -557,6 +533,9 @@ def pipeline_flow(
         trace_ignore_inputs: Parameter names to exclude from tracing.
         trace_input_formatter: Custom input formatter.
         trace_output_formatter: Custom output formatter.
+        trace_cost: Optional cost value to track in metadata. When provided and > 0,
+             sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
+             Also forces trace level to "always" if not already set.
         Prefect flow parameters:
         name: Flow name (defaults to function name).
@@ -584,10 +563,14 @@ def pipeline_flow(
         while enforcing document processing conventions.
     Example:
-        >>> from ai_pipeline_core import FlowOptions
+        >>> from ai_pipeline_core import FlowOptions, FlowConfig
         >>>
-        >>> # RECOMMENDED - No parameters needed!
-        >>> @pipeline_flow
+        >>> class MyFlowConfig(FlowConfig):
+        ...     INPUT_DOCUMENT_TYPES = [InputDoc]
+        ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
+        >>>
+        >>> # Standard usage with config
+        >>> @pipeline_flow(config=MyFlowConfig)
         >>> async def analyze_documents(
         ...     project_name: str,
         ...     documents: DocumentList,
@@ -600,8 +583,8 @@ def pipeline_flow(
         ...         results.append(result)
         ...     return DocumentList(results)
         >>>
-        >>> # With parameters (only when necessary):
-        >>> @pipeline_flow(retries=2)  # Only for flows that need retry logic
+        >>> # With additional parameters:
+        >>> @pipeline_flow(config=MyFlowConfig, retries=2)
         >>> async def critical_flow(
         ...     project_name: str,
         ...     documents: DocumentList,
@@ -658,24 +641,27 @@ def pipeline_flow(
                 "'project_name, documents, flow_options' as its first three parameters"
             )
+        @wraps(fn)
         async def _wrapper(
             project_name: str,
-            documents: DocumentList,
+            documents: str | DocumentList,
             flow_options: FO_contra,
-            *args: Any,
-            **kwargs: Any,
         ) -> DocumentList:
-            result = await fn(project_name, documents, flow_options, *args, **kwargs)
+            save_path: str | None = None
+            if isinstance(documents, str):
+                save_path = documents
+                documents = await config.load_documents(documents)
+            result = await fn(project_name, documents, flow_options)
+            if save_path:
+                await config.save_documents(save_path, result)
+            if trace_cost is not None and trace_cost > 0:
+                set_trace_cost(trace_cost)
             if not isinstance(result, DocumentList):  # pyright: ignore[reportUnnecessaryIsInstance]
                 raise TypeError(
                     f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
                 )
             return result
-        # Preserve the original function name for Prefect
-        _wrapper.__name__ = fname
-        _wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
         traced = trace(
             level=trace_level,
             name=name or fname,
@@ -686,7 +672,21 @@ def pipeline_flow(
             output_formatter=trace_output_formatter,
         )(_wrapper)
-        return cast(
+        # --- Publish a schema where `documents` accepts str (path) OR DocumentList ---
+        _sig = inspect.signature(fn)
+        _params = [
+            p.replace(annotation=(str | DocumentList)) if p.name == "documents" else p
+            for p in _sig.parameters.values()
+        ]
+        if hasattr(traced, "__signature__"):
+            setattr(traced, "__signature__", _sig.replace(parameters=_params))
+        if hasattr(traced, "__annotations__"):
+            traced.__annotations__ = {
+                **getattr(traced, "__annotations__", {}),
+                "documents": str | DocumentList,
+            }
+        flow_obj = cast(
             _FlowLike[FO_contra],
             flow_decorator(
                 name=name or fname,
@@ -710,8 +710,11 @@ def pipeline_flow(
                 on_running=on_running,
             )(traced),
         )
+        # Attach config to the flow object for later access
+        flow_obj.config = config  # type: ignore[attr-defined]
+        return flow_obj
-    return _apply(__fn) if __fn else _apply
+    return _apply
 __all__ = ["pipeline_task", "pipeline_flow"]

ai_pipeline_core/prefect.py CHANGED Viewed

@@ -47,8 +47,17 @@ Note:
     integrated LMNR tracing and are the standard for this library.
 """
-from prefect import flow, task
+from prefect import deploy, flow, serve, task
 from prefect.logging import disable_run_logger
 from prefect.testing.utilities import prefect_test_harness
-__all__ = ["task", "flow", "disable_run_logger", "prefect_test_harness"]
+from prefect.types.entrypoint import EntrypointType
+__all__ = [
+    "task",
+    "flow",
+    "disable_run_logger",
+    "prefect_test_harness",
+    "serve",
+    "deploy",
+    "EntrypointType",
+]

ai_pipeline_core/prompt_manager.py CHANGED Viewed

@@ -10,7 +10,8 @@ directories.
 Search strategy:
     1. Local directory (same as calling module)
     2. Local 'prompts' subdirectory
-    3. Parent 'prompts' directories (up to package boundary)
+    3. Parent 'prompts' directories (search ascends parent packages up to the package
+       boundary or after 4 parent levels, whichever comes first)
 Key features:
     - Automatic template discovery
@@ -69,7 +70,8 @@ class PromptManager:
     Search hierarchy:
         1. Same directory as the calling module (for local templates)
         2. 'prompts' subdirectory in the calling module's directory
-        3. 'prompts' directories in parent packages (up to package boundary)
+        3. 'prompts' directories in parent packages (search ascends parent packages up to the
+           package boundary or after 4 parent levels, whichever comes first)
     Attributes:
         search_paths: List of directories where templates are searched.
@@ -144,7 +146,8 @@ class PromptManager:
             2. /project/flows/prompts/ (if exists)
             3. /project/prompts/ (if /project has __init__.py)
-            Search stops when no __init__.py is found (package boundary).
+            Search ascends parent packages up to the package boundary or after 4 parent
+            levels, whichever comes first.
         Example:
             >>> # Correct usage
@@ -155,10 +158,6 @@ class PromptManager:
             >>>
             >>> # Common mistake (will raise PromptError)
             >>> pm = PromptManager(__name__)  # Wrong!
-        Note:
-            The search is limited to 4 parent levels to prevent
-            excessive filesystem traversal.
         """
         search_paths: list[Path] = []

ai_pipeline_core/settings.py CHANGED Viewed

@@ -12,6 +12,7 @@ Environment variables:
     PREFECT_API_URL: Prefect server endpoint for flow orchestration
     PREFECT_API_KEY: Prefect API authentication key
     LMNR_PROJECT_API_KEY: Laminar project key for observability
+    GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file
 Configuration precedence:
     1. Environment variables (highest priority)
@@ -39,6 +40,7 @@ Example:
     PREFECT_API_URL=http://localhost:4200/api
     PREFECT_API_KEY=pnu_abc123
     LMNR_PROJECT_API_KEY=lmnr_proj_xyz
+    GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
     APP_NAME=production-app
     DEBUG_MODE=false
@@ -90,12 +92,15 @@ class Settings(BaseSettings):
         prefect_api_key: Prefect API authentication key. Required only
                         when connecting to Prefect Cloud or secured server.
-        lmnr_project_api_key: Laminar (LMNR) project API key for tracing
-                              and observability. Optional but recommended
-                              for production monitoring.
+        lmnr_project_api_key: Laminar (LMNR) project API key for observability.
+                              Optional but recommended for production monitoring.
-        lmnr_debug: Debug mode flag for Laminar tracing. Set to "true" to
-                   enable debug-level traces. Empty string by default.
+        lmnr_debug: Debug mode flag for Laminar. Set to "true" to
+                   enable debug-level logging. Empty string by default.
+        gcs_service_account_file: Path to GCS service account JSON file.
+                                  Used for authenticating with Google Cloud Storage.
+                                  Optional - if not set, default credentials will be used.
     Configuration sources:
         - Environment variables (highest priority)
@@ -126,6 +131,9 @@ class Settings(BaseSettings):
     lmnr_project_api_key: str = ""
     lmnr_debug: str = ""
+    # Storage Configuration
+    gcs_service_account_file: str = ""  # Path to GCS service account JSON file
 # Legacy: Module-level instance for backwards compatibility
 # Applications should create their own settings instance

ai_pipeline_core/simple_runner/__init__.py CHANGED Viewed

@@ -4,21 +4,11 @@ Utilities for running AI pipelines locally without full Prefect orchestration.
 """
 from .cli import run_cli
-from .simple_runner import (
-    ConfigSequence,
-    FlowSequence,
-    load_documents_from_directory,
-    run_pipeline,
-    run_pipelines,
-    save_documents_to_directory,
-)
+from .simple_runner import FlowSequence, run_pipeline, run_pipelines
 __all__ = [
     "run_cli",
     "run_pipeline",
     "run_pipelines",
-    "load_documents_from_directory",
-    "save_documents_to_directory",
     "FlowSequence",
-    "ConfigSequence",
 ]

ai_pipeline_core/simple_runner/cli.py CHANGED Viewed

@@ -19,7 +19,7 @@ from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
 from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
 from ai_pipeline_core.settings import settings
-from .simple_runner import ConfigSequence, FlowSequence, run_pipelines, save_documents_to_directory
+from .simple_runner import FlowSequence, run_pipelines
 logger = get_pipeline_logger(__name__)
@@ -87,7 +87,6 @@ def _running_under_pytest() -> bool:
 def run_cli(
     *,
     flows: FlowSequence,
-    flow_configs: ConfigSequence,
     options_cls: Type[TOptions],
     initializer: InitializerFunc = None,
     trace_name: str | None = None,
@@ -105,17 +104,13 @@ def run_cli(
     Example:
         >>> # In __main__.py
-        >>> from ai_pipeline_core.simple_runner import run_cli
+        >>> from ai_pipeline_core import simple_runner
         >>> from .flows import AnalysisFlow, SummaryFlow
-        >>> from .config import AnalysisConfig, AnalysisOptions
+        >>> from .config import AnalysisOptions
         >>>
         >>> if __name__ == "__main__":
-        ...     run_cli(
+        ...     simple_runner.run_cli(
         ...         flows=[AnalysisFlow, SummaryFlow],
-        ...         flow_configs=[
-        ...             (AnalysisConfig, AnalysisOptions),
-        ...             (AnalysisConfig, AnalysisOptions)
-        ...         ],
         ...         options_cls=AnalysisOptions,
         ...         trace_name="document-analysis"
         ...     )
@@ -226,8 +221,15 @@ def run_cli(
         _, initial_documents = init_result  # Ignore project name from initializer
         # Save initial documents if starting from first step
-        if getattr(opts, "start", 1) == 1 and initial_documents:
-            save_documents_to_directory(wd, initial_documents)
+        if getattr(opts, "start", 1) == 1 and initial_documents and flows:
+            # Get config from the first flow
+            first_flow_config = getattr(flows[0], "config", None)
+            if first_flow_config:
+                asyncio.run(
+                    first_flow_config.save_documents(
+                        str(wd), initial_documents, validate_output_type=False
+                    )
+                )
     # Setup context stack with optional test harness and tracing
     with ExitStack() as stack:
@@ -247,7 +249,6 @@ def run_cli(
                 project_name=project_name,
                 output_dir=wd,
                 flows=flows,
-                flow_configs=flow_configs,
                 flow_options=opts,
                 start_step=getattr(opts, "start", 1),
                 end_step=getattr(opts, "end", None),

ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl