ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. ai_pipeline_core/__init__.py +21 -13
  2. ai_pipeline_core/documents/document.py +202 -51
  3. ai_pipeline_core/documents/document_list.py +148 -24
  4. ai_pipeline_core/documents/flow_document.py +2 -6
  5. ai_pipeline_core/documents/task_document.py +0 -4
  6. ai_pipeline_core/documents/temporary_document.py +1 -8
  7. ai_pipeline_core/flow/config.py +174 -5
  8. ai_pipeline_core/llm/__init__.py +1 -6
  9. ai_pipeline_core/llm/ai_messages.py +137 -4
  10. ai_pipeline_core/llm/client.py +118 -65
  11. ai_pipeline_core/llm/model_options.py +6 -7
  12. ai_pipeline_core/llm/model_response.py +17 -16
  13. ai_pipeline_core/llm/model_types.py +3 -7
  14. ai_pipeline_core/logging/__init__.py +0 -2
  15. ai_pipeline_core/logging/logging_config.py +0 -6
  16. ai_pipeline_core/logging/logging_mixin.py +2 -10
  17. ai_pipeline_core/pipeline.py +54 -68
  18. ai_pipeline_core/prefect.py +12 -3
  19. ai_pipeline_core/prompt_manager.py +14 -7
  20. ai_pipeline_core/settings.py +13 -5
  21. ai_pipeline_core/simple_runner/__init__.py +1 -11
  22. ai_pipeline_core/simple_runner/cli.py +13 -12
  23. ai_pipeline_core/simple_runner/simple_runner.py +34 -189
  24. ai_pipeline_core/storage/__init__.py +8 -0
  25. ai_pipeline_core/storage/storage.py +628 -0
  26. ai_pipeline_core/tracing.py +234 -30
  27. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
  28. ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
  29. ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
  30. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
  31. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/logging/__init__.py
@@ -1,7 +1,5 @@
 """Logging infrastructure for AI Pipeline Core.
 
-@public
-
 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
 
ai_pipeline_core/logging/logging_config.py
@@ -1,7 +1,5 @@
 """Centralized logging configuration for AI Pipeline Core.
 
-@public
-
 Provides logging configuration management that integrates with Prefect's logging system.
 """
 
@@ -26,8 +24,6 @@ DEFAULT_LOG_LEVELS = {
 class LoggingConfig:
     """Manages logging configuration for the pipeline.
 
-    @public
-
     Provides centralized logging configuration with Prefect integration.
 
     Configuration precedence:
@@ -144,8 +140,6 @@ _logging_config: Optional[LoggingConfig] = None
 def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
     """Setup logging for the AI Pipeline Core library.
 
-    @public
-
     Initializes logging configuration for the pipeline system.
 
     IMPORTANT: Call setup_logging exactly once in your application entry point
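These hunks only remove the `@public` markers; usage is unchanged: configure once, then fetch loggers through the facade. A minimal sketch (the import path is taken from the cli.py hunk below; the level value is an assumption):

```python
from ai_pipeline_core.logging import get_pipeline_logger, setup_logging

setup_logging(level="INFO")  # call exactly once, at the application entry point

logger = get_pipeline_logger(__name__)  # preferred over logging.getLogger
logger.info("pipeline starting")
```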
ai_pipeline_core/logging/logging_mixin.py
@@ -1,7 +1,4 @@
-"""Logging mixin for consistent logging across components using Prefect logging.
-
-@public
-"""
+"""Logging mixin for consistent logging across components using Prefect logging."""
 
 import contextlib
 import time
@@ -17,8 +14,6 @@ from prefect.logging import get_logger
 class LoggerMixin:
     """Mixin class that provides consistent logging functionality using Prefect's logging system.
 
-    @public
-
     Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
     The mixin's internal behavior routes to the appropriate backend; you should not call
     logging.getLogger directly.
@@ -94,10 +89,7 @@ class LoggerMixin:
 
 
 class StructuredLoggerMixin(LoggerMixin):
-    """Extended mixin for structured logging with Prefect.
-
-    @public
-    """
+    """Extended mixin for structured logging with Prefect."""
 
     def log_event(self, event: str, **kwargs: Any) -> None:
         """Log a structured event.
ai_pipeline_core/pipeline.py
@@ -36,6 +36,7 @@ from prefect.utilities.annotations import NotSet
 from typing_extensions import TypeAlias
 
 from ai_pipeline_core.documents import DocumentList
+from ai_pipeline_core.flow.config import FlowConfig
 from ai_pipeline_core.flow.options import FlowOptions
 from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
 
@@ -100,7 +101,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
        project_name: Name of the project/pipeline.
        documents: Input DocumentList to process.
        flow_options: Configuration options (FlowOptions or subclass).
-       *args, **kwargs: Additional flow-specific parameters.
 
     Returns:
        DocumentList: Processed documents.
@@ -114,8 +114,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
         project_name: str,
         documents: DocumentList,
         flow_options: FO_contra,
-        *args: Any,
-        **kwargs: Any,
     ) -> Coroutine[Any, Any, DocumentList]: ...
 
 
@@ -146,8 +144,6 @@ class _FlowLike(Protocol[FO_contra]):
         project_name: str,
         documents: DocumentList,
         flow_options: FO_contra,
-        *args: Any,
-        **kwargs: Any,
     ) -> Coroutine[Any, Any, DocumentList]: ...
 
     name: str | None
@@ -226,6 +222,7 @@ def pipeline_task(
     trace_input_formatter: Callable[..., str] | None = None,
     trace_output_formatter: Callable[..., str] | None = None,
     trace_cost: float | None = None,
+    trace_trim_documents: bool = True,
     # prefect passthrough
     name: str | None = None,
     description: str | None = None,
@@ -266,6 +263,7 @@ def pipeline_task(
     trace_input_formatter: Callable[..., str] | None = None,
     trace_output_formatter: Callable[..., str] | None = None,
     trace_cost: float | None = None,
+    trace_trim_documents: bool = True,
     # prefect passthrough
     name: str | None = None,
     description: str | None = None,
@@ -322,6 +320,8 @@ def pipeline_task(
        trace_cost: Optional cost value to track in metadata. When provided and > 0,
            sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
            Also forces trace level to "always" if not already set.
+       trace_trim_documents: Trim document content in traces to first 100 chars (default True).
+           Reduces trace size with large documents.
 
     Prefect task parameters:
        name: Task name (defaults to function name).
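For context, a hedged sketch of the new `trace_trim_documents` knob on a task; the decorator and `DocumentList` follow the library's documented usage, while the task body and top-level import path are assumptions:

```python
from ai_pipeline_core import pipeline_task
from ai_pipeline_core.documents import DocumentList

# Default (True) trims each document to its first 100 chars in traces;
# pass False only when full document content is worth the trace size.
@pipeline_task(trace_trim_documents=False)
async def passthrough(documents: DocumentList) -> DocumentList:
    return documents
```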
@@ -420,10 +420,6 @@ def pipeline_task(
                set_trace_cost(trace_cost)
            return result
 
-        # Preserve the original function name for Prefect
-        _wrapper.__name__ = fname
-        _wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
-
        traced_fn = trace(
            level=trace_level,
            name=name or fname,
@@ -432,6 +428,7 @@ def pipeline_task(
            ignore_inputs=trace_ignore_inputs,
            input_formatter=trace_input_formatter,
            output_formatter=trace_output_formatter,
+           trim_documents=trace_trim_documents,
        )(_wrapper)
 
        return cast(
@@ -470,11 +467,10 @@ def pipeline_task(
 # --------------------------------------------------------------------------- #
 # @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
 # --------------------------------------------------------------------------- #
-@overload
-def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
-@overload
 def pipeline_flow(
     *,
+    # config
+    config: type[FlowConfig],
     # tracing
     trace_level: TraceLevel = "always",
     trace_ignore_input: bool = False,
@@ -483,6 +479,7 @@ def pipeline_flow(
     trace_input_formatter: Callable[..., str] | None = None,
     trace_output_formatter: Callable[..., str] | None = None,
     trace_cost: float | None = None,
+    trace_trim_documents: bool = True,
     # prefect passthrough
     name: str | None = None,
     version: str | None = None,
@@ -503,42 +500,7 @@ def pipeline_flow(
     on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
     on_crashed: list[FlowStateHook[Any, Any]] | None = None,
     on_running: list[FlowStateHook[Any, Any]] | None = None,
-) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]: ...
-
-
-def pipeline_flow(
-    __fn: _DocumentsFlowCallable[FO_contra] | None = None,
-    /,
-    *,
-    # tracing
-    trace_level: TraceLevel = "always",
-    trace_ignore_input: bool = False,
-    trace_ignore_output: bool = False,
-    trace_ignore_inputs: list[str] | None = None,
-    trace_input_formatter: Callable[..., str] | None = None,
-    trace_output_formatter: Callable[..., str] | None = None,
-    trace_cost: float | None = None,
-    # prefect passthrough
-    name: str | None = None,
-    version: str | None = None,
-    flow_run_name: Union[Callable[[], str], str] | None = None,
-    retries: int | None = None,
-    retry_delay_seconds: int | float | None = None,
-    task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
-    description: str | None = None,
-    timeout_seconds: int | float | None = None,
-    validate_parameters: bool = True,
-    persist_result: bool | None = None,
-    result_storage: ResultStorage | str | None = None,
-    result_serializer: ResultSerializer | str | None = None,
-    cache_result_in_memory: bool = True,
-    log_prints: bool | None = None,
-    on_completion: list[FlowStateHook[Any, Any]] | None = None,
-    on_failure: list[FlowStateHook[Any, Any]] | None = None,
-    on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
-    on_crashed: list[FlowStateHook[Any, Any]] | None = None,
-    on_running: list[FlowStateHook[Any, Any]] | None = None,
-) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
+) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
     """Decorate an async flow for document processing.
 
     @public
@@ -558,16 +520,15 @@ def pipeline_flow(
        project_name: str,  # Project/pipeline identifier
        documents: DocumentList,  # Input documents to process
        flow_options: FlowOptions,  # Configuration (or subclass)
-       *args,  # Additional positional args for custom parameters
-       **kwargs  # Additional keyword args for custom parameters
     ) -> DocumentList  # Must return DocumentList
 
-    Note: *args and **kwargs allow for defining custom parameters on your flow
-    function, which can be passed during execution for flow-specific needs.
-
     Args:
        __fn: Function to decorate (when used without parentheses).
 
+    Config parameter:
+       config: Required FlowConfig class for document loading/saving. Enables
+           automatic loading from string paths and saving outputs.
+
     Tracing parameters:
        trace_level: When to trace ("always", "debug", "off").
            - "always": Always trace (default)
@@ -581,6 +542,8 @@ def pipeline_flow(
        trace_cost: Optional cost value to track in metadata. When provided and > 0,
            sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
            Also forces trace level to "always" if not already set.
+       trace_trim_documents: Trim document content in traces to first 100 chars (default True).
+           Reduces trace size with large documents.
 
     Prefect flow parameters:
        name: Flow name (defaults to function name).
@@ -608,10 +571,14 @@ def pipeline_flow(
     while enforcing document processing conventions.
 
     Example:
-        >>> from ai_pipeline_core import FlowOptions
+        >>> from ai_pipeline_core import FlowOptions, FlowConfig
        >>>
-        >>> # RECOMMENDED - No parameters needed!
-        >>> @pipeline_flow
+        >>> class MyFlowConfig(FlowConfig):
+        ...     INPUT_DOCUMENT_TYPES = [InputDoc]
+        ...     OUTPUT_DOCUMENT_TYPE = OutputDoc
+        >>>
+        >>> # Standard usage with config
+        >>> @pipeline_flow(config=MyFlowConfig)
        >>> async def analyze_documents(
        ...     project_name: str,
        ...     documents: DocumentList,
@@ -624,8 +591,8 @@ def pipeline_flow(
        ...     results.append(result)
        ...     return DocumentList(results)
        >>>
-        >>> # With parameters (only when necessary):
-        >>> @pipeline_flow(retries=2)  # Only for flows that need retry logic
+        >>> # With additional parameters:
+        >>> @pipeline_flow(config=MyFlowConfig, retries=2)
        >>> async def critical_flow(
        ...     project_name: str,
        ...     documents: DocumentList,
@@ -682,14 +649,19 @@ def pipeline_flow(
                "'project_name, documents, flow_options' as its first three parameters"
            )
 
+        @wraps(fn)
        async def _wrapper(
            project_name: str,
-           documents: DocumentList,
+           documents: str | DocumentList,
            flow_options: FO_contra,
-           *args: Any,
-           **kwargs: Any,
        ) -> DocumentList:
-           result = await fn(project_name, documents, flow_options, *args, **kwargs)
+           save_path: str | None = None
+           if isinstance(documents, str):
+               save_path = documents
+               documents = await config.load_documents(documents)
+           result = await fn(project_name, documents, flow_options)
+           if save_path:
+               await config.save_documents(save_path, result)
            if trace_cost is not None and trace_cost > 0:
                set_trace_cost(trace_cost)
            if not isinstance(result, DocumentList):  # pyright: ignore[reportUnnecessaryIsInstance]
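The wrapper above changes the calling convention: `documents` may now be a string path, in which case inputs are loaded through `config.load_documents()` and results written back via `config.save_documents()`. A hedged sketch reusing `analyze_documents` and `MyFlowConfig` from the docstring example; the path and options values are illustrative:

```python
import asyncio

from ai_pipeline_core import FlowOptions

async def main() -> None:
    # A DocumentList argument behaves as before; a str triggers load-then-save:
    # inputs are read from, and outputs persisted to, "./work" by MyFlowConfig.
    await analyze_documents("demo-project", "./work", FlowOptions())

asyncio.run(main())
```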
@@ -698,10 +670,6 @@ def pipeline_flow(
            )
            return result
 
-        # Preserve the original function name for Prefect
-        _wrapper.__name__ = fname
-        _wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
-
        traced = trace(
            level=trace_level,
            name=name or fname,
@@ -710,9 +678,24 @@ def pipeline_flow(
            ignore_inputs=trace_ignore_inputs,
            input_formatter=trace_input_formatter,
            output_formatter=trace_output_formatter,
+           trim_documents=trace_trim_documents,
        )(_wrapper)
 
-        return cast(
+        # --- Publish a schema where `documents` accepts str (path) OR DocumentList ---
+        _sig = inspect.signature(fn)
+        _params = [
+            p.replace(annotation=(str | DocumentList)) if p.name == "documents" else p
+            for p in _sig.parameters.values()
+        ]
+        if hasattr(traced, "__signature__"):
+            setattr(traced, "__signature__", _sig.replace(parameters=_params))
+        if hasattr(traced, "__annotations__"):
+            traced.__annotations__ = {
+                **getattr(traced, "__annotations__", {}),
+                "documents": str | DocumentList,
+            }
+
+        flow_obj = cast(
            _FlowLike[FO_contra],
            flow_decorator(
                name=name or fname,
@@ -736,8 +719,11 @@ def pipeline_flow(
                on_running=on_running,
            )(traced),
        )
+        # Attach config to the flow object for later access
+        flow_obj.config = config  # type: ignore[attr-defined]
+        return flow_obj
 
-    return _apply(__fn) if __fn else _apply
+    return _apply
 
 
 __all__ = ["pipeline_task", "pipeline_flow"]
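Because `_apply` now attaches the config class to the returned flow object, downstream code can discover it by attribute; the cli.py hunk below relies on exactly this. A one-line sketch reusing the docstring example's names:

```python
assert analyze_documents.config is MyFlowConfig  # set by @pipeline_flow(config=...)
```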
ai_pipeline_core/prefect.py
@@ -47,8 +47,17 @@ Note:
     integrated LMNR tracing and are the standard for this library.
 """
 
-from prefect import flow, task
+from prefect import deploy, flow, serve, task
 from prefect.logging import disable_run_logger
 from prefect.testing.utilities import prefect_test_harness
-
-__all__ = ["task", "flow", "disable_run_logger", "prefect_test_harness"]
+from prefect.types.entrypoint import EntrypointType
+
+__all__ = [
+    "task",
+    "flow",
+    "disable_run_logger",
+    "prefect_test_harness",
+    "serve",
+    "deploy",
+    "EntrypointType",
+]
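With `serve` and `deploy` re-exported, a flow can be served without importing Prefect directly. A hedged sketch using standard Prefect deployment API (`to_deployment`); the flow itself is illustrative:

```python
from ai_pipeline_core.prefect import flow, serve

@flow
def heartbeat() -> str:
    return "ok"

if __name__ == "__main__":
    # serve() blocks and executes runs for this deployment
    serve(heartbeat.to_deployment(name="heartbeat-local"))
```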
ai_pipeline_core/prompt_manager.py
@@ -10,13 +10,16 @@ directories.
 Search strategy:
 1. Local directory (same as calling module)
 2. Local 'prompts' subdirectory
-3. Parent 'prompts' directories (up to package boundary)
+3. Parent 'prompts' directories (search ascends parent packages up to the package
+   boundary or after 4 parent levels, whichever comes first)
 
 Key features:
 - Automatic template discovery
 - Jinja2 template rendering with context
 - Smart path resolution (.jinja2/.jinja extension handling)
 - Clear error messages for missing templates
+- Built-in global variables:
+  - current_date: Current date in format "03 January 2025" (string)
 
 Example:
     >>> from ai_pipeline_core import PromptManager
@@ -44,6 +47,7 @@ Note:
     The extension can be omitted when calling get().
 """
 
+from datetime import datetime
 from pathlib import Path
 from typing import Any
 
@@ -69,7 +73,8 @@ class PromptManager:
     Search hierarchy:
     1. Same directory as the calling module (for local templates)
     2. 'prompts' subdirectory in the calling module's directory
-    3. 'prompts' directories in parent packages (up to package boundary)
+    3. 'prompts' directories in parent packages (search ascends parent packages up to the
+       package boundary or after 4 parent levels, whichever comes first)
 
     Attributes:
        search_paths: List of directories where templates are searched.
@@ -101,6 +106,8 @@ class PromptManager:
        {% if instructions %}
        Instructions: {{ instructions }}
        {% endif %}
+
+       Date: {{ current_date }}  # Current date in format "03 January 2025"
        ```
 
     Note:
@@ -144,7 +151,8 @@ class PromptManager:
           2. /project/flows/prompts/ (if exists)
           3. /project/prompts/ (if /project has __init__.py)
 
-        Search stops when no __init__.py is found (package boundary).
+        Search ascends parent packages up to the package boundary or after 4 parent
+        levels, whichever comes first.
 
        Example:
           >>> # Correct usage
@@ -155,10 +163,6 @@ class PromptManager:
           >>>
           >>> # Common mistake (will raise PromptError)
           >>> pm = PromptManager(__name__)  # Wrong!
-
-        Note:
-            The search is limited to 4 parent levels to prevent
-            excessive filesystem traversal.
        """
        search_paths: list[Path] = []
 
@@ -215,6 +219,9 @@ class PromptManager:
            autoescape=False,  # Important for prompt engineering
        )
 
+        # Add current_date as a global string (format: "03 January 2025")
+        self.env.globals["current_date"] = datetime.now().strftime("%d %B %Y")  # type: ignore[assignment]
+
    def get(self, prompt_path: str, **kwargs: Any) -> str:
        """Load and render a Jinja2 template with the given context.
 
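A short sketch of the new `current_date` global; the template name and context variable are illustrative, and `PromptManager(__file__)` follows the documented correct usage:

```python
from ai_pipeline_core import PromptManager

pm = PromptManager(__file__)
# prompts/summary.jinja2 could contain:
#   Today is {{ current_date }}. Summarize: {{ topic }}
rendered = pm.get("summary", topic="quarterly results")
# current_date renders like "03 January 2025" without being passed in
```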
ai_pipeline_core/settings.py
@@ -12,6 +12,7 @@ Environment variables:
    PREFECT_API_URL: Prefect server endpoint for flow orchestration
    PREFECT_API_KEY: Prefect API authentication key
    LMNR_PROJECT_API_KEY: Laminar project key for observability
+   GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file
 
 Configuration precedence:
    1. Environment variables (highest priority)
@@ -39,6 +40,7 @@ Example:
    PREFECT_API_URL=http://localhost:4200/api
    PREFECT_API_KEY=pnu_abc123
    LMNR_PROJECT_API_KEY=lmnr_proj_xyz
+   GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
    APP_NAME=production-app
    DEBUG_MODE=false
 
@@ -90,12 +92,15 @@ class Settings(BaseSettings):
        prefect_api_key: Prefect API authentication key. Required only
            when connecting to Prefect Cloud or secured server.
 
-        lmnr_project_api_key: Laminar (LMNR) project API key for tracing
-                              and observability. Optional but recommended
-                              for production monitoring.
+        lmnr_project_api_key: Laminar (LMNR) project API key for observability.
+            Optional but recommended for production monitoring.
 
-        lmnr_debug: Debug mode flag for Laminar tracing. Set to "true" to
-                    enable debug-level traces. Empty string by default.
+        lmnr_debug: Debug mode flag for Laminar. Set to "true" to
+            enable debug-level logging. Empty string by default.
+
+        gcs_service_account_file: Path to GCS service account JSON file.
+            Used for authenticating with Google Cloud Storage.
+            Optional - if not set, default credentials will be used.
 
     Configuration sources:
        - Environment variables (highest priority)
@@ -126,6 +131,9 @@ class Settings(BaseSettings):
    lmnr_project_api_key: str = ""
    lmnr_debug: str = ""
 
+    # Storage Configuration
+    gcs_service_account_file: str = ""  # Path to GCS service account JSON file
+
 
 # Legacy: Module-level instance for backwards compatibility
 # Applications should create their own settings instance
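A hedged sketch of consuming the new setting; the explicit fallback mirrors the docstring ("if not set, default credentials will be used"), and google-cloud-storage is an assumed dependency of the new storage module:

```python
from google.cloud import storage

from ai_pipeline_core.settings import settings

if settings.gcs_service_account_file:
    # Authenticate with the configured service-account JSON file
    client = storage.Client.from_service_account_json(settings.gcs_service_account_file)
else:
    client = storage.Client()  # application default credentials
```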
ai_pipeline_core/simple_runner/__init__.py
@@ -4,21 +4,11 @@ Utilities for running AI pipelines locally without full Prefect orchestration.
 """
 
 from .cli import run_cli
-from .simple_runner import (
-    ConfigSequence,
-    FlowSequence,
-    load_documents_from_directory,
-    run_pipeline,
-    run_pipelines,
-    save_documents_to_directory,
-)
+from .simple_runner import FlowSequence, run_pipeline, run_pipelines
 
 __all__ = [
     "run_cli",
     "run_pipeline",
     "run_pipelines",
-    "load_documents_from_directory",
-    "save_documents_to_directory",
     "FlowSequence",
-    "ConfigSequence",
 ]
ai_pipeline_core/simple_runner/cli.py
@@ -19,7 +19,7 @@ from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
 from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
 from ai_pipeline_core.settings import settings
 
-from .simple_runner import ConfigSequence, FlowSequence, run_pipelines, save_documents_to_directory
+from .simple_runner import FlowSequence, run_pipelines
 
 logger = get_pipeline_logger(__name__)
 
@@ -87,7 +87,6 @@ def _running_under_pytest() -> bool:
 def run_cli(
     *,
     flows: FlowSequence,
-    flow_configs: ConfigSequence,
     options_cls: Type[TOptions],
     initializer: InitializerFunc = None,
     trace_name: str | None = None,
@@ -105,17 +104,13 @@
 
     Example:
        >>> # In __main__.py
-        >>> from ai_pipeline_core.simple_runner import run_cli
+        >>> from ai_pipeline_core import simple_runner
        >>> from .flows import AnalysisFlow, SummaryFlow
-        >>> from .config import AnalysisConfig, AnalysisOptions
+        >>> from .config import AnalysisOptions
        >>>
        >>> if __name__ == "__main__":
-        ...     run_cli(
+        ...     simple_runner.run_cli(
        ...         flows=[AnalysisFlow, SummaryFlow],
-        ...         flow_configs=[
-        ...             (AnalysisConfig, AnalysisOptions),
-        ...             (AnalysisConfig, AnalysisOptions)
-        ...         ],
        ...         options_cls=AnalysisOptions,
        ...         trace_name="document-analysis"
        ...     )
@@ -226,8 +221,15 @@ def run_cli(
    _, initial_documents = init_result  # Ignore project name from initializer
 
    # Save initial documents if starting from first step
-    if getattr(opts, "start", 1) == 1 and initial_documents:
-        save_documents_to_directory(wd, initial_documents)
+    if getattr(opts, "start", 1) == 1 and initial_documents and flows:
+        # Get config from the first flow
+        first_flow_config = getattr(flows[0], "config", None)
+        if first_flow_config:
+            asyncio.run(
+                first_flow_config.save_documents(
+                    str(wd), initial_documents, validate_output_type=False
+                )
+            )
 
    # Setup context stack with optional test harness and tracing
    with ExitStack() as stack:
@@ -247,7 +249,6 @@
            project_name=project_name,
            output_dir=wd,
            flows=flows,
-            flow_configs=flow_configs,
            flow_options=opts,
            start_step=getattr(opts, "start", 1),
            end_step=getattr(opts, "end", None),