PyPI - ai-pipeline-core - Versions diffs - 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

ai_pipeline_core/__init__.py +78 -125
ai_pipeline_core/deployment/__init__.py +34 -0
ai_pipeline_core/deployment/base.py +861 -0
ai_pipeline_core/deployment/contract.py +80 -0
ai_pipeline_core/deployment/deploy.py +561 -0
ai_pipeline_core/deployment/helpers.py +97 -0
ai_pipeline_core/deployment/progress.py +126 -0
ai_pipeline_core/deployment/remote.py +116 -0
ai_pipeline_core/docs_generator/__init__.py +54 -0
ai_pipeline_core/docs_generator/__main__.py +5 -0
ai_pipeline_core/docs_generator/cli.py +196 -0
ai_pipeline_core/docs_generator/extractor.py +324 -0
ai_pipeline_core/docs_generator/guide_builder.py +644 -0
ai_pipeline_core/docs_generator/trimmer.py +35 -0
ai_pipeline_core/docs_generator/validator.py +114 -0
ai_pipeline_core/document_store/__init__.py +13 -0
ai_pipeline_core/document_store/_summary.py +9 -0
ai_pipeline_core/document_store/_summary_worker.py +170 -0
ai_pipeline_core/document_store/clickhouse.py +492 -0
ai_pipeline_core/document_store/factory.py +38 -0
ai_pipeline_core/document_store/local.py +312 -0
ai_pipeline_core/document_store/memory.py +85 -0
ai_pipeline_core/document_store/protocol.py +68 -0
ai_pipeline_core/documents/__init__.py +12 -14
ai_pipeline_core/documents/_context_vars.py +85 -0
ai_pipeline_core/documents/_hashing.py +52 -0
ai_pipeline_core/documents/attachment.py +85 -0
ai_pipeline_core/documents/context.py +128 -0
ai_pipeline_core/documents/document.py +318 -1434
ai_pipeline_core/documents/mime_type.py +37 -82
ai_pipeline_core/documents/utils.py +4 -12
ai_pipeline_core/exceptions.py +10 -62
ai_pipeline_core/images/__init__.py +309 -0
ai_pipeline_core/images/_processing.py +151 -0
ai_pipeline_core/llm/__init__.py +6 -4
ai_pipeline_core/llm/ai_messages.py +130 -81
ai_pipeline_core/llm/client.py +327 -193
ai_pipeline_core/llm/model_options.py +14 -86
ai_pipeline_core/llm/model_response.py +60 -103
ai_pipeline_core/llm/model_types.py +16 -34
ai_pipeline_core/logging/__init__.py +2 -7
ai_pipeline_core/logging/logging.yml +1 -1
ai_pipeline_core/logging/logging_config.py +27 -37
ai_pipeline_core/logging/logging_mixin.py +15 -41
ai_pipeline_core/observability/__init__.py +32 -0
ai_pipeline_core/observability/_debug/__init__.py +30 -0
ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
ai_pipeline_core/observability/_debug/_config.py +95 -0
ai_pipeline_core/observability/_debug/_content.py +764 -0
ai_pipeline_core/observability/_debug/_processor.py +98 -0
ai_pipeline_core/observability/_debug/_summary.py +312 -0
ai_pipeline_core/observability/_debug/_types.py +75 -0
ai_pipeline_core/observability/_debug/_writer.py +843 -0
ai_pipeline_core/observability/_document_tracking.py +146 -0
ai_pipeline_core/observability/_initialization.py +194 -0
ai_pipeline_core/observability/_logging_bridge.py +57 -0
ai_pipeline_core/observability/_summary.py +81 -0
ai_pipeline_core/observability/_tracking/__init__.py +6 -0
ai_pipeline_core/observability/_tracking/_client.py +178 -0
ai_pipeline_core/observability/_tracking/_internal.py +28 -0
ai_pipeline_core/observability/_tracking/_models.py +138 -0
ai_pipeline_core/observability/_tracking/_processor.py +158 -0
ai_pipeline_core/observability/_tracking/_service.py +311 -0
ai_pipeline_core/observability/_tracking/_writer.py +229 -0
ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
ai_pipeline_core/pipeline/__init__.py +10 -0
ai_pipeline_core/pipeline/decorators.py +915 -0
ai_pipeline_core/pipeline/options.py +16 -0
ai_pipeline_core/prompt_manager.py +16 -102
ai_pipeline_core/settings.py +26 -31
ai_pipeline_core/testing.py +9 -0
ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
{ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
ai_pipeline_core/documents/document_list.py +0 -420
ai_pipeline_core/documents/flow_document.py +0 -112
ai_pipeline_core/documents/task_document.py +0 -117
ai_pipeline_core/documents/temporary_document.py +0 -74
ai_pipeline_core/flow/__init__.py +0 -9
ai_pipeline_core/flow/config.py +0 -483
ai_pipeline_core/flow/options.py +0 -75
ai_pipeline_core/pipeline.py +0 -718
ai_pipeline_core/prefect.py +0 -63
ai_pipeline_core/simple_runner/__init__.py +0 -14
ai_pipeline_core/simple_runner/cli.py +0 -254
ai_pipeline_core/simple_runner/simple_runner.py +0 -247
ai_pipeline_core/storage/__init__.py +0 -8
ai_pipeline_core/storage/storage.py +0 -628
ai_pipeline_core/utils/__init__.py +0 -8
ai_pipeline_core/utils/deploy.py +0 -373
ai_pipeline_core/utils/remote_deployment.py +0 -269
ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
{ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/pipeline/options.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Flow options base class for pipeline execution."""
+from pydantic_settings import BaseSettings, SettingsConfigDict
+class FlowOptions(BaseSettings):
+    """Base configuration for pipeline flows.
+    Subclass to add flow-specific parameters. Uses pydantic-settings
+    for environment variable overrides. Immutable after creation.
+    """
+    model_config = SettingsConfigDict(frozen=True, extra="allow")
+__all__ = ["FlowOptions"]

ai_pipeline_core/prompt_manager.py CHANGED Viewed

@@ -1,7 +1,5 @@
 """Jinja2-based prompt template management system.
-@public
 This module provides the PromptManager class for loading and rendering
 Jinja2 templates used as prompts for language models. It implements a
 smart search strategy that looks for templates in both local and shared
@@ -13,27 +11,6 @@ Search strategy:
     3. Parent 'prompts' directories (search ascends parent packages up to the package
        boundary or after 4 parent levels, whichever comes first)
-Key features:
-    - Automatic template discovery
-    - Jinja2 template rendering with context
-    - Smart path resolution (.jinja2/.jinja extension handling)
-    - Clear error messages for missing templates
-    - Built-in global variables:
-        - current_date: Current date in format "03 January 2025" (string)
-Example:
-    >>> from ai_pipeline_core import PromptManager
-    >>>
-    >>> # Initialize at module level (not inside functions)
-    >>> pm = PromptManager(__file__)
-    >>>
-    >>> # Render a template
-    >>> prompt = pm.get(
-    ...     "analyze.jinja2",
-    ...     document=doc,
-    ...     instructions="Extract key points"
-    ... )
 Template organization:
     project/
     ├── my_module.py        # Can use local templates
@@ -42,9 +19,8 @@ Template organization:
         ├── summarize.jinja2
         └── extract.jinja2
-Note:
-    Templates should use .jinja2 or .jinja extension.
-    The extension can be omitted when calling get().
+Templates should use .jinja2 or .jinja extension.
+The extension can be omitted when calling get().
 """
 from datetime import datetime
@@ -63,8 +39,6 @@ logger = get_pipeline_logger(__name__)
 class PromptManager:
     """Manages Jinja2 prompt templates with smart path resolution.
-    @public
     PromptManager provides a convenient interface for loading and rendering
     Jinja2 templates used as prompts for LLMs. It automatically searches for
     templates in multiple locations, supporting both local (module-specific)
@@ -80,23 +54,6 @@ class PromptManager:
         search_paths: List of directories where templates are searched.
         env: Jinja2 Environment configured for prompt rendering.
-    Example:
-        >>> # BEST PRACTICE: Instantiate at module scope (top level), not inside functions
-        >>> # In flow/my_flow.py
-        >>> from ai_pipeline_core import PromptManager
-        >>> pm = PromptManager(__file__)  # Module-level initialization
-        >>>
-        >>> # WRONG - Don't instantiate inside handlers or hot paths:
-        >>> # async def process():
-        >>> #     pm = PromptManager(__file__)  # NO! Creates new instance each call
-        >>>
-        >>> # Uses flow/prompts/analyze.jinja2 if it exists,
-        >>> # otherwise searches parent directories
-        >>> prompt = pm.get("analyze", context=data)
-        >>>
-        >>> # Can also use templates in same directory as module
-        >>> prompt = pm.get("local_template.jinja2")
     Template format:
         Templates use standard Jinja2 syntax:
         ```jinja2
@@ -110,9 +67,8 @@ class PromptManager:
         Date: {{ current_date }}  # Current date in format "03 January 2025"
         ```
-    Note:
-        - Autoescape is disabled for prompts (raw text output)
-        - Whitespace control is enabled (trim_blocks, lstrip_blocks)
+    Autoescape is disabled for prompts (raw text output).
+    Whitespace control is enabled (trim_blocks, lstrip_blocks).
     Template Inheritance:
         Templates support standard Jinja2 inheritance. Templates are searched
@@ -127,8 +83,6 @@ class PromptManager:
     def __init__(self, current_file: str, prompts_dir: str = "prompts"):
         """Initialize PromptManager with smart template discovery.
-        @public
         Sets up the Jinja2 environment with a FileSystemLoader that searches
         multiple directories for templates. The search starts from the calling
         module's location and extends to parent package directories.
@@ -145,34 +99,19 @@ class PromptManager:
             PromptError: If current_file is not a valid file path (e.g.,
                         if __name__ was passed instead of __file__).
-        Note:
-            Search behavior - Given a module at /project/flows/my_flow.py:
-            1. /project/flows/ (local templates)
-            2. /project/flows/prompts/ (if exists)
+        Search behavior - Given a module at /project/tasks/my_task.py:
+            1. /project/tasks/ (local templates)
+            2. /project/tasks/prompts/ (if exists)
             3. /project/prompts/ (if /project has __init__.py)
-            Search ascends parent packages up to the package boundary or after 4 parent
-            levels, whichever comes first.
-        Example:
-            >>> # Correct usage
-            >>> pm = PromptManager(__file__)
-            >>>
-            >>> # Custom prompts directory name
-            >>> pm = PromptManager(__file__, prompts_dir="templates")
-            >>>
-            >>> # Common mistake (will raise PromptError)
-            >>> pm = PromptManager(__name__)  # Wrong!
+        Search ascends parent packages up to the package boundary or after 4 parent
+        levels, whichever comes first.
         """
         search_paths: list[Path] = []
         # Start from the directory containing the calling file
         current_path = Path(current_file).resolve()
         if not current_path.exists():
-            raise PromptError(
-                f"PromptManager expected __file__ (a valid file path), "
-                f"but got {current_file!r}. Did you pass __name__ instead?"
-            )
+            raise PromptError(f"PromptManager expected __file__ (a valid file path), but got {current_file!r}. Did you pass __name__ instead?")
         if current_path.is_file():
             current_path = current_path.parent
@@ -225,8 +164,6 @@ class PromptManager:
     def get(self, prompt_path: str, **kwargs: Any) -> str:
         """Load and render a Jinja2 template with the given context.
-        @public
         Searches for the template in all configured search paths and renders
         it with the provided context variables. Automatically tries adding
         .jinja2 or .jinja extensions if the file is not found.
@@ -249,31 +186,11 @@ class PromptManager:
                               rendering fails (e.g., missing variables,
                               syntax errors).
-        Note:
-            Template resolution - Given prompt_path="analyze":
+        Template resolution - Given prompt_path="analyze":
             1. Try "analyze" as-is
             2. Try "analyze.jinja2"
             3. Try "analyze.jinja"
-            The first matching file is used.
-        Example:
-            >>> pm = PromptManager(__file__)
-            >>>
-            >>> # Simple rendering
-            >>> prompt = pm.get("summarize", text="Long document...")
-            >>>
-            >>> # With complex context
-            >>> prompt = pm.get(
-            ...     "analyze",
-            ...     document=doc,
-            ...     max_length=500,
-            ...     style="technical",
-            ...     options={"include_metadata": True}
-            ... )
-            >>>
-            >>> # Nested template path
-            >>> prompt = pm.get("flows/extraction/extract_entities")
+        The first matching file is used.
         Template example:
             ```jinja2
@@ -286,9 +203,8 @@ class PromptManager:
             {% endif %}
             ```
-        Note:
-            All Jinja2 features are available: loops, conditionals,
-            filters, macros, inheritance, etc.
+        All Jinja2 features are available: loops, conditionals,
+        filters, macros, inheritance, etc.
         """
         try:
             template = self.env.get_template(prompt_path)
@@ -301,13 +217,11 @@ class PromptManager:
                     return template.render(**kwargs)
                 except jinja2.TemplateNotFound:
                     pass  # Fall through to the original error
-            raise PromptNotFoundError(
-                f"Prompt template '{prompt_path}' not found (searched in {self.search_paths})."
-            )
+            raise PromptNotFoundError(f"Prompt template '{prompt_path}' not found (searched in {self.search_paths}).") from None
         except jinja2.TemplateError as e:
             raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e
         except PromptNotFoundError:
             raise  # Re-raise our custom exception
-        except (KeyError, TypeError, AttributeError, IOError, ValueError) as e:
+        except (OSError, KeyError, TypeError, AttributeError, ValueError) as e:
             logger.error(f"Unexpected error rendering '{prompt_path}'", exc_info=True)
             raise PromptRenderError(f"Failed to render prompt '{prompt_path}': {e}") from e

ai_pipeline_core/settings.py CHANGED Viewed

@@ -1,7 +1,5 @@
 """Core configuration settings for pipeline operations.
-@public
 This module provides the Settings base class for configuration management.
 Applications should inherit from Settings to create their own ProjectSettings
 class with additional configuration fields.
@@ -12,43 +10,27 @@ Environment variables:
     PREFECT_API_URL: Prefect server endpoint for flow orchestration
     PREFECT_API_KEY: Prefect API authentication key
     LMNR_PROJECT_API_KEY: Laminar project key for observability
-    GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file
+    GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file (for Prefect deployment bundles)
 Configuration precedence:
     1. Environment variables (highest priority)
     2. .env file in current directory
     3. Default values (empty strings)
-Example:
-    >>> from ai_pipeline_core import Settings
-    >>>
-    >>> # Create your project's settings class
-    >>> class ProjectSettings(Settings):
-    ...     app_name: str = "my-app"
-    ...     debug_mode: bool = False
-    >>>
-    >>> # Create singleton instance
-    >>> settings = ProjectSettings()
-    >>>
-    >>> # Access configuration
-    >>> print(settings.openai_base_url)
-    >>> print(settings.app_name)
 .env file format:
     OPENAI_BASE_URL=http://localhost:4000
     OPENAI_API_KEY=sk-1234567890
     PREFECT_API_URL=http://localhost:4200/api
     PREFECT_API_KEY=pnu_abc123
     LMNR_PROJECT_API_KEY=lmnr_proj_xyz
-    GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
+    GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json  # For Prefect deployment
     APP_NAME=production-app
     DEBUG_MODE=false
-Note:
-    Settings are loaded once at initialization and frozen. There is no
-    built-in reload mechanism - the process must be restarted to pick up
-    changes to environment variables or .env file. This is by design to
-    ensure consistency during execution.
+Settings are loaded once at initialization and frozen. There is no
+built-in reload mechanism - the process must be restarted to pick up
+changes to environment variables or .env file. This is by design to
+ensure consistency during execution.
 """
 from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -57,8 +39,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 class Settings(BaseSettings):
     """Base configuration class for AI Pipeline applications.
-    @public
     Settings is designed to be inherited by your application's configuration
     class. It provides core AI Pipeline settings and type-safe configuration
     management with automatic loading from environment variables and .env files.
@@ -99,7 +79,7 @@ class Settings(BaseSettings):
                    enable debug-level logging. Empty string by default.
         gcs_service_account_file: Path to GCS service account JSON file.
-                                  Used for authenticating with Google Cloud Storage.
+                                  Used for Prefect deployment bundles to GCS.
                                   Optional - if not set, default credentials will be used.
     Configuration sources:
@@ -107,9 +87,8 @@ class Settings(BaseSettings):
         - .env file in current directory
         - Default values in class definition
-    Note:
-        Empty strings are used as defaults to allow optional services.
-        Check for empty values before using service-specific settings.
+    Empty strings are used as defaults to allow optional services.
+    Check for empty values before using service-specific settings.
     """
     model_config = SettingsConfigDict(
@@ -135,8 +114,24 @@ class Settings(BaseSettings):
     lmnr_project_api_key: str = ""
     lmnr_debug: str = ""
-    # Storage Configuration
+    # GCS (for Prefect deployment bundles)
     gcs_service_account_file: str = ""  # Path to GCS service account JSON file
+    # ClickHouse tracking
+    clickhouse_host: str = ""
+    clickhouse_port: int = 8443
+    clickhouse_database: str = "default"
+    clickhouse_user: str = "default"
+    clickhouse_password: str = ""
+    clickhouse_secure: bool = True
+    # Tracking behavior
+    tracking_enabled: bool = True
+    tracking_summary_model: str = "gemini-3-flash"
+    # Document summary generation (store-level)
+    doc_summary_enabled: bool = True
+    doc_summary_model: str = "gemini-3-flash"
 settings = Settings()

ai_pipeline_core/testing.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Test utilities for pipeline development.
+Re-exports Prefect testing helpers used in pipeline test suites.
+"""
+from prefect.logging import disable_run_logger
+from prefect.testing.utilities import prefect_test_harness
+__all__ = ["disable_run_logger", "prefect_test_harness"]

ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl