PyPI - ai-pipeline-core - Versions diffs - 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

ai_pipeline_core/__init__.py +83 -119
ai_pipeline_core/deployment/__init__.py +34 -0
ai_pipeline_core/deployment/base.py +861 -0
ai_pipeline_core/deployment/contract.py +80 -0
ai_pipeline_core/deployment/deploy.py +561 -0
ai_pipeline_core/deployment/helpers.py +97 -0
ai_pipeline_core/deployment/progress.py +126 -0
ai_pipeline_core/deployment/remote.py +116 -0
ai_pipeline_core/docs_generator/__init__.py +54 -0
ai_pipeline_core/docs_generator/__main__.py +5 -0
ai_pipeline_core/docs_generator/cli.py +196 -0
ai_pipeline_core/docs_generator/extractor.py +324 -0
ai_pipeline_core/docs_generator/guide_builder.py +644 -0
ai_pipeline_core/docs_generator/trimmer.py +35 -0
ai_pipeline_core/docs_generator/validator.py +114 -0
ai_pipeline_core/document_store/__init__.py +13 -0
ai_pipeline_core/document_store/_summary.py +9 -0
ai_pipeline_core/document_store/_summary_worker.py +170 -0
ai_pipeline_core/document_store/clickhouse.py +492 -0
ai_pipeline_core/document_store/factory.py +38 -0
ai_pipeline_core/document_store/local.py +312 -0
ai_pipeline_core/document_store/memory.py +85 -0
ai_pipeline_core/document_store/protocol.py +68 -0
ai_pipeline_core/documents/__init__.py +14 -15
ai_pipeline_core/documents/_context_vars.py +85 -0
ai_pipeline_core/documents/_hashing.py +52 -0
ai_pipeline_core/documents/attachment.py +85 -0
ai_pipeline_core/documents/context.py +128 -0
ai_pipeline_core/documents/document.py +349 -1062
ai_pipeline_core/documents/mime_type.py +40 -85
ai_pipeline_core/documents/utils.py +62 -7
ai_pipeline_core/exceptions.py +10 -62
ai_pipeline_core/images/__init__.py +309 -0
ai_pipeline_core/images/_processing.py +151 -0
ai_pipeline_core/llm/__init__.py +5 -3
ai_pipeline_core/llm/ai_messages.py +284 -73
ai_pipeline_core/llm/client.py +462 -209
ai_pipeline_core/llm/model_options.py +86 -53
ai_pipeline_core/llm/model_response.py +187 -241
ai_pipeline_core/llm/model_types.py +34 -54
ai_pipeline_core/logging/__init__.py +2 -9
ai_pipeline_core/logging/logging.yml +1 -1
ai_pipeline_core/logging/logging_config.py +27 -43
ai_pipeline_core/logging/logging_mixin.py +17 -51
ai_pipeline_core/observability/__init__.py +32 -0
ai_pipeline_core/observability/_debug/__init__.py +30 -0
ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
ai_pipeline_core/observability/_debug/_config.py +95 -0
ai_pipeline_core/observability/_debug/_content.py +764 -0
ai_pipeline_core/observability/_debug/_processor.py +98 -0
ai_pipeline_core/observability/_debug/_summary.py +312 -0
ai_pipeline_core/observability/_debug/_types.py +75 -0
ai_pipeline_core/observability/_debug/_writer.py +843 -0
ai_pipeline_core/observability/_document_tracking.py +146 -0
ai_pipeline_core/observability/_initialization.py +194 -0
ai_pipeline_core/observability/_logging_bridge.py +57 -0
ai_pipeline_core/observability/_summary.py +81 -0
ai_pipeline_core/observability/_tracking/__init__.py +6 -0
ai_pipeline_core/observability/_tracking/_client.py +178 -0
ai_pipeline_core/observability/_tracking/_internal.py +28 -0
ai_pipeline_core/observability/_tracking/_models.py +138 -0
ai_pipeline_core/observability/_tracking/_processor.py +158 -0
ai_pipeline_core/observability/_tracking/_service.py +311 -0
ai_pipeline_core/observability/_tracking/_writer.py +229 -0
ai_pipeline_core/observability/tracing.py +640 -0
ai_pipeline_core/pipeline/__init__.py +10 -0
ai_pipeline_core/pipeline/decorators.py +915 -0
ai_pipeline_core/pipeline/options.py +16 -0
ai_pipeline_core/prompt_manager.py +26 -105
ai_pipeline_core/settings.py +41 -32
ai_pipeline_core/testing.py +9 -0
ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
{ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
ai_pipeline_core/documents/document_list.py +0 -240
ai_pipeline_core/documents/flow_document.py +0 -128
ai_pipeline_core/documents/task_document.py +0 -133
ai_pipeline_core/documents/temporary_document.py +0 -95
ai_pipeline_core/flow/__init__.py +0 -9
ai_pipeline_core/flow/config.py +0 -314
ai_pipeline_core/flow/options.py +0 -75
ai_pipeline_core/pipeline.py +0 -717
ai_pipeline_core/prefect.py +0 -54
ai_pipeline_core/simple_runner/__init__.py +0 -24
ai_pipeline_core/simple_runner/cli.py +0 -255
ai_pipeline_core/simple_runner/simple_runner.py +0 -385
ai_pipeline_core/tracing.py +0 -475
ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
{ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0

ai_pipeline_core/pipeline/options.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Flow options base class for pipeline execution."""
+from pydantic_settings import BaseSettings, SettingsConfigDict
+class FlowOptions(BaseSettings):
+    """Base configuration for pipeline flows.
+    Subclass to add flow-specific parameters. Uses pydantic-settings
+    for environment variable overrides. Immutable after creation.
+    """
+    model_config = SettingsConfigDict(frozen=True, extra="allow")
+__all__ = ["FlowOptions"]

ai_pipeline_core/prompt_manager.py CHANGED Viewed

@@ -1,7 +1,5 @@
 """Jinja2-based prompt template management system.
-@public
 This module provides the PromptManager class for loading and rendering
 Jinja2 templates used as prompts for language models. It implements a
 smart search strategy that looks for templates in both local and shared
@@ -10,26 +8,8 @@ directories.
 Search strategy:
     1. Local directory (same as calling module)
     2. Local 'prompts' subdirectory
-    3. Parent 'prompts' directories (up to package boundary)
-Key features:
-    - Automatic template discovery
-    - Jinja2 template rendering with context
-    - Smart path resolution (.jinja2/.jinja extension handling)
-    - Clear error messages for missing templates
-Example:
-    >>> from ai_pipeline_core import PromptManager
-    >>>
-    >>> # Initialize at module level (not inside functions)
-    >>> pm = PromptManager(__file__)
-    >>>
-    >>> # Render a template
-    >>> prompt = pm.get(
-    ...     "analyze.jinja2",
-    ...     document=doc,
-    ...     instructions="Extract key points"
-    ... )
+    3. Parent 'prompts' directories (search ascends parent packages up to the package
+       boundary or after 4 parent levels, whichever comes first)
 Template organization:
     project/
@@ -39,11 +19,11 @@ Template organization:
         ├── summarize.jinja2
         └── extract.jinja2
-Note:
-    Templates should use .jinja2 or .jinja extension.
-    The extension can be omitted when calling get().
+Templates should use .jinja2 or .jinja extension.
+The extension can be omitted when calling get().
 """
+from datetime import datetime
 from pathlib import Path
 from typing import Any
@@ -59,8 +39,6 @@ logger = get_pipeline_logger(__name__)
 class PromptManager:
     """Manages Jinja2 prompt templates with smart path resolution.
-    @public
     PromptManager provides a convenient interface for loading and rendering
     Jinja2 templates used as prompts for LLMs. It automatically searches for
     templates in multiple locations, supporting both local (module-specific)
@@ -69,29 +47,13 @@ class PromptManager:
     Search hierarchy:
         1. Same directory as the calling module (for local templates)
         2. 'prompts' subdirectory in the calling module's directory
-        3. 'prompts' directories in parent packages (up to package boundary)
+        3. 'prompts' directories in parent packages (search ascends parent packages up to the
+           package boundary or after 4 parent levels, whichever comes first)
     Attributes:
         search_paths: List of directories where templates are searched.
         env: Jinja2 Environment configured for prompt rendering.
-    Example:
-        >>> # BEST PRACTICE: Instantiate at module scope (top level), not inside functions
-        >>> # In flow/my_flow.py
-        >>> from ai_pipeline_core import PromptManager
-        >>> pm = PromptManager(__file__)  # Module-level initialization
-        >>>
-        >>> # WRONG - Don't instantiate inside handlers or hot paths:
-        >>> # async def process():
-        >>> #     pm = PromptManager(__file__)  # NO! Creates new instance each call
-        >>>
-        >>> # Uses flow/prompts/analyze.jinja2 if it exists,
-        >>> # otherwise searches parent directories
-        >>> prompt = pm.get("analyze", context=data)
-        >>>
-        >>> # Can also use templates in same directory as module
-        >>> prompt = pm.get("local_template.jinja2")
     Template format:
         Templates use standard Jinja2 syntax:
         ```jinja2
@@ -101,11 +63,12 @@ class PromptManager:
         {% if instructions %}
         Instructions: {{ instructions }}
         {% endif %}
+        Date: {{ current_date }}  # Current date in format "03 January 2025"
         ```
-    Note:
-        - Autoescape is disabled for prompts (raw text output)
-        - Whitespace control is enabled (trim_blocks, lstrip_blocks)
+    Autoescape is disabled for prompts (raw text output).
+    Whitespace control is enabled (trim_blocks, lstrip_blocks).
     Template Inheritance:
         Templates support standard Jinja2 inheritance. Templates are searched
@@ -120,8 +83,6 @@ class PromptManager:
     def __init__(self, current_file: str, prompts_dir: str = "prompts"):
         """Initialize PromptManager with smart template discovery.
-        @public
         Sets up the Jinja2 environment with a FileSystemLoader that searches
         multiple directories for templates. The search starts from the calling
         module's location and extends to parent package directories.
@@ -138,37 +99,19 @@ class PromptManager:
             PromptError: If current_file is not a valid file path (e.g.,
                         if __name__ was passed instead of __file__).
-        Note:
-            Search behavior - Given a module at /project/flows/my_flow.py:
-            1. /project/flows/ (local templates)
-            2. /project/flows/prompts/ (if exists)
+        Search behavior - Given a module at /project/tasks/my_task.py:
+            1. /project/tasks/ (local templates)
+            2. /project/tasks/prompts/ (if exists)
             3. /project/prompts/ (if /project has __init__.py)
-            Search stops when no __init__.py is found (package boundary).
-        Example:
-            >>> # Correct usage
-            >>> pm = PromptManager(__file__)
-            >>>
-            >>> # Custom prompts directory name
-            >>> pm = PromptManager(__file__, prompts_dir="templates")
-            >>>
-            >>> # Common mistake (will raise PromptError)
-            >>> pm = PromptManager(__name__)  # Wrong!
-        Note:
-            The search is limited to 4 parent levels to prevent
-            excessive filesystem traversal.
+        Search ascends parent packages up to the package boundary or after 4 parent
+        levels, whichever comes first.
         """
         search_paths: list[Path] = []
         # Start from the directory containing the calling file
         current_path = Path(current_file).resolve()
         if not current_path.exists():
-            raise PromptError(
-                f"PromptManager expected __file__ (a valid file path), "
-                f"but got {current_file!r}. Did you pass __name__ instead?"
-            )
+            raise PromptError(f"PromptManager expected __file__ (a valid file path), but got {current_file!r}. Did you pass __name__ instead?")
         if current_path.is_file():
             current_path = current_path.parent
@@ -215,11 +158,12 @@ class PromptManager:
             autoescape=False,  # Important for prompt engineering
         )
+        # Add current_date as a global string (format: "03 January 2025")
+        self.env.globals["current_date"] = datetime.now().strftime("%d %B %Y")  # type: ignore[assignment]
     def get(self, prompt_path: str, **kwargs: Any) -> str:
         """Load and render a Jinja2 template with the given context.
-        @public
         Searches for the template in all configured search paths and renders
         it with the provided context variables. Automatically tries adding
         .jinja2 or .jinja extensions if the file is not found.
@@ -242,31 +186,11 @@ class PromptManager:
                               rendering fails (e.g., missing variables,
                               syntax errors).
-        Note:
-            Template resolution - Given prompt_path="analyze":
+        Template resolution - Given prompt_path="analyze":
             1. Try "analyze" as-is
             2. Try "analyze.jinja2"
             3. Try "analyze.jinja"
-            The first matching file is used.
-        Example:
-            >>> pm = PromptManager(__file__)
-            >>>
-            >>> # Simple rendering
-            >>> prompt = pm.get("summarize", text="Long document...")
-            >>>
-            >>> # With complex context
-            >>> prompt = pm.get(
-            ...     "analyze",
-            ...     document=doc,
-            ...     max_length=500,
-            ...     style="technical",
-            ...     options={"include_metadata": True}
-            ... )
-            >>>
-            >>> # Nested template path
-            >>> prompt = pm.get("flows/extraction/extract_entities")
+        The first matching file is used.
         Template example:
             ```jinja2
@@ -279,9 +203,8 @@ class PromptManager:
             {% endif %}
             ```
-        Note:
-            All Jinja2 features are available: loops, conditionals,
-            filters, macros, inheritance, etc.
+        All Jinja2 features are available: loops, conditionals,
+        filters, macros, inheritance, etc.
         """
         try:
             template = self.env.get_template(prompt_path)
@@ -294,13 +217,11 @@ class PromptManager:
                     return template.render(**kwargs)
                 except jinja2.TemplateNotFound:
                     pass  # Fall through to the original error
-            raise PromptNotFoundError(
-                f"Prompt template '{prompt_path}' not found (searched in {self.search_paths})."
-            )
+            raise PromptNotFoundError(f"Prompt template '{prompt_path}' not found (searched in {self.search_paths}).") from None
         except jinja2.TemplateError as e:
             raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e
         except PromptNotFoundError:
             raise  # Re-raise our custom exception
-        except (KeyError, TypeError, AttributeError, IOError, ValueError) as e:
+        except (OSError, KeyError, TypeError, AttributeError, ValueError) as e:
             logger.error(f"Unexpected error rendering '{prompt_path}'", exc_info=True)
             raise PromptRenderError(f"Failed to render prompt '{prompt_path}': {e}") from e

ai_pipeline_core/settings.py CHANGED Viewed

@@ -1,7 +1,5 @@
 """Core configuration settings for pipeline operations.
-@public
 This module provides the Settings base class for configuration management.
 Applications should inherit from Settings to create their own ProjectSettings
 class with additional configuration fields.
@@ -12,41 +10,27 @@ Environment variables:
     PREFECT_API_URL: Prefect server endpoint for flow orchestration
     PREFECT_API_KEY: Prefect API authentication key
     LMNR_PROJECT_API_KEY: Laminar project key for observability
+    GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file (for Prefect deployment bundles)
 Configuration precedence:
     1. Environment variables (highest priority)
     2. .env file in current directory
     3. Default values (empty strings)
-Example:
-    >>> from ai_pipeline_core import Settings
-    >>>
-    >>> # Create your project's settings class
-    >>> class ProjectSettings(Settings):
-    ...     app_name: str = "my-app"
-    ...     debug_mode: bool = False
-    >>>
-    >>> # Create singleton instance
-    >>> settings = ProjectSettings()
-    >>>
-    >>> # Access configuration
-    >>> print(settings.openai_base_url)
-    >>> print(settings.app_name)
 .env file format:
     OPENAI_BASE_URL=http://localhost:4000
     OPENAI_API_KEY=sk-1234567890
     PREFECT_API_URL=http://localhost:4200/api
     PREFECT_API_KEY=pnu_abc123
     LMNR_PROJECT_API_KEY=lmnr_proj_xyz
+    GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json  # For Prefect deployment
     APP_NAME=production-app
     DEBUG_MODE=false
-Note:
-    Settings are loaded once at initialization and frozen. There is no
-    built-in reload mechanism - the process must be restarted to pick up
-    changes to environment variables or .env file. This is by design to
-    ensure consistency during execution.
+Settings are loaded once at initialization and frozen. There is no
+built-in reload mechanism - the process must be restarted to pick up
+changes to environment variables or .env file. This is by design to
+ensure consistency during execution.
 """
 from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -55,8 +39,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 class Settings(BaseSettings):
     """Base configuration class for AI Pipeline applications.
-    @public
     Settings is designed to be inherited by your application's configuration
     class. It provides core AI Pipeline settings and type-safe configuration
     management with automatic loading from environment variables and .env files.
@@ -90,18 +72,23 @@ class Settings(BaseSettings):
         prefect_api_key: Prefect API authentication key. Required only
                         when connecting to Prefect Cloud or secured server.
-        lmnr_project_api_key: Laminar (LMNR) project API key for tracing
-                              and observability. Optional but recommended
-                              for production monitoring.
+        lmnr_project_api_key: Laminar (LMNR) project API key for observability.
+                              Optional but recommended for production monitoring.
+        lmnr_debug: Debug mode flag for Laminar. Set to "true" to
+                   enable debug-level logging. Empty string by default.
+        gcs_service_account_file: Path to GCS service account JSON file.
+                                  Used for Prefect deployment bundles to GCS.
+                                  Optional - if not set, default credentials will be used.
     Configuration sources:
         - Environment variables (highest priority)
         - .env file in current directory
         - Default values in class definition
-    Note:
-        Empty strings are used as defaults to allow optional services.
-        Check for empty values before using service-specific settings.
+    Empty strings are used as defaults to allow optional services.
+    Check for empty values before using service-specific settings.
     """
     model_config = SettingsConfigDict(
@@ -118,11 +105,33 @@ class Settings(BaseSettings):
     # Prefect Configuration
     prefect_api_url: str = ""
     prefect_api_key: str = ""
+    prefect_api_auth_string: str = ""
+    prefect_work_pool_name: str = "default"
+    prefect_work_queue_name: str = "default"
+    prefect_gcs_bucket: str = ""
     # Observability
     lmnr_project_api_key: str = ""
+    lmnr_debug: str = ""
+    # GCS (for Prefect deployment bundles)
+    gcs_service_account_file: str = ""  # Path to GCS service account JSON file
+    # ClickHouse tracking
+    clickhouse_host: str = ""
+    clickhouse_port: int = 8443
+    clickhouse_database: str = "default"
+    clickhouse_user: str = "default"
+    clickhouse_password: str = ""
+    clickhouse_secure: bool = True
+    # Tracking behavior
+    tracking_enabled: bool = True
+    tracking_summary_model: str = "gemini-3-flash"
+    # Document summary generation (store-level)
+    doc_summary_enabled: bool = True
+    doc_summary_model: str = "gemini-3-flash"
-# Legacy: Module-level instance for backwards compatibility
-# Applications should create their own settings instance
 settings = Settings()

ai_pipeline_core/testing.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Test utilities for pipeline development.
+Re-exports Prefect testing helpers used in pipeline test suites.
+"""
+from prefect.logging import disable_run_logger
+from prefect.testing.utilities import prefect_test_harness
+__all__ = ["disable_run_logger", "prefect_test_harness"]

ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl