ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +86 -4
- ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core/documents/document.py +1107 -131
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +95 -0
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +250 -23
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +308 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +215 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +145 -17
- ai_pipeline_core/simple_runner/simple_runner.py +244 -6
- ai_pipeline_core/tracing.py +232 -30
- ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
- ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_response.py

@@ -1,3 +1,11 @@
+"""Model response structures for LLM interactions.
+
+@public
+
+Provides enhanced response classes that wrap OpenAI API responses
+with additional metadata, cost tracking, and structured output support.
+"""
+
 import copy
 from typing import Any, Generic, TypeVar
 
@@ -5,16 +13,71 @@ from openai.types.chat import ChatCompletion, ParsedChatCompletion
 from pydantic import BaseModel, Field
 
 T = TypeVar("T", bound=BaseModel)
+"""Type parameter for structured response Pydantic models."""
 
 
 class ModelResponse(ChatCompletion):
-    """Response
+    """Response wrapper for LLM text generation.
+
+    @public
+
+    Primary usage is adding to AIMessages for multi-turn conversations:
+
+    >>> response = await llm.generate(messages=messages)
+    >>> messages.add(response)  # Add assistant response to conversation
+    >>> print(response.content)  # Access generated text
+
+    The two main interactions with ModelResponse:
+    1. Adding to AIMessages for conversation flow
+    2. Accessing .content property for the generated text
+
+    Almost all use cases are covered by these two patterns. Advanced features
+    like token usage and cost tracking are available but rarely needed.
+
+    Example:
+        >>> from ai_pipeline_core.llm import AIMessages, generate
+        >>>
+        >>> messages = AIMessages("Explain quantum computing")
+        >>> response = await generate(messages=messages)
+        >>>
+        >>> # Primary usage: add to conversation
+        >>> messages.add(response)
+        >>>
+        >>> # Access generated text
+        >>> print(response.content)
+
+    Note:
+        Inherits from OpenAI's ChatCompletion for compatibility.
+        Other properties (usage, model, id) should only be accessed
+        when absolutely necessary.
+    """
 
     headers: dict[str, str] = Field(default_factory=dict)
     model_options: dict[str, Any] = Field(default_factory=dict)
 
     def __init__(self, chat_completion: ChatCompletion | None = None, **kwargs: Any) -> None:
-        """Initialize ModelResponse from
+        """Initialize ModelResponse from ChatCompletion or kwargs.
+
+        Can be initialized from an existing ChatCompletion object or
+        directly from keyword arguments. Automatically initializes
+        headers dict if not provided.
+
+        Args:
+            chat_completion: Optional ChatCompletion to wrap.
+            **kwargs: Direct initialization parameters if no
+                ChatCompletion provided.
+
+        Example:
+            >>> # From ChatCompletion
+            >>> response = ModelResponse(chat_completion_obj)
+            >>>
+            >>> # Direct initialization (mainly for testing)
+            >>> response = ModelResponse(
+            ...     id="test",
+            ...     model="gpt-5",
+            ...     choices=[...]
+            ... )
+        """
         if chat_completion:
             # Copy all attributes from the ChatCompletion instance
             data = chat_completion.model_dump()
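The class docstring above boils usage down to two interactions: add the response to an AIMessages conversation and read `.content`. A minimal end-to-end sketch of that pattern, based only on the calls shown in the docstrings (it assumes a configured LiteLLM proxy behind the library; `generate` is also documented to accept an explicit model name):

```python
import asyncio

from ai_pipeline_core.llm import AIMessages, generate


async def main() -> None:
    # Start a conversation and generate a reply (pattern from the docstring above)
    messages = AIMessages("Explain quantum computing")
    response = await generate(messages=messages)

    # The two documented interactions with ModelResponse:
    messages.add(response)   # 1. keep the assistant turn in the conversation
    print(response.content)  # 2. read the generated text


asyncio.run(main())
```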
@@ -28,21 +91,92 @@ class ModelResponse(ChatCompletion):
 
     @property
     def content(self) -> str:
-        """Get the text content
+        """Get the generated text content.
+
+        @public
+
+        Primary property for accessing the LLM's response text.
+        This covers 99% of use cases with ModelResponse.
+
+        Returns:
+            Generated text from the model, or empty string if none.
+
+        Example:
+            >>> response = await generate(messages="Hello")
+            >>> text = response.content  # The generated response
+            >>>
+            >>> # Common pattern: add to messages then use content
+            >>> messages.add(response)
+            >>> if "error" in response.content.lower():
+            ...     # Handle error case
+        """
         return self.choices[0].message.content or ""
 
     def set_model_options(self, options: dict[str, Any]) -> None:
-        """
+        """Store the model configuration used for generation.
+
+        Saves a deep copy of the options used for this generation,
+        excluding the messages for brevity.
+
+        Args:
+            options: Dictionary of model options from the API call.
+
+        Note:
+            Messages are removed to avoid storing large prompts.
+            Called internally by the generation functions.
+        """
         self.model_options = copy.deepcopy(options)
         if "messages" in self.model_options:
             del self.model_options["messages"]
 
     def set_headers(self, headers: dict[str, str]) -> None:
-        """
+        """Store HTTP response headers.
+
+        Saves response headers which contain LiteLLM metadata
+        including cost information and call IDs.
+
+        Args:
+            headers: Dictionary of HTTP headers from the response.
+
+        Headers of interest:
+            - x-litellm-response-cost: Generation cost
+            - x-litellm-call-id: Unique call identifier
+            - x-litellm-model-id: Actual model used
+        """
         self.headers = copy.deepcopy(headers)
 
     def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for Laminar observability
+        """Extract metadata for LMNR (Laminar) observability.
+
+        Collects comprehensive metadata about the generation for
+        tracing and monitoring in the LMNR platform.
+
+        Returns:
+            Dictionary containing:
+            - LiteLLM headers (call ID, costs, etc.)
+            - Token usage statistics
+            - Model configuration
+            - Cost information
+            - Cached token counts
+            - Reasoning token counts (for O1 models)
+
+        Metadata structure:
+            - litellm.*: All LiteLLM-specific headers
+            - gen_ai.usage.*: Token usage statistics
+            - gen_ai.response.*: Response identifiers
+            - gen_ai.cost: Cost information
+            - model_options.*: Configuration used
+
+        Example:
+            >>> response = await llm.generate(...)
+            >>> metadata = response.get_laminar_metadata()
+            >>> print(f"Cost: ${metadata.get('gen_ai.cost', 0)}")
+            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+
+        Note:
+            Used internally by the tracing system for observability.
+            Cost is extracted from headers or usage object.
+        """
         metadata: dict[str, str | int | float] = {}
 
         litellm_id = self.headers.get("x-litellm-call-id")
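Beyond `.content`, the docstrings above describe where cost and tracing metadata live: LiteLLM headers stored via `set_headers` and the flattened `gen_ai.*` keys returned by `get_laminar_metadata`. A hedged sketch of reading them (header and key names are taken from the docstrings; whether they are populated depends on the LiteLLM proxy):

```python
import asyncio

from ai_pipeline_core.llm import AIMessages, generate


async def show_usage_metadata() -> None:
    response = await generate(messages=AIMessages("Summarize the changelog"))

    # Cost reported by the LiteLLM proxy, if the header was returned
    cost = response.headers.get("x-litellm-response-cost")

    # Flattened metadata consumed by the tracing/observability layer
    metadata = response.get_laminar_metadata()
    print(cost, metadata.get("gen_ai.usage.total_tokens"))


asyncio.run(show_usage_metadata())
```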
@@ -55,23 +189,19 @@
             metadata[f"litellm.{header_name}"] = value
 
         # Add base metadata
-        metadata.update(
-            {
-                "gen_ai.response.id": litellm_id or self.id,
-                "gen_ai.response.model": self.model,
-                "get_ai.system": "litellm",
-            }
-        )
+        metadata.update({
+            "gen_ai.response.id": litellm_id or self.id,
+            "gen_ai.response.model": self.model,
+            "get_ai.system": "litellm",
+        })
 
         # Add usage metadata if available
         if self.usage:
-            metadata.update(
-                {
-                    "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
-                    "gen_ai.usage.completion_tokens": self.usage.completion_tokens,
-                    "gen_ai.usage.total_tokens": self.usage.total_tokens,
-                }
-            )
+            metadata.update({
+                "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
+                "gen_ai.usage.completion_tokens": self.usage.completion_tokens,
+                "gen_ai.usage.total_tokens": self.usage.total_tokens,
+            })
 
         # Check for cost in usage object
         if hasattr(self.usage, "cost"):
@@ -90,13 +220,11 @@
 
         # Add cost metadata if available
         if cost and cost > 0:
-            metadata.update(
-                {
-                    "gen_ai.usage.output_cost": cost,
-                    "gen_ai.usage.cost": cost,
-                    "get_ai.cost": cost,
-                }
-            )
+            metadata.update({
+                "gen_ai.usage.output_cost": cost,
+                "gen_ai.usage.cost": cost,
+                "get_ai.cost": cost,
+            })
 
         if self.model_options:
             for key, value in self.model_options.items():
@@ -106,7 +234,42 @@
 
 
 class StructuredModelResponse(ModelResponse, Generic[T]):
-    """Response
+    """Response wrapper for structured/typed LLM output.
+
+    @public
+
+    Primary usage is adding to AIMessages and accessing .parsed property:
+
+    >>> class Analysis(BaseModel):
+    ...     sentiment: float
+    ...     summary: str
+    >>>
+    >>> response = await generate_structured(
+    ...     response_format=Analysis,
+    ...     messages="Analyze this text..."
+    ... )
+    >>>
+    >>> # Primary usage: access parsed model
+    >>> analysis = response.parsed
+    >>> print(f"Sentiment: {analysis.sentiment}")
+    >>>
+    >>> # Can add to messages for conversation
+    >>> messages.add(response)
+
+    The two main interactions:
+    1. Accessing .parsed property for the structured data
+    2. Adding to AIMessages for conversation continuity
+
+    These patterns cover virtually all use cases. Advanced features exist
+    but should only be used when absolutely necessary.
+
+    Type Parameter:
+        T: The Pydantic model type for the structured output.
+
+    Note:
+        Extends ModelResponse with type-safe parsed data access.
+        Other inherited properties should rarely be needed.
+    """
 
     def __init__(
         self,
@@ -114,12 +277,27 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
         parsed_value: T | None = None,
         **kwargs: Any,
     ) -> None:
-        """Initialize
+        """Initialize with ChatCompletion and parsed value.
+
+        Creates a structured response from a base completion and
+        optionally a pre-parsed value. Can extract parsed value
+        from ParsedChatCompletion automatically.
 
         Args:
-            chat_completion:
-            parsed_value:
-
+            chat_completion: Base chat completion response.
+            parsed_value: Pre-parsed Pydantic model instance.
+                If None, attempts extraction from
+                ParsedChatCompletion.
+            **kwargs: Additional ChatCompletion parameters.
+
+        Extraction behavior:
+            1. Use provided parsed_value if given
+            2. Extract from ParsedChatCompletion if available
+            3. Store as None (access will raise ValueError)
+
+        Note:
+            Usually created internally by generate_structured().
+            The parsed value is validated by Pydantic automatically.
         """
         super().__init__(chat_completion, **kwargs)
         self._parsed_value: T | None = parsed_value
@@ -133,13 +311,39 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
 
     @property
    def parsed(self) -> T:
-        """Get the parsed
+        """Get the parsed Pydantic model instance.
+
+        @public
+
+        Primary property for accessing structured output.
+        This is the main reason to use generate_structured().
 
         Returns:
-
+            Validated instance of the Pydantic model type T.
 
         Raises:
-            ValueError: If no parsed content
+            ValueError: If no parsed content available (internal error).
+
+        Example:
+            >>> class UserInfo(BaseModel):
+            ...     name: str
+            ...     age: int
+            >>>
+            >>> response = await generate_structured(
+            ...     response_format=UserInfo,
+            ...     messages="Extract user info..."
+            ... )
+            >>>
+            >>> # Primary usage: get the parsed model
+            >>> user = response.parsed
+            >>> print(f"{user.name} is {user.age} years old")
+            >>>
+            >>> # Can also add to messages
+            >>> messages.add(response)
+
+        Note:
+            Type-safe with full IDE support. This property covers
+            99% of structured response use cases.
         """
         if self._parsed_value is not None:
             return self._parsed_value
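For structured output, the StructuredModelResponse docstrings center on `generate_structured` plus the `.parsed` property. A short sketch combining the documented examples (it assumes `generate_structured` is exported from `ai_pipeline_core.llm` alongside `generate`):

```python
import asyncio

from pydantic import BaseModel

from ai_pipeline_core.llm import AIMessages, generate_structured


class UserInfo(BaseModel):
    name: str
    age: int


async def extract_user() -> None:
    messages = AIMessages("Extract user info: Ada Lovelace, 36")
    response = await generate_structured(response_format=UserInfo, messages=messages)

    user = response.parsed  # validated UserInfo instance (type-safe access)
    messages.add(response)  # structured replies can stay in the conversation
    print(f"{user.name} is {user.age} years old")


asyncio.run(extract_user())
```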
ai_pipeline_core/llm/model_types.py

@@ -1,3 +1,15 @@
+"""Model type definitions for LLM interactions.
+
+This module defines type aliases for model names used throughout
+the AI Pipeline Core system. The ModelName type provides type safety
+and IDE support for supported model identifiers.
+
+Model categories:
+    - Core models: High-capability general-purpose models
+    - Small models: Efficient, cost-effective models
+    - Search models: Models with web search capabilities
+"""
+
 from typing import Literal, TypeAlias
 
 ModelName: TypeAlias = Literal[
@@ -15,3 +27,58 @@ ModelName: TypeAlias = Literal[
     "gpt-4o-search",
     "grok-3-mini-search",
 ]
+"""Type-safe model name identifiers.
+
+@public
+
+Provides compile-time validation and IDE autocompletion for supported
+language model names. Used throughout the library to prevent typos
+and ensure only valid models are referenced.
+
+Note: These are example common model names as of Q3 2025. Actual availability
+depends on your LiteLLM proxy configuration and provider access.
+
+Model categories:
+    Core models (gemini-2.5-pro, gpt-5, grok-4):
+        High-capability models for complex tasks requiring deep reasoning,
+        nuanced understanding, or creative generation.
+
+    Small models (gemini-2.5-flash, gpt-5-mini, grok-3-mini):
+        Efficient models optimized for speed and cost, suitable for
+        simpler tasks or high-volume processing.
+
+    Search models (*-search suffix):
+        Models with integrated web search capabilities for retrieving
+        and synthesizing current information.
+
+Extending with custom models:
+    The generate functions accept any string, not just ModelName literals.
+    To add custom models for type safety:
+    1. Create a new type alias: CustomModel = Literal["my-model"]
+    2. Use Union: model: ModelName | CustomModel = "my-model"
+    3. Or simply use strings: model = "any-model-via-litellm"
+
+Example:
+    >>> from ai_pipeline_core import llm, ModelName
+    >>>
+    >>> # Type-safe model selection
+    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
+    >>> response = await llm.generate(model, messages="Hello")
+    >>>
+    >>> # Also accepts string for custom models
+    >>> response = await llm.generate("custom-model-v2", messages="Hello")
+    >>>
+    >>> # Custom type safety
+    >>> from typing import Literal
+    >>> MyModel = Literal["company-llm-v1"]
+    >>> model: ModelName | MyModel = "company-llm-v1"
+
+Note:
+    While the type alias provides suggestions for common models,
+    the generate functions also accept string literals to support
+    custom or newer models accessed via LiteLLM proxy.
+
+See Also:
+    - llm.generate: Main generation function
+    - ModelOptions: Model configuration options
+"""
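The ModelName docstring explains that the literal is a convenience, not a gate: any string reaches the LiteLLM proxy, and a custom Literal union restores type safety for in-house models. A small sketch of that extension pattern (the model name `company-llm-v1` is illustrative only):

```python
from typing import Literal

from ai_pipeline_core import ModelName

# Hypothetical in-house model routed through the LiteLLM proxy
CompanyModel = Literal["company-llm-v1"]


def pick_model(fast: bool) -> ModelName | CompanyModel:
    # ModelName keeps IDE autocompletion for the common names;
    # the union keeps the custom name type-checked as well.
    return "gpt-5-mini" if fast else "company-llm-v1"
```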
ai_pipeline_core/logging/__init__.py

@@ -1,3 +1,16 @@
+"""Logging infrastructure for AI Pipeline Core.
+
+@public
+
+Provides a Prefect-integrated logging facade for unified logging across pipelines.
+Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
+
+Example:
+    >>> from ai_pipeline_core import get_pipeline_logger
+    >>> logger = get_pipeline_logger(__name__)
+    >>> logger.info("Processing started")
+"""
+
 from .logging_config import LoggingConfig, get_pipeline_logger, setup_logging
 from .logging_mixin import LoggerMixin, StructuredLoggerMixin
 
ai_pipeline_core/logging/logging_config.py

@@ -1,5 +1,11 @@
-"""Centralized logging configuration for AI Pipeline Core
+"""Centralized logging configuration for AI Pipeline Core.
 
+@public
+
+Provides logging configuration management that integrates with Prefect's logging system.
+"""
+
+import logging.config
 import os
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -18,15 +24,39 @@ DEFAULT_LOG_LEVELS = {
 
 
 class LoggingConfig:
-    """Manages logging configuration for the pipeline
+    """Manages logging configuration for the pipeline.
+
+    @public
+
+    Provides centralized logging configuration with Prefect integration.
+
+    Configuration precedence:
+        1. Explicit config_path parameter
+        2. AI_PIPELINE_LOGGING_CONFIG environment variable
+        3. PREFECT_LOGGING_SETTINGS_PATH environment variable
+        4. Default configuration
+
+    Example:
+        >>> config = LoggingConfig()
+        >>> config.apply()
+    """
 
     def __init__(self, config_path: Optional[Path] = None):
+        """Initialize logging configuration.
+
+        Args:
+            config_path: Optional path to YAML configuration file.
+        """
         self.config_path = config_path or self._get_default_config_path()
         self._config: Optional[Dict[str, Any]] = None
 
     @staticmethod
     def _get_default_config_path() -> Optional[Path]:
-        """Get default config path from environment
+        """Get default config path from environment variables.
+
+        Returns:
+            Path to the config file or None if not found.
+        """
         # Check environment variable first
         if env_path := os.environ.get("AI_PIPELINE_LOGGING_CONFIG"):
             return Path(env_path)
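The LoggingConfig docstring spells out the lookup order: explicit path, then AI_PIPELINE_LOGGING_CONFIG, then PREFECT_LOGGING_SETTINGS_PATH, then built-in defaults. A brief sketch of both ends of that precedence (the YAML path is illustrative):

```python
from pathlib import Path

from ai_pipeline_core.logging import LoggingConfig

# Highest precedence: an explicit config path
LoggingConfig(config_path=Path("configs/logging.yaml")).apply()

# No arguments: fall back to the environment variables, then the defaults
LoggingConfig().apply()
```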
@@ -38,7 +68,11 @@ class LoggingConfig:
         return None
 
     def load_config(self) -> Dict[str, Any]:
-        """Load logging configuration from file
+        """Load logging configuration from file or defaults.
+
+        Returns:
+            Dictionary containing logging configuration.
+        """
         if self._config is None:
             if self.config_path and self.config_path.exists():
                 with open(self.config_path, "r") as f:
@@ -51,7 +85,11 @@
 
     @staticmethod
     def _get_default_config() -> Dict[str, Any]:
-        """Get default logging configuration
+        """Get default logging configuration.
+
+        Returns:
+            Default logging configuration dictionary.
+        """
         return {
             "version": 1,
             "disable_existing_loggers": False,
@@ -89,9 +127,7 @@
         }
 
     def apply(self):
-        """Apply the logging configuration"""
-        import logging.config
-
+        """Apply the logging configuration."""
         config = self.load_config()
         logging.config.dictConfig(config)
 
@@ -106,16 +142,29 @@ _logging_config: Optional[LoggingConfig] = None
 
 
 def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
-    """
-
+    """Setup logging for the AI Pipeline Core library.
+
+    @public
+
+    Initializes logging configuration for the pipeline system.
+
+    IMPORTANT: Call setup_logging exactly once in your application entry point
+    (for example, in main()). Do not call at import time or in library modules.
 
     Args:
-        config_path: Optional path to logging configuration file
-        level: Optional
+        config_path: Optional path to YAML logging configuration file.
+        level: Optional log level override (INFO, DEBUG, WARNING, etc.).
 
     Example:
-        >>>
-        >>>
+        >>> # In your main.py or application entry point:
+        >>> def main():
+        ...     setup_logging()  # Call once at startup
+        ...     # Your application code here
+        ...
+        >>> # Or with custom level:
+        >>> if __name__ == "__main__":
+        ...     setup_logging(level="DEBUG")
+        ...     run_application()
     """
     global _logging_config
 
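setup_logging is meant to run exactly once at the entry point, while get_pipeline_logger can be used freely at module level. A minimal entry-point sketch following that advice (import paths follow the public exports shown in this diff):

```python
from ai_pipeline_core import get_pipeline_logger
from ai_pipeline_core.logging import setup_logging

logger = get_pipeline_logger(__name__)  # module-level logger is fine


def main() -> None:
    setup_logging(level="INFO")  # call once, at the application entry point
    logger.info("Pipeline starting")


if __name__ == "__main__":
    main()
```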
@@ -134,18 +183,21 @@ def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
 
 
 def get_pipeline_logger(name: str):
-    """
-
+    """Get a logger for pipeline components.
+
+    @public
+
+    Returns a Prefect-integrated logger with proper configuration.
 
     Args:
-        name: Logger name
+        name: Logger name, typically __name__.
 
     Returns:
-
+        Prefect logger instance.
 
     Example:
-        >>> logger = get_pipeline_logger(
-        >>> logger.info("
+        >>> logger = get_pipeline_logger(__name__)
+        >>> logger.info("Module initialized")
     """
     # Ensure logging is setup
     if _logging_config is None:
|