ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. ai_pipeline_core/__init__.py +25 -14
  2. ai_pipeline_core/documents/__init__.py +2 -1
  3. ai_pipeline_core/documents/document.py +317 -49
  4. ai_pipeline_core/documents/document_list.py +136 -33
  5. ai_pipeline_core/documents/flow_document.py +8 -29
  6. ai_pipeline_core/documents/task_document.py +6 -27
  7. ai_pipeline_core/documents/temporary_document.py +6 -27
  8. ai_pipeline_core/documents/utils.py +64 -1
  9. ai_pipeline_core/flow/config.py +174 -5
  10. ai_pipeline_core/flow/options.py +2 -2
  11. ai_pipeline_core/llm/__init__.py +6 -1
  12. ai_pipeline_core/llm/ai_messages.py +14 -7
  13. ai_pipeline_core/llm/client.py +143 -55
  14. ai_pipeline_core/llm/model_options.py +20 -5
  15. ai_pipeline_core/llm/model_response.py +77 -29
  16. ai_pipeline_core/llm/model_types.py +38 -40
  17. ai_pipeline_core/logging/__init__.py +0 -2
  18. ai_pipeline_core/logging/logging_config.py +0 -6
  19. ai_pipeline_core/logging/logging_mixin.py +2 -10
  20. ai_pipeline_core/pipeline.py +68 -65
  21. ai_pipeline_core/prefect.py +12 -3
  22. ai_pipeline_core/prompt_manager.py +6 -7
  23. ai_pipeline_core/settings.py +13 -5
  24. ai_pipeline_core/simple_runner/__init__.py +1 -11
  25. ai_pipeline_core/simple_runner/cli.py +13 -12
  26. ai_pipeline_core/simple_runner/simple_runner.py +34 -172
  27. ai_pipeline_core/storage/__init__.py +8 -0
  28. ai_pipeline_core/storage/storage.py +628 -0
  29. ai_pipeline_core/tracing.py +110 -26
  30. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +60 -23
  31. ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
  32. ai_pipeline_core-0.1.13.dist-info/RECORD +0 -36
  33. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
  34. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
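Most of the hunks below are docstring updates, but a few user-facing changes in the llm package are visible in this diff: the model name is now passed as the first positional argument to the generate functions, assistant responses are added to a conversation with AIMessages.append rather than AIMessages.add, ModelOptions gains a cache_ttl field (default "120s"), and ModelName now also accepts arbitrary strings. A minimal sketch of the 0.2.0-style calls, assembled only from the docstring examples in the hunks below; the asyncio wrapper and the ModelOptions import path are assumptions:

    import asyncio

    from ai_pipeline_core import llm, AIMessages
    from ai_pipeline_core.llm.model_options import ModelOptions  # module path taken from the file list above

    async def main() -> None:
        messages = AIMessages(["Explain quantum computing"])

        # 0.2.0: model name is the first positional argument; any string is accepted.
        response = await llm.generate("gpt-5", messages=messages)

        # 0.2.0: append (not add) the assistant response to the conversation history.
        messages.append(response)
        print(response.content)

        # New in 0.2.0: context-cache TTL, enabled by default ("120s").
        options = ModelOptions(cache_ttl="300s", max_completion_tokens=1000)
        print(options.cache_ttl)  # how options are wired into generate is not shown in this diff

    asyncio.run(main())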
ai_pipeline_core/llm/model_options.py
@@ -1,7 +1,5 @@
 """Configuration options for LLM generation.
 
-@public
-
 Provides the ModelOptions class for configuring model behavior,
 retry logic, and advanced features like web search and reasoning.
 """
@@ -14,8 +12,6 @@ from pydantic import BaseModel
 class ModelOptions(BaseModel):
     """Configuration options for LLM generation requests.
 
-    @public
-
     ModelOptions encapsulates all configuration parameters for model
     generation, including model behavior settings, retry logic, and
     advanced features. All fields are optional with sensible defaults.
@@ -49,6 +45,10 @@ class ModelOptions(BaseModel):
 
         timeout: Maximum seconds to wait for response (default: 300).
 
+        cache_ttl: Cache TTL for context messages (default: "120s").
+            String format like "60s", "5m", or None to disable caching.
+            Applied to the last context message for efficient token reuse.
+
         service_tier: API tier selection for performance/cost trade-offs.
             "auto": Let API choose
             "default": Standard tier
@@ -64,7 +64,8 @@
 
         response_format: Pydantic model class for structured output.
             Pass a Pydantic model; the client converts it to JSON Schema.
-            Set automatically by generate_structured(). Provider support varies.
+            Set automatically by generate_structured().
+            Structured output support varies by provider and model.
 
     Example:
         >>> # Basic configuration
@@ -79,6 +80,18 @@
         ...     temperature=0.3  # Lower for code generation
         ... )
         >>>
+        >>> # With custom cache TTL
+        >>> options = ModelOptions(
+        ...     cache_ttl="300s",  # Cache context for 5 minutes
+        ...     max_completion_tokens=1000
+        ... )
+        >>>
+        >>> # Disable caching
+        >>> options = ModelOptions(
+        ...     cache_ttl=None,  # No context caching
+        ...     temperature=0.5
+        ... )
+        >>>
         >>> # For search-enabled models
         >>> options = ModelOptions(
         ...     search_context_size="high",  # Get more search results
@@ -96,6 +109,7 @@
         - search_context_size only works with search models
         - reasoning_effort only works with models that support explicit reasoning
         - response_format is set internally by generate_structured()
+        - cache_ttl accepts formats like "120s", "5m", "1h" or None to disable caching
     """
 
     temperature: float | None = None
@@ -105,6 +119,7 @@
     retries: int = 3
     retry_delay_seconds: int = 10
     timeout: int = 300
+    cache_ttl: str | None = "120s"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
     response_format: type[BaseModel] | None = None
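The new cache_ttl field defaults to "120s" and takes duration strings like "60s", "5m", or "1h", or None to disable context caching. A short sketch mirroring the docstring examples above; the import path is inferred from the file list and ModelOptions may also be re-exported elsewhere:

    from ai_pipeline_core.llm.model_options import ModelOptions  # path inferred from the file list

    default_options = ModelOptions()            # context messages cached for 120 seconds
    assert default_options.cache_ttl == "120s"

    long_cache = ModelOptions(cache_ttl="300s", max_completion_tokens=1000)  # 5-minute cache window
    no_cache = ModelOptions(cache_ttl=None, temperature=0.5)                 # disable context caching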
ai_pipeline_core/llm/model_response.py
@@ -2,7 +2,7 @@
 
 @public
 
-Provides enhanced response classes that wrap OpenAI API responses
+Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """
 
@@ -23,8 +23,8 @@ class ModelResponse(ChatCompletion):
 
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-    >>> response = await llm.generate(messages=messages)
-    >>> messages.add(response)  # Add assistant response to conversation
+    >>> response = await llm.generate("gpt-5", messages=messages)
+    >>> messages.append(response)  # Add assistant response to conversation
     >>> print(response.content)  # Access generated text
 
     The two main interactions with ModelResponse:
@@ -35,13 +35,13 @@ class ModelResponse(ChatCompletion):
     like token usage and cost tracking are available but rarely needed.
 
     Example:
-        >>> from ai_pipeline_core.llm import AIMessages, generate
+        >>> from ai_pipeline_core import llm, AIMessages
         >>>
-        >>> messages = AIMessages("Explain quantum computing")
-        >>> response = await generate(messages=messages)
+        >>> messages = AIMessages(["Explain quantum computing"])
+        >>> response = await llm.generate("gpt-5", messages=messages)
         >>>
         >>> # Primary usage: add to conversation
-        >>> messages.add(response)
+        >>> messages.append(response)
         >>>
         >>> # Access generated text
         >>> print(response.content)
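The updated examples establish the 0.2.0 conversation loop: build an AIMessages container, pass the model name positionally, and append each response back into the history. A hedged two-turn sketch using only the calls shown above; appending a plain string as the next user turn is an assumption:

    import asyncio

    from ai_pipeline_core import llm, AIMessages

    async def two_turns() -> str:
        messages = AIMessages(["Explain quantum computing"])

        first = await llm.generate("gpt-5", messages=messages)
        messages.append(first)  # keep the assistant turn in the history

        messages.append("Now summarize that in one sentence.")  # assumed: str turns, as in AIMessages([...])
        second = await llm.generate("gpt-5", messages=messages)
        return second.content

    print(asyncio.run(two_turns()))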
@@ -96,17 +96,17 @@ class ModelResponse(ChatCompletion):
         @public
 
         Primary property for accessing the LLM's response text.
-        This covers 99% of use cases with ModelResponse.
+        This is the main property you'll use with ModelResponse.
 
         Returns:
             Generated text from the model, or empty string if none.
 
         Example:
-            >>> response = await generate(messages="Hello")
+            >>> response = await generate("gpt-5", messages="Hello")
             >>> text = response.content  # The generated response
             >>>
             >>> # Common pattern: add to messages then use content
-            >>> messages.add(response)
+            >>> messages.append(response)
             >>> if "error" in response.content.lower():
             ...     # Handle error case
         """
@@ -146,36 +146,82 @@ class ModelResponse(ChatCompletion):
         self.headers = copy.deepcopy(headers)
 
     def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-        tracing and monitoring in the LMNR platform.
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
         Returns:
             Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
             - Reasoning token counts (for O1 models)
 
         Metadata structure:
            - litellm.*: All LiteLLM-specific headers
-           - gen_ai.usage.*: Token usage statistics
+           - gen_ai.usage.prompt_tokens: Input token count
+           - gen_ai.usage.completion_tokens: Output token count
+           - gen_ai.usage.total_tokens: Total tokens used
+           - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+           - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+           - gen_ai.usage.output_cost: Generation cost in dollars
+           - gen_ai.usage.cost: Alternative cost field (same value)
+           - gen_ai.cost: Simple cost field (same value)
            - gen_ai.response.*: Response identifiers
-           - gen_ai.cost: Cost information
            - model_options.*: Configuration used
 
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
+
+            Cost is stored in three fields for compatibility:
+            - gen_ai.usage.output_cost (standard)
+            - gen_ai.usage.cost (alternative)
+            - gen_ai.cost (simple)
+
         Example:
-            >>> response = await llm.generate(...)
+            >>> response = await llm.generate(
+            ...     "gpt-5",
+            ...     context=large_doc,
+            ...     messages="Summarize this"
+            ... )
+            >>>
+            >>> # Get comprehensive metadata
             >>> metadata = response.get_laminar_metadata()
-            >>> print(f"Cost: ${metadata.get('gen_ai.cost', 0)}")
-            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+            >>>
+            >>> # Track generation cost
+            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
+            >>> if cost > 0:
+            ...     print(f"Generation cost: ${cost:.4f}")
+            >>>
+            >>> # Monitor token usage
+            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
+            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
+            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
+            >>>
+            >>> # Check cache effectiveness
+            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
+            >>> if cached > 0:
+            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
+            ...     savings = (cached / total) * 100
+            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
+            >>>
+            >>> # Calculate cost per token
+            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
+            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
+            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
 
         Note:
-            Used internally by the tracing system for observability.
-            Cost is extracted from headers or usage object.
+            - Cost availability depends on LiteLLM proxy configuration
+            - Not all providers return cost information
+            - Cached tokens reduce actual cost but may not be reflected
+            - Used internally by tracing but accessible for cost analysis
         """
 
         metadata: dict[str, str | int | float] = {}
@@ -245,6 +291,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
         ...     summary: str
         >>>
         >>> response = await generate_structured(
+        ...     "gpt-5",
         ...     response_format=Analysis,
         ...     messages="Analyze this text..."
         ... )
@@ -254,7 +301,7 @@
         >>> print(f"Sentiment: {analysis.sentiment}")
         >>>
         >>> # Can add to messages for conversation
-        >>> messages.add(response)
+        >>> messages.append(response)
 
     The two main interactions:
     1. Accessing .parsed property for the structured data
@@ -330,6 +377,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
             ...     age: int
             >>>
             >>> response = await generate_structured(
+            ...     "gpt-5",
             ...     response_format=UserInfo,
             ...     messages="Extract user info..."
             ... )
@@ -339,11 +387,11 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
             >>> print(f"{user.name} is {user.age} years old")
             >>>
             >>> # Can also add to messages
-            >>> messages.add(response)
+            >>> messages.append(response)
 
         Note:
-            Type-safe with full IDE support. This property covers
-            99% of structured response use cases.
+            Type-safe with full IDE support. This is the main property
+            you'll use with structured responses.
         """
         if self._parsed_value is not None:
             return self._parsed_value
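Assembled from the StructuredModelResponse examples above, a self-contained structured-output call could look like this sketch; the generate_structured import location is assumed, since the docstrings do not show it:

    import asyncio

    from pydantic import BaseModel

    from ai_pipeline_core.llm import generate_structured  # import location assumed

    class UserInfo(BaseModel):
        name: str
        age: int

    async def extract() -> UserInfo:
        response = await generate_structured(
            "gpt-5",
            response_format=UserInfo,
            messages="Extract user info: Alice is 30 years old.",
        )
        user = response.parsed  # typed access to the validated structure
        print(f"{user.name} is {user.age} years old")
        return user

    asyncio.run(extract())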
ai_pipeline_core/llm/model_types.py
@@ -12,28 +12,32 @@ Model categories:
 
 from typing import Literal, TypeAlias
 
-ModelName: TypeAlias = Literal[
-    # Core models
-    "gemini-2.5-pro",
-    "gpt-5",
-    "grok-4",
-    # Small models
-    "gemini-2.5-flash",
-    "gpt-5-mini",
-    "grok-3-mini",
-    # Search models
-    "gemini-2.5-flash-search",
-    "sonar-pro-search",
-    "gpt-4o-search",
-    "grok-3-mini-search",
-]
-"""Type-safe model name identifiers.
+ModelName: TypeAlias = (
+    Literal[
+        # Core models
+        "gemini-2.5-pro",
+        "gpt-5",
+        "grok-4",
+        # Small models
+        "gemini-2.5-flash",
+        "gpt-5-mini",
+        "grok-3-mini",
+        # Search models
+        "gemini-2.5-flash-search",
+        "sonar-pro-search",
+        "gpt-4o-search",
+        "grok-3-mini-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
 @public
 
-Provides compile-time validation and IDE autocompletion for supported
-language model names. Used throughout the library to prevent typos
-and ensure only valid models are referenced.
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
 Note: These are example common model names as of Q3 2025. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
@@ -51,34 +55,28 @@ Model categories:
     Models with integrated web search capabilities for retrieving
     and synthesizing current information.
 
-Extending with custom models:
-    The generate functions accept any string, not just ModelName literals.
-    To add custom models for type safety:
-    1. Create a new type alias: CustomModel = Literal["my-model"]
-    2. Use Union: model: ModelName | CustomModel = "my-model"
-    3. Or simply use strings: model = "any-model-via-litellm"
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
 Example:
     >>> from ai_pipeline_core import llm, ModelName
     >>>
-    >>> # Type-safe model selection
-    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
+    >>> # Predefined model with IDE autocomplete
+    >>> model: ModelName = "gpt-5"  # IDE suggests common models
     >>> response = await llm.generate(model, messages="Hello")
     >>>
-    >>> # Also accepts string for custom models
-    >>> response = await llm.generate("custom-model-v2", messages="Hello")
+    >>> # Custom model works directly
+    >>> model: ModelName = "custom-model-v2"  # Any string is valid
+    >>> response = await llm.generate(model, messages="Hello")
     >>>
-    >>> # Custom type safety
-    >>> from typing import Literal
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
+    >>> # Both types work seamlessly
+    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
 
 Note:
-    While the type alias provides suggestions for common models,
-    the generate functions also accept string literals to support
-    custom or newer models accessed via LiteLLM proxy.
-
-See Also:
-    - llm.generate: Main generation function
-    - ModelOptions: Model configuration options
+    The ModelName type includes both predefined literals and str,
+    allowing full flexibility while maintaining IDE support for
+    common models.
 """
ai_pipeline_core/logging/__init__.py
@@ -1,7 +1,5 @@
 """Logging infrastructure for AI Pipeline Core.
 
-@public
-
 Provides a Prefect-integrated logging facade for unified logging across pipelines.
 Prefer get_pipeline_logger instead of logging.getLogger to ensure proper integration.
 
ai_pipeline_core/logging/logging_config.py
@@ -1,7 +1,5 @@
 """Centralized logging configuration for AI Pipeline Core.
 
-@public
-
 Provides logging configuration management that integrates with Prefect's logging system.
 """
 
@@ -26,8 +24,6 @@ DEFAULT_LOG_LEVELS = {
 class LoggingConfig:
     """Manages logging configuration for the pipeline.
 
-    @public
-
     Provides centralized logging configuration with Prefect integration.
 
     Configuration precedence:
@@ -144,8 +140,6 @@ _logging_config: Optional[LoggingConfig] = None
 def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
     """Setup logging for the AI Pipeline Core library.
 
-    @public
-
     Initializes logging configuration for the pipeline system.
 
     IMPORTANT: Call setup_logging exactly once in your application entry point
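The docstrings spell out the intended pattern: call setup_logging exactly once at the application entry point, then obtain loggers through get_pipeline_logger instead of logging.getLogger. A sketch, assuming both names are importable from ai_pipeline_core.logging (the export location is not shown in this diff):

    # Import locations assumed; adjust to where your installation exports these names.
    from ai_pipeline_core.logging import get_pipeline_logger, setup_logging

    def main() -> None:
        setup_logging(level="INFO")             # call exactly once, at the entry point
        logger = get_pipeline_logger(__name__)  # preferred over logging.getLogger
        logger.info("pipeline starting")

    if __name__ == "__main__":
        main()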
ai_pipeline_core/logging/logging_mixin.py
@@ -1,7 +1,4 @@
-"""Logging mixin for consistent logging across components using Prefect logging.
-
-@public
-"""
+"""Logging mixin for consistent logging across components using Prefect logging."""
 
 import contextlib
 import time
@@ -17,8 +14,6 @@ from prefect.logging import get_logger
 class LoggerMixin:
     """Mixin class that provides consistent logging functionality using Prefect's logging system.
 
-    @public
-
     Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
     The mixin's internal behavior routes to the appropriate backend; you should not call
     logging.getLogger directly.
@@ -94,10 +89,7 @@ class LoggerMixin:
 
 
 class StructuredLoggerMixin(LoggerMixin):
-    """Extended mixin for structured logging with Prefect.
-
-    @public
-    """
+    """Extended mixin for structured logging with Prefect."""
 
     def log_event(self, event: str, **kwargs: Any) -> None:
         """Log a structured event.