ai-pipeline-core 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +5 -2
- ai_pipeline_core/documents/__init__.py +2 -1
- ai_pipeline_core/documents/document.py +239 -14
- ai_pipeline_core/documents/document_list.py +72 -16
- ai_pipeline_core/documents/flow_document.py +6 -23
- ai_pipeline_core/documents/task_document.py +6 -23
- ai_pipeline_core/documents/temporary_document.py +5 -19
- ai_pipeline_core/documents/utils.py +64 -1
- ai_pipeline_core/flow/options.py +2 -2
- ai_pipeline_core/llm/__init__.py +5 -0
- ai_pipeline_core/llm/ai_messages.py +0 -3
- ai_pipeline_core/llm/client.py +50 -19
- ai_pipeline_core/llm/model_options.py +18 -0
- ai_pipeline_core/llm/model_response.py +62 -15
- ai_pipeline_core/llm/model_types.py +38 -36
- ai_pipeline_core/pipeline.py +28 -2
- ai_pipeline_core/settings.py +4 -0
- ai_pipeline_core/simple_runner/simple_runner.py +18 -1
- ai_pipeline_core/tracing.py +115 -7
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.1.14.dist-info}/METADATA +42 -7
- ai_pipeline_core-0.1.14.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.1.14.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.1.14.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/documents/task_document.py CHANGED

@@ -29,24 +29,8 @@ class TaskDocument(Document):
         - Reduces persistent I/O for temporary data
 
     Creating TaskDocuments:
-
-
-
-        >>> from enum import StrEnum
-        >>>
-        >>> # Simple task document:
-        >>> class TempDoc(TaskDocument):
-        ...     pass
-        >>>
-        >>> # With restricted files:
-        >>> class CacheDoc(TaskDocument):
-        ...     class FILES(StrEnum):
-        ...         CACHE = "cache.json"
-        ...         INDEX = "index.dat"
-        >>>
-        >>> # RECOMMENDED - automatic conversion:
-        >>> doc = TempDoc.create(name="temp.json", content={"status": "processing"})
-        >>> doc = CacheDoc.create(name="cache.json", content={"data": [1, 2, 3]})
+        Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
+        See Document.create() for detailed usage examples.
 
     Use Cases:
         - Intermediate transformation results

@@ -71,13 +55,11 @@ class TaskDocument(Document):
        name: str,
        content: bytes,
        description: str | None = None,
+        sources: list[str] = [],
    ) -> None:
        """Initialize a TaskDocument with raw bytes content.
 
-
-        **Most users should use the `create` classmethod instead of __init__.**
-        The create method provides automatic content conversion for various types
-        (str, dict, list, Pydantic models) while __init__ only accepts bytes.
+        See Document.__init__() for parameter details and usage notes.
 
        Prevents direct instantiation of the abstract TaskDocument class.
        TaskDocument must be subclassed for specific temporary document types.

@@ -86,6 +68,7 @@ class TaskDocument(Document):
            name: Document filename (required, keyword-only)
            content: Document content as raw bytes (required, keyword-only)
            description: Optional human-readable description (keyword-only)
+            sources: Optional list of strings for provenance tracking
 
        Raises:
            TypeError: If attempting to instantiate TaskDocument directly

@@ -114,7 +97,7 @@ class TaskDocument(Document):
        """
        if type(self) is TaskDocument:
            raise TypeError("Cannot instantiate abstract TaskDocument class directly")
-        super().__init__(name=name, content=content, description=description)
+        super().__init__(name=name, content=content, description=description, sources=sources)
 
    @final
    def get_base_type(self) -> Literal["task"]:
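The new `sources` parameter threads provenance through the constructor down to `Document.__init__`. Below is a minimal sketch of how a pipeline step might record where a temporary artifact came from; the `ScratchDoc` subclass, the file name, and the source strings are illustrative, and whether `Document` validates the source values is an assumption here.

```python
from ai_pipeline_core.documents.task_document import TaskDocument


class ScratchDoc(TaskDocument):  # hypothetical subclass for illustration
    """Intermediate artifact produced mid-flow; never persisted."""


# Raw-bytes constructor, mirroring the new signature in this diff:
# name, content, optional description, optional sources
doc = ScratchDoc(
    name="chunk_embeddings.bin",
    content=b"\x00\x01\x02",
    description="Embeddings for chunk 17",
    sources=["P3AEMA2PSYILKFYVBUALJLMIYWVZIS2QDI3S5VTMD2X7SOODF2YQ"],  # e.g. a parent document hash
)
assert doc.get_base_type() == "task"
```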
ai_pipeline_core/documents/temporary_document.py CHANGED

@@ -30,25 +30,11 @@ class TemporaryDocument(Document):
         - Ignored by simple_runner save operations
 
     Creating TemporaryDocuments:
-
-
-
-
-        >>> doc = TemporaryDocument.create(
-        ...     name="api_response.json",
-        ...     content={"status": "ok", "data": [1, 2, 3]}
-        ... )
-        >>> doc = TemporaryDocument.create(
-        ...     name="credentials.txt",
-        ...     content="secret_token_xyz"
-        ... )
-        >>>
-        >>> # Direct constructor - only for bytes:
-        >>> doc = TemporaryDocument(
-        ...     name="binary.dat",
-        ...     content=b"\x00\x01\x02"
-        ... )
-        >>>
+        Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
+        Unlike abstract document types, TemporaryDocument can be instantiated directly.
+        See Document.create() for detailed usage examples.
+
+        >>> doc = TemporaryDocument.create(name="api.json", content={"status": "ok"})
         >>> doc.is_temporary  # Always True
 
     Use Cases:
ai_pipeline_core/documents/utils.py CHANGED

@@ -1,7 +1,7 @@
 """Utility functions for document handling.
 
 Provides helper functions for URL sanitization, naming conventions,
-
+canonical key generation, and hash validation used throughout the document system.
 """
 
 import re
@@ -115,3 +115,66 @@ def canonical_name_key(
            break
 
    return camel_to_snake(name)
+
+
+def is_document_sha256(value: str) -> bool:
+    """Check if a string is a valid base32-encoded SHA256 hash with proper entropy.
+
+    @public
+
+    This function validates that a string is not just formatted like a SHA256 hash,
+    but actually has the entropy characteristics of a real hash. It checks:
+    1. Correct length (52 characters without padding)
+    2. Valid base32 characters (A-Z, 2-7)
+    3. Sufficient entropy (at least 8 unique characters)
+
+    The entropy check prevents false positives like 'AAAAAAA...AAA' from being
+    identified as valid document hashes.
+
+    Args:
+        value: String to check if it's a document SHA256 hash.
+
+    Returns:
+        True if the string appears to be a real base32-encoded SHA256 hash,
+        False otherwise.
+
+    Examples:
+        >>> # Real SHA256 hash
+        >>> is_document_sha256("P3AEMA2PSYILKFYVBUALJLMIYWVZIS2QDI3S5VTMD2X7SOODF2YQ")
+        True
+
+        >>> # Too uniform - lacks entropy
+        >>> is_document_sha256("A" * 52)
+        False
+
+        >>> # Wrong length
+        >>> is_document_sha256("ABC123")
+        False
+
+        >>> # Invalid characters
+        >>> is_document_sha256("a" * 52)  # lowercase
+        False
+    """
+    # Check basic format: exactly 52 uppercase base32 characters
+    try:
+        if not value or len(value) != 52:
+            return False
+    except (TypeError, AttributeError):
+        return False
+
+    # Check if all characters are valid base32 (A-Z, 2-7)
+    try:
+        if not re.match(r"^[A-Z2-7]{52}$", value):
+            return False
+    except TypeError:
+        # re.match raises TypeError for non-string types like bytes
+        return False
+
+    # Check entropy: real SHA256 hashes have high entropy
+    # Require at least 8 unique characters (out of 32 possible in base32)
+    # This prevents patterns like "AAAAAAA..." from being identified as real hashes
+    unique_chars = len(set(value))
+    if unique_chars < 8:
+        return False
+
+    return True
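A hedged sketch of how `is_document_sha256` could be used alongside the new `sources` provenance field to separate hash references from free-form labels; the `sources` list below and the partition logic are illustrative, not part of the library.

```python
from ai_pipeline_core.documents.utils import is_document_sha256

# Illustrative provenance entries: one document hash, one free-form reference
sources = [
    "P3AEMA2PSYILKFYVBUALJLMIYWVZIS2QDI3S5VTMD2X7SOODF2YQ",
    "https://example.com/raw-input.csv",
]

# Partition into document-hash references vs. everything else
doc_hashes = [s for s in sources if is_document_sha256(s)]
other_refs = [s for s in sources if not is_document_sha256(s)]

assert len(doc_hashes) == 1 and len(other_refs) == 1
```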
ai_pipeline_core/flow/options.py CHANGED

@@ -60,11 +60,11 @@ class FlowOptions(BaseSettings):
    add flow-specific parameters with appropriate validation.
    """
 
-    core_model: ModelName
+    core_model: ModelName = Field(
        default="gpt-5",
        description="Primary model for complex analysis and generation tasks.",
    )
-    small_model: ModelName
+    small_model: ModelName = Field(
        default="gpt-5-mini",
        description="Fast, cost-effective model for simple tasks and orchestration.",
    )
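With defaults now supplied via `Field`, a project-level subclass only has to add its own knobs. A minimal sketch, assuming `FlowOptions` and `ModelName` are importable from the package root; the extra `review_model` field and its default are hypothetical.

```python
from pydantic import Field

from ai_pipeline_core import FlowOptions, ModelName  # import path assumed


class MyFlowOptions(FlowOptions):
    # Inherits core_model="gpt-5" and small_model="gpt-5-mini" defaults
    review_model: ModelName = Field(
        default="gemini-2.5-flash",
        description="Cheaper model used for a hypothetical review pass.",
    )


opts = MyFlowOptions()   # environment variables can still override (BaseSettings)
print(opts.core_model)   # -> "gpt-5"
```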
ai_pipeline_core/llm/__init__.py CHANGED

@@ -8,6 +8,8 @@ from .ai_messages import AIMessages, AIMessageType
 from .client import (
    generate,
    generate_structured,
+    generate_with_retry_for_testing,
+    process_messages_for_testing,
 )
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse

@@ -22,4 +24,7 @@ __all__ = [
    "StructuredModelResponse",
    "generate",
    "generate_structured",
+    # Internal functions exposed for testing only
+    "process_messages_for_testing",
+    "generate_with_retry_for_testing",
 ]
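A hedged sketch of the kind of unit test these aliases enable, exercising the new `cache_ttl` handling. The exact message shapes produced by `AIMessages.to_prompt()` are an assumption, so the indices and asserted keys below are illustrative rather than authoritative.

```python
from ai_pipeline_core.llm import AIMessages, process_messages_for_testing


def test_last_context_message_gets_cache_control():
    context = AIMessages(["large static reference text"])
    messages = AIMessages(["short dynamic question"])

    formatted = process_messages_for_testing(
        context, messages, "You are terse.", "60s"
    )

    # System prompt first, then context, then dynamic messages (per the docstring)
    assert formatted[0]["role"] == "system"
    # The last context message should carry ephemeral cache_control with the given TTL
    # (index 1 assumes a single context item maps to a single formatted message)
    assert formatted[1].get("cache_control") == {"type": "ephemeral", "ttl": "60s"}


def test_cache_disabled_when_ttl_is_none():
    formatted = process_messages_for_testing(
        AIMessages(["ctx"]), AIMessages(["msg"]), None, None
    )
    assert all("cache_control" not in m for m in formatted)
```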
ai_pipeline_core/llm/ai_messages.py CHANGED

@@ -63,7 +63,6 @@ class AIMessages(list[AIMessageType]):
        >>> messages.append("What is the capital of France?")
        >>> response = await llm.generate("gpt-5", messages=messages)
        >>> messages.append(response)  # Add the actual response
-        >>> prompt = messages.get_last_message_as_str()  # Get the last message as a string
    """
 
    def get_last_message(self) -> AIMessageType:

@@ -78,8 +77,6 @@ class AIMessages(list[AIMessageType]):
    def get_last_message_as_str(self) -> str:
        """Get the last message as a string, raising if not a string.
 
-        @public
-
        Returns:
            The last message as a string.
 
ai_pipeline_core/llm/client.py CHANGED

@@ -38,6 +38,7 @@ def _process_messages(
    context: AIMessages,
    messages: AIMessages,
    system_prompt: str | None = None,
+    cache_ttl: str | None = "120s",
 ) -> list[ChatCompletionMessageParam]:
    """Process and format messages for LLM API consumption.
 

@@ -49,11 +50,13 @@ def _process_messages(
        context: Messages to be cached (typically expensive/static content).
        messages: Regular messages without caching (dynamic queries).
        system_prompt: Optional system instructions for the model.
+        cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+            Set to None or empty string to disable caching.
 
    Returns:
        List of formatted messages ready for API calls, with:
        - System prompt at the beginning (if provided)
-        - Context messages with cache_control on the last one
+        - Context messages with cache_control on the last one (if cache_ttl)
        - Regular messages without caching
 
    System Prompt Location:

@@ -62,8 +65,10 @@ def _process_messages(
        allowing dynamic system prompts without breaking cache efficiency.
 
    Cache behavior:
-        The last context message gets ephemeral caching
+        The last context message gets ephemeral caching with specified TTL
        to reduce token usage on repeated calls with same context.
+        If cache_ttl is None or empty string (falsy), no caching is applied.
+        Only the last context message receives cache_control to maximize efficiency.
 
    Note:
        This is an internal function used by _generate_with_retry().
@@ -80,11 +85,12 @@ def _process_messages(
    # Use AIMessages.to_prompt() for context
    context_messages = context.to_prompt()
 
-    # Apply caching to last context message
-    context_messages[-1]["cache_control"] = {  # type: ignore
-        "type": "ephemeral",
-        "ttl": "120s",
-    }
+    # Apply caching to last context message if cache_ttl is set
+    if cache_ttl:
+        context_messages[-1]["cache_control"] = {  # type: ignore
+            "type": "ephemeral",
+            "ttl": cache_ttl,
+        }
 
    processed_messages.extend(context_messages)
 
@@ -173,7 +179,9 @@ async def _generate_with_retry(
    if not context and not messages:
        raise ValueError("Either context or messages must be provided")
 
-    processed_messages = _process_messages(context, messages, options.system_prompt)
+    processed_messages = _process_messages(
+        context, messages, options.system_prompt, options.cache_ttl
+    )
    completion_kwargs: dict[str, Any] = {
        "model": model,
        "messages": processed_messages,
@@ -215,7 +223,7 @@ async def _generate_with_retry(
 
 @trace(ignore_inputs=["context"])
 async def generate(
-    model: ModelName
+    model: ModelName,
    *,
    context: AIMessages | None = None,
    messages: AIMessages | str,
@@ -236,7 +244,7 @@ async def generate(
 
    Args:
        model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
-
+            Accepts predefined models or any string for custom models.
        context: Static context to cache (documents, examples, instructions).
            Defaults to None (empty context). Cached for 120 seconds.
        messages: Dynamic messages/queries. AIMessages or str ONLY.
@@ -292,6 +300,22 @@ async def generate(
        >>> # Second call: reuses cache, saves tokens!
        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
 
+        >>> # Custom cache TTL for longer-lived contexts
+        >>> response = await llm.generate(
+        ...     "gpt-5",
+        ...     context=static_doc,
+        ...     messages="Analyze this",
+        ...     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
+        ... )
+
+        >>> # Disable caching when context changes frequently
+        >>> response = await llm.generate(
+        ...     "gpt-5",
+        ...     context=dynamic_doc,
+        ...     messages="Process this",
+        ...     options=ModelOptions(cache_ttl=None)  # No caching
+        ... )
+
        >>> # AVOID unnecessary options (defaults are optimal)
        >>> response = await llm.generate(
        ...     "gpt-5",
@@ -310,14 +334,17 @@ async def generate(
    Performance:
        - Context caching saves ~50-90% tokens on repeated calls
        - First call: full token cost
-        - Subsequent calls (within
+        - Subsequent calls (within cache TTL): only messages tokens
+        - Default cache TTL is 120s (configurable via ModelOptions.cache_ttl)
        - Default retry delay is 10s (configurable via ModelOptions.retry_delay_seconds)
 
    Caching:
        When enabled in your LiteLLM proxy and supported by the upstream provider,
-        context messages may be cached
-
-
+        context messages may be cached to reduce token usage on repeated calls.
+        Default TTL is 120s, configurable via ModelOptions.cache_ttl (e.g. "300s", "5m").
+        Set cache_ttl=None to disable caching. Savings depend on provider and payload;
+        treat this as an optimization, not a guarantee. Cache behavior varies by proxy
+        configuration.
 
    Note:
        - Context argument is ignored by the tracer to avoid recording large data
@@ -350,7 +377,7 @@ T = TypeVar("T", bound=BaseModel)
 
 @trace(ignore_inputs=["context"])
 async def generate_structured(
-    model: ModelName
+    model: ModelName,
    response_format: type[T],
    *,
    context: AIMessages | None = None,
@@ -364,10 +391,8 @@ async def generate_structured(
    Type-safe generation that returns validated Pydantic model instances.
    Uses OpenAI's structured output feature for guaranteed schema compliance.
 
-    Best Practices
-
-        2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
-        3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+    Best Practices:
+        Same as generate() - see generate() documentation for details.
 
    Args:
        model: Model to use (must support structured output).
@@ -473,3 +498,9 @@ async def generate_structured(
 
    # Create a StructuredModelResponse with the parsed value
    return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+
+
+# Public aliases for testing internal functions
+# These are exported to allow testing of implementation details
+process_messages_for_testing = _process_messages
+generate_with_retry_for_testing = _generate_with_retry
ai_pipeline_core/llm/model_options.py CHANGED

@@ -49,6 +49,10 @@ class ModelOptions(BaseModel):
 
        timeout: Maximum seconds to wait for response (default: 300).
 
+        cache_ttl: Cache TTL for context messages (default: "120s").
+            String format like "60s", "5m", or None to disable caching.
+            Applied to the last context message for efficient token reuse.
+
        service_tier: API tier selection for performance/cost trade-offs.
            "auto": Let API choose
            "default": Standard tier

@@ -79,6 +83,18 @@ class ModelOptions(BaseModel):
        ...     temperature=0.3  # Lower for code generation
        ... )
        >>>
+        >>> # With custom cache TTL
+        >>> options = ModelOptions(
+        ...     cache_ttl="300s",  # Cache context for 5 minutes
+        ...     max_completion_tokens=1000
+        ... )
+        >>>
+        >>> # Disable caching
+        >>> options = ModelOptions(
+        ...     cache_ttl=None,  # No context caching
+        ...     temperature=0.5
+        ... )
+        >>>
        >>> # For search-enabled models
        >>> options = ModelOptions(
        ...     search_context_size="high",  # Get more search results

@@ -96,6 +112,7 @@ class ModelOptions(BaseModel):
        - search_context_size only works with search models
        - reasoning_effort only works with models that support explicit reasoning
        - response_format is set internally by generate_structured()
+        - cache_ttl accepts formats like "120s", "5m", "1h" or None to disable caching
    """
 
    temperature: float | None = None

@@ -105,6 +122,7 @@ class ModelOptions(BaseModel):
    retries: int = 3
    retry_delay_seconds: int = 10
    timeout: int = 300
+    cache_ttl: str | None = "120s"
    service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
    max_completion_tokens: int | None = None
    response_format: type[BaseModel] | None = None
ai_pipeline_core/llm/model_response.py CHANGED

@@ -146,36 +146,83 @@ class ModelResponse(ChatCompletion):
        self.headers = copy.deepcopy(headers)
 
    def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
        Returns:
            Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
            - Reasoning token counts (for O1 models)
 
        Metadata structure:
            - litellm.*: All LiteLLM-specific headers
-            - gen_ai.usage
+            - gen_ai.usage.prompt_tokens: Input token count
+            - gen_ai.usage.completion_tokens: Output token count
+            - gen_ai.usage.total_tokens: Total tokens used
+            - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+            - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+            - gen_ai.usage.output_cost: Generation cost in dollars
+            - gen_ai.usage.cost: Alternative cost field (same value)
+            - gen_ai.cost: Simple cost field (same value)
            - gen_ai.response.*: Response identifiers
-            - gen_ai.cost: Cost information
            - model_options.*: Configuration used
 
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
+
+            Cost is stored in three fields for compatibility:
+            - gen_ai.usage.output_cost (standard)
+            - gen_ai.usage.cost (alternative)
+            - gen_ai.cost (simple)
+
        Example:
-            >>> response = await llm.generate(
+            >>> response = await llm.generate(
+            ...     "gpt-5",
+            ...     context=large_doc,
+            ...     messages="Summarize this",
+            ...     options=ModelOptions(cache_ttl="300s")
+            ... )
+            >>>
+            >>> # Get comprehensive metadata
            >>> metadata = response.get_laminar_metadata()
-            >>>
-            >>>
+            >>>
+            >>> # Track generation cost
+            >>> cost = metadata.get('gen_ai.usage.output_cost', 0)
+            >>> if cost > 0:
+            ...     print(f"Generation cost: ${cost:.4f}")
+            >>>
+            >>> # Monitor token usage
+            >>> print(f"Input: {metadata.get('gen_ai.usage.prompt_tokens', 0)} tokens")
+            >>> print(f"Output: {metadata.get('gen_ai.usage.completion_tokens', 0)} tokens")
+            >>> print(f"Total: {metadata.get('gen_ai.usage.total_tokens', 0)} tokens")
+            >>>
+            >>> # Check cache effectiveness
+            >>> cached = metadata.get('gen_ai.usage.cached_tokens', 0)
+            >>> if cached > 0:
+            ...     total = metadata.get('gen_ai.usage.total_tokens', 1)
+            ...     savings = (cached / total) * 100
+            ...     print(f"Cache hit: {cached} tokens ({savings:.1f}% savings)")
+            >>>
+            >>> # Calculate cost per token
+            >>> if cost > 0 and metadata.get('gen_ai.usage.total_tokens'):
+            ...     cost_per_1k = (cost / metadata['gen_ai.usage.total_tokens']) * 1000
+            ...     print(f"Cost per 1K tokens: ${cost_per_1k:.4f}")
 
        Note:
-
-
+            - Cost availability depends on LiteLLM proxy configuration
+            - Not all providers return cost information
+            - Cached tokens reduce actual cost but may not be reflected
+            - Used internally by tracing but accessible for cost analysis
        """
        metadata: dict[str, str | int | float] = {}
 
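Since each `ModelResponse` reports its cost under `gen_ai.usage.output_cost`, a pipeline can total spend across calls. A small sketch; the responses list and the treatment of missing cost values are illustrative.

```python
from ai_pipeline_core.llm import ModelResponse


def total_cost(responses: list[ModelResponse]) -> float:
    """Sum reported generation costs, skipping responses without cost data."""
    total = 0.0
    for response in responses:
        metadata = response.get_laminar_metadata()
        cost = metadata.get("gen_ai.usage.output_cost", 0)
        total += float(cost) if cost else 0.0  # some providers return no cost
    return total
```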
ai_pipeline_core/llm/model_types.py CHANGED

@@ -12,28 +12,32 @@ Model categories:
 
 from typing import Literal, TypeAlias
 
-ModelName: TypeAlias =
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ModelName: TypeAlias = (
+    Literal[
+        # Core models
+        "gemini-2.5-pro",
+        "gpt-5",
+        "grok-4",
+        # Small models
+        "gemini-2.5-flash",
+        "gpt-5-mini",
+        "grok-3-mini",
+        # Search models
+        "gemini-2.5-flash-search",
+        "sonar-pro-search",
+        "gpt-4o-search",
+        "grok-3-mini-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
 @public
 
-Provides
-
-and
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
 Note: These are example common model names as of Q3 2025. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
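A hedged sketch of how the widened `ModelName` type behaves at a call site; the routing rule and the custom model id are made up for illustration.

```python
from ai_pipeline_core import ModelName


def pick_model(prompt: str, prefer_cheap: bool = True) -> ModelName:
    """Route to a small predefined model or a custom deployment (hypothetical rule)."""
    if prefer_cheap and len(prompt) < 2_000:
        return "gpt-5-mini"        # predefined literal: IDE autocomplete applies
    return "my-company-llm-v1"     # any string type-checks because ModelName includes str
```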
@@ -51,32 +55,30 @@ Model categories:
    Models with integrated web search capabilities for retrieving
    and synthesizing current information.
 
-
-
-
-
-
-    3. Or simply use strings: model = "any-model-via-litellm"
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
 Example:
    >>> from ai_pipeline_core import llm, ModelName
    >>>
-    >>> #
-    >>> model: ModelName = "gpt-5"  # IDE
+    >>> # Predefined model with IDE autocomplete
+    >>> model: ModelName = "gpt-5"  # IDE suggests common models
    >>> response = await llm.generate(model, messages="Hello")
    >>>
-    >>> #
-    >>>
+    >>> # Custom model works directly
+    >>> model: ModelName = "custom-model-v2"  # Any string is valid
+    >>> response = await llm.generate(model, messages="Hello")
    >>>
-    >>> #
-    >>>
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
+    >>> # Both types work seamlessly
+    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
 
 Note:
-
-
-
+    The ModelName type includes both predefined literals and str,
+    allowing full flexibility while maintaining IDE support for
+    common models.
 
 See Also:
    - llm.generate: Main generation function