ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. ai_pipeline_core/__init__.py +21 -13
  2. ai_pipeline_core/documents/document.py +202 -51
  3. ai_pipeline_core/documents/document_list.py +148 -24
  4. ai_pipeline_core/documents/flow_document.py +2 -6
  5. ai_pipeline_core/documents/task_document.py +0 -4
  6. ai_pipeline_core/documents/temporary_document.py +1 -8
  7. ai_pipeline_core/flow/config.py +174 -5
  8. ai_pipeline_core/llm/__init__.py +1 -6
  9. ai_pipeline_core/llm/ai_messages.py +137 -4
  10. ai_pipeline_core/llm/client.py +118 -65
  11. ai_pipeline_core/llm/model_options.py +6 -7
  12. ai_pipeline_core/llm/model_response.py +17 -16
  13. ai_pipeline_core/llm/model_types.py +3 -7
  14. ai_pipeline_core/logging/__init__.py +0 -2
  15. ai_pipeline_core/logging/logging_config.py +0 -6
  16. ai_pipeline_core/logging/logging_mixin.py +2 -10
  17. ai_pipeline_core/pipeline.py +54 -68
  18. ai_pipeline_core/prefect.py +12 -3
  19. ai_pipeline_core/prompt_manager.py +14 -7
  20. ai_pipeline_core/settings.py +13 -5
  21. ai_pipeline_core/simple_runner/__init__.py +1 -11
  22. ai_pipeline_core/simple_runner/cli.py +13 -12
  23. ai_pipeline_core/simple_runner/simple_runner.py +34 -189
  24. ai_pipeline_core/storage/__init__.py +8 -0
  25. ai_pipeline_core/storage/storage.py +628 -0
  26. ai_pipeline_core/tracing.py +234 -30
  27. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
  28. ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
  29. ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
  30. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
  31. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -9,6 +9,8 @@ including text, documents, and model responses.
  import base64
  import hashlib
  import json
+ from copy import deepcopy
+ from typing import Any, Callable, Iterable, SupportsIndex, Union

  from openai.types.chat import (
      ChatCompletionContentPartParam,
@@ -48,15 +50,25 @@ class AIMessages(list[AIMessageType]):
      - ModelResponse: Becomes {"role": "assistant", "content": response.content}

      Note: Document conversion is automatic. Text content becomes user text messages.
-     Images are sent to vision-capable models (non-vision models will raise ValueError).
-     PDFs are attached when supported by the model, otherwise a text extraction
-     fallback is used. LiteLLM proxy handles the specific encoding requirements
-     for each provider.
+
+     VISION/PDF MODEL COMPATIBILITY WARNING:
+     Images require vision-capable models (e.g., gpt-4o, gemini-pro-vision, claude-3-haiku).
+     Non-vision models will raise ValueError when encountering image documents.
+     PDFs require models with document processing support - check your model's capabilities
+     before including PDF documents in messages. Unsupported models may fall back to
+     text extraction or raise errors depending on provider configuration.
+     LiteLLM proxy handles the specific encoding requirements for each provider.

      IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
      expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
      of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).

+     CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
+     `AIMessages(["text"])`) or empty constructor with append (e.g.,
+     `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
+     constructor (`AIMessages("text")`) as this will raise a TypeError to prevent
+     accidental character iteration.
+
      Example:
          >>> from ai_pipeline_core import llm
          >>> messages = AIMessages()
@@ -65,6 +77,127 @@ class AIMessages(list[AIMessageType]):
          >>> messages.append(response)  # Add the actual response
      """

+     def __init__(self, iterable: Iterable[AIMessageType] | None = None, *, frozen: bool = False):
+         """Initialize AIMessages with optional iterable.
+
+         Args:
+             iterable: Optional iterable of messages (list, tuple, etc.).
+                 Must not be a string.
+             frozen: If True, list is immutable from creation.
+
+         Raises:
+             TypeError: If a string is passed directly to the constructor.
+         """
+         if isinstance(iterable, str):
+             raise TypeError(
+                 "AIMessages cannot be constructed from a string directly. "
+                 "Use AIMessages(['text']) for a single message or "
+                 "AIMessages() and then append('text')."
+             )
+         self._frozen = False  # Initialize as unfrozen to allow initial population
+         if iterable is None:
+             super().__init__()
+         else:
+             super().__init__(iterable)
+         self._frozen = frozen  # Set frozen state after initial population
+
+     def freeze(self) -> None:
+         """Permanently freeze the list, preventing modifications.
+
+         Once frozen, the list cannot be unfrozen.
+         """
+         self._frozen = True
+
+     def copy(self) -> "AIMessages":
+         """Create an unfrozen deep copy of the list.
+
+         Returns:
+             New unfrozen AIMessages with deep-copied messages.
+         """
+         copied_messages = deepcopy(list(self))
+         return AIMessages(copied_messages, frozen=False)
+
+     def _check_frozen(self) -> None:
+         """Check if list is frozen and raise if it is.
+
+         Raises:
+             RuntimeError: If the list is frozen.
+         """
+         if self._frozen:
+             raise RuntimeError("Cannot modify frozen AIMessages")
+
+     def append(self, message: AIMessageType) -> None:
+         """Add a message to the end of the list."""
+         self._check_frozen()
+         super().append(message)
+
+     def extend(self, messages: Iterable[AIMessageType]) -> None:
+         """Add multiple messages to the list."""
+         self._check_frozen()
+         super().extend(messages)
+
+     def insert(self, index: SupportsIndex, message: AIMessageType) -> None:
+         """Insert a message at the specified position."""
+         self._check_frozen()
+         super().insert(index, message)
+
+     def __setitem__(
+         self,
+         index: Union[SupportsIndex, slice],
+         value: Union[AIMessageType, Iterable[AIMessageType]],
+     ) -> None:
+         """Set item or slice."""
+         self._check_frozen()
+         super().__setitem__(index, value)  # type: ignore[arg-type]
+
+     def __iadd__(self, other: Iterable[AIMessageType]) -> "AIMessages":
+         """In-place addition (+=).
+
+         Returns:
+             This AIMessages instance after modification.
+         """
+         self._check_frozen()
+         return super().__iadd__(other)
+
+     def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
+         """Delete item or slice from list."""
+         self._check_frozen()
+         super().__delitem__(index)
+
+     def pop(self, index: SupportsIndex = -1) -> AIMessageType:
+         """Remove and return item at index.
+
+         Returns:
+             AIMessageType removed from the list.
+         """
+         self._check_frozen()
+         return super().pop(index)
+
+     def remove(self, message: AIMessageType) -> None:
+         """Remove first occurrence of message."""
+         self._check_frozen()
+         super().remove(message)
+
+     def clear(self) -> None:
+         """Remove all items from list."""
+         self._check_frozen()
+         super().clear()
+
+     def reverse(self) -> None:
+         """Reverse list in place."""
+         self._check_frozen()
+         super().reverse()
+
+     def sort(
+         self, *, key: Callable[[AIMessageType], Any] | None = None, reverse: bool = False
+     ) -> None:
+         """Sort list in place."""
+         self._check_frozen()
+         if key is None:
+             super().sort(reverse=reverse)  # type: ignore[call-arg]
+         else:
+             super().sort(key=key, reverse=reverse)
+
      def get_last_message(self) -> AIMessageType:
          """Get the last message in the conversation.

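The constructor guard and freeze/copy methods added above change how AIMessages instances are built and mutated in 0.2.1. A minimal usage sketch, assuming AIMessages is imported from the package root as in the package's own examples (variable names are illustrative):

    from ai_pipeline_core import AIMessages

    # A bare string now raises TypeError; wrap single messages in a list.
    messages = AIMessages(["What is Python?"])
    messages.append("Follow-up question")  # allowed while unfrozen

    messages.freeze()                      # permanent: there is no unfreeze
    # messages.append("more")              # would raise RuntimeError: Cannot modify frozen AIMessages

    draft = messages.copy()                # deep copy, always returned unfrozen
    draft.append("Safe to extend the copy")
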
@@ -24,7 +24,6 @@ from pydantic import BaseModel

  from ai_pipeline_core.exceptions import LLMError
  from ai_pipeline_core.settings import settings
- from ai_pipeline_core.tracing import trace

  from .ai_messages import AIMessages
  from .model_options import ModelOptions
@@ -60,9 +59,9 @@ def _process_messages(
      - Regular messages without caching

      System Prompt Location:
-         The system prompt from ModelOptions.system_prompt is always injected
-         as the FIRST message with role="system". It is NOT cached with context,
-         allowing dynamic system prompts without breaking cache efficiency.
+         The system prompt parameter is always injected as the FIRST message
+         with role="system". It is NOT cached with context, allowing dynamic
+         system prompts without breaking cache efficiency.

      Cache behavior:
          The last context message gets ephemeral caching with specified TTL
@@ -221,7 +220,6 @@ async def _generate_with_retry(
      raise LLMError("Unknown error occurred during LLM generation.")


- @trace(ignore_inputs=["context"])
  async def generate(
      model: ModelName,
      *,
@@ -238,9 +236,10 @@ async def generate(
      expensive static content separately from dynamic queries.

      Best Practices:
-         1. OPTIONS: Omit in 90% of cases - defaults are optimized
+         1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
          2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
          3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+         4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables

      Args:
          model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
@@ -250,8 +249,11 @@ async def generate(
          messages: Dynamic messages/queries. AIMessages or str ONLY.
              Do not pass Document or DocumentList directly.
              If string, converted to AIMessages internally.
-         options: Model configuration (temperature, retries, timeout, etc.).
-             Defaults to None (uses ModelOptions() with standard settings).
+         options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+             Framework defaults are production-optimized (3 retries, 10s delay, 300s timeout).
+             Configure model behavior centrally via LiteLLM proxy settings or environment
+             variables, not per API call. Provider-specific settings should be configured
+             at the proxy level.

      Returns:
          ModelResponse containing:
@@ -276,17 +278,26 @@ async def generate(
          # WRONG - don't convert to string yourself
          response = await llm.generate("gpt-5", messages=my_document.text)  # NO!

+     VISION/PDF MODEL COMPATIBILITY:
+         When using Documents containing images or PDFs, ensure your model supports these formats:
+         - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+         - PDFs require document processing support (varies by provider)
+         - Non-compatible models will raise ValueError or fall back to text extraction
+         - Check model capabilities before including visual/PDF content
+
      Context vs Messages Strategy:
-         context: Static, reusable content (cached 120 seconds)
+         context: Static, reusable content for caching efficiency
              - Large documents, instructions, examples
-             - Same across multiple calls
+             - Remains constant across multiple calls
+             - Cached when supported by provider/proxy configuration

-         messages: Dynamic, query-specific content
+         messages: Dynamic, per-call specific content
              - User questions, current conversation turn
-             - Changes every call
+             - Changes with each API call
+             - Never cached, always processed fresh

      Example:
-         >>> # Simple case - no options needed (90% of cases)
+         >>> # CORRECT - No options parameter (this is the recommended pattern)
          >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
          >>> print(response.content)  # In production, use get_pipeline_logger instead of print

@@ -300,29 +311,6 @@ async def generate(
          >>> # Second call: reuses cache, saves tokens!
          >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")

-         >>> # Custom cache TTL for longer-lived contexts
-         >>> response = await llm.generate(
-         ...     "gpt-5",
-         ...     context=static_doc,
-         ...     messages="Analyze this",
-         ...     options=ModelOptions(cache_ttl="300s")  # Cache for 5 minutes
-         ... )
-
-         >>> # Disable caching when context changes frequently
-         >>> response = await llm.generate(
-         ...     "gpt-5",
-         ...     context=dynamic_doc,
-         ...     messages="Process this",
-         ...     options=ModelOptions(cache_ttl=None)  # No caching
-         ... )
-
-         >>> # AVOID unnecessary options (defaults are optimal)
-         >>> response = await llm.generate(
-         ...     "gpt-5",
-         ...     messages="Hello",
-         ...     options=ModelOptions(temperature=0.7)  # Default is probably fine!
-         ... )
-
          >>> # Multi-turn conversation
          >>> messages = AIMessages([
          ...     "What is Python?",
@@ -331,31 +319,48 @@ async def generate(
          ... ])
          >>> response = await llm.generate("gpt-5", messages=messages)

+     Configuration via LiteLLM Proxy:
+         >>> # Configure temperature in litellm_config.yaml:
+         >>> # model_list:
+         >>> #   - model_name: gpt-5
+         >>> #     litellm_params:
+         >>> #       model: openai/gpt-4o
+         >>> #       temperature: 0.3
+         >>> #       max_tokens: 1000
+         >>>
+         >>> # Configure retry logic in proxy:
+         >>> # general_settings:
+         >>> #   master_key: sk-1234
+         >>> #   max_retries: 5
+         >>> #   retry_delay: 15
+
      Performance:
          - Context caching saves ~50-90% tokens on repeated calls
          - First call: full token cost
          - Subsequent calls (within cache TTL): only messages tokens
-         - Default cache TTL is 120s (configurable via ModelOptions.cache_ttl)
-         - Default retry delay is 10s (configurable via ModelOptions.retry_delay_seconds)
+         - Default cache TTL is 120s (production-optimized)
+         - Default retry logic: 3 attempts with 10s delay (production-optimized)

      Caching:
          When enabled in your LiteLLM proxy and supported by the upstream provider,
          context messages may be cached to reduce token usage on repeated calls.
-         Default TTL is 120s, configurable via ModelOptions.cache_ttl (e.g. "300s", "5m").
-         Set cache_ttl=None to disable caching. Savings depend on provider and payload;
-         treat this as an optimization, not a guarantee. Cache behavior varies by proxy
-         configuration.
+         Default TTL is 120s (optimized for production workloads). Configure caching
+         behavior centrally via your LiteLLM proxy settings, not per API call.
+         Savings depend on provider and payload; treat this as an optimization, not a guarantee.
+
+     Configuration:
+         All model behavior should be configured at the LiteLLM proxy level:
+         - Temperature, max_tokens: Set in litellm_config.yaml model_list
+         - Retry logic: Configure in proxy general_settings
+         - Timeouts: Set via proxy configuration
+         - Caching: Enable/configure in proxy cache settings
+
+         This centralizes configuration and ensures consistency across all API calls.

      Note:
-         - Context argument is ignored by the tracer to avoid recording large data
          - All models are accessed via LiteLLM proxy
          - Automatic retry with configurable delay between attempts
          - Cost tracking via response headers
-
-     See Also:
-         - generate_structured: For typed/structured output
-         - AIMessages: Message container with document support
-         - ModelOptions: Configuration options
      """
      if isinstance(messages, str):
          messages = AIMessages([messages])
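Given the deprecation of per-call options above, callers that previously tuned temperature or cache TTL per request should drop the argument and rely on proxy-level configuration. A short sketch of the recommended 0.2.1 call pattern (function and variable names are illustrative):

    from ai_pipeline_core import llm, AIMessages

    async def summarize(doc_text: str) -> str:
        # Static, potentially cacheable content goes in context;
        # the per-call question goes in messages. No options argument.
        context = AIMessages([doc_text])
        response = await llm.generate(
            "gpt-5",
            context=context,
            messages="Summarize the key points.",
        )
        return response.content
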
@@ -375,7 +380,6 @@ T = TypeVar("T", bound=BaseModel)
  """Type variable for Pydantic model types in structured generation."""


- @trace(ignore_inputs=["context"])
  async def generate_structured(
      model: ModelName,
      response_format: type[T],
@@ -391,18 +395,71 @@ async def generate_structured(
      Type-safe generation that returns validated Pydantic model instances.
      Uses OpenAI's structured output feature for guaranteed schema compliance.

+     IMPORTANT: Search models (models with '-search' suffix) do not support
+     structured output. Use generate() instead for search models.
+
      Best Practices:
-         Same as generate() - see generate() documentation for details.
+         1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
+         2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
+         3. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
+         4. See generate() documentation for more details
+
+     Context vs Messages Strategy:
+         context: Static, reusable content for caching efficiency
+             - Schemas, examples, instructions
+             - Remains constant across multiple calls
+             - Cached when supported by provider/proxy configuration
+
+         messages: Dynamic, per-call specific content
+             - Data to be structured, user queries
+             - Changes with each API call
+             - Never cached, always processed fresh
+
+     Complex Task Pattern:
+         For complex tasks like research or deep analysis, it's recommended to use
+         a two-step approach:
+         1. First use generate() with a capable model to perform the analysis
+         2. Then use generate_structured() with a smaller model to convert the
+            response into structured output
+
+         This pattern is more reliable than trying to force complex reasoning
+         directly into structured format:
+
+         >>> # Step 1: Research/analysis with generate() - no options parameter
+         >>> research = await llm.generate(
+         ...     "gpt-5",
+         ...     messages="Research and analyze this complex topic..."
+         ... )
+         >>>
+         >>> # Step 2: Structure the results with generate_structured()
+         >>> structured = await llm.generate_structured(
+         ...     "gpt-5-mini",  # Smaller model is fine for structuring
+         ...     response_format=ResearchSummary,
+         ...     messages=f"Extract key information: {research.content}"
+         ... )

      Args:
          model: Model to use (must support structured output).
+             Search models (models with '-search' suffix) do not support structured output.
          response_format: Pydantic model class defining the output schema.
              The model will generate JSON matching this schema.
          context: Static context to cache (documents, schemas, examples).
              Defaults to None (empty AIMessages).
          messages: Dynamic prompts/queries. AIMessages or str ONLY.
              Do not pass Document or DocumentList directly.
-         options: Model configuration. response_format is set automatically.
+         options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+             Framework defaults are production-optimized. Configure model behavior
+             centrally via LiteLLM proxy settings, not per API call.
+             The response_format is set automatically from the response_format parameter.
+
+     VISION/PDF MODEL COMPATIBILITY:
+         When using Documents with images/PDFs in structured output:
+         - Images require vision-capable models that also support structured output
+         - PDFs require models with both document processing AND structured output support
+         - Many models support either vision OR structured output, but not both
+         - Test your specific model+document combination before production use
+         - Consider two-step approach: generate() for analysis, then generate_structured()
+           for formatting

      Returns:
          StructuredModelResponse[T] containing:
@@ -412,6 +469,7 @@ async def generate_structured(
      Raises:
          TypeError: If response_format is not a Pydantic model class.
          ValueError: If model doesn't support structured output or no parsed content returned.
+             Structured output support varies by provider and model.
          LLMError: If generation fails after retries.
          ValidationError: If response cannot be parsed into response_format.

@@ -423,8 +481,9 @@ async def generate_structured(
          ...     sentiment: float = Field(ge=-1, le=1)
          ...     key_points: list[str] = Field(max_length=5)
          >>>
+         >>> # CORRECT - No options parameter
          >>> response = await llm.generate_structured(
-         ...     model="gpt-5",
+         ...     "gpt-5",
          ...     response_format=Analysis,
          ...     messages="Analyze this product review: ..."
          ... )
@@ -435,11 +494,13 @@ async def generate_structured(
          ...     print(f"- {point}")

      Supported models:
-         Support varies by provider and model. Generally includes:
+         Structured output support varies by provider and model. Generally includes:
          - OpenAI: GPT-4 and newer models
          - Anthropic: Claude 3+ models
          - Google: Gemini Pro models
-         Check provider documentation for specific model support.
+
+         Search models (models with '-search' suffix) do not support structured output.
+         Check provider documentation for specific support.

      Performance:
          - Structured output may use more tokens than free text
@@ -451,11 +512,7 @@ async def generate_structured(
          - The model generates JSON matching the schema
          - Validation happens automatically via Pydantic
          - Use Field() descriptions to guide generation
-
-     See Also:
-         - generate: For unstructured text generation
-         - ModelOptions: Configuration including response_format
-         - StructuredModelResponse: Response wrapper with .parsed property
+         - Search models (models with '-search' suffix) do not support structured output
      """
      if context is None:
          context = AIMessages()
@@ -467,6 +524,8 @@ async def generate_structured(
      if isinstance(messages, str):
          messages = AIMessages([messages])

+     assert isinstance(messages, AIMessages)
+
      # Call the internal generate function with structured output enabled
      try:
          response = await _generate_with_retry(model, context, messages, options)
@@ -498,9 +557,3 @@ async def generate_structured(

      # Create a StructuredModelResponse with the parsed value
      return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
-
-
- # Public aliases for testing internal functions
- # These are exported to allow testing of implementation details
- process_messages_for_testing = _process_messages
- generate_with_retry_for_testing = _generate_with_retry
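The two-step pattern recommended in the docstring above can be written end to end as follows; the ResearchSummary schema here is illustrative, since the docstring references it without defining it:

    from pydantic import BaseModel, Field
    from ai_pipeline_core import llm

    class ResearchSummary(BaseModel):
        topic: str
        key_findings: list[str] = Field(max_length=5)

    async def research_and_structure(question: str) -> ResearchSummary:
        # Step 1: free-form analysis with a capable model.
        research = await llm.generate("gpt-5", messages=question)
        # Step 2: a smaller model converts the prose into structured output.
        structured = await llm.generate_structured(
            "gpt-5-mini",
            response_format=ResearchSummary,
            messages=f"Extract key information: {research.content}",
        )
        return structured.parsed  # validated ResearchSummary instance
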
@@ -1,7 +1,5 @@
  """Configuration options for LLM generation.

- @public
-
  Provides the ModelOptions class for configuring model behavior,
  retry logic, and advanced features like web search and reasoning.
  """
@@ -14,8 +12,6 @@ from pydantic import BaseModel
  class ModelOptions(BaseModel):
      """Configuration options for LLM generation requests.

-     @public
-
      ModelOptions encapsulates all configuration parameters for model
      generation, including model behavior settings, retry logic, and
      advanced features. All fields are optional with sensible defaults.
@@ -68,7 +64,8 @@ class ModelOptions(BaseModel):

      response_format: Pydantic model class for structured output.
          Pass a Pydantic model; the client converts it to JSON Schema.
-         Set automatically by generate_structured(). Provider support varies.
+         Set automatically by generate_structured().
+         Structured output support varies by provider and model.

      Example:
          >>> # Basic configuration
@@ -162,11 +159,13 @@ class ModelOptions(BaseModel):
          Note:
              - system_prompt is handled separately in _process_messages()
              - retries and retry_delay_seconds are used by retry logic
-             - extra_body is always included for potential extensions
+             - extra_body always includes usage tracking for cost monitoring
          """
          kwargs: dict[str, Any] = {
              "timeout": self.timeout,
-             "extra_body": {},
+             "extra_body": {
+                 "usage": {"include": True},  # For openrouter cost tracking
+             },
          }

          if self.temperature:
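In effect, every request body built from ModelOptions now asks the proxy to report usage. A standalone mirror of the added construction (the 300-second timeout is the default mentioned in the generate() docstring; the real method may add further keys):

    from typing import Any

    timeout = 300  # documented default timeout, in seconds

    kwargs: dict[str, Any] = {
        "timeout": timeout,
        "extra_body": {
            "usage": {"include": True},  # OpenRouter-style cost tracking on every call
        },
    }
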
@@ -2,7 +2,7 @@

  @public

- Provides enhanced response classes that wrap OpenAI API responses
+ Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
  with additional metadata, cost tracking, and structured output support.
  """

@@ -23,8 +23,8 @@ class ModelResponse(ChatCompletion):

      Primary usage is adding to AIMessages for multi-turn conversations:

-     >>> response = await llm.generate(messages=messages)
-     >>> messages.add(response)  # Add assistant response to conversation
+     >>> response = await llm.generate("gpt-5", messages=messages)
+     >>> messages.append(response)  # Add assistant response to conversation
      >>> print(response.content)  # Access generated text

      The two main interactions with ModelResponse:
@@ -35,13 +35,13 @@ class ModelResponse(ChatCompletion):
      like token usage and cost tracking are available but rarely needed.

      Example:
-         >>> from ai_pipeline_core.llm import AIMessages, generate
+         >>> from ai_pipeline_core import llm, AIMessages
          >>>
-         >>> messages = AIMessages("Explain quantum computing")
-         >>> response = await generate(messages=messages)
+         >>> messages = AIMessages(["Explain quantum computing"])
+         >>> response = await llm.generate("gpt-5", messages=messages)
          >>>
          >>> # Primary usage: add to conversation
-         >>> messages.add(response)
+         >>> messages.append(response)
          >>>
          >>> # Access generated text
          >>> print(response.content)
@@ -96,17 +96,17 @@ class ModelResponse(ChatCompletion):
          @public

          Primary property for accessing the LLM's response text.
-         This covers 99% of use cases with ModelResponse.
+         This is the main property you'll use with ModelResponse.

          Returns:
              Generated text from the model, or empty string if none.

          Example:
-             >>> response = await generate(messages="Hello")
+             >>> response = await generate("gpt-5", messages="Hello")
              >>> text = response.content  # The generated response
              >>>
              >>> # Common pattern: add to messages then use content
-             >>> messages.add(response)
+             >>> messages.append(response)
              >>> if "error" in response.content.lower():
              ...     # Handle error case
          """
@@ -189,8 +189,7 @@ class ModelResponse(ChatCompletion):
          >>> response = await llm.generate(
          ...     "gpt-5",
          ...     context=large_doc,
-         ...     messages="Summarize this",
-         ...     options=ModelOptions(cache_ttl="300s")
+         ...     messages="Summarize this"
          ... )
          >>>
          >>> # Get comprehensive metadata
@@ -292,6 +291,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          ...     summary: str
          >>>
          >>> response = await generate_structured(
+         ...     "gpt-5",
          ...     response_format=Analysis,
          ...     messages="Analyze this text..."
          ... )
@@ -301,7 +301,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          >>> print(f"Sentiment: {analysis.sentiment}")
          >>>
          >>> # Can add to messages for conversation
-         >>> messages.add(response)
+         >>> messages.append(response)

      The two main interactions:
      1. Accessing .parsed property for the structured data
@@ -377,6 +377,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          ...     age: int
          >>>
          >>> response = await generate_structured(
+         ...     "gpt-5",
          ...     response_format=UserInfo,
          ...     messages="Extract user info..."
          ... )
@@ -386,11 +387,11 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
          >>> print(f"{user.name} is {user.age} years old")
          >>>
          >>> # Can also add to messages
-         >>> messages.add(response)
+         >>> messages.append(response)

      Note:
-         Type-safe with full IDE support. This property covers
-         99% of structured response use cases.
+         Type-safe with full IDE support. This is the main property
+         you'll use with structured responses.
      """
      if self._parsed_value is not None:
          return self._parsed_value
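The example updates above also reflect an API rename: responses are now added to a conversation with the standard list append rather than a custom add(). A migration sketch of the multi-turn pattern the docstrings describe (function name is illustrative):

    from ai_pipeline_core import llm, AIMessages

    async def follow_up() -> str:
        messages = AIMessages(["Explain quantum computing"])
        response = await llm.generate("gpt-5", messages=messages)

        # 0.1.x examples used messages.add(response); in 0.2.1 AIMessages is a
        # plain list subclass, so the assistant turn is appended directly.
        messages.append(response)
        messages.append("Now give a one-sentence summary.")

        second = await llm.generate("gpt-5", messages=messages)
        return second.content
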
@@ -21,12 +21,12 @@ ModelName: TypeAlias = (
      # Small models
      "gemini-2.5-flash",
      "gpt-5-mini",
-     "grok-3-mini",
+     "grok-4-fast",
      # Search models
      "gemini-2.5-flash-search",
      "sonar-pro-search",
      "gpt-4o-search",
-     "grok-3-mini-search",
+     "grok-4-fast-search",
  ]
  | str
  )
@@ -47,7 +47,7 @@ Model categories:
      High-capability models for complex tasks requiring deep reasoning,
      nuanced understanding, or creative generation.

- Small models (gemini-2.5-flash, gpt-5-mini, grok-3-mini):
+ Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
      Efficient models optimized for speed and cost, suitable for
      simpler tasks or high-volume processing.

@@ -79,8 +79,4 @@ Note:
      The ModelName type includes both predefined literals and str,
      allowing full flexibility while maintaining IDE support for
      common models.
-
- See Also:
-     - llm.generate: Main generation function
-     - ModelOptions: Model configuration options
  """