ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- ai_pipeline_core/__init__.py +83 -119
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +14 -15
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +349 -1062
- ai_pipeline_core/documents/mime_type.py +40 -85
- ai_pipeline_core/documents/utils.py +62 -7
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +5 -3
- ai_pipeline_core/llm/ai_messages.py +284 -73
- ai_pipeline_core/llm/client.py +462 -209
- ai_pipeline_core/llm/model_options.py +86 -53
- ai_pipeline_core/llm/model_response.py +187 -241
- ai_pipeline_core/llm/model_types.py +34 -54
- ai_pipeline_core/logging/__init__.py +2 -9
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -43
- ai_pipeline_core/logging/logging_mixin.py +17 -51
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/observability/tracing.py +640 -0
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +26 -105
- ai_pipeline_core/settings.py +41 -32
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -240
- ai_pipeline_core/documents/flow_document.py +0 -128
- ai_pipeline_core/documents/task_document.py +0 -133
- ai_pipeline_core/documents/temporary_document.py +0 -95
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -314
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -717
- ai_pipeline_core/prefect.py +0 -54
- ai_pipeline_core/simple_runner/__init__.py +0 -24
- ai_pipeline_core/simple_runner/cli.py +0 -255
- ai_pipeline_core/simple_runner/simple_runner.py +0 -385
- ai_pipeline_core/tracing.py +0 -475
- ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
- ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_response.py

@@ -1,30 +1,40 @@
 """Model response structures for LLM interactions.
 
-
-
-Provides enhanced response classes that wrap OpenAI API responses
+Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """
 
-import
+import json
+from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Generic, TypeVar
 
-from openai.types.chat import ChatCompletion
-from
+from openai.types.chat import ChatCompletion
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel
 
-T = TypeVar(
+T = TypeVar(
+    "T",
+    bound=BaseModel,
+)
 """Type parameter for structured response Pydantic models."""
 
 
+@dataclass(frozen=True)
+class Citation:
+    """A URL citation returned by search-enabled models (e.g. sonar-pro-search, gemini-3-flash-search)."""
+
+    title: str
+    url: str
+
+
 class ModelResponse(ChatCompletion):
     """Response wrapper for LLM text generation.
 
-    @public
-
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-    >>> response = await llm.generate(messages=messages)
-    >>> messages.
+    >>> response = await llm.generate("gpt-5.1", messages=messages)
+    >>> messages.append(response)  # Add assistant response to conversation
     >>> print(response.content)  # Access generated text
 
     The two main interactions with ModelResponse:
@@ -34,168 +44,153 @@ class ModelResponse(ChatCompletion):
     Almost all use cases are covered by these two patterns. Advanced features
     like token usage and cost tracking are available but rarely needed.
 
-
-
-
-    >>> messages = AIMessages("Explain quantum computing")
-    >>> response = await generate(messages=messages)
-    >>>
-    >>> # Primary usage: add to conversation
-    >>> messages.add(response)
-    >>>
-    >>> # Access generated text
-    >>> print(response.content)
-
-    Note:
-        Inherits from OpenAI's ChatCompletion for compatibility.
-        Other properties (usage, model, id) should only be accessed
-        when absolutely necessary.
+    Inherits from OpenAI's ChatCompletion for compatibility.
+    Other properties (usage, model, id) should only be accessed
+    when absolutely necessary.
     """
 
-
-
-
-
-
+    def __init__(
+        self,
+        chat_completion: ChatCompletion,
+        model_options: dict[str, Any],
+        metadata: dict[str, Any],
+        usage: CompletionUsage | None = None,
+    ) -> None:
+        """Initialize ModelResponse from ChatCompletion.
 
-
-
-        headers dict if not provided.
+        Wraps an OpenAI ChatCompletion object with additional metadata
+        and model options for tracking and observability.
 
         Args:
-            chat_completion:
-
-
-
-
-
-
-            >>>
-            >>> # Direct initialization (mainly for testing)
-            >>> response = ModelResponse(
-            ...     id="test",
-            ...     model="gpt-5",
-            ...     choices=[...]
-            ... )
+            chat_completion: ChatCompletion object from the API.
+            model_options: Model configuration options used for the request.
+                Stored for metadata extraction and tracing.
+            metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+                Includes timing information and custom tags.
+            usage: Optional usage information from streaming response.
+
         """
-
-
-
-
-
-
-        #
-
-
-
+        data = chat_completion.model_dump()
+
+        # fixes issue where the role is "assistantassistant" instead of "assistant"
+        valid_finish_reasons = {"stop", "length", "tool_calls", "content_filter", "function_call"}
+        for i in range(len(data["choices"])):
+            data["choices"][i]["message"]["role"] = "assistant"
+            # Only update finish_reason if it's not already a valid value
+            current_finish_reason = data["choices"][i].get("finish_reason")
+            if current_finish_reason not in valid_finish_reasons:
+                data["choices"][i]["finish_reason"] = "stop"
+            # Strip annotations with unsupported types (e.g. Grok returns type="file" for PDFs,
+            # but OpenAI's ChatCompletion only accepts type="url_citation")
+            if annotations := data["choices"][i]["message"].get("annotations"):
+                data["choices"][i]["message"]["annotations"] = [a for a in annotations if a.get("type") == "url_citation"]
+
+        super().__init__(**data)
+
+        self._model_options = model_options
+        self._metadata = metadata
+        if usage:
+            self.usage = usage
 
     @property
     def content(self) -> str:
         """Get the generated text content.
 
-        @public
-
         Primary property for accessing the LLM's response text.
-        This
+        This is the main property you'll use with ModelResponse.
 
         Returns:
             Generated text from the model, or empty string if none.
 
-        Example:
-            >>> response = await generate(messages="Hello")
-            >>> text = response.content  # The generated response
-            >>>
-            >>> # Common pattern: add to messages then use content
-            >>> messages.add(response)
-            >>> if "error" in response.content.lower():
-            ...     # Handle error case
         """
-
-
-    def set_model_options(self, options: dict[str, Any]) -> None:
-        """Store the model configuration used for generation.
-
-        Saves a deep copy of the options used for this generation,
-        excluding the messages for brevity.
+        content = self.choices[0].message.content or ""
+        return content.split("</think>")[-1].strip()
 
-
-
+    @property
+    def reasoning_content(self) -> str:
+        """Get the reasoning content.
 
-
-
-        Called internally by the generation functions.
+        Returns:
+            The reasoning content from the model, or empty string if none.
         """
-
-        if "
-
-
-
-        ""
+        message = self.choices[0].message
+        if reasoning_content := getattr(message, "reasoning_content", None):
+            return reasoning_content
+        if not message.content or "</think>" not in message.content:
+            return ""
+        return message.content.split("</think>")[0].strip()
 
-
-
-
-        Args:
-            headers: Dictionary of HTTP headers from the response.
+    @property
+    def citations(self) -> list[Citation]:
+        """Get URL citations from search-enabled models.
 
-
-
-            - x-litellm-call-id: Unique call identifier
-            - x-litellm-model-id: Actual model used
+        Returns:
+            List of Citation objects with title and url. Empty list for non-search models.
         """
-
+        annotations = self.choices[0].message.annotations
+        if not annotations:
+            return []
+        return [Citation(title=a.url_citation.title, url=a.url_citation.url) for a in annotations if a.url_citation]
 
-    def get_laminar_metadata(self) -> dict[str, str | int | float]:
-        """Extract metadata for LMNR (Laminar) observability.
+    def get_laminar_metadata(self) -> dict[str, str | int | float]:  # noqa: C901
+        """Extract metadata for LMNR (Laminar) observability including cost tracking.
 
-        Collects comprehensive metadata about the generation for
-
+        Collects comprehensive metadata about the generation for tracing,
+        monitoring, and cost analysis in the LMNR platform. This method
+        provides detailed insights into token usage, caching effectiveness,
+        and generation costs.
 
         Returns:
            Dictionary containing:
-            - LiteLLM headers (call ID, costs, etc.)
-            - Token usage statistics
-            - Model configuration
-            - Cost information
-            - Cached token counts
+            - LiteLLM headers (call ID, costs, model info, etc.)
+            - Token usage statistics (input, output, total, cached)
+            - Model configuration used for generation
+            - Cost information in multiple formats
+            - Cached token counts (when context caching enabled)
             - Reasoning token counts (for O1 models)
 
         Metadata structure:
            - litellm.*: All LiteLLM-specific headers
-            - gen_ai.usage
+            - gen_ai.usage.prompt_tokens: Input token count
+            - gen_ai.usage.completion_tokens: Output token count
+            - gen_ai.usage.total_tokens: Total tokens used
+            - gen_ai.usage.cached_tokens: Cached tokens (if applicable)
+            - gen_ai.usage.reasoning_tokens: Reasoning tokens (O1 models)
+            - gen_ai.usage.output_cost: Generation cost in dollars
+            - gen_ai.usage.cost: Alternative cost field (same value)
+            - gen_ai.cost: Simple cost field (same value)
            - gen_ai.response.*: Response identifiers
-            - gen_ai.cost: Cost information
            - model_options.*: Configuration used
 
-
-
-
-
-            >>> print(f"Tokens: {metadata.get('gen_ai.usage.total_tokens')}")
+        Cost tracking:
+            Cost information is extracted from two sources:
+            1. x-litellm-response-cost header (primary)
+            2. usage.cost attribute (fallback)
 
-
-
-
-
-        metadata: dict[str, str | int | float] = {}
+            Cost is stored in three fields for observability tool consumption:
+            - gen_ai.usage.output_cost (OpenTelemetry GenAI semantic convention)
+            - gen_ai.usage.cost (aggregated cost)
+            - gen_ai.cost (short-form)
 
-
-        cost
-
-
-
-        if header.startswith("x-litellm-"):
-            header_name = header.replace("x-litellm-", "").lower()
-            metadata[f"litellm.{header_name}"] = value
+            Cost availability depends on LiteLLM proxy configuration. Not all providers
+            return cost information. Cached tokens reduce actual cost but may not be reflected.
+            Used internally by tracing but accessible for cost analysis.
+        """
+        metadata: dict[str, str | int | float] = deepcopy(self._metadata)
 
         # Add base metadata
+        # NOTE: gen_ai.response.model is intentionally omitted — Laminar's UI uses it
+        # to override the span display name in the tree view, hiding the actual span name
+        # (set via `purpose` parameter). Tracked upstream: Laminar's getSpanDisplayName()
+        # in frontend/components/traces/trace-view/utils.ts prefers model over span name
+        # for LLM spans. Restore once Laminar shows both or prefers span name.
         metadata.update({
-            "gen_ai.response.id":
-            "gen_ai.
-            "get_ai.system": "litellm",
+            "gen_ai.response.id": self.id,
+            "gen_ai.system": "litellm",
        })
 
         # Add usage metadata if available
+        cost = None
         if self.usage:
             metadata.update({
                 "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
@@ -209,145 +204,96 @@ class ModelResponse(ChatCompletion):
                 cost = float(self.usage.cost)  # type: ignore[attr-defined]
 
             # Add reasoning tokens if available
-            if completion_details := self.usage.completion_tokens_details:
-
-                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
+            if (completion_details := self.usage.completion_tokens_details) and (reasoning_tokens := completion_details.reasoning_tokens):
+                metadata["gen_ai.usage.reasoning_tokens"] = reasoning_tokens
 
             # Add cached tokens if available
-            if prompt_details := self.usage.prompt_tokens_details:
-
-                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
+            if (prompt_details := self.usage.prompt_tokens_details) and (cached_tokens := prompt_details.cached_tokens):
+                metadata["gen_ai.usage.cached_tokens"] = cached_tokens
 
         # Add cost metadata if available
         if cost and cost > 0:
             metadata.update({
                 "gen_ai.usage.output_cost": cost,
                 "gen_ai.usage.cost": cost,
-                "
+                "gen_ai.cost": cost,
             })
 
-
-
-
+        for key, value in self._model_options.items():
+            if "messages" in key:
+                continue
+            metadata[f"model_options.{key}"] = str(value)
+
+        other_fields = self.__dict__
+        for key, value in other_fields.items():
+            if key in {"_model_options", "_metadata", "choices"}:
+                continue
+            try:
+                metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+            except Exception:
+                metadata[f"response.raw.{key}"] = str(value)
+
+        message = self.choices[0].message
+        for key, value in message.__dict__.items():
+            if key in {"content"}:
+                continue
+            metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)
 
         return metadata
 
+    def validate_output(self) -> None:
+        """Validate response output content and format.
+
+        Checks that response has non-empty content and validates against
+        response_format if structured output was requested.
+
+        Raises:
+            ValueError: If response content is empty.
+            ValidationError: If content doesn't match response_format schema.
+        """
+        if not self.content:
+            raise ValueError("Empty response content")
+
+        if (response_format := self._model_options.get("response_format")) and isinstance(response_format, BaseModel):
+            response_format.model_validate_json(self.content)
 
-
+
+class StructuredModelResponse(ModelResponse, Generic[T]):  # noqa: UP046
     """Response wrapper for structured/typed LLM output.
 
-
-
-    Primary usage is adding to AIMessages and accessing .parsed property:
-
-    >>> class Analysis(BaseModel):
-    ...     sentiment: float
-    ...     summary: str
-    >>>
-    >>> response = await generate_structured(
-    ...     response_format=Analysis,
-    ...     messages="Analyze this text..."
-    ... )
-    >>>
-    >>> # Primary usage: access parsed model
-    >>> analysis = response.parsed
-    >>> print(f"Sentiment: {analysis.sentiment}")
-    >>>
-    >>> # Can add to messages for conversation
-    >>> messages.add(response)
-
-    The two main interactions:
-    1. Accessing .parsed property for the structured data
-    2. Adding to AIMessages for conversation continuity
-
-    These patterns cover virtually all use cases. Advanced features exist
-    but should only be used when absolutely necessary.
-
-    Type Parameter:
-        T: The Pydantic model type for the structured output.
-
-    Note:
-        Extends ModelResponse with type-safe parsed data access.
-        Other inherited properties should rarely be needed.
+    Primary usage is accessing the .parsed property for the structured data.
     """
 
-
-
-
-        parsed_value: T | None = None,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize with ChatCompletion and parsed value.
+    @classmethod
+    def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+        """Convert a ModelResponse to StructuredModelResponse.
 
-
-
-        from ParsedChatCompletion automatically.
+        Takes an existing ModelResponse and converts it to a StructuredModelResponse
+        for accessing parsed structured output. Used internally by generate_structured().
 
         Args:
-
-            parsed_value: Pre-parsed Pydantic model instance.
-                If None, attempts extraction from
-                ParsedChatCompletion.
-            **kwargs: Additional ChatCompletion parameters.
-
-        Extraction behavior:
-        1. Use provided parsed_value if given
-        2. Extract from ParsedChatCompletion if available
-        3. Store as None (access will raise ValueError)
-
-        Note:
-            Usually created internally by generate_structured().
-            The parsed value is validated by Pydantic automatically.
-        """
-        super().__init__(chat_completion, **kwargs)
-        self._parsed_value: T | None = parsed_value
+            model_response: The ModelResponse to convert.
 
-
-
-
-
-
-        self._parsed_value = message.parsed  # type: ignore[attr-defined]
+        Returns:
+            StructuredModelResponse with lazy parsing support.
+        """
+        model_response.__class__ = cls
+        return model_response  # type: ignore[return-value]
 
     @property
     def parsed(self) -> T:
-        """Get the parsed
+        """Get the parsed structured output.
 
-
-
-        Primary property for accessing structured output.
-        This is the main reason to use generate_structured().
+        Lazily parses the JSON content into the specified Pydantic model.
+        Result is cached after first access.
 
         Returns:
-
+            Parsed Pydantic model instance.
 
         Raises:
-
-
-        Example:
-            >>> class UserInfo(BaseModel):
-            ...     name: str
-            ...     age: int
-            >>>
-            >>> response = await generate_structured(
-            ...     response_format=UserInfo,
-            ...     messages="Extract user info..."
-            ... )
-            >>>
-            >>> # Primary usage: get the parsed model
-            >>> user = response.parsed
-            >>> print(f"{user.name} is {user.age} years old")
-            >>>
-            >>> # Can also add to messages
-            >>> messages.add(response)
-
-        Note:
-            Type-safe with full IDE support. This property covers
-            99% of structured response use cases.
+            ValidationError: If content doesn't match the response_format schema.
         """
-        if self
-
-
-
-            "No parsed content available. This should not happen for StructuredModelResponse."
-        )
+        if not hasattr(self, "_parsed_value"):
+            response_format = self._model_options.get("response_format")
+            self._parsed_value: T = response_format.model_validate_json(self.content)  # type: ignore[return-value]
+        return self._parsed_value
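
Taken together, the reworked ModelResponse surface can be exercised roughly as follows. This is a usage sketch assembled from the docstrings shown in the diff above; the top-level AIMessages re-export and the exact generate_structured() signature are assumptions rather than confirmed 0.4.1 API details.

import asyncio

from pydantic import BaseModel

from ai_pipeline_core import llm  # llm module import taken from the 0.1.12 docstring example
from ai_pipeline_core import AIMessages  # assumed top-level re-export of llm/ai_messages.py


class Analysis(BaseModel):
    sentiment: float
    summary: str


async def main() -> None:
    messages = AIMessages("Explain quantum computing")

    # Plain generation: the 0.4.1 docstring passes the model name first.
    response = await llm.generate("gpt-5.1", messages=messages)
    messages.append(response)          # keep the assistant turn in the conversation
    print(response.content)            # generated text, with any </think> block stripped
    print(response.reasoning_content)  # reasoning text; empty string if the model emits none

    # Search-enabled models (e.g. sonar-pro-search) attach Citation(title, url) objects.
    for citation in response.citations:
        print(citation.title, citation.url)

    # Structured output: .parsed lazily validates the JSON content against response_format.
    analysis = await llm.generate_structured(  # signature assumed by analogy with generate()
        "gpt-5.1", response_format=Analysis, messages=messages
    )
    print(analysis.parsed.summary)


asyncio.run(main())

Because ModelResponse still subclasses OpenAI's ChatCompletion, code written against the OpenAI types keeps working; the wrapper only layers metadata, citation, and validation helpers on top.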
ai_pipeline_core/llm/model_types.py

@@ -10,40 +10,41 @@ Model categories:
 - Search models: Models with web search capabilities
 """
 
-from typing import Literal
+from typing import Literal
 
-ModelName
-
-
-
-
-
-
-
-
-
-
-
-
-
-]
-
+type ModelName = (
+    Literal[
+        # Core models
+        "gemini-3-pro",
+        "gpt-5.1",
+        # Small models
+        "gemini-3-flash",
+        "gpt-5-mini",
+        "grok-4.1-fast",
+        # Search models
+        "gemini-3-flash-search",
+        "gpt-5-mini-search",
+        "grok-4.1-fast-search",
+        "sonar-pro-search",
+    ]
+    | str
+)
+"""Type-safe model name identifiers with support for custom models.
 
-
+Provides IDE autocompletion for common model names while allowing any
+string for custom models. The type is a union of predefined literals
+and str, giving you the best of both worlds: suggestions for known
+models and flexibility for custom ones.
 
-
-language model names. Used throughout the library to prevent typos
-and ensure only valid models are referenced.
-
-Note: These are example common model names as of Q3 2025. Actual availability
+These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -51,34 +52,13 @@ Model categories:
         Models with integrated web search capabilities for retrieving
         and synthesizing current information.
 
-
-
-
-
-
-3. Or simply use strings: model = "any-model-via-litellm"
-
-Example:
-    >>> from ai_pipeline_core import llm, ModelName
-    >>>
-    >>> # Type-safe model selection
-    >>> model: ModelName = "gpt-5"  # IDE autocomplete works
-    >>> response = await llm.generate(model, messages="Hello")
-    >>>
-    >>> # Also accepts string for custom models
-    >>> response = await llm.generate("custom-model-v2", messages="Hello")
-    >>>
-    >>> # Custom type safety
-    >>> from typing import Literal
-    >>> MyModel = Literal["company-llm-v1"]
-    >>> model: ModelName | MyModel = "company-llm-v1"
-
-Note:
-    While the type alias provides suggestions for common models,
-    the generate functions also accept string literals to support
-    custom or newer models accessed via LiteLLM proxy.
+Using custom models:
+    ModelName now includes str, so you can use any model name directly:
+    - Predefined models get IDE autocomplete and validation
+    - Custom models work seamlessly as strings
+    - No need for Union types or additional type aliases
 
-
-
-
+The ModelName type includes both predefined literals and str,
+allowing full flexibility while maintaining IDE support for
+common models.
 """
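
As a quick illustration of what the Literal-plus-str union buys, the sketch below annotates both a predefined and a custom model name with the same type. The top-level ModelName import is taken from the removed 0.1.12 docstring example and is assumed to still be exported in 0.4.1; the custom model name is hypothetical.

from ai_pipeline_core import ModelName  # assumed re-export, as in the 0.1.12 docstring

model: ModelName = "gpt-5.1"          # predefined literal: appears in IDE autocomplete
custom: ModelName = "company-llm-v1"  # hypothetical custom model: accepted because the alias includes str

The trade-off is that, with str in the union, a type checker can no longer flag a misspelled model name; the alias favors flexibility over strictness, matching the "Using custom models" note above.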