ai-pipeline-core 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/PKG-INFO +4 -4
  2. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/document.py +24 -1
  4. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/mime_type.py +4 -4
  5. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/ai_messages.py +32 -0
  6. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/client.py +38 -52
  7. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_options.py +19 -1
  8. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_response.py +101 -173
  9. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_types.py +1 -1
  10. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/pipeline.py +0 -11
  11. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/pyproject.toml +5 -5
  12. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/.gitignore +0 -0
  13. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/LICENSE +0 -0
  14. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/README.md +0 -0
  15. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/__init__.py +0 -0
  16. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/document_list.py +0 -0
  17. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/flow_document.py +0 -0
  18. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/task_document.py +0 -0
  19. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/temporary_document.py +0 -0
  20. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/utils.py +0 -0
  21. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/exceptions.py +0 -0
  22. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/flow/__init__.py +0 -0
  23. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/flow/config.py +0 -0
  24. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/flow/options.py +0 -0
  25. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/__init__.py +0 -0
  26. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/__init__.py +0 -0
  27. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/logging.yml +0 -0
  28. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/logging_config.py +0 -0
  29. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  30. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/prefect.py +0 -0
  31. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/prompt_manager.py +0 -0
  32. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/py.typed +0 -0
  33. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/settings.py +0 -0
  34. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  35. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/simple_runner/cli.py +0 -0
  36. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
  37. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/storage/__init__.py +0 -0
  38. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/storage/storage.py +0 -0
  39. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/tracing.py +0 -0
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.2.3
+ Version: 0.2.5
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.12
  Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
- Requires-Dist: lmnr>=0.7.17
+ Requires-Dist: lmnr>=0.7.18
  Requires-Dist: openai>=1.109.1
  Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
  Requires-Dist: prefect>=3.4.21
@@ -28,7 +28,7 @@ Requires-Dist: pydantic-settings>=2.10.1
  Requires-Dist: pydantic>=2.11.9
  Requires-Dist: python-magic>=0.4.27
  Requires-Dist: ruamel-yaml>=0.18.14
- Requires-Dist: tiktoken>=0.11.0
+ Requires-Dist: tiktoken>=0.12.0
  Provides-Extra: dev
  Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
  Requires-Dist: bump2version>=1.0.1; extra == 'dev'
@@ -40,7 +40,7 @@ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
  Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
  Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
  Requires-Dist: pytest>=8.4.1; extra == 'dev'
- Requires-Dist: ruff>=0.12.9; extra == 'dev'
+ Requires-Dist: ruff>=0.14.1; extra == 'dev'
  Description-Content-Type: text/markdown

  # AI Pipeline Core
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/__init__.py
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

- __version__ = "0.2.3"
+ __version__ = "0.2.5"

  __all__ = [
  # Config/Settings
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/document.py
@@ -29,6 +29,7 @@ from typing import (
  overload,
  )

+ import tiktoken
  from pydantic import (
  BaseModel,
  ConfigDict,
@@ -980,7 +981,7 @@ class Document(BaseModel, ABC):
  """Detect the MIME type from document content.

  Detection strategy (in order):
- 1. Returns 'application/x-empty' for empty content
+ 1. Returns 'text/plain' for empty content
  2. Extension-based detection for known text formats (preferred)
  3. python-magic content analysis for unknown extensions
  4. Fallback to extension or 'application/octet-stream'
@@ -1103,6 +1104,28 @@ class Document(BaseModel, ABC):
  raise ValueError(f"Document is not text: {self.name}")
  return self.content.decode("utf-8")

+ @property
+ def approximate_tokens_count(self) -> int:
+ """Approximate tokens count for the document content.
+
+ @public
+
+ Uses tiktoken with gpt-4 encoding to estimate token count.
+ For text documents, encodes the actual text. For non-text
+ documents (images, PDFs, etc.), returns a fixed estimate of 1024 tokens.
+
+ Returns:
+ Approximate number of tokens for this document.
+
+ Example:
+ >>> doc = MyDocument.create(name="data.txt", content="Hello world")
+ >>> doc.approximate_tokens_count # ~2 tokens
+ """
+ if self.is_text:
+ return len(tiktoken.encoding_for_model("gpt-4").encode(self.text))
+ else:
+ return 1024 # Fixed estimate for non-text documents
+
  def as_yaml(self) -> Any:
  r"""Parse document content as YAML.

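Note: the new token estimate on Document (and the companion property on AIMessages later in this diff) uses the gpt-4 tiktoken encoding for text and a flat 1024-token guess for binary content; a standalone sketch of the same arithmetic (a plain function, not the library class):

    import tiktoken

    def approximate_tokens(text: str | None) -> int:
        # Text content: count real tokens with the gpt-4 encoding, as the property does.
        if text is not None:
            return len(tiktoken.encoding_for_model("gpt-4").encode(text))
        # Binary content (images, PDFs, ...): fixed 1024-token estimate, matching the property.
        return 1024

    print(approximate_tokens("Hello world"))  # ~2 tokens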
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/mime_type.py
@@ -43,7 +43,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
  r"""Detect MIME type from document content and filename.

  Uses a multi-stage detection strategy for maximum accuracy:
- 1. Returns 'application/x-empty' for empty content
+ 1. Returns 'text/plain' for empty content
  2. Uses extension-based detection for known formats (most reliable)
  3. Falls back to python-magic content analysis
  4. Final fallback to extension or 'application/octet-stream'
@@ -57,7 +57,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
  Never returns None or empty string.

  Fallback behavior:
- - Empty content: 'application/x-empty'
+ - Empty content: 'text/plain'
  - Unknown extension with binary content: 'application/octet-stream'
  - Magic library failure: Falls back to extension or 'application/octet-stream'

@@ -75,13 +75,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
  >>> detect_mime_type(b'Hello World', "text.txt")
  'text/plain'
  >>> detect_mime_type(b'', "empty.txt")
- 'application/x-empty'
+ 'text/plain'
  >>> detect_mime_type(b'\\x89PNG', "image.xyz")
  'image/png' # Magic detects PNG despite wrong extension
  """
  # Check for empty content
  if len(content) == 0:
- return "application/x-empty"
+ return "text/plain"

  # Try extension-based detection first for known formats
  # This is more reliable for text formats that magic might misidentify
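Note: the documented fallback chain amounts to the sketch below (illustrative only; the real module keeps a table of known text extensions and uses python-magic rather than mimetypes):

    import mimetypes

    def sketch_detect(content: bytes, name: str) -> str:
        if len(content) == 0:
            return "text/plain"  # empty content now maps to text/plain, not application/x-empty
        guessed, _ = mimetypes.guess_type(name)
        if guessed:
            return guessed  # extension-based detection is tried first
        # python-magic content sniffing runs here in the real implementation
        return "application/octet-stream"  # final fallback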
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/ai_messages.py
@@ -12,6 +12,7 @@ import json
  from copy import deepcopy
  from typing import Any, Callable, Iterable, SupportsIndex, Union

+ import tiktoken
  from openai.types.chat import (
  ChatCompletionContentPartParam,
  ChatCompletionMessageParam,
@@ -301,6 +302,37 @@ class AIMessages(list[AIMessageType]):
  system_prompt = ""
  return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()

+ @property
+ def approximate_tokens_count(self) -> int:
+ """Approximate tokens count for the messages.
+
+ @public
+
+ Uses tiktoken with gpt-4 encoding to estimate total token count
+ across all messages in the conversation.
+
+ Returns:
+ Approximate tokens count for all messages.
+
+ Raises:
+ ValueError: If message contains unsupported type.
+
+ Example:
+ >>> messages = AIMessages(["Hello", "World"])
+ >>> messages.approximate_tokens_count # ~2-3 tokens
+ """
+ count = 0
+ for message in self:
+ if isinstance(message, str):
+ count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
+ elif isinstance(message, Document):
+ count += message.approximate_tokens_count
+ elif isinstance(message, ModelResponse): # type: ignore
+ count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
+ else:
+ raise ValueError(f"Unsupported message type: {type(message)}")
+ return count
+
  @staticmethod
  def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
  """Convert a document to prompt format for LLM consumption.
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/client.py
@@ -12,15 +12,17 @@ Key functions:
  """

  import asyncio
+ import time
  from typing import Any, TypeVar

  from lmnr import Laminar
  from openai import AsyncOpenAI
+ from openai.lib.streaming.chat import ContentDeltaEvent, ContentDoneEvent
  from openai.types.chat import (
  ChatCompletionMessageParam,
  )
  from prefect.logging import get_logger
- from pydantic import BaseModel
+ from pydantic import BaseModel, ValidationError

  from ai_pipeline_core.exceptions import LLMError
  from ai_pipeline_core.settings import settings
@@ -130,19 +132,31 @@ async def _generate(
  api_key=settings.openai_api_key,
  base_url=settings.openai_base_url,
  ) as client:
- # Use parse for structured output, create for regular
- if completion_kwargs.get("response_format"):
- raw_response = await client.chat.completions.with_raw_response.parse( # type: ignore[var-annotated]
- **completion_kwargs,
- )
- else:
- raw_response = await client.chat.completions.with_raw_response.create( # type: ignore[var-annotated]
- **completion_kwargs
- )
-
- response = ModelResponse(raw_response.parse()) # type: ignore[arg-type]
- response.set_model_options(completion_kwargs)
- response.set_headers(dict(raw_response.headers.items())) # type: ignore[arg-type]
+ start_time, first_token_time = time.time(), None
+ async with client.chat.completions.stream(
+ model=model,
+ messages=messages,
+ **completion_kwargs,
+ ) as stream:
+ async for event in stream:
+ if isinstance(event, ContentDeltaEvent):
+ if not first_token_time:
+ first_token_time = time.time()
+ elif isinstance(event, ContentDoneEvent):
+ pass
+ if not first_token_time:
+ first_token_time = time.time()
+ raw_response = await stream.get_final_completion()
+
+ metadata = {
+ "time_taken": round(time.time() - start_time, 2),
+ "first_token_time": round(first_token_time - start_time, 2),
+ }
+ response = ModelResponse(
+ raw_response,
+ model_options=completion_kwargs,
+ metadata=metadata,
+ )
  return response

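Note: the timing metadata added here is the usual first-token/total-latency measurement taken while draining a stream; a standalone sketch of the pattern (a generic async iterable, not the exact OpenAI stream or event types):

    import time

    async def timed_drain(stream):
        # Record total latency and time-to-first-token while consuming a stream.
        start, first_token = time.time(), None
        async for _event in stream:
            if first_token is None:
                first_token = time.time()
        if first_token is None:  # stream yielded nothing
            first_token = time.time()
        return {
            "time_taken": round(time.time() - start, 2),
            "first_token_time": round(first_token - start, 2),
        }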
@@ -182,12 +196,10 @@ async def _generate_with_retry(
  context, messages, options.system_prompt, options.cache_ttl
  )
  completion_kwargs: dict[str, Any] = {
- "model": model,
- "messages": processed_messages,
  **options.to_openai_completion_kwargs(),
  }

- if context:
+ if context and options.cache_ttl:
  completion_kwargs["prompt_cache_key"] = context.get_prompt_cache_key(options.system_prompt)

  for attempt in range(options.retries):
@@ -197,20 +209,18 @@ async def _generate_with_retry(
  ) as span:
  response = await _generate(model, processed_messages, completion_kwargs)
  span.set_attributes(response.get_laminar_metadata())
- Laminar.set_span_output(response.content)
- if not response.content:
- raise ValueError(f"Model {model} returned an empty response.")
+ Laminar.set_span_output([
+ r for r in (response.reasoning_content, response.content) if r
+ ])
+ response.validate_output()
  return response
- except (asyncio.TimeoutError, ValueError, Exception) as e:
+ except (asyncio.TimeoutError, ValueError, ValidationError, Exception) as e:
  if not isinstance(e, asyncio.TimeoutError):
  # disable cache if it's not a timeout because it may cause an error
  completion_kwargs["extra_body"]["cache"] = {"no-cache": True}

  logger.warning(
- "LLM generation failed (attempt %d/%d): %s",
- attempt + 1,
- options.retries,
- e,
+ f"LLM generation failed (attempt {attempt + 1}/{options.retries}): {e}",
  )
  if attempt == options.retries - 1:
  raise LLMError("Exhausted all retry attempts for LLM generation.") from e
@@ -453,8 +463,8 @@ async def generate_structured(
  In most cases, leave as None to use framework defaults.
  Configure model behavior centrally via LiteLLM proxy settings when possible.

- VISION/PDF MODEL COMPATIBILITY:
- When using Documents with images/PDFs in structured output:
+ Note:
+ Vision/PDF model compatibility considerations:
  - Images require vision-capable models that also support structured output
  - PDFs require models with both document processing AND structured output support
  - Many models support either vision OR structured output, but not both
@@ -536,28 +546,4 @@ async def generate_structured(
  except (ValueError, LLMError):
  raise # Explicitly re-raise to satisfy DOC502

- # Extract the parsed value from the response
- parsed_value: T | None = None
-
- # Check if response has choices and parsed content
- if response.choices and hasattr(response.choices[0].message, "parsed"):
- parsed: Any = response.choices[0].message.parsed # type: ignore[attr-defined]
-
- # If parsed is a dict, instantiate it as the response format class
- if isinstance(parsed, dict):
- parsed_value = response_format(**parsed)
- # If it's already the right type, use it
- elif isinstance(parsed, response_format):
- parsed_value = parsed
- else:
- # Otherwise try to convert it
- raise TypeError(
- f"Unable to convert parsed response to {response_format.__name__}: "
- f"got type {type(parsed).__name__}" # type: ignore[reportUnknownArgumentType]
- )
-
- if parsed_value is None:
- raise ValueError("No parsed content available from the model response")
-
- # Create a StructuredModelResponse with the parsed value
- return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+ return StructuredModelResponse[T].from_model_response(response)
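Note: for callers the structured helper behaves as before; it now simply hands the finished response to StructuredModelResponse.from_model_response, and parsing happens lazily in .parsed. A usage sketch mirroring the docstring examples elsewhere in this diff (model name and prompt are illustrative, and the import path is assumed from the module layout):

    from pydantic import BaseModel

    from ai_pipeline_core.llm.client import generate_structured  # assumed import path

    class Analysis(BaseModel):
        sentiment: float
        summary: str

    async def run() -> None:
        response = await generate_structured(
            "gpt-5",
            response_format=Analysis,
            messages="Analyze this text...",
        )
        analysis = response.parsed  # validated Analysis instance, parsed from response.content
        print(analysis.sentiment, analysis.summary)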
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_options.py
@@ -88,6 +88,12 @@ class ModelOptions(BaseModel):
  and detect abuse. Maximum length is typically 256 characters.
  Useful for multi-tenant applications or per-user billing.

+ metadata: Custom metadata tags for tracking and observability.
+ Dictionary of string key-value pairs for tagging requests.
+ Useful for tracking experiments, versions, or custom attributes.
+ Maximum of 16 key-value pairs, each key/value max 64 characters.
+ Passed through to LMNR tracing and API provider metadata.
+
  extra_body: Additional provider-specific parameters to pass in request body.
  Dictionary of custom parameters not covered by standard options.
  Merged with usage_tracking if both are set.
@@ -147,6 +153,12 @@ class ModelOptions(BaseModel):
  ... user="user_12345", # Track costs per user
  ... temperature=0.7
  ... )
+ >>>
+ >>> # With metadata for tracking and observability
+ >>> options = ModelOptions(
+ ... metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
+ ... temperature=0.7
+ ... )

  Note:
  - Not all options apply to all models
@@ -165,7 +177,7 @@ class ModelOptions(BaseModel):
  search_context_size: Literal["low", "medium", "high"] | None = None
  reasoning_effort: Literal["low", "medium", "high"] | None = None
  retries: int = 3
- retry_delay_seconds: int = 10
+ retry_delay_seconds: int = 20
  timeout: int = 600
  cache_ttl: str | None = "5m"
  service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
@@ -175,6 +187,7 @@ class ModelOptions(BaseModel):
  verbosity: Literal["low", "medium", "high"] | None = None
  usage_tracking: bool = True
  user: str | None = None
+ metadata: dict[str, str] | None = None
  extra_body: dict[str, Any] | None = None

  def to_openai_completion_kwargs(self) -> dict[str, Any]:
@@ -200,6 +213,7 @@ class ModelOptions(BaseModel):
  - service_tier -> service_tier
  - verbosity -> verbosity
  - user -> user (for cost tracking)
+ - metadata -> metadata (for tracking/observability)
  - extra_body -> extra_body (merged with usage tracking)

  Web Search Structure:
@@ -253,7 +267,11 @@ class ModelOptions(BaseModel):
  if self.user:
  kwargs["user"] = self.user

+ if self.metadata:
+ kwargs["metadata"] = self.metadata
+
  if self.usage_tracking:
  kwargs["extra_body"]["usage"] = {"include": True}
+ kwargs["stream_options"] = {"include_usage": True}

  return kwargs
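Note: a quick end-to-end sketch of the new option (values are illustrative; the import path follows the module layout above):

    from ai_pipeline_core.llm.model_options import ModelOptions

    options = ModelOptions(
        metadata={"experiment": "v1", "feature": "search"},  # up to 16 pairs, 64 chars each
        temperature=0.7,
    )
    kwargs = options.to_openai_completion_kwargs()
    # kwargs["metadata"] carries the tags; with usage_tracking left at its default True,
    # kwargs["stream_options"] == {"include_usage": True} so streamed responses report usage.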
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_response.py
@@ -6,13 +6,17 @@ Provides enhanced response classes that use OpenAI-compatible base types via Lit
  with additional metadata, cost tracking, and structured output support.
  """

- import copy
+ import json
+ from copy import deepcopy
  from typing import Any, Generic, TypeVar

- from openai.types.chat import ChatCompletion, ParsedChatCompletion
- from pydantic import BaseModel, Field
+ from openai.types.chat import ChatCompletion
+ from pydantic import BaseModel

- T = TypeVar("T", bound=BaseModel)
+ T = TypeVar(
+ "T",
+ bound=BaseModel,
+ )
  """Type parameter for structured response Pydantic models."""


@@ -52,42 +56,37 @@ class ModelResponse(ChatCompletion):
  when absolutely necessary.
  """

- headers: dict[str, str] = Field(default_factory=dict)
- model_options: dict[str, Any] = Field(default_factory=dict)
-
- def __init__(self, chat_completion: ChatCompletion | None = None, **kwargs: Any) -> None:
- """Initialize ModelResponse from ChatCompletion or kwargs.
+ def __init__(
+ self,
+ chat_completion: ChatCompletion,
+ model_options: dict[str, Any],
+ metadata: dict[str, Any],
+ ) -> None:
+ """Initialize ModelResponse from ChatCompletion.

- Can be initialized from an existing ChatCompletion object or
- directly from keyword arguments. Automatically initializes
- headers dict if not provided.
+ Wraps an OpenAI ChatCompletion object with additional metadata
+ and model options for tracking and observability.

  Args:
- chat_completion: Optional ChatCompletion to wrap.
- **kwargs: Direct initialization parameters if no
- ChatCompletion provided.
+ chat_completion: ChatCompletion object from the API.
+ model_options: Model configuration options used for the request.
+ Stored for metadata extraction and tracing.
+ metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+ Includes timing information and custom tags.

  Example:
- >>> # From ChatCompletion
- >>> response = ModelResponse(chat_completion_obj)
- >>>
- >>> # Direct initialization (mainly for testing)
+ >>> # Usually created internally by generate()
  >>> response = ModelResponse(
- ... id="test",
- ... model="gpt-5",
- ... choices=[...]
+ ... chat_completion=completion,
+ ... model_options={"temperature": 0.7, "model": "gpt-4"},
+ ... metadata={"time_taken": 1.5, "first_token_time": 0.3}
  ... )
  """
- if chat_completion:
- # Copy all attributes from the ChatCompletion instance
- data = chat_completion.model_dump()
- data["headers"] = {} # Add default headers
- super().__init__(**data)
- else:
- # Initialize from kwargs
- if "headers" not in kwargs:
- kwargs["headers"] = {}
- super().__init__(**kwargs)
+ data = chat_completion.model_dump()
+ super().__init__(**data)
+
+ self._model_options = model_options
+ self._metadata = metadata

  @property
  def content(self) -> str:
@@ -113,38 +112,21 @@ class ModelResponse(ChatCompletion):
  content = self.choices[0].message.content or ""
  return content.split("</think>")[-1].strip()

- def set_model_options(self, options: dict[str, Any]) -> None:
- """Store the model configuration used for generation.
-
- Saves a deep copy of the options used for this generation,
- excluding the messages for brevity.
-
- Args:
- options: Dictionary of model options from the API call.
-
- Note:
- Messages are removed to avoid storing large prompts.
- Called internally by the generation functions.
- """
- self.model_options = copy.deepcopy(options)
- if "messages" in self.model_options:
- del self.model_options["messages"]
-
- def set_headers(self, headers: dict[str, str]) -> None:
- """Store HTTP response headers.
-
- Saves response headers which contain LiteLLM metadata
- including cost information and call IDs.
+ @property
+ def reasoning_content(self) -> str:
+ """Get the reasoning content.

- Args:
- headers: Dictionary of HTTP headers from the response.
+ @public

- Headers of interest:
- - x-litellm-response-cost: Generation cost
- - x-litellm-call-id: Unique call identifier
- - x-litellm-model-id: Actual model used
+ Returns:
+ The reasoning content from the model, or empty string if none.
  """
- self.headers = copy.deepcopy(headers)
+ message = self.choices[0].message
+ if reasoning_content := getattr(message, "reasoning_content", None):
+ return reasoning_content
+ if not message.content or "</think>" not in message.content:
+ return ""
+ return message.content.split("</think>")[0].strip()

  def get_laminar_metadata(self) -> dict[str, str | int | float]:
  """Extract metadata for LMNR (Laminar) observability including cost tracking.
@@ -224,25 +206,17 @@ class ModelResponse(ChatCompletion):
  - Cached tokens reduce actual cost but may not be reflected
  - Used internally by tracing but accessible for cost analysis
  """
- metadata: dict[str, str | int | float] = {}
-
- litellm_id = self.headers.get("x-litellm-call-id")
- cost = float(self.headers.get("x-litellm-response-cost") or 0)
-
- # Add all x-litellm-* headers
- for header, value in self.headers.items():
- if header.startswith("x-litellm-"):
- header_name = header.replace("x-litellm-", "").lower()
- metadata[f"litellm.{header_name}"] = value
+ metadata: dict[str, str | int | float] = deepcopy(self._metadata)

  # Add base metadata
  metadata.update({
- "gen_ai.response.id": litellm_id or self.id,
+ "gen_ai.response.id": self.id,
  "gen_ai.response.model": self.model,
  "get_ai.system": "litellm",
  })

  # Add usage metadata if available
+ cost = None
  if self.usage:
  metadata.update({
  "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
@@ -273,130 +247,84 @@
  "get_ai.cost": cost,
  })

- if self.model_options:
- for key, value in self.model_options.items():
- metadata[f"model_options.{key}"] = str(value)
+ for key, value in self._model_options.items():
+ if "messages" in key:
+ continue
+ metadata[f"model_options.{key}"] = str(value)
+
+ other_fields = self.__dict__
+ for key, value in other_fields.items():
+ if key in ["_model_options", "_metadata", "choices", "usage"]:
+ continue
+ try:
+ metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+ except Exception:
+ metadata[f"response.raw.{key}"] = str(value)
+
+ message = self.choices[0].message
+ for key, value in message.__dict__.items():
+ if key in ["content"]:
+ continue
+ metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)

  return metadata

+ def validate_output(self) -> None:
+ """Validate response output content and format.

- class StructuredModelResponse(ModelResponse, Generic[T]):
- """Response wrapper for structured/typed LLM output.
-
- @public
+ Checks that response has non-empty content and validates against
+ response_format if structured output was requested.

- Primary usage is adding to AIMessages and accessing .parsed property:
+ Raises:
+ ValueError: If response content is empty.
+ ValidationError: If content doesn't match response_format schema.
+ """
+ if not self.content:
+ raise ValueError("Empty response content")

- >>> class Analysis(BaseModel):
- ... sentiment: float
- ... summary: str
- >>>
- >>> response = await generate_structured(
- ... "gpt-5",
- ... response_format=Analysis,
- ... messages="Analyze this text..."
- ... )
- >>>
- >>> # Primary usage: access parsed model
- >>> analysis = response.parsed
- >>> print(f"Sentiment: {analysis.sentiment}")
- >>>
- >>> # Can add to messages for conversation
- >>> messages.append(response)
+ if response_format := self._model_options.get("response_format"):
+ if isinstance(response_format, BaseModel):
+ response_format.model_validate_json(self.content)

- The two main interactions:
- 1. Accessing .parsed property for the structured data
- 2. Adding to AIMessages for conversation continuity

- These patterns cover virtually all use cases. Advanced features exist
- but should only be used when absolutely necessary.
+ class StructuredModelResponse(ModelResponse, Generic[T]):
+ """Response wrapper for structured/typed LLM output.

- Type Parameter:
- T: The Pydantic model type for the structured output.
+ @public

- Note:
- Extends ModelResponse with type-safe parsed data access.
- Other inherited properties should rarely be needed.
+ Primary usage is accessing the .parsed property for the structured data.
  """

- def __init__(
- self,
- chat_completion: ChatCompletion | None = None,
- parsed_value: T | None = None,
- **kwargs: Any,
- ) -> None:
- """Initialize with ChatCompletion and parsed value.
+ @classmethod
+ def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+ """Convert a ModelResponse to StructuredModelResponse.

- Creates a structured response from a base completion and
- optionally a pre-parsed value. Can extract parsed value
- from ParsedChatCompletion automatically.
+ Takes an existing ModelResponse and converts it to a StructuredModelResponse
+ for accessing parsed structured output. Used internally by generate_structured().

  Args:
- chat_completion: Base chat completion response.
- parsed_value: Pre-parsed Pydantic model instance.
- If None, attempts extraction from
- ParsedChatCompletion.
- **kwargs: Additional ChatCompletion parameters.
-
- Extraction behavior:
- 1. Use provided parsed_value if given
- 2. Extract from ParsedChatCompletion if available
- 3. Store as None (access will raise ValueError)
+ model_response: The ModelResponse to convert.

- Note:
- Usually created internally by generate_structured().
- The parsed value is validated by Pydantic automatically.
+ Returns:
+ StructuredModelResponse with lazy parsing support.
  """
- super().__init__(chat_completion, **kwargs)
- self._parsed_value: T | None = parsed_value
-
- # Extract parsed value from ParsedChatCompletion if available
- if chat_completion and isinstance(chat_completion, ParsedChatCompletion):
- if chat_completion.choices: # type: ignore[attr-defined]
- message = chat_completion.choices[0].message # type: ignore[attr-defined]
- if hasattr(message, "parsed"): # type: ignore
- self._parsed_value = message.parsed # type: ignore[attr-defined]
+ model_response.__class__ = cls
+ return model_response # type: ignore[return-value]

  @property
  def parsed(self) -> T:
- """Get the parsed Pydantic model instance.
+ """Get the parsed structured output.

- @public
-
- Primary property for accessing structured output.
- This is the main reason to use generate_structured().
+ Lazily parses the JSON content into the specified Pydantic model.
+ Result is cached after first access.

  Returns:
- Validated instance of the Pydantic model type T.
+ Parsed Pydantic model instance.

  Raises:
- ValueError: If no parsed content available (internal error).
-
- Example:
- >>> class UserInfo(BaseModel):
- ... name: str
- ... age: int
- >>>
- >>> response = await generate_structured(
- ... "gpt-5",
- ... response_format=UserInfo,
- ... messages="Extract user info..."
- ... )
- >>>
- >>> # Primary usage: get the parsed model
- >>> user = response.parsed
- >>> print(f"{user.name} is {user.age} years old")
- >>>
- >>> # Can also add to messages
- >>> messages.append(response)
-
- Note:
- Type-safe with full IDE support. This is the main property
- you'll use with structured responses.
+ ValidationError: If content doesn't match the response_format schema.
  """
- if self._parsed_value is not None:
- return self._parsed_value
-
- raise ValueError(
- "No parsed content available. This should not happen for StructuredModelResponse."
- )
+ if not hasattr(self, "_parsed_value"):
+ response_format = self._model_options.get("response_format")
+ self._parsed_value: T = response_format.model_validate_json(self.content) # type: ignore[return-value]
+ return self._parsed_value
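Note: the lazy .parsed accessor reduces to validating response.content against the stored response_format; the same idea in plain Pydantic (the model class and JSON string are illustrative):

    from pydantic import BaseModel

    class UserInfo(BaseModel):
        name: str
        age: int

    content = '{"name": "Ada", "age": 36}'  # stands in for response.content
    parsed = UserInfo.model_validate_json(content)  # the call .parsed makes once, then caches
    print(parsed.name, parsed.age)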
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_types.py
@@ -20,7 +20,7 @@ ModelName: TypeAlias = (
  "grok-4",
  # Small models
  "gemini-2.5-flash",
- "gpt-5-mini",
+ "gpt-5-nano",
  "grok-4-fast",
  # Search models
  "gemini-2.5-flash-search",
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/pipeline.py
@@ -306,8 +306,6 @@ def pipeline_task(

  Args:
  __fn: Function to decorate (when used without parentheses).
-
- Tracing parameters:
  trace_level: When to trace ("always", "debug", "off").
  - "always": Always trace (default)
  - "debug": Only trace when LMNR_DEBUG="true"
@@ -322,8 +320,6 @@ def pipeline_task(
  Also forces trace level to "always" if not already set.
  trace_trim_documents: Trim document content in traces to first 100 chars (default True).
  Reduces trace size with large documents.
-
- Prefect task parameters:
  name: Task name (defaults to function name).
  description: Human-readable task description.
  tags: Tags for organization and filtering.
@@ -523,13 +519,8 @@ def pipeline_flow(
  ) -> DocumentList # Must return DocumentList

  Args:
- __fn: Function to decorate (when used without parentheses).
-
- Config parameter:
  config: Required FlowConfig class for document loading/saving. Enables
  automatic loading from string paths and saving outputs.
-
- Tracing parameters:
  trace_level: When to trace ("always", "debug", "off").
  - "always": Always trace (default)
  - "debug": Only trace when LMNR_DEBUG="true"
@@ -544,8 +535,6 @@ def pipeline_flow(
  Also forces trace level to "always" if not already set.
  trace_trim_documents: Trim document content in traces to first 100 chars (default True).
  Reduces trace size with large documents.
-
- Prefect flow parameters:
  name: Flow name (defaults to function name).
  version: Flow version identifier.
  flow_run_name: Static or dynamic run name.
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "ai-pipeline-core"
- version = "0.2.3"
+ version = "0.2.5"
  description = "Core utilities for AI-powered processing pipelines using prefect"
  readme = "README.md"
  license = {text = "MIT"}
@@ -22,7 +22,7 @@ classifiers = [
  dependencies = [
  "httpx>=0.28.1",
  "Jinja2>=3.1.6",
- "lmnr>=0.7.17",
+ "lmnr>=0.7.18",
  "openai>=1.109.1",
  "prefect>=3.4.21",
  "prefect-gcp[cloud_storage]>=0.6.10",
@@ -30,7 +30,7 @@ dependencies = [
  "pydantic>=2.11.9",
  "python-magic>=0.4.27",
  "ruamel.yaml>=0.18.14",
- "tiktoken>=0.11.0",
+ "tiktoken>=0.12.0",
  ]

  [project.urls]
@@ -50,7 +50,7 @@ dev = [
  "pytest-mock>=3.14.0",
  "pytest-xdist>=3.8.0",
  "pytest>=8.4.1",
- "ruff>=0.12.9",
+ "ruff>=0.14.1",
  ]

  [tool.pytest.ini_options]
@@ -177,7 +177,7 @@ reportIncompatibleVariableOverride = "error"
  reportMissingParameterType = "warning"

  [tool.bumpversion]
- current_version = "0.2.3"
+ current_version = "0.2.5"
  commit = true
  tag = true
  tag_name = "v{new_version}"