ai-pipeline-core 0.1.13-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. ai_pipeline_core/__init__.py +25 -14
  2. ai_pipeline_core/documents/__init__.py +2 -1
  3. ai_pipeline_core/documents/document.py +317 -49
  4. ai_pipeline_core/documents/document_list.py +136 -33
  5. ai_pipeline_core/documents/flow_document.py +8 -29
  6. ai_pipeline_core/documents/task_document.py +6 -27
  7. ai_pipeline_core/documents/temporary_document.py +6 -27
  8. ai_pipeline_core/documents/utils.py +64 -1
  9. ai_pipeline_core/flow/config.py +174 -5
  10. ai_pipeline_core/flow/options.py +2 -2
  11. ai_pipeline_core/llm/__init__.py +6 -1
  12. ai_pipeline_core/llm/ai_messages.py +14 -7
  13. ai_pipeline_core/llm/client.py +143 -55
  14. ai_pipeline_core/llm/model_options.py +20 -5
  15. ai_pipeline_core/llm/model_response.py +77 -29
  16. ai_pipeline_core/llm/model_types.py +38 -40
  17. ai_pipeline_core/logging/__init__.py +0 -2
  18. ai_pipeline_core/logging/logging_config.py +0 -6
  19. ai_pipeline_core/logging/logging_mixin.py +2 -10
  20. ai_pipeline_core/pipeline.py +68 -65
  21. ai_pipeline_core/prefect.py +12 -3
  22. ai_pipeline_core/prompt_manager.py +6 -7
  23. ai_pipeline_core/settings.py +13 -5
  24. ai_pipeline_core/simple_runner/__init__.py +1 -11
  25. ai_pipeline_core/simple_runner/cli.py +13 -12
  26. ai_pipeline_core/simple_runner/simple_runner.py +34 -172
  27. ai_pipeline_core/storage/__init__.py +8 -0
  28. ai_pipeline_core/storage/storage.py +628 -0
  29. ai_pipeline_core/tracing.py +110 -26
  30. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +60 -23
  31. ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
  32. ai_pipeline_core-0.1.13.dist-info/RECORD +0 -36
  33. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
  34. {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/flow/config.py
@@ -10,11 +10,16 @@ Best Practice:
  to ensure type safety and proper validation of output documents.
  """
 
+ import json
  from abc import ABC
  from typing import Any, ClassVar, Iterable
 
- from ai_pipeline_core.documents import DocumentList, FlowDocument
+ from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
  from ai_pipeline_core.exceptions import DocumentValidationError
+ from ai_pipeline_core.logging import get_pipeline_logger
+ from ai_pipeline_core.storage import Storage
+
+ logger = get_pipeline_logger(__name__)
 
 
  class FlowConfig(ABC):
@@ -51,8 +56,10 @@ class FlowConfig(ABC):
  ... OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Different type!
  >>>
  >>> # Use in @pipeline_flow - RECOMMENDED PATTERN
- >>> @pipeline_flow(name="processing")
- >>> async def process(config: ProcessingFlowConfig, docs: DocumentList) -> DocumentList:
+ >>> @pipeline_flow(config=ProcessingFlowConfig, name="processing")
+ >>> async def process(
+ ... project_name: str, docs: DocumentList, flow_options: FlowOptions
+ ... ) -> DocumentList:
  ... outputs = []
  ... # ... processing logic ...
  ... return config.create_and_validate_output(outputs)
@@ -289,8 +296,10 @@ class FlowConfig(ABC):
  DocumentValidationError: If output type doesn't match OUTPUT_DOCUMENT_TYPE.
 
  Example:
- >>> @pipeline_flow(name="my_flow")
- >>> async def process_flow(config: MyFlowConfig, ...) -> DocumentList:
+ >>> @pipeline_flow(config=MyFlowConfig, name="my_flow")
+ >>> async def process_flow(
+ ... project_name: str, documents: DocumentList, flow_options: FlowOptions
+ ... ) -> DocumentList:
  >>> outputs = []
  >>> # ... processing logic ...
  >>> outputs.append(OutputDoc(...))
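The two docstring examples above reflect the new `@pipeline_flow` contract: the config class is passed to the decorator, and the flow itself receives `project_name`, the input `DocumentList`, and a `FlowOptions` instance. A minimal sketch of a flow written against that contract (the document classes and import locations are illustrative assumptions, not taken from the wheel):

```python
from ai_pipeline_core import DocumentList, FlowDocument, FlowOptions, pipeline_flow
from ai_pipeline_core.flow import FlowConfig  # assumed import path


class RawDoc(FlowDocument):  # hypothetical input document type
    pass


class ProcessedDoc(FlowDocument):  # hypothetical output document type
    pass


class ProcessingFlowConfig(FlowConfig):
    INPUT_DOCUMENT_TYPES = [RawDoc]
    OUTPUT_DOCUMENT_TYPE = ProcessedDoc


@pipeline_flow(config=ProcessingFlowConfig, name="processing")
async def process(
    project_name: str, docs: DocumentList, flow_options: FlowOptions
) -> DocumentList:
    # Placeholder transformation of each input into an output document
    outputs = [ProcessedDoc(name=doc.name, content=doc.content) for doc in docs]
    # Enforces OUTPUT_DOCUMENT_TYPE before the documents leave the flow
    return ProcessingFlowConfig.create_and_validate_output(outputs)
```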
@@ -312,3 +321,163 @@ class FlowConfig(ABC):
      documents = DocumentList(list(output))  # type: ignore[arg-type]
      cls.validate_output_documents(documents)
      return documents
+
+     @classmethod
+     async def load_documents(
+         cls,
+         uri: str,
+     ) -> DocumentList:
+         """Load documents from storage matching INPUT_DOCUMENT_TYPES.
+
+         Loads documents from a storage location based on the class's INPUT_DOCUMENT_TYPES.
+         Supports both local filesystem and Google Cloud Storage backends.
+         Automatically loads metadata (.description.md and .sources.json) when present.
+
+         Args:
+             uri: Storage URI (file://, gs://, or local path)
+
+         Returns:
+             DocumentList containing loaded documents matching INPUT_DOCUMENT_TYPES
+
+         Example:
+             >>> # Load from local filesystem
+             >>> docs = await MyFlowConfig.load_documents("./data")
+             >>>
+             >>> # Load from GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
+             >>> docs = await MyFlowConfig.load_documents("gs://bucket/data")
+         """
+         # Use INPUT_DOCUMENT_TYPES if not specified
+         storage = await Storage.from_uri(uri)
+         loaded_documents = DocumentList()
+
+         # Process each document type
+         for doc_type in cls.INPUT_DOCUMENT_TYPES:
+             canonical_name = doc_type.canonical_name()
+             doc_storage = storage.with_base(canonical_name)
+
+             # Check if subdirectory exists
+             if not await doc_storage.exists(""):
+                 logger.debug(f"Subdirectory {canonical_name} not found, skipping")
+                 continue
+
+             # List files in subdirectory
+             objects = await doc_storage.list("", recursive=False, include_dirs=False)
+
+             # Create lookup set for metadata files
+             object_keys = {obj.key for obj in objects}
+
+             # Filter out metadata files
+             doc_files = [
+                 obj
+                 for obj in objects
+                 if not obj.key.endswith(Document.DESCRIPTION_EXTENSION)
+                 and not obj.key.endswith(Document.SOURCES_EXTENSION)
+             ]
+
+             for obj in doc_files:
+                 try:
+                     # Load document content
+                     content = await doc_storage.read_bytes(obj.key)
+
+                     # Load metadata if present
+                     description = None
+                     sources: list[str] = []
+
+                     # Check for description in objects list
+                     desc_path = f"{obj.key}{Document.DESCRIPTION_EXTENSION}"
+                     if desc_path in object_keys:
+                         try:
+                             description = await doc_storage.read_text(desc_path)
+                         except Exception as e:
+                             logger.warning(f"Failed to load description for {obj.key}: {e}")
+
+                     # Check for sources in objects list
+                     sources_path = f"{obj.key}{Document.SOURCES_EXTENSION}"
+                     if sources_path in object_keys:
+                         try:
+                             sources_text = await doc_storage.read_text(sources_path)
+                             sources = json.loads(sources_text)
+                         except Exception as e:
+                             logger.warning(f"Failed to load sources for {obj.key}: {e}")
+
+                     # Create document instance
+                     doc = doc_type(
+                         name=obj.key,
+                         content=content,
+                         description=description,
+                         sources=sources,
+                     )
+
+                     loaded_documents.append(doc)
+                     logger.debug(f"Loaded {doc_type.__name__} document: {obj.key}")
+                 except Exception as e:
+                     logger.error(f"Failed to load {doc_type.__name__} document {obj.key}: {e}")
+
+         logger.info(f"Loaded {len(loaded_documents)} documents from {uri}")
+         return loaded_documents
+
+     @classmethod
+     async def save_documents(
+         cls,
+         uri: str,
+         documents: DocumentList,
+         *,
+         validate_output_type: bool = True,
+     ) -> None:
+         """Save documents to storage with metadata.
+
+         Saves FlowDocument instances to a storage location with their content
+         and metadata files (Document.DESCRIPTION_EXTENSION and Document.SOURCES_EXTENSION).
+         Non-FlowDocument instances (TaskDocument, TemporaryDocument) are skipped.
+
+         Args:
+             uri: Storage URI (file://, gs://, or local path)
+             documents: DocumentList to save
+             validate_output_type: If True, validate documents match cls.OUTPUT_DOCUMENT_TYPE
+
+         Raises:
+             DocumentValidationError: If validate_output_type=True and documents don't match
+                 OUTPUT_DOCUMENT_TYPE
+
+         Example:
+             >>> # Save to local filesystem
+             >>> await MyFlowConfig.save_documents("./output", docs)
+             >>>
+             >>> # Save to GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
+             >>> await MyFlowConfig.save_documents("gs://bucket/output", docs)
+         """
+         # Validate output type if requested
+         if validate_output_type:
+             cls.validate_output_documents(documents)
+
+         storage = await Storage.from_uri(uri)
+         saved_count = 0
+
+         for doc in documents:
+             # Skip non-FlowDocument instances
+             if not isinstance(doc, FlowDocument):
+                 logger.warning(f"Skipping non-FlowDocument: {type(doc).__name__}")
+                 continue
+
+             # Get canonical name for subdirectory
+             canonical_name = doc.canonical_name()
+             doc_storage = storage.with_base(canonical_name)
+
+             # Save document content
+             await doc_storage.write_bytes(doc.name, doc.content)
+             saved_count += 1
+
+             # Save description if present
+             if doc.description:
+                 desc_path = f"{doc.name}{Document.DESCRIPTION_EXTENSION}"
+                 await doc_storage.write_text(desc_path, doc.description)
+
+             # Save sources if present
+             if doc.sources:
+                 sources_path = f"{doc.name}{Document.SOURCES_EXTENSION}"
+                 sources_json = json.dumps(doc.sources, indent=2)
+                 await doc_storage.write_text(sources_path, sources_json)
+
+             logger.debug(f"Saved {type(doc).__name__} document: {doc.name}")
+
+         logger.info(f"Saved {saved_count} documents to {uri}")
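Taken together, the new `load_documents` and `save_documents` classmethods let a config round-trip its documents through local or GCS storage outside of a flow run. A rough usage sketch, continuing the hypothetical `ProcessingFlowConfig` / `ProcessedDoc` definitions from the earlier sketch (the `./work` path is also made up):

```python
import asyncio

from ai_pipeline_core import DocumentList


async def main() -> None:
    # Reads ./work/<canonical_name>/* for each type in INPUT_DOCUMENT_TYPES,
    # picking up .description.md / .sources.json metadata when present
    docs = await ProcessingFlowConfig.load_documents("./work")

    outputs = DocumentList(
        [ProcessedDoc(name=f"out_{d.name}", content=d.content) for d in docs]
    )

    # Validates against OUTPUT_DOCUMENT_TYPE by default, then writes content plus metadata;
    # a gs://bucket/path URI would target Google Cloud Storage instead
    await ProcessingFlowConfig.save_documents("./work", outputs)


asyncio.run(main())
```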
ai_pipeline_core/flow/options.py
@@ -60,11 +60,11 @@ class FlowOptions(BaseSettings):
  add flow-specific parameters with appropriate validation.
  """
 
- core_model: ModelName | str = Field(
+ core_model: ModelName = Field(
  default="gpt-5",
  description="Primary model for complex analysis and generation tasks.",
  )
- small_model: ModelName | str = Field(
+ small_model: ModelName = Field(
  default="gpt-5-mini",
  description="Fast, cost-effective model for simple tasks and orchestration.",
  )
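With the `| str` escape hatch removed, `core_model` and `small_model` now accept only `ModelName` values; subclasses still extend `FlowOptions` the same way. A small sketch of a project-specific options class (the extra field is purely illustrative, and the top-level `FlowOptions` import is an assumption):

```python
from pydantic import Field

from ai_pipeline_core import FlowOptions  # assumed re-export of flow/options.py


class MyFlowOptions(FlowOptions):
    # core_model / small_model are inherited and are now validated as ModelName literals
    chunk_size: int = Field(default=2000, gt=0, description="Hypothetical flow-specific knob")
```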
ai_pipeline_core/llm/__init__.py
@@ -8,6 +8,8 @@ from .ai_messages import AIMessages, AIMessageType
  from .client import (
  generate,
  generate_structured,
+ generate_with_retry_for_testing,
+ process_messages_for_testing,
  )
  from .model_options import ModelOptions
  from .model_response import ModelResponse, StructuredModelResponse
@@ -17,9 +19,12 @@ __all__ = [
  "AIMessages",
  "AIMessageType",
  "ModelName",
- "ModelOptions",
  "ModelResponse",
+ "ModelOptions",
  "StructuredModelResponse",
  "generate",
  "generate_structured",
+ # Internal functions exposed for testing only
+ "process_messages_for_testing",
+ "generate_with_retry_for_testing",
  ]
ai_pipeline_core/llm/ai_messages.py
@@ -48,22 +48,31 @@ class AIMessages(list[AIMessageType]):
  - ModelResponse: Becomes {"role": "assistant", "content": response.content}
 
  Note: Document conversion is automatic. Text content becomes user text messages.
- Images are sent to vision-capable models (non-vision models will raise ValueError).
- PDFs are attached when supported by the model, otherwise a text extraction
- fallback is used. LiteLLM proxy handles the specific encoding requirements
- for each provider.
+
+ VISION/PDF MODEL COMPATIBILITY WARNING:
+ Images require vision-capable models (e.g., gpt-4o, gemini-pro-vision, claude-3-haiku).
+ Non-vision models will raise ValueError when encountering image documents.
+ PDFs require models with document processing support - check your model's capabilities
+ before including PDF documents in messages. Unsupported models may fall back to
+ text extraction or raise errors depending on provider configuration.
+ LiteLLM proxy handles the specific encoding requirements for each provider.
 
  IMPORTANT: Although AIMessages can contain Document entries, the LLM client functions
  expect `messages` to be `AIMessages` or `str`. If you start from a Document or a list
  of Documents, build AIMessages first (e.g., `AIMessages([doc])` or `AIMessages(docs)`).
 
+ CAUTION: AIMessages is a list subclass. Always use list construction (e.g.,
+ `AIMessages(["text"])`) or empty constructor with append (e.g.,
+ `AIMessages(); messages.append("text")`). Never pass raw strings directly to the
+ constructor (`AIMessages("text")`) as this will iterate over the string characters
+ instead of treating it as a single message.
+
  Example:
  >>> from ai_pipeline_core import llm
  >>> messages = AIMessages()
  >>> messages.append("What is the capital of France?")
  >>> response = await llm.generate("gpt-5", messages=messages)
  >>> messages.append(response) # Add the actual response
- >>> prompt = messages.get_last_message_as_str() # Get the last message as a string
  """
 
  def get_last_message(self) -> AIMessageType:
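The CAUTION above exists because `AIMessages` inherits list construction semantics, so `AIMessages("text")` iterates the string. A quick illustration of the difference:

```python
from ai_pipeline_core.llm import AIMessages

good = AIMessages(["What is the capital of France?"])
print(len(good))  # 1 -- a single user message

bad = AIMessages("What is the capital of France?")
print(len(bad))   # 30 -- every character became its own entry

# Building incrementally is also safe:
messages = AIMessages()
messages.append("What is the capital of France?")
```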
@@ -78,8 +87,6 @@ class AIMessages(list[AIMessageType]):
  def get_last_message_as_str(self) -> str:
  """Get the last message as a string, raising if not a string.
 
- @public
-
  Returns:
  The last message as a string.
 
ai_pipeline_core/llm/client.py
@@ -24,7 +24,6 @@ from pydantic import BaseModel
 
  from ai_pipeline_core.exceptions import LLMError
  from ai_pipeline_core.settings import settings
- from ai_pipeline_core.tracing import trace
 
  from .ai_messages import AIMessages
  from .model_options import ModelOptions
@@ -38,6 +37,7 @@ def _process_messages(
  context: AIMessages,
  messages: AIMessages,
  system_prompt: str | None = None,
+ cache_ttl: str | None = "120s",
  ) -> list[ChatCompletionMessageParam]:
  """Process and format messages for LLM API consumption.
 
@@ -49,21 +49,25 @@ def _process_messages(
  context: Messages to be cached (typically expensive/static content).
  messages: Regular messages without caching (dynamic queries).
  system_prompt: Optional system instructions for the model.
+ cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+ Set to None or empty string to disable caching.
 
  Returns:
  List of formatted messages ready for API calls, with:
  - System prompt at the beginning (if provided)
- - Context messages with cache_control on the last one
+ - Context messages with cache_control on the last one (if cache_ttl)
  - Regular messages without caching
 
  System Prompt Location:
- The system prompt from ModelOptions.system_prompt is always injected
- as the FIRST message with role="system". It is NOT cached with context,
- allowing dynamic system prompts without breaking cache efficiency.
+ The system prompt parameter is always injected as the FIRST message
+ with role="system". It is NOT cached with context, allowing dynamic
+ system prompts without breaking cache efficiency.
 
  Cache behavior:
- The last context message gets ephemeral caching (120s TTL)
+ The last context message gets ephemeral caching with specified TTL
  to reduce token usage on repeated calls with same context.
+ If cache_ttl is None or empty string (falsy), no caching is applied.
+ Only the last context message receives cache_control to maximize efficiency.
 
  Note:
  This is an internal function used by _generate_with_retry().
@@ -80,11 +84,12 @@ def _process_messages(
  # Use AIMessages.to_prompt() for context
  context_messages = context.to_prompt()
 
- # Apply caching to last context message
- context_messages[-1]["cache_control"] = { # type: ignore
-     "type": "ephemeral",
-     "ttl": "120s", # Cache for 2m
- }
+ # Apply caching to last context message if cache_ttl is set
+ if cache_ttl:
+     context_messages[-1]["cache_control"] = { # type: ignore
+         "type": "ephemeral",
+         "ttl": cache_ttl,
+     }
 
  processed_messages.extend(context_messages)
 
@@ -173,7 +178,9 @@ async def _generate_with_retry(
  if not context and not messages:
  raise ValueError("Either context or messages must be provided")
 
- processed_messages = _process_messages(context, messages, options.system_prompt)
+ processed_messages = _process_messages(
+     context, messages, options.system_prompt, options.cache_ttl
+ )
  completion_kwargs: dict[str, Any] = {
  "model": model,
  "messages": processed_messages,
@@ -213,9 +220,8 @@ async def _generate_with_retry(
  raise LLMError("Unknown error occurred during LLM generation.")
 
 
- @trace(ignore_inputs=["context"])
  async def generate(
- model: ModelName | str,
+ model: ModelName,
  *,
  context: AIMessages | None = None,
  messages: AIMessages | str,
@@ -230,20 +236,24 @@ async def generate(
  expensive static content separately from dynamic queries.
 
  Best Practices:
- 1. OPTIONS: Omit in 90% of cases - defaults are optimized
+ 1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
  2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
  3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+ 4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
 
  Args:
  model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
- Can be ModelName literal or any string for custom models.
+ Accepts predefined models or any string for custom models.
  context: Static context to cache (documents, examples, instructions).
  Defaults to None (empty context). Cached for 120 seconds.
  messages: Dynamic messages/queries. AIMessages or str ONLY.
  Do not pass Document or DocumentList directly.
  If string, converted to AIMessages internally.
- options: Model configuration (temperature, retries, timeout, etc.).
- Defaults to None (uses ModelOptions() with standard settings).
+ options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+ Framework defaults are production-optimized (3 retries, 10s delay, 300s timeout).
+ Configure model behavior centrally via LiteLLM proxy settings or environment
+ variables, not per API call. Provider-specific settings should be configured
+ at the proxy level.
 
  Returns:
  ModelResponse containing:
@@ -268,17 +278,26 @@ async def generate(
  # WRONG - don't convert to string yourself
  response = await llm.generate("gpt-5", messages=my_document.text) # NO!
 
+ VISION/PDF MODEL COMPATIBILITY:
+ When using Documents containing images or PDFs, ensure your model supports these formats:
+ - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+ - PDFs require document processing support (varies by provider)
+ - Non-compatible models will raise ValueError or fall back to text extraction
+ - Check model capabilities before including visual/PDF content
+
  Context vs Messages Strategy:
- context: Static, reusable content (cached 120 seconds)
+ context: Static, reusable content for caching efficiency
  - Large documents, instructions, examples
- - Same across multiple calls
+ - Remains constant across multiple calls
+ - Cached when supported by provider/proxy configuration
 
- messages: Dynamic, query-specific content
+ messages: Dynamic, per-call specific content
  - User questions, current conversation turn
- - Changes every call
+ - Changes with each API call
+ - Never cached, always processed fresh
 
  Example:
- >>> # Simple case - no options needed (90% of cases)
+ >>> # CORRECT - No options parameter (this is the recommended pattern)
  >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
  >>> print(response.content) # In production, use get_pipeline_logger instead of print
 
@@ -292,13 +311,6 @@ async def generate(
  >>> # Second call: reuses cache, saves tokens!
  >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
 
- >>> # AVOID unnecessary options (defaults are optimal)
- >>> response = await llm.generate(
- ... "gpt-5",
- ... messages="Hello",
- ... options=ModelOptions(temperature=0.7) # Default is probably fine!
- ... )
-
  >>> # Multi-turn conversation
  >>> messages = AIMessages([
  ... "What is Python?",
@@ -307,28 +319,48 @@ async def generate(
  ... ])
  >>> response = await llm.generate("gpt-5", messages=messages)
 
+ Configuration via LiteLLM Proxy:
+ >>> # Configure temperature in litellm_config.yaml:
+ >>> # model_list:
+ >>> #   - model_name: gpt-5
+ >>> #     litellm_params:
+ >>> #       model: openai/gpt-4o
+ >>> #       temperature: 0.3
+ >>> #       max_tokens: 1000
+ >>>
+ >>> # Configure retry logic in proxy:
+ >>> # general_settings:
+ >>> #   master_key: sk-1234
+ >>> #   max_retries: 5
+ >>> #   retry_delay: 15
+
  Performance:
  - Context caching saves ~50-90% tokens on repeated calls
  - First call: full token cost
- - Subsequent calls (within 120s): only messages tokens
- - Default retry delay is 10s (configurable via ModelOptions.retry_delay_seconds)
+ - Subsequent calls (within cache TTL): only messages tokens
+ - Default cache TTL is 120s (production-optimized)
+ - Default retry logic: 3 attempts with 10s delay (production-optimized)
 
  Caching:
  When enabled in your LiteLLM proxy and supported by the upstream provider,
- context messages may be cached (typical TTL ~120s) to reduce token usage on
- repeated calls. Savings depend on provider and payload; treat this as an
- optimization, not a guarantee. Cache behavior varies by proxy configuration.
+ context messages may be cached to reduce token usage on repeated calls.
+ Default TTL is 120s (optimized for production workloads). Configure caching
+ behavior centrally via your LiteLLM proxy settings, not per API call.
+ Savings depend on provider and payload; treat this as an optimization, not a guarantee.
+
+ Configuration:
+ All model behavior should be configured at the LiteLLM proxy level:
+ - Temperature, max_tokens: Set in litellm_config.yaml model_list
+ - Retry logic: Configure in proxy general_settings
+ - Timeouts: Set via proxy configuration
+ - Caching: Enable/configure in proxy cache settings
+
+ This centralizes configuration and ensures consistency across all API calls.
 
  Note:
- - Context argument is ignored by the tracer to avoid recording large data
  - All models are accessed via LiteLLM proxy
  - Automatic retry with configurable delay between attempts
  - Cost tracking via response headers
-
- See Also:
- - generate_structured: For typed/structured output
- - AIMessages: Message container with document support
- - ModelOptions: Configuration options
  """
  if isinstance(messages, str):
  messages = AIMessages([messages])
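For reference, the smallest end-to-end call under the 0.2.0 conventions: no `options` argument, with model behavior configured at the LiteLLM proxy. This assumes a configured proxy and the top-level `llm` re-export shown in the docstring examples:

```python
import asyncio

from ai_pipeline_core import llm


async def main() -> None:
    # Dynamic question only; static material would go in the `context` argument
    response = await llm.generate("gpt-5", messages="Explain quantum computing in two sentences.")
    print(response.content)


asyncio.run(main())
```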
@@ -348,9 +380,8 @@ T = TypeVar("T", bound=BaseModel)
  """Type variable for Pydantic model types in structured generation."""
 
 
- @trace(ignore_inputs=["context"])
  async def generate_structured(
- model: ModelName | str,
+ model: ModelName,
  response_format: type[T],
  *,
  context: AIMessages | None = None,
@@ -364,20 +395,71 @@ async def generate_structured(
  Type-safe generation that returns validated Pydantic model instances.
  Uses OpenAI's structured output feature for guaranteed schema compliance.
 
- Best Practices (same as generate):
- 1. OPTIONS: Omit in 90% of cases - defaults are optimized
+ IMPORTANT: Search models (models with '-search' suffix) do not support
+ structured output. Use generate() instead for search models.
+
+ Best Practices:
+ 1. OPTIONS: DO NOT use the options parameter - omit it entirely for production use
  2. MESSAGES: Use AIMessages or str - wrap Documents in AIMessages
- 3. CONTEXT vs MESSAGES: Use context for static/cacheable, messages for dynamic
+ 3. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
+ 4. See generate() documentation for more details
+
+ Context vs Messages Strategy:
+ context: Static, reusable content for caching efficiency
+ - Schemas, examples, instructions
+ - Remains constant across multiple calls
+ - Cached when supported by provider/proxy configuration
+
+ messages: Dynamic, per-call specific content
+ - Data to be structured, user queries
+ - Changes with each API call
+ - Never cached, always processed fresh
+
+ Complex Task Pattern:
+ For complex tasks like research or deep analysis, it's recommended to use
+ a two-step approach:
+ 1. First use generate() with a capable model to perform the analysis
+ 2. Then use generate_structured() with a smaller model to convert the
+ response into structured output
+
+ This pattern is more reliable than trying to force complex reasoning
+ directly into structured format:
+
+ >>> # Step 1: Research/analysis with generate() - no options parameter
+ >>> research = await llm.generate(
+ ... "gpt-5",
+ ... messages="Research and analyze this complex topic..."
+ ... )
+ >>>
+ >>> # Step 2: Structure the results with generate_structured()
+ >>> structured = await llm.generate_structured(
+ ... "gpt-5-mini", # Smaller model is fine for structuring
+ ... response_format=ResearchSummary,
+ ... messages=f"Extract key information: {research.content}"
+ ... )
 
  Args:
  model: Model to use (must support structured output).
+ Search models (models with '-search' suffix) do not support structured output.
  response_format: Pydantic model class defining the output schema.
  The model will generate JSON matching this schema.
  context: Static context to cache (documents, schemas, examples).
  Defaults to None (empty AIMessages).
  messages: Dynamic prompts/queries. AIMessages or str ONLY.
  Do not pass Document or DocumentList directly.
- options: Model configuration. response_format is set automatically.
+ options: DEPRECATED - DO NOT USE. Reserved for internal framework usage only.
+ Framework defaults are production-optimized. Configure model behavior
+ centrally via LiteLLM proxy settings, not per API call.
+ The response_format is set automatically from the response_format parameter.
+
+ VISION/PDF MODEL COMPATIBILITY:
+ When using Documents with images/PDFs in structured output:
+ - Images require vision-capable models that also support structured output
+ - PDFs require models with both document processing AND structured output support
+ - Many models support either vision OR structured output, but not both
+ - Test your specific model+document combination before production use
+ - Consider two-step approach: generate() for analysis, then generate_structured()
+ for formatting
 
  Returns:
  StructuredModelResponse[T] containing:
@@ -387,6 +469,7 @@ async def generate_structured(
  Raises:
  TypeError: If response_format is not a Pydantic model class.
  ValueError: If model doesn't support structured output or no parsed content returned.
+ Structured output support varies by provider and model.
  LLMError: If generation fails after retries.
  ValidationError: If response cannot be parsed into response_format.
 
@@ -398,8 +481,9 @@ async def generate_structured(
  ... sentiment: float = Field(ge=-1, le=1)
  ... key_points: list[str] = Field(max_length=5)
  >>>
+ >>> # CORRECT - No options parameter
  >>> response = await llm.generate_structured(
- ... model="gpt-5",
+ ... "gpt-5",
  ... response_format=Analysis,
  ... messages="Analyze this product review: ..."
  ... )
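The Example above, expanded into a self-contained script (the review text is made up; this assumes a configured LiteLLM proxy and the `.parsed` accessor described for `StructuredModelResponse`):

```python
import asyncio

from pydantic import BaseModel, Field

from ai_pipeline_core import llm


class Analysis(BaseModel):
    sentiment: float = Field(ge=-1, le=1)
    key_points: list[str] = Field(max_length=5)


async def main() -> None:
    response = await llm.generate_structured(
        "gpt-5",
        response_format=Analysis,
        messages="Analyze this product review: battery life is great, but the screen scratches easily.",
    )
    analysis = response.parsed
    print(f"Sentiment: {analysis.sentiment}")
    for point in analysis.key_points:
        print(f"- {point}")


asyncio.run(main())
```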
@@ -410,11 +494,13 @@ async def generate_structured(
  ... print(f"- {point}")
 
  Supported models:
- Support varies by provider and model. Generally includes:
+ Structured output support varies by provider and model. Generally includes:
  - OpenAI: GPT-4 and newer models
  - Anthropic: Claude 3+ models
  - Google: Gemini Pro models
- Check provider documentation for specific model support.
+
+ Search models (models with '-search' suffix) do not support structured output.
+ Check provider documentation for specific support.
 
  Performance:
  - Structured output may use more tokens than free text
@@ -426,11 +512,7 @@ async def generate_structured(
  - The model generates JSON matching the schema
  - Validation happens automatically via Pydantic
  - Use Field() descriptions to guide generation
-
- See Also:
- - generate: For unstructured text generation
- - ModelOptions: Configuration including response_format
- - StructuredModelResponse: Response wrapper with .parsed property
+ - Search models (models with '-search' suffix) do not support structured output
  """
  if context is None:
  context = AIMessages()
@@ -473,3 +555,9 @@ async def generate_structured(
 
  # Create a StructuredModelResponse with the parsed value
  return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+
+
+ # Public aliases for testing internal functions
+ # These are exported to allow testing of implementation details
+ process_messages_for_testing = _process_messages
+ generate_with_retry_for_testing = _generate_with_retry
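A sketch of how the new testing aliases might be used to pin down the `cache_ttl` behavior without a live model; the assertions assume the message-dict shape implied by the code above (system message first, `cache_control` applied to the last context message):

```python
from ai_pipeline_core.llm import AIMessages, process_messages_for_testing


def test_cache_ttl_applied_only_when_truthy() -> None:
    context = AIMessages(["<large static reference>"])
    messages = AIMessages(["dynamic question"])

    cached = process_messages_for_testing(context, messages, "be terse")
    assert cached[0]["role"] == "system"
    assert cached[1]["cache_control"]["ttl"] == "120s"  # default TTL of the new parameter

    uncached = process_messages_for_testing(context, messages, "be terse", None)
    assert all("cache_control" not in m for m in uncached)
```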