ai-pipeline-core 0.3.0__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ Key functions:
 
 import asyncio
 import time
+from io import BytesIO
 from typing import Any, TypeVar
 
 from lmnr import Laminar
@@ -21,19 +22,77 @@ from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDone
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
+from PIL import Image
 from prefect.logging import get_logger
 from pydantic import BaseModel, ValidationError
 
+from ai_pipeline_core.documents import Document
 from ai_pipeline_core.exceptions import LLMError
+from ai_pipeline_core.images import ImageProcessingConfig, process_image_to_documents
 from ai_pipeline_core.settings import settings
 
-from .ai_messages import AIMessages
+from .ai_messages import AIMessages, AIMessageType
 from .model_options import ModelOptions
 from .model_response import ModelResponse, StructuredModelResponse
 from .model_types import ModelName
 
 logger = get_logger()
 
+# Image splitting configs for automatic large-image handling at the LLM boundary.
+# Gemini supports up to 3000x3000; all other models use a conservative 1000x1000 default.
+_GEMINI_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=3000, max_pixels=9_000_000, jpeg_quality=75
+)
+_DEFAULT_IMAGE_CONFIG = ImageProcessingConfig(
+    max_dimension=1000, max_pixels=1_000_000, jpeg_quality=75
+)
+
+
+def _get_image_config(model: str) -> ImageProcessingConfig:
+    """Return the image splitting config for a model."""
+    if "gemini" in model.lower():
+        return _GEMINI_IMAGE_CONFIG
+    return _DEFAULT_IMAGE_CONFIG
+
+
+def _prepare_images_for_model(messages: AIMessages, model: str) -> AIMessages:
+    """Split image documents that exceed model constraints.
+
+    Returns a new AIMessages with oversized images replaced by tiles.
+    Returns the original instance unchanged if no splitting is needed.
+    """
+    if not any(isinstance(m, Document) and m.is_image for m in messages):
+        return messages
+
+    config = _get_image_config(model)
+    result: list[AIMessageType] = []
+    changed = False
+
+    for msg in messages:
+        if not (isinstance(msg, Document) and msg.is_image):
+            result.append(msg)
+            continue
+
+        try:
+            with Image.open(BytesIO(msg.content)) as img:
+                w, h = img.size
+        except Exception:
+            result.append(msg)
+            continue
+
+        if w <= config.max_dimension and h <= config.max_dimension and w * h <= config.max_pixels:
+            result.append(msg)
+            continue
+
+        name_prefix = msg.name.rsplit(".", 1)[0] if "." in msg.name else msg.name
+        tiles = process_image_to_documents(msg, config=config, name_prefix=name_prefix)
+        result.extend(tiles)
+        changed = True
+
+    if not changed:
+        return messages
+    return AIMessages(result)
+
 
 def _process_messages(
     context: AIMessages,
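For orientation, a minimal sketch of how the new image-splitting path behaves. The function and config names come from the hunk above; the AIMessages contents and the image document are hypothetical:

    # some_image_document is a hypothetical image Document from the pipeline.
    msgs = AIMessages(["Describe this screenshot", some_image_document])
    prepared = _prepare_images_for_model(msgs, "gemini-3-pro")  # 3000x3000 / 9 MP limits
    prepared = _prepare_images_for_model(msgs, "gpt-5.1")       # conservative 1000x1000 / 1 MP default
    # If the image fits the model's limits, the original AIMessages instance is
    # returned unchanged; otherwise the oversized image is replaced by the tiles
    # produced by process_image_to_documents().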
@@ -150,10 +209,8 @@ def _model_name_to_openrouter_model(model: ModelName) -> str:
     Returns:
         OpenRouter model name.
     """
-    if model == "gpt-4o-search":
-        return "openai/gpt-4o-search-preview"
-    if model == "gemini-2.5-flash-search":
-        return "google/gemini-2.5-flash:online"
+    if model == "gemini-3-flash-search":
+        return "google/gemini-3-flash:online"
     if model == "sonar-pro-search":
         return "perplexity/sonar-pro-search"
     if model.startswith("gemini"):
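As a quick illustration of the remapped search aliases, a sketch that simply restates the branches visible in this hunk:

    assert _model_name_to_openrouter_model("gemini-3-flash-search") == "google/gemini-3-flash:online"
    assert _model_name_to_openrouter_model("sonar-pro-search") == "perplexity/sonar-pro-search"
    # "gpt-4o-search" and "gemini-2.5-flash-search" are no longer special-cased in 0.3.4.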
@@ -184,7 +241,7 @@ async def _generate(
     Handles both regular and structured output generation.
 
     Args:
-        model: Model identifier (e.g., "gpt-5", "gemini-2.5-pro").
+        model: Model identifier (e.g., "gpt-5.1", "gemini-3-pro").
         messages: Formatted messages for the API.
         completion_kwargs: Additional parameters for the completion API.
 
@@ -273,6 +330,10 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")
 
+    # Auto-split large images based on model-specific constraints
+    context = _prepare_images_for_model(context, model)
+    messages = _prepare_images_for_model(messages, model)
+
     if "gemini" in model.lower() and context.approximate_tokens_count < 10000:
         # Bug fix for minimum explicit context size for Gemini models
         options.cache_ttl = None
@@ -339,7 +400,7 @@ async def generate(
     4. CONFIGURATION: Configure model behavior via LiteLLM proxy or environment variables
 
     Args:
-        model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
+        model: Model to use (e.g., "gpt-5.1", "gemini-3-pro", "grok-4.1-fast").
             Accepts predefined models or any string for custom models.
         context: Static context to cache (documents, examples, instructions).
             Defaults to None (empty context). Cached for 5 minutes by default.
@@ -367,17 +428,17 @@ async def generate(
     Wrap Documents in AIMessages - DO NOT pass directly or convert to .text:
 
         # CORRECT - wrap Document in AIMessages
-        response = await llm.generate("gpt-5", messages=AIMessages([my_document]))
+        response = await llm.generate("gpt-5.1", messages=AIMessages([my_document]))
 
         # WRONG - don't pass Document directly
-        response = await llm.generate("gpt-5", messages=my_document)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document)  # NO!
 
         # WRONG - don't convert to string yourself
-        response = await llm.generate("gpt-5", messages=my_document.text)  # NO!
+        response = await llm.generate("gpt-5.1", messages=my_document.text)  # NO!
 
     VISION/PDF MODEL COMPATIBILITY:
     When using Documents containing images or PDFs, ensure your model supports these formats:
-    - Images require vision-capable models (gpt-4o, gemini-pro-vision, claude-3-sonnet)
+    - Images require vision-capable models (gpt-5.1, gemini-3-flash, gemini-3-pro)
     - PDFs require document processing support (varies by provider)
     - Non-compatible models will raise ValueError or fall back to text extraction
     - Check model capabilities before including visual/PDF content
@@ -395,7 +456,7 @@ async def generate(
 
     Example:
         >>> # CORRECT - No options parameter (this is the recommended pattern)
-        >>> response = await llm.generate("gpt-5", messages="Explain quantum computing")
+        >>> response = await llm.generate("gpt-5.1", messages="Explain quantum computing")
         >>> print(response.content)  # In production, use get_pipeline_logger instead of print
 
         >>> # With context caching for efficiency
@@ -403,10 +464,10 @@ async def generate(
         >>> static_doc = AIMessages([large_document, "few-shot example: ..."])
         >>>
         >>> # First call: caches context
-        >>> r1 = await llm.generate("gpt-5", context=static_doc, messages="Summarize")
+        >>> r1 = await llm.generate("gpt-5.1", context=static_doc, messages="Summarize")
         >>>
         >>> # Second call: reuses cache, saves tokens!
-        >>> r2 = await llm.generate("gpt-5", context=static_doc, messages="Key points?")
+        >>> r2 = await llm.generate("gpt-5.1", context=static_doc, messages="Key points?")
 
         >>> # Multi-turn conversation
         >>> messages = AIMessages([
@@ -414,7 +475,7 @@ async def generate(
         ...     previous_response,
         ...     "Can you give an example?"
         ... ])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
 
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
@@ -509,7 +570,7 @@ async def generate_structured(
 
         >>> # Step 1: Research/analysis with generate() - no options parameter
         >>> research = await llm.generate(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     messages="Research and analyze this complex topic..."
         ... )
         >>>
@@ -566,7 +627,7 @@ async def generate_structured(
         >>>
         >>> # CORRECT - No options parameter
         >>> response = await llm.generate_structured(
-        ...     "gpt-5",
+        ...     "gpt-5.1",
         ...     response_format=Analysis,
         ...     messages="Analyze this product review: ..."
         ... )
@@ -28,7 +28,7 @@ class ModelResponse(ChatCompletion):
 
     Primary usage is adding to AIMessages for multi-turn conversations:
 
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>> messages.append(response)  # Add assistant response to conversation
         >>> print(response.content)  # Access generated text
 
@@ -43,7 +43,7 @@ class ModelResponse(ChatCompletion):
         >>> from ai_pipeline_core import llm, AIMessages
         >>>
         >>> messages = AIMessages(["Explain quantum computing"])
-        >>> response = await llm.generate("gpt-5", messages=messages)
+        >>> response = await llm.generate("gpt-5.1", messages=messages)
         >>>
         >>> # Primary usage: add to conversation
         >>> messages.append(response)
@@ -81,7 +81,7 @@ class ModelResponse(ChatCompletion):
         >>> # Usually created internally by generate()
         >>> response = ModelResponse(
         ...     chat_completion=completion,
-        ...     model_options={"temperature": 0.7, "model": "gpt-4"},
+        ...     model_options={"temperature": 0.7, "model": "gpt-5.1"},
         ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
         ... )
     """
@@ -116,7 +116,7 @@ class ModelResponse(ChatCompletion):
             Generated text from the model, or empty string if none.
 
         Example:
-            >>> response = await generate("gpt-5", messages="Hello")
+            >>> response = await generate("gpt-5.1", messages="Hello")
             >>> text = response.content  # The generated response
             >>>
             >>> # Common pattern: add to messages then use content
@@ -185,7 +185,7 @@ class ModelResponse(ChatCompletion):
 
         Example:
             >>> response = await llm.generate(
-            ...     "gpt-5",
+            ...     "gpt-5.1",
             ...     context=large_doc,
             ...     messages="Summarize this"
             ... )
@@ -15,17 +15,15 @@ from typing import Literal, TypeAlias
 ModelName: TypeAlias = (
     Literal[
         # Core models
-        "gemini-2.5-pro",
-        "gpt-5",
-        "grok-4",
+        "gemini-3-pro",
+        "gpt-5.1",
         # Small models
-        "gemini-2.5-flash",
+        "gemini-3-flash",
         "gpt-5-mini",
-        "grok-4-fast",
+        "grok-4.1-fast",
         # Search models
-        "gemini-2.5-flash-search",
+        "gemini-3-flash-search",
         "sonar-pro-search",
-        "gpt-4o-search",
     ]
     | str
 )
@@ -38,15 +36,15 @@ string for custom models. The type is a union of predefined literals
 and str, giving you the best of both worlds: suggestions for known
 models and flexibility for custom ones.
 
-Note: These are example common model names as of Q3 2025. Actual availability
+Note: These are example common model names as of Q1 2026. Actual availability
 depends on your LiteLLM proxy configuration and provider access.
 
 Model categories:
-    Core models (gemini-2.5-pro, gpt-5, grok-4):
+    Core models (gemini-3-pro, gpt-5.1):
         High-capability models for complex tasks requiring deep reasoning,
         nuanced understanding, or creative generation.
 
-    Small models (gemini-2.5-flash, gpt-5-mini, grok-4-fast):
+    Small models (gemini-3-flash, gpt-5-mini, grok-4.1-fast):
         Efficient models optimized for speed and cost, suitable for
         simpler tasks or high-volume processing.
 
@@ -64,7 +62,7 @@ Example:
     >>> from ai_pipeline_core import llm, ModelName
     >>>
     >>> # Predefined model with IDE autocomplete
-    >>> model: ModelName = "gpt-5"  # IDE suggests common models
+    >>> model: ModelName = "gpt-5.1"  # IDE suggests common models
     >>> response = await llm.generate(model, messages="Hello")
     >>>
     >>> # Custom model works directly
@@ -72,7 +70,7 @@ Example:
     >>> response = await llm.generate(model, messages="Hello")
     >>>
     >>> # Both types work seamlessly
-    >>> models: list[ModelName] = ["gpt-5", "custom-llm", "gemini-2.5-pro"]
+    >>> models: list[ModelName] = ["gpt-5.1", "custom-llm", "gemini-3-pro"]
 
 Note:
     The ModelName type includes both predefined literals and str,
@@ -117,7 +117,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
         Example:
             self.log_metric("processing_time", 1.23, "seconds",
-                            document_type="pdf", model="gpt-4")
+                            document_type="pdf", model="gpt-5.1")
         """
         self.logger.info(
             f"Metric: {metric_name}",
@@ -140,7 +140,7 @@ class StructuredLoggerMixin(LoggerMixin):
 
         Example:
             self.log_span("llm_generation", 1234.5,
-                          model="gpt-4", tokens=500)
+                          model="gpt-5.1", tokens=500)
         """
         self.logger.info(
             f"Span: {operation}",
@@ -144,7 +144,7 @@ class PromptBuilder(BaseModel):
         options.service_tier = None
         options.cache_ttl = None
         cache_lock = False
-        if "grok-4-fast" in model:
+        if "grok-4.1-fast" in model:
             options.max_completion_tokens = 30000
 
         if self.mode == "test":
@@ -154,7 +154,7 @@ class PromptBuilder(BaseModel):
             options.reasoning_effort = "medium"
             options.verbosity = None
 
-        if model.startswith("gpt-5"):
+        if model.startswith("gpt-5.1"):
            options.service_tier = "flex"
 
        return options, cache_lock
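The tightened model checks are easy to trip over, so here is a small sketch of how a few model strings are classified under the new conditions (plain Python, restating only the predicates in these hunks):

    for m in ("gpt-5.1", "gpt-5-mini", "grok-4.1-fast"):
        flex = m.startswith("gpt-5.1")     # only gpt-5.1 variants get service_tier="flex"
        capped = "grok-4.1-fast" in m      # only grok-4.1-fast gets max_completion_tokens=30000
        print(m, flex, capped)
    # gpt-5.1        True  False
    # gpt-5-mini     False False
    # grok-4.1-fast  False True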
@@ -224,7 +224,7 @@ class PromptBuilder(BaseModel):
         self, model: ModelName, prompt: str | AIMessages, options: ModelOptions | None = None
     ) -> str:
         options, _ = self._get_options(model, options)
-        if "gpt-5" not in model and "grok-4" not in model and "openrouter/" not in model:
+        if "gpt-5.1" not in model and "grok-4.1-fast" not in model and "openrouter/" not in model:
             options.stop = "</document>"
 
         response = await self.call(model, prompt, options)
@@ -276,6 +276,9 @@ class TraceInfo(BaseModel):
 # ---------------------------------------------------------------------------
 
 
+_debug_processor_initialized = False
+
+
 def _initialise_laminar() -> None:
     """Initialize Laminar SDK with project configuration.
 
@@ -287,17 +290,66 @@ def _initialise_laminar() -> None:
     - Uses settings.lmnr_project_api_key for authentication
     - Disables OPENAI instrument to prevent double-tracing
     - Called automatically by trace decorator on first use
+    - Optionally adds local debug processor if TRACE_DEBUG_PATH is set
 
     Note:
         This is an internal function called once per process.
         Multiple calls are safe (Laminar handles idempotency).
     """
+    global _debug_processor_initialized
+
     if settings.lmnr_project_api_key:
         Laminar.initialize(
             project_api_key=settings.lmnr_project_api_key,
             disabled_instruments=[Instruments.OPENAI] if Instruments.OPENAI else [],
         )
 
+    # Add local debug processor if configured (only once)
+    if not _debug_processor_initialized:
+        _debug_processor_initialized = True
+        debug_path = os.environ.get("TRACE_DEBUG_PATH")
+        if debug_path:
+            _setup_debug_processor(debug_path)
+
+
+def _setup_debug_processor(debug_path: str) -> None:
+    """Set up local debug trace processor."""
+    try:
+        from pathlib import Path  # noqa: PLC0415
+
+        from opentelemetry import trace  # noqa: PLC0415
+
+        from ai_pipeline_core.debug import (  # noqa: PLC0415
+            LocalDebugSpanProcessor,
+            LocalTraceWriter,
+            TraceDebugConfig,
+        )
+
+        config = TraceDebugConfig(
+            path=Path(debug_path),
+            max_element_bytes=int(os.environ.get("TRACE_DEBUG_MAX_INLINE", 10000)),
+            max_traces=int(os.environ.get("TRACE_DEBUG_MAX_TRACES", 20)) or None,
+        )
+
+        writer = LocalTraceWriter(config)
+        processor = LocalDebugSpanProcessor(writer)
+
+        # Add to tracer provider
+        provider = trace.get_tracer_provider()
+        add_processor = getattr(provider, "add_span_processor", None)
+        if add_processor is not None:
+            add_processor(processor)
+
+        # Register shutdown
+        import atexit  # noqa: PLC0415
+
+        atexit.register(processor.shutdown)
+
+    except Exception as e:
+        import logging  # noqa: PLC0415
+
+        logging.getLogger(__name__).warning(f"Failed to setup debug trace processor: {e}")
+
 
 # Overload for calls like @trace(name="...", level="debug")
 @overload
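To exercise the new local debug processor, the environment variables referenced in the hunk above can be set before the first traced call. A minimal sketch (the path value is illustrative; the variable names and defaults come from the diff):

    import os
    os.environ["TRACE_DEBUG_PATH"] = "/tmp/ai-pipeline-traces"  # enable local trace dumps
    os.environ["TRACE_DEBUG_MAX_INLINE"] = "10000"              # max inline bytes per element
    os.environ["TRACE_DEBUG_MAX_TRACES"] = "20"                 # cap on retained traces
    # The trace decorator calls _initialise_laminar() on first use, which then
    # attaches a LocalDebugSpanProcessor to the active tracer provider.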
@@ -728,7 +780,7 @@ def set_trace_cost(cost: float | str) -> None:
         >>> @pipeline_task
         >>> async def enriched_generation(prompt: str) -> str:
         ...     # LLM cost tracked automatically via ModelResponse
-        ...     response = await llm.generate("gpt-5", messages=prompt)
+        ...     response = await llm.generate("gpt-5.1", messages=prompt)
         ...
         ...     # Add cost for post-processing
         ...     processing_cost = 0.02  # Fixed cost for enrichment