ai-pipeline-core 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +1 -1
- ai_pipeline_core/documents/document.py +24 -1
- ai_pipeline_core/documents/mime_type.py +4 -4
- ai_pipeline_core/llm/ai_messages.py +32 -0
- ai_pipeline_core/llm/client.py +82 -51
- ai_pipeline_core/llm/model_options.py +19 -1
- ai_pipeline_core/llm/model_response.py +113 -173
- ai_pipeline_core/llm/model_types.py +1 -1
- ai_pipeline_core/pipeline.py +0 -11
- ai_pipeline_core/settings.py +4 -2
- ai_pipeline_core/simple_runner/cli.py +0 -2
- ai_pipeline_core/tracing.py +0 -2
- ai_pipeline_core/utils/__init__.py +8 -0
- ai_pipeline_core/utils/deploy.py +373 -0
- ai_pipeline_core/utils/remote_deployment.py +269 -0
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/METADATA +4 -4
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/RECORD +19 -16
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/__init__.py
CHANGED

ai_pipeline_core/documents/document.py
CHANGED

@@ -29,6 +29,7 @@ from typing import (
     overload,
 )

+import tiktoken
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -980,7 +981,7 @@ class Document(BaseModel, ABC):
         """Detect the MIME type from document content.

         Detection strategy (in order):
-        1. Returns '
+        1. Returns 'text/plain' for empty content
         2. Extension-based detection for known text formats (preferred)
         3. python-magic content analysis for unknown extensions
         4. Fallback to extension or 'application/octet-stream'
@@ -1103,6 +1104,28 @@ class Document(BaseModel, ABC):
             raise ValueError(f"Document is not text: {self.name}")
         return self.content.decode("utf-8")

+    @property
+    def approximate_tokens_count(self) -> int:
+        """Approximate tokens count for the document content.
+
+        @public
+
+        Uses tiktoken with gpt-4 encoding to estimate token count.
+        For text documents, encodes the actual text. For non-text
+        documents (images, PDFs, etc.), returns a fixed estimate of 1024 tokens.
+
+        Returns:
+            Approximate number of tokens for this document.
+
+        Example:
+            >>> doc = MyDocument.create(name="data.txt", content="Hello world")
+            >>> doc.approximate_tokens_count  # ~2 tokens
+        """
+        if self.is_text:
+            return len(tiktoken.encoding_for_model("gpt-4").encode(self.text))
+        else:
+            return 1024  # Fixed estimate for non-text documents
+
     def as_yaml(self) -> Any:
         r"""Parse document content as YAML.
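The new property gives a cheap pre-flight size estimate before sending documents to a model. A minimal usage sketch; the `MyDocument` subclass follows the docstring example above, and the import path is an assumption based on the file listing, not the released API:

```python
# Sketch only: assumes Document can be subclassed this way and that
# create() accepts str or bytes content, as the docstrings above suggest.
from ai_pipeline_core.documents.document import Document  # assumed path

class MyDocument(Document):
    """Hypothetical concrete document type."""

text_doc = MyDocument.create(name="data.txt", content="Hello world")
print(text_doc.approximate_tokens_count)  # ~2 tokens via tiktoken

pdf_doc = MyDocument.create(name="scan.pdf", content=b"%PDF-1.4")
print(pdf_doc.approximate_tokens_count)   # 1024, the fixed non-text estimate
```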
ai_pipeline_core/documents/mime_type.py
CHANGED

@@ -43,7 +43,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
     r"""Detect MIME type from document content and filename.

     Uses a multi-stage detection strategy for maximum accuracy:
-    1. Returns '
+    1. Returns 'text/plain' for empty content
     2. Uses extension-based detection for known formats (most reliable)
     3. Falls back to python-magic content analysis
     4. Final fallback to extension or 'application/octet-stream'
@@ -57,7 +57,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
     Never returns None or empty string.

     Fallback behavior:
-    - Empty content: '
+    - Empty content: 'text/plain'
     - Unknown extension with binary content: 'application/octet-stream'
     - Magic library failure: Falls back to extension or 'application/octet-stream'

@@ -75,13 +75,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
     >>> detect_mime_type(b'Hello World', "text.txt")
     'text/plain'
     >>> detect_mime_type(b'', "empty.txt")
-    '
+    'text/plain'
     >>> detect_mime_type(b'\\x89PNG', "image.xyz")
     'image/png'  # Magic detects PNG despite wrong extension
     """
     # Check for empty content
     if len(content) == 0:
-        return "
+        return "text/plain"

     # Try extension-based detection first for known formats
     # This is more reliable for text formats that magic might misidentify
ai_pipeline_core/llm/ai_messages.py
CHANGED

@@ -12,6 +12,7 @@ import json
 from copy import deepcopy
 from typing import Any, Callable, Iterable, SupportsIndex, Union

+import tiktoken
 from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
@@ -301,6 +302,37 @@ class AIMessages(list[AIMessageType]):
             system_prompt = ""
         return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()

+    @property
+    def approximate_tokens_count(self) -> int:
+        """Approximate tokens count for the messages.
+
+        @public
+
+        Uses tiktoken with gpt-4 encoding to estimate total token count
+        across all messages in the conversation.
+
+        Returns:
+            Approximate tokens count for all messages.
+
+        Raises:
+            ValueError: If message contains unsupported type.
+
+        Example:
+            >>> messages = AIMessages(["Hello", "World"])
+            >>> messages.approximate_tokens_count  # ~2-3 tokens
+        """
+        count = 0
+        for message in self:
+            if isinstance(message, str):
+                count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
+            elif isinstance(message, Document):
+                count += message.approximate_tokens_count
+            elif isinstance(message, ModelResponse):  # type: ignore
+                count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
+            else:
+                raise ValueError(f"Unsupported message type: {type(message)}")
+        return count
+
     @staticmethod
     def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
         """Convert a document to prompt format for LLM consumption.
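A brief usage sketch of the new property; the import path is assumed from the file listing, and exact totals depend on the tiktoken encoding:

```python
# Sketch only: AIMessages sums per-item estimates (tiktoken for plain
# strings and response text; Document.approximate_tokens_count for documents).
from ai_pipeline_core.llm.ai_messages import AIMessages  # assumed path

messages = AIMessages(["Hello", "World"])
print(messages.approximate_tokens_count)  # roughly 2-3 tokens

# Illustrative pre-flight budget check; the 100k threshold is arbitrary.
if messages.approximate_tokens_count > 100_000:
    raise ValueError("Conversation likely exceeds the target context window")
```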
ai_pipeline_core/llm/client.py
CHANGED

@@ -12,15 +12,17 @@ Key functions:
 """

 import asyncio
+import time
 from typing import Any, TypeVar

 from lmnr import Laminar
 from openai import AsyncOpenAI
+from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDoneEvent
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
 from prefect.logging import get_logger
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError

 from ai_pipeline_core.exceptions import LLMError
 from ai_pipeline_core.settings import settings
@@ -101,6 +103,42 @@ def _process_messages(
     return processed_messages


+def _model_name_to_openrouter_model(model: ModelName) -> str:
+    """Convert a model name to an OpenRouter model name.
+
+    Args:
+        model: Model name to convert.
+
+    Returns:
+        OpenRouter model name.
+    """
+    if model == "gpt-4o-search":
+        return "openai/gpt-4o-search-preview"
+    if model == "gemini-2.5-flash-search":
+        return "google/gemini-2.5-flash:online"
+    if model == "grok-4-fast-search":
+        return "x-ai/grok-4-fast:online"
+    if model == "sonar-pro-search":
+        return "perplexity/sonar-reasoning-pro"
+    if model.startswith("gemini"):
+        return f"google/{model}"
+    elif model.startswith("gpt"):
+        return f"openai/{model}"
+    elif model.startswith("grok"):
+        return f"x-ai/{model}"
+    elif model.startswith("claude"):
+        return f"anthropic/{model}"
+    elif model.startswith("qwen3"):
+        return f"qwen/{model}"
+    elif model.startswith("deepseek-"):
+        return f"deepseek/{model}"
+    elif model.startswith("glm-"):
+        return f"z-ai/{model}"
+    elif model.startswith("kimi-"):
+        return f"moonshotai/{model}"
+    return model
+
+
 async def _generate(
     model: str, messages: list[ChatCompletionMessageParam], completion_kwargs: dict[str, Any]
 ) -> ModelResponse:
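The helper only runs when the configured base URL contains "openrouter" (see the next hunk). The expected translations below are derived directly from the branches above; the specific model names are just illustrations:

```python
# Derived from the function body above; model names are illustrative.
assert _model_name_to_openrouter_model("sonar-pro-search") == "perplexity/sonar-reasoning-pro"  # exact match
assert _model_name_to_openrouter_model("gpt-4.1") == "openai/gpt-4.1"                 # prefix rule
assert _model_name_to_openrouter_model("claude-sonnet-4") == "anthropic/claude-sonnet-4"
assert _model_name_to_openrouter_model("mistral-large") == "mistral-large"            # no rule: unchanged
```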
@@ -126,23 +164,44 @@ async def _generate(
     - Captures response headers for cost tracking
     - Response includes model options for debugging
     """
+    if "openrouter" in settings.openai_base_url.lower():
+        model = _model_name_to_openrouter_model(model)
+
     async with AsyncOpenAI(
         api_key=settings.openai_api_key,
         base_url=settings.openai_base_url,
     ) as client:
-        [... 13 removed lines not rendered in source ...]
+        start_time = time.time()
+        first_token_time = None
+        usage = None
+        async with client.chat.completions.stream(
+            model=model,
+            messages=messages,
+            **completion_kwargs,
+        ) as stream:
+            async for event in stream:
+                if isinstance(event, ContentDeltaEvent):
+                    if not first_token_time:
+                        first_token_time = time.time()
+                elif isinstance(event, ContentDoneEvent):
+                    pass
+                elif isinstance(event, ChunkEvent):
+                    if event.chunk.usage:  # used to fix a bug with missing usage data
+                        usage = event.chunk.usage
+            if not first_token_time:
+                first_token_time = time.time()
+            raw_response = await stream.get_final_completion()
+
+        metadata = {
+            "time_taken": round(time.time() - start_time, 2),
+            "first_token_time": round(first_token_time - start_time, 2),
+        }
+        response = ModelResponse(
+            raw_response,
+            model_options=completion_kwargs,
+            metadata=metadata,
+            usage=usage,
+        )
     return response
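The move from a single completion call to the streaming helper is what lets the client measure time-to-first-token and recover usage from the final chunk. A standalone sketch of the same pattern, assuming an openai-python version that exports the non-beta `chat.completions.stream()` helper and these event classes (the model name and prompt are placeholders):

```python
# Minimal sketch of the timing/usage-capture pattern used above.
import asyncio
import time

from openai import AsyncOpenAI
from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent

async def timed_completion(prompt: str) -> None:
    start = time.time()
    first_token_time = None
    usage = None
    async with AsyncOpenAI() as client:
        async with client.chat.completions.stream(
            model="gpt-4o-mini",  # placeholder model
            messages=[{"role": "user", "content": prompt}],
            stream_options={"include_usage": True},  # usage arrives on the final chunk
        ) as stream:
            async for event in stream:
                if isinstance(event, ContentDeltaEvent) and first_token_time is None:
                    first_token_time = time.time()
                elif isinstance(event, ChunkEvent) and event.chunk.usage:
                    usage = event.chunk.usage
            completion = await stream.get_final_completion()
    if first_token_time is not None:
        print(f"first token after {first_token_time - start:.2f}s")
    print(f"usage: {usage}")
    print(completion.choices[0].message.content)

asyncio.run(timed_completion("Say hello"))
```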
@@ -182,8 +241,6 @@ async def _generate_with_retry(
         context, messages, options.system_prompt, options.cache_ttl
     )
     completion_kwargs: dict[str, Any] = {
-        "model": model,
-        "messages": processed_messages,
         **options.to_openai_completion_kwargs(),
     }
@@ -197,20 +254,18 @@ async def _generate_with_retry(
         ) as span:
             response = await _generate(model, processed_messages, completion_kwargs)
             span.set_attributes(response.get_laminar_metadata())
-            Laminar.set_span_output(
-            [... 2 removed lines not rendered in source ...]
+            Laminar.set_span_output([
+                r for r in (response.reasoning_content, response.content) if r
+            ])
+            response.validate_output()
             return response
-    except (asyncio.TimeoutError, ValueError, Exception) as e:
+    except (asyncio.TimeoutError, ValueError, ValidationError, Exception) as e:
         if not isinstance(e, asyncio.TimeoutError):
             # disable cache if it's not a timeout because it may cause an error
             completion_kwargs["extra_body"]["cache"] = {"no-cache": True}

         logger.warning(
-            "LLM generation failed (attempt
-            attempt + 1,
-            options.retries,
-            e,
+            f"LLM generation failed (attempt {attempt + 1}/{options.retries}): {e}",
         )
         if attempt == options.retries - 1:
             raise LLMError("Exhausted all retry attempts for LLM generation.") from e
@@ -453,8 +508,8 @@ async def generate_structured(
     In most cases, leave as None to use framework defaults.
     Configure model behavior centrally via LiteLLM proxy settings when possible.

-    [... 2 removed lines not rendered in source ...]
+    Note:
+        Vision/PDF model compatibility considerations:
     - Images require vision-capable models that also support structured output
     - PDFs require models with both document processing AND structured output support
     - Many models support either vision OR structured output, but not both
@@ -536,28 +591,4 @@ async def generate_structured(
     except (ValueError, LLMError):
         raise  # Explicitly re-raise to satisfy DOC502

-
-    parsed_value: T | None = None
-
-    # Check if response has choices and parsed content
-    if response.choices and hasattr(response.choices[0].message, "parsed"):
-        parsed: Any = response.choices[0].message.parsed  # type: ignore[attr-defined]
-
-        # If parsed is a dict, instantiate it as the response format class
-        if isinstance(parsed, dict):
-            parsed_value = response_format(**parsed)
-        # If it's already the right type, use it
-        elif isinstance(parsed, response_format):
-            parsed_value = parsed
-        else:
-            # Otherwise try to convert it
-            raise TypeError(
-                f"Unable to convert parsed response to {response_format.__name__}: "
-                f"got type {type(parsed).__name__}"  # type: ignore[reportUnknownArgumentType]
-            )
-
-    if parsed_value is None:
-        raise ValueError("No parsed content available from the model response")
-
-    # Create a StructuredModelResponse with the parsed value
-    return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+    return StructuredModelResponse[T].from_model_response(response)
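Per this hunk, the inline dict/type coercion now lives behind `StructuredModelResponse.from_model_response` in model_response.py, whose diff is not shown here. A rough sketch of equivalent behavior, reconstructed only from the removed lines above; the real classmethod may differ:

```python
# Hypothetical reconstruction of the logic the classmethod replaces;
# based solely on the removed inline code, not on model_response.py itself.
def parse_structured(response, response_format):
    if response.choices and hasattr(response.choices[0].message, "parsed"):
        parsed = response.choices[0].message.parsed
        if isinstance(parsed, dict):
            return response_format(**parsed)
        if isinstance(parsed, response_format):
            return parsed
        raise TypeError(f"Unable to convert parsed response to {response_format.__name__}")
    raise ValueError("No parsed content available from the model response")
```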
ai_pipeline_core/llm/model_options.py
CHANGED

@@ -88,6 +88,12 @@ class ModelOptions(BaseModel):
             and detect abuse. Maximum length is typically 256 characters.
             Useful for multi-tenant applications or per-user billing.

+        metadata: Custom metadata tags for tracking and observability.
+            Dictionary of string key-value pairs for tagging requests.
+            Useful for tracking experiments, versions, or custom attributes.
+            Maximum of 16 key-value pairs, each key/value max 64 characters.
+            Passed through to LMNR tracing and API provider metadata.
+
         extra_body: Additional provider-specific parameters to pass in request body.
             Dictionary of custom parameters not covered by standard options.
             Merged with usage_tracking if both are set.
@@ -147,6 +153,12 @@ class ModelOptions(BaseModel):
         ...     user="user_12345",  # Track costs per user
         ...     temperature=0.7
         ... )
+        >>>
+        >>> # With metadata for tracking and observability
+        >>> options = ModelOptions(
+        ...     metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
+        ...     temperature=0.7
+        ... )

     Note:
         - Not all options apply to all models
@@ -165,7 +177,7 @@ class ModelOptions(BaseModel):
     search_context_size: Literal["low", "medium", "high"] | None = None
     reasoning_effort: Literal["low", "medium", "high"] | None = None
     retries: int = 3
-    retry_delay_seconds: int =
+    retry_delay_seconds: int = 20
     timeout: int = 600
     cache_ttl: str | None = "5m"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
@@ -175,6 +187,7 @@ class ModelOptions(BaseModel):
     verbosity: Literal["low", "medium", "high"] | None = None
     usage_tracking: bool = True
     user: str | None = None
+    metadata: dict[str, str] | None = None
     extra_body: dict[str, Any] | None = None

     def to_openai_completion_kwargs(self) -> dict[str, Any]:
@@ -200,6 +213,7 @@ class ModelOptions(BaseModel):
         - service_tier -> service_tier
         - verbosity -> verbosity
         - user -> user (for cost tracking)
+        - metadata -> metadata (for tracking/observability)
         - extra_body -> extra_body (merged with usage tracking)

         Web Search Structure:
@@ -253,7 +267,11 @@ class ModelOptions(BaseModel):
         if self.user:
             kwargs["user"] = self.user

+        if self.metadata:
+            kwargs["metadata"] = self.metadata
+
         if self.usage_tracking:
             kwargs["extra_body"]["usage"] = {"include": True}
+            kwargs["stream_options"] = {"include_usage": True}

         return kwargs
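A sketch of how the new fields surface in the generated kwargs, assuming all other fields keep the defaults declared above (`usage_tracking=True`) and that `extra_body` is initialized earlier in the method:

```python
# Illustrative only; the exact dict shape depends on the rest of the method.
opts = ModelOptions(metadata={"experiment": "v1"})
kwargs = opts.to_openai_completion_kwargs()

assert kwargs["metadata"] == {"experiment": "v1"}
assert kwargs["extra_body"]["usage"] == {"include": True}
assert kwargs["stream_options"] == {"include_usage": True}  # usage on the final stream chunk
```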