ai-pipeline-core 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/PKG-INFO +5 -5
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/__init__.py +1 -1
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/document.py +12 -8
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/flow_document.py +7 -2
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/task_document.py +7 -2
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/ai_messages.py +2 -2
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/client.py +12 -8
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/model_options.py +78 -8
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/model_response.py +2 -1
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/tracing.py +5 -6
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/pyproject.toml +6 -6
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/.gitignore +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/LICENSE +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/README.md +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/__init__.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/document_list.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/mime_type.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/temporary_document.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/utils.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/exceptions.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/flow/__init__.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/flow/config.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/flow/options.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/__init__.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/model_types.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/logging/__init__.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/logging/logging.yml +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/logging/logging_config.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/logging/logging_mixin.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/pipeline.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/prefect.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/prompt_manager.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/py.typed +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/settings.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/simple_runner/__init__.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/simple_runner/cli.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/storage/__init__.py +0 -0
- {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/storage/storage.py +0 -0
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.1
+Version: 0.2.3
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,12 +20,12 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
-Requires-Dist: lmnr>=0.7.
-Requires-Dist: openai>=1.
+Requires-Dist: lmnr>=0.7.17
+Requires-Dist: openai>=1.109.1
 Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
-Requires-Dist: prefect>=3.4.
+Requires-Dist: prefect>=3.4.21
 Requires-Dist: pydantic-settings>=2.10.1
-Requires-Dist: pydantic>=2.11.
+Requires-Dist: pydantic>=2.11.9
 Requires-Dist: python-magic>=0.4.27
 Requires-Dist: ruamel-yaml>=0.18.14
 Requires-Dist: tiktoken>=0.11.0
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/document.py
RENAMED
@@ -302,7 +302,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: bytes,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> Self: ...

     @overload
@@ -313,7 +313,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: str,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> Self: ...

     @overload
@@ -324,7 +324,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: dict[str, Any],
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> Self: ...

     @overload
@@ -335,7 +335,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: list[Any],
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> Self: ...

     @overload
@@ -346,7 +346,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: BaseModel,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> Self: ...

     @classmethod
@@ -356,7 +356,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: str | bytes | dict[str, Any] | list[Any] | BaseModel,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> Self:
         r"""Create a Document with automatic content type conversion (recommended).

@@ -469,7 +469,7 @@ class Document(BaseModel, ABC):
         name: str,
         content: bytes,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> None:
         """Initialize a Document instance with raw bytes content.

@@ -509,7 +509,11 @@ class Document(BaseModel, ABC):
         if type(self) is Document:
             raise TypeError("Cannot instantiate abstract Document class directly")

-        super().__init__(name=name, content=content, description=description, sources=sources)
+        # Only pass sources if not None to let Pydantic's default_factory handle it
+        if sources is not None:
+            super().__init__(name=name, content=content, description=description, sources=sources)
+        else:
+            super().__init__(name=name, content=content, description=description)

     name: str
     description: str | None = None
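The `sources` change repeated across this file is the classic None-sentinel pattern: accept `None` in the signature and only forward the argument when the caller actually supplied one, so the field's `default_factory` can build a fresh list per instance. A minimal sketch of why the conditional `super().__init__` call matters, using a stand-in model rather than the library's real `Document`:

    from pydantic import BaseModel, Field

    class SketchDocument(BaseModel):
        name: str
        sources: list[str] = Field(default_factory=list)

    def make(name: str, sources: list[str] | None = None) -> SketchDocument:
        # Passing sources=None explicitly would fail validation against list[str];
        # omitting the kwarg lets default_factory supply a fresh [] instead.
        if sources is not None:
            return SketchDocument(name=name, sources=sources)
        return SketchDocument(name=name)

    a, b = make("a.txt"), make("b.txt")
    a.sources.append("origin")
    assert b.sources == []  # each instance owns its own list

The same fix is applied to FlowDocument and TaskDocument below.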
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/flow_document.py
RENAMED
@@ -46,7 +46,7 @@ class FlowDocument(Document):
         name: str,
         content: bytes,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> None:
         """Initialize a FlowDocument with raw bytes content.

@@ -88,7 +88,12 @@ class FlowDocument(Document):
         """
         if type(self) is FlowDocument:
             raise TypeError("Cannot instantiate abstract FlowDocument class directly")
-        super().__init__(name=name, content=content, description=description, sources=sources)
+
+        # Only pass sources if not None to let Pydantic's default_factory handle it
+        if sources is not None:
+            super().__init__(name=name, content=content, description=description, sources=sources)
+        else:
+            super().__init__(name=name, content=content, description=description)

     @final
     def get_base_type(self) -> Literal["flow"]:
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/documents/task_document.py
RENAMED
@@ -51,7 +51,7 @@ class TaskDocument(Document):
         name: str,
         content: bytes,
         description: str | None = None,
-        sources: list[str] =
+        sources: list[str] | None = None,
     ) -> None:
         """Initialize a TaskDocument with raw bytes content.

@@ -93,7 +93,12 @@ class TaskDocument(Document):
         """
         if type(self) is TaskDocument:
             raise TypeError("Cannot instantiate abstract TaskDocument class directly")
-        super().__init__(name=name, content=content, description=description, sources=sources)
+
+        # Only pass sources if not None to let Pydantic's default_factory handle it
+        if sources is not None:
+            super().__init__(name=name, content=content, description=description, sources=sources)
+        else:
+            super().__init__(name=name, content=content, description=description)

     @final
     def get_base_type(self) -> Literal["task"]:
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/ai_messages.py
RENAMED
@@ -279,8 +279,8 @@ class AIMessages(list[AIMessageType]):
         for message in self:
             if isinstance(message, Document):
                 serialized_document = message.serialize_model()
-
-                messages.append(json.dumps(
+                filtered_doc = {k: v for k, v in serialized_document.items() if k != "content"}
+                messages.append(json.dumps(filtered_doc, indent=2))
             elif isinstance(message, ModelResponse):
                 messages.append(message.content)
             else:
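The ai_messages.py change excludes the content field from the JSON dump of a Document, so only its metadata lands in the serialized message. A small sketch of the filtering step, with an assumed serialize_model() dict (the field names here are illustrative, not the library's exact schema):

    import json

    serialized_document = {
        "name": "report.md",
        "description": "Q3 notes",
        "content": "<potentially large payload>",
    }
    filtered_doc = {k: v for k, v in serialized_document.items() if k != "content"}
    print(json.dumps(filtered_doc, indent=2))
    # {
    #   "name": "report.md",
    #   "description": "Q3 notes"
    # }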
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/client.py
RENAMED
@@ -37,7 +37,7 @@ def _process_messages(
     context: AIMessages,
     messages: AIMessages,
     system_prompt: str | None = None,
-    cache_ttl: str | None = "
+    cache_ttl: str | None = "5m",
 ) -> list[ChatCompletionMessageParam]:
     """Process and format messages for LLM API consumption.

@@ -245,7 +245,7 @@ async def generate(
         model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
             Accepts predefined models or any string for custom models.
         context: Static context to cache (documents, examples, instructions).
-            Defaults to None (empty context). Cached for
+            Defaults to None (empty context). Cached for 5 minutes by default.
         messages: Dynamic messages/queries. AIMessages or str ONLY.
             Do not pass Document or DocumentList directly.
             If string, converted to AIMessages internally.
@@ -338,13 +338,13 @@ async def generate(
         - Context caching saves ~50-90% tokens on repeated calls
         - First call: full token cost
         - Subsequent calls (within cache TTL): only messages tokens
-        - Default cache TTL is
+        - Default cache TTL is 5m (production-optimized)
         - Default retry logic: 3 attempts with 10s delay (production-optimized)

     Caching:
         When enabled in your LiteLLM proxy and supported by the upstream provider,
         context messages may be cached to reduce token usage on repeated calls.
-        Default TTL is
+        Default TTL is 5m (optimized for production workloads). Configure caching
         behavior centrally via your LiteLLM proxy settings, not per API call.
         Savings depend on provider and payload; treat this as an optimization, not a guarantee.

@@ -447,10 +447,11 @@ async def generate_structured(
             Defaults to None (empty AIMessages).
         messages: Dynamic prompts/queries. AIMessages or str ONLY.
             Do not pass Document or DocumentList directly.
-        options:
-
-
-
+        options: Optional ModelOptions for configuring temperature, retries, etc.
+            If provided, it will NOT be mutated (a copy is created internally).
+            The response_format field is set automatically from the response_format parameter.
+            In most cases, leave as None to use framework defaults.
+            Configure model behavior centrally via LiteLLM proxy settings when possible.

     VISION/PDF MODEL COMPATIBILITY:
         When using Documents with images/PDFs in structured output:
@@ -518,6 +519,9 @@ async def generate_structured(
        context = AIMessages()
     if options is None:
         options = ModelOptions()
+    else:
+        # Create a copy to avoid mutating the caller's options object
+        options = options.model_copy()

     options.response_format = response_format

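The else-branch added to generate_structured() guards against an aliasing bug: writing response_format onto the caller's options object would leak state into later calls that reuse it. The fix relies on Pydantic's model_copy(). A minimal sketch with a stand-in model:

    from pydantic import BaseModel

    class SketchOptions(BaseModel):
        temperature: float | None = None
        response_format: type[BaseModel] | None = None

    class Answer(BaseModel):
        text: str

    caller_options = SketchOptions(temperature=0.7)
    options = caller_options.model_copy()  # shallow copy is enough here
    options.response_format = Answer       # only the copy is reassigned

    assert caller_options.response_format is None  # caller's object untouched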
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/model_options.py
RENAMED
@@ -10,7 +10,7 @@ from pydantic import BaseModel


 class ModelOptions(BaseModel):
-    """Configuration options for LLM generation requests.
+    r"""Configuration options for LLM generation requests.

     ModelOptions encapsulates all configuration parameters for model
     generation, including model behavior settings, retry logic, and
@@ -45,7 +45,7 @@ class ModelOptions(BaseModel):

         timeout: Maximum seconds to wait for response (default: 300).

-        cache_ttl: Cache TTL for context messages (default: "
+        cache_ttl: Cache TTL for context messages (default: "5m").
             String format like "60s", "5m", or None to disable caching.
             Applied to the last context message for efficient token reuse.

@@ -62,11 +62,37 @@ class ModelOptions(BaseModel):
         max_completion_tokens: Maximum tokens to generate.
             None uses model default.

+        stop: Stop sequences that halt generation when encountered.
+            Can be a single string or list of strings.
+            When the model generates any of these sequences, it stops immediately.
+            Maximum of 4 stop sequences supported by most providers.
+
         response_format: Pydantic model class for structured output.
             Pass a Pydantic model; the client converts it to JSON Schema.
             Set automatically by generate_structured().
             Structured output support varies by provider and model.

+        verbosity: Controls output verbosity for models that support it.
+            Literal["low", "medium", "high"] | None
+            "low": Minimal output
+            "medium": Standard output
+            "high": Detailed output
+            Note: Only some models support verbosity control.
+
+        usage_tracking: Enable token usage tracking in API responses (default: True).
+            When enabled, adds {"usage": {"include": True}} to extra_body.
+            Disable for providers that don't support usage tracking.
+
+        user: User identifier for cost tracking and monitoring.
+            A unique identifier representing the end-user, which can help track costs
+            and detect abuse. Maximum length is typically 256 characters.
+            Useful for multi-tenant applications or per-user billing.
+
+        extra_body: Additional provider-specific parameters to pass in request body.
+            Dictionary of custom parameters not covered by standard options.
+            Merged with usage_tracking if both are set.
+            Useful for beta features or provider-specific capabilities.
+
     Example:
         >>> # Basic configuration
         >>> options = ModelOptions(
@@ -103,13 +129,35 @@ class ModelOptions(BaseModel):
         ...     reasoning_effort="high",  # Deep reasoning
         ...     timeout=600  # More time for complex reasoning
         ... )
+        >>>
+        >>> # With stop sequences
+        >>> options = ModelOptions(
+        ...     stop=["STOP", "END", "\n\n"],  # Stop on these sequences
+        ...     temperature=0.7
+        ... )
+        >>>
+        >>> # With custom extra_body parameters
+        >>> options = ModelOptions(
+        ...     extra_body={"custom_param": "value", "beta_feature": True},
+        ...     usage_tracking=True  # Still tracks usage alongside custom params
+        ... )
+        >>>
+        >>> # With user tracking for cost monitoring
+        >>> options = ModelOptions(
+        ...     user="user_12345",  # Track costs per user
+        ...     temperature=0.7
+        ... )

     Note:
         - Not all options apply to all models
         - search_context_size only works with search models
         - reasoning_effort only works with models that support explicit reasoning
         - response_format is set internally by generate_structured()
-        - cache_ttl accepts formats like "120s", "5m", "1h" or None to disable caching
+        - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
+        - stop sequences are limited to 4 by most providers
+        - user identifier helps track costs per end-user (max 256 chars)
+        - extra_body allows passing provider-specific parameters
+        - usage_tracking is enabled by default for cost monitoring
     """

     temperature: float | None = None
@@ -118,11 +166,16 @@ class ModelOptions(BaseModel):
     reasoning_effort: Literal["low", "medium", "high"] | None = None
     retries: int = 3
     retry_delay_seconds: int = 10
-    timeout: int =
-    cache_ttl: str | None = "
+    timeout: int = 600
+    cache_ttl: str | None = "5m"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
+    stop: str | list[str] | None = None
     response_format: type[BaseModel] | None = None
+    verbosity: Literal["low", "medium", "high"] | None = None
+    usage_tracking: bool = True
+    user: str | None = None
+    extra_body: dict[str, Any] | None = None

     def to_openai_completion_kwargs(self) -> dict[str, Any]:
         """Convert options to OpenAI API completion parameters.
@@ -140,10 +193,14 @@ class ModelOptions(BaseModel):
         API parameter mapping:
         - temperature -> temperature
         - max_completion_tokens -> max_completion_tokens
+        - stop -> stop (string or list of strings)
         - reasoning_effort -> reasoning_effort
         - search_context_size -> web_search_options.search_context_size
         - response_format -> response_format
         - service_tier -> service_tier
+        - verbosity -> verbosity
+        - user -> user (for cost tracking)
+        - extra_body -> extra_body (merged with usage tracking)

         Web Search Structure:
             When search_context_size is set, creates:
@@ -163,17 +220,21 @@ class ModelOptions(BaseModel):
         """
         kwargs: dict[str, Any] = {
             "timeout": self.timeout,
-            "extra_body": {
-                "usage": {"include": True},  # For openrouter cost tracking
-            },
+            "extra_body": {},
         }

+        if self.extra_body:
+            kwargs["extra_body"] = self.extra_body
+
         if self.temperature:
             kwargs["temperature"] = self.temperature

         if self.max_completion_tokens:
             kwargs["max_completion_tokens"] = self.max_completion_tokens

+        if self.stop:
+            kwargs["stop"] = self.stop
+
         if self.reasoning_effort:
             kwargs["reasoning_effort"] = self.reasoning_effort

@@ -186,4 +247,13 @@ class ModelOptions(BaseModel):
         if self.service_tier:
             kwargs["service_tier"] = self.service_tier

+        if self.verbosity:
+            kwargs["verbosity"] = self.verbosity
+
+        if self.user:
+            kwargs["user"] = self.user
+
+        if self.usage_tracking:
+            kwargs["extra_body"]["usage"] = {"include": True}
+
         return kwargs
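Tracing the updated to_openai_completion_kwargs() by hand shows how the new fields land in the request. The expected dict below is derived from the code above, not from running the released package; note the ordering, where a caller-supplied extra_body replaces the default empty dict first and the usage entry is merged into it last:

    from ai_pipeline_core.llm.model_options import ModelOptions  # path per the file list above

    options = ModelOptions(
        stop=["END"],
        user="user_12345",
        extra_body={"beta_feature": True},
    )
    kwargs = options.to_openai_completion_kwargs()
    # kwargs == {
    #     "timeout": 600,
    #     "extra_body": {"beta_feature": True, "usage": {"include": True}},
    #     "stop": ["END"],
    #     "user": "user_12345",
    # }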
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/llm/model_response.py
RENAMED
@@ -110,7 +110,8 @@ class ModelResponse(ChatCompletion):
         >>> if "error" in response.content.lower():
         ...     # Handle error case
         """
-
+        content = self.choices[0].message.content or ""
+        return content.split("</think>")[-1].strip()

     def set_model_options(self, options: dict[str, Any]) -> None:
         """Store the model configuration used for generation.
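The rewritten content accessor now tolerates a None message body and strips a leading reasoning block of the kind some models emit inside think tags. The string logic in isolation:

    raw = "<think>draft reasoning...</think>\nFinal answer."
    assert raw.split("</think>")[-1].strip() == "Final answer."

    # Responses without a reasoning block pass through unchanged:
    assert "plain reply".split("</think>")[-1].strip() == "plain reply"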
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/ai_pipeline_core/tracing.py
RENAMED
@@ -15,7 +15,7 @@ from functools import wraps
 from typing import Any, Callable, Literal, ParamSpec, TypeVar, cast, overload

 from lmnr import Attributes, Instruments, Laminar, observe
-from pydantic import BaseModel
+from pydantic import BaseModel, Field

 # Import for document trimming - needed for isinstance checks
 # These are lazy imports only used when trim_documents is enabled
@@ -226,8 +226,8 @@ class TraceInfo(BaseModel):

     session_id: str | None = None
     user_id: str | None = None
-    metadata: dict[str, str] =
-    tags: list[str] =
+    metadata: dict[str, str] = Field(default_factory=dict)
+    tags: list[str] = Field(default_factory=list)

     def get_observe_kwargs(self) -> dict[str, Any]:
         """Convert TraceInfo to kwargs for Laminar's observe decorator.
@@ -502,11 +502,10 @@ def trace(
         observe_name = name or f.__name__
         _observe = observe

-        # Store the new parameters
         _session_id = session_id
         _user_id = user_id
-        _metadata = metadata
-        _tags = tags
+        _metadata = metadata if metadata is not None else {}
+        _tags = tags if tags is not None else []
         _span_type = span_type
         _ignore_input = ignore_input
         _ignore_output = ignore_output
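Both tracing fixes replace shared-state-prone defaults with fresh containers: Field(default_factory=...) on the TraceInfo model, and explicit None-coalescing inside trace(). A reduced sketch of the decorator side, abridged to the two parameters that changed:

    def trace(metadata: dict[str, str] | None = None, tags: list[str] | None = None):
        _metadata = metadata if metadata is not None else {}  # fresh dict per decoration
        _tags = tags if tags is not None else []              # fresh list per decoration
        def wrap(f):
            ...  # forward _metadata/_tags to the observe decorator
            return f
        return wrap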
{ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.3}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.2.1"
+version = "0.2.3"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}
@@ -22,12 +22,12 @@ classifiers = [
 dependencies = [
     "httpx>=0.28.1",
     "Jinja2>=3.1.6",
-    "lmnr>=0.7.
-    "openai>=1.
-    "prefect>=3.4.
+    "lmnr>=0.7.17",
+    "openai>=1.109.1",
+    "prefect>=3.4.21",
     "prefect-gcp[cloud_storage]>=0.6.10",
     "pydantic-settings>=2.10.1",
-    "pydantic>=2.11.
+    "pydantic>=2.11.9",
     "python-magic>=0.4.27",
     "ruamel.yaml>=0.18.14",
     "tiktoken>=0.11.0",
@@ -177,7 +177,7 @@ reportIncompatibleVariableOverride = "error"
 reportMissingParameterType = "warning"

 [tool.bumpversion]
-current_version = "0.2.1"
+current_version = "0.2.3"
 commit = true
 tag = true
 tag_name = "v{new_version}"