ai-pipeline-core 0.2.1.tar.gz → 0.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/PKG-INFO +1 -1
  2. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/llm/client.py +4 -4
  4. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/llm/model_options.py +13 -7
  5. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/llm/model_response.py +2 -1
  6. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/pyproject.toml +2 -2
  7. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/.gitignore +0 -0
  8. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/LICENSE +0 -0
  9. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/README.md +0 -0
  10. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/__init__.py +0 -0
  11. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/document.py +0 -0
  12. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/document_list.py +0 -0
  13. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/flow_document.py +0 -0
  14. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/mime_type.py +0 -0
  15. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/task_document.py +0 -0
  16. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/temporary_document.py +0 -0
  17. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/documents/utils.py +0 -0
  18. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/exceptions.py +0 -0
  19. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/flow/__init__.py +0 -0
  20. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/flow/config.py +0 -0
  21. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/flow/options.py +0 -0
  22. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/llm/__init__.py +0 -0
  23. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/llm/ai_messages.py +0 -0
  24. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/llm/model_types.py +0 -0
  25. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/logging/__init__.py +0 -0
  26. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/logging/logging.yml +0 -0
  27. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/logging/logging_config.py +0 -0
  28. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  29. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/pipeline.py +0 -0
  30. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/prefect.py +0 -0
  31. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/prompt_manager.py +0 -0
  32. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/py.typed +0 -0
  33. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/settings.py +0 -0
  34. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  35. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/simple_runner/cli.py +0 -0
  36. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
  37. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/storage/__init__.py +0 -0
  38. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/storage/storage.py +0 -0
  39. {ai_pipeline_core-0.2.1 → ai_pipeline_core-0.2.2}/ai_pipeline_core/tracing.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.2.1
+ Version: 0.2.2
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
ai_pipeline_core/__init__.py
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

- __version__ = "0.2.1"
+ __version__ = "0.2.2"

  __all__ = [
      # Config/Settings
ai_pipeline_core/llm/client.py
@@ -37,7 +37,7 @@ def _process_messages(
      context: AIMessages,
      messages: AIMessages,
      system_prompt: str | None = None,
-     cache_ttl: str | None = "120s",
+     cache_ttl: str | None = "5m",
  ) -> list[ChatCompletionMessageParam]:
      """Process and format messages for LLM API consumption.

@@ -245,7 +245,7 @@ async def generate(
      model: Model to use (e.g., "gpt-5", "gemini-2.5-pro", "grok-4").
          Accepts predefined models or any string for custom models.
      context: Static context to cache (documents, examples, instructions).
-         Defaults to None (empty context). Cached for 120 seconds.
+         Defaults to None (empty context). Cached for 5 minutes by default.
      messages: Dynamic messages/queries. AIMessages or str ONLY.
          Do not pass Document or DocumentList directly.
          If string, converted to AIMessages internally.
@@ -338,13 +338,13 @@ async def generate(
      - Context caching saves ~50-90% tokens on repeated calls
      - First call: full token cost
      - Subsequent calls (within cache TTL): only messages tokens
-     - Default cache TTL is 120s (production-optimized)
+     - Default cache TTL is 5m (production-optimized)
      - Default retry logic: 3 attempts with 10s delay (production-optimized)

  Caching:
      When enabled in your LiteLLM proxy and supported by the upstream provider,
      context messages may be cached to reduce token usage on repeated calls.
-     Default TTL is 120s (optimized for production workloads). Configure caching
+     Default TTL is 5m (optimized for production workloads). Configure caching
      behavior centrally via your LiteLLM proxy settings, not per API call.
      Savings depend on provider and payload; treat this as an optimization, not a guarantee.
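To make the documented behavior concrete, here is a minimal sketch of the call shape described in the generate() docstring above. It assumes generate and AIMessages are importable from ai_pipeline_core.llm and that AIMessages accepts a list of strings; neither detail is shown in this diff, so treat the snippet as illustrative only.

```python
# Sketch only: the import path and the AIMessages construction are assumptions,
# based on the file layout and docstrings visible in this diff.
from ai_pipeline_core.llm import AIMessages, generate


async def ask_twice(report_text: str) -> None:
    # Static context: with caching enabled in the LiteLLM proxy, this is
    # cached with the new default TTL of 5 minutes (previously 120 seconds).
    context = AIMessages([report_text])

    # First call pays the full context token cost.
    summary = await generate(model="gpt-5", context=context, messages="Summarize the report.")

    # A second call within the 5m TTL should mostly pay for the new message tokens,
    # provided the upstream provider actually supports prompt caching.
    risks = await generate(model="gpt-5", context=context, messages="List three key risks.")

    print(summary.content)
    print(risks.content)
```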
 
ai_pipeline_core/llm/model_options.py
@@ -45,7 +45,7 @@ class ModelOptions(BaseModel):

      timeout: Maximum seconds to wait for response (default: 300).

-     cache_ttl: Cache TTL for context messages (default: "120s").
+     cache_ttl: Cache TTL for context messages (default: "5m").
          String format like "60s", "5m", or None to disable caching.
          Applied to the last context message for efficient token reuse.

@@ -109,7 +109,7 @@ class ModelOptions(BaseModel):
      - search_context_size only works with search models
      - reasoning_effort only works with models that support explicit reasoning
      - response_format is set internally by generate_structured()
-     - cache_ttl accepts formats like "120s", "5m", "1h" or None to disable caching
+     - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
      """

      temperature: float | None = None
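The cache_ttl formats listed above map directly onto the field shown in the next hunk. A short sketch of the three documented forms; the field name and defaults come from this diff, the import path from the package layout:

```python
from ai_pipeline_core.llm.model_options import ModelOptions

short_cache = ModelOptions(cache_ttl="60s")  # explicit short TTL
default_cache = ModelOptions()               # picks up the new "5m" default
no_cache = ModelOptions(cache_ttl=None)      # disables context caching entirely
```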
@@ -118,11 +118,13 @@
      reasoning_effort: Literal["low", "medium", "high"] | None = None
      retries: int = 3
      retry_delay_seconds: int = 10
-     timeout: int = 300
-     cache_ttl: str | None = "120s"
+     timeout: int = 600
+     cache_ttl: str | None = "5m"
      service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
      max_completion_tokens: int | None = None
      response_format: type[BaseModel] | None = None
+     verbosity: Literal["low", "medium", "high"] | None = None
+     usage_tracking: bool = True

      def to_openai_completion_kwargs(self) -> dict[str, Any]:
          """Convert options to OpenAI API completion parameters.
@@ -163,9 +165,7 @@
          """
          kwargs: dict[str, Any] = {
              "timeout": self.timeout,
-             "extra_body": {
-                 "usage": {"include": True},  # For openrouter cost tracking
-             },
+             "extra_body": {},
          }

          if self.temperature:
@@ -186,4 +186,10 @@
          if self.service_tier:
              kwargs["service_tier"] = self.service_tier

+         if self.verbosity:
+             kwargs["verbosity"] = self.verbosity
+
+         if self.usage_tracking:
+             kwargs["extra_body"]["usage"] = {"include": True}
+
          return kwargs
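Reading the model_options.py hunks together: usage tracking, previously always injected into extra_body for OpenRouter cost tracking, is now gated behind the new usage_tracking flag, and the defaults for timeout and cache_ttl changed. A quick sketch of the resulting kwargs; the expected values are inferred from the visible hunks only, not verified against the released package.

```python
from ai_pipeline_core.llm.model_options import ModelOptions

kwargs = ModelOptions().to_openai_completion_kwargs()
# Expected with the 0.2.2 defaults (all other fields left at None):
# {"timeout": 600, "extra_body": {"usage": {"include": True}}}

quiet = ModelOptions(usage_tracking=False).to_openai_completion_kwargs()
# Expected: {"timeout": 600, "extra_body": {}}; cost tracking can now be switched off.
```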
ai_pipeline_core/llm/model_response.py
@@ -110,7 +110,8 @@ class ModelResponse(ChatCompletion):
          >>> if "error" in response.content.lower():
          ...     # Handle error case
          """
-         return self.choices[0].message.content or ""
+         content = self.choices[0].message.content or ""
+         return content.split("</think>")[-1].strip()

      def set_model_options(self, options: dict[str, Any]) -> None:
          """Store the model configuration used for generation.
pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "ai-pipeline-core"
- version = "0.2.1"
+ version = "0.2.2"
  description = "Core utilities for AI-powered processing pipelines using prefect"
  readme = "README.md"
  license = {text = "MIT"}
@@ -177,7 +177,7 @@ reportIncompatibleVariableOverride = "error"
  reportMissingParameterType = "warning"

  [tool.bumpversion]
- current_version = "0.2.1"
+ current_version = "0.2.2"
  commit = true
  tag = true
  tag_name = "v{new_version}"