ai-pipeline-core 0.2.6.tar.gz → 0.2.7.tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (42)
  1. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/PKG-INFO +1 -1
  2. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/ai_messages.py +5 -2
  4. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/client.py +13 -19
  5. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/model_options.py +3 -3
  6. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/model_response.py +1 -1
  7. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/model_types.py +1 -1
  8. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/utils/deploy.py +20 -35
  9. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/pyproject.toml +2 -2
  10. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/.gitignore +0 -0
  11. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/LICENSE +0 -0
  12. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/README.md +0 -0
  13. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/__init__.py +0 -0
  14. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/document.py +0 -0
  15. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/document_list.py +0 -0
  16. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/flow_document.py +0 -0
  17. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/mime_type.py +0 -0
  18. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/task_document.py +0 -0
  19. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/temporary_document.py +0 -0
  20. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/utils.py +0 -0
  21. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/exceptions.py +0 -0
  22. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/flow/__init__.py +0 -0
  23. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/flow/config.py +0 -0
  24. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/flow/options.py +0 -0
  25. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/__init__.py +0 -0
  26. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/__init__.py +0 -0
  27. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/logging.yml +0 -0
  28. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/logging_config.py +0 -0
  29. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  30. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/pipeline.py +0 -0
  31. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/prefect.py +0 -0
  32. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/prompt_manager.py +0 -0
  33. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/py.typed +0 -0
  34. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/settings.py +0 -0
  35. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  36. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/simple_runner/cli.py +0 -0
  37. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
  38. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/storage/__init__.py +0 -0
  39. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/storage/storage.py +0 -0
  40. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/tracing.py +0 -0
  41. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/utils/__init__.py +0 -0
  42. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/utils/remote_deployment.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.6
+Version: 0.2.7
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
ai_pipeline_core/__init__.py
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

-__version__ = "0.2.6"
+__version__ = "0.2.7"

 __all__ = [
     # Config/Settings
ai_pipeline_core/llm/ai_messages.py
@@ -260,11 +260,14 @@ class AIMessages(list[AIMessageType]):

         for message in self:
             if isinstance(message, str):
-                messages.append({"role": "user", "content": message})
+                messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
             elif isinstance(message, Document):
                 messages.append({"role": "user", "content": AIMessages.document_to_prompt(message)})
             elif isinstance(message, ModelResponse):  # type: ignore
-                messages.append({"role": "assistant", "content": message.content})
+                messages.append({
+                    "role": "assistant",
+                    "content": [{"type": "text", "text": message.content}],
+                })
             else:
                 raise ValueError(f"Unsupported message type: {type(message)}")

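For reference, a minimal sketch of the payload shape the updated to_prompt() conversion produces, with illustrative values (the class and method names are taken from the hunks in this diff):

    # Strings and assistant replies are now wrapped as structured text parts
    # instead of bare strings, so per-part attributes (e.g. cache_control)
    # can be attached to them later.
    [
        {"role": "user", "content": [{"type": "text", "text": "Summarize the report"}]},
        {"role": "assistant", "content": [{"type": "text", "text": "The report covers..."}]},
    ]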
ai_pipeline_core/llm/client.py
@@ -39,7 +39,7 @@ def _process_messages(
     context: AIMessages,
     messages: AIMessages,
     system_prompt: str | None = None,
-    cache_ttl: str | None = "5m",
+    cache_ttl: str | None = "300s",
 ) -> list[ChatCompletionMessageParam]:
     """Process and format messages for LLM API consumption.

@@ -51,7 +51,7 @@ def _process_messages(
         context: Messages to be cached (typically expensive/static content).
         messages: Regular messages without caching (dynamic queries).
         system_prompt: Optional system instructions for the model.
-        cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+        cache_ttl: Cache TTL for context messages (e.g. "120s", "300s", "1h").
             Set to None or empty string to disable caching.

     Returns:
@@ -86,12 +86,17 @@ def _process_messages(
     # Use AIMessages.to_prompt() for context
     context_messages = context.to_prompt()

-    # Apply caching to last context message if cache_ttl is set
+    # Apply caching to last context message and last content part if cache_ttl is set
     if cache_ttl:
         context_messages[-1]["cache_control"] = {  # type: ignore
             "type": "ephemeral",
             "ttl": cache_ttl,
         }
+        assert isinstance(context_messages[-1]["content"], list)  # type: ignore
+        context_messages[-1]["content"][-1]["cache_control"] = {  # type: ignore
+            "type": "ephemeral",
+            "ttl": cache_ttl,
+        }

     processed_messages.extend(context_messages)

@@ -237,6 +242,10 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")

+    if "gemini" in model.lower() and context.approximate_tokens_count < 5000:
+        # Bug fix for minimum explicit context size for Gemini models
+        options.cache_ttl = None
+
     processed_messages = _process_messages(
         context, messages, options.system_prompt, options.cache_ttl
     )
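Taken together, these changes mean that when caching is active the ephemeral marker now appears both on the last context message and on its last content part, and caching is skipped entirely for small Gemini contexts. A rough sketch of the resulting last context message (values illustrative, structure follows the hunks above):

    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "<large static context document>",
                # new in 0.2.7: part-level cache marker
                "cache_control": {"type": "ephemeral", "ttl": "300s"},
            }
        ],
        # existing message-level cache marker
        "cache_control": {"type": "ephemeral", "ttl": "300s"},
    }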
@@ -374,26 +383,11 @@ async def generate(
         ... ])
         >>> response = await llm.generate("gpt-5", messages=messages)

-    Configuration via LiteLLM Proxy:
-        >>> # Configure temperature in litellm_config.yaml:
-        >>> # model_list:
-        >>> #   - model_name: gpt-5
-        >>> #     litellm_params:
-        >>> #       model: openai/gpt-4o
-        >>> #       temperature: 0.3
-        >>> #       max_tokens: 1000
-        >>>
-        >>> # Configure retry logic in proxy:
-        >>> # general_settings:
-        >>> #   master_key: sk-1234
-        >>> #   max_retries: 5
-        >>> #   retry_delay: 15
-
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
         - First call: full token cost
         - Subsequent calls (within cache TTL): only messages tokens
-        - Default cache TTL is 5m (production-optimized)
+        - Default cache TTL is 300s/5 minutes (production-optimized)
         - Default retry logic: 3 attempts with 10s delay (production-optimized)

     Caching:
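A hedged usage sketch based on the docstring fragments visible in this diff; generate()'s full signature is not shown here, so the keyword names (context, messages, options) and the import paths are assumptions:

    from ai_pipeline_core import llm
    from ai_pipeline_core.llm import AIMessages, ModelOptions  # import paths assumed

    async def ask(question: str, reference_text: str) -> str:
        # Static, expensive content goes into the cached context; the dynamic
        # question goes into messages, so repeat calls within the cache TTL
        # only pay for the question tokens.
        response = await llm.generate(
            "gpt-5",
            context=AIMessages([reference_text]),   # keyword names assumed
            messages=AIMessages([question]),
            options=ModelOptions(cache_ttl="300s"),
        )
        return response.content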
ai_pipeline_core/llm/model_options.py
@@ -45,7 +45,7 @@ class ModelOptions(BaseModel):

         timeout: Maximum seconds to wait for response (default: 300).

-        cache_ttl: Cache TTL for context messages (default: "5m").
+        cache_ttl: Cache TTL for context messages (default: "300s").
             String format like "60s", "5m", or None to disable caching.
             Applied to the last context message for efficient token reuse.

@@ -165,7 +165,7 @@ class ModelOptions(BaseModel):
         - search_context_size only works with search models
         - reasoning_effort only works with models that support explicit reasoning
         - response_format is set internally by generate_structured()
-        - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
+        - cache_ttl accepts formats like "120s", "5m", "1h" or None (default: "300s")
         - stop sequences are limited to 4 by most providers
         - user identifier helps track costs per end-user (max 256 chars)
         - extra_body allows passing provider-specific parameters
@@ -179,7 +179,7 @@ class ModelOptions(BaseModel):
     retries: int = 3
     retry_delay_seconds: int = 20
     timeout: int = 600
-    cache_ttl: str | None = "5m"
+    cache_ttl: str | None = "300s"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
     stop: str | list[str] | None = None
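For quick reference, the cache_ttl values described in the notes above, as a short sketch (only the fields shown in this diff are assumed; the import path is taken from the file list):

    from ai_pipeline_core.llm.model_options import ModelOptions  # path assumed

    default_opts = ModelOptions()                # cache_ttl now defaults to "300s"
    long_cache = ModelOptions(cache_ttl="1h")    # "120s", "5m", "1h" style strings accepted
    no_cache = ModelOptions(cache_ttl=None)      # None disables context caching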
ai_pipeline_core/llm/model_response.py
@@ -266,7 +266,7 @@ class ModelResponse(ChatCompletion):

         other_fields = self.__dict__
         for key, value in other_fields.items():
-            if key in ["_model_options", "_metadata", "choices", "usage"]:
+            if key in ["_model_options", "_metadata", "choices"]:
                 continue
             try:
                 metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
ai_pipeline_core/llm/model_types.py
@@ -20,7 +20,7 @@ ModelName: TypeAlias = (
     "grok-4",
     # Small models
     "gemini-2.5-flash",
-    "gpt-5-nano",
+    "gpt-5-mini",
     "grok-4-fast",
     # Search models
     "gemini-2.5-flash-search",
ai_pipeline_core/utils/deploy.py
@@ -7,8 +7,8 @@ This script:
 3. Creates/updates a Prefect deployment using the RunnerDeployment pattern

 Requirements:
-    - .env file with PREFECT_API_URL and optionally PREFECT_API_KEY
-    - .env file with PREFECT_GCS_BUCKET
+    - Settings configured with PREFECT_API_URL and optionally PREFECT_API_KEY
+    - Settings configured with PREFECT_GCS_BUCKET
     - pyproject.toml with project name and version
     - Local package installed for flow metadata extraction

@@ -18,7 +18,6 @@ Usage:

 import argparse
 import asyncio
-import os
 import subprocess
 import sys
 import tomllib
@@ -34,14 +33,6 @@ from prefect.flows import load_flow_from_entrypoint
 from ai_pipeline_core.settings import settings
 from ai_pipeline_core.storage import Storage

-# ============================================================================
-# Configuration
-# ============================================================================
-
-WORK_POOL_NAME = settings.prefect_work_pool_name
-DEFAULT_WORK_QUEUE = settings.prefect_work_queue_name
-PREDEFINED_BUCKET = settings.prefect_gcs_bucket
-
 # ============================================================================
 # Deployer Class
 # ============================================================================
@@ -57,7 +48,7 @@ class Deployer:
     def __init__(self):
         """Initialize deployer."""
         self.config = self._load_config()
-        self._setup_prefect_env()
+        self._validate_prefect_settings()

     def _load_config(self) -> dict[str, Any]:
         """Load and normalize project configuration from pyproject.toml.
@@ -65,10 +56,10 @@ class Deployer:
         Returns:
             Configuration dictionary with project metadata and deployment settings.
         """
-        if not PREDEFINED_BUCKET:
+        if not settings.prefect_gcs_bucket:
             self._die(
-                "PREFECT_GCS_BUCKET not found in .env file.\n"
-                "Create a .env file with:\n"
+                "PREFECT_GCS_BUCKET not configured in settings.\n"
+                "Configure via environment variable or .env file:\n"
                 "  PREFECT_GCS_BUCKET=your-bucket-name"
             )

@@ -97,33 +88,23 @@ class Deployer:
             "name": name,
             "package": package_name,
             "version": version,
-            "bucket": PREDEFINED_BUCKET,
+            "bucket": settings.prefect_gcs_bucket,
             "folder": f"flows/{flow_folder}",
             "tarball": f"{package_name}-{version}.tar.gz",
-            "work_pool": WORK_POOL_NAME,
-            "work_queue": DEFAULT_WORK_QUEUE,
+            "work_pool": settings.prefect_work_pool_name,
+            "work_queue": settings.prefect_work_queue_name,
         }

-    def _setup_prefect_env(self):
-        """Configure Prefect environment variables from .env file."""
-        self.api_url = os.getenv("PREFECT_API_URL")
+    def _validate_prefect_settings(self):
+        """Validate that required Prefect settings are configured."""
+        self.api_url = settings.prefect_api_url
         if not self.api_url:
             self._die(
-                "PREFECT_API_URL not found in .env file.\n"
-                "Create a .env file with:\n"
+                "PREFECT_API_URL not configured in settings.\n"
+                "Configure via environment variable or .env file:\n"
                 "  PREFECT_API_URL=https://api.prefect.cloud/api/accounts/.../workspaces/..."
             )

-        os.environ["PREFECT_API_URL"] = self.api_url
-
-        # Optional: API key for authentication
-        if api_key := os.getenv("PREFECT_API_KEY"):
-            os.environ["PREFECT_API_KEY"] = api_key
-
-        # Optional: Alternative auth method
-        if api_auth := os.getenv("PREFECT_API_AUTH_STRING"):
-            os.environ["PREFECT_API_AUTH_STRING"] = api_auth
-
     def _run(self, cmd: str, check: bool = True) -> Optional[str]:
         """Execute shell command and return output.

@@ -345,12 +326,16 @@
     python -m ai_pipeline_core.utils.deploy

 Prerequisites:
-    - .env file with PREFECT_API_URL (and optionally PREFECT_API_KEY)
-    - .env file with PREFECT_GCS_BUCKET
+    - Settings configured with PREFECT_API_URL (and optionally PREFECT_API_KEY)
+    - Settings configured with PREFECT_GCS_BUCKET
     - pyproject.toml with project name and version
     - Package installed locally: pip install -e .
     - GCP authentication configured (via service account or default credentials)
     - Work pool created in Prefect UI or CLI
+
+Settings can be configured via:
+    - Environment variables (e.g., export PREFECT_API_URL=...)
+    - .env file in the current directory
 """,
         )

pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.2.6"
+version = "0.2.7"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}
@@ -178,7 +178,7 @@ reportIncompatibleVariableOverride = "error"
 reportMissingParameterType = "warning"

 [tool.bumpversion]
-current_version = "0.2.6"
+current_version = "0.2.7"
 commit = true
 tag = true
 tag_name = "v{new_version}"