jaf-py 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jaf/__init__.py +1 -1
- jaf/core/agent_tool.py +64 -3
- jaf/core/engine.py +2 -2
- jaf/core/tracing.py +47 -11
- jaf/core/types.py +45 -2
- jaf/providers/model.py +265 -47
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/METADATA +2 -2
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/RECORD +12 -12
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/WHEEL +0 -0
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/entry_points.txt +0 -0
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/licenses/LICENSE +0 -0
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/top_level.txt +0 -0
jaf/__init__.py
CHANGED
jaf/core/agent_tool.py
CHANGED
@@ -8,7 +8,6 @@ by other agents, enabling hierarchical agent orchestration patterns.
 import asyncio
 import json
 import inspect
-import inspect
 import contextvars
 from typing import Any, Callable, Dict, List, Optional, Union, Awaitable, TypeVar, get_type_hints
 
@@ -181,11 +180,73 @@ def create_agent_tool(
 # Session inheritance is configurable via preserve_session.
 # - When True: inherit parent's conversation_id and memory (shared memory/session)
 # - When False: do not inherit (ephemeral, per-invocation sub-agent run)
+#
+# Model selection for subagents:
+# - If subagent has its own model_config, use that model AND create appropriate provider
+# - If subagent has no model_config, inherit parent's model_override and provider
+# This allows subagents to run on different models than the parent agent
+subagent_model_override = None
+subagent_model_provider = parent_config.model_provider
+
+if agent.model_config and agent.model_config.name:
+    subagent_model_name = agent.model_config.name
+    # Subagent has explicit model_config - create appropriate provider for it
+    # Use model_override to force the subagent's model
+    subagent_model_override = subagent_model_name
+
+    # Create provider based on model type
+    import os
+    if subagent_model_name.startswith("azure/"):
+        try:
+            from jaf.providers import make_litellm_sdk_provider
+            azure_api_key = os.getenv("AZURE_API_KEY")
+            azure_api_base = os.getenv("AZURE_API_BASE")
+            azure_api_version = os.getenv("AZURE_API_VERSION")
+            subagent_model_provider = make_litellm_sdk_provider(
+                model=subagent_model_name,
+                api_key=azure_api_key,
+                base_url=azure_api_base,
+                api_version=azure_api_version,
+            )
+        except Exception as e:
+            # Fallback to parent provider if Azure provider creation fails
+            subagent_model_provider = parent_config.model_provider
+    elif subagent_model_name.startswith("vertex_ai/"):
+        try:
+            from jaf.providers import make_litellm_sdk_provider
+            vertex_project = os.getenv("VERTEXAI_PROJECT")
+            vertex_location = os.getenv("VERTEXAI_LOCATION")
+            if not vertex_project or not vertex_location:
+                raise ValueError(
+                    "VERTEXAI_PROJECT and VERTEXAI_LOCATION environment variables are required for vertex_ai/ models"
+                )
+            subagent_model_provider = make_litellm_sdk_provider(
+                model=subagent_model_name,
+                vertex_project=vertex_project,
+                vertex_location=vertex_location,
+            )
+        except Exception:
+            subagent_model_provider = parent_config.model_provider
+    elif subagent_model_name.startswith("glm"):
+        try:
+            from jaf.providers import make_litellm_provider
+            subagent_model_provider = make_litellm_provider(
+                base_url=os.getenv("LITELLM_BASE_URL"),
+                api_key=os.getenv("LITELLM_KEY")
+            )
+        except Exception:
+            subagent_model_provider = parent_config.model_provider
+    # For other models, use parent's provider (may work or may not)
+else:
+    # No subagent model_config - inherit from parent
+    subagent_model_override = parent_config.model_override
+    subagent_model_provider = parent_config.model_provider
+
 sub_config = RunConfig(
     agent_registry={agent.name: agent, **parent_config.agent_registry},
-    model_provider=
+    model_provider=subagent_model_provider,
     max_turns=max_turns or parent_config.max_turns,
-    model_override=
+    model_override=subagent_model_override,
     initial_input_guardrails=parent_config.initial_input_guardrails,
     final_output_guardrails=parent_config.final_output_guardrails,
     on_event=parent_config.on_event,
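
For context, a minimal standalone sketch of the provider-selection rule introduced above. It mirrors the model-name prefix checks rather than calling the jaf provider factories; the function name and the returned labels are illustrative only:

# Hypothetical sketch of the subagent provider-selection rule: an explicit model_config
# on the subagent picks a provider by model-name prefix, anything else inherits the parent's.
from typing import Optional

def pick_subagent_provider(subagent_model: Optional[str], parent_provider: str) -> str:
    if subagent_model is None:
        return parent_provider                       # no model_config: inherit from parent
    if subagent_model.startswith("azure/"):
        return "litellm_sdk_provider (azure env)"    # AZURE_API_KEY / AZURE_API_BASE / AZURE_API_VERSION
    if subagent_model.startswith("vertex_ai/"):
        return "litellm_sdk_provider (vertex env)"   # VERTEXAI_PROJECT / VERTEXAI_LOCATION required
    if subagent_model.startswith("glm"):
        return "litellm_provider (LITELLM_BASE_URL)" # proxy-style provider
    return parent_provider                           # any other model: keep the parent's provider

assert pick_subagent_provider(None, "parent") == "parent"
assert pick_subagent_provider("azure/gpt-4o", "parent").startswith("litellm_sdk")
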
jaf/core/engine.py
CHANGED
@@ -704,7 +704,7 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
 stream_usage = raw_chunk["usage"]
 if not stream_model and "model" in raw_chunk and raw_chunk["model"]:
     stream_model = raw_chunk["model"]
-
+
 # Text deltas
 delta_text = getattr(chunk, "delta", None)
 if delta_text:
@@ -820,7 +820,7 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
 llm_response["usage"] = stream_usage
 if stream_model:
     llm_response["model"] = stream_model
-
+
 except Exception:
     # Fallback to non-streaming on error
     assistant_event_streamed = False
jaf/core/tracing.py
CHANGED
@@ -469,7 +469,7 @@ class LangfuseTraceCollector:
 public_key=public_key,
 secret_key=secret_key,
 host=host,
-release="jaf-py-v2.6.
+release="jaf-py-v2.6.5",
 httpx_client=client,
 )
 self._httpx_client = client
@@ -753,7 +753,9 @@ class LangfuseTraceCollector:
 system_prompt = context.system_prompt
 
 if system_prompt:
-    print(
+    print(
+        f"[LANGFUSE DEBUG] Extracted system_prompt: {system_prompt[:100] if isinstance(system_prompt, str) else system_prompt}..."
+    )
 
 print(
     f"[LANGFUSE DEBUG] Final extracted - user_query: {user_query}, user_id: {user_id}"
@@ -912,24 +914,24 @@ class LangfuseTraceCollector:
 # End the generation
 generation = self.active_spans[span_id]
 
-
 choice = self._get_event_data(event, "choice", {})
 usage = self._get_event_data(event, "usage", {})
 model = self._get_event_data(event, "model", "unknown")
-
+
 # Also try to get model from the choice if not at top level
 if model == "unknown" and isinstance(choice, dict):
     model = choice.get("model", "unknown")
-
+
 print(f"[LANGFUSE] Extracted - model: '{model}', usage: {usage}")
-
-# Convert to Langfuse
+
+# Convert to Langfuse format with detailed cache information
 langfuse_usage = None
 if usage:
     prompt_tokens = usage.get("prompt_tokens", 0)
     completion_tokens = usage.get("completion_tokens", 0)
     total_tokens = usage.get("total_tokens", 0)
 
+    # Build detailed usage dict with cache information
     langfuse_usage = {
         "input": prompt_tokens,
         "output": completion_tokens,
@@ -937,9 +939,40 @@ class LangfuseTraceCollector:
         "unit": "TOKENS",
     }
 
-
-
-
+    # Add cache-related fields if available (for prompt caching support)
+    if (
+        "cache_creation_input_tokens" in usage
+        and usage["cache_creation_input_tokens"]
+    ):
+        langfuse_usage["cache_creation_input_tokens"] = usage[
+            "cache_creation_input_tokens"
+        ]
+    if "cache_read_input_tokens" in usage and usage["cache_read_input_tokens"]:
+        langfuse_usage["cache_read_input_tokens"] = usage[
+            "cache_read_input_tokens"
+        ]
+
+    # Add detailed token breakdowns if available
+    if "prompt_tokens_details" in usage and usage["prompt_tokens_details"]:
+        details = usage["prompt_tokens_details"]
+        if "cached_tokens" in details and details["cached_tokens"]:
+            langfuse_usage["input_cached_tokens"] = details["cached_tokens"]
+        if "audio_tokens" in details and details["audio_tokens"]:
+            langfuse_usage["input_audio_tokens"] = details["audio_tokens"]
+
+    if (
+        "completion_tokens_details" in usage
+        and usage["completion_tokens_details"]
+    ):
+        details = usage["completion_tokens_details"]
+        if "reasoning_tokens" in details and details["reasoning_tokens"]:
+            langfuse_usage["output_reasoning_tokens"] = details[
+                "reasoning_tokens"
+            ]
+        if "audio_tokens" in details and details["audio_tokens"]:
+            langfuse_usage["output_audio_tokens"] = details["audio_tokens"]
+
+    print(f"[LANGFUSE] Usage data with cache details: {langfuse_usage}")
 
 # Include model information in the generation end - Langfuse will calculate costs automatically
 # Use compatibility wrapper for ending spans/generations
@@ -1260,7 +1293,10 @@ def create_composite_trace_collector(
 # Automatically add Langfuse collector if keys are configured
 if os.getenv("LANGFUSE_PUBLIC_KEY") and os.getenv("LANGFUSE_SECRET_KEY"):
     langfuse_collector = LangfuseTraceCollector(
-        httpx_client=httpx_client,
+        httpx_client=httpx_client,
+        proxy=proxy,
+        timeout=timeout,
+        include_system_prompt=include_system_prompt,
     )
     collector_list.append(langfuse_collector)
 
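
As an illustration of the new usage mapping, a simplified standalone sketch (not the collector code itself) of how an OpenAI-style usage payload with cache fields is flattened into the dict handed to Langfuse; key names follow the diff above, the function name is hypothetical:

def to_langfuse_usage(usage: dict) -> dict:
    # Base token counts plus opt-in cache and reasoning details, mirroring the hunks above.
    out = {
        "input": usage.get("prompt_tokens", 0),
        "output": usage.get("completion_tokens", 0),
        "unit": "TOKENS",
    }
    if usage.get("cache_creation_input_tokens"):
        out["cache_creation_input_tokens"] = usage["cache_creation_input_tokens"]
    if usage.get("cache_read_input_tokens"):
        out["cache_read_input_tokens"] = usage["cache_read_input_tokens"]
    prompt_details = usage.get("prompt_tokens_details") or {}
    if prompt_details.get("cached_tokens"):
        out["input_cached_tokens"] = prompt_details["cached_tokens"]
    completion_details = usage.get("completion_tokens_details") or {}
    if completion_details.get("reasoning_tokens"):
        out["output_reasoning_tokens"] = completion_details["reasoning_tokens"]
    return out

print(to_langfuse_usage({
    "prompt_tokens": 1200, "completion_tokens": 80, "total_tokens": 1280,
    "cache_read_input_tokens": 900,
    "prompt_tokens_details": {"cached_tokens": 900},
}))
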
jaf/core/types.py
CHANGED
@@ -1009,6 +1009,38 @@ class RetryEvent:
 )
 
 
+@dataclass(frozen=True)
+class FallbackEventData:
+    """Data for model fallback events."""
+
+    from_model: str  # Model that failed
+    to_model: str  # Fallback model being tried
+    reason: str  # Reason for fallback (e.g., "Content Policy Violation", "Context Window Exceeded", "Rate Limit")
+    fallback_type: Literal["general", "content_policy", "context_window"]  # Type of fallback
+    attempt: int  # Which fallback attempt this is (1-indexed)
+    trace_id: TraceId
+    run_id: RunId
+    error_details: Optional[Dict[str, Any]] = None  # Additional error context
+
+
+@dataclass(frozen=True)
+class FallbackEvent:
+    """Event emitted when a model fallback occurs."""
+
+    type: Literal["fallback"] = "fallback"
+    data: FallbackEventData = field(
+        default_factory=lambda: FallbackEventData(
+            from_model="",
+            to_model="",
+            reason="",
+            fallback_type="general",
+            attempt=1,
+            trace_id=TraceId(""),
+            run_id=RunId(""),
+        )
+    )
+
+
 # Union type for all trace events
 TraceEvent = Union[
     RunStartEvent,
@@ -1024,6 +1056,7 @@ TraceEvent = Union[
     HandoffEvent,
     RunEndEvent,
     RetryEvent,
+    FallbackEvent,
 ]
 
 
@@ -1096,7 +1129,9 @@ class RunConfig(Generic[Ctx]):
     agent_registry: Dict[str, Agent[Ctx, Any]]
     model_provider: ModelProvider[Ctx]
     max_turns: Optional[int] = 50
-    max_tokens: Optional[int] =
+    max_tokens: Optional[int] = (
+        None  # Default max_tokens for all agents (can be overridden per agent)
+    )
     model_override: Optional[str] = None
     initial_input_guardrails: Optional[List[Guardrail]] = None
     final_output_guardrails: Optional[List[Guardrail]] = None
@@ -1120,7 +1155,7 @@ class RunConfig(Generic[Ctx]):
             [List[Message], RunState[Ctx]],
             Union[List[Message], Awaitable[List[Message]]],
         ]
-    ] = None
+    ] = None
     max_empty_response_retries: int = 3  # Maximum retries when LLM returns empty response
     empty_response_retry_delay: float = (
         1.0  # Initial delay in seconds before retrying empty response (uses exponential backoff)
@@ -1129,6 +1164,14 @@
     prefer_streaming: Optional[bool] = (
         None  # Whether to prefer streaming responses. None (default) = use streaming if available, True = prefer streaming, False = disable streaming
     )
+    # Model fallback configuration
+    fallbacks: Optional[List[str]] = None  # List of fallback models to try if primary model fails
+    content_policy_fallbacks: Optional[List[str]] = (
+        None  # Fallback models for content policy violations
+    )
+    context_window_fallbacks: Optional[List[str]] = (
+        None  # Fallback models for context window exceeded errors
+    )
 
 
 # Regeneration types for conversation management
jaf/providers/model.py
CHANGED
@@ -30,6 +30,8 @@ from ..core.types import (
     get_text_content,
     RetryEvent,
     RetryEventData,
+    FallbackEvent,
+    FallbackEventData,
 )
 from ..core.proxy import ProxyConfig
 from ..utils.document_processor import (
@@ -113,6 +115,55 @@ async def _is_vision_model(model: str, base_url: str) -> bool:
     return is_known_vision_model
 
 
+def _classify_error_for_fallback(e: Exception) -> tuple[str, str]:
+    """
+    Classify an error to determine the fallback type and reason.
+
+    Args:
+        e: Exception from model call
+
+    Returns:
+        Tuple of (fallback_type, reason)
+    """
+    error_message = str(e).lower()
+    error_type = type(e).__name__
+
+    # Check for content policy violations
+    if (
+        "content" in error_message
+        and ("policy" in error_message or "filter" in error_message)
+        or "contentpolicyviolation" in error_type.lower()
+        or "content_filter" in error_message
+        or "safety" in error_message
+    ):
+        return ("content_policy", "Content Policy Violation")
+
+    # Check for context window exceeded
+    if (
+        "context" in error_message
+        and "window" in error_message
+        or "too long" in error_message
+        or "maximum context" in error_message
+        or "contextwindowexceeded" in error_type.lower()
+        or "prompt is too long" in error_message
+        or "tokens" in error_message
+        and "limit" in error_message
+    ):
+        return ("context_window", "Context Window Exceeded")
+
+    # Default to general fallback
+    if hasattr(e, "status_code"):
+        status_code = e.status_code
+        if status_code == 429:
+            return ("general", f"HTTP {status_code} - Rate Limit")
+        elif 500 <= status_code < 600:
+            return ("general", f"HTTP {status_code} - Server Error")
+        else:
+            return ("general", f"HTTP {status_code}")
+
+    return ("general", error_type)
+
+
 async def _retry_with_events(
     operation_func,
     state: RunState,
@@ -259,10 +310,10 @@ def make_litellm_provider(
 async def get_completion(
     self, state: RunState[Ctx], agent: Agent[Ctx, Any], config: RunConfig[Ctx]
 ) -> Dict[str, Any]:
-    """Get completion from the model."""
+    """Get completion from the model with fallback support."""
 
-    # Determine model to use
-
+    # Determine initial model to use
+    primary_model = config.model_override or (
         agent.model_config.name if agent.model_config else "gpt-4o"
     )
 
@@ -277,10 +328,10 @@ def make_litellm_provider(
 )
 
 if has_image_content:
-    supports_vision = await _is_vision_model(
+    supports_vision = await _is_vision_model(primary_model, base_url)
     if not supports_vision:
        raise ValueError(
-            f"Model {
+            f"Model {primary_model} does not support vision capabilities. "
            f"Please use a vision-capable model like gpt-4o, claude-3-5-sonnet, or gemini-1.5-pro."
        )
 
@@ -322,39 +373,123 @@ def make_litellm_provider(
     last_message.role == ContentRole.TOOL or last_message.role == "tool"
 )
 
-#
-
-
-
-
-
-
-
-
-
-max_tokens =
-
-
-
-
-
-
-
-
-
-
-
-
-
+# Helper function to make API call with a specific model
+async def _make_completion_call(model_name: str) -> Dict[str, Any]:
+    # Prepare request parameters
+    request_params = {"model": model_name, "messages": messages, "stream": False}
+
+    # Add optional parameters
+    if agent.model_config:
+        if agent.model_config.temperature is not None:
+            request_params["temperature"] = agent.model_config.temperature
+        # Use agent's max_tokens if set, otherwise fall back to config's max_tokens
+        max_tokens = agent.model_config.max_tokens
+        if max_tokens is None:
+            max_tokens = config.max_tokens
+        if max_tokens is not None:
+            request_params["max_tokens"] = max_tokens
+    elif config.max_tokens is not None:
+        # No model_config but config has max_tokens
+        request_params["max_tokens"] = config.max_tokens
+
+    if tools:
+        request_params["tools"] = tools
+        # Always set tool_choice to auto when tools are available
+        request_params["tool_choice"] = "auto"
+
+    if agent.output_codec:
+        request_params["response_format"] = {"type": "json_object"}
+
+    # Make the API call with retry handling
+    async def _api_call():
+        return await self.client.chat.completions.create(**request_params)
+
+    # Use retry wrapper to track retries in Langfuse
+    return await _retry_with_events(
+        _api_call,
+        state,
+        config,
+        operation_name="llm_call",
+        max_retries=3,
+        backoff_factor=1.0,
+    )
 
-#
-
-
+# Try primary model first
+last_exception = None
+current_model = primary_model
+
+try:
+    response = await _make_completion_call(current_model)
+except Exception as e:
+    last_exception = e
+
+    # Classify the error to determine which fallback list to use
+    fallback_type, reason = _classify_error_for_fallback(e)
+
+    # Determine which fallback list to use
+    fallback_models = []
+    if fallback_type == "content_policy" and config.content_policy_fallbacks:
+        fallback_models = config.content_policy_fallbacks
+    elif fallback_type == "context_window" and config.context_window_fallbacks:
+        fallback_models = config.context_window_fallbacks
+    elif config.fallbacks:
+        fallback_models = config.fallbacks
+
+    # Try fallback models
+    if fallback_models:
+        print(
+            f"[JAF:FALLBACK] Primary model '{current_model}' failed with {reason}. "
+            f"Trying {len(fallback_models)} fallback model(s)..."
+        )
 
-
-
-
-
+        for i, fallback_model in enumerate(fallback_models, 1):
+            try:
+                # Emit fallback event
+                if config.on_event:
+                    fallback_event = FallbackEvent(
+                        data=FallbackEventData(
+                            from_model=current_model,
+                            to_model=fallback_model,
+                            reason=reason,
+                            fallback_type=fallback_type,
+                            attempt=i,
+                            trace_id=state.trace_id,
+                            run_id=state.run_id,
+                            error_details={
+                                "error_type": type(last_exception).__name__,
+                                "error_message": str(last_exception),
+                            },
+                        )
+                    )
+                    config.on_event(fallback_event)
+
+                print(
+                    f"[JAF:FALLBACK] Attempting fallback {i}/{len(fallback_models)}: {fallback_model}"
+                )
+
+                # Try the fallback model
+                response = await _make_completion_call(fallback_model)
+                current_model = fallback_model
+                print(
+                    f"[JAF:FALLBACK] Successfully used fallback model: {fallback_model}"
+                )
+                break  # Success - exit the fallback loop
+
+            except Exception as fallback_error:
+                last_exception = fallback_error
+                print(
+                    f"[JAF:FALLBACK] Fallback model '{fallback_model}' also failed: {fallback_error}"
+                )
+
+                # If this was the last fallback, re-raise
+                if i == len(fallback_models):
+                    print(
+                        f"[JAF:FALLBACK] All fallback models exhausted. Raising last exception."
+                    )
+                    raise
+    else:
+        # No fallbacks configured, re-raise original exception
+        raise
 
 # Return in the expected format that the engine expects
 choice = response.choices[0]
@@ -371,7 +506,7 @@
 for tc in choice.message.tool_calls
 ]
 
-# Extract usage data
+# Extract usage data with detailed cache information
 usage_data = None
 if response.usage:
     usage_data = {
@@ -380,6 +515,45 @@
         "total_tokens": response.usage.total_tokens,
     }
 
+    # Extract cache-related fields if available (for prompt caching support)
+    if hasattr(response.usage, "cache_creation_input_tokens"):
+        usage_data["cache_creation_input_tokens"] = (
+            response.usage.cache_creation_input_tokens
+        )
+    if hasattr(response.usage, "cache_read_input_tokens"):
+        usage_data["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
+
+    # Extract detailed token breakdowns
+    if (
+        hasattr(response.usage, "prompt_tokens_details")
+        and response.usage.prompt_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+            details["cached_tokens"] = (
+                response.usage.prompt_tokens_details.cached_tokens
+            )
+        if hasattr(response.usage.prompt_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = response.usage.prompt_tokens_details.audio_tokens
+        if details:
+            usage_data["prompt_tokens_details"] = details
+
+    if (
+        hasattr(response.usage, "completion_tokens_details")
+        and response.usage.completion_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.completion_tokens_details, "reasoning_tokens"):
+            details["reasoning_tokens"] = (
+                response.usage.completion_tokens_details.reasoning_tokens
+            )
+        if hasattr(response.usage.completion_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = (
+                response.usage.completion_tokens_details.audio_tokens
+            )
+        if details:
+            usage_data["completion_tokens_details"] = details
+
 return {
     "id": response.id,
     "created": response.created,
@@ -688,7 +862,12 @@ def make_litellm_sdk_provider(
 
 # Use retry wrapper to track retries in Langfuse
 response = await _retry_with_events(
-    _api_call,
+    _api_call,
+    state,
+    config,
+    operation_name="llm_call",
+    max_retries=3,
+    backoff_factor=1.0,
 )
 
 # Return in the expected format that the engine expects
@@ -706,23 +885,62 @@
 for tc in choice.message.tool_calls
 ]
 
-# Extract usage data - ALWAYS return a dict with defaults for Langfuse cost tracking
+# Extract usage data with detailed cache information - ALWAYS return a dict with defaults for Langfuse cost tracking
 # Initialize with zeros as defensive default (matches AzureDirectProvider pattern)
 usage_data = {
     "prompt_tokens": 0,
     "completion_tokens": 0,
     "total_tokens": 0,
 }
-
+
 actual_model = getattr(response, "model", model_name)
-
+
 if response.usage:
     usage_data = {
         "prompt_tokens": response.usage.prompt_tokens,
         "completion_tokens": response.usage.completion_tokens,
         "total_tokens": response.usage.total_tokens,
     }
-
+
+    # Extract cache-related fields if available (for prompt caching support)
+    if hasattr(response.usage, "cache_creation_input_tokens"):
+        usage_data["cache_creation_input_tokens"] = (
+            response.usage.cache_creation_input_tokens
+        )
+    if hasattr(response.usage, "cache_read_input_tokens"):
+        usage_data["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
+
+    # Extract detailed token breakdowns
+    if (
+        hasattr(response.usage, "prompt_tokens_details")
+        and response.usage.prompt_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+            details["cached_tokens"] = (
+                response.usage.prompt_tokens_details.cached_tokens
+            )
+        if hasattr(response.usage.prompt_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = response.usage.prompt_tokens_details.audio_tokens
+        if details:
+            usage_data["prompt_tokens_details"] = details
+
+    if (
+        hasattr(response.usage, "completion_tokens_details")
+        and response.usage.completion_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.completion_tokens_details, "reasoning_tokens"):
+            details["reasoning_tokens"] = (
+                response.usage.completion_tokens_details.reasoning_tokens
+            )
+        if hasattr(response.usage.completion_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = (
+                response.usage.completion_tokens_details.audio_tokens
+            )
+        if details:
+            usage_data["completion_tokens_details"] = details
+
 message_content = {
     "content": choice.message.content,
     "tool_calls": tool_calls,
@@ -730,7 +948,7 @@ def make_litellm_sdk_provider(
     "_usage": usage_data,
     "_model": actual_model,
 }
-
+
 return {
     "id": response.id,
     "created": response.created,
@@ -820,7 +1038,7 @@ def make_litellm_sdk_provider(
 
 # Stream using litellm
 stream = await litellm.acompletion(**request_params)
-
+
 accumulated_usage: Optional[Dict[str, int]] = None
 response_model: Optional[str] = None
 
@@ -829,15 +1047,15 @@ def make_litellm_sdk_provider(
 # Best-effort extraction of raw for debugging
 try:
     raw_obj = chunk.model_dump() if hasattr(chunk, "model_dump") else None
-
+
     # Capture usage from chunk if present
     if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
         accumulated_usage = raw_obj["usage"]
-
+
     # Capture model from chunk if present
     if raw_obj and "model" in raw_obj and raw_obj["model"]:
         response_model = raw_obj["model"]
-
+
 except Exception as e:
     raw_obj = None
 
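
A standalone sketch of the routing behavior added here, under assumed (hypothetical) model names; it mirrors rather than imports _classify_error_for_fallback and the list-selection logic from the diff above:

def classify(message: str) -> str:
    # Simplified error classification by message keywords, as in the new helper.
    m = message.lower()
    if "content_filter" in m or "safety" in m or ("content" in m and ("policy" in m or "filter" in m)):
        return "content_policy"
    if "maximum context" in m or "prompt is too long" in m or ("context" in m and "window" in m):
        return "context_window"
    return "general"

def pick_fallbacks(kind: str, cfg: dict) -> list:
    # Choose the matching fallback list, falling back to the general one.
    if kind == "content_policy" and cfg.get("content_policy_fallbacks"):
        return cfg["content_policy_fallbacks"]
    if kind == "context_window" and cfg.get("context_window_fallbacks"):
        return cfg["context_window_fallbacks"]
    return cfg.get("fallbacks") or []

cfg = {"fallbacks": ["m-general"], "context_window_fallbacks": ["m-long-context"]}
assert pick_fallbacks(classify("prompt is too long for this model"), cfg) == ["m-long-context"]
assert pick_fallbacks(classify("HTTP 429 rate limit"), cfg) == ["m-general"]
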
{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jaf-py
-Version: 2.6.
+Version: 2.6.5
 Summary: A purely functional agent framework with immutable state and composable tools - Python implementation
 Author: JAF Contributors
 Maintainer: JAF Contributors
@@ -82,7 +82,7 @@ Dynamic: license-file
 
 <!--  -->
 
-[![Version](https://img.shields.io/badge/version-2.6.3-blue.svg)](https://github.com/xynehq/jaf-py)
 [![Python](https://img.shields.io/badge/python-3.10+-green.svg)](https://www.python.org/)
 [![Docs](https://img.shields.io/badge/docs-MkDocs-blue.svg)](https://xynehq.github.io/jaf-py/)
 

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-jaf/__init__.py,sha256=
+jaf/__init__.py,sha256=clE4UWW2Y5bty2ataCPqnL__bVP8HGO1EBIR1VYI9ZU,8652
 jaf/cli.py,sha256=EDMMA5uX0e3TUIedLdyP3p4Qy-aXADvpht3VgJPJagU,8299
 jaf/exceptions.py,sha256=FdLIw7bdCNtBYfqRyJBkRT4Z1vWuvkzrMqFiMAzjL8Y,9158
 jaf/a2a/__init__.py,sha256=r4W-WHZNjoxR8EQ0x41_rY3fl12OH5qcSn0KycXaKKU,7752
@@ -39,11 +39,11 @@ jaf/a2a/tests/test_integration.py,sha256=hfGAtwXOfV9OXrFgS94twMbzxMQ4Vfj0KYoNT5V
 jaf/a2a/tests/test_protocol.py,sha256=3Ov9fTqznDqJLg8PqY2oy9I2Tpvwv_N0aN-rpFpAmjM,22215
 jaf/a2a/tests/test_types.py,sha256=rSUhZmOQcFrgNiEg4hDCZwypj19h6mSamVapWkrzZWc,17329
 jaf/core/__init__.py,sha256=4IqKRspv8gvgAtbmvaMvUgYZB1fSIy3vsyCXkjF8PjU,2013
-jaf/core/agent_tool.py,sha256=
+jaf/core/agent_tool.py,sha256=bwYQtRK9YfwPM_3s2kjp3Vl-6vR64jUlOnviqM0Z5tM,15411
 jaf/core/analytics.py,sha256=ypdhllyOThXZB-TY_eR1t1n2qrnAVN7Ljb8PaOtJft0,23267
 jaf/core/checkpoint.py,sha256=O7mfi7gFOAUgJ3zHzgJsr11uzn-BU-Vj1iKyKjcirMk,8398
 jaf/core/composition.py,sha256=Tj0-FRTVWygmAfsBLld7pnZK4nrGMMBx2YYJW_KQPoo,25393
-jaf/core/engine.py,sha256=
+jaf/core/engine.py,sha256=D_RtMWI43oSm7gK_J2kFRsJ2EJkHX4hMj0soUNXC92k,71179
 jaf/core/errors.py,sha256=iDw00o3WH0gHcenRcTj3QEbbloZVpgwnPij6mtaJJk4,5710
 jaf/core/guardrails.py,sha256=oPB7MpD3xWiCWoyaS-xQQp-glaPON7GNVrIL0h1Jefs,26931
 jaf/core/handoff.py,sha256=M7TQfd7BXuer1ZeRJ51nLsI55KifbM6faNtmA2Nsj3I,6196
@@ -56,8 +56,8 @@ jaf/core/state.py,sha256=fdWDc2DQ-o_g_8E4ibg2QM0Vad_XUique3a5iYBwGZo,9516
 jaf/core/streaming.py,sha256=5ntOtJrZVCHuGsygquyCLG2J5yuSxE6DN5OM-BrQiGw,16818
 jaf/core/tool_results.py,sha256=L9U3JDQAjAH5YR7iMpSxfVky2Nxo6FYQs4WE05RATaQ,11283
 jaf/core/tools.py,sha256=rHxzAfGVGpYk3YJKmrq3AQLW0oE3ACkiJBOwle2bLdc,15146
-jaf/core/tracing.py,sha256=
-jaf/core/types.py,sha256=
+jaf/core/tracing.py,sha256=p5C7l0X1Is3cNjsINiEsUv01rnUFz9Z0lh4DFWRXsUE,59360
+jaf/core/types.py,sha256=lJXlkL55devvzbc5efT5FdQ_LX3JcsMWA10Hy8Cd5Qs,37015
 jaf/core/workflows.py,sha256=0825AoD1QwEiGAs5IRlWHmaKrjurx6xF7oDJR6POBsg,25651
 jaf/memory/__init__.py,sha256=YfANOg5vUFSPVG7gpBE4_lYkV5X3_U6Yj9v1_QexfN0,1396
 jaf/memory/approval_storage.py,sha256=DcwtERcoIMH7B-abK9hqND3Moz4zSETsPlgJNkvqcaM,10573
@@ -75,7 +75,7 @@ jaf/policies/handoff.py,sha256=3lPegkSV_2LUf6jEZnj68_g3XUGFB_Fsj1C_6Svr2Kg,8128
 jaf/policies/validation.py,sha256=-zhB5ysH0Y4JnstHzo3I8tt-PFB9FSHBwSUALITBxw4,11016
 jaf/providers/__init__.py,sha256=PfIQkCtXb_yiTEjqs5msGv5-a6De2ujFCEaDGJEe_TQ,2100
 jaf/providers/mcp.py,sha256=fGfrlYx5g7ZX1fBUkPmAYSePKrCc4pG_HKngV_QCdRU,13148
-jaf/providers/model.py,sha256=
+jaf/providers/model.py,sha256=FCnenKOLwh5JJ8hcXy7pemJb32EO0uvoww5ZTqd4mlE,58619
 jaf/server/__init__.py,sha256=cYqdruJCJ3W1AMmmxMjAnDlj9gh3XbHhtegjq4nYRNY,391
 jaf/server/main.py,sha256=usdCRZfDP3GWQchh1o2tHd4KqTTFyQQCD9w4khd9rSo,2113
 jaf/server/server.py,sha256=ZhZ2gmY10eQNaKUlE7ecMkrwMkYkAh-QgKdUJ2q7ktM,51532
@@ -89,9 +89,9 @@ jaf/visualization/functional_core.py,sha256=0Xs2R8ELADKNIgokcbjuxmWwxEyCH1yXIEdG
 jaf/visualization/graphviz.py,sha256=EwWVIRv8Z7gTiO5Spvcm-z_UUQ1oWNPRgdE33ZzFwx8,11569
 jaf/visualization/imperative_shell.py,sha256=N5lWzOLMIU_iCoy3n5WCg49eec8VxV8f7JIG6_wNtVw,2506
 jaf/visualization/types.py,sha256=90G8oClsFa_APqTuMrTW6KjD0oG9I4kVur773dXNW0E,1393
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
+jaf_py-2.6.5.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
+jaf_py-2.6.5.dist-info/METADATA,sha256=sacV8SfppPc9buMj-yoaNvDqrtF7S-k9P51zRZqp6ls,27743
+jaf_py-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+jaf_py-2.6.5.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
+jaf_py-2.6.5.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
+jaf_py-2.6.5.dist-info/RECORD,,

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/WHEEL
File without changes

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/entry_points.txt
File without changes

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/licenses/LICENSE
File without changes

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/top_level.txt
File without changes