jaf-py 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jaf/__init__.py CHANGED
@@ -201,7 +201,7 @@ def generate_run_id() -> RunId:
  return create_run_id(str(uuid.uuid4()))


- __version__ = "2.6.3"
+ __version__ = "2.6.5"
  __all__ = [
  # Core types and functions
  "TraceId",
jaf/core/agent_tool.py CHANGED
@@ -8,7 +8,6 @@ by other agents, enabling hierarchical agent orchestration patterns.
  import asyncio
  import json
  import inspect
- import inspect
  import contextvars
  from typing import Any, Callable, Dict, List, Optional, Union, Awaitable, TypeVar, get_type_hints

@@ -181,11 +180,73 @@ def create_agent_tool(
  # Session inheritance is configurable via preserve_session.
  # - When True: inherit parent's conversation_id and memory (shared memory/session)
  # - When False: do not inherit (ephemeral, per-invocation sub-agent run)
+ #
+ # Model selection for subagents:
+ # - If subagent has its own model_config, use that model AND create appropriate provider
+ # - If subagent has no model_config, inherit parent's model_override and provider
+ # This allows subagents to run on different models than the parent agent
+ subagent_model_override = None
+ subagent_model_provider = parent_config.model_provider
+
+ if agent.model_config and agent.model_config.name:
+ subagent_model_name = agent.model_config.name
+ # Subagent has explicit model_config - create appropriate provider for it
+ # Use model_override to force the subagent's model
+ subagent_model_override = subagent_model_name
+
+ # Create provider based on model type
+ import os
+ if subagent_model_name.startswith("azure/"):
+ try:
+ from jaf.providers import make_litellm_sdk_provider
+ azure_api_key = os.getenv("AZURE_API_KEY")
+ azure_api_base = os.getenv("AZURE_API_BASE")
+ azure_api_version = os.getenv("AZURE_API_VERSION")
+ subagent_model_provider = make_litellm_sdk_provider(
+ model=subagent_model_name,
+ api_key=azure_api_key,
+ base_url=azure_api_base,
+ api_version=azure_api_version,
+ )
+ except Exception as e:
+ # Fallback to parent provider if Azure provider creation fails
+ subagent_model_provider = parent_config.model_provider
+ elif subagent_model_name.startswith("vertex_ai/"):
+ try:
+ from jaf.providers import make_litellm_sdk_provider
+ vertex_project = os.getenv("VERTEXAI_PROJECT")
+ vertex_location = os.getenv("VERTEXAI_LOCATION")
+ if not vertex_project or not vertex_location:
+ raise ValueError(
+ "VERTEXAI_PROJECT and VERTEXAI_LOCATION environment variables are required for vertex_ai/ models"
+ )
+ subagent_model_provider = make_litellm_sdk_provider(
+ model=subagent_model_name,
+ vertex_project=vertex_project,
+ vertex_location=vertex_location,
+ )
+ except Exception:
+ subagent_model_provider = parent_config.model_provider
+ elif subagent_model_name.startswith("glm"):
+ try:
+ from jaf.providers import make_litellm_provider
+ subagent_model_provider = make_litellm_provider(
+ base_url=os.getenv("LITELLM_BASE_URL"),
+ api_key=os.getenv("LITELLM_KEY")
+ )
+ except Exception:
+ subagent_model_provider = parent_config.model_provider
+ # For other models, use parent's provider (may work or may not)
+ else:
+ # No subagent model_config - inherit from parent
+ subagent_model_override = parent_config.model_override
+ subagent_model_provider = parent_config.model_provider
+
  sub_config = RunConfig(
  agent_registry={agent.name: agent, **parent_config.agent_registry},
- model_provider=parent_config.model_provider,
+ model_provider=subagent_model_provider,
  max_turns=max_turns or parent_config.max_turns,
- model_override=parent_config.model_override,
+ model_override=subagent_model_override,
  initial_input_guardrails=parent_config.initial_input_guardrails,
  final_output_guardrails=parent_config.final_output_guardrails,
  on_event=parent_config.on_event,
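In practice this means a sub-agent pins its own model whenever the wrapped agent carries a model_config: names starting with azure/, vertex_ai/ or glm get a dedicated provider built from the corresponding environment variables, while agents without a model_config keep inheriting the parent's model_override and provider. A rough caller-side sketch follows; only create_agent_tool, max_turns, preserve_session and model_config.name are visible in this diff, so the exact keyword names below are illustrative assumptions.

    # Illustrative sketch only - argument names beyond those visible above are assumptions.
    from jaf.core.agent_tool import create_agent_tool

    # summarizer_agent is assumed to be an existing Agent whose
    # model_config.name == "azure/gpt-4o-mini". Because the name starts with
    # "azure/", 2.6.5 builds a LiteLLM SDK provider for the sub-run from
    # AZURE_API_KEY / AZURE_API_BASE / AZURE_API_VERSION instead of reusing
    # the parent's provider.
    summarizer_tool = create_agent_tool(
        agent=summarizer_agent,
        max_turns=3,             # caps the sub-run; otherwise the parent's max_turns applies
        preserve_session=False,  # ephemeral sub-run; True shares the parent's conversation/memory
    )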
jaf/core/engine.py CHANGED
@@ -704,7 +704,7 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
  stream_usage = raw_chunk["usage"]
  if not stream_model and "model" in raw_chunk and raw_chunk["model"]:
  stream_model = raw_chunk["model"]
-
+
  # Text deltas
  delta_text = getattr(chunk, "delta", None)
  if delta_text:
@@ -820,7 +820,7 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
  llm_response["usage"] = stream_usage
  if stream_model:
  llm_response["model"] = stream_model
-
+
  except Exception:
  # Fallback to non-streaming on error
  assistant_event_streamed = False
jaf/core/tracing.py CHANGED
@@ -469,7 +469,7 @@ class LangfuseTraceCollector:
  public_key=public_key,
  secret_key=secret_key,
  host=host,
- release="jaf-py-v2.6.3",
+ release="jaf-py-v2.6.5",
  httpx_client=client,
  )
  self._httpx_client = client
@@ -753,7 +753,9 @@ class LangfuseTraceCollector:
  system_prompt = context.system_prompt

  if system_prompt:
- print(f"[LANGFUSE DEBUG] Extracted system_prompt: {system_prompt[:100] if isinstance(system_prompt, str) else system_prompt}...")
+ print(
+ f"[LANGFUSE DEBUG] Extracted system_prompt: {system_prompt[:100] if isinstance(system_prompt, str) else system_prompt}..."
+ )

  print(
  f"[LANGFUSE DEBUG] Final extracted - user_query: {user_query}, user_id: {user_id}"
@@ -912,24 +914,24 @@ class LangfuseTraceCollector:
  # End the generation
  generation = self.active_spans[span_id]

-
  choice = self._get_event_data(event, "choice", {})
  usage = self._get_event_data(event, "usage", {})
  model = self._get_event_data(event, "model", "unknown")
-
+
  # Also try to get model from the choice if not at top level
  if model == "unknown" and isinstance(choice, dict):
  model = choice.get("model", "unknown")
-
+
  print(f"[LANGFUSE] Extracted - model: '{model}', usage: {usage}")
-
- # Convert to Langfuse v2 format - let Langfuse handle cost calculation automatically
+
+ # Convert to Langfuse format with detailed cache information
  langfuse_usage = None
  if usage:
  prompt_tokens = usage.get("prompt_tokens", 0)
  completion_tokens = usage.get("completion_tokens", 0)
  total_tokens = usage.get("total_tokens", 0)

+ # Build detailed usage dict with cache information
  langfuse_usage = {
  "input": prompt_tokens,
  "output": completion_tokens,
@@ -937,9 +939,40 @@ class LangfuseTraceCollector:
  "unit": "TOKENS",
  }

- print(
- f"[LANGFUSE] Usage data for automatic cost calculation: {langfuse_usage}"
- )
+ # Add cache-related fields if available (for prompt caching support)
+ if (
+ "cache_creation_input_tokens" in usage
+ and usage["cache_creation_input_tokens"]
+ ):
+ langfuse_usage["cache_creation_input_tokens"] = usage[
+ "cache_creation_input_tokens"
+ ]
+ if "cache_read_input_tokens" in usage and usage["cache_read_input_tokens"]:
+ langfuse_usage["cache_read_input_tokens"] = usage[
+ "cache_read_input_tokens"
+ ]
+
+ # Add detailed token breakdowns if available
+ if "prompt_tokens_details" in usage and usage["prompt_tokens_details"]:
+ details = usage["prompt_tokens_details"]
+ if "cached_tokens" in details and details["cached_tokens"]:
+ langfuse_usage["input_cached_tokens"] = details["cached_tokens"]
+ if "audio_tokens" in details and details["audio_tokens"]:
+ langfuse_usage["input_audio_tokens"] = details["audio_tokens"]
+
+ if (
+ "completion_tokens_details" in usage
+ and usage["completion_tokens_details"]
+ ):
+ details = usage["completion_tokens_details"]
+ if "reasoning_tokens" in details and details["reasoning_tokens"]:
+ langfuse_usage["output_reasoning_tokens"] = details[
+ "reasoning_tokens"
+ ]
+ if "audio_tokens" in details and details["audio_tokens"]:
+ langfuse_usage["output_audio_tokens"] = details["audio_tokens"]
+
+ print(f"[LANGFUSE] Usage data with cache details: {langfuse_usage}")

  # Include model information in the generation end - Langfuse will calculate costs automatically
  # Use compatibility wrapper for ending spans/generations
@@ -1260,7 +1293,10 @@ def create_composite_trace_collector(
  # Automatically add Langfuse collector if keys are configured
  if os.getenv("LANGFUSE_PUBLIC_KEY") and os.getenv("LANGFUSE_SECRET_KEY"):
  langfuse_collector = LangfuseTraceCollector(
- httpx_client=httpx_client, proxy=proxy, timeout=timeout, include_system_prompt=include_system_prompt
+ httpx_client=httpx_client,
+ proxy=proxy,
+ timeout=timeout,
+ include_system_prompt=include_system_prompt,
  )
  collector_list.append(langfuse_collector)

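As the last hunk shows, create_composite_trace_collector still auto-registers a LangfuseTraceCollector whenever both Langfuse keys are present in the environment. A minimal sketch of turning that on is below; only LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY are checked in this diff, the key values are placeholders, and calling the function with only include_system_prompt assumes the remaining parameters have defaults.

    # Sketch: enabling automatic Langfuse export, assuming default parameter values suffice.
    import os
    from jaf.core.tracing import create_composite_trace_collector

    os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."  # placeholder
    os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."  # placeholder

    # With both keys set, a LangfuseTraceCollector (release "jaf-py-v2.6.5") is
    # appended to the composite collector; include_system_prompt controls whether
    # the extracted system prompt is attached to the trace input.
    collector = create_composite_trace_collector(include_system_prompt=False)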
jaf/core/types.py CHANGED
@@ -1009,6 +1009,38 @@ class RetryEvent:
  )


+ @dataclass(frozen=True)
+ class FallbackEventData:
+ """Data for model fallback events."""
+
+ from_model: str # Model that failed
+ to_model: str # Fallback model being tried
+ reason: str # Reason for fallback (e.g., "Content Policy Violation", "Context Window Exceeded", "Rate Limit")
+ fallback_type: Literal["general", "content_policy", "context_window"] # Type of fallback
+ attempt: int # Which fallback attempt this is (1-indexed)
+ trace_id: TraceId
+ run_id: RunId
+ error_details: Optional[Dict[str, Any]] = None # Additional error context
+
+
+ @dataclass(frozen=True)
+ class FallbackEvent:
+ """Event emitted when a model fallback occurs."""
+
+ type: Literal["fallback"] = "fallback"
+ data: FallbackEventData = field(
+ default_factory=lambda: FallbackEventData(
+ from_model="",
+ to_model="",
+ reason="",
+ fallback_type="general",
+ attempt=1,
+ trace_id=TraceId(""),
+ run_id=RunId(""),
+ )
+ )
+
+
  # Union type for all trace events
  TraceEvent = Union[
  RunStartEvent,
@@ -1024,6 +1056,7 @@ TraceEvent = Union[
  HandoffEvent,
  RunEndEvent,
  RetryEvent,
+ FallbackEvent,
  ]


@@ -1096,7 +1129,9 @@ class RunConfig(Generic[Ctx]):
  agent_registry: Dict[str, Agent[Ctx, Any]]
  model_provider: ModelProvider[Ctx]
  max_turns: Optional[int] = 50
- max_tokens: Optional[int] = None # Default max_tokens for all agents (can be overridden per agent)
+ max_tokens: Optional[int] = (
+ None # Default max_tokens for all agents (can be overridden per agent)
+ )
  model_override: Optional[str] = None
  initial_input_guardrails: Optional[List[Guardrail]] = None
  final_output_guardrails: Optional[List[Guardrail]] = None
@@ -1120,7 +1155,7 @@ class RunConfig(Generic[Ctx]):
  [List[Message], RunState[Ctx]],
  Union[List[Message], Awaitable[List[Message]]],
  ]
- ] = None
+ ] = None
  max_empty_response_retries: int = 3 # Maximum retries when LLM returns empty response
  empty_response_retry_delay: float = (
  1.0 # Initial delay in seconds before retrying empty response (uses exponential backoff)
@@ -1129,6 +1164,14 @@ class RunConfig(Generic[Ctx]):
  prefer_streaming: Optional[bool] = (
  None # Whether to prefer streaming responses. None (default) = use streaming if available, True = prefer streaming, False = disable streaming
  )
+ # Model fallback configuration
+ fallbacks: Optional[List[str]] = None # List of fallback models to try if primary model fails
+ content_policy_fallbacks: Optional[List[str]] = (
+ None # Fallback models for content policy violations
+ )
+ context_window_fallbacks: Optional[List[str]] = (
+ None # Fallback models for context window exceeded errors
+ )


  # Regeneration types for conversation management
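The three new fields route different failure classes to different fallback model lists, matching the error classification added to jaf/providers/model.py below. A short configuration sketch; the agent registry entry, provider object and model names are placeholders, while the field names come from this diff.

    # Sketch: per-category model fallbacks on RunConfig (new in 2.6.5).
    from jaf.core.types import RunConfig

    config = RunConfig(
        agent_registry={"assistant": assistant_agent},   # placeholder agent
        model_provider=provider,                         # any ModelProvider implementation
        model_override="gpt-4o",
        fallbacks=["gpt-4o-mini"],                       # generic failures (rate limits, 5xx, unknown errors)
        content_policy_fallbacks=["claude-3-5-sonnet"],  # content policy / safety filter errors
        context_window_fallbacks=["gemini-1.5-pro"],     # prompt exceeds the context window
    )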
jaf/providers/model.py CHANGED
@@ -30,6 +30,8 @@ from ..core.types import (
  get_text_content,
  RetryEvent,
  RetryEventData,
+ FallbackEvent,
+ FallbackEventData,
  )
  from ..core.proxy import ProxyConfig
  from ..utils.document_processor import (
@@ -113,6 +115,55 @@ async def _is_vision_model(model: str, base_url: str) -> bool:
  return is_known_vision_model


+ def _classify_error_for_fallback(e: Exception) -> tuple[str, str]:
+ """
+ Classify an error to determine the fallback type and reason.
+
+ Args:
+ e: Exception from model call
+
+ Returns:
+ Tuple of (fallback_type, reason)
+ """
+ error_message = str(e).lower()
+ error_type = type(e).__name__
+
+ # Check for content policy violations
+ if (
+ "content" in error_message
+ and ("policy" in error_message or "filter" in error_message)
+ or "contentpolicyviolation" in error_type.lower()
+ or "content_filter" in error_message
+ or "safety" in error_message
+ ):
+ return ("content_policy", "Content Policy Violation")
+
+ # Check for context window exceeded
+ if (
+ "context" in error_message
+ and "window" in error_message
+ or "too long" in error_message
+ or "maximum context" in error_message
+ or "contextwindowexceeded" in error_type.lower()
+ or "prompt is too long" in error_message
+ or "tokens" in error_message
+ and "limit" in error_message
+ ):
+ return ("context_window", "Context Window Exceeded")
+
+ # Default to general fallback
+ if hasattr(e, "status_code"):
+ status_code = e.status_code
+ if status_code == 429:
+ return ("general", f"HTTP {status_code} - Rate Limit")
+ elif 500 <= status_code < 600:
+ return ("general", f"HTTP {status_code} - Server Error")
+ else:
+ return ("general", f"HTTP {status_code}")
+
+ return ("general", error_type)
+
+
  async def _retry_with_events(
  operation_func,
  state: RunState,
@@ -259,10 +310,10 @@ def make_litellm_provider(
  async def get_completion(
  self, state: RunState[Ctx], agent: Agent[Ctx, Any], config: RunConfig[Ctx]
  ) -> Dict[str, Any]:
- """Get completion from the model."""
+ """Get completion from the model with fallback support."""

- # Determine model to use
- model = config.model_override or (
+ # Determine initial model to use
+ primary_model = config.model_override or (
  agent.model_config.name if agent.model_config else "gpt-4o"
  )

@@ -277,10 +328,10 @@ def make_litellm_provider(
  )

  if has_image_content:
- supports_vision = await _is_vision_model(model, base_url)
+ supports_vision = await _is_vision_model(primary_model, base_url)
  if not supports_vision:
  raise ValueError(
- f"Model {model} does not support vision capabilities. "
+ f"Model {primary_model} does not support vision capabilities. "
  f"Please use a vision-capable model like gpt-4o, claude-3-5-sonnet, or gemini-1.5-pro."
  )

@@ -322,39 +373,123 @@ def make_litellm_provider(
  last_message.role == ContentRole.TOOL or last_message.role == "tool"
  )

- # Prepare request parameters
- request_params = {"model": model, "messages": messages, "stream": False}
-
- # Add optional parameters
- if agent.model_config:
- if agent.model_config.temperature is not None:
- request_params["temperature"] = agent.model_config.temperature
- # Use agent's max_tokens if set, otherwise fall back to config's max_tokens
- max_tokens = agent.model_config.max_tokens
- if max_tokens is None:
- max_tokens = config.max_tokens
- if max_tokens is not None:
- request_params["max_tokens"] = max_tokens
- elif config.max_tokens is not None:
- # No model_config but config has max_tokens
- request_params["max_tokens"] = config.max_tokens
-
- if tools:
- request_params["tools"] = tools
- # Always set tool_choice to auto when tools are available
- request_params["tool_choice"] = "auto"
-
- if agent.output_codec:
- request_params["response_format"] = {"type": "json_object"}
+ # Helper function to make API call with a specific model
+ async def _make_completion_call(model_name: str) -> Dict[str, Any]:
+ # Prepare request parameters
+ request_params = {"model": model_name, "messages": messages, "stream": False}
+
+ # Add optional parameters
+ if agent.model_config:
+ if agent.model_config.temperature is not None:
+ request_params["temperature"] = agent.model_config.temperature
+ # Use agent's max_tokens if set, otherwise fall back to config's max_tokens
+ max_tokens = agent.model_config.max_tokens
+ if max_tokens is None:
+ max_tokens = config.max_tokens
+ if max_tokens is not None:
+ request_params["max_tokens"] = max_tokens
+ elif config.max_tokens is not None:
+ # No model_config but config has max_tokens
+ request_params["max_tokens"] = config.max_tokens
+
+ if tools:
+ request_params["tools"] = tools
+ # Always set tool_choice to auto when tools are available
+ request_params["tool_choice"] = "auto"
+
+ if agent.output_codec:
+ request_params["response_format"] = {"type": "json_object"}
+
+ # Make the API call with retry handling
+ async def _api_call():
+ return await self.client.chat.completions.create(**request_params)
+
+ # Use retry wrapper to track retries in Langfuse
+ return await _retry_with_events(
+ _api_call,
+ state,
+ config,
+ operation_name="llm_call",
+ max_retries=3,
+ backoff_factor=1.0,
+ )

- # Make the API call with retry handling
- async def _api_call():
- return await self.client.chat.completions.create(**request_params)
+ # Try primary model first
+ last_exception = None
+ current_model = primary_model
+
+ try:
+ response = await _make_completion_call(current_model)
+ except Exception as e:
+ last_exception = e
+
+ # Classify the error to determine which fallback list to use
+ fallback_type, reason = _classify_error_for_fallback(e)
+
+ # Determine which fallback list to use
+ fallback_models = []
+ if fallback_type == "content_policy" and config.content_policy_fallbacks:
+ fallback_models = config.content_policy_fallbacks
+ elif fallback_type == "context_window" and config.context_window_fallbacks:
+ fallback_models = config.context_window_fallbacks
+ elif config.fallbacks:
+ fallback_models = config.fallbacks
+
+ # Try fallback models
+ if fallback_models:
+ print(
+ f"[JAF:FALLBACK] Primary model '{current_model}' failed with {reason}. "
+ f"Trying {len(fallback_models)} fallback model(s)..."
+ )

- # Use retry wrapper to track retries in Langfuse
- response = await _retry_with_events(
- _api_call, state, config, operation_name="llm_call", max_retries=3, backoff_factor=1.0
- )
+ for i, fallback_model in enumerate(fallback_models, 1):
+ try:
+ # Emit fallback event
+ if config.on_event:
+ fallback_event = FallbackEvent(
+ data=FallbackEventData(
+ from_model=current_model,
+ to_model=fallback_model,
+ reason=reason,
+ fallback_type=fallback_type,
+ attempt=i,
+ trace_id=state.trace_id,
+ run_id=state.run_id,
+ error_details={
+ "error_type": type(last_exception).__name__,
+ "error_message": str(last_exception),
+ },
+ )
+ )
+ config.on_event(fallback_event)
+
+ print(
+ f"[JAF:FALLBACK] Attempting fallback {i}/{len(fallback_models)}: {fallback_model}"
+ )
+
+ # Try the fallback model
+ response = await _make_completion_call(fallback_model)
+ current_model = fallback_model
+ print(
+ f"[JAF:FALLBACK] Successfully used fallback model: {fallback_model}"
+ )
+ break # Success - exit the fallback loop
+
+ except Exception as fallback_error:
+ last_exception = fallback_error
+ print(
+ f"[JAF:FALLBACK] Fallback model '{fallback_model}' also failed: {fallback_error}"
+ )
+
+ # If this was the last fallback, re-raise
+ if i == len(fallback_models):
+ print(
+ f"[JAF:FALLBACK] All fallback models exhausted. Raising last exception."
+ )
+ raise
+ else:
+ # No fallbacks configured, re-raise original exception
+ raise

  # Return in the expected format that the engine expects
  choice = response.choices[0]
@@ -371,7 +506,7 @@ def make_litellm_provider(
  for tc in choice.message.tool_calls
  ]

- # Extract usage data
+ # Extract usage data with detailed cache information
  usage_data = None
  if response.usage:
  usage_data = {
@@ -380,6 +515,45 @@ def make_litellm_provider(
  "total_tokens": response.usage.total_tokens,
  }

+ # Extract cache-related fields if available (for prompt caching support)
+ if hasattr(response.usage, "cache_creation_input_tokens"):
+ usage_data["cache_creation_input_tokens"] = (
+ response.usage.cache_creation_input_tokens
+ )
+ if hasattr(response.usage, "cache_read_input_tokens"):
+ usage_data["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
+
+ # Extract detailed token breakdowns
+ if (
+ hasattr(response.usage, "prompt_tokens_details")
+ and response.usage.prompt_tokens_details
+ ):
+ details = {}
+ if hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+ details["cached_tokens"] = (
+ response.usage.prompt_tokens_details.cached_tokens
+ )
+ if hasattr(response.usage.prompt_tokens_details, "audio_tokens"):
+ details["audio_tokens"] = response.usage.prompt_tokens_details.audio_tokens
+ if details:
+ usage_data["prompt_tokens_details"] = details
+
+ if (
+ hasattr(response.usage, "completion_tokens_details")
+ and response.usage.completion_tokens_details
+ ):
+ details = {}
+ if hasattr(response.usage.completion_tokens_details, "reasoning_tokens"):
+ details["reasoning_tokens"] = (
+ response.usage.completion_tokens_details.reasoning_tokens
+ )
+ if hasattr(response.usage.completion_tokens_details, "audio_tokens"):
+ details["audio_tokens"] = (
+ response.usage.completion_tokens_details.audio_tokens
+ )
+ if details:
+ usage_data["completion_tokens_details"] = details
+
  return {
  "id": response.id,
  "created": response.created,
@@ -688,7 +862,12 @@ def make_litellm_sdk_provider(

  # Use retry wrapper to track retries in Langfuse
  response = await _retry_with_events(
- _api_call, state, config, operation_name="llm_call", max_retries=3, backoff_factor=1.0
+ _api_call,
+ state,
+ config,
+ operation_name="llm_call",
+ max_retries=3,
+ backoff_factor=1.0,
  )

  # Return in the expected format that the engine expects
@@ -706,23 +885,62 @@ def make_litellm_sdk_provider(
  for tc in choice.message.tool_calls
  ]

- # Extract usage data - ALWAYS return a dict with defaults for Langfuse cost tracking
+ # Extract usage data with detailed cache information - ALWAYS return a dict with defaults for Langfuse cost tracking
  # Initialize with zeros as defensive default (matches AzureDirectProvider pattern)
  usage_data = {
  "prompt_tokens": 0,
  "completion_tokens": 0,
  "total_tokens": 0,
  }
-
+
  actual_model = getattr(response, "model", model_name)
-
+
  if response.usage:
  usage_data = {
  "prompt_tokens": response.usage.prompt_tokens,
  "completion_tokens": response.usage.completion_tokens,
  "total_tokens": response.usage.total_tokens,
  }
-
+
+ # Extract cache-related fields if available (for prompt caching support)
+ if hasattr(response.usage, "cache_creation_input_tokens"):
+ usage_data["cache_creation_input_tokens"] = (
+ response.usage.cache_creation_input_tokens
+ )
+ if hasattr(response.usage, "cache_read_input_tokens"):
+ usage_data["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
+
+ # Extract detailed token breakdowns
+ if (
+ hasattr(response.usage, "prompt_tokens_details")
+ and response.usage.prompt_tokens_details
+ ):
+ details = {}
+ if hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+ details["cached_tokens"] = (
+ response.usage.prompt_tokens_details.cached_tokens
+ )
+ if hasattr(response.usage.prompt_tokens_details, "audio_tokens"):
+ details["audio_tokens"] = response.usage.prompt_tokens_details.audio_tokens
+ if details:
+ usage_data["prompt_tokens_details"] = details
+
+ if (
+ hasattr(response.usage, "completion_tokens_details")
+ and response.usage.completion_tokens_details
+ ):
+ details = {}
+ if hasattr(response.usage.completion_tokens_details, "reasoning_tokens"):
+ details["reasoning_tokens"] = (
+ response.usage.completion_tokens_details.reasoning_tokens
+ )
+ if hasattr(response.usage.completion_tokens_details, "audio_tokens"):
+ details["audio_tokens"] = (
+ response.usage.completion_tokens_details.audio_tokens
+ )
+ if details:
+ usage_data["completion_tokens_details"] = details
+
  message_content = {
  "content": choice.message.content,
  "tool_calls": tool_calls,
@@ -730,7 +948,7 @@ def make_litellm_sdk_provider(
  "_usage": usage_data,
  "_model": actual_model,
  }
-
+
  return {
  "id": response.id,
  "created": response.created,
@@ -820,7 +1038,7 @@ def make_litellm_sdk_provider(

  # Stream using litellm
  stream = await litellm.acompletion(**request_params)
-
+
  accumulated_usage: Optional[Dict[str, int]] = None
  response_model: Optional[str] = None

@@ -829,15 +1047,15 @@ def make_litellm_sdk_provider(
  # Best-effort extraction of raw for debugging
  try:
  raw_obj = chunk.model_dump() if hasattr(chunk, "model_dump") else None
-
+
  # Capture usage from chunk if present
  if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
  accumulated_usage = raw_obj["usage"]
-
+
  # Capture model from chunk if present
  if raw_obj and "model" in raw_obj and raw_obj["model"]:
  response_model = raw_obj["model"]
-
+
  except Exception as e:
  raw_obj = None

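Each fallback attempt is also surfaced to RunConfig.on_event as a FallbackEvent before the alternate model is called, so collectors can log or alert on degraded runs. A small handler sketch follows; the other TraceEvent variants are not enumerated here, and the wiring into RunConfig is indicated only in a comment.

    # Sketch: observing model fallback events emitted by the provider in 2.6.5.
    from jaf.core.types import FallbackEvent, TraceEvent

    def on_event(event: TraceEvent) -> None:
        if isinstance(event, FallbackEvent):
            d = event.data
            # fallback_type is one of "general", "content_policy", "context_window";
            # reason carries the classification from _classify_error_for_fallback,
            # and attempt is 1-indexed.
            print(
                f"fallback #{d.attempt}: {d.from_model} -> {d.to_model} "
                f"({d.fallback_type}: {d.reason})"
            )

    # Pass as RunConfig(..., on_event=on_event) alongside the fallback lists shown earlier.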
jaf_py-2.6.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: jaf-py
- Version: 2.6.3
+ Version: 2.6.5
  Summary: A purely functional agent framework with immutable state and composable tools - Python implementation
  Author: JAF Contributors
  Maintainer: JAF Contributors
@@ -82,7 +82,7 @@ Dynamic: license-file

  <!-- ![JAF Banner](docs/cover.png) -->

- [![Version](https://img.shields.io/badge/version-2.6.3-blue.svg)](https://github.com/xynehq/jaf-py)
+ [![Version](https://img.shields.io/badge/version-2.6.5-blue.svg)](https://github.com/xynehq/jaf-py)
  [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
  [![Docs](https://img.shields.io/badge/Docs-Live-brightgreen)](https://xynehq.github.io/jaf-py/)

jaf_py-2.6.5.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- jaf/__init__.py,sha256=48U83mM6oMabMj8ZmPfEPn8YXnDKTvbN5ofHSJoJ5Lk,8652
+ jaf/__init__.py,sha256=clE4UWW2Y5bty2ataCPqnL__bVP8HGO1EBIR1VYI9ZU,8652
  jaf/cli.py,sha256=EDMMA5uX0e3TUIedLdyP3p4Qy-aXADvpht3VgJPJagU,8299
  jaf/exceptions.py,sha256=FdLIw7bdCNtBYfqRyJBkRT4Z1vWuvkzrMqFiMAzjL8Y,9158
  jaf/a2a/__init__.py,sha256=r4W-WHZNjoxR8EQ0x41_rY3fl12OH5qcSn0KycXaKKU,7752
@@ -39,11 +39,11 @@ jaf/a2a/tests/test_integration.py,sha256=hfGAtwXOfV9OXrFgS94twMbzxMQ4Vfj0KYoNT5V
  jaf/a2a/tests/test_protocol.py,sha256=3Ov9fTqznDqJLg8PqY2oy9I2Tpvwv_N0aN-rpFpAmjM,22215
  jaf/a2a/tests/test_types.py,sha256=rSUhZmOQcFrgNiEg4hDCZwypj19h6mSamVapWkrzZWc,17329
  jaf/core/__init__.py,sha256=4IqKRspv8gvgAtbmvaMvUgYZB1fSIy3vsyCXkjF8PjU,2013
- jaf/core/agent_tool.py,sha256=gZje8_gZSaWCecySg2ZBK07RcD8bc2hxHsR4z87oKJE,12075
+ jaf/core/agent_tool.py,sha256=bwYQtRK9YfwPM_3s2kjp3Vl-6vR64jUlOnviqM0Z5tM,15411
  jaf/core/analytics.py,sha256=ypdhllyOThXZB-TY_eR1t1n2qrnAVN7Ljb8PaOtJft0,23267
  jaf/core/checkpoint.py,sha256=O7mfi7gFOAUgJ3zHzgJsr11uzn-BU-Vj1iKyKjcirMk,8398
  jaf/core/composition.py,sha256=Tj0-FRTVWygmAfsBLld7pnZK4nrGMMBx2YYJW_KQPoo,25393
- jaf/core/engine.py,sha256=JqAPOll50FyU1kUelRCHu2_zGmkoO-a9edBJXefu_xs,71219
+ jaf/core/engine.py,sha256=D_RtMWI43oSm7gK_J2kFRsJ2EJkHX4hMj0soUNXC92k,71179
  jaf/core/errors.py,sha256=iDw00o3WH0gHcenRcTj3QEbbloZVpgwnPij6mtaJJk4,5710
  jaf/core/guardrails.py,sha256=oPB7MpD3xWiCWoyaS-xQQp-glaPON7GNVrIL0h1Jefs,26931
  jaf/core/handoff.py,sha256=M7TQfd7BXuer1ZeRJ51nLsI55KifbM6faNtmA2Nsj3I,6196
@@ -56,8 +56,8 @@ jaf/core/state.py,sha256=fdWDc2DQ-o_g_8E4ibg2QM0Vad_XUique3a5iYBwGZo,9516
  jaf/core/streaming.py,sha256=5ntOtJrZVCHuGsygquyCLG2J5yuSxE6DN5OM-BrQiGw,16818
  jaf/core/tool_results.py,sha256=L9U3JDQAjAH5YR7iMpSxfVky2Nxo6FYQs4WE05RATaQ,11283
  jaf/core/tools.py,sha256=rHxzAfGVGpYk3YJKmrq3AQLW0oE3ACkiJBOwle2bLdc,15146
- jaf/core/tracing.py,sha256=4as-pBYrg2nYvP5kj0IOLopDeXCl2tXUPXA6tQrpYHA,57324
- jaf/core/types.py,sha256=MwHSXSamOz3QDjTEaOQzNqOMU1JxwFbHg8Fd9Xzw33Y,35576
+ jaf/core/tracing.py,sha256=p5C7l0X1Is3cNjsINiEsUv01rnUFz9Z0lh4DFWRXsUE,59360
+ jaf/core/types.py,sha256=lJXlkL55devvzbc5efT5FdQ_LX3JcsMWA10Hy8Cd5Qs,37015
  jaf/core/workflows.py,sha256=0825AoD1QwEiGAs5IRlWHmaKrjurx6xF7oDJR6POBsg,25651
  jaf/memory/__init__.py,sha256=YfANOg5vUFSPVG7gpBE4_lYkV5X3_U6Yj9v1_QexfN0,1396
  jaf/memory/approval_storage.py,sha256=DcwtERcoIMH7B-abK9hqND3Moz4zSETsPlgJNkvqcaM,10573
@@ -75,7 +75,7 @@ jaf/policies/handoff.py,sha256=3lPegkSV_2LUf6jEZnj68_g3XUGFB_Fsj1C_6Svr2Kg,8128
  jaf/policies/validation.py,sha256=-zhB5ysH0Y4JnstHzo3I8tt-PFB9FSHBwSUALITBxw4,11016
  jaf/providers/__init__.py,sha256=PfIQkCtXb_yiTEjqs5msGv5-a6De2ujFCEaDGJEe_TQ,2100
  jaf/providers/mcp.py,sha256=fGfrlYx5g7ZX1fBUkPmAYSePKrCc4pG_HKngV_QCdRU,13148
- jaf/providers/model.py,sha256=4RSjBUpmpkU4JePwjbVd3WlXdBDoU1w_n1VLVQSPL9Q,48591
+ jaf/providers/model.py,sha256=FCnenKOLwh5JJ8hcXy7pemJb32EO0uvoww5ZTqd4mlE,58619
  jaf/server/__init__.py,sha256=cYqdruJCJ3W1AMmmxMjAnDlj9gh3XbHhtegjq4nYRNY,391
  jaf/server/main.py,sha256=usdCRZfDP3GWQchh1o2tHd4KqTTFyQQCD9w4khd9rSo,2113
  jaf/server/server.py,sha256=ZhZ2gmY10eQNaKUlE7ecMkrwMkYkAh-QgKdUJ2q7ktM,51532
@@ -89,9 +89,9 @@ jaf/visualization/functional_core.py,sha256=0Xs2R8ELADKNIgokcbjuxmWwxEyCH1yXIEdG
  jaf/visualization/graphviz.py,sha256=EwWVIRv8Z7gTiO5Spvcm-z_UUQ1oWNPRgdE33ZzFwx8,11569
  jaf/visualization/imperative_shell.py,sha256=N5lWzOLMIU_iCoy3n5WCg49eec8VxV8f7JIG6_wNtVw,2506
  jaf/visualization/types.py,sha256=90G8oClsFa_APqTuMrTW6KjD0oG9I4kVur773dXNW0E,1393
- jaf_py-2.6.3.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
- jaf_py-2.6.3.dist-info/METADATA,sha256=FxmX-n2tzG4xRB7ZDnzs_Veo1v_wHLN5SO0fBe0mJAM,27743
- jaf_py-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- jaf_py-2.6.3.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
- jaf_py-2.6.3.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
- jaf_py-2.6.3.dist-info/RECORD,,
+ jaf_py-2.6.5.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
+ jaf_py-2.6.5.dist-info/METADATA,sha256=sacV8SfppPc9buMj-yoaNvDqrtF7S-k9P51zRZqp6ls,27743
+ jaf_py-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ jaf_py-2.6.5.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
+ jaf_py-2.6.5.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
+ jaf_py-2.6.5.dist-info/RECORD,,