jaf-py 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jaf/__init__.py +1 -1
- jaf/core/agent_tool.py +64 -3
- jaf/core/engine.py +2 -2
- jaf/core/tracing.py +47 -11
- jaf/core/types.py +45 -2
- jaf/providers/model.py +265 -47
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/METADATA +2 -2
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/RECORD +12 -12
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/WHEEL +0 -0
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/entry_points.txt +0 -0
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/licenses/LICENSE +0 -0
- {jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/top_level.txt +0 -0
jaf/__init__.py
CHANGED
jaf/core/agent_tool.py
CHANGED
@@ -8,7 +8,6 @@ by other agents, enabling hierarchical agent orchestration patterns.
 import asyncio
 import json
 import inspect
-import inspect
 import contextvars
 from typing import Any, Callable, Dict, List, Optional, Union, Awaitable, TypeVar, get_type_hints
 
@@ -181,11 +180,73 @@ def create_agent_tool(
 # Session inheritance is configurable via preserve_session.
 # - When True: inherit parent's conversation_id and memory (shared memory/session)
 # - When False: do not inherit (ephemeral, per-invocation sub-agent run)
+#
+# Model selection for subagents:
+# - If subagent has its own model_config, use that model AND create appropriate provider
+# - If subagent has no model_config, inherit parent's model_override and provider
+# This allows subagents to run on different models than the parent agent
+subagent_model_override = None
+subagent_model_provider = parent_config.model_provider
+
+if agent.model_config and agent.model_config.name:
+    subagent_model_name = agent.model_config.name
+    # Subagent has explicit model_config - create appropriate provider for it
+    # Use model_override to force the subagent's model
+    subagent_model_override = subagent_model_name
+
+    # Create provider based on model type
+    import os
+    if subagent_model_name.startswith("azure/"):
+        try:
+            from jaf.providers import make_litellm_sdk_provider
+            azure_api_key = os.getenv("AZURE_API_KEY")
+            azure_api_base = os.getenv("AZURE_API_BASE")
+            azure_api_version = os.getenv("AZURE_API_VERSION")
+            subagent_model_provider = make_litellm_sdk_provider(
+                model=subagent_model_name,
+                api_key=azure_api_key,
+                base_url=azure_api_base,
+                api_version=azure_api_version,
+            )
+        except Exception as e:
+            # Fallback to parent provider if Azure provider creation fails
+            subagent_model_provider = parent_config.model_provider
+    elif subagent_model_name.startswith("vertex_ai/"):
+        try:
+            from jaf.providers import make_litellm_sdk_provider
+            vertex_project = os.getenv("VERTEXAI_PROJECT")
+            vertex_location = os.getenv("VERTEXAI_LOCATION")
+            if not vertex_project or not vertex_location:
+                raise ValueError(
+                    "VERTEXAI_PROJECT and VERTEXAI_LOCATION environment variables are required for vertex_ai/ models"
+                )
+            subagent_model_provider = make_litellm_sdk_provider(
+                model=subagent_model_name,
+                vertex_project=vertex_project,
+                vertex_location=vertex_location,
+            )
+        except Exception:
+            subagent_model_provider = parent_config.model_provider
+    elif subagent_model_name.startswith("glm"):
+        try:
+            from jaf.providers import make_litellm_provider
+            subagent_model_provider = make_litellm_provider(
+                base_url=os.getenv("LITELLM_BASE_URL"),
+                api_key=os.getenv("LITELLM_KEY")
+            )
+        except Exception:
+            subagent_model_provider = parent_config.model_provider
+    # For other models, use parent's provider (may work or may not)
+else:
+    # No subagent model_config - inherit from parent
+    subagent_model_override = parent_config.model_override
+    subagent_model_provider = parent_config.model_provider
+
 sub_config = RunConfig(
     agent_registry={agent.name: agent, **parent_config.agent_registry},
-    model_provider=
+    model_provider=subagent_model_provider,
     max_turns=max_turns or parent_config.max_turns,
-    model_override=
+    model_override=subagent_model_override,
     initial_input_guardrails=parent_config.initial_input_guardrails,
     final_output_guardrails=parent_config.final_output_guardrails,
     on_event=parent_config.on_event,
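
For context, a minimal standalone sketch of the provider-selection rule introduced above. It mirrors the model-name prefix checks rather than calling the jaf provider factories; the function name and the returned labels are illustrative only:

# Hypothetical sketch of the subagent provider-selection rule: an explicit model_config
# on the subagent picks a provider by model-name prefix, anything else inherits the parent's.
from typing import Optional

def pick_subagent_provider(subagent_model: Optional[str], parent_provider: str) -> str:
    if subagent_model is None:
        return parent_provider                       # no model_config: inherit from parent
    if subagent_model.startswith("azure/"):
        return "litellm_sdk_provider (azure env)"    # AZURE_API_KEY / AZURE_API_BASE / AZURE_API_VERSION
    if subagent_model.startswith("vertex_ai/"):
        return "litellm_sdk_provider (vertex env)"   # VERTEXAI_PROJECT / VERTEXAI_LOCATION required
    if subagent_model.startswith("glm"):
        return "litellm_provider (LITELLM_BASE_URL)" # proxy-style provider
    return parent_provider                           # any other model: keep the parent's provider

assert pick_subagent_provider(None, "parent") == "parent"
assert pick_subagent_provider("azure/gpt-4o", "parent").startswith("litellm_sdk")
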
jaf/core/engine.py
CHANGED
@@ -704,7 +704,7 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
 stream_usage = raw_chunk["usage"]
 if not stream_model and "model" in raw_chunk and raw_chunk["model"]:
     stream_model = raw_chunk["model"]
-
+
 # Text deltas
 delta_text = getattr(chunk, "delta", None)
 if delta_text:
@@ -820,7 +820,7 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
 llm_response["usage"] = stream_usage
 if stream_model:
     llm_response["model"] = stream_model
-
+
 except Exception:
     # Fallback to non-streaming on error
     assistant_event_streamed = False
jaf/core/tracing.py
CHANGED
@@ -469,7 +469,7 @@ class LangfuseTraceCollector:
 public_key=public_key,
 secret_key=secret_key,
 host=host,
-release="jaf-py-v2.6.
+release="jaf-py-v2.6.5",
 httpx_client=client,
 )
 self._httpx_client = client
@@ -753,7 +753,9 @@ class LangfuseTraceCollector:
 system_prompt = context.system_prompt
 
 if system_prompt:
-    print(
+    print(
+        f"[LANGFUSE DEBUG] Extracted system_prompt: {system_prompt[:100] if isinstance(system_prompt, str) else system_prompt}..."
+    )
 
 print(
     f"[LANGFUSE DEBUG] Final extracted - user_query: {user_query}, user_id: {user_id}"
@@ -912,24 +914,24 @@ class LangfuseTraceCollector:
 # End the generation
 generation = self.active_spans[span_id]
 
-
 choice = self._get_event_data(event, "choice", {})
 usage = self._get_event_data(event, "usage", {})
 model = self._get_event_data(event, "model", "unknown")
-
+
 # Also try to get model from the choice if not at top level
 if model == "unknown" and isinstance(choice, dict):
     model = choice.get("model", "unknown")
-
+
 print(f"[LANGFUSE] Extracted - model: '{model}', usage: {usage}")
-
-# Convert to Langfuse
+
+# Convert to Langfuse format with detailed cache information
 langfuse_usage = None
 if usage:
     prompt_tokens = usage.get("prompt_tokens", 0)
     completion_tokens = usage.get("completion_tokens", 0)
     total_tokens = usage.get("total_tokens", 0)
 
+    # Build detailed usage dict with cache information
     langfuse_usage = {
         "input": prompt_tokens,
         "output": completion_tokens,
@@ -937,9 +939,40 @@ class LangfuseTraceCollector:
         "unit": "TOKENS",
     }
 
-
-
-
+    # Add cache-related fields if available (for prompt caching support)
+    if (
+        "cache_creation_input_tokens" in usage
+        and usage["cache_creation_input_tokens"]
+    ):
+        langfuse_usage["cache_creation_input_tokens"] = usage[
+            "cache_creation_input_tokens"
+        ]
+    if "cache_read_input_tokens" in usage and usage["cache_read_input_tokens"]:
+        langfuse_usage["cache_read_input_tokens"] = usage[
+            "cache_read_input_tokens"
+        ]
+
+    # Add detailed token breakdowns if available
+    if "prompt_tokens_details" in usage and usage["prompt_tokens_details"]:
+        details = usage["prompt_tokens_details"]
+        if "cached_tokens" in details and details["cached_tokens"]:
+            langfuse_usage["input_cached_tokens"] = details["cached_tokens"]
+        if "audio_tokens" in details and details["audio_tokens"]:
+            langfuse_usage["input_audio_tokens"] = details["audio_tokens"]
+
+    if (
+        "completion_tokens_details" in usage
+        and usage["completion_tokens_details"]
+    ):
+        details = usage["completion_tokens_details"]
+        if "reasoning_tokens" in details and details["reasoning_tokens"]:
+            langfuse_usage["output_reasoning_tokens"] = details[
+                "reasoning_tokens"
+            ]
+        if "audio_tokens" in details and details["audio_tokens"]:
+            langfuse_usage["output_audio_tokens"] = details["audio_tokens"]
+
+    print(f"[LANGFUSE] Usage data with cache details: {langfuse_usage}")
 
 # Include model information in the generation end - Langfuse will calculate costs automatically
 # Use compatibility wrapper for ending spans/generations
@@ -1260,7 +1293,10 @@ def create_composite_trace_collector(
 # Automatically add Langfuse collector if keys are configured
 if os.getenv("LANGFUSE_PUBLIC_KEY") and os.getenv("LANGFUSE_SECRET_KEY"):
     langfuse_collector = LangfuseTraceCollector(
-        httpx_client=httpx_client,
+        httpx_client=httpx_client,
+        proxy=proxy,
+        timeout=timeout,
+        include_system_prompt=include_system_prompt,
     )
     collector_list.append(langfuse_collector)
 
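
As an illustration of the new usage mapping, a simplified standalone sketch (not the collector code itself) of how an OpenAI-style usage payload with cache fields is flattened into the dict handed to Langfuse; key names follow the diff above, the function name is hypothetical:

def to_langfuse_usage(usage: dict) -> dict:
    # Base token counts plus opt-in cache and reasoning details, mirroring the hunks above.
    out = {
        "input": usage.get("prompt_tokens", 0),
        "output": usage.get("completion_tokens", 0),
        "unit": "TOKENS",
    }
    if usage.get("cache_creation_input_tokens"):
        out["cache_creation_input_tokens"] = usage["cache_creation_input_tokens"]
    if usage.get("cache_read_input_tokens"):
        out["cache_read_input_tokens"] = usage["cache_read_input_tokens"]
    prompt_details = usage.get("prompt_tokens_details") or {}
    if prompt_details.get("cached_tokens"):
        out["input_cached_tokens"] = prompt_details["cached_tokens"]
    completion_details = usage.get("completion_tokens_details") or {}
    if completion_details.get("reasoning_tokens"):
        out["output_reasoning_tokens"] = completion_details["reasoning_tokens"]
    return out

print(to_langfuse_usage({
    "prompt_tokens": 1200, "completion_tokens": 80, "total_tokens": 1280,
    "cache_read_input_tokens": 900,
    "prompt_tokens_details": {"cached_tokens": 900},
}))
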
jaf/core/types.py
CHANGED
@@ -1009,6 +1009,38 @@ class RetryEvent:
 )
 
 
+@dataclass(frozen=True)
+class FallbackEventData:
+    """Data for model fallback events."""
+
+    from_model: str  # Model that failed
+    to_model: str  # Fallback model being tried
+    reason: str  # Reason for fallback (e.g., "Content Policy Violation", "Context Window Exceeded", "Rate Limit")
+    fallback_type: Literal["general", "content_policy", "context_window"]  # Type of fallback
+    attempt: int  # Which fallback attempt this is (1-indexed)
+    trace_id: TraceId
+    run_id: RunId
+    error_details: Optional[Dict[str, Any]] = None  # Additional error context
+
+
+@dataclass(frozen=True)
+class FallbackEvent:
+    """Event emitted when a model fallback occurs."""
+
+    type: Literal["fallback"] = "fallback"
+    data: FallbackEventData = field(
+        default_factory=lambda: FallbackEventData(
+            from_model="",
+            to_model="",
+            reason="",
+            fallback_type="general",
+            attempt=1,
+            trace_id=TraceId(""),
+            run_id=RunId(""),
+        )
+    )
+
+
 # Union type for all trace events
 TraceEvent = Union[
     RunStartEvent,
@@ -1024,6 +1056,7 @@ TraceEvent = Union[
     HandoffEvent,
     RunEndEvent,
     RetryEvent,
+    FallbackEvent,
 ]
 
 
@@ -1096,7 +1129,9 @@ class RunConfig(Generic[Ctx]):
     agent_registry: Dict[str, Agent[Ctx, Any]]
     model_provider: ModelProvider[Ctx]
     max_turns: Optional[int] = 50
-    max_tokens: Optional[int] =
+    max_tokens: Optional[int] = (
+        None  # Default max_tokens for all agents (can be overridden per agent)
+    )
     model_override: Optional[str] = None
     initial_input_guardrails: Optional[List[Guardrail]] = None
     final_output_guardrails: Optional[List[Guardrail]] = None
@@ -1120,7 +1155,7 @@ class RunConfig(Generic[Ctx]):
             [List[Message], RunState[Ctx]],
             Union[List[Message], Awaitable[List[Message]]],
         ]
-    ] = None
+    ] = None
     max_empty_response_retries: int = 3  # Maximum retries when LLM returns empty response
     empty_response_retry_delay: float = (
         1.0  # Initial delay in seconds before retrying empty response (uses exponential backoff)
@@ -1129,6 +1164,14 @@
     prefer_streaming: Optional[bool] = (
         None  # Whether to prefer streaming responses. None (default) = use streaming if available, True = prefer streaming, False = disable streaming
     )
+    # Model fallback configuration
+    fallbacks: Optional[List[str]] = None  # List of fallback models to try if primary model fails
+    content_policy_fallbacks: Optional[List[str]] = (
+        None  # Fallback models for content policy violations
+    )
+    context_window_fallbacks: Optional[List[str]] = (
+        None  # Fallback models for context window exceeded errors
+    )
 
 
 # Regeneration types for conversation management
jaf/providers/model.py
CHANGED
@@ -30,6 +30,8 @@ from ..core.types import (
     get_text_content,
     RetryEvent,
     RetryEventData,
+    FallbackEvent,
+    FallbackEventData,
 )
 from ..core.proxy import ProxyConfig
 from ..utils.document_processor import (
@@ -113,6 +115,55 @@ async def _is_vision_model(model: str, base_url: str) -> bool:
     return is_known_vision_model
 
 
+def _classify_error_for_fallback(e: Exception) -> tuple[str, str]:
+    """
+    Classify an error to determine the fallback type and reason.
+
+    Args:
+        e: Exception from model call
+
+    Returns:
+        Tuple of (fallback_type, reason)
+    """
+    error_message = str(e).lower()
+    error_type = type(e).__name__
+
+    # Check for content policy violations
+    if (
+        "content" in error_message
+        and ("policy" in error_message or "filter" in error_message)
+        or "contentpolicyviolation" in error_type.lower()
+        or "content_filter" in error_message
+        or "safety" in error_message
+    ):
+        return ("content_policy", "Content Policy Violation")
+
+    # Check for context window exceeded
+    if (
+        "context" in error_message
+        and "window" in error_message
+        or "too long" in error_message
+        or "maximum context" in error_message
+        or "contextwindowexceeded" in error_type.lower()
+        or "prompt is too long" in error_message
+        or "tokens" in error_message
+        and "limit" in error_message
+    ):
+        return ("context_window", "Context Window Exceeded")
+
+    # Default to general fallback
+    if hasattr(e, "status_code"):
+        status_code = e.status_code
+        if status_code == 429:
+            return ("general", f"HTTP {status_code} - Rate Limit")
+        elif 500 <= status_code < 600:
+            return ("general", f"HTTP {status_code} - Server Error")
+        else:
+            return ("general", f"HTTP {status_code}")
+
+    return ("general", error_type)
+
+
 async def _retry_with_events(
     operation_func,
     state: RunState,
@@ -259,10 +310,10 @@ def make_litellm_provider(
 async def get_completion(
     self, state: RunState[Ctx], agent: Agent[Ctx, Any], config: RunConfig[Ctx]
 ) -> Dict[str, Any]:
-    """Get completion from the model."""
+    """Get completion from the model with fallback support."""
 
-    # Determine model to use
-
+    # Determine initial model to use
+    primary_model = config.model_override or (
         agent.model_config.name if agent.model_config else "gpt-4o"
     )
 
@@ -277,10 +328,10 @@ def make_litellm_provider(
 )
 
 if has_image_content:
-    supports_vision = await _is_vision_model(
+    supports_vision = await _is_vision_model(primary_model, base_url)
     if not supports_vision:
        raise ValueError(
-            f"Model {
+            f"Model {primary_model} does not support vision capabilities. "
            f"Please use a vision-capable model like gpt-4o, claude-3-5-sonnet, or gemini-1.5-pro."
        )
 
@@ -322,39 +373,123 @@ def make_litellm_provider(
     last_message.role == ContentRole.TOOL or last_message.role == "tool"
 )
 
-#
-
-
-
-
-
-
-
-
-
-max_tokens =
-
-
-
-
-
-
-
-
-
-
-
-
-
+# Helper function to make API call with a specific model
+async def _make_completion_call(model_name: str) -> Dict[str, Any]:
+    # Prepare request parameters
+    request_params = {"model": model_name, "messages": messages, "stream": False}
+
+    # Add optional parameters
+    if agent.model_config:
+        if agent.model_config.temperature is not None:
+            request_params["temperature"] = agent.model_config.temperature
+        # Use agent's max_tokens if set, otherwise fall back to config's max_tokens
+        max_tokens = agent.model_config.max_tokens
+        if max_tokens is None:
+            max_tokens = config.max_tokens
+        if max_tokens is not None:
+            request_params["max_tokens"] = max_tokens
+    elif config.max_tokens is not None:
+        # No model_config but config has max_tokens
+        request_params["max_tokens"] = config.max_tokens
+
+    if tools:
+        request_params["tools"] = tools
+        # Always set tool_choice to auto when tools are available
+        request_params["tool_choice"] = "auto"
+
+    if agent.output_codec:
+        request_params["response_format"] = {"type": "json_object"}
+
+    # Make the API call with retry handling
+    async def _api_call():
+        return await self.client.chat.completions.create(**request_params)
+
+    # Use retry wrapper to track retries in Langfuse
+    return await _retry_with_events(
+        _api_call,
+        state,
+        config,
+        operation_name="llm_call",
+        max_retries=3,
+        backoff_factor=1.0,
+    )
 
-#
-
-
+# Try primary model first
+last_exception = None
+current_model = primary_model
+
+try:
+    response = await _make_completion_call(current_model)
+except Exception as e:
+    last_exception = e
+
+    # Classify the error to determine which fallback list to use
+    fallback_type, reason = _classify_error_for_fallback(e)
+
+    # Determine which fallback list to use
+    fallback_models = []
+    if fallback_type == "content_policy" and config.content_policy_fallbacks:
+        fallback_models = config.content_policy_fallbacks
+    elif fallback_type == "context_window" and config.context_window_fallbacks:
+        fallback_models = config.context_window_fallbacks
+    elif config.fallbacks:
+        fallback_models = config.fallbacks
+
+    # Try fallback models
+    if fallback_models:
+        print(
+            f"[JAF:FALLBACK] Primary model '{current_model}' failed with {reason}. "
+            f"Trying {len(fallback_models)} fallback model(s)..."
+        )
 
-
-
-
-
+        for i, fallback_model in enumerate(fallback_models, 1):
+            try:
+                # Emit fallback event
+                if config.on_event:
+                    fallback_event = FallbackEvent(
+                        data=FallbackEventData(
+                            from_model=current_model,
+                            to_model=fallback_model,
+                            reason=reason,
+                            fallback_type=fallback_type,
+                            attempt=i,
+                            trace_id=state.trace_id,
+                            run_id=state.run_id,
+                            error_details={
+                                "error_type": type(last_exception).__name__,
+                                "error_message": str(last_exception),
+                            },
+                        )
+                    )
+                    config.on_event(fallback_event)
+
+                print(
+                    f"[JAF:FALLBACK] Attempting fallback {i}/{len(fallback_models)}: {fallback_model}"
+                )
+
+                # Try the fallback model
+                response = await _make_completion_call(fallback_model)
+                current_model = fallback_model
+                print(
+                    f"[JAF:FALLBACK] Successfully used fallback model: {fallback_model}"
+                )
+                break  # Success - exit the fallback loop
+
+            except Exception as fallback_error:
+                last_exception = fallback_error
+                print(
+                    f"[JAF:FALLBACK] Fallback model '{fallback_model}' also failed: {fallback_error}"
+                )
+
+                # If this was the last fallback, re-raise
+                if i == len(fallback_models):
+                    print(
+                        f"[JAF:FALLBACK] All fallback models exhausted. Raising last exception."
+                    )
+                    raise
+    else:
+        # No fallbacks configured, re-raise original exception
+        raise
 
 # Return in the expected format that the engine expects
 choice = response.choices[0]
@@ -371,7 +506,7 @@
 for tc in choice.message.tool_calls
 ]
 
-# Extract usage data
+# Extract usage data with detailed cache information
 usage_data = None
 if response.usage:
     usage_data = {
@@ -380,6 +515,45 @@
         "total_tokens": response.usage.total_tokens,
     }
 
+    # Extract cache-related fields if available (for prompt caching support)
+    if hasattr(response.usage, "cache_creation_input_tokens"):
+        usage_data["cache_creation_input_tokens"] = (
+            response.usage.cache_creation_input_tokens
+        )
+    if hasattr(response.usage, "cache_read_input_tokens"):
+        usage_data["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
+
+    # Extract detailed token breakdowns
+    if (
+        hasattr(response.usage, "prompt_tokens_details")
+        and response.usage.prompt_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+            details["cached_tokens"] = (
+                response.usage.prompt_tokens_details.cached_tokens
+            )
+        if hasattr(response.usage.prompt_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = response.usage.prompt_tokens_details.audio_tokens
+        if details:
+            usage_data["prompt_tokens_details"] = details
+
+    if (
+        hasattr(response.usage, "completion_tokens_details")
+        and response.usage.completion_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.completion_tokens_details, "reasoning_tokens"):
+            details["reasoning_tokens"] = (
+                response.usage.completion_tokens_details.reasoning_tokens
+            )
+        if hasattr(response.usage.completion_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = (
+                response.usage.completion_tokens_details.audio_tokens
+            )
+        if details:
+            usage_data["completion_tokens_details"] = details
+
 return {
     "id": response.id,
     "created": response.created,
@@ -688,7 +862,12 @@ def make_litellm_sdk_provider(
 
 # Use retry wrapper to track retries in Langfuse
 response = await _retry_with_events(
-    _api_call,
+    _api_call,
+    state,
+    config,
+    operation_name="llm_call",
+    max_retries=3,
+    backoff_factor=1.0,
 )
 
 # Return in the expected format that the engine expects
@@ -706,23 +885,62 @@
 for tc in choice.message.tool_calls
 ]
 
-# Extract usage data - ALWAYS return a dict with defaults for Langfuse cost tracking
+# Extract usage data with detailed cache information - ALWAYS return a dict with defaults for Langfuse cost tracking
 # Initialize with zeros as defensive default (matches AzureDirectProvider pattern)
 usage_data = {
     "prompt_tokens": 0,
     "completion_tokens": 0,
     "total_tokens": 0,
 }
-
+
 actual_model = getattr(response, "model", model_name)
-
+
 if response.usage:
     usage_data = {
         "prompt_tokens": response.usage.prompt_tokens,
         "completion_tokens": response.usage.completion_tokens,
         "total_tokens": response.usage.total_tokens,
     }
-
+
+    # Extract cache-related fields if available (for prompt caching support)
+    if hasattr(response.usage, "cache_creation_input_tokens"):
+        usage_data["cache_creation_input_tokens"] = (
+            response.usage.cache_creation_input_tokens
+        )
+    if hasattr(response.usage, "cache_read_input_tokens"):
+        usage_data["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
+
+    # Extract detailed token breakdowns
+    if (
+        hasattr(response.usage, "prompt_tokens_details")
+        and response.usage.prompt_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+            details["cached_tokens"] = (
+                response.usage.prompt_tokens_details.cached_tokens
+            )
+        if hasattr(response.usage.prompt_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = response.usage.prompt_tokens_details.audio_tokens
+        if details:
+            usage_data["prompt_tokens_details"] = details
+
+    if (
+        hasattr(response.usage, "completion_tokens_details")
+        and response.usage.completion_tokens_details
+    ):
+        details = {}
+        if hasattr(response.usage.completion_tokens_details, "reasoning_tokens"):
+            details["reasoning_tokens"] = (
+                response.usage.completion_tokens_details.reasoning_tokens
+            )
+        if hasattr(response.usage.completion_tokens_details, "audio_tokens"):
+            details["audio_tokens"] = (
+                response.usage.completion_tokens_details.audio_tokens
+            )
+        if details:
+            usage_data["completion_tokens_details"] = details
+
 message_content = {
     "content": choice.message.content,
     "tool_calls": tool_calls,
@@ -730,7 +948,7 @@ def make_litellm_sdk_provider(
     "_usage": usage_data,
     "_model": actual_model,
 }
-
+
 return {
     "id": response.id,
     "created": response.created,
@@ -820,7 +1038,7 @@ def make_litellm_sdk_provider(
 
 # Stream using litellm
 stream = await litellm.acompletion(**request_params)
-
+
 accumulated_usage: Optional[Dict[str, int]] = None
 response_model: Optional[str] = None
 
@@ -829,15 +1047,15 @@ def make_litellm_sdk_provider(
 # Best-effort extraction of raw for debugging
 try:
     raw_obj = chunk.model_dump() if hasattr(chunk, "model_dump") else None
-
+
     # Capture usage from chunk if present
     if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
         accumulated_usage = raw_obj["usage"]
-
+
     # Capture model from chunk if present
     if raw_obj and "model" in raw_obj and raw_obj["model"]:
         response_model = raw_obj["model"]
-
+
 except Exception as e:
     raw_obj = None
 
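
A standalone sketch of the routing behavior added here, under assumed (hypothetical) model names; it mirrors rather than imports _classify_error_for_fallback and the list-selection logic from the diff above:

def classify(message: str) -> str:
    # Simplified error classification by message keywords, as in the new helper.
    m = message.lower()
    if "content_filter" in m or "safety" in m or ("content" in m and ("policy" in m or "filter" in m)):
        return "content_policy"
    if "maximum context" in m or "prompt is too long" in m or ("context" in m and "window" in m):
        return "context_window"
    return "general"

def pick_fallbacks(kind: str, cfg: dict) -> list:
    # Choose the matching fallback list, falling back to the general one.
    if kind == "content_policy" and cfg.get("content_policy_fallbacks"):
        return cfg["content_policy_fallbacks"]
    if kind == "context_window" and cfg.get("context_window_fallbacks"):
        return cfg["context_window_fallbacks"]
    return cfg.get("fallbacks") or []

cfg = {"fallbacks": ["m-general"], "context_window_fallbacks": ["m-long-context"]}
assert pick_fallbacks(classify("prompt is too long for this model"), cfg) == ["m-long-context"]
assert pick_fallbacks(classify("HTTP 429 rate limit"), cfg) == ["m-general"]
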
{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jaf-py
-Version: 2.6.
+Version: 2.6.5
 Summary: A purely functional agent framework with immutable state and composable tools - Python implementation
 Author: JAF Contributors
 Maintainer: JAF Contributors
@@ -82,7 +82,7 @@ Dynamic: license-file
 
 <!--  -->
 
-[![Version](https://img.shields.io/badge/version-2.6.3-blue.svg)](https://github.com/xynehq/jaf-py)
 [![Python](https://img.shields.io/badge/python-3.10+-green.svg)](https://www.python.org/)
 [![Docs](https://img.shields.io/badge/docs-MkDocs-blue.svg)](https://xynehq.github.io/jaf-py/)
 

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-jaf/__init__.py,sha256=
+jaf/__init__.py,sha256=clE4UWW2Y5bty2ataCPqnL__bVP8HGO1EBIR1VYI9ZU,8652
 jaf/cli.py,sha256=EDMMA5uX0e3TUIedLdyP3p4Qy-aXADvpht3VgJPJagU,8299
 jaf/exceptions.py,sha256=FdLIw7bdCNtBYfqRyJBkRT4Z1vWuvkzrMqFiMAzjL8Y,9158
 jaf/a2a/__init__.py,sha256=r4W-WHZNjoxR8EQ0x41_rY3fl12OH5qcSn0KycXaKKU,7752
@@ -39,11 +39,11 @@ jaf/a2a/tests/test_integration.py,sha256=hfGAtwXOfV9OXrFgS94twMbzxMQ4Vfj0KYoNT5V
 jaf/a2a/tests/test_protocol.py,sha256=3Ov9fTqznDqJLg8PqY2oy9I2Tpvwv_N0aN-rpFpAmjM,22215
 jaf/a2a/tests/test_types.py,sha256=rSUhZmOQcFrgNiEg4hDCZwypj19h6mSamVapWkrzZWc,17329
 jaf/core/__init__.py,sha256=4IqKRspv8gvgAtbmvaMvUgYZB1fSIy3vsyCXkjF8PjU,2013
-jaf/core/agent_tool.py,sha256=
+jaf/core/agent_tool.py,sha256=bwYQtRK9YfwPM_3s2kjp3Vl-6vR64jUlOnviqM0Z5tM,15411
 jaf/core/analytics.py,sha256=ypdhllyOThXZB-TY_eR1t1n2qrnAVN7Ljb8PaOtJft0,23267
 jaf/core/checkpoint.py,sha256=O7mfi7gFOAUgJ3zHzgJsr11uzn-BU-Vj1iKyKjcirMk,8398
 jaf/core/composition.py,sha256=Tj0-FRTVWygmAfsBLld7pnZK4nrGMMBx2YYJW_KQPoo,25393
-jaf/core/engine.py,sha256=
+jaf/core/engine.py,sha256=D_RtMWI43oSm7gK_J2kFRsJ2EJkHX4hMj0soUNXC92k,71179
 jaf/core/errors.py,sha256=iDw00o3WH0gHcenRcTj3QEbbloZVpgwnPij6mtaJJk4,5710
 jaf/core/guardrails.py,sha256=oPB7MpD3xWiCWoyaS-xQQp-glaPON7GNVrIL0h1Jefs,26931
 jaf/core/handoff.py,sha256=M7TQfd7BXuer1ZeRJ51nLsI55KifbM6faNtmA2Nsj3I,6196
@@ -56,8 +56,8 @@ jaf/core/state.py,sha256=fdWDc2DQ-o_g_8E4ibg2QM0Vad_XUique3a5iYBwGZo,9516
 jaf/core/streaming.py,sha256=5ntOtJrZVCHuGsygquyCLG2J5yuSxE6DN5OM-BrQiGw,16818
 jaf/core/tool_results.py,sha256=L9U3JDQAjAH5YR7iMpSxfVky2Nxo6FYQs4WE05RATaQ,11283
 jaf/core/tools.py,sha256=rHxzAfGVGpYk3YJKmrq3AQLW0oE3ACkiJBOwle2bLdc,15146
-jaf/core/tracing.py,sha256=
-jaf/core/types.py,sha256=
+jaf/core/tracing.py,sha256=p5C7l0X1Is3cNjsINiEsUv01rnUFz9Z0lh4DFWRXsUE,59360
+jaf/core/types.py,sha256=lJXlkL55devvzbc5efT5FdQ_LX3JcsMWA10Hy8Cd5Qs,37015
 jaf/core/workflows.py,sha256=0825AoD1QwEiGAs5IRlWHmaKrjurx6xF7oDJR6POBsg,25651
 jaf/memory/__init__.py,sha256=YfANOg5vUFSPVG7gpBE4_lYkV5X3_U6Yj9v1_QexfN0,1396
 jaf/memory/approval_storage.py,sha256=DcwtERcoIMH7B-abK9hqND3Moz4zSETsPlgJNkvqcaM,10573
@@ -75,7 +75,7 @@ jaf/policies/handoff.py,sha256=3lPegkSV_2LUf6jEZnj68_g3XUGFB_Fsj1C_6Svr2Kg,8128
 jaf/policies/validation.py,sha256=-zhB5ysH0Y4JnstHzo3I8tt-PFB9FSHBwSUALITBxw4,11016
 jaf/providers/__init__.py,sha256=PfIQkCtXb_yiTEjqs5msGv5-a6De2ujFCEaDGJEe_TQ,2100
 jaf/providers/mcp.py,sha256=fGfrlYx5g7ZX1fBUkPmAYSePKrCc4pG_HKngV_QCdRU,13148
-jaf/providers/model.py,sha256=
+jaf/providers/model.py,sha256=FCnenKOLwh5JJ8hcXy7pemJb32EO0uvoww5ZTqd4mlE,58619
 jaf/server/__init__.py,sha256=cYqdruJCJ3W1AMmmxMjAnDlj9gh3XbHhtegjq4nYRNY,391
 jaf/server/main.py,sha256=usdCRZfDP3GWQchh1o2tHd4KqTTFyQQCD9w4khd9rSo,2113
 jaf/server/server.py,sha256=ZhZ2gmY10eQNaKUlE7ecMkrwMkYkAh-QgKdUJ2q7ktM,51532
@@ -89,9 +89,9 @@ jaf/visualization/functional_core.py,sha256=0Xs2R8ELADKNIgokcbjuxmWwxEyCH1yXIEdG
 jaf/visualization/graphviz.py,sha256=EwWVIRv8Z7gTiO5Spvcm-z_UUQ1oWNPRgdE33ZzFwx8,11569
 jaf/visualization/imperative_shell.py,sha256=N5lWzOLMIU_iCoy3n5WCg49eec8VxV8f7JIG6_wNtVw,2506
 jaf/visualization/types.py,sha256=90G8oClsFa_APqTuMrTW6KjD0oG9I4kVur773dXNW0E,1393
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
-jaf_py-2.6.
+jaf_py-2.6.5.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
+jaf_py-2.6.5.dist-info/METADATA,sha256=sacV8SfppPc9buMj-yoaNvDqrtF7S-k9P51zRZqp6ls,27743
+jaf_py-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+jaf_py-2.6.5.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
+jaf_py-2.6.5.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
+jaf_py-2.6.5.dist-info/RECORD,,

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/WHEEL
File without changes

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/entry_points.txt
File without changes

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/licenses/LICENSE
File without changes

{jaf_py-2.6.3.dist-info → jaf_py-2.6.5.dist-info}/top_level.txt
File without changes