fred-runtime 2.0.1__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/PKG-INFO +1 -1
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/agent_app.py +297 -175
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/__init__.py +8 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/completion.py +1 -1
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/history_display.py +149 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/pod_client.py +35 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/repl.py +68 -53
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/repl_helpers.py +30 -16
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/client.py +8 -0
- fred_runtime-2.0.2/fred_runtime/eval/__init__.py +13 -0
- fred_runtime-2.0.2/fred_runtime/eval/collector.py +143 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/PKG-INFO +1 -1
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/SOURCES.txt +3 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/pyproject.toml +1 -1
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_client.py +6 -4
- fred_runtime-2.0.2/tests/test_eval_trace.py +314 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/README.md +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/_catalogs.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/config.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/config_loader.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/container.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/context.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/dependencies.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/mcp_config.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/observability_factory.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/openai_compat_router.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/entrypoint.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/kpi_display.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/url_helpers.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/context_aware_tool.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_base_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_fast_text_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_http_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_logs_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_markdown_media_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_vectorsearch_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_workspace_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_interceptors.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_runtime.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_toolkit.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_utils.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/structures.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/token_expiry.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/tool_node_utils.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/deep/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/deep/deep_runtime.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/graph/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/graph/graph_runtime.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/integrations/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/integrations/v2_runtime/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/integrations/v2_runtime/adapters.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/catalog.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/contracts.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/provider.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/resolver.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_langchain_adapter.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_message_codec.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_model_adapter.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_prompting.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_runtime.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_stream_adapter.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_binding.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_loop.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_rendering.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_resolution.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_utils.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tracing.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_context.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/checkpoints.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/model_metadata.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/request_context_helpers.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/sql_checkpointer.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/user_token_refresher.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/__init__.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/filesystem_context.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/tool_approval.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/tool_loop.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/dependency_links.txt +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/entry_points.txt +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/requires.txt +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/top_level.txt +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/setup.cfg +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_agent_app.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_config_loader.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_context.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_graph_runtime_observability.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_history.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_kf_workspace_client.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_kpi_display.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_mcp_config.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_openai_compat_router.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_smoke.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_url_helpers.py +0 -0
- {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_user_token_refresher.py +0 -0
|
@@ -62,6 +62,7 @@ from fred_core.logs.log_setup import log_setup
|
|
|
62
62
|
from fred_core.logs.memory_log_store import RamLogStore
|
|
63
63
|
from fred_core.security.oidc import get_keycloak_client_id, get_keycloak_url
|
|
64
64
|
from fred_core.security.structure import KeycloakUser
|
|
65
|
+
from fred_sdk.contracts.eval import EvalStep, EvalTrace
|
|
65
66
|
from fred_sdk.contracts.context import (
|
|
66
67
|
AgentInvocationRequest,
|
|
67
68
|
AgentInvocationResult,
|
|
@@ -558,8 +559,14 @@ class LocalRegistryAgentInvoker(AgentInvokerPort):
|
|
|
558
559
|
is_error=True,
|
|
559
560
|
)
|
|
560
561
|
|
|
561
|
-
|
|
562
|
-
|
|
562
|
+
context_dict = request.context.model_dump(mode="json")
|
|
563
|
+
context_dict.setdefault("execution_action", ExecutionGrantAction.EXECUTE.value)
|
|
564
|
+
execute_request = _AgentExecuteRequest.model_construct(
|
|
565
|
+
agent_id=request.agent_id,
|
|
566
|
+
agent_instance_id=None,
|
|
567
|
+
message=request.message,
|
|
568
|
+
context=context_dict,
|
|
569
|
+
resume_payload=None,
|
|
563
570
|
)
|
|
564
571
|
|
|
565
572
|
content_parts: list[str] = []
|
|
@@ -766,13 +773,6 @@ class _AgentExecuteRequest(BaseModel):
|
|
|
766
773
|
return self
|
|
767
774
|
|
|
768
775
|
|
|
769
|
-
@dataclass(slots=True)
|
|
770
|
-
class _PreparedRuntimeExecution:
|
|
771
|
-
runtime: ReActRuntime | GraphRuntime
|
|
772
|
-
execution_config: ExecutionConfig
|
|
773
|
-
executor_input: Any
|
|
774
|
-
|
|
775
|
-
|
|
776
776
|
def _to_internal_request(r: RuntimeExecuteRequest) -> "_AgentExecuteRequest":
|
|
777
777
|
"""
|
|
778
778
|
Bridge a public RuntimeExecuteRequest to the internal execution model.
|
|
@@ -797,36 +797,6 @@ def _to_internal_request(r: RuntimeExecuteRequest) -> "_AgentExecuteRequest":
|
|
|
797
797
|
)
|
|
798
798
|
|
|
799
799
|
|
|
800
|
-
def _build_runtime_execute_request_from_invocation(
|
|
801
|
-
request: AgentInvocationRequest,
|
|
802
|
-
) -> RuntimeExecuteRequest:
|
|
803
|
-
"""
|
|
804
|
-
Project one in-process agent invocation onto the public execute contract.
|
|
805
|
-
|
|
806
|
-
Why this exists:
|
|
807
|
-
- pod-local agent-to-agent calls should follow the same request projection
|
|
808
|
-
path as HTTP execution, rather than hand-constructing a second private
|
|
809
|
-
request shape
|
|
810
|
-
- future continuity fields should therefore land once on the typed runtime
|
|
811
|
-
contract, then flow through both local and remote invocation paths
|
|
812
|
-
|
|
813
|
-
How to use it:
|
|
814
|
-
- call from `LocalRegistryAgentInvoker.invoke(...)`
|
|
815
|
-
- pass the result through `_to_internal_request(...)` until the remaining
|
|
816
|
-
internal helpers consume `RuntimeExecuteRequest` directly
|
|
817
|
-
|
|
818
|
-
Example:
|
|
819
|
-
- `runtime_request = _build_runtime_execute_request_from_invocation(request)`
|
|
820
|
-
"""
|
|
821
|
-
|
|
822
|
-
return RuntimeExecuteRequest(
|
|
823
|
-
agent_id=request.agent_id,
|
|
824
|
-
input=request.message,
|
|
825
|
-
session_id=request.context.session_id,
|
|
826
|
-
runtime_context=request.context.model_dump(mode="json"),
|
|
827
|
-
)
|
|
828
|
-
|
|
829
|
-
|
|
830
800
|
class _AgentTemplateSummary(BaseModel):
|
|
831
801
|
template_agent_id: str
|
|
832
802
|
title: str
|
|
@@ -871,7 +841,7 @@ def _apply_runtime_tuning(
|
|
|
871
841
|
- `definition = _apply_runtime_tuning(template_definition, resolution.tuning)`
|
|
872
842
|
"""
|
|
873
843
|
|
|
874
|
-
update: dict[str,
|
|
844
|
+
update: dict[str, object] = {
|
|
875
845
|
"role": tuning.role,
|
|
876
846
|
"description": tuning.description,
|
|
877
847
|
"tags": tuple(tuning.tags),
|
|
@@ -881,11 +851,7 @@ def _apply_runtime_tuning(
|
|
|
881
851
|
),
|
|
882
852
|
}
|
|
883
853
|
system_prompt = tuning.values.get("prompts.system")
|
|
884
|
-
if (
|
|
885
|
-
isinstance(definition, ReActAgentDefinition)
|
|
886
|
-
and isinstance(system_prompt, str)
|
|
887
|
-
and system_prompt.strip()
|
|
888
|
-
):
|
|
854
|
+
if isinstance(system_prompt, str) and system_prompt.strip():
|
|
889
855
|
update["system_prompt_template"] = system_prompt
|
|
890
856
|
return definition.model_copy(update=update)
|
|
891
857
|
|
|
@@ -1416,6 +1382,120 @@ def _sse(payload: str) -> str:
|
|
|
1416
1382
|
return f"data: {payload}\n\n"
|
|
1417
1383
|
|
|
1418
1384
|
|
|
1385
|
+
@dataclass(frozen=True)
|
|
1386
|
+
class _TurnOutcome:
|
|
1387
|
+
model_name: str | None
|
|
1388
|
+
finish_reason: str
|
|
1389
|
+
token_usage: dict[str, Any] | None
|
|
1390
|
+
input_tokens: int | None
|
|
1391
|
+
output_tokens: int | None
|
|
1392
|
+
tool_count: int
|
|
1393
|
+
is_error: bool
|
|
1394
|
+
total_ms: int
|
|
1395
|
+
final_content: str | None
|
|
1396
|
+
|
|
1397
|
+
|
|
1398
|
+
def _parse_turn_outcome(
|
|
1399
|
+
payloads: list[dict[str, Any]],
|
|
1400
|
+
turn_start: float,
|
|
1401
|
+
) -> _TurnOutcome:
|
|
1402
|
+
total_ms = int((time.monotonic() - turn_start) * 1000)
|
|
1403
|
+
tool_count = sum(1 for p in payloads if p.get("kind") == "tool_call")
|
|
1404
|
+
final = next((p for p in reversed(payloads) if p.get("kind") == "final"), None)
|
|
1405
|
+
is_error = any(p.get("kind") == "execution_error" for p in payloads)
|
|
1406
|
+
token_usage: dict[str, Any] | None = final.get("token_usage") if final else None
|
|
1407
|
+
return _TurnOutcome(
|
|
1408
|
+
model_name=final.get("model_name") if final else None,
|
|
1409
|
+
finish_reason="error"
|
|
1410
|
+
if is_error
|
|
1411
|
+
else ((final.get("finish_reason") or "") if final else ""),
|
|
1412
|
+
token_usage=token_usage,
|
|
1413
|
+
input_tokens=token_usage.get("input_tokens") if token_usage else None,
|
|
1414
|
+
output_tokens=token_usage.get("output_tokens") if token_usage else None,
|
|
1415
|
+
tool_count=tool_count,
|
|
1416
|
+
is_error=is_error,
|
|
1417
|
+
total_ms=total_ms,
|
|
1418
|
+
final_content=(final.get("content") or None) if final else None,
|
|
1419
|
+
)
|
|
1420
|
+
|
|
1421
|
+
|
|
1422
|
+
def _build_eval_trace(
|
|
1423
|
+
payloads: list[dict[str, Any]],
|
|
1424
|
+
input_text: str,
|
|
1425
|
+
agent_id: str,
|
|
1426
|
+
session_id: str,
|
|
1427
|
+
turn_start: float,
|
|
1428
|
+
) -> EvalTrace:
|
|
1429
|
+
outcome = _parse_turn_outcome(payloads, turn_start)
|
|
1430
|
+
steps: list[EvalStep] = []
|
|
1431
|
+
retrieval_context: list[str] = []
|
|
1432
|
+
tools_called: list[str] = []
|
|
1433
|
+
error: str | None = None
|
|
1434
|
+
|
|
1435
|
+
for p in payloads:
|
|
1436
|
+
kind = p.get("kind")
|
|
1437
|
+
if kind == "tool_call":
|
|
1438
|
+
steps.append(
|
|
1439
|
+
EvalStep(
|
|
1440
|
+
kind="tool_call",
|
|
1441
|
+
tool_name=p.get("tool_name"),
|
|
1442
|
+
call_id=p.get("call_id"),
|
|
1443
|
+
arguments=p.get("arguments") or {},
|
|
1444
|
+
)
|
|
1445
|
+
)
|
|
1446
|
+
if p.get("tool_name"):
|
|
1447
|
+
tools_called.append(p["tool_name"])
|
|
1448
|
+
elif kind == "tool_result":
|
|
1449
|
+
content = p.get("content", "")
|
|
1450
|
+
is_err = p.get("is_error", False)
|
|
1451
|
+
steps.append(
|
|
1452
|
+
EvalStep(
|
|
1453
|
+
kind="tool_result",
|
|
1454
|
+
tool_name=p.get("tool_name"),
|
|
1455
|
+
call_id=p.get("call_id"),
|
|
1456
|
+
content=content,
|
|
1457
|
+
is_error=is_err,
|
|
1458
|
+
)
|
|
1459
|
+
)
|
|
1460
|
+
if not is_err:
|
|
1461
|
+
sources = p.get("sources") or []
|
|
1462
|
+
if sources:
|
|
1463
|
+
retrieval_context.extend(
|
|
1464
|
+
s["content"] for s in sources if s.get("content")
|
|
1465
|
+
)
|
|
1466
|
+
elif content:
|
|
1467
|
+
retrieval_context.append(content)
|
|
1468
|
+
elif kind == "final":
|
|
1469
|
+
steps.append(EvalStep(kind="final", content=p.get("content")))
|
|
1470
|
+
elif kind == "node_error":
|
|
1471
|
+
steps.append(
|
|
1472
|
+
EvalStep(
|
|
1473
|
+
kind="node_error",
|
|
1474
|
+
node_id=p.get("node_id"),
|
|
1475
|
+
error_message=p.get("error_message"),
|
|
1476
|
+
)
|
|
1477
|
+
)
|
|
1478
|
+
elif kind == "awaiting_human":
|
|
1479
|
+
steps.append(EvalStep(kind="awaiting_human"))
|
|
1480
|
+
elif kind == "execution_error":
|
|
1481
|
+
error = p.get("message")
|
|
1482
|
+
|
|
1483
|
+
return EvalTrace(
|
|
1484
|
+
session_id=session_id,
|
|
1485
|
+
agent_id=agent_id,
|
|
1486
|
+
input=input_text,
|
|
1487
|
+
output=outcome.final_content,
|
|
1488
|
+
error=error,
|
|
1489
|
+
latency_ms=outcome.total_ms,
|
|
1490
|
+
model_name=outcome.model_name,
|
|
1491
|
+
token_usage=outcome.token_usage,
|
|
1492
|
+
finish_reason=outcome.finish_reason or None,
|
|
1493
|
+
steps=tuple(steps),
|
|
1494
|
+
retrieval_context=tuple(retrieval_context),
|
|
1495
|
+
tools_called=tuple(tools_called),
|
|
1496
|
+
)
|
|
1497
|
+
|
|
1498
|
+
|
|
1419
1499
|
def _emit_turn_completed(
|
|
1420
1500
|
container: PodApplicationContext,
|
|
1421
1501
|
*,
|
|
@@ -1447,21 +1527,7 @@ def _emit_turn_completed(
|
|
|
1447
1527
|
"""
|
|
1448
1528
|
try:
|
|
1449
1529
|
kpi = get_runtime_context().get_kpi_writer()
|
|
1450
|
-
|
|
1451
|
-
tool_count = sum(1 for p in payloads if p.get("kind") == "tool_call")
|
|
1452
|
-
final = next((p for p in reversed(payloads) if p.get("kind") == "final"), None)
|
|
1453
|
-
is_error = any(p.get("kind") == "execution_error" for p in payloads)
|
|
1454
|
-
model_name: str | None = final.get("model_name") if final else None
|
|
1455
|
-
finish_reason: str = (
|
|
1456
|
-
"error" if is_error else (final.get("finish_reason") or "") if final else ""
|
|
1457
|
-
)
|
|
1458
|
-
token_usage: dict[str, Any] | None = final.get("token_usage") if final else None
|
|
1459
|
-
input_tokens: int | None = (
|
|
1460
|
-
token_usage.get("input_tokens") if token_usage else None
|
|
1461
|
-
)
|
|
1462
|
-
output_tokens: int | None = (
|
|
1463
|
-
token_usage.get("output_tokens") if token_usage else None
|
|
1464
|
-
)
|
|
1530
|
+
outcome = _parse_turn_outcome(payloads, turn_start)
|
|
1465
1531
|
runtime_id = get_runtime_context().config.service_name
|
|
1466
1532
|
|
|
1467
1533
|
# Prometheus-safe dims: low-cardinality only.
|
|
@@ -1472,25 +1538,25 @@ def _emit_turn_completed(
|
|
|
1472
1538
|
"team_id": team_id,
|
|
1473
1539
|
"template_agent_id": template_agent_id,
|
|
1474
1540
|
"runtime_id": runtime_id,
|
|
1475
|
-
"model_name": model_name,
|
|
1476
|
-
"finish_reason": finish_reason,
|
|
1541
|
+
"model_name": outcome.model_name,
|
|
1542
|
+
"finish_reason": outcome.finish_reason,
|
|
1477
1543
|
}
|
|
1478
1544
|
|
|
1479
1545
|
kpi.emit(
|
|
1480
1546
|
name="agent.turn_completed",
|
|
1481
1547
|
type="timer",
|
|
1482
|
-
value=total_ms,
|
|
1548
|
+
value=outcome.total_ms,
|
|
1483
1549
|
unit="ms",
|
|
1484
1550
|
dims=prom_dims,
|
|
1485
1551
|
quantities={
|
|
1486
|
-
"tool_count": tool_count,
|
|
1487
|
-
"input_tokens": input_tokens,
|
|
1488
|
-
"output_tokens": output_tokens,
|
|
1552
|
+
"tool_count": outcome.tool_count,
|
|
1553
|
+
"input_tokens": outcome.input_tokens,
|
|
1554
|
+
"output_tokens": outcome.output_tokens,
|
|
1489
1555
|
},
|
|
1490
1556
|
actor=KPIActor(type="system"),
|
|
1491
1557
|
)
|
|
1492
1558
|
|
|
1493
|
-
if is_error:
|
|
1559
|
+
if outcome.is_error:
|
|
1494
1560
|
kpi.emit(
|
|
1495
1561
|
name="agent.turn_error_total",
|
|
1496
1562
|
type="counter",
|
|
@@ -1507,12 +1573,12 @@ def _emit_turn_completed(
|
|
|
1507
1573
|
"session_id": session_id,
|
|
1508
1574
|
"exchange_id": exchange_id,
|
|
1509
1575
|
"user_id": user_id,
|
|
1510
|
-
"total_ms": total_ms,
|
|
1511
|
-
"is_error": is_error,
|
|
1576
|
+
"total_ms": outcome.total_ms,
|
|
1577
|
+
"is_error": outcome.is_error,
|
|
1512
1578
|
**prom_dims,
|
|
1513
|
-
"tool_count": tool_count,
|
|
1514
|
-
"input_tokens": input_tokens,
|
|
1515
|
-
"output_tokens": output_tokens,
|
|
1579
|
+
"tool_count": outcome.tool_count,
|
|
1580
|
+
"input_tokens": outcome.input_tokens,
|
|
1581
|
+
"output_tokens": outcome.output_tokens,
|
|
1516
1582
|
},
|
|
1517
1583
|
)
|
|
1518
1584
|
with container._kpi_turns_lock:
|
|
@@ -1604,42 +1670,7 @@ async def _stream(
|
|
|
1604
1670
|
)
|
|
1605
1671
|
|
|
1606
1672
|
|
|
1607
|
-
def _build_executor_input(
|
|
1608
|
-
definition: ReActAgentDefinition | GraphAgentDefinition,
|
|
1609
|
-
request: _AgentExecuteRequest,
|
|
1610
|
-
) -> Any:
|
|
1611
|
-
"""
|
|
1612
|
-
Normalize one turn into the executor input expected by the selected runtime.
|
|
1613
|
-
|
|
1614
|
-
Why this exists:
|
|
1615
|
-
- `ReActRuntime` and `GraphRuntime` accept different input shapes
|
|
1616
|
-
- resume turns also bypass normal message validation, so the mapping should
|
|
1617
|
-
live in one helper instead of being repeated inline in the execution loop
|
|
1618
|
-
|
|
1619
|
-
How to use it:
|
|
1620
|
-
- call while assembling one prepared runtime execution
|
|
1621
|
-
- pass the returned object unchanged to `executor.stream(...)`
|
|
1622
|
-
|
|
1623
|
-
Example:
|
|
1624
|
-
- `executor_input = _build_executor_input(definition, request)`
|
|
1625
|
-
"""
|
|
1626
|
-
|
|
1627
|
-
if isinstance(definition, GraphAgentDefinition):
|
|
1628
|
-
input_cls = definition.input_model()
|
|
1629
|
-
if request.resume_payload is not None:
|
|
1630
|
-
return input_cls.model_construct(message="")
|
|
1631
|
-
return input_cls.model_validate({"message": request.message or ""})
|
|
1632
|
-
|
|
1633
|
-
return ReActInput(
|
|
1634
|
-
messages=(
|
|
1635
|
-
()
|
|
1636
|
-
if request.resume_payload is not None
|
|
1637
|
-
else (ReActMessage(role=ReActMessageRole.USER, content=request.message),)
|
|
1638
|
-
),
|
|
1639
|
-
)
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
def _prepare_runtime_execution(
|
|
1673
|
+
async def _iterate_runtime_event_payloads(
|
|
1643
1674
|
definition: ReActAgentDefinition | GraphAgentDefinition,
|
|
1644
1675
|
request: _AgentExecuteRequest,
|
|
1645
1676
|
access_token: str | None = None,
|
|
@@ -1647,24 +1678,26 @@ def _prepare_runtime_execution(
|
|
|
1647
1678
|
team_id: str | None = None,
|
|
1648
1679
|
registry: Mapping[str, ReActAgentDefinition | GraphAgentDefinition] | None = None,
|
|
1649
1680
|
exchange_id: str | None = None,
|
|
1650
|
-
) -> _PreparedRuntimeExecution:
|
|
1681
|
+
) -> AsyncIterator[dict[str, Any]]:
|
|
1651
1682
|
"""
|
|
1652
|
-
|
|
1683
|
+
Execute one agent turn and yield runtime-event payloads as JSON-ready dicts.
|
|
1653
1684
|
|
|
1654
|
-
Why this exists:
|
|
1655
|
-
- `
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
gives future continuity fields a single place to enter the runtime stack
|
|
1685
|
+
Why this helper exists:
|
|
1686
|
+
- both `/agents/execute` and `/agents/execute/stream` share the same runtime
|
|
1687
|
+
wiring and event production path
|
|
1688
|
+
- keeping the generator payload-oriented lets the HTTP layer choose whether
|
|
1689
|
+
it renders SSE or returns a terminal JSON response
|
|
1660
1690
|
|
|
1661
|
-
|
|
1662
|
-
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1691
|
+
team_id:
|
|
1692
|
+
- callers are responsible for resolving the effective team before calling this
|
|
1693
|
+
function; see _stream() for the standalone "personal" default logic
|
|
1694
|
+
- None is accepted for agent-to-agent (AgentInvoker) invocations where no
|
|
1695
|
+
team scope is required
|
|
1665
1696
|
|
|
1666
|
-
|
|
1667
|
-
-
|
|
1697
|
+
access_token:
|
|
1698
|
+
- the user's JWT forwarded via the Authorization header
|
|
1699
|
+
- stored in RuntimeContext so KF tool adapters can use it for outbound calls
|
|
1700
|
+
- None in local dev when security is disabled
|
|
1668
1701
|
"""
|
|
1669
1702
|
|
|
1670
1703
|
request_id = str(uuid4())
|
|
@@ -1723,6 +1756,7 @@ def _prepare_runtime_execution(
|
|
|
1723
1756
|
runtime_context=runtime_context,
|
|
1724
1757
|
portable_context=portable_context,
|
|
1725
1758
|
)
|
|
1759
|
+
|
|
1726
1760
|
services = _build_runtime_services(
|
|
1727
1761
|
definition,
|
|
1728
1762
|
binding,
|
|
@@ -1730,9 +1764,8 @@ def _prepare_runtime_execution(
|
|
|
1730
1764
|
registry=registry,
|
|
1731
1765
|
access_token=access_token,
|
|
1732
1766
|
)
|
|
1733
|
-
runtime: ReActRuntime | GraphRuntime
|
|
1734
1767
|
if isinstance(definition, GraphAgentDefinition):
|
|
1735
|
-
runtime = GraphRuntime(
|
|
1768
|
+
runtime: ReActRuntime | GraphRuntime = GraphRuntime(
|
|
1736
1769
|
definition=definition,
|
|
1737
1770
|
services=services,
|
|
1738
1771
|
)
|
|
@@ -1751,58 +1784,40 @@ def _prepare_runtime_execution(
|
|
|
1751
1784
|
checkpoint_id=request.checkpoint_id,
|
|
1752
1785
|
resume_payload=request.resume_payload,
|
|
1753
1786
|
)
|
|
1754
|
-
return _PreparedRuntimeExecution(
|
|
1755
|
-
runtime=runtime,
|
|
1756
|
-
execution_config=execution_config,
|
|
1757
|
-
executor_input=_build_executor_input(definition, request),
|
|
1758
|
-
)
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
async def _iterate_runtime_event_payloads(
|
|
1762
|
-
definition: ReActAgentDefinition | GraphAgentDefinition,
|
|
1763
|
-
request: _AgentExecuteRequest,
|
|
1764
|
-
access_token: str | None = None,
|
|
1765
|
-
*,
|
|
1766
|
-
team_id: str | None = None,
|
|
1767
|
-
registry: Mapping[str, ReActAgentDefinition | GraphAgentDefinition] | None = None,
|
|
1768
|
-
exchange_id: str | None = None,
|
|
1769
|
-
) -> AsyncIterator[dict[str, Any]]:
|
|
1770
|
-
"""
|
|
1771
|
-
Execute one agent turn and yield runtime-event payloads as JSON-ready dicts.
|
|
1772
|
-
|
|
1773
|
-
Why this helper exists:
|
|
1774
|
-
- both `/agents/execute` and `/agents/execute/stream` share the same runtime
|
|
1775
|
-
wiring and event production path
|
|
1776
|
-
- keeping the generator payload-oriented lets the HTTP layer choose whether
|
|
1777
|
-
it renders SSE or returns a terminal JSON response
|
|
1778
|
-
|
|
1779
|
-
team_id:
|
|
1780
|
-
- callers are responsible for resolving the effective team before calling this
|
|
1781
|
-
function; see _stream() for the standalone "personal" default logic
|
|
1782
|
-
- None is accepted for agent-to-agent (AgentInvoker) invocations where no
|
|
1783
|
-
team scope is required
|
|
1784
|
-
|
|
1785
|
-
access_token:
|
|
1786
|
-
- the user's JWT forwarded via the Authorization header
|
|
1787
|
-
- stored in RuntimeContext so KF tool adapters can use it for outbound calls
|
|
1788
|
-
- None in local dev when security is disabled
|
|
1789
|
-
"""
|
|
1790
|
-
prepared = _prepare_runtime_execution(
|
|
1791
|
-
definition,
|
|
1792
|
-
request,
|
|
1793
|
-
access_token=access_token,
|
|
1794
|
-
team_id=team_id,
|
|
1795
|
-
registry=registry,
|
|
1796
|
-
exchange_id=exchange_id,
|
|
1797
|
-
)
|
|
1798
1787
|
|
|
1799
1788
|
try:
|
|
1800
|
-
await prepared.runtime.activate()
|
|
1801
|
-
executor = await prepared.runtime.get_executor()
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1789
|
+
await runtime.activate()
|
|
1790
|
+
executor = await runtime.get_executor()
|
|
1791
|
+
if isinstance(definition, GraphAgentDefinition):
|
|
1792
|
+
# Graph agents receive their typed input schema; the agent's
|
|
1793
|
+
# build_turn_state() maps it to graph state before the first node runs.
|
|
1794
|
+
# The standard contract is a single "message" field in the input schema.
|
|
1795
|
+
# On a HITL resume the runtime ignores input entirely (state is loaded
|
|
1796
|
+
# from the checkpoint), so bypass validation with model_construct.
|
|
1797
|
+
input_cls = definition.input_model()
|
|
1798
|
+
if request.resume_payload is not None:
|
|
1799
|
+
graph_input = input_cls.model_construct(message="")
|
|
1800
|
+
else:
|
|
1801
|
+
graph_input = input_cls.model_validate(
|
|
1802
|
+
{"message": request.message or ""}
|
|
1803
|
+
)
|
|
1804
|
+
executor_input: ReActInput | object = graph_input
|
|
1805
|
+
else:
|
|
1806
|
+
# On HITL resume, messages are ignored by the codec — the graph
|
|
1807
|
+
# resumes from its checkpointed interrupt via Command(resume=...).
|
|
1808
|
+
# On a normal turn, the user message is the only input.
|
|
1809
|
+
executor_input = ReActInput(
|
|
1810
|
+
messages=(
|
|
1811
|
+
()
|
|
1812
|
+
if request.resume_payload is not None
|
|
1813
|
+
else (
|
|
1814
|
+
ReActMessage(
|
|
1815
|
+
role=ReActMessageRole.USER, content=request.message
|
|
1816
|
+
),
|
|
1817
|
+
)
|
|
1818
|
+
),
|
|
1819
|
+
)
|
|
1820
|
+
async for event in executor.stream(executor_input, execution_config):
|
|
1806
1821
|
payload = event.model_dump(mode="json")
|
|
1807
1822
|
if not isinstance(payload, dict):
|
|
1808
1823
|
raise RuntimeError(
|
|
@@ -1815,7 +1830,7 @@ async def _iterate_runtime_event_payloads(
|
|
|
1815
1830
|
)
|
|
1816
1831
|
yield RuntimeErrorEvent(message=str(exc)).model_dump(mode="json")
|
|
1817
1832
|
finally:
|
|
1818
|
-
await prepared.runtime.dispose()
|
|
1833
|
+
await runtime.dispose()
|
|
1819
1834
|
|
|
1820
1835
|
|
|
1821
1836
|
def _terminal_execute_payload(
|
|
@@ -2418,6 +2433,113 @@ def _build_agent_router(
|
|
|
2418
2433
|
)
|
|
2419
2434
|
return _terminal_execute_payload(payloads)
|
|
2420
2435
|
|
|
2436
|
+
@router.post(
|
|
2437
|
+
"/evaluate",
|
|
2438
|
+
response_model=EvalTrace,
|
|
2439
|
+
)
|
|
2440
|
+
async def evaluate(
|
|
2441
|
+
request: RuntimeExecuteRequest,
|
|
2442
|
+
http_request: Request,
|
|
2443
|
+
authenticated_user: KeycloakUser | None = Depends(_authenticated_user),
|
|
2444
|
+
container: PodApplicationContext = Depends(get_pod_container),
|
|
2445
|
+
) -> EvalTrace:
|
|
2446
|
+
"""
|
|
2447
|
+
Execute one agent turn and return a complete EvalTrace as JSON.
|
|
2448
|
+
|
|
2449
|
+
POST <configured base_url>/agents/evaluate
|
|
2450
|
+
Authorization: Bearer <user JWT>
|
|
2451
|
+
Body: RuntimeExecuteRequest
|
|
2452
|
+
Response: EvalTrace — synchronous, no SSE, no Langfuse dependency
|
|
2453
|
+
|
|
2454
|
+
Intended for evaluation harnesses (DeepEval, Promptfoo) that need
|
|
2455
|
+
input, output, retrieval_context, tools_called, and steps in one response.
|
|
2456
|
+
"""
|
|
2457
|
+
auth = http_request.headers.get("Authorization", "")
|
|
2458
|
+
access_token = auth.removeprefix("Bearer ").strip() or None
|
|
2459
|
+
|
|
2460
|
+
expected_action = _expected_execution_action(request)
|
|
2461
|
+
|
|
2462
|
+
try:
|
|
2463
|
+
validate_execution_grant(request, expected_action=expected_action)
|
|
2464
|
+
except ExecutionGrantViolation as exc:
|
|
2465
|
+
_emit_audit_event(
|
|
2466
|
+
container,
|
|
2467
|
+
"warning",
|
|
2468
|
+
"grant_validation_failed",
|
|
2469
|
+
agent_instance_id=request.agent_instance_id,
|
|
2470
|
+
user_id=request.effective_user_id(),
|
|
2471
|
+
action=expected_action.value,
|
|
2472
|
+
reason=str(exc),
|
|
2473
|
+
)
|
|
2474
|
+
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(exc))
|
|
2475
|
+
if request.execution_grant is not None:
|
|
2476
|
+
_emit_audit_event(
|
|
2477
|
+
container,
|
|
2478
|
+
"info",
|
|
2479
|
+
"grant_validated",
|
|
2480
|
+
agent_instance_id=request.agent_instance_id,
|
|
2481
|
+
user_id=request.effective_user_id(),
|
|
2482
|
+
action=expected_action.value,
|
|
2483
|
+
)
|
|
2484
|
+
_validate_grant_user_correlation(request, authenticated_user, container)
|
|
2485
|
+
await _validate_session_checkpoint_access(request)
|
|
2486
|
+
|
|
2487
|
+
exchange_id = str(uuid4())
|
|
2488
|
+
turn_start = time.monotonic()
|
|
2489
|
+
internal_req = _to_internal_request(request)
|
|
2490
|
+
target = await _resolve_agent_instance(
|
|
2491
|
+
request=internal_req,
|
|
2492
|
+
registry=registry,
|
|
2493
|
+
access_token=access_token,
|
|
2494
|
+
control_plane_url=get_runtime_context().config.control_plane_url,
|
|
2495
|
+
)
|
|
2496
|
+
payloads = [
|
|
2497
|
+
payload
|
|
2498
|
+
async for payload in _iterate_runtime_event_payloads(
|
|
2499
|
+
target.definition,
|
|
2500
|
+
internal_req,
|
|
2501
|
+
access_token=access_token,
|
|
2502
|
+
team_id=target.team_id,
|
|
2503
|
+
registry=registry,
|
|
2504
|
+
exchange_id=exchange_id,
|
|
2505
|
+
)
|
|
2506
|
+
]
|
|
2507
|
+
session_id: str | None = request.effective_session_id()
|
|
2508
|
+
eval_session_id = session_id or str(uuid4())
|
|
2509
|
+
user_id_str = request.effective_user_id() or "unknown"
|
|
2510
|
+
_emit_turn_completed(
|
|
2511
|
+
container,
|
|
2512
|
+
session_id=session_id,
|
|
2513
|
+
exchange_id=exchange_id,
|
|
2514
|
+
user_id=user_id_str,
|
|
2515
|
+
team_id=target.team_id,
|
|
2516
|
+
agent_instance_id=request.agent_instance_id,
|
|
2517
|
+
template_agent_id=target.definition.agent_id,
|
|
2518
|
+
payloads=payloads,
|
|
2519
|
+
turn_start=turn_start,
|
|
2520
|
+
)
|
|
2521
|
+
if session_id:
|
|
2522
|
+
history_store = get_runtime_context().config.history_store
|
|
2523
|
+
if history_store is not None:
|
|
2524
|
+
await _write_turn_history(
|
|
2525
|
+
session_id=session_id,
|
|
2526
|
+
user_id=user_id_str,
|
|
2527
|
+
request_message=request.input,
|
|
2528
|
+
payloads=payloads,
|
|
2529
|
+
history_store=history_store,
|
|
2530
|
+
team_id=target.team_id,
|
|
2531
|
+
agent_instance_id=request.agent_instance_id,
|
|
2532
|
+
exchange_id=exchange_id,
|
|
2533
|
+
resume_payload=request.resume_payload,
|
|
2534
|
+
)
|
|
2535
|
+
return _build_eval_trace(
|
|
2536
|
+
payloads=payloads,
|
|
2537
|
+
input_text=request.input or "",
|
|
2538
|
+
agent_id=target.definition.agent_id,
|
|
2539
|
+
session_id=eval_session_id,
|
|
2540
|
+
turn_start=turn_start,
|
|
2541
|
+
)
|
|
2542
|
+
|
|
2421
2543
|
@router.post(
|
|
2422
2544
|
"/execute/stream",
|
|
2423
2545
|
)
|
|
@@ -2,8 +2,10 @@ from .completion import completion_candidates
|
|
|
2
2
|
from .entrypoint import build_parser, main
|
|
3
3
|
from .history_display import (
|
|
4
4
|
build_hitl_resume_payload,
|
|
5
|
+
print_eval_trace,
|
|
5
6
|
print_history,
|
|
6
7
|
print_runtime_event,
|
|
8
|
+
run_eval_turn,
|
|
7
9
|
run_single_turn,
|
|
8
10
|
)
|
|
9
11
|
from .kpi_display import (
|
|
@@ -19,6 +21,8 @@ from .kpi_display import (
|
|
|
19
21
|
from .pod_client import DEFAULT_AGENT_POD_BASE_URL, AgentPodClient
|
|
20
22
|
from .repl import run_interactive_chat
|
|
21
23
|
from .repl_helpers import (
|
|
24
|
+
ExecutionMode,
|
|
25
|
+
execution_mode_color,
|
|
22
26
|
execution_mode_label,
|
|
23
27
|
fmt_bytes,
|
|
24
28
|
parse_mode_command,
|
|
@@ -33,6 +37,7 @@ from .url_helpers import (
|
|
|
33
37
|
__all__ = [
|
|
34
38
|
"AgentPodClient",
|
|
35
39
|
"DEFAULT_AGENT_POD_BASE_URL",
|
|
40
|
+
"ExecutionMode",
|
|
36
41
|
"HistogramSeriesSummary",
|
|
37
42
|
"PrometheusSample",
|
|
38
43
|
"build_hitl_resume_payload",
|
|
@@ -40,7 +45,9 @@ __all__ = [
|
|
|
40
45
|
"completion_candidates",
|
|
41
46
|
"default_agent_metrics_url",
|
|
42
47
|
"default_agent_pod_base_url",
|
|
48
|
+
"execution_mode_color",
|
|
43
49
|
"execution_mode_label",
|
|
50
|
+
"print_eval_trace",
|
|
44
51
|
"filter_prometheus_samples",
|
|
45
52
|
"fmt_bytes",
|
|
46
53
|
"format_metric_value",
|
|
@@ -53,6 +60,7 @@ __all__ = [
|
|
|
53
60
|
"print_history",
|
|
54
61
|
"print_runtime_event",
|
|
55
62
|
"render_kpi_report",
|
|
63
|
+
"run_eval_turn",
|
|
56
64
|
"run_interactive_chat",
|
|
57
65
|
"run_single_turn",
|
|
58
66
|
"summarize_prometheus_histograms",
|
|
@@ -49,7 +49,7 @@ def completion_candidates(
|
|
|
49
49
|
return [sid for sid in session_ids if sid.startswith(prefix)]
|
|
50
50
|
if stripped.startswith("/mode "):
|
|
51
51
|
prefix = stripped.removeprefix("/mode ").strip()
|
|
52
|
-
return [mode for mode in ("final", "stream") if mode.startswith(prefix)]
|
|
52
|
+
return [mode for mode in ("eval", "final", "stream") if mode.startswith(prefix)]
|
|
53
53
|
if stripped.startswith("/"):
|
|
54
54
|
return complete_slash_commands(stripped, commands=_COMMANDS)
|
|
55
55
|
return []
|