fred-runtime 2.0.1__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/PKG-INFO +1 -1
  2. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/agent_app.py +297 -175
  3. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/__init__.py +8 -0
  4. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/completion.py +1 -1
  5. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/history_display.py +149 -0
  6. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/pod_client.py +35 -0
  7. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/repl.py +68 -53
  8. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/repl_helpers.py +30 -16
  9. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/client.py +8 -0
  10. fred_runtime-2.0.2/fred_runtime/eval/__init__.py +13 -0
  11. fred_runtime-2.0.2/fred_runtime/eval/collector.py +143 -0
  12. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/PKG-INFO +1 -1
  13. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/SOURCES.txt +3 -0
  14. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/pyproject.toml +1 -1
  15. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_client.py +6 -4
  16. fred_runtime-2.0.2/tests/test_eval_trace.py +314 -0
  17. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/README.md +0 -0
  18. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/__init__.py +0 -0
  19. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/__init__.py +0 -0
  20. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/_catalogs.py +0 -0
  21. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/config.py +0 -0
  22. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/config_loader.py +0 -0
  23. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/container.py +0 -0
  24. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/context.py +0 -0
  25. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/dependencies.py +0 -0
  26. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/mcp_config.py +0 -0
  27. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/observability_factory.py +0 -0
  28. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/app/openai_compat_router.py +0 -0
  29. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/entrypoint.py +0 -0
  30. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/kpi_display.py +0 -0
  31. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/cli/url_helpers.py +0 -0
  32. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/__init__.py +0 -0
  33. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/context_aware_tool.py +0 -0
  34. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_base_client.py +0 -0
  35. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_fast_text_client.py +0 -0
  36. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_http_client.py +0 -0
  37. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_logs_client.py +0 -0
  38. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_markdown_media_client.py +0 -0
  39. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_vectorsearch_client.py +0 -0
  40. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/kf_workspace_client.py +0 -0
  41. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_interceptors.py +0 -0
  42. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_runtime.py +0 -0
  43. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_toolkit.py +0 -0
  44. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/mcp_utils.py +0 -0
  45. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/structures.py +0 -0
  46. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/token_expiry.py +0 -0
  47. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/common/tool_node_utils.py +0 -0
  48. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/deep/__init__.py +0 -0
  49. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/deep/deep_runtime.py +0 -0
  50. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/graph/__init__.py +0 -0
  51. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/graph/graph_runtime.py +0 -0
  52. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/integrations/__init__.py +0 -0
  53. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/integrations/v2_runtime/__init__.py +0 -0
  54. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/integrations/v2_runtime/adapters.py +0 -0
  55. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/__init__.py +0 -0
  56. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/catalog.py +0 -0
  57. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/contracts.py +0 -0
  58. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/provider.py +0 -0
  59. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/model_routing/resolver.py +0 -0
  60. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/__init__.py +0 -0
  61. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_langchain_adapter.py +0 -0
  62. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_message_codec.py +0 -0
  63. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_model_adapter.py +0 -0
  64. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_prompting.py +0 -0
  65. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_runtime.py +0 -0
  66. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_stream_adapter.py +0 -0
  67. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_binding.py +0 -0
  68. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_loop.py +0 -0
  69. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_rendering.py +0 -0
  70. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_resolution.py +0 -0
  71. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tool_utils.py +0 -0
  72. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/react/react_tracing.py +0 -0
  73. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_context.py +0 -0
  74. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/__init__.py +0 -0
  75. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/checkpoints.py +0 -0
  76. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/model_metadata.py +0 -0
  77. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/request_context_helpers.py +0 -0
  78. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/sql_checkpointer.py +0 -0
  79. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/runtime_support/user_token_refresher.py +0 -0
  80. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/__init__.py +0 -0
  81. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/filesystem_context.py +0 -0
  82. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/tool_approval.py +0 -0
  83. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime/support/tool_loop.py +0 -0
  84. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/dependency_links.txt +0 -0
  85. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/entry_points.txt +0 -0
  86. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/requires.txt +0 -0
  87. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/fred_runtime.egg-info/top_level.txt +0 -0
  88. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/setup.cfg +0 -0
  89. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_agent_app.py +0 -0
  90. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_config_loader.py +0 -0
  91. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_context.py +0 -0
  92. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_graph_runtime_observability.py +0 -0
  93. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_history.py +0 -0
  94. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_kf_workspace_client.py +0 -0
  95. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_kpi_display.py +0 -0
  96. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_mcp_config.py +0 -0
  97. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_openai_compat_router.py +0 -0
  98. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_smoke.py +0 -0
  99. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_url_helpers.py +0 -0
  100. {fred_runtime-2.0.1 → fred_runtime-2.0.2}/tests/test_user_token_refresher.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fred-runtime
3
- Version: 2.0.1
3
+ Version: 2.0.2
4
4
  Summary: Runtime adapters and infrastructure wiring for Fred v2 agents.
5
5
  Author-email: Thales <noreply@thalesgroup.com>
6
6
  License: Apache-2.0
@@ -62,6 +62,7 @@ from fred_core.logs.log_setup import log_setup
62
62
  from fred_core.logs.memory_log_store import RamLogStore
63
63
  from fred_core.security.oidc import get_keycloak_client_id, get_keycloak_url
64
64
  from fred_core.security.structure import KeycloakUser
65
+ from fred_sdk.contracts.eval import EvalStep, EvalTrace
65
66
  from fred_sdk.contracts.context import (
66
67
  AgentInvocationRequest,
67
68
  AgentInvocationResult,
@@ -558,8 +559,14 @@ class LocalRegistryAgentInvoker(AgentInvokerPort):
558
559
  is_error=True,
559
560
  )
560
561
 
561
- execute_request = _to_internal_request(
562
- _build_runtime_execute_request_from_invocation(request)
562
+ context_dict = request.context.model_dump(mode="json")
563
+ context_dict.setdefault("execution_action", ExecutionGrantAction.EXECUTE.value)
564
+ execute_request = _AgentExecuteRequest.model_construct(
565
+ agent_id=request.agent_id,
566
+ agent_instance_id=None,
567
+ message=request.message,
568
+ context=context_dict,
569
+ resume_payload=None,
563
570
  )
564
571
 
565
572
  content_parts: list[str] = []
@@ -766,13 +773,6 @@ class _AgentExecuteRequest(BaseModel):
766
773
  return self
767
774
 
768
775
 
769
- @dataclass(slots=True)
770
- class _PreparedRuntimeExecution:
771
- runtime: ReActRuntime | GraphRuntime
772
- execution_config: ExecutionConfig
773
- executor_input: Any
774
-
775
-
776
776
  def _to_internal_request(r: RuntimeExecuteRequest) -> "_AgentExecuteRequest":
777
777
  """
778
778
  Bridge a public RuntimeExecuteRequest to the internal execution model.
@@ -797,36 +797,6 @@ def _to_internal_request(r: RuntimeExecuteRequest) -> "_AgentExecuteRequest":
797
797
  )
798
798
 
799
799
 
800
- def _build_runtime_execute_request_from_invocation(
801
- request: AgentInvocationRequest,
802
- ) -> RuntimeExecuteRequest:
803
- """
804
- Project one in-process agent invocation onto the public execute contract.
805
-
806
- Why this exists:
807
- - pod-local agent-to-agent calls should follow the same request projection
808
- path as HTTP execution, rather than hand-constructing a second private
809
- request shape
810
- - future continuity fields should therefore land once on the typed runtime
811
- contract, then flow through both local and remote invocation paths
812
-
813
- How to use it:
814
- - call from `LocalRegistryAgentInvoker.invoke(...)`
815
- - pass the result through `_to_internal_request(...)` until the remaining
816
- internal helpers consume `RuntimeExecuteRequest` directly
817
-
818
- Example:
819
- - `runtime_request = _build_runtime_execute_request_from_invocation(request)`
820
- """
821
-
822
- return RuntimeExecuteRequest(
823
- agent_id=request.agent_id,
824
- input=request.message,
825
- session_id=request.context.session_id,
826
- runtime_context=request.context.model_dump(mode="json"),
827
- )
828
-
829
-
830
800
  class _AgentTemplateSummary(BaseModel):
831
801
  template_agent_id: str
832
802
  title: str
@@ -871,7 +841,7 @@ def _apply_runtime_tuning(
871
841
  - `definition = _apply_runtime_tuning(template_definition, resolution.tuning)`
872
842
  """
873
843
 
874
- update: dict[str, Any] = {
844
+ update: dict[str, object] = {
875
845
  "role": tuning.role,
876
846
  "description": tuning.description,
877
847
  "tags": tuple(tuning.tags),
@@ -881,11 +851,7 @@ def _apply_runtime_tuning(
881
851
  ),
882
852
  }
883
853
  system_prompt = tuning.values.get("prompts.system")
884
- if (
885
- isinstance(definition, ReActAgentDefinition)
886
- and isinstance(system_prompt, str)
887
- and system_prompt.strip()
888
- ):
854
+ if isinstance(system_prompt, str) and system_prompt.strip():
889
855
  update["system_prompt_template"] = system_prompt
890
856
  return definition.model_copy(update=update)
891
857
 
@@ -1416,6 +1382,120 @@ def _sse(payload: str) -> str:
1416
1382
  return f"data: {payload}\n\n"
1417
1383
 
1418
1384
 
1385
+ @dataclass(frozen=True)
1386
+ class _TurnOutcome:
1387
+ model_name: str | None
1388
+ finish_reason: str
1389
+ token_usage: dict[str, Any] | None
1390
+ input_tokens: int | None
1391
+ output_tokens: int | None
1392
+ tool_count: int
1393
+ is_error: bool
1394
+ total_ms: int
1395
+ final_content: str | None
1396
+
1397
+
1398
+ def _parse_turn_outcome(
1399
+ payloads: list[dict[str, Any]],
1400
+ turn_start: float,
1401
+ ) -> _TurnOutcome:
1402
+ total_ms = int((time.monotonic() - turn_start) * 1000)
1403
+ tool_count = sum(1 for p in payloads if p.get("kind") == "tool_call")
1404
+ final = next((p for p in reversed(payloads) if p.get("kind") == "final"), None)
1405
+ is_error = any(p.get("kind") == "execution_error" for p in payloads)
1406
+ token_usage: dict[str, Any] | None = final.get("token_usage") if final else None
1407
+ return _TurnOutcome(
1408
+ model_name=final.get("model_name") if final else None,
1409
+ finish_reason="error"
1410
+ if is_error
1411
+ else ((final.get("finish_reason") or "") if final else ""),
1412
+ token_usage=token_usage,
1413
+ input_tokens=token_usage.get("input_tokens") if token_usage else None,
1414
+ output_tokens=token_usage.get("output_tokens") if token_usage else None,
1415
+ tool_count=tool_count,
1416
+ is_error=is_error,
1417
+ total_ms=total_ms,
1418
+ final_content=(final.get("content") or None) if final else None,
1419
+ )
1420
+
1421
+
1422
+ def _build_eval_trace(
1423
+ payloads: list[dict[str, Any]],
1424
+ input_text: str,
1425
+ agent_id: str,
1426
+ session_id: str,
1427
+ turn_start: float,
1428
+ ) -> EvalTrace:
1429
+ outcome = _parse_turn_outcome(payloads, turn_start)
1430
+ steps: list[EvalStep] = []
1431
+ retrieval_context: list[str] = []
1432
+ tools_called: list[str] = []
1433
+ error: str | None = None
1434
+
1435
+ for p in payloads:
1436
+ kind = p.get("kind")
1437
+ if kind == "tool_call":
1438
+ steps.append(
1439
+ EvalStep(
1440
+ kind="tool_call",
1441
+ tool_name=p.get("tool_name"),
1442
+ call_id=p.get("call_id"),
1443
+ arguments=p.get("arguments") or {},
1444
+ )
1445
+ )
1446
+ if p.get("tool_name"):
1447
+ tools_called.append(p["tool_name"])
1448
+ elif kind == "tool_result":
1449
+ content = p.get("content", "")
1450
+ is_err = p.get("is_error", False)
1451
+ steps.append(
1452
+ EvalStep(
1453
+ kind="tool_result",
1454
+ tool_name=p.get("tool_name"),
1455
+ call_id=p.get("call_id"),
1456
+ content=content,
1457
+ is_error=is_err,
1458
+ )
1459
+ )
1460
+ if not is_err:
1461
+ sources = p.get("sources") or []
1462
+ if sources:
1463
+ retrieval_context.extend(
1464
+ s["content"] for s in sources if s.get("content")
1465
+ )
1466
+ elif content:
1467
+ retrieval_context.append(content)
1468
+ elif kind == "final":
1469
+ steps.append(EvalStep(kind="final", content=p.get("content")))
1470
+ elif kind == "node_error":
1471
+ steps.append(
1472
+ EvalStep(
1473
+ kind="node_error",
1474
+ node_id=p.get("node_id"),
1475
+ error_message=p.get("error_message"),
1476
+ )
1477
+ )
1478
+ elif kind == "awaiting_human":
1479
+ steps.append(EvalStep(kind="awaiting_human"))
1480
+ elif kind == "execution_error":
1481
+ error = p.get("message")
1482
+
1483
+ return EvalTrace(
1484
+ session_id=session_id,
1485
+ agent_id=agent_id,
1486
+ input=input_text,
1487
+ output=outcome.final_content,
1488
+ error=error,
1489
+ latency_ms=outcome.total_ms,
1490
+ model_name=outcome.model_name,
1491
+ token_usage=outcome.token_usage,
1492
+ finish_reason=outcome.finish_reason or None,
1493
+ steps=tuple(steps),
1494
+ retrieval_context=tuple(retrieval_context),
1495
+ tools_called=tuple(tools_called),
1496
+ )
1497
+
1498
+
1419
1499
  def _emit_turn_completed(
1420
1500
  container: PodApplicationContext,
1421
1501
  *,
@@ -1447,21 +1527,7 @@ def _emit_turn_completed(
1447
1527
  """
1448
1528
  try:
1449
1529
  kpi = get_runtime_context().get_kpi_writer()
1450
- total_ms = int((time.monotonic() - turn_start) * 1000)
1451
- tool_count = sum(1 for p in payloads if p.get("kind") == "tool_call")
1452
- final = next((p for p in reversed(payloads) if p.get("kind") == "final"), None)
1453
- is_error = any(p.get("kind") == "execution_error" for p in payloads)
1454
- model_name: str | None = final.get("model_name") if final else None
1455
- finish_reason: str = (
1456
- "error" if is_error else (final.get("finish_reason") or "") if final else ""
1457
- )
1458
- token_usage: dict[str, Any] | None = final.get("token_usage") if final else None
1459
- input_tokens: int | None = (
1460
- token_usage.get("input_tokens") if token_usage else None
1461
- )
1462
- output_tokens: int | None = (
1463
- token_usage.get("output_tokens") if token_usage else None
1464
- )
1530
+ outcome = _parse_turn_outcome(payloads, turn_start)
1465
1531
  runtime_id = get_runtime_context().config.service_name
1466
1532
 
1467
1533
  # Prometheus-safe dims: low-cardinality only.
@@ -1472,25 +1538,25 @@ def _emit_turn_completed(
1472
1538
  "team_id": team_id,
1473
1539
  "template_agent_id": template_agent_id,
1474
1540
  "runtime_id": runtime_id,
1475
- "model_name": model_name,
1476
- "finish_reason": finish_reason,
1541
+ "model_name": outcome.model_name,
1542
+ "finish_reason": outcome.finish_reason,
1477
1543
  }
1478
1544
 
1479
1545
  kpi.emit(
1480
1546
  name="agent.turn_completed",
1481
1547
  type="timer",
1482
- value=total_ms,
1548
+ value=outcome.total_ms,
1483
1549
  unit="ms",
1484
1550
  dims=prom_dims,
1485
1551
  quantities={
1486
- "tool_count": tool_count,
1487
- "input_tokens": input_tokens,
1488
- "output_tokens": output_tokens,
1552
+ "tool_count": outcome.tool_count,
1553
+ "input_tokens": outcome.input_tokens,
1554
+ "output_tokens": outcome.output_tokens,
1489
1555
  },
1490
1556
  actor=KPIActor(type="system"),
1491
1557
  )
1492
1558
 
1493
- if is_error:
1559
+ if outcome.is_error:
1494
1560
  kpi.emit(
1495
1561
  name="agent.turn_error_total",
1496
1562
  type="counter",
@@ -1507,12 +1573,12 @@ def _emit_turn_completed(
1507
1573
  "session_id": session_id,
1508
1574
  "exchange_id": exchange_id,
1509
1575
  "user_id": user_id,
1510
- "total_ms": total_ms,
1511
- "is_error": is_error,
1576
+ "total_ms": outcome.total_ms,
1577
+ "is_error": outcome.is_error,
1512
1578
  **prom_dims,
1513
- "tool_count": tool_count,
1514
- "input_tokens": input_tokens,
1515
- "output_tokens": output_tokens,
1579
+ "tool_count": outcome.tool_count,
1580
+ "input_tokens": outcome.input_tokens,
1581
+ "output_tokens": outcome.output_tokens,
1516
1582
  },
1517
1583
  )
1518
1584
  with container._kpi_turns_lock:
@@ -1604,42 +1670,7 @@ async def _stream(
1604
1670
  )
1605
1671
 
1606
1672
 
1607
- def _build_executor_input(
1608
- definition: ReActAgentDefinition | GraphAgentDefinition,
1609
- request: _AgentExecuteRequest,
1610
- ) -> Any:
1611
- """
1612
- Normalize one turn into the executor input expected by the selected runtime.
1613
-
1614
- Why this exists:
1615
- - `ReActRuntime` and `GraphRuntime` accept different input shapes
1616
- - resume turns also bypass normal message validation, so the mapping should
1617
- live in one helper instead of being repeated inline in the execution loop
1618
-
1619
- How to use it:
1620
- - call while assembling one prepared runtime execution
1621
- - pass the returned object unchanged to `executor.stream(...)`
1622
-
1623
- Example:
1624
- - `executor_input = _build_executor_input(definition, request)`
1625
- """
1626
-
1627
- if isinstance(definition, GraphAgentDefinition):
1628
- input_cls = definition.input_model()
1629
- if request.resume_payload is not None:
1630
- return input_cls.model_construct(message="")
1631
- return input_cls.model_validate({"message": request.message or ""})
1632
-
1633
- return ReActInput(
1634
- messages=(
1635
- ()
1636
- if request.resume_payload is not None
1637
- else (ReActMessage(role=ReActMessageRole.USER, content=request.message),)
1638
- ),
1639
- )
1640
-
1641
-
1642
- def _prepare_runtime_execution(
1673
+ async def _iterate_runtime_event_payloads(
1643
1674
  definition: ReActAgentDefinition | GraphAgentDefinition,
1644
1675
  request: _AgentExecuteRequest,
1645
1676
  access_token: str | None = None,
@@ -1647,24 +1678,26 @@ def _prepare_runtime_execution(
1647
1678
  team_id: str | None = None,
1648
1679
  registry: Mapping[str, ReActAgentDefinition | GraphAgentDefinition] | None = None,
1649
1680
  exchange_id: str | None = None,
1650
- ) -> _PreparedRuntimeExecution:
1681
+ ) -> AsyncIterator[dict[str, Any]]:
1651
1682
  """
1652
- Build the bound runtime, executor input, and execution config for one turn.
1683
+ Execute one agent turn and yield runtime-event payloads as JSON-ready dicts.
1653
1684
 
1654
- Why this exists:
1655
- - `execute`, `execute/stream`, and in-process agent invocation all converge
1656
- on `_iterate_runtime_event_payloads`, so this is the narrowest place to
1657
- centralize request projection before memory fields are added
1658
- - it removes one long block of binding/runtime setup from the event loop and
1659
- gives future continuity fields a single place to enter the runtime stack
1685
+ Why this helper exists:
1686
+ - both `/agents/execute` and `/agents/execute/stream` share the same runtime
1687
+ wiring and event production path
1688
+ - keeping the generator payload-oriented lets the HTTP layer choose whether
1689
+ it renders SSE or returns a terminal JSON response
1660
1690
 
1661
- How to use it:
1662
- - call from `_iterate_runtime_event_payloads(...)`
1663
- - activate the returned runtime, obtain its executor, then stream with the
1664
- returned `executor_input` and `execution_config`
1691
+ team_id:
1692
+ - callers are responsible for resolving the effective team before calling this
1693
+ function; see _stream() for the standalone "personal" default logic
1694
+ - None is accepted for agent-to-agent (AgentInvoker) invocations where no
1695
+ team scope is required
1665
1696
 
1666
- Example:
1667
- - `prepared = _prepare_runtime_execution(definition, request, team_id="fredlab")`
1697
+ access_token:
1698
+ - the user's JWT forwarded via the Authorization header
1699
+ - stored in RuntimeContext so KF tool adapters can use it for outbound calls
1700
+ - None in local dev when security is disabled
1668
1701
  """
1669
1702
 
1670
1703
  request_id = str(uuid4())
@@ -1723,6 +1756,7 @@ def _prepare_runtime_execution(
1723
1756
  runtime_context=runtime_context,
1724
1757
  portable_context=portable_context,
1725
1758
  )
1759
+
1726
1760
  services = _build_runtime_services(
1727
1761
  definition,
1728
1762
  binding,
@@ -1730,9 +1764,8 @@ def _prepare_runtime_execution(
1730
1764
  registry=registry,
1731
1765
  access_token=access_token,
1732
1766
  )
1733
- runtime: ReActRuntime | GraphRuntime
1734
1767
  if isinstance(definition, GraphAgentDefinition):
1735
- runtime = GraphRuntime(
1768
+ runtime: ReActRuntime | GraphRuntime = GraphRuntime(
1736
1769
  definition=definition,
1737
1770
  services=services,
1738
1771
  )
@@ -1751,58 +1784,40 @@ def _prepare_runtime_execution(
1751
1784
  checkpoint_id=request.checkpoint_id,
1752
1785
  resume_payload=request.resume_payload,
1753
1786
  )
1754
- return _PreparedRuntimeExecution(
1755
- runtime=runtime,
1756
- execution_config=execution_config,
1757
- executor_input=_build_executor_input(definition, request),
1758
- )
1759
-
1760
-
1761
- async def _iterate_runtime_event_payloads(
1762
- definition: ReActAgentDefinition | GraphAgentDefinition,
1763
- request: _AgentExecuteRequest,
1764
- access_token: str | None = None,
1765
- *,
1766
- team_id: str | None = None,
1767
- registry: Mapping[str, ReActAgentDefinition | GraphAgentDefinition] | None = None,
1768
- exchange_id: str | None = None,
1769
- ) -> AsyncIterator[dict[str, Any]]:
1770
- """
1771
- Execute one agent turn and yield runtime-event payloads as JSON-ready dicts.
1772
-
1773
- Why this helper exists:
1774
- - both `/agents/execute` and `/agents/execute/stream` share the same runtime
1775
- wiring and event production path
1776
- - keeping the generator payload-oriented lets the HTTP layer choose whether
1777
- it renders SSE or returns a terminal JSON response
1778
-
1779
- team_id:
1780
- - callers are responsible for resolving the effective team before calling this
1781
- function; see _stream() for the standalone "personal" default logic
1782
- - None is accepted for agent-to-agent (AgentInvoker) invocations where no
1783
- team scope is required
1784
-
1785
- access_token:
1786
- - the user's JWT forwarded via the Authorization header
1787
- - stored in RuntimeContext so KF tool adapters can use it for outbound calls
1788
- - None in local dev when security is disabled
1789
- """
1790
- prepared = _prepare_runtime_execution(
1791
- definition,
1792
- request,
1793
- access_token=access_token,
1794
- team_id=team_id,
1795
- registry=registry,
1796
- exchange_id=exchange_id,
1797
- )
1798
1787
 
1799
1788
  try:
1800
- await prepared.runtime.activate()
1801
- executor = await prepared.runtime.get_executor()
1802
- async for event in executor.stream(
1803
- prepared.executor_input,
1804
- prepared.execution_config,
1805
- ):
1789
+ await runtime.activate()
1790
+ executor = await runtime.get_executor()
1791
+ if isinstance(definition, GraphAgentDefinition):
1792
+ # Graph agents receive their typed input schema; the agent's
1793
+ # build_turn_state() maps it to graph state before the first node runs.
1794
+ # The standard contract is a single "message" field in the input schema.
1795
+ # On a HITL resume the runtime ignores input entirely (state is loaded
1796
+ # from the checkpoint), so bypass validation with model_construct.
1797
+ input_cls = definition.input_model()
1798
+ if request.resume_payload is not None:
1799
+ graph_input = input_cls.model_construct(message="")
1800
+ else:
1801
+ graph_input = input_cls.model_validate(
1802
+ {"message": request.message or ""}
1803
+ )
1804
+ executor_input: ReActInput | object = graph_input
1805
+ else:
1806
+ # On HITL resume, messages are ignored by the codec — the graph
1807
+ # resumes from its checkpointed interrupt via Command(resume=...).
1808
+ # On a normal turn, the user message is the only input.
1809
+ executor_input = ReActInput(
1810
+ messages=(
1811
+ ()
1812
+ if request.resume_payload is not None
1813
+ else (
1814
+ ReActMessage(
1815
+ role=ReActMessageRole.USER, content=request.message
1816
+ ),
1817
+ )
1818
+ ),
1819
+ )
1820
+ async for event in executor.stream(executor_input, execution_config):
1806
1821
  payload = event.model_dump(mode="json")
1807
1822
  if not isinstance(payload, dict):
1808
1823
  raise RuntimeError(
@@ -1815,7 +1830,7 @@ async def _iterate_runtime_event_payloads(
1815
1830
  )
1816
1831
  yield RuntimeErrorEvent(message=str(exc)).model_dump(mode="json")
1817
1832
  finally:
1818
- await prepared.runtime.dispose()
1833
+ await runtime.dispose()
1819
1834
 
1820
1835
 
1821
1836
  def _terminal_execute_payload(
@@ -2418,6 +2433,113 @@ def _build_agent_router(
2418
2433
  )
2419
2434
  return _terminal_execute_payload(payloads)
2420
2435
 
2436
+ @router.post(
2437
+ "/evaluate",
2438
+ response_model=EvalTrace,
2439
+ )
2440
+ async def evaluate(
2441
+ request: RuntimeExecuteRequest,
2442
+ http_request: Request,
2443
+ authenticated_user: KeycloakUser | None = Depends(_authenticated_user),
2444
+ container: PodApplicationContext = Depends(get_pod_container),
2445
+ ) -> EvalTrace:
2446
+ """
2447
+ Execute one agent turn and return a complete EvalTrace as JSON.
2448
+
2449
+ POST <configured base_url>/agents/evaluate
2450
+ Authorization: Bearer <user JWT>
2451
+ Body: RuntimeExecuteRequest
2452
+ Response: EvalTrace — synchronous, no SSE, no Langfuse dependency
2453
+
2454
+ Intended for evaluation harnesses (DeepEval, Promptfoo) that need
2455
+ input, output, retrieval_context, tools_called, and steps in one response.
2456
+ """
2457
+ auth = http_request.headers.get("Authorization", "")
2458
+ access_token = auth.removeprefix("Bearer ").strip() or None
2459
+
2460
+ expected_action = _expected_execution_action(request)
2461
+
2462
+ try:
2463
+ validate_execution_grant(request, expected_action=expected_action)
2464
+ except ExecutionGrantViolation as exc:
2465
+ _emit_audit_event(
2466
+ container,
2467
+ "warning",
2468
+ "grant_validation_failed",
2469
+ agent_instance_id=request.agent_instance_id,
2470
+ user_id=request.effective_user_id(),
2471
+ action=expected_action.value,
2472
+ reason=str(exc),
2473
+ )
2474
+ raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(exc))
2475
+ if request.execution_grant is not None:
2476
+ _emit_audit_event(
2477
+ container,
2478
+ "info",
2479
+ "grant_validated",
2480
+ agent_instance_id=request.agent_instance_id,
2481
+ user_id=request.effective_user_id(),
2482
+ action=expected_action.value,
2483
+ )
2484
+ _validate_grant_user_correlation(request, authenticated_user, container)
2485
+ await _validate_session_checkpoint_access(request)
2486
+
2487
+ exchange_id = str(uuid4())
2488
+ turn_start = time.monotonic()
2489
+ internal_req = _to_internal_request(request)
2490
+ target = await _resolve_agent_instance(
2491
+ request=internal_req,
2492
+ registry=registry,
2493
+ access_token=access_token,
2494
+ control_plane_url=get_runtime_context().config.control_plane_url,
2495
+ )
2496
+ payloads = [
2497
+ payload
2498
+ async for payload in _iterate_runtime_event_payloads(
2499
+ target.definition,
2500
+ internal_req,
2501
+ access_token=access_token,
2502
+ team_id=target.team_id,
2503
+ registry=registry,
2504
+ exchange_id=exchange_id,
2505
+ )
2506
+ ]
2507
+ session_id: str | None = request.effective_session_id()
2508
+ eval_session_id = session_id or str(uuid4())
2509
+ user_id_str = request.effective_user_id() or "unknown"
2510
+ _emit_turn_completed(
2511
+ container,
2512
+ session_id=session_id,
2513
+ exchange_id=exchange_id,
2514
+ user_id=user_id_str,
2515
+ team_id=target.team_id,
2516
+ agent_instance_id=request.agent_instance_id,
2517
+ template_agent_id=target.definition.agent_id,
2518
+ payloads=payloads,
2519
+ turn_start=turn_start,
2520
+ )
2521
+ if session_id:
2522
+ history_store = get_runtime_context().config.history_store
2523
+ if history_store is not None:
2524
+ await _write_turn_history(
2525
+ session_id=session_id,
2526
+ user_id=user_id_str,
2527
+ request_message=request.input,
2528
+ payloads=payloads,
2529
+ history_store=history_store,
2530
+ team_id=target.team_id,
2531
+ agent_instance_id=request.agent_instance_id,
2532
+ exchange_id=exchange_id,
2533
+ resume_payload=request.resume_payload,
2534
+ )
2535
+ return _build_eval_trace(
2536
+ payloads=payloads,
2537
+ input_text=request.input or "",
2538
+ agent_id=target.definition.agent_id,
2539
+ session_id=eval_session_id,
2540
+ turn_start=turn_start,
2541
+ )
2542
+
2421
2543
  @router.post(
2422
2544
  "/execute/stream",
2423
2545
  )
@@ -2,8 +2,10 @@ from .completion import completion_candidates
2
2
  from .entrypoint import build_parser, main
3
3
  from .history_display import (
4
4
  build_hitl_resume_payload,
5
+ print_eval_trace,
5
6
  print_history,
6
7
  print_runtime_event,
8
+ run_eval_turn,
7
9
  run_single_turn,
8
10
  )
9
11
  from .kpi_display import (
@@ -19,6 +21,8 @@ from .kpi_display import (
19
21
  from .pod_client import DEFAULT_AGENT_POD_BASE_URL, AgentPodClient
20
22
  from .repl import run_interactive_chat
21
23
  from .repl_helpers import (
24
+ ExecutionMode,
25
+ execution_mode_color,
22
26
  execution_mode_label,
23
27
  fmt_bytes,
24
28
  parse_mode_command,
@@ -33,6 +37,7 @@ from .url_helpers import (
33
37
  __all__ = [
34
38
  "AgentPodClient",
35
39
  "DEFAULT_AGENT_POD_BASE_URL",
40
+ "ExecutionMode",
36
41
  "HistogramSeriesSummary",
37
42
  "PrometheusSample",
38
43
  "build_hitl_resume_payload",
@@ -40,7 +45,9 @@ __all__ = [
40
45
  "completion_candidates",
41
46
  "default_agent_metrics_url",
42
47
  "default_agent_pod_base_url",
48
+ "execution_mode_color",
43
49
  "execution_mode_label",
50
+ "print_eval_trace",
44
51
  "filter_prometheus_samples",
45
52
  "fmt_bytes",
46
53
  "format_metric_value",
@@ -53,6 +60,7 @@ __all__ = [
53
60
  "print_history",
54
61
  "print_runtime_event",
55
62
  "render_kpi_report",
63
+ "run_eval_turn",
56
64
  "run_interactive_chat",
57
65
  "run_single_turn",
58
66
  "summarize_prometheus_histograms",
@@ -49,7 +49,7 @@ def completion_candidates(
49
49
  return [sid for sid in session_ids if sid.startswith(prefix)]
50
50
  if stripped.startswith("/mode "):
51
51
  prefix = stripped.removeprefix("/mode ").strip()
52
- return [mode for mode in ("final", "stream") if mode.startswith(prefix)]
52
+ return [mode for mode in ("eval", "final", "stream") if mode.startswith(prefix)]
53
53
  if stripped.startswith("/"):
54
54
  return complete_slash_commands(stripped, commands=_COMMANDS)
55
55
  return []