fred-runtime 2.0.1__tar.gz → 2.0.3__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/PKG-INFO +1 -1
  2. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/agent_app.py +304 -175
  3. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/__init__.py +8 -0
  4. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/completion.py +1 -1
  5. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/history_display.py +149 -0
  6. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/pod_client.py +35 -0
  7. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/repl.py +68 -53
  8. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/repl_helpers.py +30 -16
  9. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/client.py +8 -0
  10. fred_runtime-2.0.3/fred_runtime/eval/__init__.py +13 -0
  11. fred_runtime-2.0.3/fred_runtime/eval/collector.py +143 -0
  12. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/graph/graph_runtime.py +10 -2
  13. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_runtime.py +33 -13
  14. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime.egg-info/PKG-INFO +1 -1
  15. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime.egg-info/SOURCES.txt +3 -0
  16. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/pyproject.toml +1 -1
  17. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_client.py +6 -4
  18. fred_runtime-2.0.3/tests/test_eval_trace.py +314 -0
  19. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/README.md +0 -0
  20. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/__init__.py +0 -0
  21. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/__init__.py +0 -0
  22. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/_catalogs.py +0 -0
  23. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/config.py +0 -0
  24. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/config_loader.py +0 -0
  25. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/container.py +0 -0
  26. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/context.py +0 -0
  27. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/dependencies.py +0 -0
  28. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/mcp_config.py +0 -0
  29. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/observability_factory.py +0 -0
  30. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/app/openai_compat_router.py +0 -0
  31. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/entrypoint.py +0 -0
  32. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/kpi_display.py +0 -0
  33. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/cli/url_helpers.py +0 -0
  34. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/__init__.py +0 -0
  35. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/context_aware_tool.py +0 -0
  36. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_base_client.py +0 -0
  37. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_fast_text_client.py +0 -0
  38. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_http_client.py +0 -0
  39. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_logs_client.py +0 -0
  40. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_markdown_media_client.py +0 -0
  41. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_vectorsearch_client.py +0 -0
  42. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/kf_workspace_client.py +0 -0
  43. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/mcp_interceptors.py +0 -0
  44. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/mcp_runtime.py +0 -0
  45. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/mcp_toolkit.py +0 -0
  46. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/mcp_utils.py +0 -0
  47. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/structures.py +0 -0
  48. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/token_expiry.py +0 -0
  49. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/common/tool_node_utils.py +0 -0
  50. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/deep/__init__.py +0 -0
  51. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/deep/deep_runtime.py +0 -0
  52. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/graph/__init__.py +0 -0
  53. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/integrations/__init__.py +0 -0
  54. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/integrations/v2_runtime/__init__.py +0 -0
  55. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/integrations/v2_runtime/adapters.py +0 -0
  56. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/model_routing/__init__.py +0 -0
  57. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/model_routing/catalog.py +0 -0
  58. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/model_routing/contracts.py +0 -0
  59. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/model_routing/provider.py +0 -0
  60. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/model_routing/resolver.py +0 -0
  61. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/__init__.py +0 -0
  62. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_langchain_adapter.py +0 -0
  63. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_message_codec.py +0 -0
  64. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_model_adapter.py +0 -0
  65. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_prompting.py +0 -0
  66. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_stream_adapter.py +0 -0
  67. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_tool_binding.py +0 -0
  68. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_tool_loop.py +0 -0
  69. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_tool_rendering.py +0 -0
  70. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_tool_resolution.py +0 -0
  71. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_tool_utils.py +0 -0
  72. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/react/react_tracing.py +0 -0
  73. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_context.py +0 -0
  74. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_support/__init__.py +0 -0
  75. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_support/checkpoints.py +0 -0
  76. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_support/model_metadata.py +0 -0
  77. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_support/request_context_helpers.py +0 -0
  78. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_support/sql_checkpointer.py +0 -0
  79. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/runtime_support/user_token_refresher.py +0 -0
  80. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/support/__init__.py +0 -0
  81. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/support/filesystem_context.py +0 -0
  82. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/support/tool_approval.py +0 -0
  83. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime/support/tool_loop.py +0 -0
  84. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime.egg-info/dependency_links.txt +0 -0
  85. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime.egg-info/entry_points.txt +0 -0
  86. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime.egg-info/requires.txt +0 -0
  87. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/fred_runtime.egg-info/top_level.txt +0 -0
  88. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/setup.cfg +0 -0
  89. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_agent_app.py +0 -0
  90. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_config_loader.py +0 -0
  91. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_context.py +0 -0
  92. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_graph_runtime_observability.py +0 -0
  93. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_history.py +0 -0
  94. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_kf_workspace_client.py +0 -0
  95. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_kpi_display.py +0 -0
  96. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_mcp_config.py +0 -0
  97. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_openai_compat_router.py +0 -0
  98. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_smoke.py +0 -0
  99. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_url_helpers.py +0 -0
  100. {fred_runtime-2.0.1 → fred_runtime-2.0.3}/tests/test_user_token_refresher.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fred-runtime
3
- Version: 2.0.1
3
+ Version: 2.0.3
4
4
  Summary: Runtime adapters and infrastructure wiring for Fred v2 agents.
5
5
  Author-email: Thales <noreply@thalesgroup.com>
6
6
  License: Apache-2.0
@@ -62,10 +62,12 @@ from fred_core.logs.log_setup import log_setup
62
62
  from fred_core.logs.memory_log_store import RamLogStore
63
63
  from fred_core.security.oidc import get_keycloak_client_id, get_keycloak_url
64
64
  from fred_core.security.structure import KeycloakUser
65
+ from fred_sdk.contracts.eval import EvalStep, EvalTrace
65
66
  from fred_sdk.contracts.context import (
66
67
  AgentInvocationRequest,
67
68
  AgentInvocationResult,
68
69
  BoundRuntimeContext,
70
+ ConversationTurn,
69
71
  PortableContext,
70
72
  PortableEnvironment,
71
73
  RuntimeContext,
@@ -558,8 +560,15 @@ class LocalRegistryAgentInvoker(AgentInvokerPort):
558
560
  is_error=True,
559
561
  )
560
562
 
561
- execute_request = _to_internal_request(
562
- _build_runtime_execute_request_from_invocation(request)
563
+ context_dict = request.context.model_dump(mode="json")
564
+ context_dict.setdefault("execution_action", ExecutionGrantAction.EXECUTE.value)
565
+ execute_request = _AgentExecuteRequest.model_construct(
566
+ agent_id=request.agent_id,
567
+ agent_instance_id=None,
568
+ message=request.message,
569
+ context=context_dict,
570
+ resume_payload=None,
571
+ invocation_turns=request.prior_turns,
563
572
  )
564
573
 
565
574
  content_parts: list[str] = []
@@ -741,6 +750,10 @@ class _AgentExecuteRequest(BaseModel):
741
750
  "LangGraph Command(resume=...) — the message field is ignored."
742
751
  ),
743
752
  )
753
+ invocation_turns: tuple[ConversationTurn, ...] = Field(
754
+ default=(),
755
+ description="Prior conversation turns forwarded by the calling agent.",
756
+ )
744
757
 
745
758
  @model_validator(mode="after")
746
759
  def _require_message_or_resume(self) -> "_AgentExecuteRequest":
@@ -766,13 +779,6 @@ class _AgentExecuteRequest(BaseModel):
766
779
  return self
767
780
 
768
781
 
769
- @dataclass(slots=True)
770
- class _PreparedRuntimeExecution:
771
- runtime: ReActRuntime | GraphRuntime
772
- execution_config: ExecutionConfig
773
- executor_input: Any
774
-
775
-
776
782
  def _to_internal_request(r: RuntimeExecuteRequest) -> "_AgentExecuteRequest":
777
783
  """
778
784
  Bridge a public RuntimeExecuteRequest to the internal execution model.
@@ -797,36 +803,6 @@ def _to_internal_request(r: RuntimeExecuteRequest) -> "_AgentExecuteRequest":
797
803
  )
798
804
 
799
805
 
800
- def _build_runtime_execute_request_from_invocation(
801
- request: AgentInvocationRequest,
802
- ) -> RuntimeExecuteRequest:
803
- """
804
- Project one in-process agent invocation onto the public execute contract.
805
-
806
- Why this exists:
807
- - pod-local agent-to-agent calls should follow the same request projection
808
- path as HTTP execution, rather than hand-constructing a second private
809
- request shape
810
- - future continuity fields should therefore land once on the typed runtime
811
- contract, then flow through both local and remote invocation paths
812
-
813
- How to use it:
814
- - call from `LocalRegistryAgentInvoker.invoke(...)`
815
- - pass the result through `_to_internal_request(...)` until the remaining
816
- internal helpers consume `RuntimeExecuteRequest` directly
817
-
818
- Example:
819
- - `runtime_request = _build_runtime_execute_request_from_invocation(request)`
820
- """
821
-
822
- return RuntimeExecuteRequest(
823
- agent_id=request.agent_id,
824
- input=request.message,
825
- session_id=request.context.session_id,
826
- runtime_context=request.context.model_dump(mode="json"),
827
- )
828
-
829
-
830
806
  class _AgentTemplateSummary(BaseModel):
831
807
  template_agent_id: str
832
808
  title: str
@@ -871,7 +847,7 @@ def _apply_runtime_tuning(
871
847
  - `definition = _apply_runtime_tuning(template_definition, resolution.tuning)`
872
848
  """
873
849
 
874
- update: dict[str, Any] = {
850
+ update: dict[str, object] = {
875
851
  "role": tuning.role,
876
852
  "description": tuning.description,
877
853
  "tags": tuple(tuning.tags),
@@ -881,11 +857,7 @@ def _apply_runtime_tuning(
881
857
  ),
882
858
  }
883
859
  system_prompt = tuning.values.get("prompts.system")
884
- if (
885
- isinstance(definition, ReActAgentDefinition)
886
- and isinstance(system_prompt, str)
887
- and system_prompt.strip()
888
- ):
860
+ if isinstance(system_prompt, str) and system_prompt.strip():
889
861
  update["system_prompt_template"] = system_prompt
890
862
  return definition.model_copy(update=update)
891
863
 
@@ -1416,6 +1388,120 @@ def _sse(payload: str) -> str:
1416
1388
  return f"data: {payload}\n\n"
1417
1389
 
1418
1390
 
1391
+ @dataclass(frozen=True)
1392
+ class _TurnOutcome:
1393
+ model_name: str | None
1394
+ finish_reason: str
1395
+ token_usage: dict[str, Any] | None
1396
+ input_tokens: int | None
1397
+ output_tokens: int | None
1398
+ tool_count: int
1399
+ is_error: bool
1400
+ total_ms: int
1401
+ final_content: str | None
1402
+
1403
+
1404
+ def _parse_turn_outcome(
1405
+ payloads: list[dict[str, Any]],
1406
+ turn_start: float,
1407
+ ) -> _TurnOutcome:
1408
+ total_ms = int((time.monotonic() - turn_start) * 1000)
1409
+ tool_count = sum(1 for p in payloads if p.get("kind") == "tool_call")
1410
+ final = next((p for p in reversed(payloads) if p.get("kind") == "final"), None)
1411
+ is_error = any(p.get("kind") == "execution_error" for p in payloads)
1412
+ token_usage: dict[str, Any] | None = final.get("token_usage") if final else None
1413
+ return _TurnOutcome(
1414
+ model_name=final.get("model_name") if final else None,
1415
+ finish_reason="error"
1416
+ if is_error
1417
+ else ((final.get("finish_reason") or "") if final else ""),
1418
+ token_usage=token_usage,
1419
+ input_tokens=token_usage.get("input_tokens") if token_usage else None,
1420
+ output_tokens=token_usage.get("output_tokens") if token_usage else None,
1421
+ tool_count=tool_count,
1422
+ is_error=is_error,
1423
+ total_ms=total_ms,
1424
+ final_content=(final.get("content") or None) if final else None,
1425
+ )
1426
+
1427
+
1428
+ def _build_eval_trace(
1429
+ payloads: list[dict[str, Any]],
1430
+ input_text: str,
1431
+ agent_id: str,
1432
+ session_id: str,
1433
+ turn_start: float,
1434
+ ) -> EvalTrace:
1435
+ outcome = _parse_turn_outcome(payloads, turn_start)
1436
+ steps: list[EvalStep] = []
1437
+ retrieval_context: list[str] = []
1438
+ tools_called: list[str] = []
1439
+ error: str | None = None
1440
+
1441
+ for p in payloads:
1442
+ kind = p.get("kind")
1443
+ if kind == "tool_call":
1444
+ steps.append(
1445
+ EvalStep(
1446
+ kind="tool_call",
1447
+ tool_name=p.get("tool_name"),
1448
+ call_id=p.get("call_id"),
1449
+ arguments=p.get("arguments") or {},
1450
+ )
1451
+ )
1452
+ if p.get("tool_name"):
1453
+ tools_called.append(p["tool_name"])
1454
+ elif kind == "tool_result":
1455
+ content = p.get("content", "")
1456
+ is_err = p.get("is_error", False)
1457
+ steps.append(
1458
+ EvalStep(
1459
+ kind="tool_result",
1460
+ tool_name=p.get("tool_name"),
1461
+ call_id=p.get("call_id"),
1462
+ content=content,
1463
+ is_error=is_err,
1464
+ )
1465
+ )
1466
+ if not is_err:
1467
+ sources = p.get("sources") or []
1468
+ if sources:
1469
+ retrieval_context.extend(
1470
+ s["content"] for s in sources if s.get("content")
1471
+ )
1472
+ elif content:
1473
+ retrieval_context.append(content)
1474
+ elif kind == "final":
1475
+ steps.append(EvalStep(kind="final", content=p.get("content")))
1476
+ elif kind == "node_error":
1477
+ steps.append(
1478
+ EvalStep(
1479
+ kind="node_error",
1480
+ node_id=p.get("node_id"),
1481
+ error_message=p.get("error_message"),
1482
+ )
1483
+ )
1484
+ elif kind == "awaiting_human":
1485
+ steps.append(EvalStep(kind="awaiting_human"))
1486
+ elif kind == "execution_error":
1487
+ error = p.get("message")
1488
+
1489
+ return EvalTrace(
1490
+ session_id=session_id,
1491
+ agent_id=agent_id,
1492
+ input=input_text,
1493
+ output=outcome.final_content,
1494
+ error=error,
1495
+ latency_ms=outcome.total_ms,
1496
+ model_name=outcome.model_name,
1497
+ token_usage=outcome.token_usage,
1498
+ finish_reason=outcome.finish_reason or None,
1499
+ steps=tuple(steps),
1500
+ retrieval_context=tuple(retrieval_context),
1501
+ tools_called=tuple(tools_called),
1502
+ )
1503
+
1504
+
1419
1505
  def _emit_turn_completed(
1420
1506
  container: PodApplicationContext,
1421
1507
  *,
@@ -1447,21 +1533,7 @@ def _emit_turn_completed(
1447
1533
  """
1448
1534
  try:
1449
1535
  kpi = get_runtime_context().get_kpi_writer()
1450
- total_ms = int((time.monotonic() - turn_start) * 1000)
1451
- tool_count = sum(1 for p in payloads if p.get("kind") == "tool_call")
1452
- final = next((p for p in reversed(payloads) if p.get("kind") == "final"), None)
1453
- is_error = any(p.get("kind") == "execution_error" for p in payloads)
1454
- model_name: str | None = final.get("model_name") if final else None
1455
- finish_reason: str = (
1456
- "error" if is_error else (final.get("finish_reason") or "") if final else ""
1457
- )
1458
- token_usage: dict[str, Any] | None = final.get("token_usage") if final else None
1459
- input_tokens: int | None = (
1460
- token_usage.get("input_tokens") if token_usage else None
1461
- )
1462
- output_tokens: int | None = (
1463
- token_usage.get("output_tokens") if token_usage else None
1464
- )
1536
+ outcome = _parse_turn_outcome(payloads, turn_start)
1465
1537
  runtime_id = get_runtime_context().config.service_name
1466
1538
 
1467
1539
  # Prometheus-safe dims: low-cardinality only.
@@ -1472,25 +1544,25 @@ def _emit_turn_completed(
1472
1544
  "team_id": team_id,
1473
1545
  "template_agent_id": template_agent_id,
1474
1546
  "runtime_id": runtime_id,
1475
- "model_name": model_name,
1476
- "finish_reason": finish_reason,
1547
+ "model_name": outcome.model_name,
1548
+ "finish_reason": outcome.finish_reason,
1477
1549
  }
1478
1550
 
1479
1551
  kpi.emit(
1480
1552
  name="agent.turn_completed",
1481
1553
  type="timer",
1482
- value=total_ms,
1554
+ value=outcome.total_ms,
1483
1555
  unit="ms",
1484
1556
  dims=prom_dims,
1485
1557
  quantities={
1486
- "tool_count": tool_count,
1487
- "input_tokens": input_tokens,
1488
- "output_tokens": output_tokens,
1558
+ "tool_count": outcome.tool_count,
1559
+ "input_tokens": outcome.input_tokens,
1560
+ "output_tokens": outcome.output_tokens,
1489
1561
  },
1490
1562
  actor=KPIActor(type="system"),
1491
1563
  )
1492
1564
 
1493
- if is_error:
1565
+ if outcome.is_error:
1494
1566
  kpi.emit(
1495
1567
  name="agent.turn_error_total",
1496
1568
  type="counter",
@@ -1507,12 +1579,12 @@ def _emit_turn_completed(
1507
1579
  "session_id": session_id,
1508
1580
  "exchange_id": exchange_id,
1509
1581
  "user_id": user_id,
1510
- "total_ms": total_ms,
1511
- "is_error": is_error,
1582
+ "total_ms": outcome.total_ms,
1583
+ "is_error": outcome.is_error,
1512
1584
  **prom_dims,
1513
- "tool_count": tool_count,
1514
- "input_tokens": input_tokens,
1515
- "output_tokens": output_tokens,
1585
+ "tool_count": outcome.tool_count,
1586
+ "input_tokens": outcome.input_tokens,
1587
+ "output_tokens": outcome.output_tokens,
1516
1588
  },
1517
1589
  )
1518
1590
  with container._kpi_turns_lock:
@@ -1604,42 +1676,7 @@ async def _stream(
1604
1676
  )
1605
1677
 
1606
1678
 
1607
- def _build_executor_input(
1608
- definition: ReActAgentDefinition | GraphAgentDefinition,
1609
- request: _AgentExecuteRequest,
1610
- ) -> Any:
1611
- """
1612
- Normalize one turn into the executor input expected by the selected runtime.
1613
-
1614
- Why this exists:
1615
- - `ReActRuntime` and `GraphRuntime` accept different input shapes
1616
- - resume turns also bypass normal message validation, so the mapping should
1617
- live in one helper instead of being repeated inline in the execution loop
1618
-
1619
- How to use it:
1620
- - call while assembling one prepared runtime execution
1621
- - pass the returned object unchanged to `executor.stream(...)`
1622
-
1623
- Example:
1624
- - `executor_input = _build_executor_input(definition, request)`
1625
- """
1626
-
1627
- if isinstance(definition, GraphAgentDefinition):
1628
- input_cls = definition.input_model()
1629
- if request.resume_payload is not None:
1630
- return input_cls.model_construct(message="")
1631
- return input_cls.model_validate({"message": request.message or ""})
1632
-
1633
- return ReActInput(
1634
- messages=(
1635
- ()
1636
- if request.resume_payload is not None
1637
- else (ReActMessage(role=ReActMessageRole.USER, content=request.message),)
1638
- ),
1639
- )
1640
-
1641
-
1642
- def _prepare_runtime_execution(
1679
+ async def _iterate_runtime_event_payloads(
1643
1680
  definition: ReActAgentDefinition | GraphAgentDefinition,
1644
1681
  request: _AgentExecuteRequest,
1645
1682
  access_token: str | None = None,
@@ -1647,24 +1684,26 @@ def _prepare_runtime_execution(
1647
1684
  team_id: str | None = None,
1648
1685
  registry: Mapping[str, ReActAgentDefinition | GraphAgentDefinition] | None = None,
1649
1686
  exchange_id: str | None = None,
1650
- ) -> _PreparedRuntimeExecution:
1687
+ ) -> AsyncIterator[dict[str, Any]]:
1651
1688
  """
1652
- Build the bound runtime, executor input, and execution config for one turn.
1689
+ Execute one agent turn and yield runtime-event payloads as JSON-ready dicts.
1653
1690
 
1654
- Why this exists:
1655
- - `execute`, `execute/stream`, and in-process agent invocation all converge
1656
- on `_iterate_runtime_event_payloads`, so this is the narrowest place to
1657
- centralize request projection before memory fields are added
1658
- - it removes one long block of binding/runtime setup from the event loop and
1659
- gives future continuity fields a single place to enter the runtime stack
1691
+ Why this helper exists:
1692
+ - both `/agents/execute` and `/agents/execute/stream` share the same runtime
1693
+ wiring and event production path
1694
+ - keeping the generator payload-oriented lets the HTTP layer choose whether
1695
+ it renders SSE or returns a terminal JSON response
1660
1696
 
1661
- How to use it:
1662
- - call from `_iterate_runtime_event_payloads(...)`
1663
- - activate the returned runtime, obtain its executor, then stream with the
1664
- returned `executor_input` and `execution_config`
1697
+ team_id:
1698
+ - callers are responsible for resolving the effective team before calling this
1699
+ function; see _stream() for the standalone "personal" default logic
1700
+ - None is accepted for agent-to-agent (AgentInvoker) invocations where no
1701
+ team scope is required
1665
1702
 
1666
- Example:
1667
- - `prepared = _prepare_runtime_execution(definition, request, team_id="fredlab")`
1703
+ access_token:
1704
+ - the user's JWT forwarded via the Authorization header
1705
+ - stored in RuntimeContext so KF tool adapters can use it for outbound calls
1706
+ - None in local dev when security is disabled
1668
1707
  """
1669
1708
 
1670
1709
  request_id = str(uuid4())
@@ -1723,6 +1762,7 @@ def _prepare_runtime_execution(
1723
1762
  runtime_context=runtime_context,
1724
1763
  portable_context=portable_context,
1725
1764
  )
1765
+
1726
1766
  services = _build_runtime_services(
1727
1767
  definition,
1728
1768
  binding,
@@ -1730,9 +1770,8 @@ def _prepare_runtime_execution(
1730
1770
  registry=registry,
1731
1771
  access_token=access_token,
1732
1772
  )
1733
- runtime: ReActRuntime | GraphRuntime
1734
1773
  if isinstance(definition, GraphAgentDefinition):
1735
- runtime = GraphRuntime(
1774
+ runtime: ReActRuntime | GraphRuntime = GraphRuntime(
1736
1775
  definition=definition,
1737
1776
  services=services,
1738
1777
  )
@@ -1750,59 +1789,42 @@ def _prepare_runtime_execution(
1750
1789
  session_id=ctx.get("session_id") or request_id,
1751
1790
  checkpoint_id=request.checkpoint_id,
1752
1791
  resume_payload=request.resume_payload,
1753
- )
1754
- return _PreparedRuntimeExecution(
1755
- runtime=runtime,
1756
- execution_config=execution_config,
1757
- executor_input=_build_executor_input(definition, request),
1758
- )
1759
-
1760
-
1761
- async def _iterate_runtime_event_payloads(
1762
- definition: ReActAgentDefinition | GraphAgentDefinition,
1763
- request: _AgentExecuteRequest,
1764
- access_token: str | None = None,
1765
- *,
1766
- team_id: str | None = None,
1767
- registry: Mapping[str, ReActAgentDefinition | GraphAgentDefinition] | None = None,
1768
- exchange_id: str | None = None,
1769
- ) -> AsyncIterator[dict[str, Any]]:
1770
- """
1771
- Execute one agent turn and yield runtime-event payloads as JSON-ready dicts.
1772
-
1773
- Why this helper exists:
1774
- - both `/agents/execute` and `/agents/execute/stream` share the same runtime
1775
- wiring and event production path
1776
- - keeping the generator payload-oriented lets the HTTP layer choose whether
1777
- it renders SSE or returns a terminal JSON response
1778
-
1779
- team_id:
1780
- - callers are responsible for resolving the effective team before calling this
1781
- function; see _stream() for the standalone "personal" default logic
1782
- - None is accepted for agent-to-agent (AgentInvoker) invocations where no
1783
- team scope is required
1784
-
1785
- access_token:
1786
- - the user's JWT forwarded via the Authorization header
1787
- - stored in RuntimeContext so KF tool adapters can use it for outbound calls
1788
- - None in local dev when security is disabled
1789
- """
1790
- prepared = _prepare_runtime_execution(
1791
- definition,
1792
- request,
1793
- access_token=access_token,
1794
- team_id=team_id,
1795
- registry=registry,
1796
- exchange_id=exchange_id,
1792
+ invocation_turns=getattr(request, "invocation_turns", ()),
1797
1793
  )
1798
1794
 
1799
1795
  try:
1800
- await prepared.runtime.activate()
1801
- executor = await prepared.runtime.get_executor()
1802
- async for event in executor.stream(
1803
- prepared.executor_input,
1804
- prepared.execution_config,
1805
- ):
1796
+ await runtime.activate()
1797
+ executor = await runtime.get_executor()
1798
+ if isinstance(definition, GraphAgentDefinition):
1799
+ # Graph agents receive their typed input schema; the agent's
1800
+ # build_turn_state() maps it to graph state before the first node runs.
1801
+ # The standard contract is a single "message" field in the input schema.
1802
+ # On a HITL resume the runtime ignores input entirely (state is loaded
1803
+ # from the checkpoint), so bypass validation with model_construct.
1804
+ input_cls = definition.input_model()
1805
+ if request.resume_payload is not None:
1806
+ graph_input = input_cls.model_construct(message="")
1807
+ else:
1808
+ graph_input = input_cls.model_validate(
1809
+ {"message": request.message or ""}
1810
+ )
1811
+ executor_input: ReActInput | object = graph_input
1812
+ else:
1813
+ # On HITL resume, messages are ignored by the codec — the graph
1814
+ # resumes from its checkpointed interrupt via Command(resume=...).
1815
+ # On a normal turn, the user message is the only input.
1816
+ executor_input = ReActInput(
1817
+ messages=(
1818
+ ()
1819
+ if request.resume_payload is not None
1820
+ else (
1821
+ ReActMessage(
1822
+ role=ReActMessageRole.USER, content=request.message
1823
+ ),
1824
+ )
1825
+ ),
1826
+ )
1827
+ async for event in executor.stream(executor_input, execution_config):
1806
1828
  payload = event.model_dump(mode="json")
1807
1829
  if not isinstance(payload, dict):
1808
1830
  raise RuntimeError(
@@ -1815,7 +1837,7 @@ async def _iterate_runtime_event_payloads(
1815
1837
  )
1816
1838
  yield RuntimeErrorEvent(message=str(exc)).model_dump(mode="json")
1817
1839
  finally:
1818
- await prepared.runtime.dispose()
1840
+ await runtime.dispose()
1819
1841
 
1820
1842
 
1821
1843
  def _terminal_execute_payload(
@@ -2418,6 +2440,113 @@ def _build_agent_router(
2418
2440
  )
2419
2441
  return _terminal_execute_payload(payloads)
2420
2442
 
2443
+ @router.post(
2444
+ "/evaluate",
2445
+ response_model=EvalTrace,
2446
+ )
2447
+ async def evaluate(
2448
+ request: RuntimeExecuteRequest,
2449
+ http_request: Request,
2450
+ authenticated_user: KeycloakUser | None = Depends(_authenticated_user),
2451
+ container: PodApplicationContext = Depends(get_pod_container),
2452
+ ) -> EvalTrace:
2453
+ """
2454
+ Execute one agent turn and return a complete EvalTrace as JSON.
2455
+
2456
+ POST <configured base_url>/agents/evaluate
2457
+ Authorization: Bearer <user JWT>
2458
+ Body: RuntimeExecuteRequest
2459
+ Response: EvalTrace — synchronous, no SSE, no Langfuse dependency
2460
+
2461
+ Intended for evaluation harnesses (DeepEval, Promptfoo) that need
2462
+ input, output, retrieval_context, tools_called, and steps in one response.
2463
+ """
2464
+ auth = http_request.headers.get("Authorization", "")
2465
+ access_token = auth.removeprefix("Bearer ").strip() or None
2466
+
2467
+ expected_action = _expected_execution_action(request)
2468
+
2469
+ try:
2470
+ validate_execution_grant(request, expected_action=expected_action)
2471
+ except ExecutionGrantViolation as exc:
2472
+ _emit_audit_event(
2473
+ container,
2474
+ "warning",
2475
+ "grant_validation_failed",
2476
+ agent_instance_id=request.agent_instance_id,
2477
+ user_id=request.effective_user_id(),
2478
+ action=expected_action.value,
2479
+ reason=str(exc),
2480
+ )
2481
+ raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(exc))
2482
+ if request.execution_grant is not None:
2483
+ _emit_audit_event(
2484
+ container,
2485
+ "info",
2486
+ "grant_validated",
2487
+ agent_instance_id=request.agent_instance_id,
2488
+ user_id=request.effective_user_id(),
2489
+ action=expected_action.value,
2490
+ )
2491
+ _validate_grant_user_correlation(request, authenticated_user, container)
2492
+ await _validate_session_checkpoint_access(request)
2493
+
2494
+ exchange_id = str(uuid4())
2495
+ turn_start = time.monotonic()
2496
+ internal_req = _to_internal_request(request)
2497
+ target = await _resolve_agent_instance(
2498
+ request=internal_req,
2499
+ registry=registry,
2500
+ access_token=access_token,
2501
+ control_plane_url=get_runtime_context().config.control_plane_url,
2502
+ )
2503
+ payloads = [
2504
+ payload
2505
+ async for payload in _iterate_runtime_event_payloads(
2506
+ target.definition,
2507
+ internal_req,
2508
+ access_token=access_token,
2509
+ team_id=target.team_id,
2510
+ registry=registry,
2511
+ exchange_id=exchange_id,
2512
+ )
2513
+ ]
2514
+ session_id: str | None = request.effective_session_id()
2515
+ eval_session_id = session_id or str(uuid4())
2516
+ user_id_str = request.effective_user_id() or "unknown"
2517
+ _emit_turn_completed(
2518
+ container,
2519
+ session_id=session_id,
2520
+ exchange_id=exchange_id,
2521
+ user_id=user_id_str,
2522
+ team_id=target.team_id,
2523
+ agent_instance_id=request.agent_instance_id,
2524
+ template_agent_id=target.definition.agent_id,
2525
+ payloads=payloads,
2526
+ turn_start=turn_start,
2527
+ )
2528
+ if session_id:
2529
+ history_store = get_runtime_context().config.history_store
2530
+ if history_store is not None:
2531
+ await _write_turn_history(
2532
+ session_id=session_id,
2533
+ user_id=user_id_str,
2534
+ request_message=request.input,
2535
+ payloads=payloads,
2536
+ history_store=history_store,
2537
+ team_id=target.team_id,
2538
+ agent_instance_id=request.agent_instance_id,
2539
+ exchange_id=exchange_id,
2540
+ resume_payload=request.resume_payload,
2541
+ )
2542
+ return _build_eval_trace(
2543
+ payloads=payloads,
2544
+ input_text=request.input or "",
2545
+ agent_id=target.definition.agent_id,
2546
+ session_id=eval_session_id,
2547
+ turn_start=turn_start,
2548
+ )
2549
+
2421
2550
  @router.post(
2422
2551
  "/execute/stream",
2423
2552
  )
@@ -2,8 +2,10 @@ from .completion import completion_candidates
2
2
  from .entrypoint import build_parser, main
3
3
  from .history_display import (
4
4
  build_hitl_resume_payload,
5
+ print_eval_trace,
5
6
  print_history,
6
7
  print_runtime_event,
8
+ run_eval_turn,
7
9
  run_single_turn,
8
10
  )
9
11
  from .kpi_display import (
@@ -19,6 +21,8 @@ from .kpi_display import (
19
21
  from .pod_client import DEFAULT_AGENT_POD_BASE_URL, AgentPodClient
20
22
  from .repl import run_interactive_chat
21
23
  from .repl_helpers import (
24
+ ExecutionMode,
25
+ execution_mode_color,
22
26
  execution_mode_label,
23
27
  fmt_bytes,
24
28
  parse_mode_command,
@@ -33,6 +37,7 @@ from .url_helpers import (
33
37
  __all__ = [
34
38
  "AgentPodClient",
35
39
  "DEFAULT_AGENT_POD_BASE_URL",
40
+ "ExecutionMode",
36
41
  "HistogramSeriesSummary",
37
42
  "PrometheusSample",
38
43
  "build_hitl_resume_payload",
@@ -40,7 +45,9 @@ __all__ = [
40
45
  "completion_candidates",
41
46
  "default_agent_metrics_url",
42
47
  "default_agent_pod_base_url",
48
+ "execution_mode_color",
43
49
  "execution_mode_label",
50
+ "print_eval_trace",
44
51
  "filter_prometheus_samples",
45
52
  "fmt_bytes",
46
53
  "format_metric_value",
@@ -53,6 +60,7 @@ __all__ = [
53
60
  "print_history",
54
61
  "print_runtime_event",
55
62
  "render_kpi_report",
63
+ "run_eval_turn",
56
64
  "run_interactive_chat",
57
65
  "run_single_turn",
58
66
  "summarize_prometheus_histograms",
@@ -49,7 +49,7 @@ def completion_candidates(
49
49
  return [sid for sid in session_ids if sid.startswith(prefix)]
50
50
  if stripped.startswith("/mode "):
51
51
  prefix = stripped.removeprefix("/mode ").strip()
52
- return [mode for mode in ("final", "stream") if mode.startswith(prefix)]
52
+ return [mode for mode in ("eval", "final", "stream") if mode.startswith(prefix)]
53
53
  if stripped.startswith("/"):
54
54
  return complete_slash_commands(stripped, commands=_COMMANDS)
55
55
  return []