PyPI - fruxon - Versions diffs - 0.7.1__tar.gz → 0.7.2__tar.gz - Mend

fruxon 0.7.1__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

{fruxon-0.7.1 → fruxon-0.7.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fruxon
-Version: 0.7.1
+Version: 0.7.2
 Summary: The Fruxon SDK is a lightweight Python client for integrating with the Fruxon platform.
 Project-URL: bugs, https://github.com/fruxon-ai/fruxon-sdk/issues
 Project-URL: changelog, https://github.com/fruxon-ai/fruxon-sdk/blob/main/HISTORY.md

{fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.7.1'
-__version_tuple__ = version_tuple = (0, 7, 1)
+__version__ = version = '0.7.2'
+__version_tuple__ = version_tuple = (0, 7, 2)
 __commit_id__ = commit_id = None

{fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/run.py RENAMED Viewed

@@ -439,7 +439,7 @@ def _run_stream(
 # Bumped when the NDJSON event shape changes in a backwards-incompatible
 # way. Emitted on the very first event so an agent driver can detect
 # version drift without parsing the whole stream first.
-_STREAM_SCHEMA_VERSION = 1
+_STREAM_SCHEMA_VERSION = 2  # bumped: tool_call/result shape + usage/status/HITL
 def _emit_ndjson(record: dict) -> None:
@@ -466,24 +466,51 @@ def _run_stream_ndjson(
     parse the stream incrementally with ``readline()`` + ``json.loads``
     instead of waiting for the run to finish.
-    **Event shape** (stable — schema_version bumped on breaking changes):
+    **Event shape** (stable — schema_version bumped on breaking changes).
+    Mirrors the backend's ``SseWriter`` (see ``Fruxon.Model/Modules/Streaming/
+    SseWriter.cs``) with flat snake_case field names so an LLM driver doesn't
+    have to learn the camelCase wire shape:
-    * ``{"type":"start","schema_version":1,"agent":"<slug>"}``
+    * ``{"type":"start","schema_version":2,"agent":"<slug>"}``
       Always first. Lets the consumer pin the parser version up-front.
     * ``{"type":"text","delta":"..."}``
       Streamed text chunk. Concatenate every ``delta`` in arrival order
       to reconstruct the response body.
-    * ``{"type":"tool_call","id":"...","name":"...","arguments":{...}}``
+    * ``{"type":"tool_call","id":"...","name":"...","display_name":"...",
+       "integration_id":"...","tool_type":"...","arguments":{...},
+       "start_time_ms":<int>?}``
       The agent invoked a tool. ``id`` correlates with the matching
-      ``tool_result``.
-    * ``{"type":"tool_result","id":"...","result":<any>,"error":"..."?,
-       "duration_ms":<int>?}``
-      Tool call completed. ``error`` is set instead of ``result`` on
-      failure.
-    * ``{"type":"done","record_id":"...","duration_ms":<int>?,
-       "total_cost":<float>?,"agent":"<slug>"}``
+      ``tool_result``. ``name`` is the tool's wire id; ``display_name``
+      is the human label. ``integration_id`` is the empty string ``""``
+      for built-in (provider-native) tools and the integration slug
+      otherwise.
+    * ``{"type":"tool_result","id":"...","status":"succeeded|failed|...",
+       "result":<any>?, "end_time_ms":<int>?, "duration_ms":<int>?}``
+      Tool call completed. ``status`` is the authoritative pass/fail
+      signal (matches the backend's ``ToolStatus``). ``result`` carries
+      the tool's return payload (string for HTTP, structured for code
+      tools).
+    * ``{"type":"step_trace","id":"...","name":"...","step_type":"...",
+       "status":"...","duration_ms":<int>?}``
+      A flow step finished. Only emitted on the test-stream path
+      (``fruxon agents test``), not on production runs.
+    * ``{"type":"status","status":"..."}``
+      Backend state change. Surfaced so a driver knows whether the
+      stream is still progressing or paused for HITL approval.
+    * ``{"type":"usage","input_tokens":<int>,"output_tokens":<int>,
+       "cached_tokens":<int>,"thinking_tokens":<int>}``
+      Token accounting. Emitted near the end of the stream; pair with
+      ``done.total_cost`` for the cost picture.
+    * ``{"type":"done","record_id":"...","agent":"<slug>",
+       "session_id":"..."?,"duration_ms":<int>?,"total_cost":<float>?,
+       "input_cost":<float>?,"output_cost":<float>?,
+       "agent_revision":<int>?}``
       Terminal event on success. ``record_id`` is the handle for
       ``fruxon trace <agent> <record_id>``.
+    * ``{"type":"done","status":"waiting_for_human","record_id":"...",
+       "session_id":"...","human_approval_request_id":"..."}``
+      Terminal event when the run paused for HITL approval — same
+      ``done`` type, distinguished by the ``status`` field.
     * ``{"type":"error","message":"...","code":"..."?}``
       Terminal event on failure. Followed by process exit with
       :data:`EXIT_SERVER`. Also emits the standard agent-mode error
@@ -506,42 +533,119 @@ def _run_stream_ndjson(
                 continue
             if event.event == "tool_call":
-                # Server payloads use ``arguments`` or ``args`` historically;
-                # normalize to ``arguments`` so downstream parsers don't
-                # have to handle both. ``id`` is the correlator with the
-                # later ``tool_result`` — agents stitch them on this key.
-                args = event.data.get("arguments")
-                if args is None:
-                    args = event.data.get("args")
-                _emit_ndjson(
-                    {
-                        "type": "tool_call",
-                        "id": event.data.get("id") or event.data.get("toolCallId"),
-                        "name": event.data.get("name") or event.data.get("toolName"),
-                        "arguments": args,
-                    }
-                )
+                # The backend nests tool identity under ``toolTrace``:
+                # ``{toolTrace: {tool: {name, integrationId}, displayName,
+                # toolType, parameters}, arguments, startTime, id}``. We
+                # flatten the agent-relevant fields up so a driver
+                # doesn't have to navigate two levels of nesting.
+                trace = event.data.get("toolTrace") if isinstance(event.data.get("toolTrace"), dict) else {}
+                tool_inner = trace.get("tool") if isinstance(trace.get("tool"), dict) else {}
+                payload: dict = {
+                    "type": "tool_call",
+                    "id": event.data.get("id"),
+                    "name": tool_inner.get("name") if isinstance(tool_inner, dict) else None,
+                    "arguments": event.data.get("arguments"),
+                }
+                # Optional fields: include only when present so the
+                # record stays compact when the server doesn't supply
+                # them (older backends, simplified payloads).
+                display_name = trace.get("displayName") if isinstance(trace, dict) else None
+                if isinstance(display_name, str) and display_name:
+                    payload["display_name"] = display_name
+                integration_id = tool_inner.get("integrationId") if isinstance(tool_inner, dict) else None
+                if isinstance(integration_id, str):
+                    # ``""`` is meaningful — marks built-in (provider-
+                    # native) tools. Include it explicitly.
+                    payload["integration_id"] = integration_id
+                tool_type = trace.get("toolType") if isinstance(trace, dict) else None
+                if isinstance(tool_type, str) and tool_type:
+                    payload["tool_type"] = tool_type
+                start_time = event.data.get("startTime")
+                if isinstance(start_time, (int, float)):
+                    payload["start_time_ms"] = int(start_time)
+                _emit_ndjson(payload)
                 continue
             if event.event == "tool_result":
-                payload: dict = {
+                # Backend payload: ``{id, result, endTime, status}``.
+                # ``status`` (``succeeded|failed|cancelled|...``) is the
+                # authoritative pass/fail signal — surface it directly
+                # so an agent doesn't have to inspect ``result`` shape
+                # to know what happened.
+                payload = {
                     "type": "tool_result",
-                    "id": event.data.get("id") or event.data.get("toolCallId"),
+                    "id": event.data.get("id"),
                 }
-                if "error" in event.data and event.data.get("error"):
-                    payload["error"] = event.data["error"]
-                else:
-                    payload["result"] = event.data.get("result")
+                status = event.data.get("status")
+                if isinstance(status, str) and status:
+                    payload["status"] = status
+                if "result" in event.data:
+                    payload["result"] = event.data["result"]
+                end_time = event.data.get("endTime")
+                if isinstance(end_time, (int, float)):
+                    payload["end_time_ms"] = int(end_time)
+                # Some older paths used ``durationMs`` / ``duration``
+                # — keep that fallback so a mixed-version backend
+                # doesn't drop the field.
                 duration = event.data.get("durationMs") or event.data.get("duration")
                 if isinstance(duration, (int, float)):
                     payload["duration_ms"] = int(duration)
                 _emit_ndjson(payload)
                 continue
+            if event.event == "step_trace":
+                # Test-stream only — emitted when each flow step finishes.
+                # Lets a CI gate or an agent driver tell which step did
+                # what (cost attribution, debugging a broken flow).
+                payload = {
+                    "type": "step_trace",
+                    "id": event.data.get("id"),
+                }
+                for src_key, dst_key in (
+                    ("displayName", "name"),
+                    ("type", "step_type"),
+                    ("status", "status"),
+                ):
+                    val = event.data.get(src_key)
+                    if isinstance(val, str) and val:
+                        payload[dst_key] = val
+                duration = event.data.get("duration")
+                if isinstance(duration, (int, float)):
+                    payload["duration_ms"] = int(duration)
+                _emit_ndjson(payload)
+                continue
+            if event.event == "status":
+                # Backend state change — used for HITL "pausing for
+                # approval" and similar transitions. Pass the raw
+                # status string through; the set is small enough
+                # that we don't need to normalize.
+                status_val = event.data.get("status")
+                if isinstance(status_val, str) and status_val:
+                    _emit_ndjson({"type": "status", "status": status_val})
+                continue
+            if event.event == "usage":
+                # Token accounting at end of stream. Surfaces all four
+                # buckets the backend reports so an agent driver can
+                # roll its own per-bucket cost calc if needed.
+                usage_payload: dict = {"type": "usage"}
+                for src_key, dst_key in (
+                    ("inputTokens", "input_tokens"),
+                    ("outputTokens", "output_tokens"),
+                    ("cachedTokens", "cached_tokens"),
+                    ("thinkingTokens", "thinking_tokens"),
+                ):
+                    val = event.data.get(src_key)
+                    if isinstance(val, (int, float)):
+                        usage_payload[dst_key] = int(val)
+                _emit_ndjson(usage_payload)
+                continue
             if event.event == "error":
                 message = event.data.get("message") or "Unknown error"
                 code = event.data.get("code")
-                err_record = {"type": "error", "message": message}
+                err_record: dict = {"type": "error", "message": message}
                 if code:
                     err_record["code"] = code
                 _emit_ndjson(err_record)
@@ -552,25 +656,47 @@ def _run_stream_ndjson(
                 fail(message, code=EXIT_SERVER)
             if event.event == "done":
-                trace = event.data.get("trace") if isinstance(event.data.get("trace"), dict) else {}
+                # Two flavors of done:
+                # 1. Normal completion — carries the full ``trace`` envelope
+                #    (duration, costs, step tree). We flatten the headline
+                #    fields up.
+                # 2. HITL suspension — carries ``status: "WaitingForHuman"``
+                #    and a ``humanApprovalRequestId`` instead of a trace.
+                #    Surfaced under the same ``type: "done"`` so a driver's
+                #    end-of-stream loop is a single branch, distinguished
+                #    by the ``status`` field.
+                hitl_status = event.data.get("status")
                 done: dict = {
                     "type": "done",
                     "agent": agent,
                     "record_id": event.data.get("executionRecordId"),
                 }
-                duration = trace.get("duration") if isinstance(trace, dict) else None
-                if isinstance(duration, (int, float)):
-                    done["duration_ms"] = int(duration)
-                total_cost = trace.get("totalCost") if isinstance(trace, dict) else None
-                if isinstance(total_cost, (int, float)):
-                    done["total_cost"] = float(total_cost)
+                session_id = event.data.get("sessionId")
+                if isinstance(session_id, str) and session_id:
+                    done["session_id"] = session_id
+                if isinstance(hitl_status, str) and hitl_status.lower() == "waitingforhuman":
+                    done["status"] = "waiting_for_human"
+                    hitl_id = event.data.get("humanApprovalRequestId")
+                    if isinstance(hitl_id, str) and hitl_id:
+                        done["human_approval_request_id"] = hitl_id
+                    _emit_ndjson(done)
+                    continue
+                trace = event.data.get("trace") if isinstance(event.data.get("trace"), dict) else {}
+                for src_key, dst_key, cast in (
+                    ("duration", "duration_ms", int),
+                    ("totalCost", "total_cost", float),
+                    ("inputCost", "input_cost", float),
+                    ("outputCost", "output_cost", float),
+                    ("agentRevision", "agent_revision", int),
+                ):
+                    val = trace.get(src_key) if isinstance(trace, dict) else None
+                    if isinstance(val, (int, float)):
+                        done[dst_key] = cast(val)
                 _emit_ndjson(done)
                 continue
-            # Other event types (usage, status, step_trace) are intentionally
-            # dropped. See the docstring — surfacing unknown shapes forces
-            # every driver to handle fields it doesn't know.
     except FruxonError as e:
         # Stream-opening or mid-stream API failure. Emit the ``error``
         # NDJSON record on stdout so callers reading the stream see

{fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-agent-mode/SKILL.md RENAMED Viewed

@@ -119,19 +119,59 @@ fruxon run my-agent -p user_query="hello"
 Emits one JSON record per line on stdout. Frame:
 ```json
-{"type":"start","schema_version":1,"agent":"my-agent"}
+{"type":"start","schema_version":2,"agent":"my-agent"}
 {"type":"text","delta":"Hel"}
 {"type":"text","delta":"lo."}
-{"type":"tool_call","id":"tc-1","name":"search","arguments":{"q":"x"}}
-{"type":"tool_result","id":"tc-1","result":{"hits":3},"duration_ms":42}
-{"type":"done","agent":"my-agent","record_id":"rec-99","duration_ms":1234,"total_cost":0.0012}
+{"type":"tool_call","id":"tc-1","name":"search","display_name":"GitHub search",
+  "integration_id":"github","tool_type":"Api","arguments":{"q":"x"},
+  "start_time_ms":1700000000000}
+{"type":"tool_result","id":"tc-1","status":"succeeded","result":{"hits":3},
+  "end_time_ms":1700000000042}
+{"type":"usage","input_tokens":100,"output_tokens":250,
+  "cached_tokens":30,"thinking_tokens":5}
+{"type":"done","agent":"my-agent","record_id":"rec-99","session_id":"sess-1",
+  "duration_ms":1234,"total_cost":0.0012,"input_cost":0.0008,
+  "output_cost":0.0004,"agent_revision":7}
 ```
 Stream `text.delta` strings in arrival order to reconstruct the
 response body. Match `tool_result.id` to the corresponding
-`tool_call.id`. The `done` record carries the `record_id` you'll pass
-to `fruxon trace` for post-mortem inspection. On failure, a single
-`{"type":"error","message":...}` record is emitted before exit.
+`tool_call.id`. Branch on `tool_result.status` (`succeeded` /
+`failed` / `cancelled`) for the authoritative pass/fail signal —
+don't infer from `result` shape. The `done` record carries the
+`record_id` you'll pass to `fruxon trace` for post-mortem inspection.
+**Event types you'll see** (all on a single ``run`` may overlap):
+| `type` | When | Key fields |
+|---|---|---|
+| `start` | First | `schema_version`, `agent` |
+| `text` | LLM streamed text | `delta` |
+| `tool_call` | Agent dispatched a tool | `id`, `name`, `arguments`, `integration_id`, `tool_type` |
+| `tool_result` | Tool finished | `id`, `status`, `result`, `end_time_ms` |
+| `status` | Backend state change | `status` |
+| `usage` | Near end of stream | `input_tokens`, `output_tokens`, `cached_tokens`, `thinking_tokens` |
+| `done` | Terminal | `record_id`, `session_id`, `duration_ms`, costs, `agent_revision` |
+| `error` | Terminal on failure | `message`, `code`? |
+**HITL pause.** If the run paused for human approval, `done` carries
+`status: "waiting_for_human"` and `human_approval_request_id` instead
+of the trace fields — same `type: "done"`, distinguished by the
+`status` field:
+```json
+{"type":"done","agent":"my-agent","record_id":"rec-99","session_id":"sess-1",
+  "status":"waiting_for_human","human_approval_request_id":"har-7"}
+```
+**Step traces.** `fruxon agents test` additionally emits
+`{"type":"step_trace","id":"…","name":"…","step_type":"LlmStep",
+"status":"succeeded","duration_ms":1234}` when each flow step
+finishes — useful for CI gates that need per-step cost attribution.
+On failure, a single `{"type":"error","message":"…","code":"…"?}`
+record is emitted before exit. The schema_version field on `start`
+is the parser-pinning point: bump = breaking shape change.
 ## Exit codes — typed, stable, sufficient for retry logic

{fruxon-0.7.1 → fruxon-0.7.2}/tests/test_cli.py RENAMED Viewed

@@ -3240,7 +3240,7 @@ class TestRunAgentModeNdjson:
         lines = self._parse_lines(result.stdout)
         # Frame: start → text* → done
         assert lines[0]["type"] == "start"
-        assert lines[0]["schema_version"] == 1
+        assert lines[0]["schema_version"] == 2
         assert lines[0]["agent"] == "my-agent"
         assert [line_["delta"] for line_ in lines if line_["type"] == "text"] == ["Hel", "lo!"]
         done = lines[-1]
@@ -3250,9 +3250,12 @@ class TestRunAgentModeNdjson:
         assert done["total_cost"] == pytest.approx(0.0012)
     def test_stream_normalizes_tool_call_and_result_correlation(self, runner, monkeypatch):
-        """The NDJSON contract uses ``arguments`` (not ``args``) and a
-        stable ``id`` field so an LLM driver can stitch each tool_result
-        back to its tool_call deterministically."""
+        """The NDJSON contract flattens the backend's nested
+        ``toolTrace.tool`` shape into top-level ``name`` /
+        ``integration_id`` / ``tool_type`` so an LLM driver doesn't
+        have to navigate two levels of nesting. The stable ``id`` field
+        correlates each ``tool_result`` back to its ``tool_call``.
+        """
         credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
         monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
@@ -3261,13 +3264,31 @@ class TestRunAgentModeNdjson:
         self._stub_stream(
             monkeypatch,
             [
+                # Real backend shape: identity nested under ``toolTrace``,
+                # ``arguments`` flat at the top, ``startTime`` epoch ms.
                 StreamEvent(
                     event="tool_call",
-                    data={"id": "tc-1", "name": "search", "args": {"q": "x"}},
+                    data={
+                        "id": "tc-1",
+                        "toolTrace": {
+                            "tool": {"name": "search", "integrationId": "github"},
+                            "displayName": "GitHub search",
+                            "toolType": "Api",
+                        },
+                        "arguments": {"q": "x"},
+                        "startTime": 1700000000000,
+                    },
                 ),
+                # Real tool_result shape: ``status`` is the pass/fail
+                # signal; ``endTime`` is the completion epoch ms.
                 StreamEvent(
                     event="tool_result",
-                    data={"id": "tc-1", "result": {"hits": 3}, "durationMs": 42},
+                    data={
+                        "id": "tc-1",
+                        "result": {"hits": 3},
+                        "status": "succeeded",
+                        "endTime": 1700000000042,
+                    },
                 ),
                 StreamEvent(event="done", data={"executionRecordId": "rec-1", "trace": {}}),
             ],
@@ -3277,12 +3298,23 @@ class TestRunAgentModeNdjson:
         lines = self._parse_lines(result.stdout)
         call = next(ln for ln in lines if ln["type"] == "tool_call")
-        assert call == {"type": "tool_call", "id": "tc-1", "name": "search", "arguments": {"q": "x"}}
+        assert call["id"] == "tc-1"
+        # Tool identity surfaces at the top level — agent shouldn't
+        # have to descend toolTrace.tool.* on its side.
+        assert call["name"] == "search"
+        assert call["display_name"] == "GitHub search"
+        assert call["integration_id"] == "github"
+        assert call["tool_type"] == "Api"
+        assert call["arguments"] == {"q": "x"}
+        assert call["start_time_ms"] == 1700000000000
         res = next(ln for ln in lines if ln["type"] == "tool_result")
         assert res["id"] == "tc-1"
         assert res["result"] == {"hits": 3}
-        assert res["duration_ms"] == 42
+        # ``status`` is the authoritative pass/fail signal — driver
+        # branches on this directly, no result-shape inspection needed.
+        assert res["status"] == "succeeded"
+        assert res["end_time_ms"] == 1700000000042
     def test_stream_error_event_emits_error_record_and_exits_server(self, runner, monkeypatch):
         """A server-side ``error`` event lands as a terminal NDJSON record
@@ -3316,6 +3348,126 @@ class TestRunAgentModeNdjson:
         assert envelope["error"]["code"] == "server_error"
         assert envelope["error"]["exit_code"] == EXIT_SERVER
+    def test_stream_surfaces_usage_event(self, runner, monkeypatch):
+        """The backend emits ``event: usage`` with token counts near
+        the end of every run — surface it as its own NDJSON record so
+        an agent driver can cost-account without parsing the trace
+        envelope. All four buckets (input / output / cached / thinking)
+        round-trip with snake-case keys."""
+        credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
+        monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
+        from fruxon.fruxon import StreamEvent
+        self._stub_stream(
+            monkeypatch,
+            [
+                StreamEvent(
+                    event="usage",
+                    data={"inputTokens": 100, "outputTokens": 250, "cachedTokens": 30, "thinkingTokens": 5},
+                ),
+                StreamEvent(event="done", data={"executionRecordId": "rec-1", "trace": {}}),
+            ],
+        )
+        result = runner.invoke(app, ["run", "my-agent"])
+        assert result.exit_code == 0, result.stderr
+        lines = self._parse_lines(result.stdout)
+        usage = next(ln for ln in lines if ln["type"] == "usage")
+        assert usage == {
+            "type": "usage",
+            "input_tokens": 100,
+            "output_tokens": 250,
+            "cached_tokens": 30,
+            "thinking_tokens": 5,
+        }
+    def test_stream_surfaces_hitl_done_with_status_field(self, runner, monkeypatch):
+        """When a run pauses for human approval the backend emits a
+        ``done`` event with ``status: "WaitingForHuman"`` instead of
+        a trace envelope. Surface that under the same ``type: "done"``
+        so a driver's end-of-stream branch is a single check — just
+        look at the ``status`` field to distinguish completion from
+        suspension. ``human_approval_request_id`` is the handle the
+        driver needs to resume."""
+        credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
+        monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
+        from fruxon.fruxon import StreamEvent
+        self._stub_stream(
+            monkeypatch,
+            [
+                StreamEvent(event="text", data={"chunk": "Awaiting approval..."}),
+                StreamEvent(
+                    event="done",
+                    data={
+                        "sessionId": "sess-1",
+                        "executionRecordId": "rec-1",
+                        "status": "WaitingForHuman",
+                        "humanApprovalRequestId": "har-7",
+                    },
+                ),
+            ],
+        )
+        result = runner.invoke(app, ["run", "my-agent"])
+        assert result.exit_code == 0, result.stderr
+        lines = self._parse_lines(result.stdout)
+        done = lines[-1]
+        assert done["type"] == "done"
+        assert done["status"] == "waiting_for_human"
+        assert done["human_approval_request_id"] == "har-7"
+        assert done["session_id"] == "sess-1"
+        # No duration_ms / total_cost on the HITL variant — there's no
+        # finished trace to extract those from.
+        assert "duration_ms" not in done
+    def test_stream_surfaces_done_cost_breakdown(self, runner, monkeypatch):
+        """Normal completion: ``done`` flattens the headline fields
+        from the trace envelope (input + output cost separately, agent
+        revision, session) so a driver doesn't have to descend into
+        ``trace.*`` on its side."""
+        credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
+        monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
+        from fruxon.fruxon import StreamEvent
+        self._stub_stream(
+            monkeypatch,
+            [
+                StreamEvent(
+                    event="done",
+                    data={
+                        "sessionId": "sess-1",
+                        "executionRecordId": "rec-9",
+                        "trace": {
+                            "duration": 1234,
+                            "totalCost": 0.005,
+                            "inputCost": 0.003,
+                            "outputCost": 0.002,
+                            "agentRevision": 7,
+                        },
+                    },
+                ),
+            ],
+        )
+        result = runner.invoke(app, ["run", "my-agent"])
+        assert result.exit_code == 0, result.stderr
+        done = self._parse_lines(result.stdout)[-1]
+        assert done["type"] == "done"
+        assert done["record_id"] == "rec-9"
+        assert done["session_id"] == "sess-1"
+        assert done["duration_ms"] == 1234
+        assert done["total_cost"] == pytest.approx(0.005)
+        assert done["input_cost"] == pytest.approx(0.003)
+        assert done["output_cost"] == pytest.approx(0.002)
+        assert done["agent_revision"] == 7
+        # ``status`` field is absent on the non-HITL path — that's
+        # the contract: presence of ``status`` distinguishes the
+        # two ``done`` flavors.
+        assert "status" not in done
 # ─────────────────────────────────────────────────────────────────────────────
 # Non-interactive enforcement — paths that would block on human input