fruxon 0.7.1__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. {fruxon-0.7.1 → fruxon-0.7.2}/PKG-INFO +1 -1
  2. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/_version.py +2 -2
  3. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/run.py +170 -44
  4. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-agent-mode/SKILL.md +47 -7
  5. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_cli.py +160 -8
  6. {fruxon-0.7.1 → fruxon-0.7.2}/.gitignore +0 -0
  7. {fruxon-0.7.1 → fruxon-0.7.2}/HISTORY.md +0 -0
  8. {fruxon-0.7.1 → fruxon-0.7.2}/LICENSE +0 -0
  9. {fruxon-0.7.1 → fruxon-0.7.2}/README.md +0 -0
  10. {fruxon-0.7.1 → fruxon-0.7.2}/pyproject.toml +0 -0
  11. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/__init__.py +0 -0
  12. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/__main__.py +0 -0
  13. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/_ssl.py +0 -0
  14. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/__init__.py +0 -0
  15. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/_schema.py +0 -0
  16. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/_shared.py +0 -0
  17. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/agents.py +0 -0
  18. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/agents_budget.py +0 -0
  19. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/agents_draft.py +0 -0
  20. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/agents_revisions.py +0 -0
  21. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/agents_tests.py +0 -0
  22. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/auth.py +0 -0
  23. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/chat.py +0 -0
  24. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/completion.py +0 -0
  25. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/config.py +0 -0
  26. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/describe.py +0 -0
  27. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/doctor.py +0 -0
  28. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/examples.py +0 -0
  29. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/guides.py +0 -0
  30. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/integrations.py +0 -0
  31. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/keys.py +0 -0
  32. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/llm_providers.py +0 -0
  33. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/skills.py +0 -0
  34. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/tools.py +0 -0
  35. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli/trace.py +0 -0
  36. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/cli_auth.py +0 -0
  37. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/credentials.py +0 -0
  38. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/doctor.py +0 -0
  39. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/exceptions.py +0 -0
  40. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/fruxon.py +0 -0
  41. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/models.py +0 -0
  42. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/output.py +0 -0
  43. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/params.py +0 -0
  44. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/__init__.py +0 -0
  45. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-build-agent/SKILL.md +0 -0
  46. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-create-integration/SKILL.md +0 -0
  47. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-debug-revision/SKILL.md +0 -0
  48. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-meet/SKILL.md +0 -0
  49. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/skills/fruxon-use-integrations/SKILL.md +0 -0
  50. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/ui.py +0 -0
  51. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/update_check.py +0 -0
  52. {fruxon-0.7.1 → fruxon-0.7.2}/src/fruxon/validation.py +0 -0
  53. {fruxon-0.7.1 → fruxon-0.7.2}/tests/__init__.py +0 -0
  54. {fruxon-0.7.1 → fruxon-0.7.2}/tests/conftest.py +0 -0
  55. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_actor.py +0 -0
  56. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_budgets.py +0 -0
  57. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_client.py +0 -0
  58. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_credentials.py +0 -0
  59. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_doctor.py +0 -0
  60. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_draft_evaluate_cli.py +0 -0
  61. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_drafts.py +0 -0
  62. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_fruxon.py +0 -0
  63. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_guides.py +0 -0
  64. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_output.py +0 -0
  65. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_params.py +0 -0
  66. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_schema.py +0 -0
  67. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_skills.py +0 -0
  68. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_ssl.py +0 -0
  69. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_test_chats.py +0 -0
  70. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_ui.py +0 -0
  71. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_update_check.py +0 -0
  72. {fruxon-0.7.1 → fruxon-0.7.2}/tests/test_validation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fruxon
3
- Version: 0.7.1
3
+ Version: 0.7.2
4
4
  Summary: The Fruxon SDK is a lightweight Python client for integrating with the Fruxon platform.
5
5
  Project-URL: bugs, https://github.com/fruxon-ai/fruxon-sdk/issues
6
6
  Project-URL: changelog, https://github.com/fruxon-ai/fruxon-sdk/blob/main/HISTORY.md
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.7.1'
22
- __version_tuple__ = version_tuple = (0, 7, 1)
21
+ __version__ = version = '0.7.2'
22
+ __version_tuple__ = version_tuple = (0, 7, 2)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -439,7 +439,7 @@ def _run_stream(
439
439
  # Bumped when the NDJSON event shape changes in a backwards-incompatible
440
440
  # way. Emitted on the very first event so an agent driver can detect
441
441
  # version drift without parsing the whole stream first.
442
- _STREAM_SCHEMA_VERSION = 1
442
+ _STREAM_SCHEMA_VERSION = 2 # bumped: tool_call/result shape + usage/status/HITL
443
443
 
444
444
 
445
445
  def _emit_ndjson(record: dict) -> None:
@@ -466,24 +466,51 @@ def _run_stream_ndjson(
466
466
  parse the stream incrementally with ``readline()`` + ``json.loads``
467
467
  instead of waiting for the run to finish.
468
468
 
469
- **Event shape** (stable — schema_version bumped on breaking changes):
469
+ **Event shape** (stable — schema_version bumped on breaking changes).
470
+ Mirrors the backend's ``SseWriter`` (see ``Fruxon.Model/Modules/Streaming/
471
+ SseWriter.cs``) with flat snake_case field names so an LLM driver doesn't
472
+ have to learn the camelCase wire shape:
470
473
 
471
- * ``{"type":"start","schema_version":1,"agent":"<slug>"}``
474
+ * ``{"type":"start","schema_version":2,"agent":"<slug>"}``
472
475
  Always first. Lets the consumer pin the parser version up-front.
473
476
  * ``{"type":"text","delta":"..."}``
474
477
  Streamed text chunk. Concatenate every ``delta`` in arrival order
475
478
  to reconstruct the response body.
476
- * ``{"type":"tool_call","id":"...","name":"...","arguments":{...}}``
479
+ * ``{"type":"tool_call","id":"...","name":"...","display_name":"...",
480
+ "integration_id":"...","tool_type":"...","arguments":{...},
481
+ "start_time_ms":<int>?}``
477
482
  The agent invoked a tool. ``id`` correlates with the matching
478
- ``tool_result``.
479
- * ``{"type":"tool_result","id":"...","result":<any>,"error":"..."?,
480
- "duration_ms":<int>?}``
481
- Tool call completed. ``error`` is set instead of ``result`` on
482
- failure.
483
- * ``{"type":"done","record_id":"...","duration_ms":<int>?,
484
- "total_cost":<float>?,"agent":"<slug>"}``
483
+ ``tool_result``. ``name`` is the tool's wire id; ``display_name``
484
+ is the human label. ``integration_id`` is the empty string ``""``
485
+ for built-in (provider-native) tools and the integration slug
486
+ otherwise.
487
+ * ``{"type":"tool_result","id":"...","status":"succeeded|failed|...",
488
+ "result":<any>?, "end_time_ms":<int>?, "duration_ms":<int>?}``
489
+ Tool call completed. ``status`` is the authoritative pass/fail
490
+ signal (matches the backend's ``ToolStatus``). ``result`` carries
491
+ the tool's return payload (string for HTTP, structured for code
492
+ tools).
493
+ * ``{"type":"step_trace","id":"...","name":"...","step_type":"...",
494
+ "status":"...","duration_ms":<int>?}``
495
+ A flow step finished. Only emitted on the test-stream path
496
+ (``fruxon agents test``), not on production runs.
497
+ * ``{"type":"status","status":"..."}``
498
+ Backend state change. Surfaced so a driver knows whether the
499
+ stream is still progressing or paused for HITL approval.
500
+ * ``{"type":"usage","input_tokens":<int>,"output_tokens":<int>,
501
+ "cached_tokens":<int>,"thinking_tokens":<int>}``
502
+ Token accounting. Emitted near the end of the stream; pair with
503
+ ``done.total_cost`` for the cost picture.
504
+ * ``{"type":"done","record_id":"...","agent":"<slug>",
505
+ "session_id":"..."?,"duration_ms":<int>?,"total_cost":<float>?,
506
+ "input_cost":<float>?,"output_cost":<float>?,
507
+ "agent_revision":<int>?}``
485
508
  Terminal event on success. ``record_id`` is the handle for
486
509
  ``fruxon trace <agent> <record_id>``.
510
+ * ``{"type":"done","status":"waiting_for_human","record_id":"...",
511
+ "session_id":"...","human_approval_request_id":"..."}``
512
+ Terminal event when the run paused for HITL approval — same
513
+ ``done`` type, distinguished by the ``status`` field.
487
514
  * ``{"type":"error","message":"...","code":"..."?}``
488
515
  Terminal event on failure. Followed by process exit with
489
516
  :data:`EXIT_SERVER`. Also emits the standard agent-mode error
@@ -506,42 +533,119 @@ def _run_stream_ndjson(
506
533
  continue
507
534
 
508
535
  if event.event == "tool_call":
509
- # Server payloads use ``arguments`` or ``args`` historically;
510
- # normalize to ``arguments`` so downstream parsers don't
511
- # have to handle both. ``id`` is the correlator with the
512
- # later ``tool_result`` agents stitch them on this key.
513
- args = event.data.get("arguments")
514
- if args is None:
515
- args = event.data.get("args")
516
- _emit_ndjson(
517
- {
518
- "type": "tool_call",
519
- "id": event.data.get("id") or event.data.get("toolCallId"),
520
- "name": event.data.get("name") or event.data.get("toolName"),
521
- "arguments": args,
522
- }
523
- )
536
+ # The backend nests tool identity under ``toolTrace``:
537
+ # ``{toolTrace: {tool: {name, integrationId}, displayName,
538
+ # toolType, parameters}, arguments, startTime, id}``. We
539
+ # flatten the agent-relevant fields up so a driver
540
+ # doesn't have to navigate two levels of nesting.
541
+ trace = event.data.get("toolTrace") if isinstance(event.data.get("toolTrace"), dict) else {}
542
+ tool_inner = trace.get("tool") if isinstance(trace.get("tool"), dict) else {}
543
+ payload: dict = {
544
+ "type": "tool_call",
545
+ "id": event.data.get("id"),
546
+ "name": tool_inner.get("name") if isinstance(tool_inner, dict) else None,
547
+ "arguments": event.data.get("arguments"),
548
+ }
549
+ # Optional fields: include only when present so the
550
+ # record stays compact when the server doesn't supply
551
+ # them (older backends, simplified payloads).
552
+ display_name = trace.get("displayName") if isinstance(trace, dict) else None
553
+ if isinstance(display_name, str) and display_name:
554
+ payload["display_name"] = display_name
555
+ integration_id = tool_inner.get("integrationId") if isinstance(tool_inner, dict) else None
556
+ if isinstance(integration_id, str):
557
+ # ``""`` is meaningful — marks built-in (provider-
558
+ # native) tools. Include it explicitly.
559
+ payload["integration_id"] = integration_id
560
+ tool_type = trace.get("toolType") if isinstance(trace, dict) else None
561
+ if isinstance(tool_type, str) and tool_type:
562
+ payload["tool_type"] = tool_type
563
+ start_time = event.data.get("startTime")
564
+ if isinstance(start_time, (int, float)):
565
+ payload["start_time_ms"] = int(start_time)
566
+ _emit_ndjson(payload)
524
567
  continue
525
568
 
526
569
  if event.event == "tool_result":
527
- payload: dict = {
570
+ # Backend payload: ``{id, result, endTime, status}``.
571
+ # ``status`` (``succeeded|failed|cancelled|...``) is the
572
+ # authoritative pass/fail signal — surface it directly
573
+ # so an agent doesn't have to inspect ``result`` shape
574
+ # to know what happened.
575
+ payload = {
528
576
  "type": "tool_result",
529
- "id": event.data.get("id") or event.data.get("toolCallId"),
577
+ "id": event.data.get("id"),
530
578
  }
531
- if "error" in event.data and event.data.get("error"):
532
- payload["error"] = event.data["error"]
533
- else:
534
- payload["result"] = event.data.get("result")
579
+ status = event.data.get("status")
580
+ if isinstance(status, str) and status:
581
+ payload["status"] = status
582
+ if "result" in event.data:
583
+ payload["result"] = event.data["result"]
584
+ end_time = event.data.get("endTime")
585
+ if isinstance(end_time, (int, float)):
586
+ payload["end_time_ms"] = int(end_time)
587
+ # Some older paths used ``durationMs`` / ``duration``
588
+ # — keep that fallback so a mixed-version backend
589
+ # doesn't drop the field.
535
590
  duration = event.data.get("durationMs") or event.data.get("duration")
536
591
  if isinstance(duration, (int, float)):
537
592
  payload["duration_ms"] = int(duration)
538
593
  _emit_ndjson(payload)
539
594
  continue
540
595
 
596
+ if event.event == "step_trace":
597
+ # Test-stream only — emitted when each flow step finishes.
598
+ # Lets a CI gate or an agent driver tell which step did
599
+ # what (cost attribution, debugging a broken flow).
600
+ payload = {
601
+ "type": "step_trace",
602
+ "id": event.data.get("id"),
603
+ }
604
+ for src_key, dst_key in (
605
+ ("displayName", "name"),
606
+ ("type", "step_type"),
607
+ ("status", "status"),
608
+ ):
609
+ val = event.data.get(src_key)
610
+ if isinstance(val, str) and val:
611
+ payload[dst_key] = val
612
+ duration = event.data.get("duration")
613
+ if isinstance(duration, (int, float)):
614
+ payload["duration_ms"] = int(duration)
615
+ _emit_ndjson(payload)
616
+ continue
617
+
618
+ if event.event == "status":
619
+ # Backend state change — used for HITL "pausing for
620
+ # approval" and similar transitions. Pass the raw
621
+ # status string through; the set is small enough
622
+ # that we don't need to normalize.
623
+ status_val = event.data.get("status")
624
+ if isinstance(status_val, str) and status_val:
625
+ _emit_ndjson({"type": "status", "status": status_val})
626
+ continue
627
+
628
+ if event.event == "usage":
629
+ # Token accounting at end of stream. Surfaces all four
630
+ # buckets the backend reports so an agent driver can
631
+ # roll its own per-bucket cost calc if needed.
632
+ usage_payload: dict = {"type": "usage"}
633
+ for src_key, dst_key in (
634
+ ("inputTokens", "input_tokens"),
635
+ ("outputTokens", "output_tokens"),
636
+ ("cachedTokens", "cached_tokens"),
637
+ ("thinkingTokens", "thinking_tokens"),
638
+ ):
639
+ val = event.data.get(src_key)
640
+ if isinstance(val, (int, float)):
641
+ usage_payload[dst_key] = int(val)
642
+ _emit_ndjson(usage_payload)
643
+ continue
644
+
541
645
  if event.event == "error":
542
646
  message = event.data.get("message") or "Unknown error"
543
647
  code = event.data.get("code")
544
- err_record = {"type": "error", "message": message}
648
+ err_record: dict = {"type": "error", "message": message}
545
649
  if code:
546
650
  err_record["code"] = code
547
651
  _emit_ndjson(err_record)
@@ -552,25 +656,47 @@ def _run_stream_ndjson(
552
656
  fail(message, code=EXIT_SERVER)
553
657
 
554
658
  if event.event == "done":
555
- trace = event.data.get("trace") if isinstance(event.data.get("trace"), dict) else {}
659
+ # Two flavors of done:
660
+ # 1. Normal completion — carries the full ``trace`` envelope
661
+ # (duration, costs, step tree). We flatten the headline
662
+ # fields up.
663
+ # 2. HITL suspension — carries ``status: "WaitingForHuman"``
664
+ # and a ``humanApprovalRequestId`` instead of a trace.
665
+ # Surfaced under the same ``type: "done"`` so a driver's
666
+ # end-of-stream loop is a single branch, distinguished
667
+ # by the ``status`` field.
668
+ hitl_status = event.data.get("status")
556
669
  done: dict = {
557
670
  "type": "done",
558
671
  "agent": agent,
559
672
  "record_id": event.data.get("executionRecordId"),
560
673
  }
561
- duration = trace.get("duration") if isinstance(trace, dict) else None
562
- if isinstance(duration, (int, float)):
563
- done["duration_ms"] = int(duration)
564
- total_cost = trace.get("totalCost") if isinstance(trace, dict) else None
565
- if isinstance(total_cost, (int, float)):
566
- done["total_cost"] = float(total_cost)
674
+ session_id = event.data.get("sessionId")
675
+ if isinstance(session_id, str) and session_id:
676
+ done["session_id"] = session_id
677
+
678
+ if isinstance(hitl_status, str) and hitl_status.lower() == "waitingforhuman":
679
+ done["status"] = "waiting_for_human"
680
+ hitl_id = event.data.get("humanApprovalRequestId")
681
+ if isinstance(hitl_id, str) and hitl_id:
682
+ done["human_approval_request_id"] = hitl_id
683
+ _emit_ndjson(done)
684
+ continue
685
+
686
+ trace = event.data.get("trace") if isinstance(event.data.get("trace"), dict) else {}
687
+ for src_key, dst_key, cast in (
688
+ ("duration", "duration_ms", int),
689
+ ("totalCost", "total_cost", float),
690
+ ("inputCost", "input_cost", float),
691
+ ("outputCost", "output_cost", float),
692
+ ("agentRevision", "agent_revision", int),
693
+ ):
694
+ val = trace.get(src_key) if isinstance(trace, dict) else None
695
+ if isinstance(val, (int, float)):
696
+ done[dst_key] = cast(val)
567
697
  _emit_ndjson(done)
568
698
  continue
569
699
 
570
- # Other event types (usage, status, step_trace) are intentionally
571
- # dropped. See the docstring — surfacing unknown shapes forces
572
- # every driver to handle fields it doesn't know.
573
-
574
700
  except FruxonError as e:
575
701
  # Stream-opening or mid-stream API failure. Emit the ``error``
576
702
  # NDJSON record on stdout so callers reading the stream see
@@ -119,19 +119,59 @@ fruxon run my-agent -p user_query="hello"
119
119
  Emits one JSON record per line on stdout. Frame:
120
120
 
121
121
  ```json
122
- {"type":"start","schema_version":1,"agent":"my-agent"}
122
+ {"type":"start","schema_version":2,"agent":"my-agent"}
123
123
  {"type":"text","delta":"Hel"}
124
124
  {"type":"text","delta":"lo."}
125
- {"type":"tool_call","id":"tc-1","name":"search","arguments":{"q":"x"}}
126
- {"type":"tool_result","id":"tc-1","result":{"hits":3},"duration_ms":42}
127
- {"type":"done","agent":"my-agent","record_id":"rec-99","duration_ms":1234,"total_cost":0.0012}
125
+ {"type":"tool_call","id":"tc-1","name":"search","display_name":"GitHub search",
126
+ "integration_id":"github","tool_type":"Api","arguments":{"q":"x"},
127
+ "start_time_ms":1700000000000}
128
+ {"type":"tool_result","id":"tc-1","status":"succeeded","result":{"hits":3},
129
+ "end_time_ms":1700000000042}
130
+ {"type":"usage","input_tokens":100,"output_tokens":250,
131
+ "cached_tokens":30,"thinking_tokens":5}
132
+ {"type":"done","agent":"my-agent","record_id":"rec-99","session_id":"sess-1",
133
+ "duration_ms":1234,"total_cost":0.0012,"input_cost":0.0008,
134
+ "output_cost":0.0004,"agent_revision":7}
128
135
  ```
129
136
 
130
137
  Stream `text.delta` strings in arrival order to reconstruct the
131
138
  response body. Match `tool_result.id` to the corresponding
132
- `tool_call.id`. The `done` record carries the `record_id` you'll pass
133
- to `fruxon trace` for post-mortem inspection. On failure, a single
134
- `{"type":"error","message":...}` record is emitted before exit.
139
+ `tool_call.id`. Branch on `tool_result.status` (`succeeded` /
140
+ `failed` / `cancelled`) for the authoritative pass/fail signal
141
+ don't infer from `result` shape. The `done` record carries the
142
+ `record_id` you'll pass to `fruxon trace` for post-mortem inspection.
143
+
144
+ **Event types you'll see** (several may appear, interleaved, within a single `run`):
145
+
146
+ | `type` | When | Key fields |
147
+ |---|---|---|
148
+ | `start` | First | `schema_version`, `agent` |
149
+ | `text` | LLM streamed text | `delta` |
150
+ | `tool_call` | Agent dispatched a tool | `id`, `name`, `arguments`, `integration_id`, `tool_type` |
151
+ | `tool_result` | Tool finished | `id`, `status`, `result`, `end_time_ms` |
152
+ | `status` | Backend state change | `status` |
153
+ | `usage` | Near end of stream | `input_tokens`, `output_tokens`, `cached_tokens`, `thinking_tokens` |
154
+ | `done` | Terminal | `record_id`, `session_id`, `duration_ms`, costs, `agent_revision` |
155
+ | `error` | Terminal on failure | `message`, `code`? |
156
+
157
+ **HITL pause.** If the run paused for human approval, `done` carries
158
+ `status: "waiting_for_human"` and `human_approval_request_id` instead
159
+ of the trace fields — same `type: "done"`, distinguished by the
160
+ `status` field:
161
+
162
+ ```json
163
+ {"type":"done","agent":"my-agent","record_id":"rec-99","session_id":"sess-1",
164
+ "status":"waiting_for_human","human_approval_request_id":"har-7"}
165
+ ```
166
+
167
+ **Step traces.** `fruxon agents test` additionally emits
168
+ `{"type":"step_trace","id":"…","name":"…","step_type":"LlmStep",
169
+ "status":"succeeded","duration_ms":1234}` when each flow step
170
+ finishes — useful for CI gates that need per-step cost attribution.
171
+
172
+ On failure, a single `{"type":"error","message":"…","code":"…"?}`
173
+ record is emitted before exit. The `schema_version` field on `start`
174
+ is the parser-pinning point: a bump means a breaking shape change.
135
175
 
136
176
  ## Exit codes — typed, stable, sufficient for retry logic
137
177
 
@@ -3240,7 +3240,7 @@ class TestRunAgentModeNdjson:
3240
3240
  lines = self._parse_lines(result.stdout)
3241
3241
  # Frame: start → text* → done
3242
3242
  assert lines[0]["type"] == "start"
3243
- assert lines[0]["schema_version"] == 1
3243
+ assert lines[0]["schema_version"] == 2
3244
3244
  assert lines[0]["agent"] == "my-agent"
3245
3245
  assert [line_["delta"] for line_ in lines if line_["type"] == "text"] == ["Hel", "lo!"]
3246
3246
  done = lines[-1]
@@ -3250,9 +3250,12 @@ class TestRunAgentModeNdjson:
3250
3250
  assert done["total_cost"] == pytest.approx(0.0012)
3251
3251
 
3252
3252
  def test_stream_normalizes_tool_call_and_result_correlation(self, runner, monkeypatch):
3253
- """The NDJSON contract uses ``arguments`` (not ``args``) and a
3254
- stable ``id`` field so an LLM driver can stitch each tool_result
3255
- back to its tool_call deterministically."""
3253
+ """The NDJSON contract flattens the backend's nested
3254
+ ``toolTrace.tool`` shape into top-level ``name`` /
3255
+ ``integration_id`` / ``tool_type`` so an LLM driver doesn't
3256
+ have to navigate two levels of nesting. The stable ``id`` field
3257
+ correlates each ``tool_result`` back to its ``tool_call``.
3258
+ """
3256
3259
  credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
3257
3260
  monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
3258
3261
 
@@ -3261,13 +3264,31 @@ class TestRunAgentModeNdjson:
3261
3264
  self._stub_stream(
3262
3265
  monkeypatch,
3263
3266
  [
3267
+ # Real backend shape: identity nested under ``toolTrace``,
3268
+ # ``arguments`` flat at the top, ``startTime`` epoch ms.
3264
3269
  StreamEvent(
3265
3270
  event="tool_call",
3266
- data={"id": "tc-1", "name": "search", "args": {"q": "x"}},
3271
+ data={
3272
+ "id": "tc-1",
3273
+ "toolTrace": {
3274
+ "tool": {"name": "search", "integrationId": "github"},
3275
+ "displayName": "GitHub search",
3276
+ "toolType": "Api",
3277
+ },
3278
+ "arguments": {"q": "x"},
3279
+ "startTime": 1700000000000,
3280
+ },
3267
3281
  ),
3282
+ # Real tool_result shape: ``status`` is the pass/fail
3283
+ # signal; ``endTime`` is the completion epoch ms.
3268
3284
  StreamEvent(
3269
3285
  event="tool_result",
3270
- data={"id": "tc-1", "result": {"hits": 3}, "durationMs": 42},
3286
+ data={
3287
+ "id": "tc-1",
3288
+ "result": {"hits": 3},
3289
+ "status": "succeeded",
3290
+ "endTime": 1700000000042,
3291
+ },
3271
3292
  ),
3272
3293
  StreamEvent(event="done", data={"executionRecordId": "rec-1", "trace": {}}),
3273
3294
  ],
@@ -3277,12 +3298,23 @@ class TestRunAgentModeNdjson:
3277
3298
  lines = self._parse_lines(result.stdout)
3278
3299
 
3279
3300
  call = next(ln for ln in lines if ln["type"] == "tool_call")
3280
- assert call == {"type": "tool_call", "id": "tc-1", "name": "search", "arguments": {"q": "x"}}
3301
+ assert call["id"] == "tc-1"
3302
+ # Tool identity surfaces at the top level — agent shouldn't
3303
+ # have to descend toolTrace.tool.* on its side.
3304
+ assert call["name"] == "search"
3305
+ assert call["display_name"] == "GitHub search"
3306
+ assert call["integration_id"] == "github"
3307
+ assert call["tool_type"] == "Api"
3308
+ assert call["arguments"] == {"q": "x"}
3309
+ assert call["start_time_ms"] == 1700000000000
3281
3310
 
3282
3311
  res = next(ln for ln in lines if ln["type"] == "tool_result")
3283
3312
  assert res["id"] == "tc-1"
3284
3313
  assert res["result"] == {"hits": 3}
3285
- assert res["duration_ms"] == 42
3314
+ # ``status`` is the authoritative pass/fail signal — driver
3315
+ # branches on this directly, no result-shape inspection needed.
3316
+ assert res["status"] == "succeeded"
3317
+ assert res["end_time_ms"] == 1700000000042
3286
3318
 
3287
3319
  def test_stream_error_event_emits_error_record_and_exits_server(self, runner, monkeypatch):
3288
3320
  """A server-side ``error`` event lands as a terminal NDJSON record
@@ -3316,6 +3348,126 @@ class TestRunAgentModeNdjson:
3316
3348
  assert envelope["error"]["code"] == "server_error"
3317
3349
  assert envelope["error"]["exit_code"] == EXIT_SERVER
3318
3350
 
3351
+ def test_stream_surfaces_usage_event(self, runner, monkeypatch):
3352
+ """The backend emits ``event: usage`` with token counts near
3353
+ the end of every run — surface it as its own NDJSON record so
3354
+ an agent driver can cost-account without parsing the trace
3355
+ envelope. All four buckets (input / output / cached / thinking)
3356
+ round-trip with snake-case keys."""
3357
+ credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
3358
+ monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
3359
+
3360
+ from fruxon.fruxon import StreamEvent
3361
+
3362
+ self._stub_stream(
3363
+ monkeypatch,
3364
+ [
3365
+ StreamEvent(
3366
+ event="usage",
3367
+ data={"inputTokens": 100, "outputTokens": 250, "cachedTokens": 30, "thinkingTokens": 5},
3368
+ ),
3369
+ StreamEvent(event="done", data={"executionRecordId": "rec-1", "trace": {}}),
3370
+ ],
3371
+ )
3372
+ result = runner.invoke(app, ["run", "my-agent"])
3373
+ assert result.exit_code == 0, result.stderr
3374
+ lines = self._parse_lines(result.stdout)
3375
+
3376
+ usage = next(ln for ln in lines if ln["type"] == "usage")
3377
+ assert usage == {
3378
+ "type": "usage",
3379
+ "input_tokens": 100,
3380
+ "output_tokens": 250,
3381
+ "cached_tokens": 30,
3382
+ "thinking_tokens": 5,
3383
+ }
3384
+
3385
+ def test_stream_surfaces_hitl_done_with_status_field(self, runner, monkeypatch):
3386
+ """When a run pauses for human approval the backend emits a
3387
+ ``done`` event with ``status: "WaitingForHuman"`` instead of
3388
+ a trace envelope. Surface that under the same ``type: "done"``
3389
+ so a driver's end-of-stream branch is a single check — just
3390
+ look at the ``status`` field to distinguish completion from
3391
+ suspension. ``human_approval_request_id`` is the handle the
3392
+ driver needs to resume."""
3393
+ credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
3394
+ monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
3395
+
3396
+ from fruxon.fruxon import StreamEvent
3397
+
3398
+ self._stub_stream(
3399
+ monkeypatch,
3400
+ [
3401
+ StreamEvent(event="text", data={"chunk": "Awaiting approval..."}),
3402
+ StreamEvent(
3403
+ event="done",
3404
+ data={
3405
+ "sessionId": "sess-1",
3406
+ "executionRecordId": "rec-1",
3407
+ "status": "WaitingForHuman",
3408
+ "humanApprovalRequestId": "har-7",
3409
+ },
3410
+ ),
3411
+ ],
3412
+ )
3413
+ result = runner.invoke(app, ["run", "my-agent"])
3414
+ assert result.exit_code == 0, result.stderr
3415
+ lines = self._parse_lines(result.stdout)
3416
+
3417
+ done = lines[-1]
3418
+ assert done["type"] == "done"
3419
+ assert done["status"] == "waiting_for_human"
3420
+ assert done["human_approval_request_id"] == "har-7"
3421
+ assert done["session_id"] == "sess-1"
3422
+ # No duration_ms / total_cost on the HITL variant — there's no
3423
+ # finished trace to extract those from.
3424
+ assert "duration_ms" not in done
3425
+
3426
+ def test_stream_surfaces_done_cost_breakdown(self, runner, monkeypatch):
3427
+ """Normal completion: ``done`` flattens the headline fields
3428
+ from the trace envelope (input + output cost separately, agent
3429
+ revision, session) so a driver doesn't have to descend into
3430
+ ``trace.*`` on its side."""
3431
+ credentials.save(credentials.StoredCredentials(api_key="fxn_x", org="acme"))
3432
+ monkeypatch.setenv("FRUXON_AGENT_MODE", "1")
3433
+
3434
+ from fruxon.fruxon import StreamEvent
3435
+
3436
+ self._stub_stream(
3437
+ monkeypatch,
3438
+ [
3439
+ StreamEvent(
3440
+ event="done",
3441
+ data={
3442
+ "sessionId": "sess-1",
3443
+ "executionRecordId": "rec-9",
3444
+ "trace": {
3445
+ "duration": 1234,
3446
+ "totalCost": 0.005,
3447
+ "inputCost": 0.003,
3448
+ "outputCost": 0.002,
3449
+ "agentRevision": 7,
3450
+ },
3451
+ },
3452
+ ),
3453
+ ],
3454
+ )
3455
+ result = runner.invoke(app, ["run", "my-agent"])
3456
+ assert result.exit_code == 0, result.stderr
3457
+ done = self._parse_lines(result.stdout)[-1]
3458
+ assert done["type"] == "done"
3459
+ assert done["record_id"] == "rec-9"
3460
+ assert done["session_id"] == "sess-1"
3461
+ assert done["duration_ms"] == 1234
3462
+ assert done["total_cost"] == pytest.approx(0.005)
3463
+ assert done["input_cost"] == pytest.approx(0.003)
3464
+ assert done["output_cost"] == pytest.approx(0.002)
3465
+ assert done["agent_revision"] == 7
3466
+ # ``status`` field is absent on the non-HITL path — that's
3467
+ # the contract: presence of ``status`` distinguishes the
3468
+ # two ``done`` flavors.
3469
+ assert "status" not in done
3470
+
3319
3471
 
3320
3472
  # ─────────────────────────────────────────────────────────────────────────────
3321
3473
  # Non-interactive enforcement — paths that would block on human input
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes