juno-code 1.0.49 → 1.0.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +417 -203
  2. package/dist/bin/cli.d.mts +1 -1
  3. package/dist/bin/cli.d.ts +1 -1
  4. package/dist/bin/cli.js +1736 -976
  5. package/dist/bin/cli.js.map +1 -1
  6. package/dist/bin/cli.mjs +1735 -975
  7. package/dist/bin/cli.mjs.map +1 -1
  8. package/dist/bin/feedback-collector.js.map +1 -1
  9. package/dist/bin/feedback-collector.mjs.map +1 -1
  10. package/dist/index.d.mts +33 -7
  11. package/dist/index.d.ts +33 -7
  12. package/dist/index.js +202 -27
  13. package/dist/index.js.map +1 -1
  14. package/dist/index.mjs +202 -27
  15. package/dist/index.mjs.map +1 -1
  16. package/dist/templates/scripts/install_requirements.sh +41 -3
  17. package/dist/templates/scripts/kanban.sh +4 -0
  18. package/dist/templates/services/__pycache__/pi.cpython-313.pyc +0 -0
  19. package/dist/templates/services/pi.py +1281 -238
  20. package/dist/templates/skills/claude/kanban-workflow/SKILL.md +138 -0
  21. package/dist/templates/skills/claude/plan-kanban-tasks/SKILL.md +1 -1
  22. package/dist/templates/skills/claude/ralph-loop/scripts/kanban.sh +4 -0
  23. package/dist/templates/skills/claude/understand-project/SKILL.md +1 -1
  24. package/dist/templates/skills/codex/kanban-workflow/SKILL.md +139 -0
  25. package/dist/templates/skills/codex/plan-kanban-tasks/SKILL.md +32 -0
  26. package/dist/templates/skills/codex/ralph-loop/scripts/kanban.sh +4 -0
  27. package/dist/templates/skills/codex/understand-project/SKILL.md +46 -0
  28. package/dist/templates/skills/pi/kanban-workflow/SKILL.md +139 -0
  29. package/dist/templates/skills/pi/plan-kanban-tasks/SKILL.md +1 -1
  30. package/dist/templates/skills/pi/ralph-loop/SKILL.md +4 -0
  31. package/dist/templates/skills/pi/understand-project/SKILL.md +1 -1
  32. package/package.json +7 -5
@@ -7,13 +7,14 @@ Headless wrapper around the Pi coding agent CLI with JSON streaming and shorthan
7
7
  import argparse
8
8
  import json
9
9
  import os
10
+ import re
10
11
  import subprocess
11
12
  import sys
12
13
  import threading
13
14
  import time
14
15
  from datetime import datetime
15
16
  from pathlib import Path
16
- from typing import Dict, List, Optional, Tuple
17
+ from typing import Dict, List, Optional, Set, Tuple
17
18
 
18
19
 
19
20
  class PiService:
@@ -35,7 +36,8 @@ class PiService:
35
36
  ":gpt-5": "openai/gpt-5",
36
37
  ":gpt-4o": "openai/gpt-4o",
37
38
  ":o3": "openai/o3",
38
- ":codex": "openai/gpt-5.3-codex",
39
+ ":codex": "openai-codex/gpt-5.3-codex",
40
+ ":api-codex": "openai/gpt-5.3-codex",
39
41
  # Google
40
42
  ":gemini-pro": "google/gemini-2.5-pro",
41
43
  ":gemini-flash": "google/gemini-2.5-flash",
@@ -74,6 +76,17 @@ class PiService:
74
76
  PRETTIFIER_CODEX = "codex"
75
77
  PRETTIFIER_LIVE = "live"
76
78
 
79
+ # ANSI colors for tool prettifier output.
80
+ # - command/args blocks are green for readability
81
+ # - error results are red
82
+ ANSI_GREEN = "\x1b[38;5;40m"
83
+ ANSI_RED = "\x1b[38;5;203m"
84
+ ANSI_RESET = "\x1b[0m"
85
+
86
+ # Keep tool args readable while preventing giant inline payloads.
87
+ TOOL_ARG_STRING_MAX_CHARS = 400
88
+ _ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
89
+
77
90
  def __init__(self):
78
91
  self.model_name = self.DEFAULT_MODEL
79
92
  self.project_path = os.getcwd()
@@ -83,6 +96,18 @@ class PiService:
83
96
  self.session_id: Optional[str] = None
84
97
  self.message_counter = 0
85
98
  self.prettifier_mode = self.PRETTIFIER_PI
99
+ # Tool call grouping: buffer toolcall_end until tool_execution_end arrives
100
+ self._pending_tool_calls: Dict[str, dict] = {} # toolCallId -> {tool, args/command}
101
+ # Buffer tool_execution_start data for fallback + timing (when toolcall_end arrives late)
102
+ self._pending_exec_starts: Dict[str, dict] = {} # toolCallId -> {tool, args/command, started_at}
103
+ # Track whether we're inside a tool execution
104
+ self._in_tool_execution: bool = False
105
+ # Buffer raw non-JSON tool stdout so it doesn't interleave with structured events
106
+ self._buffered_tool_stdout_lines: List[str] = []
107
+ # Per-run usage/cost accumulation (used for result + agent_end total cost visibility)
108
+ self._run_usage_totals: Optional[dict] = None
109
+ self._run_total_cost_usd: Optional[float] = None
110
+ self._run_seen_usage_keys: Set[str] = set()
86
111
  # Claude prettifier state
87
112
  self.user_message_truncate = int(os.environ.get("CLAUDE_USER_MESSAGE_PRETTY_TRUNCATE", "4"))
88
113
  # Codex prettifier state
@@ -92,6 +117,114 @@ class PiService:
92
117
  # Keys to hide from intermediate assistant messages in Codex mode
93
118
  self._codex_metadata_keys = {"api", "provider", "model", "usage", "stopReason", "timestamp"}
94
119
 
120
+ def _color_enabled(self) -> bool:
121
+ """Check if ANSI color output is appropriate (TTY + NO_COLOR not set)."""
122
+ if os.environ.get("NO_COLOR") is not None:
123
+ return False
124
+ return hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
125
+
126
+ def _colorize_lines(self, text: str, color_code: str) -> str:
127
+ """Apply ANSI coloring per line so line-based renderers keep colors stable."""
128
+ if "\n" not in text:
129
+ return f"{color_code}{text}{self.ANSI_RESET}"
130
+ return "\n".join(f"{color_code}{line}{self.ANSI_RESET}" for line in text.split("\n"))
131
+
132
+ def _colorize_result(self, text: str, is_error: bool = False) -> str:
133
+ """Colorize tool output only for errors; success stays terminal-default."""
134
+ if not self._color_enabled():
135
+ return text
136
+ if not is_error:
137
+ return text
138
+ return self._colorize_lines(text, self.ANSI_RED)
139
+
140
+ def _colorize_command(self, text: str) -> str:
141
+ """Colorize tool command/args blocks in green when ANSI color is enabled."""
142
+ if not self._color_enabled():
143
+ return text
144
+ return self._colorize_lines(text, self.ANSI_GREEN)
145
+
146
+ def _normalize_multiline_tool_text(self, text: str) -> str:
147
+ """Render escaped newline sequences as real newlines for tool command/args blocks."""
148
+ if "\n" in text:
149
+ return text
150
+ if "\\n" in text:
151
+ return text.replace("\\n", "\n")
152
+ return text
153
+
154
+ def _format_tool_invocation_header(self, header: Dict) -> str:
155
+ """Serialize a tool header and render multiline command/args as separate readable blocks."""
156
+ metadata = dict(header)
157
+ block_label: Optional[str] = None
158
+ block_text: Optional[str] = None
159
+
160
+ command_val = metadata.get("command")
161
+ if isinstance(command_val, str) and command_val.strip():
162
+ command_text = self._normalize_multiline_tool_text(command_val)
163
+ if "\n" in command_text:
164
+ metadata.pop("command", None)
165
+ block_label = "command:"
166
+ block_text = self._colorize_command(command_text)
167
+
168
+ if block_text is None:
169
+ args_val = metadata.get("args")
170
+ if isinstance(args_val, str) and args_val.strip():
171
+ args_text = self._normalize_multiline_tool_text(args_val)
172
+ if "\n" in args_text:
173
+ metadata.pop("args", None)
174
+ block_label = "args:"
175
+ block_text = self._colorize_command(args_text)
176
+
177
+ output = json.dumps(metadata, ensure_ascii=False)
178
+ if block_text is None:
179
+ return output
180
+ return output + "\n" + block_label + "\n" + block_text
181
+
182
+ def _strip_ansi_sequences(self, text: str) -> str:
183
+ """Remove ANSI escape sequences to prevent color bleed in prettified output."""
184
+ if not isinstance(text, str) or "\x1b" not in text:
185
+ return text
186
+ return self._ANSI_ESCAPE_RE.sub("", text)
187
+
188
+ def _sanitize_tool_argument_value(self, value):
189
+ """Recursively sanitize tool args while preserving JSON structure."""
190
+ if isinstance(value, str):
191
+ clean = self._strip_ansi_sequences(value)
192
+ if len(clean) > self.TOOL_ARG_STRING_MAX_CHARS:
193
+ return clean[:self.TOOL_ARG_STRING_MAX_CHARS] + "..."
194
+ return clean
195
+ if isinstance(value, dict):
196
+ return {k: self._sanitize_tool_argument_value(v) for k, v in value.items()}
197
+ if isinstance(value, list):
198
+ return [self._sanitize_tool_argument_value(v) for v in value]
199
+ return value
200
+
201
+ def _format_execution_time(self, payload: dict, pending: Optional[dict] = None) -> Optional[str]:
202
+ """Return execution time string (e.g. 0.12s) from payload or measured start time."""
203
+ seconds: Optional[float] = None
204
+
205
+ # Prefer explicit durations if Pi adds them in future versions.
206
+ for key in ("executionTimeSeconds", "durationSeconds", "elapsedSeconds"):
207
+ value = payload.get(key)
208
+ if isinstance(value, (int, float)):
209
+ seconds = float(value)
210
+ break
211
+
212
+ if seconds is None:
213
+ for key in ("executionTimeMs", "durationMs", "elapsedMs"):
214
+ value = payload.get(key)
215
+ if isinstance(value, (int, float)):
216
+ seconds = float(value) / 1000.0
217
+ break
218
+
219
+ if seconds is None and isinstance(pending, dict):
220
+ started_at = pending.get("started_at")
221
+ if isinstance(started_at, (int, float)):
222
+ seconds = max(0.0, time.perf_counter() - started_at)
223
+
224
+ if seconds is None:
225
+ return None
226
+ return f"{seconds:.2f}s"
227
+
95
228
  def expand_model_shorthand(self, model: str) -> str:
96
229
  """Expand shorthand model names (colon-prefixed) to full identifiers."""
97
230
  if model.startswith(":"):
@@ -103,13 +236,15 @@ class PiService:
103
236
 
104
237
  Pi CLI always uses its own event protocol (message, turn_end,
105
238
  message_update, agent_end, etc.) regardless of the underlying LLM.
106
- The exception is Codex models where Pi wraps Codex-format events
107
- (agent_reasoning, agent_message, exec_command_end).
239
+ Codex models also use Pi's event protocol but may additionally emit
240
+ native Codex events (agent_reasoning, agent_message, exec_command_end).
241
+ The LIVE prettifier handles both Pi-native and Codex-native events,
242
+ giving real-time streaming output for all model types.
108
243
  Claude models still use Pi's event protocol, NOT Claude CLI events.
109
244
  """
110
245
  model_lower = model.lower()
111
246
  if "codex" in model_lower:
112
- return self.PRETTIFIER_CODEX
247
+ return self.PRETTIFIER_LIVE
113
248
  # All non-Codex models (including Claude) use Pi's native event protocol
114
249
  return self.PRETTIFIER_PI
115
250
 
@@ -147,7 +282,8 @@ Model shorthands:
147
282
  :gpt-5 -> openai/gpt-5
148
283
  :gpt-4o -> openai/gpt-4o
149
284
  :o3 -> openai/o3
150
- :codex -> openai/gpt-5.3-codex
285
+ :codex -> openai-codex/gpt-5.3-codex
286
+ :api-codex -> openai/gpt-5.3-codex
151
287
  :gemini-pro -> google/gemini-2.5-pro
152
288
  :gemini-flash -> google/gemini-2.5-flash
153
289
  :groq -> groq/llama-4-scout-17b-16e-instruct
@@ -544,6 +680,7 @@ Model shorthands:
544
680
  return text
545
681
  # Unescape JSON-escaped newlines for human-readable display
546
682
  display_text = text.replace("\\n", "\n").replace("\\t", "\t")
683
+ display_text = self._strip_ansi_sequences(display_text)
547
684
  lines = display_text.split("\n")
548
685
  max_lines = self._codex_tool_result_max_lines
549
686
  if len(lines) <= max_lines:
@@ -643,12 +780,11 @@ Model shorthands:
643
780
  args = item.get("arguments", {})
644
781
  if isinstance(args, dict):
645
782
  cmd = args.get("command", "")
646
- if cmd:
647
- parts.append(f"[toolCall] {name}: {cmd}")
783
+ if isinstance(cmd, str) and cmd:
784
+ parts.append(f"[toolCall] {name}: {self._sanitize_tool_argument_value(cmd)}")
648
785
  else:
649
- args_str = json.dumps(args, ensure_ascii=False)
650
- if len(args_str) > 200:
651
- args_str = args_str[:200] + "..."
786
+ args_clean = self._sanitize_tool_argument_value(args)
787
+ args_str = json.dumps(args_clean, ensure_ascii=False)
652
788
  parts.append(f"[toolCall] {name}: {args_str}")
653
789
  else:
654
790
  parts.append(f"[toolCall] {name}")
@@ -734,10 +870,13 @@ Model shorthands:
734
870
  header["thinking"] = thinking_text
735
871
  return json.dumps(header, ensure_ascii=False)
736
872
 
737
- # toolcall_end: show tool name and arguments
873
+ # toolcall_end: buffer for grouping with tool_execution_end
738
874
  if ame_type == "toolcall_end":
739
- self.message_counter += 1
740
875
  tool_call = ame.get("toolCall", {})
876
+ if self._buffer_tool_call_end(tool_call, now):
877
+ return "" # suppress — will emit combined event on tool_execution_end
878
+ # No toolCallId — fallback to original format
879
+ self.message_counter += 1
741
880
  header = {
742
881
  "type": "toolcall_end",
743
882
  "datetime": now,
@@ -748,14 +887,13 @@ Model shorthands:
748
887
  args = tool_call.get("arguments", {})
749
888
  if isinstance(args, dict):
750
889
  cmd = args.get("command", "")
751
- if cmd:
752
- header["command"] = cmd
890
+ if isinstance(cmd, str) and cmd:
891
+ header["command"] = self._sanitize_tool_argument_value(cmd)
753
892
  else:
754
- args_str = json.dumps(args, ensure_ascii=False)
755
- if len(args_str) > 200:
756
- args_str = args_str[:200] + "..."
757
- header["args"] = args_str if isinstance(args_str, str) else args
758
- return json.dumps(header, ensure_ascii=False)
893
+ header["args"] = self._sanitize_tool_argument_value(args)
894
+ elif isinstance(args, str) and args.strip():
895
+ header["args"] = self._sanitize_tool_argument_value(args)
896
+ return self._format_tool_invocation_header(header)
759
897
 
760
898
  # Other message_update subtypes: suppress by default
761
899
  return ""
@@ -773,14 +911,12 @@ Model shorthands:
773
911
  header["tool_results_count"] = len(tool_results)
774
912
  return json.dumps(header, ensure_ascii=False)
775
913
 
776
- # --- message_start: minimal header ---
914
+ # --- message_start: minimal header (no counter — only *_end events get counters) ---
777
915
  if event_type == "message_start":
778
- self.message_counter += 1
779
916
  message = parsed.get("message", {})
780
917
  header = {
781
918
  "type": "message_start",
782
919
  "datetime": now,
783
- "counter": f"#{self.message_counter}",
784
920
  }
785
921
  if isinstance(message, dict):
786
922
  role = message.get("role")
@@ -798,58 +934,104 @@ Model shorthands:
798
934
  }
799
935
  return json.dumps(header, ensure_ascii=False)
800
936
 
801
- # --- tool_execution_start ---
937
+ # --- tool_execution_start: always suppress, buffer args ---
802
938
  if event_type == "tool_execution_start":
803
- self.message_counter += 1
804
- header = {
805
- "type": "tool_execution_start",
806
- "datetime": now,
807
- "counter": f"#{self.message_counter}",
808
- "tool": parsed.get("toolName", ""),
809
- }
810
- args_val = parsed.get("args")
811
- if isinstance(args_val, dict):
812
- args_str = json.dumps(args_val, ensure_ascii=False)
813
- if len(args_str) > 200:
814
- header["args"] = args_str[:200] + "..."
815
- else:
816
- header["args"] = args_val
817
- return json.dumps(header, ensure_ascii=False)
939
+ self._buffer_exec_start(parsed)
940
+ self._in_tool_execution = True
941
+ return "" # suppress
818
942
 
819
- # --- tool_execution_end ---
943
+ # --- tool_execution_end: combine with buffered data ---
820
944
  if event_type == "tool_execution_end":
945
+ self._in_tool_execution = False
946
+ tool_call_id = parsed.get("toolCallId")
947
+
948
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
949
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
950
+ if pending_tool and pending_exec and "started_at" in pending_exec:
951
+ pending_tool["started_at"] = pending_exec["started_at"]
952
+ pending = pending_tool or pending_exec
953
+
954
+ if pending:
955
+ return self._build_combined_tool_event(pending, parsed, now)
956
+
957
+ # No buffered data — minimal fallback
821
958
  self.message_counter += 1
822
959
  header = {
823
- "type": "tool_execution_end",
960
+ "type": "tool",
824
961
  "datetime": now,
825
962
  "counter": f"#{self.message_counter}",
826
963
  "tool": parsed.get("toolName", ""),
827
964
  }
965
+ execution_time = self._format_execution_time(parsed)
966
+ if execution_time:
967
+ header["execution_time"] = execution_time
968
+
828
969
  is_error = parsed.get("isError", False)
829
970
  if is_error:
830
971
  header["isError"] = True
972
+
831
973
  result_val = parsed.get("result")
974
+ colorize_error = self._color_enabled() and bool(is_error)
975
+
976
+ if isinstance(result_val, str) and result_val.strip():
977
+ truncated = self._truncate_tool_result_text(result_val)
978
+ if "\n" in truncated or colorize_error:
979
+ label = "result:"
980
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
981
+ if colorize_error:
982
+ label = self._colorize_result(label, is_error=True)
983
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
984
+ header["result"] = truncated
985
+ return self._format_tool_invocation_header(header)
986
+
832
987
  if isinstance(result_val, dict):
833
- # Extract text content from result
834
988
  result_content = result_val.get("content")
835
989
  if isinstance(result_content, list):
836
990
  for rc_item in result_content:
837
991
  if isinstance(rc_item, dict) and rc_item.get("type") == "text":
838
992
  text = rc_item.get("text", "")
839
993
  truncated = self._truncate_tool_result_text(text)
840
- if "\n" in truncated:
841
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated
994
+ if "\n" in truncated or colorize_error:
995
+ label = "result:"
996
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
997
+ if colorize_error:
998
+ label = self._colorize_result(label, is_error=True)
999
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
842
1000
  header["result"] = truncated
843
- return json.dumps(header, ensure_ascii=False)
844
- return json.dumps(header, ensure_ascii=False)
1001
+ return self._format_tool_invocation_header(header)
1002
+
1003
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1004
+ if "\n" in result_json or colorize_error:
1005
+ label = "result:"
1006
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1007
+ if colorize_error:
1008
+ label = self._colorize_result(label, is_error=True)
1009
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1010
+ header["result"] = result_json
1011
+ return self._format_tool_invocation_header(header)
1012
+
1013
+ if isinstance(result_val, list):
1014
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1015
+ if "\n" in result_json or colorize_error:
1016
+ label = "result:"
1017
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1018
+ if colorize_error:
1019
+ label = self._colorize_result(label, is_error=True)
1020
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1021
+ header["result"] = result_json
1022
+ return self._format_tool_invocation_header(header)
1023
+
1024
+ return self._format_tool_invocation_header(header)
1025
+
1026
+ # --- turn_start: suppress (no user-visible value) ---
1027
+ if event_type == "turn_start":
1028
+ return ""
845
1029
 
846
- # --- agent_start, turn_start: simple headers ---
847
- if event_type in ("agent_start", "turn_start"):
848
- self.message_counter += 1
1030
+ # --- agent_start: simple header (no counter — only *_end events get counters) ---
1031
+ if event_type == "agent_start":
849
1032
  return json.dumps({
850
1033
  "type": event_type,
851
1034
  "datetime": now,
852
- "counter": f"#{self.message_counter}",
853
1035
  }, ensure_ascii=False)
854
1036
 
855
1037
  # --- agent_end: capture and show summary ---
@@ -863,6 +1045,9 @@ Model shorthands:
863
1045
  messages = parsed.get("messages")
864
1046
  if isinstance(messages, list):
865
1047
  header["message_count"] = len(messages)
1048
+ total_cost_usd = self._extract_total_cost_usd(parsed)
1049
+ if total_cost_usd is not None:
1050
+ header["total_cost_usd"] = total_cost_usd
866
1051
  return json.dumps(header, ensure_ascii=False)
867
1052
 
868
1053
  # Not a Pi-wrapped event type we handle
@@ -880,7 +1065,7 @@ Model shorthands:
880
1065
  base_type = header_type or msg_type or "message"
881
1066
 
882
1067
  def make_header(type_value: str):
883
- hdr: Dict = {"type": type_value, "datetime": now}
1068
+ hdr: Dict = {"type": type_value, "datetime": now, "counter": f"#{self.message_counter}"}
884
1069
  if item_id:
885
1070
  hdr["id"] = item_id
886
1071
  if outer_type and msg_type and outer_type != msg_type:
@@ -1091,6 +1276,107 @@ Model shorthands:
1091
1276
 
1092
1277
  return ""
1093
1278
 
1279
+ def _buffer_tool_call_end(self, tool_call: dict, now: str) -> bool:
1280
+ """Buffer toolcall_end info for grouping with tool_execution_end.
1281
+
1282
+ Returns True if successfully buffered (caller should suppress output),
1283
+ False if no toolCallId present (caller should emit normally).
1284
+ """
1285
+ tc_id = tool_call.get("toolCallId", "") if isinstance(tool_call, dict) else ""
1286
+ if not tc_id:
1287
+ return False
1288
+
1289
+ pending: Dict = {"tool": tool_call.get("name", ""), "datetime": now}
1290
+ args = tool_call.get("arguments", {})
1291
+
1292
+ if isinstance(args, dict):
1293
+ cmd = args.get("command", "")
1294
+ if isinstance(cmd, str) and cmd:
1295
+ pending["command"] = self._sanitize_tool_argument_value(cmd)
1296
+ else:
1297
+ pending["args"] = self._sanitize_tool_argument_value(args)
1298
+ elif isinstance(args, str) and args.strip():
1299
+ pending["args"] = self._sanitize_tool_argument_value(args)
1300
+
1301
+ self._pending_tool_calls[tc_id] = pending
1302
+ return True
1303
+
1304
+ def _buffer_exec_start(self, payload: dict) -> None:
1305
+ """Buffer tool_execution_start data for tool_execution_end fallback + timing."""
1306
+ tc_id = payload.get("toolCallId", "")
1307
+ if not tc_id:
1308
+ return
1309
+
1310
+ pending: Dict = {
1311
+ "tool": payload.get("toolName", ""),
1312
+ "started_at": time.perf_counter(),
1313
+ }
1314
+ args_val = payload.get("args")
1315
+ if isinstance(args_val, dict):
1316
+ cmd = args_val.get("command", "")
1317
+ if isinstance(cmd, str) and cmd:
1318
+ pending["command"] = self._sanitize_tool_argument_value(cmd)
1319
+ else:
1320
+ pending["args"] = self._sanitize_tool_argument_value(args_val)
1321
+ elif isinstance(args_val, str) and args_val.strip():
1322
+ pending["args"] = self._sanitize_tool_argument_value(args_val)
1323
+
1324
+ self._pending_exec_starts[tc_id] = pending
1325
+
1326
+ def _build_combined_tool_event(self, pending: dict, payload: dict, now: str) -> str:
1327
+ """Build a combined 'tool' event from buffered toolcall_end + tool_execution_end."""
1328
+ self.message_counter += 1
1329
+ header: Dict = {
1330
+ "type": "tool",
1331
+ "datetime": now,
1332
+ "counter": f"#{self.message_counter}",
1333
+ "tool": pending.get("tool", payload.get("toolName", "")),
1334
+ }
1335
+
1336
+ # Args from buffered toolcall/tool_execution_start
1337
+ if "command" in pending:
1338
+ header["command"] = pending["command"]
1339
+ elif "args" in pending:
1340
+ header["args"] = pending["args"]
1341
+
1342
+ # Execution time (source of truth: tool_execution_start -> tool_execution_end)
1343
+ execution_time = self._format_execution_time(payload, pending)
1344
+ if execution_time:
1345
+ header["execution_time"] = execution_time
1346
+
1347
+ is_error = payload.get("isError", False)
1348
+ if is_error:
1349
+ header["isError"] = True
1350
+
1351
+ # Result extraction (handles string, dict with content array, and list)
1352
+ result_val = payload.get("result")
1353
+ result_text = None
1354
+ if isinstance(result_val, str) and result_val.strip():
1355
+ result_text = self._truncate_tool_result_text(result_val)
1356
+ elif isinstance(result_val, dict):
1357
+ result_content = result_val.get("content")
1358
+ if isinstance(result_content, list):
1359
+ for rc_item in result_content:
1360
+ if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1361
+ result_text = self._truncate_tool_result_text(rc_item.get("text", ""))
1362
+ break
1363
+ if result_text is None:
1364
+ result_text = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1365
+ elif isinstance(result_val, list):
1366
+ result_text = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1367
+
1368
+ if result_text:
1369
+ colorize_error = self._color_enabled() and bool(is_error)
1370
+ if "\n" in result_text or colorize_error:
1371
+ label = "result:"
1372
+ colored_text = self._colorize_result(result_text, is_error=bool(is_error))
1373
+ if colorize_error:
1374
+ label = self._colorize_result(label, is_error=True)
1375
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored_text
1376
+ header["result"] = result_text
1377
+
1378
+ return self._format_tool_invocation_header(header)
1379
+
1094
1380
  def _format_event_pretty(self, payload: dict) -> Optional[str]:
1095
1381
  """
1096
1382
  Format a Pi JSON streaming event for human-readable output.
@@ -1099,31 +1385,41 @@ Model shorthands:
1099
1385
  try:
1100
1386
  event_type = payload.get("type", "")
1101
1387
  now = datetime.now().strftime("%I:%M:%S %p")
1102
- self.message_counter += 1
1103
1388
 
1389
+ # Counter is only added to *_end events (below, per-branch)
1104
1390
  header: Dict = {
1105
1391
  "type": event_type,
1106
1392
  "datetime": now,
1107
- "counter": f"#{self.message_counter}",
1108
1393
  }
1109
1394
 
1110
- # --- Session header ---
1395
+ # --- Session header (no counter) ---
1111
1396
  if event_type == "session":
1112
1397
  header["version"] = payload.get("version")
1113
1398
  header["id"] = payload.get("id")
1114
1399
  return json.dumps(header, ensure_ascii=False)
1115
1400
 
1116
- # --- Agent lifecycle events ---
1117
- if event_type in ("agent_start", "turn_start"):
1401
+ # --- turn_start: suppress (no user-visible value) ---
1402
+ if event_type == "turn_start":
1403
+ return None
1404
+
1405
+ # --- agent_start: simple header (no counter) ---
1406
+ if event_type == "agent_start":
1118
1407
  return json.dumps(header, ensure_ascii=False)
1119
1408
 
1120
1409
  if event_type == "agent_end":
1410
+ self.message_counter += 1
1411
+ header["counter"] = f"#{self.message_counter}"
1121
1412
  messages = payload.get("messages")
1122
1413
  if isinstance(messages, list):
1123
1414
  header["message_count"] = len(messages)
1415
+ total_cost_usd = self._extract_total_cost_usd(payload)
1416
+ if total_cost_usd is not None:
1417
+ header["total_cost_usd"] = total_cost_usd
1124
1418
  return json.dumps(header, ensure_ascii=False)
1125
1419
 
1126
1420
  if event_type == "turn_end":
1421
+ self.message_counter += 1
1422
+ header["counter"] = f"#{self.message_counter}"
1127
1423
  tool_results = payload.get("toolResults")
1128
1424
  if isinstance(tool_results, list):
1129
1425
  header["tool_results_count"] = len(tool_results)
@@ -1146,6 +1442,43 @@ Model shorthands:
1146
1442
  if event_subtype in self._PI_HIDDEN_MESSAGE_UPDATE_EVENTS:
1147
1443
  return None # Suppress noisy streaming deltas
1148
1444
 
1445
+ # toolcall_end: buffer for grouping with tool_execution_end
1446
+ if isinstance(ame, dict) and ame_type == "toolcall_end":
1447
+ tool_call = ame.get("toolCall", {})
1448
+ if self._buffer_tool_call_end(tool_call, now):
1449
+ return None # suppress — will emit combined event on tool_execution_end
1450
+ # No toolCallId — fallback to original format
1451
+ self.message_counter += 1
1452
+ header["counter"] = f"#{self.message_counter}"
1453
+ header["event"] = ame_type
1454
+ if isinstance(tool_call, dict):
1455
+ header["tool"] = tool_call.get("name", "")
1456
+ args = tool_call.get("arguments", {})
1457
+ if isinstance(args, dict):
1458
+ cmd = args.get("command", "")
1459
+ if isinstance(cmd, str) and cmd:
1460
+ header["command"] = self._sanitize_tool_argument_value(cmd)
1461
+ else:
1462
+ header["args"] = self._sanitize_tool_argument_value(args)
1463
+ elif isinstance(args, str) and args.strip():
1464
+ header["args"] = self._sanitize_tool_argument_value(args)
1465
+ return self._format_tool_invocation_header(header)
1466
+
1467
+ # thinking_end: show thinking content (*_end → gets counter)
1468
+ if isinstance(ame, dict) and ame_type == "thinking_end":
1469
+ self.message_counter += 1
1470
+ header["counter"] = f"#{self.message_counter}"
1471
+ header["event"] = ame_type
1472
+ thinking_text = ame.get("thinking", "") or ame.get("content", "") or ame.get("text", "")
1473
+ if isinstance(thinking_text, str) and thinking_text.strip():
1474
+ header["thinking"] = thinking_text
1475
+ return json.dumps(header, ensure_ascii=False)
1476
+
1477
+ # Any other *_end subtypes (e.g. text_end) get counter
1478
+ if isinstance(ame, dict) and ame_type and ame_type.endswith("_end"):
1479
+ self.message_counter += 1
1480
+ header["counter"] = f"#{self.message_counter}"
1481
+
1149
1482
  message = payload.get("message", {})
1150
1483
  text = self._extract_text_from_message(message) if isinstance(message, dict) else ""
1151
1484
 
@@ -1165,61 +1498,103 @@ Model shorthands:
1165
1498
  return json.dumps(header, ensure_ascii=False)
1166
1499
 
1167
1500
  if event_type == "message_end":
1501
+ self.message_counter += 1
1502
+ header["counter"] = f"#{self.message_counter}"
1168
1503
  # Skip message text - already displayed by text_end/thinking_end/toolcall_end
1169
1504
  return json.dumps(header, ensure_ascii=False)
1170
1505
 
1171
1506
  # --- Tool execution events ---
1507
+ # Always suppress tool_execution_start: buffer its args for
1508
+ # tool_execution_end to use. The user sees nothing until the
1509
+ # tool finishes, then gets a single combined "tool" event.
1172
1510
  if event_type == "tool_execution_start":
1173
- header["tool"] = payload.get("toolName", "")
1174
- tool_call_id = payload.get("toolCallId")
1175
- if tool_call_id:
1176
- header["id"] = tool_call_id
1177
- args_val = payload.get("args")
1178
- if isinstance(args_val, dict):
1179
- # Show abbreviated args inline
1180
- args_str = json.dumps(args_val, ensure_ascii=False)
1181
- if len(args_str) > 200:
1182
- # Truncate for readability
1183
- header["args"] = args_str[:200] + "..."
1184
- else:
1185
- header["args"] = args_val
1186
- elif isinstance(args_val, str) and args_val.strip():
1187
- if "\n" in args_val:
1188
- return json.dumps(header, ensure_ascii=False) + "\nargs:\n" + args_val
1189
- header["args"] = args_val
1190
- return json.dumps(header, ensure_ascii=False)
1511
+ self._buffer_exec_start(payload)
1512
+ self._in_tool_execution = True
1513
+ return None
1191
1514
 
1192
1515
  if event_type == "tool_execution_update":
1193
- header["tool"] = payload.get("toolName", "")
1194
- tool_call_id = payload.get("toolCallId")
1195
- if tool_call_id:
1196
- header["id"] = tool_call_id
1197
- partial = payload.get("partialResult")
1198
- if isinstance(partial, str) and partial.strip():
1199
- if "\n" in partial:
1200
- return json.dumps(header, ensure_ascii=False) + "\npartialResult:\n" + partial
1201
- header["partialResult"] = partial
1202
- return json.dumps(header, ensure_ascii=False)
1516
+ # Suppress updates — result will arrive in tool_execution_end
1517
+ return None
1203
1518
 
1204
1519
  if event_type == "tool_execution_end":
1205
- header["tool"] = payload.get("toolName", "")
1520
+ self._in_tool_execution = False
1206
1521
  tool_call_id = payload.get("toolCallId")
1207
- if tool_call_id:
1208
- header["id"] = tool_call_id
1522
+
1523
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
1524
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
1525
+ if pending_tool and pending_exec and "started_at" in pending_exec:
1526
+ pending_tool["started_at"] = pending_exec["started_at"]
1527
+ pending = pending_tool or pending_exec
1528
+
1529
+ if pending:
1530
+ return self._build_combined_tool_event(pending, payload, now)
1531
+
1532
+ # No buffered data at all — minimal fallback
1533
+ self.message_counter += 1
1534
+ header["type"] = "tool"
1535
+ header["counter"] = f"#{self.message_counter}"
1536
+ header["tool"] = payload.get("toolName", "")
1537
+
1538
+ execution_time = self._format_execution_time(payload)
1539
+ if execution_time:
1540
+ header["execution_time"] = execution_time
1541
+
1209
1542
  is_error = payload.get("isError", False)
1210
1543
  if is_error:
1211
1544
  header["isError"] = True
1545
+
1212
1546
  result_val = payload.get("result")
1547
+ colorize_error = self._color_enabled() and bool(is_error)
1548
+
1213
1549
  if isinstance(result_val, str) and result_val.strip():
1214
- if "\n" in result_val:
1215
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + result_val
1216
- header["result"] = result_val
1217
- elif isinstance(result_val, (dict, list)):
1218
- result_str = json.dumps(result_val, ensure_ascii=False)
1219
- if "\n" in result_str or len(result_str) > 200:
1220
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + result_str
1221
- header["result"] = result_val
1222
- return json.dumps(header, ensure_ascii=False)
1550
+ truncated = self._truncate_tool_result_text(result_val)
1551
+ if "\n" in truncated or colorize_error:
1552
+ label = "result:"
1553
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1554
+ if colorize_error:
1555
+ label = self._colorize_result(label, is_error=True)
1556
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1557
+ header["result"] = truncated
1558
+ return self._format_tool_invocation_header(header)
1559
+
1560
+ if isinstance(result_val, dict):
1561
+ result_content = result_val.get("content")
1562
+ if isinstance(result_content, list):
1563
+ for rc_item in result_content:
1564
+ if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1565
+ text = rc_item.get("text", "")
1566
+ truncated = self._truncate_tool_result_text(text)
1567
+ if "\n" in truncated or colorize_error:
1568
+ label = "result:"
1569
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1570
+ if colorize_error:
1571
+ label = self._colorize_result(label, is_error=True)
1572
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1573
+ header["result"] = truncated
1574
+ return self._format_tool_invocation_header(header)
1575
+
1576
+ result_str = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1577
+ if "\n" in result_str or len(result_str) > 200 or colorize_error:
1578
+ label = "result:"
1579
+ colored = self._colorize_result(result_str, is_error=bool(is_error))
1580
+ if colorize_error:
1581
+ label = self._colorize_result(label, is_error=True)
1582
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1583
+ header["result"] = result_str
1584
+ return self._format_tool_invocation_header(header)
1585
+
1586
+ if isinstance(result_val, list):
1587
+ result_str = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1588
+ if "\n" in result_str or len(result_str) > 200 or colorize_error:
1589
+ label = "result:"
1590
+ colored = self._colorize_result(result_str, is_error=bool(is_error))
1591
+ if colorize_error:
1592
+ label = self._colorize_result(label, is_error=True)
1593
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1594
+ header["result"] = result_str
1595
+ return self._format_tool_invocation_header(header)
1596
+
1597
+ return self._format_tool_invocation_header(header)
1223
1598
 
1224
1599
  # --- Retry/compaction events ---
1225
1600
  if event_type == "auto_retry_start":
@@ -1232,6 +1607,8 @@ Model shorthands:
1232
1607
  return json.dumps(header, ensure_ascii=False)
1233
1608
 
1234
1609
  if event_type == "auto_retry_end":
1610
+ self.message_counter += 1
1611
+ header["counter"] = f"#{self.message_counter}"
1235
1612
  header["success"] = payload.get("success")
1236
1613
  header["attempt"] = payload.get("attempt")
1237
1614
  final_err = payload.get("finalError")
@@ -1277,7 +1654,7 @@ Model shorthands:
1277
1654
  return delta
1278
1655
  return ""
1279
1656
 
1280
- # Section start markers
1657
+ # Section start markers (no counter — only *_end events get counters)
1281
1658
  if ame_type == "text_start":
1282
1659
  return json.dumps({"type": "text_start", "datetime": now}) + "\n"
1283
1660
 
@@ -1286,26 +1663,33 @@ Model shorthands:
1286
1663
 
1287
1664
  # Section end markers (text was already streamed)
1288
1665
  if ame_type == "text_end":
1289
- return "\n" + json.dumps({"type": "text_end", "datetime": now}) + "\n"
1666
+ self.message_counter += 1
1667
+ return "\n" + json.dumps({"type": "text_end", "datetime": now, "counter": f"#{self.message_counter}"}) + "\n"
1290
1668
 
1291
1669
  if ame_type == "thinking_end":
1292
- return "\n" + json.dumps({"type": "thinking_end", "datetime": now}) + "\n"
1670
+ self.message_counter += 1
1671
+ return "\n" + json.dumps({"type": "thinking_end", "datetime": now, "counter": f"#{self.message_counter}"}) + "\n"
1293
1672
 
1294
- # Tool call end: show tool info
1673
+ # Tool call end: buffer for grouping with tool_execution_end
1295
1674
  if ame_type == "toolcall_end":
1296
1675
  tc = ame.get("toolCall", {})
1297
- header = {"type": "toolcall_end", "datetime": now}
1676
+ if self._buffer_tool_call_end(tc, now):
1677
+ return "" # suppress — will emit combined event on tool_execution_end
1678
+ # No toolCallId — fallback to original format
1679
+ self.message_counter += 1
1680
+ header = {"type": "toolcall_end", "datetime": now, "counter": f"#{self.message_counter}"}
1298
1681
  if isinstance(tc, dict):
1299
1682
  header["tool"] = tc.get("name", "")
1300
1683
  args = tc.get("arguments", {})
1301
1684
  if isinstance(args, dict):
1302
1685
  cmd = args.get("command", "")
1303
- if cmd:
1304
- header["command"] = cmd
1686
+ if isinstance(cmd, str) and cmd:
1687
+ header["command"] = self._sanitize_tool_argument_value(cmd)
1305
1688
  else:
1306
- args_str = json.dumps(args, ensure_ascii=False)
1307
- header["args"] = args_str[:200] + "..." if len(args_str) > 200 else args
1308
- return json.dumps(header, ensure_ascii=False) + "\n"
1689
+ header["args"] = self._sanitize_tool_argument_value(args)
1690
+ elif isinstance(args, str) and args.strip():
1691
+ header["args"] = self._sanitize_tool_argument_value(args)
1692
+ return self._format_tool_invocation_header(header) + "\n"
1309
1693
 
1310
1694
  # Suppress all other message_update subtypes (toolcall_start, toolcall_delta, etc.)
1311
1695
  return ""
@@ -1314,69 +1698,224 @@ Model shorthands:
1314
1698
  if event_type in ("message_start", "message_end"):
1315
1699
  return ""
1316
1700
 
1317
- # tool_execution_start
1701
+ # tool_execution_start: always suppress, buffer args
1318
1702
  if event_type == "tool_execution_start":
1319
- header = {
1320
- "type": "tool_execution_start",
1321
- "datetime": now,
1322
- "tool": parsed.get("toolName", ""),
1323
- }
1324
- args_val = parsed.get("args")
1325
- if isinstance(args_val, dict):
1326
- args_str = json.dumps(args_val, ensure_ascii=False)
1327
- if len(args_str) > 200:
1328
- header["args"] = args_str[:200] + "..."
1329
- else:
1330
- header["args"] = args_val
1331
- return json.dumps(header, ensure_ascii=False) + "\n"
1703
+ self._buffer_exec_start(parsed)
1704
+ self._in_tool_execution = True
1705
+ return "" # suppress
1332
1706
 
1333
- # tool_execution_end
1707
+ # tool_execution_end: combine with buffered data
1334
1708
  if event_type == "tool_execution_end":
1709
+ self._in_tool_execution = False
1710
+ tool_call_id = parsed.get("toolCallId")
1711
+
1712
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
1713
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
1714
+ if pending_tool and pending_exec and "started_at" in pending_exec:
1715
+ pending_tool["started_at"] = pending_exec["started_at"]
1716
+ pending = pending_tool or pending_exec
1717
+
1718
+ if pending:
1719
+ return self._build_combined_tool_event(pending, parsed, now) + "\n"
1720
+
1721
+ # No buffered data — minimal fallback
1722
+ self.message_counter += 1
1335
1723
  header = {
1336
- "type": "tool_execution_end",
1724
+ "type": "tool",
1337
1725
  "datetime": now,
1726
+ "counter": f"#{self.message_counter}",
1338
1727
  "tool": parsed.get("toolName", ""),
1339
1728
  }
1729
+ execution_time = self._format_execution_time(parsed)
1730
+ if execution_time:
1731
+ header["execution_time"] = execution_time
1732
+
1340
1733
  is_error = parsed.get("isError", False)
1341
1734
  if is_error:
1342
1735
  header["isError"] = True
1736
+
1343
1737
  result_val = parsed.get("result")
1738
+ colorize_error = self._color_enabled() and bool(is_error)
1739
+
1344
1740
  if isinstance(result_val, str) and result_val.strip():
1345
1741
  truncated = self._truncate_tool_result_text(result_val)
1346
- if "\n" in truncated:
1347
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated + "\n"
1742
+ if "\n" in truncated or colorize_error:
1743
+ label = "result:"
1744
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1745
+ if colorize_error:
1746
+ label = self._colorize_result(label, is_error=True)
1747
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1348
1748
  header["result"] = truncated
1349
- elif isinstance(result_val, dict):
1749
+ return self._format_tool_invocation_header(header) + "\n"
1750
+
1751
+ if isinstance(result_val, dict):
1350
1752
  result_content = result_val.get("content")
1351
1753
  if isinstance(result_content, list):
1352
1754
  for rc_item in result_content:
1353
1755
  if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1354
1756
  text = rc_item.get("text", "")
1355
1757
  truncated = self._truncate_tool_result_text(text)
1356
- if "\n" in truncated:
1357
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated + "\n"
1758
+ if "\n" in truncated or colorize_error:
1759
+ label = "result:"
1760
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1761
+ if colorize_error:
1762
+ label = self._colorize_result(label, is_error=True)
1763
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1358
1764
  header["result"] = truncated
1359
- break
1360
- return json.dumps(header, ensure_ascii=False) + "\n"
1765
+ return self._format_tool_invocation_header(header) + "\n"
1766
+
1767
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1768
+ if "\n" in result_json or colorize_error:
1769
+ label = "result:"
1770
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1771
+ if colorize_error:
1772
+ label = self._colorize_result(label, is_error=True)
1773
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1774
+ header["result"] = result_json
1775
+ return self._format_tool_invocation_header(header) + "\n"
1776
+
1777
+ if isinstance(result_val, list):
1778
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1779
+ if "\n" in result_json or colorize_error:
1780
+ label = "result:"
1781
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1782
+ if colorize_error:
1783
+ label = self._colorize_result(label, is_error=True)
1784
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1785
+ header["result"] = result_json
1786
+ return self._format_tool_invocation_header(header) + "\n"
1787
+
1788
+ return self._format_tool_invocation_header(header) + "\n"
1361
1789
 
1362
1790
  # turn_end: metadata only
1363
1791
  if event_type == "turn_end":
1364
- header = {"type": "turn_end", "datetime": now}
1792
+ self.message_counter += 1
1793
+ header = {"type": "turn_end", "datetime": now, "counter": f"#{self.message_counter}"}
1365
1794
  tool_results = parsed.get("toolResults")
1366
1795
  if isinstance(tool_results, list):
1367
1796
  header["tool_results_count"] = len(tool_results)
1368
1797
  return json.dumps(header, ensure_ascii=False) + "\n"
1369
1798
 
1370
- # agent_start, turn_start
1371
- if event_type in ("agent_start", "turn_start"):
1799
+ # turn_start: suppress (no user-visible value)
1800
+ if event_type == "turn_start":
1801
+ return ""
1802
+
1803
+ # agent_start (no counter — only *_end events get counters)
1804
+ if event_type == "agent_start":
1372
1805
  return json.dumps({"type": event_type, "datetime": now}) + "\n"
1373
1806
 
1374
1807
  # agent_end
1375
1808
  if event_type == "agent_end":
1376
- header = {"type": "agent_end", "datetime": now}
1809
+ self.message_counter += 1
1810
+ header = {"type": "agent_end", "datetime": now, "counter": f"#{self.message_counter}"}
1377
1811
  messages = parsed.get("messages")
1378
1812
  if isinstance(messages, list):
1379
1813
  header["message_count"] = len(messages)
1814
+ total_cost_usd = self._extract_total_cost_usd(parsed)
1815
+ if total_cost_usd is not None:
1816
+ header["total_cost_usd"] = total_cost_usd
1817
+ return json.dumps(header, ensure_ascii=False) + "\n"
1818
+
1819
+ # --- Role-based messages (Pi-wrapped Codex messages) ---
1820
+ role = parsed.get("role", "")
1821
+ if role == "toolResult":
1822
+ self.message_counter += 1
1823
+ header = {
1824
+ "type": "toolResult",
1825
+ "datetime": now,
1826
+ "counter": f"#{self.message_counter}",
1827
+ "toolName": parsed.get("toolName", ""),
1828
+ }
1829
+ is_error = parsed.get("isError", False)
1830
+ if is_error:
1831
+ header["isError"] = True
1832
+ content = parsed.get("content")
1833
+ if isinstance(content, list):
1834
+ for item in content:
1835
+ if isinstance(item, dict) and item.get("type") == "text":
1836
+ text_val = item.get("text", "")
1837
+ truncated = self._truncate_tool_result_text(text_val)
1838
+ use_color = self._color_enabled()
1839
+ if "\n" in truncated or use_color:
1840
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1841
+ label = self._colorize_result("content:", is_error=bool(is_error))
1842
+ return json.dumps(header, ensure_ascii=False) + "\n" + label + "\n" + colored + "\n"
1843
+ header["content"] = truncated
1844
+ return json.dumps(header, ensure_ascii=False) + "\n"
1845
+ return json.dumps(header, ensure_ascii=False) + "\n"
1846
+
1847
+ if role == "assistant":
1848
+ self.message_counter += 1
1849
+ content = parsed.get("content")
1850
+ if isinstance(content, list):
1851
+ self._strip_thinking_signature(content)
1852
+ header = {"type": "assistant", "datetime": now, "counter": f"#{self.message_counter}"}
1853
+ text_parts = []
1854
+ if isinstance(content, list):
1855
+ for item in content:
1856
+ if isinstance(item, dict):
1857
+ if item.get("type") == "text":
1858
+ text_parts.append(item.get("text", ""))
1859
+ elif item.get("type") == "thinking":
1860
+ text_parts.append(f"[thinking] {item.get('thinking', '')}")
1861
+ elif item.get("type") == "toolCall":
1862
+ name = item.get("name", "")
1863
+ args = item.get("arguments", {})
1864
+ cmd = args.get("command", "") if isinstance(args, dict) else ""
1865
+ text_parts.append(f"[toolCall] {name}: {cmd}" if cmd else f"[toolCall] {name}")
1866
+ if text_parts:
1867
+ combined = "\n".join(text_parts)
1868
+ if "\n" in combined:
1869
+ return json.dumps(header, ensure_ascii=False) + "\n" + combined + "\n"
1870
+ header["content"] = combined
1871
+ return json.dumps(header, ensure_ascii=False) + "\n"
1872
+
1873
+ if role:
1874
+ # Other roles — minimal JSON header
1875
+ self.message_counter += 1
1876
+ return json.dumps({"type": role, "datetime": now, "counter": f"#{self.message_counter}"}, ensure_ascii=False) + "\n"
1877
+
1878
+ # --- Native Codex events (agent_reasoning, agent_message, exec_command_end, etc.) ---
1879
+ msg_type, payload, outer_type = self._normalize_codex_event(parsed)
1880
+
1881
+ if msg_type in ("agent_reasoning", "reasoning"):
1882
+ self.message_counter += 1
1883
+ content = self._extract_reasoning_text(payload)
1884
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1885
+ if "\n" in content:
1886
+ return json.dumps(header, ensure_ascii=False) + "\ntext:\n" + content + "\n"
1887
+ if content:
1888
+ header["text"] = content
1889
+ return json.dumps(header, ensure_ascii=False) + "\n"
1890
+
1891
+ if msg_type in ("agent_message", "assistant_message"):
1892
+ self.message_counter += 1
1893
+ content = self._extract_message_text_codex(payload)
1894
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1895
+ if "\n" in content:
1896
+ return json.dumps(header, ensure_ascii=False) + "\nmessage:\n" + content + "\n"
1897
+ if content:
1898
+ header["message"] = content
1899
+ return json.dumps(header, ensure_ascii=False) + "\n"
1900
+
1901
+ if msg_type == "exec_command_end":
1902
+ self.message_counter += 1
1903
+ formatted_output = payload.get("formatted_output", "") if isinstance(payload, dict) else ""
1904
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1905
+ if "\n" in formatted_output:
1906
+ return json.dumps(header, ensure_ascii=False) + "\nformatted_output:\n" + formatted_output + "\n"
1907
+ if formatted_output:
1908
+ header["formatted_output"] = formatted_output
1909
+ return json.dumps(header, ensure_ascii=False) + "\n"
1910
+
1911
+ if msg_type == "command_execution":
1912
+ self.message_counter += 1
1913
+ aggregated_output = self._extract_command_output_text(payload)
1914
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1915
+ if "\n" in aggregated_output:
1916
+ return json.dumps(header, ensure_ascii=False) + "\naggregated_output:\n" + aggregated_output + "\n"
1917
+ if aggregated_output:
1918
+ header["aggregated_output"] = aggregated_output
1380
1919
  return json.dumps(header, ensure_ascii=False) + "\n"
1381
1920
 
1382
1921
  # Fallback: not handled
@@ -1392,11 +1931,312 @@ Model shorthands:
1392
1931
  hide_types.update(parts)
1393
1932
  return hide_types
1394
1933
 
1934
+ @staticmethod
1935
+ def _toolcall_end_delay_seconds() -> float:
1936
+ """Return delay for fallback toolcall_end visibility (default 3s)."""
1937
+ raw = os.environ.get("PI_TOOLCALL_END_DELAY_SECONDS", "3")
1938
+ try:
1939
+ delay = float(raw)
1940
+ except (TypeError, ValueError):
1941
+ delay = 3.0
1942
+ return max(0.0, delay)
1943
+
1395
1944
  @staticmethod
1396
1945
  def _sanitize_sub_agent_response(event: dict) -> dict:
1397
1946
  """Strip bulky fields (messages, type) from sub_agent_response to reduce token usage."""
1398
1947
  return {k: v for k, v in event.items() if k not in ("messages", "type")}
1399
1948
 
1949
+ def _reset_run_cost_tracking(self) -> None:
1950
+ """Reset per-run usage/cost accumulation state."""
1951
+ self._run_usage_totals = None
1952
+ self._run_total_cost_usd = None
1953
+ self._run_seen_usage_keys.clear()
1954
+
1955
+ @staticmethod
1956
+ def _is_numeric_value(value: object) -> bool:
1957
+ """True for int/float values (excluding bool)."""
1958
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
1959
+
1960
+ @staticmethod
1961
+ def _normalize_usage_payload(usage: dict) -> Optional[dict]:
1962
+ """Normalize usage payload into numeric totals for accumulation."""
1963
+ if not isinstance(usage, dict):
1964
+ return None
1965
+
1966
+ usage_cost = usage.get("cost")
1967
+ cost_payload = usage_cost if isinstance(usage_cost, dict) else {}
1968
+
1969
+ input_tokens = float(usage.get("input")) if PiService._is_numeric_value(usage.get("input")) else 0.0
1970
+ output_tokens = float(usage.get("output")) if PiService._is_numeric_value(usage.get("output")) else 0.0
1971
+ cache_read_tokens = float(usage.get("cacheRead")) if PiService._is_numeric_value(usage.get("cacheRead")) else 0.0
1972
+ cache_write_tokens = float(usage.get("cacheWrite")) if PiService._is_numeric_value(usage.get("cacheWrite")) else 0.0
1973
+
1974
+ total_tokens_raw = usage.get("totalTokens")
1975
+ total_tokens = (
1976
+ float(total_tokens_raw)
1977
+ if PiService._is_numeric_value(total_tokens_raw)
1978
+ else input_tokens + output_tokens + cache_read_tokens + cache_write_tokens
1979
+ )
1980
+
1981
+ cost_input = float(cost_payload.get("input")) if PiService._is_numeric_value(cost_payload.get("input")) else 0.0
1982
+ cost_output = float(cost_payload.get("output")) if PiService._is_numeric_value(cost_payload.get("output")) else 0.0
1983
+ cost_cache_read = (
1984
+ float(cost_payload.get("cacheRead")) if PiService._is_numeric_value(cost_payload.get("cacheRead")) else 0.0
1985
+ )
1986
+ cost_cache_write = (
1987
+ float(cost_payload.get("cacheWrite")) if PiService._is_numeric_value(cost_payload.get("cacheWrite")) else 0.0
1988
+ )
1989
+
1990
+ cost_total_raw = cost_payload.get("total")
1991
+ cost_total = (
1992
+ float(cost_total_raw)
1993
+ if PiService._is_numeric_value(cost_total_raw)
1994
+ else cost_input + cost_output + cost_cache_read + cost_cache_write
1995
+ )
1996
+
1997
+ has_any_value = any(
1998
+ PiService._is_numeric_value(v)
1999
+ for v in (
2000
+ usage.get("input"),
2001
+ usage.get("output"),
2002
+ usage.get("cacheRead"),
2003
+ usage.get("cacheWrite"),
2004
+ usage.get("totalTokens"),
2005
+ cost_payload.get("input"),
2006
+ cost_payload.get("output"),
2007
+ cost_payload.get("cacheRead"),
2008
+ cost_payload.get("cacheWrite"),
2009
+ cost_payload.get("total"),
2010
+ )
2011
+ )
2012
+
2013
+ if not has_any_value:
2014
+ return None
2015
+
2016
+ return {
2017
+ "input": input_tokens,
2018
+ "output": output_tokens,
2019
+ "cacheRead": cache_read_tokens,
2020
+ "cacheWrite": cache_write_tokens,
2021
+ "totalTokens": total_tokens,
2022
+ "cost": {
2023
+ "input": cost_input,
2024
+ "output": cost_output,
2025
+ "cacheRead": cost_cache_read,
2026
+ "cacheWrite": cost_cache_write,
2027
+ "total": cost_total,
2028
+ },
2029
+ }
2030
+
2031
+ @staticmethod
2032
+ def _merge_usage_payloads(base: Optional[dict], delta: Optional[dict]) -> Optional[dict]:
2033
+ """Merge normalized usage payloads by summing token/cost fields."""
2034
+ if not isinstance(base, dict):
2035
+ return delta
2036
+ if not isinstance(delta, dict):
2037
+ return base
2038
+
2039
+ base_cost = base.get("cost") if isinstance(base.get("cost"), dict) else {}
2040
+ delta_cost = delta.get("cost") if isinstance(delta.get("cost"), dict) else {}
2041
+
2042
+ return {
2043
+ "input": float(base.get("input", 0.0)) + float(delta.get("input", 0.0)),
2044
+ "output": float(base.get("output", 0.0)) + float(delta.get("output", 0.0)),
2045
+ "cacheRead": float(base.get("cacheRead", 0.0)) + float(delta.get("cacheRead", 0.0)),
2046
+ "cacheWrite": float(base.get("cacheWrite", 0.0)) + float(delta.get("cacheWrite", 0.0)),
2047
+ "totalTokens": float(base.get("totalTokens", 0.0)) + float(delta.get("totalTokens", 0.0)),
2048
+ "cost": {
2049
+ "input": float(base_cost.get("input", 0.0)) + float(delta_cost.get("input", 0.0)),
2050
+ "output": float(base_cost.get("output", 0.0)) + float(delta_cost.get("output", 0.0)),
2051
+ "cacheRead": float(base_cost.get("cacheRead", 0.0)) + float(delta_cost.get("cacheRead", 0.0)),
2052
+ "cacheWrite": float(base_cost.get("cacheWrite", 0.0)) + float(delta_cost.get("cacheWrite", 0.0)),
2053
+ "total": float(base_cost.get("total", 0.0)) + float(delta_cost.get("total", 0.0)),
2054
+ },
2055
+ }
2056
+
2057
+ @staticmethod
2058
+ def _aggregate_assistant_usages(messages: list) -> Optional[dict]:
2059
+ """Aggregate assistant usage payloads from an event messages array."""
2060
+ if not isinstance(messages, list):
2061
+ return None
2062
+
2063
+ assistant_usages: List[dict] = []
2064
+ for msg in messages:
2065
+ if isinstance(msg, dict) and msg.get("role") == "assistant":
2066
+ usage = msg.get("usage")
2067
+ if isinstance(usage, dict):
2068
+ assistant_usages.append(usage)
2069
+
2070
+ if not assistant_usages:
2071
+ return None
2072
+ if len(assistant_usages) == 1:
2073
+ return assistant_usages[0]
2074
+
2075
+ totals: Optional[dict] = None
2076
+ for usage in assistant_usages:
2077
+ normalized = PiService._normalize_usage_payload(usage)
2078
+ totals = PiService._merge_usage_payloads(totals, normalized)
2079
+
2080
+ return totals
2081
+
2082
+ def _assistant_usage_dedupe_key(self, message: dict, usage: dict) -> Optional[str]:
2083
+ """Build a stable dedupe key for assistant usage seen across message/turn_end events."""
2084
+ if not isinstance(message, dict) or not isinstance(usage, dict):
2085
+ return None
2086
+
2087
+ for id_key in ("id", "messageId", "message_id"):
2088
+ value = message.get(id_key)
2089
+ if isinstance(value, str) and value.strip():
2090
+ return f"id:{value.strip()}"
2091
+
2092
+ timestamp = message.get("timestamp")
2093
+ if self._is_numeric_value(timestamp):
2094
+ return f"ts:{int(float(timestamp))}"
2095
+ if isinstance(timestamp, str) and timestamp.strip():
2096
+ return f"ts:{timestamp.strip()}"
2097
+
2098
+ usage_cost = usage.get("cost") if isinstance(usage.get("cost"), dict) else {}
2099
+ signature: Dict[str, object] = {
2100
+ "stopReason": message.get("stopReason") if isinstance(message.get("stopReason"), str) else "",
2101
+ "input": usage.get("input", 0.0),
2102
+ "output": usage.get("output", 0.0),
2103
+ "cacheRead": usage.get("cacheRead", 0.0),
2104
+ "cacheWrite": usage.get("cacheWrite", 0.0),
2105
+ "totalTokens": usage.get("totalTokens", 0.0),
2106
+ "costTotal": usage_cost.get("total", 0.0),
2107
+ }
2108
+
2109
+ text = self._extract_text_from_message(message)
2110
+ if text:
2111
+ signature["text"] = text[:120]
2112
+
2113
+ return "sig:" + json.dumps(signature, sort_keys=True, ensure_ascii=False)
2114
+
2115
+ def _track_assistant_usage_from_event(self, event: dict) -> None:
2116
+ """Accumulate per-run assistant usage from stream events."""
2117
+ if not isinstance(event, dict):
2118
+ return
2119
+
2120
+ event_type = event.get("type")
2121
+ if event_type not in ("message", "message_end", "turn_end"):
2122
+ return
2123
+
2124
+ message = event.get("message")
2125
+ if not isinstance(message, dict) or message.get("role") != "assistant":
2126
+ return
2127
+
2128
+ normalized_usage = self._normalize_usage_payload(message.get("usage"))
2129
+ if not isinstance(normalized_usage, dict):
2130
+ return
2131
+
2132
+ usage_key = self._assistant_usage_dedupe_key(message, normalized_usage)
2133
+ if usage_key and usage_key in self._run_seen_usage_keys:
2134
+ return
2135
+ if usage_key:
2136
+ self._run_seen_usage_keys.add(usage_key)
2137
+
2138
+ self._run_usage_totals = self._merge_usage_payloads(self._run_usage_totals, normalized_usage)
2139
+ self._run_total_cost_usd = self._extract_total_cost_usd(
2140
+ {"usage": self._run_usage_totals},
2141
+ self._run_usage_totals,
2142
+ )
2143
+
2144
+ def _get_accumulated_total_cost_usd(self) -> Optional[float]:
2145
+ """Return accumulated per-run total cost when available."""
2146
+ if self._is_numeric_value(self._run_total_cost_usd):
2147
+ return float(self._run_total_cost_usd)
2148
+ if isinstance(self._run_usage_totals, dict):
2149
+ return self._extract_total_cost_usd({"usage": self._run_usage_totals}, self._run_usage_totals)
2150
+ return None
2151
+
2152
+ @staticmethod
2153
+ def _extract_usage_from_event(event: dict) -> Optional[dict]:
2154
+ """Extract usage payload from Pi event shapes (event/message/messages)."""
2155
+ if not isinstance(event, dict):
2156
+ return None
2157
+
2158
+ messages = event.get("messages")
2159
+ if event.get("type") == "agent_end" and isinstance(messages, list):
2160
+ aggregated = PiService._aggregate_assistant_usages(messages)
2161
+ if isinstance(aggregated, dict):
2162
+ return aggregated
2163
+
2164
+ direct_usage = event.get("usage")
2165
+ if isinstance(direct_usage, dict):
2166
+ return direct_usage
2167
+
2168
+ message = event.get("message")
2169
+ if isinstance(message, dict):
2170
+ message_usage = message.get("usage")
2171
+ if isinstance(message_usage, dict):
2172
+ return message_usage
2173
+
2174
+ if isinstance(messages, list):
2175
+ aggregated = PiService._aggregate_assistant_usages(messages)
2176
+ if isinstance(aggregated, dict):
2177
+ return aggregated
2178
+
2179
+ return None
2180
+
2181
+ @staticmethod
2182
+ def _extract_total_cost_usd(event: dict, usage: Optional[dict] = None) -> Optional[float]:
2183
+ """Extract total USD cost from explicit fields or usage.cost.total."""
2184
+ if not isinstance(event, dict):
2185
+ return None
2186
+
2187
+ for key in ("total_cost_usd", "totalCostUsd", "totalCostUSD"):
2188
+ value = event.get(key)
2189
+ if PiService._is_numeric_value(value):
2190
+ return float(value)
2191
+
2192
+ direct_cost = event.get("cost")
2193
+ if PiService._is_numeric_value(direct_cost):
2194
+ return float(direct_cost)
2195
+ if isinstance(direct_cost, dict):
2196
+ total = direct_cost.get("total")
2197
+ if PiService._is_numeric_value(total):
2198
+ return float(total)
2199
+
2200
+ usage_payload = usage if isinstance(usage, dict) else None
2201
+ if usage_payload is None:
2202
+ usage_payload = PiService._extract_usage_from_event(event)
2203
+
2204
+ if isinstance(usage_payload, dict):
2205
+ usage_cost = usage_payload.get("cost")
2206
+ if isinstance(usage_cost, dict):
2207
+ total = usage_cost.get("total")
2208
+ if PiService._is_numeric_value(total):
2209
+ return float(total)
2210
+
2211
+ return None
2212
+
2213
+ def _build_success_result_event(self, text: str, event: dict) -> dict:
2214
+ """Build standardized success envelope for shell-backend capture."""
2215
+ usage = self._extract_usage_from_event(event)
2216
+ if isinstance(self._run_usage_totals, dict):
2217
+ usage = self._run_usage_totals
2218
+
2219
+ total_cost_usd = self._extract_total_cost_usd(event, usage)
2220
+ accumulated_total_cost = self._get_accumulated_total_cost_usd()
2221
+ if accumulated_total_cost is not None:
2222
+ total_cost_usd = accumulated_total_cost
2223
+
2224
+ result_event: Dict = {
2225
+ "type": "result",
2226
+ "subtype": "success",
2227
+ "is_error": False,
2228
+ "result": text,
2229
+ "session_id": self.session_id,
2230
+ "sub_agent_response": self._sanitize_sub_agent_response(event),
2231
+ }
2232
+
2233
+ if isinstance(usage, dict):
2234
+ result_event["usage"] = usage
2235
+ if total_cost_usd is not None:
2236
+ result_event["total_cost_usd"] = total_cost_usd
2237
+
2238
+ return result_event
2239
+
1400
2240
  def _write_capture_file(self, capture_path: Optional[str]) -> None:
1401
2241
  """Write final result event to capture file for shell backend."""
1402
2242
  if not capture_path or not self.last_result_event:
@@ -1423,6 +2263,9 @@ Model shorthands:
1423
2263
  pretty = args.pretty.lower() != "false"
1424
2264
  capture_path = os.environ.get("JUNO_SUBAGENT_CAPTURE_PATH")
1425
2265
  hide_types = self._build_hide_types()
2266
+ self._buffered_tool_stdout_lines.clear()
2267
+ self._reset_run_cost_tracking()
2268
+ cancel_delayed_toolcalls = lambda: None
1426
2269
 
1427
2270
  if verbose:
1428
2271
  # Truncate prompt in display to avoid confusing multi-line output
@@ -1528,7 +2371,264 @@ Model shorthands:
1528
2371
  stderr_thread = threading.Thread(target=_stderr_reader, daemon=True)
1529
2372
  stderr_thread.start()
1530
2373
 
2374
+ cancel_delayed_toolcalls = lambda: None
2375
+
1531
2376
  if process.stdout:
2377
+ pending_tool_execution_end: Optional[dict] = None
2378
+ pending_turn_end_after_tool: Optional[dict] = None
2379
+ toolcall_end_delay_seconds = self._toolcall_end_delay_seconds()
2380
+ pending_delayed_toolcalls: Dict[int, dict] = {}
2381
+ delayed_toolcalls_lock = threading.Lock()
2382
+ delayed_toolcall_seq = 0
2383
+
2384
+ def _extract_fallback_toolcall_name(parsed_event: dict) -> Optional[str]:
2385
+ if parsed_event.get("type") != "message_update":
2386
+ return None
2387
+ assistant_event = parsed_event.get("assistantMessageEvent")
2388
+ if not isinstance(assistant_event, dict) or assistant_event.get("type") != "toolcall_end":
2389
+ return None
2390
+ tool_call = assistant_event.get("toolCall")
2391
+ if not isinstance(tool_call, dict):
2392
+ return None
2393
+ tool_call_id = tool_call.get("toolCallId")
2394
+ if isinstance(tool_call_id, str) and tool_call_id.strip():
2395
+ return None
2396
+ name = tool_call.get("name", "")
2397
+ return name if isinstance(name, str) else ""
2398
+
2399
+ def _format_deferred_toolcall(parsed_event: dict, mode: str) -> Optional[str]:
2400
+ if mode == self.PRETTIFIER_LIVE:
2401
+ return self._format_event_live(parsed_event)
2402
+ if mode == self.PRETTIFIER_CODEX:
2403
+ return self._format_pi_codex_event(parsed_event)
2404
+ if mode == self.PRETTIFIER_CLAUDE:
2405
+ return self._format_event_pretty_claude(parsed_event)
2406
+ return self._format_event_pretty(parsed_event)
2407
+
2408
+ def _emit_stdout(formatted: str, raw: bool = False) -> None:
2409
+ if raw:
2410
+ sys.stdout.write(formatted)
2411
+ sys.stdout.flush()
2412
+ return
2413
+ print(formatted, flush=True)
2414
+
2415
+ def _schedule_delayed_toolcall(parsed_event: dict, tool_name: str, mode: str) -> None:
2416
+ nonlocal delayed_toolcall_seq
2417
+
2418
+ def _emit_delayed_toolcall(event_payload: dict, event_mode: str) -> None:
2419
+ formatted = _format_deferred_toolcall(event_payload, event_mode)
2420
+ if not formatted:
2421
+ return
2422
+ _emit_stdout(formatted, raw=event_mode == self.PRETTIFIER_LIVE)
2423
+
2424
+ if toolcall_end_delay_seconds <= 0:
2425
+ _emit_delayed_toolcall(parsed_event, mode)
2426
+ return
2427
+
2428
+ delayed_toolcall_seq += 1
2429
+ entry_id = delayed_toolcall_seq
2430
+ entry: Dict = {
2431
+ "id": entry_id,
2432
+ "tool": tool_name,
2433
+ "event": parsed_event,
2434
+ "mode": mode,
2435
+ }
2436
+
2437
+ def _timer_emit() -> None:
2438
+ with delayed_toolcalls_lock:
2439
+ pending = pending_delayed_toolcalls.pop(entry_id, None)
2440
+ if not pending:
2441
+ return
2442
+ _emit_delayed_toolcall(pending["event"], pending["mode"])
2443
+
2444
+ timer = threading.Timer(toolcall_end_delay_seconds, _timer_emit)
2445
+ timer.daemon = True
2446
+ entry["timer"] = timer
2447
+ with delayed_toolcalls_lock:
2448
+ pending_delayed_toolcalls[entry_id] = entry
2449
+ timer.start()
2450
+
2451
+ def _cancel_delayed_toolcall(tool_name: str) -> None:
2452
+ with delayed_toolcalls_lock:
2453
+ if not pending_delayed_toolcalls:
2454
+ return
2455
+
2456
+ selected_id: Optional[int] = None
2457
+ if tool_name:
2458
+ for entry_id, entry in pending_delayed_toolcalls.items():
2459
+ if entry.get("tool") == tool_name:
2460
+ selected_id = entry_id
2461
+ break
2462
+
2463
+ if selected_id is None:
2464
+ selected_id = min(pending_delayed_toolcalls.keys())
2465
+
2466
+ pending = pending_delayed_toolcalls.pop(selected_id, None)
2467
+
2468
+ if pending:
2469
+ timer = pending.get("timer")
2470
+ if timer:
2471
+ timer.cancel()
2472
+
2473
+ def _cancel_all_delayed_toolcalls() -> None:
2474
+ with delayed_toolcalls_lock:
2475
+ pending = list(pending_delayed_toolcalls.values())
2476
+ pending_delayed_toolcalls.clear()
2477
+ for entry in pending:
2478
+ timer = entry.get("timer")
2479
+ if timer:
2480
+ timer.cancel()
2481
+
2482
+ cancel_delayed_toolcalls = _cancel_all_delayed_toolcalls
2483
+
2484
+ def _emit_parsed_event(parsed_event: dict, raw_json_line: Optional[str] = None) -> None:
2485
+ event_type = parsed_event.get("type", "")
2486
+
2487
+ # Capture session ID from the session event (sent at stream start)
2488
+ if event_type == "session":
2489
+ self.session_id = parsed_event.get("id")
2490
+
2491
+ # Track per-run assistant usage from stream events.
2492
+ self._track_assistant_usage_from_event(parsed_event)
2493
+
2494
+ # Ensure agent_end reflects cumulative per-run totals when available.
2495
+ if event_type == "agent_end":
2496
+ accumulated_total_cost = self._get_accumulated_total_cost_usd()
2497
+ if accumulated_total_cost is not None:
2498
+ parsed_event["total_cost_usd"] = accumulated_total_cost
2499
+ if isinstance(self._run_usage_totals, dict):
2500
+ parsed_event["usage"] = self._run_usage_totals
2501
+
2502
+ # Capture result event for shell backend
2503
+ if event_type == "agent_end":
2504
+ # agent_end has a 'messages' array; extract final assistant text
2505
+ messages = parsed_event.get("messages", [])
2506
+ text = ""
2507
+ if isinstance(messages, list):
2508
+ # Walk messages in reverse to find last assistant message with text
2509
+ for m in reversed(messages):
2510
+ if isinstance(m, dict) and m.get("role") == "assistant":
2511
+ text = self._extract_text_from_message(m)
2512
+ if text:
2513
+ break
2514
+ if text:
2515
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
2516
+ else:
2517
+ self.last_result_event = parsed_event
2518
+ elif event_type == "message":
2519
+ # OpenAI-compatible format: capture last assistant message
2520
+ msg = parsed_event.get("message", {})
2521
+ if isinstance(msg, dict) and msg.get("role") == "assistant":
2522
+ text = self._extract_text_from_message(msg)
2523
+ if text:
2524
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
2525
+ elif event_type == "turn_end":
2526
+ # turn_end may contain the final assistant message
2527
+ msg = parsed_event.get("message", {})
2528
+ if isinstance(msg, dict):
2529
+ text = self._extract_text_from_message(msg)
2530
+ if text:
2531
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
2532
+
2533
+ # Filter hidden stream types (live mode handles its own filtering)
2534
+ if event_type in hide_types and self.prettifier_mode != self.PRETTIFIER_LIVE:
2535
+ return
2536
+
2537
+ # Fallback toolcall_end events (without toolCallId) are delayed so
2538
+ # short tool executions only show the final combined tool event.
2539
+ if pretty:
2540
+ fallback_tool_name = _extract_fallback_toolcall_name(parsed_event)
2541
+ if fallback_tool_name is not None:
2542
+ _schedule_delayed_toolcall(parsed_event, fallback_tool_name, self.prettifier_mode)
2543
+ return
2544
+
2545
+ # Live stream mode: stream deltas in real-time
2546
+ if self.prettifier_mode == self.PRETTIFIER_LIVE:
2547
+ if event_type in hide_types:
2548
+ # In live mode, still suppress session/compaction/retry events
2549
+ # but NOT message_start/message_end (handled by _format_event_live)
2550
+ if event_type not in ("message_start", "message_end"):
2551
+ return
2552
+ formatted_live = self._format_event_live(parsed_event)
2553
+ if formatted_live is not None:
2554
+ if formatted_live == "":
2555
+ return
2556
+ sys.stdout.write(formatted_live)
2557
+ sys.stdout.flush()
2558
+ else:
2559
+ # Fallback: print raw JSON for unhandled event types
2560
+ print(json.dumps(parsed_event, ensure_ascii=False), flush=True)
2561
+ return
2562
+
2563
+ # Format and print using model-appropriate prettifier
2564
+ if pretty:
2565
+ if self.prettifier_mode == self.PRETTIFIER_CODEX:
2566
+ # Try Pi-wrapped Codex format first (role-based messages)
2567
+ if "role" in parsed_event:
2568
+ formatted = self._format_pi_codex_message(parsed_event)
2569
+ else:
2570
+ # Try Pi event handler (message_update, turn_end, etc.)
2571
+ formatted = self._format_pi_codex_event(parsed_event)
2572
+ if formatted is None:
2573
+ # Try native Codex event handler
2574
+ formatted = self._format_event_pretty_codex(parsed_event)
2575
+ if formatted is None:
2576
+ # Sanitize before raw JSON fallback: strip thinkingSignature,
2577
+ # encrypted_content, and metadata from nested Codex events.
2578
+ self._sanitize_codex_event(parsed_event, strip_metadata=True)
2579
+ formatted = json.dumps(parsed_event, ensure_ascii=False)
2580
+ elif formatted == "":
2581
+ return
2582
+ elif self.prettifier_mode == self.PRETTIFIER_CLAUDE:
2583
+ formatted = self._format_event_pretty_claude(parsed_event)
2584
+ else:
2585
+ formatted = self._format_event_pretty(parsed_event)
2586
+ if formatted is not None:
2587
+ print(formatted, flush=True)
2588
+ else:
2589
+ if raw_json_line is not None:
2590
+ print(raw_json_line, flush=True)
2591
+ else:
2592
+ print(json.dumps(parsed_event, ensure_ascii=False), flush=True)
2593
+
2594
+ def _merge_buffered_tool_stdout_into(event_payload: dict) -> None:
2595
+ buffered_text = "\n".join(self._buffered_tool_stdout_lines).strip()
2596
+ if not buffered_text:
2597
+ self._buffered_tool_stdout_lines.clear()
2598
+ return
2599
+
2600
+ result_val = event_payload.get("result")
2601
+ if result_val in (None, "", [], {}):
2602
+ event_payload["result"] = buffered_text
2603
+ elif isinstance(result_val, str):
2604
+ existing = self._strip_ansi_sequences(result_val)
2605
+ if existing:
2606
+ if not existing.endswith("\n"):
2607
+ existing += "\n"
2608
+ event_payload["result"] = existing + buffered_text
2609
+ else:
2610
+ event_payload["result"] = buffered_text
2611
+ else:
2612
+ # Keep complex result structures untouched; print trailing raw lines
2613
+ # before the next structured event for stable transcript ordering.
2614
+ print(buffered_text, flush=True)
2615
+
2616
+ self._buffered_tool_stdout_lines.clear()
2617
+
2618
+ def _flush_pending_tool_events() -> None:
2619
+ nonlocal pending_tool_execution_end, pending_turn_end_after_tool
2620
+ if pending_tool_execution_end is not None:
2621
+ _merge_buffered_tool_stdout_into(pending_tool_execution_end)
2622
+ _emit_parsed_event(pending_tool_execution_end)
2623
+ pending_tool_execution_end = None
2624
+
2625
+ if pending_turn_end_after_tool is not None:
2626
+ if self._buffered_tool_stdout_lines:
2627
+ print("\n".join(self._buffered_tool_stdout_lines), flush=True)
2628
+ self._buffered_tool_stdout_lines.clear()
2629
+ _emit_parsed_event(pending_turn_end_after_tool)
2630
+ pending_turn_end_after_tool = None
2631
+
1532
2632
  try:
1533
2633
  for raw_line in process.stdout:
1534
2634
  line = raw_line.rstrip("\n\r")
@@ -1539,119 +2639,57 @@ Model shorthands:
1539
2639
  try:
1540
2640
  parsed = json.loads(line)
1541
2641
  except json.JSONDecodeError:
1542
- # Non-JSON output print as-is
2642
+ # Non-JSON output (raw tool stdout). In pretty mode, buffer raw
2643
+ # lines while tool execution events are pending to avoid
2644
+ # interleaving with structured events (e.g. turn_end).
2645
+ if pretty and (
2646
+ self._in_tool_execution
2647
+ or pending_tool_execution_end is not None
2648
+ or pending_turn_end_after_tool is not None
2649
+ ):
2650
+ self._buffered_tool_stdout_lines.append(self._strip_ansi_sequences(line))
2651
+ continue
1543
2652
  print(line, flush=True)
1544
2653
  continue
1545
2654
 
1546
2655
  event_type = parsed.get("type", "")
1547
2656
 
1548
- # Capture session ID from the session event (sent at stream start)
1549
- if event_type == "session":
1550
- self.session_id = parsed.get("id")
1551
-
1552
- # Capture result event for shell backend
1553
- if event_type == "agent_end":
1554
- # agent_end has a 'messages' array; extract final assistant text
1555
- messages = parsed.get("messages", [])
1556
- text = ""
1557
- if isinstance(messages, list):
1558
- # Walk messages in reverse to find last assistant message with text
1559
- for m in reversed(messages):
1560
- if isinstance(m, dict) and m.get("role") == "assistant":
1561
- text = self._extract_text_from_message(m)
1562
- if text:
1563
- break
1564
- if text:
1565
- self.last_result_event = {
1566
- "type": "result",
1567
- "subtype": "success",
1568
- "is_error": False,
1569
- "result": text,
1570
- "session_id": self.session_id,
1571
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1572
- }
1573
- else:
1574
- self.last_result_event = parsed
1575
- elif event_type == "message":
1576
- # OpenAI-compatible format: capture last assistant message
1577
- msg = parsed.get("message", {})
1578
- if isinstance(msg, dict) and msg.get("role") == "assistant":
1579
- text = self._extract_text_from_message(msg)
1580
- if text:
1581
- self.last_result_event = {
1582
- "type": "result",
1583
- "subtype": "success",
1584
- "is_error": False,
1585
- "result": text,
1586
- "session_id": self.session_id,
1587
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1588
- }
1589
- elif event_type == "turn_end":
1590
- # turn_end may contain the final assistant message
1591
- msg = parsed.get("message", {})
1592
- if isinstance(msg, dict):
1593
- text = self._extract_text_from_message(msg)
1594
- if text:
1595
- self.last_result_event = {
1596
- "type": "result",
1597
- "subtype": "success",
1598
- "is_error": False,
1599
- "result": text,
1600
- "session_id": self.session_id,
1601
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1602
- }
1603
-
1604
- # Filter hidden stream types (live mode handles its own filtering)
1605
- if event_type in hide_types and self.prettifier_mode != self.PRETTIFIER_LIVE:
2657
+ if pretty and event_type == "tool_execution_start":
2658
+ # Reset raw tool stdout buffer per tool execution.
2659
+ self._buffered_tool_stdout_lines.clear()
2660
+
2661
+ if pretty and event_type == "tool_execution_end":
2662
+ # Tool finished before the delayed fallback timer fired — suppress
2663
+ # the pending fallback toolcall_end preview.
2664
+ tool_name = parsed.get("toolName", "")
2665
+ _cancel_delayed_toolcall(tool_name if isinstance(tool_name, str) else "")
2666
+
2667
+ # Defer emission so any trailing raw stdout can be grouped before
2668
+ # downstream structured metadata like turn_end.
2669
+ pending_tool_execution_end = parsed
1606
2670
  continue
1607
2671
 
1608
- # Live stream mode: stream deltas in real-time
1609
- if self.prettifier_mode == self.PRETTIFIER_LIVE:
1610
- if event_type in hide_types:
1611
- # In live mode, still suppress session/compaction/retry events
1612
- # but NOT message_start/message_end (handled by _format_event_live)
1613
- if event_type not in ("message_start", "message_end"):
1614
- continue
1615
- formatted = self._format_event_live(parsed)
1616
- if formatted is not None:
1617
- if formatted == "":
1618
- continue
1619
- sys.stdout.write(formatted)
1620
- sys.stdout.flush()
1621
- else:
1622
- # Fallback: print raw JSON for unhandled event types
1623
- print(json.dumps(parsed, ensure_ascii=False), flush=True)
2672
+ if pretty and event_type == "turn_end" and pending_tool_execution_end is not None:
2673
+ # Hold turn_end until buffered trailing raw stdout is flushed with
2674
+ # the pending tool event.
2675
+ pending_turn_end_after_tool = parsed
1624
2676
  continue
1625
2677
 
1626
- # Format and print using model-appropriate prettifier
1627
- if pretty:
1628
- if self.prettifier_mode == self.PRETTIFIER_CODEX:
1629
- # Try Pi-wrapped Codex format first (role-based messages)
1630
- if "role" in parsed:
1631
- formatted = self._format_pi_codex_message(parsed)
1632
- else:
1633
- # Try Pi event handler (message_update, turn_end, etc.)
1634
- formatted = self._format_pi_codex_event(parsed)
1635
- if formatted is not None:
1636
- # Empty string means "suppress this event"
1637
- if formatted == "":
1638
- continue
1639
- else:
1640
- # Try native Codex event handler
1641
- formatted = self._format_event_pretty_codex(parsed)
1642
- if formatted is None:
1643
- # Sanitize before raw JSON fallback: strip thinkingSignature,
1644
- # encrypted_content, and metadata from nested Codex events.
1645
- self._sanitize_codex_event(parsed, strip_metadata=True)
1646
- formatted = json.dumps(parsed, ensure_ascii=False)
1647
- elif self.prettifier_mode == self.PRETTIFIER_CLAUDE:
1648
- formatted = self._format_event_pretty_claude(parsed)
1649
- else:
1650
- formatted = self._format_event_pretty(parsed)
1651
- if formatted is not None:
1652
- print(formatted, flush=True)
1653
- else:
1654
- print(line, flush=True)
2678
+ if pretty and (
2679
+ pending_tool_execution_end is not None or pending_turn_end_after_tool is not None
2680
+ ):
2681
+ _flush_pending_tool_events()
2682
+
2683
+ _emit_parsed_event(parsed, raw_json_line=line)
2684
+
2685
+ # Flush any deferred tool/turn events at end-of-stream.
2686
+ if pretty and (
2687
+ pending_tool_execution_end is not None or pending_turn_end_after_tool is not None
2688
+ ):
2689
+ _flush_pending_tool_events()
2690
+ elif self._buffered_tool_stdout_lines:
2691
+ print("\n".join(self._buffered_tool_stdout_lines), flush=True)
2692
+ self._buffered_tool_stdout_lines.clear()
1655
2693
 
1656
2694
  except ValueError:
1657
2695
  # Watchdog closed stdout — expected when process exits but pipe stays open.
@@ -1659,6 +2697,7 @@ Model shorthands:
1659
2697
 
1660
2698
  # Signal watchdog that output loop is done
1661
2699
  output_done.set()
2700
+ cancel_delayed_toolcalls()
1662
2701
 
1663
2702
  # Write capture file for shell backend
1664
2703
  self._write_capture_file(capture_path)
@@ -1676,6 +2715,7 @@ Model shorthands:
1676
2715
 
1677
2716
  except KeyboardInterrupt:
1678
2717
  print("\nInterrupted by user", file=sys.stderr)
2718
+ cancel_delayed_toolcalls()
1679
2719
  try:
1680
2720
  process.terminate()
1681
2721
  try:
@@ -1690,6 +2730,7 @@ Model shorthands:
1690
2730
 
1691
2731
  except Exception as e:
1692
2732
  print(f"Error executing pi: {e}", file=sys.stderr)
2733
+ cancel_delayed_toolcalls()
1693
2734
  try:
1694
2735
  if process.poll() is None:
1695
2736
  process.terminate()
@@ -1728,7 +2769,9 @@ Model shorthands:
1728
2769
  self.prettifier_mode = self._detect_prettifier_mode(self.model_name)
1729
2770
  self.verbose = args.verbose
1730
2771
 
1731
- # Verbose mode enables live stream prettifier for real-time output
2772
+ # Verbose mode enables live stream prettifier for real-time output.
2773
+ # Codex models already default to LIVE; this ensures all models get
2774
+ # real-time streaming when -v is used.
1732
2775
  if args.verbose:
1733
2776
  self.prettifier_mode = self.PRETTIFIER_LIVE
1734
2777