juno-code 1.0.49 → 1.0.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +417 -203
  2. package/dist/bin/cli.d.mts +1 -1
  3. package/dist/bin/cli.d.ts +1 -1
  4. package/dist/bin/cli.js +1736 -976
  5. package/dist/bin/cli.js.map +1 -1
  6. package/dist/bin/cli.mjs +1735 -975
  7. package/dist/bin/cli.mjs.map +1 -1
  8. package/dist/bin/feedback-collector.js.map +1 -1
  9. package/dist/bin/feedback-collector.mjs.map +1 -1
  10. package/dist/index.d.mts +33 -7
  11. package/dist/index.d.ts +33 -7
  12. package/dist/index.js +202 -27
  13. package/dist/index.js.map +1 -1
  14. package/dist/index.mjs +202 -27
  15. package/dist/index.mjs.map +1 -1
  16. package/dist/templates/scripts/install_requirements.sh +41 -3
  17. package/dist/templates/scripts/kanban.sh +4 -0
  18. package/dist/templates/services/__pycache__/pi.cpython-313.pyc +0 -0
  19. package/dist/templates/services/pi.py +1281 -238
  20. package/dist/templates/skills/claude/kanban-workflow/SKILL.md +138 -0
  21. package/dist/templates/skills/claude/plan-kanban-tasks/SKILL.md +1 -1
  22. package/dist/templates/skills/claude/ralph-loop/scripts/kanban.sh +4 -0
  23. package/dist/templates/skills/claude/understand-project/SKILL.md +1 -1
  24. package/dist/templates/skills/codex/kanban-workflow/SKILL.md +139 -0
  25. package/dist/templates/skills/codex/plan-kanban-tasks/SKILL.md +32 -0
  26. package/dist/templates/skills/codex/ralph-loop/scripts/kanban.sh +4 -0
  27. package/dist/templates/skills/codex/understand-project/SKILL.md +46 -0
  28. package/dist/templates/skills/pi/kanban-workflow/SKILL.md +139 -0
  29. package/dist/templates/skills/pi/plan-kanban-tasks/SKILL.md +1 -1
  30. package/dist/templates/skills/pi/ralph-loop/SKILL.md +4 -0
  31. package/dist/templates/skills/pi/understand-project/SKILL.md +1 -1
  32. package/package.json +7 -5
@@ -7,13 +7,14 @@ Headless wrapper around the Pi coding agent CLI with JSON streaming and shorthan
7
7
  import argparse
8
8
  import json
9
9
  import os
10
+ import re
10
11
  import subprocess
11
12
  import sys
12
13
  import threading
13
14
  import time
14
15
  from datetime import datetime
15
16
  from pathlib import Path
16
- from typing import Dict, List, Optional, Tuple
17
+ from typing import Dict, List, Optional, Set, Tuple
17
18
 
18
19
 
19
20
  class PiService:
@@ -35,7 +36,8 @@ class PiService:
35
36
  ":gpt-5": "openai/gpt-5",
36
37
  ":gpt-4o": "openai/gpt-4o",
37
38
  ":o3": "openai/o3",
38
- ":codex": "openai/gpt-5.3-codex",
39
+ ":codex": "openai-codex/gpt-5.3-codex",
40
+ ":api-codex": "openai/gpt-5.3-codex",
39
41
  # Google
40
42
  ":gemini-pro": "google/gemini-2.5-pro",
41
43
  ":gemini-flash": "google/gemini-2.5-flash",
@@ -74,6 +76,17 @@ class PiService:
74
76
  PRETTIFIER_CODEX = "codex"
75
77
  PRETTIFIER_LIVE = "live"
76
78
 
79
+ # ANSI colors for tool prettifier output.
80
+ # - command/args blocks are green for readability
81
+ # - error results are red
82
+ ANSI_GREEN = "\x1b[38;5;40m"
83
+ ANSI_RED = "\x1b[38;5;203m"
84
+ ANSI_RESET = "\x1b[0m"
85
+
86
+ # Keep tool args readable while preventing giant inline payloads.
87
+ TOOL_ARG_STRING_MAX_CHARS = 400
88
+ _ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
89
+
77
90
  def __init__(self):
78
91
  self.model_name = self.DEFAULT_MODEL
79
92
  self.project_path = os.getcwd()
@@ -83,6 +96,18 @@ class PiService:
83
96
  self.session_id: Optional[str] = None
84
97
  self.message_counter = 0
85
98
  self.prettifier_mode = self.PRETTIFIER_PI
99
+ # Tool call grouping: buffer toolcall_end until tool_execution_end arrives
100
+ self._pending_tool_calls: Dict[str, dict] = {} # toolCallId -> {tool, args/command}
101
+ # Buffer tool_execution_start data for fallback + timing (when toolcall_end arrives late)
102
+ self._pending_exec_starts: Dict[str, dict] = {} # toolCallId -> {tool, args/command, started_at}
103
+ # Track whether we're inside a tool execution
104
+ self._in_tool_execution: bool = False
105
+ # Buffer raw non-JSON tool stdout so it doesn't interleave with structured events
106
+ self._buffered_tool_stdout_lines: List[str] = []
107
+ # Per-run usage/cost accumulation (used for result + agent_end total cost visibility)
108
+ self._run_usage_totals: Optional[dict] = None
109
+ self._run_total_cost_usd: Optional[float] = None
110
+ self._run_seen_usage_keys: Set[str] = set()
86
111
  # Claude prettifier state
87
112
  self.user_message_truncate = int(os.environ.get("CLAUDE_USER_MESSAGE_PRETTY_TRUNCATE", "4"))
88
113
  # Codex prettifier state
@@ -92,6 +117,114 @@ class PiService:
92
117
  # Keys to hide from intermediate assistant messages in Codex mode
93
118
  self._codex_metadata_keys = {"api", "provider", "model", "usage", "stopReason", "timestamp"}
94
119
 
120
+ def _color_enabled(self) -> bool:
121
+ """Check if ANSI color output is appropriate (TTY + NO_COLOR not set)."""
122
+ if os.environ.get("NO_COLOR") is not None:
123
+ return False
124
+ return hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
125
+
126
+ def _colorize_lines(self, text: str, color_code: str) -> str:
127
+ """Apply ANSI coloring per line so line-based renderers keep colors stable."""
128
+ if "\n" not in text:
129
+ return f"{color_code}{text}{self.ANSI_RESET}"
130
+ return "\n".join(f"{color_code}{line}{self.ANSI_RESET}" for line in text.split("\n"))
131
+
132
+ def _colorize_result(self, text: str, is_error: bool = False) -> str:
133
+ """Colorize tool output only for errors; success stays terminal-default."""
134
+ if not self._color_enabled():
135
+ return text
136
+ if not is_error:
137
+ return text
138
+ return self._colorize_lines(text, self.ANSI_RED)
139
+
140
+ def _colorize_command(self, text: str) -> str:
141
+ """Colorize tool command/args blocks in green when ANSI color is enabled."""
142
+ if not self._color_enabled():
143
+ return text
144
+ return self._colorize_lines(text, self.ANSI_GREEN)
145
+
146
+ def _normalize_multiline_tool_text(self, text: str) -> str:
147
+ """Render escaped newline sequences as real newlines for tool command/args blocks."""
148
+ if "\n" in text:
149
+ return text
150
+ if "\\n" in text:
151
+ return text.replace("\\n", "\n")
152
+ return text
153
+
154
+ def _format_tool_invocation_header(self, header: Dict) -> str:
155
+ """Serialize a tool header and render multiline command/args as separate readable blocks."""
156
+ metadata = dict(header)
157
+ block_label: Optional[str] = None
158
+ block_text: Optional[str] = None
159
+
160
+ command_val = metadata.get("command")
161
+ if isinstance(command_val, str) and command_val.strip():
162
+ command_text = self._normalize_multiline_tool_text(command_val)
163
+ if "\n" in command_text:
164
+ metadata.pop("command", None)
165
+ block_label = "command:"
166
+ block_text = self._colorize_command(command_text)
167
+
168
+ if block_text is None:
169
+ args_val = metadata.get("args")
170
+ if isinstance(args_val, str) and args_val.strip():
171
+ args_text = self._normalize_multiline_tool_text(args_val)
172
+ if "\n" in args_text:
173
+ metadata.pop("args", None)
174
+ block_label = "args:"
175
+ block_text = self._colorize_command(args_text)
176
+
177
+ output = json.dumps(metadata, ensure_ascii=False)
178
+ if block_text is None:
179
+ return output
180
+ return output + "\n" + block_label + "\n" + block_text
181
+
182
+ def _strip_ansi_sequences(self, text: str) -> str:
183
+ """Remove ANSI escape sequences to prevent color bleed in prettified output."""
184
+ if not isinstance(text, str) or "\x1b" not in text:
185
+ return text
186
+ return self._ANSI_ESCAPE_RE.sub("", text)
187
+
188
+ def _sanitize_tool_argument_value(self, value):
189
+ """Recursively sanitize tool args while preserving JSON structure."""
190
+ if isinstance(value, str):
191
+ clean = self._strip_ansi_sequences(value)
192
+ if len(clean) > self.TOOL_ARG_STRING_MAX_CHARS:
193
+ return clean[:self.TOOL_ARG_STRING_MAX_CHARS] + "..."
194
+ return clean
195
+ if isinstance(value, dict):
196
+ return {k: self._sanitize_tool_argument_value(v) for k, v in value.items()}
197
+ if isinstance(value, list):
198
+ return [self._sanitize_tool_argument_value(v) for v in value]
199
+ return value
200
+
201
+ def _format_execution_time(self, payload: dict, pending: Optional[dict] = None) -> Optional[str]:
202
+ """Return execution time string (e.g. 0.12s) from payload or measured start time."""
203
+ seconds: Optional[float] = None
204
+
205
+ # Prefer explicit durations if Pi adds them in future versions.
206
+ for key in ("executionTimeSeconds", "durationSeconds", "elapsedSeconds"):
207
+ value = payload.get(key)
208
+ if isinstance(value, (int, float)):
209
+ seconds = float(value)
210
+ break
211
+
212
+ if seconds is None:
213
+ for key in ("executionTimeMs", "durationMs", "elapsedMs"):
214
+ value = payload.get(key)
215
+ if isinstance(value, (int, float)):
216
+ seconds = float(value) / 1000.0
217
+ break
218
+
219
+ if seconds is None and isinstance(pending, dict):
220
+ started_at = pending.get("started_at")
221
+ if isinstance(started_at, (int, float)):
222
+ seconds = max(0.0, time.perf_counter() - started_at)
223
+
224
+ if seconds is None:
225
+ return None
226
+ return f"{seconds:.2f}s"
227
+
95
228
  def expand_model_shorthand(self, model: str) -> str:
96
229
  """Expand shorthand model names (colon-prefixed) to full identifiers."""
97
230
  if model.startswith(":"):
@@ -103,13 +236,15 @@ class PiService:
103
236
 
104
237
  Pi CLI always uses its own event protocol (message, turn_end,
105
238
  message_update, agent_end, etc.) regardless of the underlying LLM.
106
- The exception is Codex models where Pi wraps Codex-format events
107
- (agent_reasoning, agent_message, exec_command_end).
239
+ Codex models also use Pi's event protocol but may additionally emit
240
+ native Codex events (agent_reasoning, agent_message, exec_command_end).
241
+ The LIVE prettifier handles both Pi-native and Codex-native events,
242
+ giving real-time streaming output for all model types.
108
243
  Claude models still use Pi's event protocol, NOT Claude CLI events.
109
244
  """
110
245
  model_lower = model.lower()
111
246
  if "codex" in model_lower:
112
- return self.PRETTIFIER_CODEX
247
+ return self.PRETTIFIER_LIVE
113
248
  # All non-Codex models (including Claude) use Pi's native event protocol
114
249
  return self.PRETTIFIER_PI
115
250
 
@@ -147,7 +282,8 @@ Model shorthands:
147
282
  :gpt-5 -> openai/gpt-5
148
283
  :gpt-4o -> openai/gpt-4o
149
284
  :o3 -> openai/o3
150
- :codex -> openai/gpt-5.3-codex
285
+ :codex -> openai-codex/gpt-5.3-codex
286
+ :api-codex -> openai/gpt-5.3-codex
151
287
  :gemini-pro -> google/gemini-2.5-pro
152
288
  :gemini-flash -> google/gemini-2.5-flash
153
289
  :groq -> groq/llama-4-scout-17b-16e-instruct
@@ -544,6 +680,7 @@ Model shorthands:
544
680
  return text
545
681
  # Unescape JSON-escaped newlines for human-readable display
546
682
  display_text = text.replace("\\n", "\n").replace("\\t", "\t")
683
+ display_text = self._strip_ansi_sequences(display_text)
547
684
  lines = display_text.split("\n")
548
685
  max_lines = self._codex_tool_result_max_lines
549
686
  if len(lines) <= max_lines:
@@ -643,12 +780,11 @@ Model shorthands:
643
780
  args = item.get("arguments", {})
644
781
  if isinstance(args, dict):
645
782
  cmd = args.get("command", "")
646
- if cmd:
647
- parts.append(f"[toolCall] {name}: {cmd}")
783
+ if isinstance(cmd, str) and cmd:
784
+ parts.append(f"[toolCall] {name}: {self._sanitize_tool_argument_value(cmd)}")
648
785
  else:
649
- args_str = json.dumps(args, ensure_ascii=False)
650
- if len(args_str) > 200:
651
- args_str = args_str[:200] + "..."
786
+ args_clean = self._sanitize_tool_argument_value(args)
787
+ args_str = json.dumps(args_clean, ensure_ascii=False)
652
788
  parts.append(f"[toolCall] {name}: {args_str}")
653
789
  else:
654
790
  parts.append(f"[toolCall] {name}")
@@ -734,10 +870,13 @@ Model shorthands:
734
870
  header["thinking"] = thinking_text
735
871
  return json.dumps(header, ensure_ascii=False)
736
872
 
737
- # toolcall_end: show tool name and arguments
873
+ # toolcall_end: buffer for grouping with tool_execution_end
738
874
  if ame_type == "toolcall_end":
739
- self.message_counter += 1
740
875
  tool_call = ame.get("toolCall", {})
876
+ if self._buffer_tool_call_end(tool_call, now):
877
+ return "" # suppress — will emit combined event on tool_execution_end
878
+ # No toolCallId — fallback to original format
879
+ self.message_counter += 1
741
880
  header = {
742
881
  "type": "toolcall_end",
743
882
  "datetime": now,
@@ -748,14 +887,13 @@ Model shorthands:
748
887
  args = tool_call.get("arguments", {})
749
888
  if isinstance(args, dict):
750
889
  cmd = args.get("command", "")
751
- if cmd:
752
- header["command"] = cmd
890
+ if isinstance(cmd, str) and cmd:
891
+ header["command"] = self._sanitize_tool_argument_value(cmd)
753
892
  else:
754
- args_str = json.dumps(args, ensure_ascii=False)
755
- if len(args_str) > 200:
756
- args_str = args_str[:200] + "..."
757
- header["args"] = args_str if isinstance(args_str, str) else args
758
- return json.dumps(header, ensure_ascii=False)
893
+ header["args"] = self._sanitize_tool_argument_value(args)
894
+ elif isinstance(args, str) and args.strip():
895
+ header["args"] = self._sanitize_tool_argument_value(args)
896
+ return self._format_tool_invocation_header(header)
759
897
 
760
898
  # Other message_update subtypes: suppress by default
761
899
  return ""
@@ -773,14 +911,12 @@ Model shorthands:
773
911
  header["tool_results_count"] = len(tool_results)
774
912
  return json.dumps(header, ensure_ascii=False)
775
913
 
776
- # --- message_start: minimal header ---
914
+ # --- message_start: minimal header (no counter — only *_end events get counters) ---
777
915
  if event_type == "message_start":
778
- self.message_counter += 1
779
916
  message = parsed.get("message", {})
780
917
  header = {
781
918
  "type": "message_start",
782
919
  "datetime": now,
783
- "counter": f"#{self.message_counter}",
784
920
  }
785
921
  if isinstance(message, dict):
786
922
  role = message.get("role")
@@ -798,58 +934,104 @@ Model shorthands:
798
934
  }
799
935
  return json.dumps(header, ensure_ascii=False)
800
936
 
801
- # --- tool_execution_start ---
937
+ # --- tool_execution_start: always suppress, buffer args ---
802
938
  if event_type == "tool_execution_start":
803
- self.message_counter += 1
804
- header = {
805
- "type": "tool_execution_start",
806
- "datetime": now,
807
- "counter": f"#{self.message_counter}",
808
- "tool": parsed.get("toolName", ""),
809
- }
810
- args_val = parsed.get("args")
811
- if isinstance(args_val, dict):
812
- args_str = json.dumps(args_val, ensure_ascii=False)
813
- if len(args_str) > 200:
814
- header["args"] = args_str[:200] + "..."
815
- else:
816
- header["args"] = args_val
817
- return json.dumps(header, ensure_ascii=False)
939
+ self._buffer_exec_start(parsed)
940
+ self._in_tool_execution = True
941
+ return "" # suppress
818
942
 
819
- # --- tool_execution_end ---
943
+ # --- tool_execution_end: combine with buffered data ---
820
944
  if event_type == "tool_execution_end":
945
+ self._in_tool_execution = False
946
+ tool_call_id = parsed.get("toolCallId")
947
+
948
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
949
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
950
+ if pending_tool and pending_exec and "started_at" in pending_exec:
951
+ pending_tool["started_at"] = pending_exec["started_at"]
952
+ pending = pending_tool or pending_exec
953
+
954
+ if pending:
955
+ return self._build_combined_tool_event(pending, parsed, now)
956
+
957
+ # No buffered data — minimal fallback
821
958
  self.message_counter += 1
822
959
  header = {
823
- "type": "tool_execution_end",
960
+ "type": "tool",
824
961
  "datetime": now,
825
962
  "counter": f"#{self.message_counter}",
826
963
  "tool": parsed.get("toolName", ""),
827
964
  }
965
+ execution_time = self._format_execution_time(parsed)
966
+ if execution_time:
967
+ header["execution_time"] = execution_time
968
+
828
969
  is_error = parsed.get("isError", False)
829
970
  if is_error:
830
971
  header["isError"] = True
972
+
831
973
  result_val = parsed.get("result")
974
+ colorize_error = self._color_enabled() and bool(is_error)
975
+
976
+ if isinstance(result_val, str) and result_val.strip():
977
+ truncated = self._truncate_tool_result_text(result_val)
978
+ if "\n" in truncated or colorize_error:
979
+ label = "result:"
980
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
981
+ if colorize_error:
982
+ label = self._colorize_result(label, is_error=True)
983
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
984
+ header["result"] = truncated
985
+ return self._format_tool_invocation_header(header)
986
+
832
987
  if isinstance(result_val, dict):
833
- # Extract text content from result
834
988
  result_content = result_val.get("content")
835
989
  if isinstance(result_content, list):
836
990
  for rc_item in result_content:
837
991
  if isinstance(rc_item, dict) and rc_item.get("type") == "text":
838
992
  text = rc_item.get("text", "")
839
993
  truncated = self._truncate_tool_result_text(text)
840
- if "\n" in truncated:
841
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated
994
+ if "\n" in truncated or colorize_error:
995
+ label = "result:"
996
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
997
+ if colorize_error:
998
+ label = self._colorize_result(label, is_error=True)
999
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
842
1000
  header["result"] = truncated
843
- return json.dumps(header, ensure_ascii=False)
844
- return json.dumps(header, ensure_ascii=False)
1001
+ return self._format_tool_invocation_header(header)
1002
+
1003
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1004
+ if "\n" in result_json or colorize_error:
1005
+ label = "result:"
1006
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1007
+ if colorize_error:
1008
+ label = self._colorize_result(label, is_error=True)
1009
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1010
+ header["result"] = result_json
1011
+ return self._format_tool_invocation_header(header)
1012
+
1013
+ if isinstance(result_val, list):
1014
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1015
+ if "\n" in result_json or colorize_error:
1016
+ label = "result:"
1017
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1018
+ if colorize_error:
1019
+ label = self._colorize_result(label, is_error=True)
1020
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1021
+ header["result"] = result_json
1022
+ return self._format_tool_invocation_header(header)
1023
+
1024
+ return self._format_tool_invocation_header(header)
1025
+
1026
+ # --- turn_start: suppress (no user-visible value) ---
1027
+ if event_type == "turn_start":
1028
+ return ""
845
1029
 
846
- # --- agent_start, turn_start: simple headers ---
847
- if event_type in ("agent_start", "turn_start"):
848
- self.message_counter += 1
1030
+ # --- agent_start: simple header (no counter — only *_end events get counters) ---
1031
+ if event_type == "agent_start":
849
1032
  return json.dumps({
850
1033
  "type": event_type,
851
1034
  "datetime": now,
852
- "counter": f"#{self.message_counter}",
853
1035
  }, ensure_ascii=False)
854
1036
 
855
1037
  # --- agent_end: capture and show summary ---
@@ -863,6 +1045,9 @@ Model shorthands:
863
1045
  messages = parsed.get("messages")
864
1046
  if isinstance(messages, list):
865
1047
  header["message_count"] = len(messages)
1048
+ total_cost_usd = self._extract_total_cost_usd(parsed)
1049
+ if total_cost_usd is not None:
1050
+ header["total_cost_usd"] = total_cost_usd
866
1051
  return json.dumps(header, ensure_ascii=False)
867
1052
 
868
1053
  # Not a Pi-wrapped event type we handle
@@ -880,7 +1065,7 @@ Model shorthands:
880
1065
  base_type = header_type or msg_type or "message"
881
1066
 
882
1067
  def make_header(type_value: str):
883
- hdr: Dict = {"type": type_value, "datetime": now}
1068
+ hdr: Dict = {"type": type_value, "datetime": now, "counter": f"#{self.message_counter}"}
884
1069
  if item_id:
885
1070
  hdr["id"] = item_id
886
1071
  if outer_type and msg_type and outer_type != msg_type:
@@ -1091,6 +1276,107 @@ Model shorthands:
1091
1276
 
1092
1277
  return ""
1093
1278
 
1279
+ def _buffer_tool_call_end(self, tool_call: dict, now: str) -> bool:
1280
+ """Buffer toolcall_end info for grouping with tool_execution_end.
1281
+
1282
+ Returns True if successfully buffered (caller should suppress output),
1283
+ False if no toolCallId present (caller should emit normally).
1284
+ """
1285
+ tc_id = tool_call.get("toolCallId", "") if isinstance(tool_call, dict) else ""
1286
+ if not tc_id:
1287
+ return False
1288
+
1289
+ pending: Dict = {"tool": tool_call.get("name", ""), "datetime": now}
1290
+ args = tool_call.get("arguments", {})
1291
+
1292
+ if isinstance(args, dict):
1293
+ cmd = args.get("command", "")
1294
+ if isinstance(cmd, str) and cmd:
1295
+ pending["command"] = self._sanitize_tool_argument_value(cmd)
1296
+ else:
1297
+ pending["args"] = self._sanitize_tool_argument_value(args)
1298
+ elif isinstance(args, str) and args.strip():
1299
+ pending["args"] = self._sanitize_tool_argument_value(args)
1300
+
1301
+ self._pending_tool_calls[tc_id] = pending
1302
+ return True
1303
+
1304
+ def _buffer_exec_start(self, payload: dict) -> None:
1305
+ """Buffer tool_execution_start data for tool_execution_end fallback + timing."""
1306
+ tc_id = payload.get("toolCallId", "")
1307
+ if not tc_id:
1308
+ return
1309
+
1310
+ pending: Dict = {
1311
+ "tool": payload.get("toolName", ""),
1312
+ "started_at": time.perf_counter(),
1313
+ }
1314
+ args_val = payload.get("args")
1315
+ if isinstance(args_val, dict):
1316
+ cmd = args_val.get("command", "")
1317
+ if isinstance(cmd, str) and cmd:
1318
+ pending["command"] = self._sanitize_tool_argument_value(cmd)
1319
+ else:
1320
+ pending["args"] = self._sanitize_tool_argument_value(args_val)
1321
+ elif isinstance(args_val, str) and args_val.strip():
1322
+ pending["args"] = self._sanitize_tool_argument_value(args_val)
1323
+
1324
+ self._pending_exec_starts[tc_id] = pending
1325
+
1326
+ def _build_combined_tool_event(self, pending: dict, payload: dict, now: str) -> str:
1327
+ """Build a combined 'tool' event from buffered toolcall_end + tool_execution_end."""
1328
+ self.message_counter += 1
1329
+ header: Dict = {
1330
+ "type": "tool",
1331
+ "datetime": now,
1332
+ "counter": f"#{self.message_counter}",
1333
+ "tool": pending.get("tool", payload.get("toolName", "")),
1334
+ }
1335
+
1336
+ # Args from buffered toolcall/tool_execution_start
1337
+ if "command" in pending:
1338
+ header["command"] = pending["command"]
1339
+ elif "args" in pending:
1340
+ header["args"] = pending["args"]
1341
+
1342
+ # Execution time (source of truth: tool_execution_start -> tool_execution_end)
1343
+ execution_time = self._format_execution_time(payload, pending)
1344
+ if execution_time:
1345
+ header["execution_time"] = execution_time
1346
+
1347
+ is_error = payload.get("isError", False)
1348
+ if is_error:
1349
+ header["isError"] = True
1350
+
1351
+ # Result extraction (handles string, dict with content array, and list)
1352
+ result_val = payload.get("result")
1353
+ result_text = None
1354
+ if isinstance(result_val, str) and result_val.strip():
1355
+ result_text = self._truncate_tool_result_text(result_val)
1356
+ elif isinstance(result_val, dict):
1357
+ result_content = result_val.get("content")
1358
+ if isinstance(result_content, list):
1359
+ for rc_item in result_content:
1360
+ if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1361
+ result_text = self._truncate_tool_result_text(rc_item.get("text", ""))
1362
+ break
1363
+ if result_text is None:
1364
+ result_text = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1365
+ elif isinstance(result_val, list):
1366
+ result_text = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1367
+
1368
+ if result_text:
1369
+ colorize_error = self._color_enabled() and bool(is_error)
1370
+ if "\n" in result_text or colorize_error:
1371
+ label = "result:"
1372
+ colored_text = self._colorize_result(result_text, is_error=bool(is_error))
1373
+ if colorize_error:
1374
+ label = self._colorize_result(label, is_error=True)
1375
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored_text
1376
+ header["result"] = result_text
1377
+
1378
+ return self._format_tool_invocation_header(header)
1379
+
1094
1380
  def _format_event_pretty(self, payload: dict) -> Optional[str]:
1095
1381
  """
1096
1382
  Format a Pi JSON streaming event for human-readable output.
@@ -1099,31 +1385,41 @@ Model shorthands:
1099
1385
  try:
1100
1386
  event_type = payload.get("type", "")
1101
1387
  now = datetime.now().strftime("%I:%M:%S %p")
1102
- self.message_counter += 1
1103
1388
 
1389
+ # Counter is only added to *_end events (below, per-branch)
1104
1390
  header: Dict = {
1105
1391
  "type": event_type,
1106
1392
  "datetime": now,
1107
- "counter": f"#{self.message_counter}",
1108
1393
  }
1109
1394
 
1110
- # --- Session header ---
1395
+ # --- Session header (no counter) ---
1111
1396
  if event_type == "session":
1112
1397
  header["version"] = payload.get("version")
1113
1398
  header["id"] = payload.get("id")
1114
1399
  return json.dumps(header, ensure_ascii=False)
1115
1400
 
1116
- # --- Agent lifecycle events ---
1117
- if event_type in ("agent_start", "turn_start"):
1401
+ # --- turn_start: suppress (no user-visible value) ---
1402
+ if event_type == "turn_start":
1403
+ return None
1404
+
1405
+ # --- agent_start: simple header (no counter) ---
1406
+ if event_type == "agent_start":
1118
1407
  return json.dumps(header, ensure_ascii=False)
1119
1408
 
1120
1409
  if event_type == "agent_end":
1410
+ self.message_counter += 1
1411
+ header["counter"] = f"#{self.message_counter}"
1121
1412
  messages = payload.get("messages")
1122
1413
  if isinstance(messages, list):
1123
1414
  header["message_count"] = len(messages)
1415
+ total_cost_usd = self._extract_total_cost_usd(payload)
1416
+ if total_cost_usd is not None:
1417
+ header["total_cost_usd"] = total_cost_usd
1124
1418
  return json.dumps(header, ensure_ascii=False)
1125
1419
 
1126
1420
  if event_type == "turn_end":
1421
+ self.message_counter += 1
1422
+ header["counter"] = f"#{self.message_counter}"
1127
1423
  tool_results = payload.get("toolResults")
1128
1424
  if isinstance(tool_results, list):
1129
1425
  header["tool_results_count"] = len(tool_results)
@@ -1146,6 +1442,43 @@ Model shorthands:
1146
1442
  if event_subtype in self._PI_HIDDEN_MESSAGE_UPDATE_EVENTS:
1147
1443
  return None # Suppress noisy streaming deltas
1148
1444
 
1445
+ # toolcall_end: buffer for grouping with tool_execution_end
1446
+ if isinstance(ame, dict) and ame_type == "toolcall_end":
1447
+ tool_call = ame.get("toolCall", {})
1448
+ if self._buffer_tool_call_end(tool_call, now):
1449
+ return None # suppress — will emit combined event on tool_execution_end
1450
+ # No toolCallId — fallback to original format
1451
+ self.message_counter += 1
1452
+ header["counter"] = f"#{self.message_counter}"
1453
+ header["event"] = ame_type
1454
+ if isinstance(tool_call, dict):
1455
+ header["tool"] = tool_call.get("name", "")
1456
+ args = tool_call.get("arguments", {})
1457
+ if isinstance(args, dict):
1458
+ cmd = args.get("command", "")
1459
+ if isinstance(cmd, str) and cmd:
1460
+ header["command"] = self._sanitize_tool_argument_value(cmd)
1461
+ else:
1462
+ header["args"] = self._sanitize_tool_argument_value(args)
1463
+ elif isinstance(args, str) and args.strip():
1464
+ header["args"] = self._sanitize_tool_argument_value(args)
1465
+ return self._format_tool_invocation_header(header)
1466
+
1467
+ # thinking_end: show thinking content (*_end → gets counter)
1468
+ if isinstance(ame, dict) and ame_type == "thinking_end":
1469
+ self.message_counter += 1
1470
+ header["counter"] = f"#{self.message_counter}"
1471
+ header["event"] = ame_type
1472
+ thinking_text = ame.get("thinking", "") or ame.get("content", "") or ame.get("text", "")
1473
+ if isinstance(thinking_text, str) and thinking_text.strip():
1474
+ header["thinking"] = thinking_text
1475
+ return json.dumps(header, ensure_ascii=False)
1476
+
1477
+ # Any other *_end subtypes (e.g. text_end) get counter
1478
+ if isinstance(ame, dict) and ame_type and ame_type.endswith("_end"):
1479
+ self.message_counter += 1
1480
+ header["counter"] = f"#{self.message_counter}"
1481
+
1149
1482
  message = payload.get("message", {})
1150
1483
  text = self._extract_text_from_message(message) if isinstance(message, dict) else ""
1151
1484
 
@@ -1165,61 +1498,103 @@ Model shorthands:
1165
1498
  return json.dumps(header, ensure_ascii=False)
1166
1499
 
1167
1500
  if event_type == "message_end":
1501
+ self.message_counter += 1
1502
+ header["counter"] = f"#{self.message_counter}"
1168
1503
  # Skip message text - already displayed by text_end/thinking_end/toolcall_end
1169
1504
  return json.dumps(header, ensure_ascii=False)
1170
1505
 
1171
1506
  # --- Tool execution events ---
1507
+ # Always suppress tool_execution_start: buffer its args for
1508
+ # tool_execution_end to use. The user sees nothing until the
1509
+ # tool finishes, then gets a single combined "tool" event.
1172
1510
  if event_type == "tool_execution_start":
1173
- header["tool"] = payload.get("toolName", "")
1174
- tool_call_id = payload.get("toolCallId")
1175
- if tool_call_id:
1176
- header["id"] = tool_call_id
1177
- args_val = payload.get("args")
1178
- if isinstance(args_val, dict):
1179
- # Show abbreviated args inline
1180
- args_str = json.dumps(args_val, ensure_ascii=False)
1181
- if len(args_str) > 200:
1182
- # Truncate for readability
1183
- header["args"] = args_str[:200] + "..."
1184
- else:
1185
- header["args"] = args_val
1186
- elif isinstance(args_val, str) and args_val.strip():
1187
- if "\n" in args_val:
1188
- return json.dumps(header, ensure_ascii=False) + "\nargs:\n" + args_val
1189
- header["args"] = args_val
1190
- return json.dumps(header, ensure_ascii=False)
1511
+ self._buffer_exec_start(payload)
1512
+ self._in_tool_execution = True
1513
+ return None
1191
1514
 
1192
1515
  if event_type == "tool_execution_update":
1193
- header["tool"] = payload.get("toolName", "")
1194
- tool_call_id = payload.get("toolCallId")
1195
- if tool_call_id:
1196
- header["id"] = tool_call_id
1197
- partial = payload.get("partialResult")
1198
- if isinstance(partial, str) and partial.strip():
1199
- if "\n" in partial:
1200
- return json.dumps(header, ensure_ascii=False) + "\npartialResult:\n" + partial
1201
- header["partialResult"] = partial
1202
- return json.dumps(header, ensure_ascii=False)
1516
+ # Suppress updates — result will arrive in tool_execution_end
1517
+ return None
1203
1518
 
1204
1519
  if event_type == "tool_execution_end":
1205
- header["tool"] = payload.get("toolName", "")
1520
+ self._in_tool_execution = False
1206
1521
  tool_call_id = payload.get("toolCallId")
1207
- if tool_call_id:
1208
- header["id"] = tool_call_id
1522
+
1523
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
1524
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
1525
+ if pending_tool and pending_exec and "started_at" in pending_exec:
1526
+ pending_tool["started_at"] = pending_exec["started_at"]
1527
+ pending = pending_tool or pending_exec
1528
+
1529
+ if pending:
1530
+ return self._build_combined_tool_event(pending, payload, now)
1531
+
1532
+ # No buffered data at all — minimal fallback
1533
+ self.message_counter += 1
1534
+ header["type"] = "tool"
1535
+ header["counter"] = f"#{self.message_counter}"
1536
+ header["tool"] = payload.get("toolName", "")
1537
+
1538
+ execution_time = self._format_execution_time(payload)
1539
+ if execution_time:
1540
+ header["execution_time"] = execution_time
1541
+
1209
1542
  is_error = payload.get("isError", False)
1210
1543
  if is_error:
1211
1544
  header["isError"] = True
1545
+
1212
1546
  result_val = payload.get("result")
1547
+ colorize_error = self._color_enabled() and bool(is_error)
1548
+
1213
1549
  if isinstance(result_val, str) and result_val.strip():
1214
- if "\n" in result_val:
1215
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + result_val
1216
- header["result"] = result_val
1217
- elif isinstance(result_val, (dict, list)):
1218
- result_str = json.dumps(result_val, ensure_ascii=False)
1219
- if "\n" in result_str or len(result_str) > 200:
1220
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + result_str
1221
- header["result"] = result_val
1222
- return json.dumps(header, ensure_ascii=False)
1550
+ truncated = self._truncate_tool_result_text(result_val)
1551
+ if "\n" in truncated or colorize_error:
1552
+ label = "result:"
1553
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1554
+ if colorize_error:
1555
+ label = self._colorize_result(label, is_error=True)
1556
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1557
+ header["result"] = truncated
1558
+ return self._format_tool_invocation_header(header)
1559
+
1560
+ if isinstance(result_val, dict):
1561
+ result_content = result_val.get("content")
1562
+ if isinstance(result_content, list):
1563
+ for rc_item in result_content:
1564
+ if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1565
+ text = rc_item.get("text", "")
1566
+ truncated = self._truncate_tool_result_text(text)
1567
+ if "\n" in truncated or colorize_error:
1568
+ label = "result:"
1569
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1570
+ if colorize_error:
1571
+ label = self._colorize_result(label, is_error=True)
1572
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1573
+ header["result"] = truncated
1574
+ return self._format_tool_invocation_header(header)
1575
+
1576
+ result_str = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1577
+ if "\n" in result_str or len(result_str) > 200 or colorize_error:
1578
+ label = "result:"
1579
+ colored = self._colorize_result(result_str, is_error=bool(is_error))
1580
+ if colorize_error:
1581
+ label = self._colorize_result(label, is_error=True)
1582
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1583
+ header["result"] = result_str
1584
+ return self._format_tool_invocation_header(header)
1585
+
1586
+ if isinstance(result_val, list):
1587
+ result_str = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1588
+ if "\n" in result_str or len(result_str) > 200 or colorize_error:
1589
+ label = "result:"
1590
+ colored = self._colorize_result(result_str, is_error=bool(is_error))
1591
+ if colorize_error:
1592
+ label = self._colorize_result(label, is_error=True)
1593
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1594
+ header["result"] = result_str
1595
+ return self._format_tool_invocation_header(header)
1596
+
1597
+ return self._format_tool_invocation_header(header)
1223
1598
 
1224
1599
  # --- Retry/compaction events ---
1225
1600
  if event_type == "auto_retry_start":
@@ -1232,6 +1607,8 @@ Model shorthands:
1232
1607
  return json.dumps(header, ensure_ascii=False)
1233
1608
 
1234
1609
  if event_type == "auto_retry_end":
1610
+ self.message_counter += 1
1611
+ header["counter"] = f"#{self.message_counter}"
1235
1612
  header["success"] = payload.get("success")
1236
1613
  header["attempt"] = payload.get("attempt")
1237
1614
  final_err = payload.get("finalError")
@@ -1277,7 +1654,7 @@ Model shorthands:
1277
1654
  return delta
1278
1655
  return ""
1279
1656
 
1280
- # Section start markers
1657
+ # Section start markers (no counter — only *_end events get counters)
1281
1658
  if ame_type == "text_start":
1282
1659
  return json.dumps({"type": "text_start", "datetime": now}) + "\n"
1283
1660
 
@@ -1286,26 +1663,33 @@ Model shorthands:
1286
1663
 
1287
1664
  # Section end markers (text was already streamed)
1288
1665
  if ame_type == "text_end":
1289
- return "\n" + json.dumps({"type": "text_end", "datetime": now}) + "\n"
1666
+ self.message_counter += 1
1667
+ return "\n" + json.dumps({"type": "text_end", "datetime": now, "counter": f"#{self.message_counter}"}) + "\n"
1290
1668
 
1291
1669
  if ame_type == "thinking_end":
1292
- return "\n" + json.dumps({"type": "thinking_end", "datetime": now}) + "\n"
1670
+ self.message_counter += 1
1671
+ return "\n" + json.dumps({"type": "thinking_end", "datetime": now, "counter": f"#{self.message_counter}"}) + "\n"
1293
1672
 
1294
- # Tool call end: show tool info
1673
+ # Tool call end: buffer for grouping with tool_execution_end
1295
1674
  if ame_type == "toolcall_end":
1296
1675
  tc = ame.get("toolCall", {})
1297
- header = {"type": "toolcall_end", "datetime": now}
1676
+ if self._buffer_tool_call_end(tc, now):
1677
+ return "" # suppress — will emit combined event on tool_execution_end
1678
+ # No toolCallId — fallback to original format
1679
+ self.message_counter += 1
1680
+ header = {"type": "toolcall_end", "datetime": now, "counter": f"#{self.message_counter}"}
1298
1681
  if isinstance(tc, dict):
1299
1682
  header["tool"] = tc.get("name", "")
1300
1683
  args = tc.get("arguments", {})
1301
1684
  if isinstance(args, dict):
1302
1685
  cmd = args.get("command", "")
1303
- if cmd:
1304
- header["command"] = cmd
1686
+ if isinstance(cmd, str) and cmd:
1687
+ header["command"] = self._sanitize_tool_argument_value(cmd)
1305
1688
  else:
1306
- args_str = json.dumps(args, ensure_ascii=False)
1307
- header["args"] = args_str[:200] + "..." if len(args_str) > 200 else args
1308
- return json.dumps(header, ensure_ascii=False) + "\n"
1689
+ header["args"] = self._sanitize_tool_argument_value(args)
1690
+ elif isinstance(args, str) and args.strip():
1691
+ header["args"] = self._sanitize_tool_argument_value(args)
1692
+ return self._format_tool_invocation_header(header) + "\n"
1309
1693
 
1310
1694
  # Suppress all other message_update subtypes (toolcall_start, toolcall_delta, etc.)
1311
1695
  return ""
@@ -1314,69 +1698,224 @@ Model shorthands:
1314
1698
  if event_type in ("message_start", "message_end"):
1315
1699
  return ""
1316
1700
 
1317
- # tool_execution_start
1701
+ # tool_execution_start: always suppress, buffer args
1318
1702
  if event_type == "tool_execution_start":
1319
- header = {
1320
- "type": "tool_execution_start",
1321
- "datetime": now,
1322
- "tool": parsed.get("toolName", ""),
1323
- }
1324
- args_val = parsed.get("args")
1325
- if isinstance(args_val, dict):
1326
- args_str = json.dumps(args_val, ensure_ascii=False)
1327
- if len(args_str) > 200:
1328
- header["args"] = args_str[:200] + "..."
1329
- else:
1330
- header["args"] = args_val
1331
- return json.dumps(header, ensure_ascii=False) + "\n"
1703
+ self._buffer_exec_start(parsed)
1704
+ self._in_tool_execution = True
1705
+ return "" # suppress
1332
1706
 
1333
- # tool_execution_end
1707
+ # tool_execution_end: combine with buffered data
1334
1708
  if event_type == "tool_execution_end":
1709
+ self._in_tool_execution = False
1710
+ tool_call_id = parsed.get("toolCallId")
1711
+
1712
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
1713
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
1714
+ if pending_tool and pending_exec and "started_at" in pending_exec:
1715
+ pending_tool["started_at"] = pending_exec["started_at"]
1716
+ pending = pending_tool or pending_exec
1717
+
1718
+ if pending:
1719
+ return self._build_combined_tool_event(pending, parsed, now) + "\n"
1720
+
1721
+ # No buffered data — minimal fallback
1722
+ self.message_counter += 1
1335
1723
  header = {
1336
- "type": "tool_execution_end",
1724
+ "type": "tool",
1337
1725
  "datetime": now,
1726
+ "counter": f"#{self.message_counter}",
1338
1727
  "tool": parsed.get("toolName", ""),
1339
1728
  }
1729
+ execution_time = self._format_execution_time(parsed)
1730
+ if execution_time:
1731
+ header["execution_time"] = execution_time
1732
+
1340
1733
  is_error = parsed.get("isError", False)
1341
1734
  if is_error:
1342
1735
  header["isError"] = True
1736
+
1343
1737
  result_val = parsed.get("result")
1738
+ colorize_error = self._color_enabled() and bool(is_error)
1739
+
1344
1740
  if isinstance(result_val, str) and result_val.strip():
1345
1741
  truncated = self._truncate_tool_result_text(result_val)
1346
- if "\n" in truncated:
1347
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated + "\n"
1742
+ if "\n" in truncated or colorize_error:
1743
+ label = "result:"
1744
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1745
+ if colorize_error:
1746
+ label = self._colorize_result(label, is_error=True)
1747
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1348
1748
  header["result"] = truncated
1349
- elif isinstance(result_val, dict):
1749
+ return self._format_tool_invocation_header(header) + "\n"
1750
+
1751
+ if isinstance(result_val, dict):
1350
1752
  result_content = result_val.get("content")
1351
1753
  if isinstance(result_content, list):
1352
1754
  for rc_item in result_content:
1353
1755
  if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1354
1756
  text = rc_item.get("text", "")
1355
1757
  truncated = self._truncate_tool_result_text(text)
1356
- if "\n" in truncated:
1357
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated + "\n"
1758
+ if "\n" in truncated or colorize_error:
1759
+ label = "result:"
1760
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1761
+ if colorize_error:
1762
+ label = self._colorize_result(label, is_error=True)
1763
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1358
1764
  header["result"] = truncated
1359
- break
1360
- return json.dumps(header, ensure_ascii=False) + "\n"
1765
+ return self._format_tool_invocation_header(header) + "\n"
1766
+
1767
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1768
+ if "\n" in result_json or colorize_error:
1769
+ label = "result:"
1770
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1771
+ if colorize_error:
1772
+ label = self._colorize_result(label, is_error=True)
1773
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1774
+ header["result"] = result_json
1775
+ return self._format_tool_invocation_header(header) + "\n"
1776
+
1777
+ if isinstance(result_val, list):
1778
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1779
+ if "\n" in result_json or colorize_error:
1780
+ label = "result:"
1781
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1782
+ if colorize_error:
1783
+ label = self._colorize_result(label, is_error=True)
1784
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1785
+ header["result"] = result_json
1786
+ return self._format_tool_invocation_header(header) + "\n"
1787
+
1788
+ return self._format_tool_invocation_header(header) + "\n"
1361
1789
 
1362
1790
  # turn_end: metadata only
1363
1791
  if event_type == "turn_end":
1364
- header = {"type": "turn_end", "datetime": now}
1792
+ self.message_counter += 1
1793
+ header = {"type": "turn_end", "datetime": now, "counter": f"#{self.message_counter}"}
1365
1794
  tool_results = parsed.get("toolResults")
1366
1795
  if isinstance(tool_results, list):
1367
1796
  header["tool_results_count"] = len(tool_results)
1368
1797
  return json.dumps(header, ensure_ascii=False) + "\n"
1369
1798
 
1370
- # agent_start, turn_start
1371
- if event_type in ("agent_start", "turn_start"):
1799
+ # turn_start: suppress (no user-visible value)
1800
+ if event_type == "turn_start":
1801
+ return ""
1802
+
1803
+ # agent_start (no counter — only *_end events get counters)
1804
+ if event_type == "agent_start":
1372
1805
  return json.dumps({"type": event_type, "datetime": now}) + "\n"
1373
1806
 
1374
1807
  # agent_end
1375
1808
  if event_type == "agent_end":
1376
- header = {"type": "agent_end", "datetime": now}
1809
+ self.message_counter += 1
1810
+ header = {"type": "agent_end", "datetime": now, "counter": f"#{self.message_counter}"}
1377
1811
  messages = parsed.get("messages")
1378
1812
  if isinstance(messages, list):
1379
1813
  header["message_count"] = len(messages)
1814
+ total_cost_usd = self._extract_total_cost_usd(parsed)
1815
+ if total_cost_usd is not None:
1816
+ header["total_cost_usd"] = total_cost_usd
1817
+ return json.dumps(header, ensure_ascii=False) + "\n"
1818
+
1819
+ # --- Role-based messages (Pi-wrapped Codex messages) ---
1820
+ role = parsed.get("role", "")
1821
+ if role == "toolResult":
1822
+ self.message_counter += 1
1823
+ header = {
1824
+ "type": "toolResult",
1825
+ "datetime": now,
1826
+ "counter": f"#{self.message_counter}",
1827
+ "toolName": parsed.get("toolName", ""),
1828
+ }
1829
+ is_error = parsed.get("isError", False)
1830
+ if is_error:
1831
+ header["isError"] = True
1832
+ content = parsed.get("content")
1833
+ if isinstance(content, list):
1834
+ for item in content:
1835
+ if isinstance(item, dict) and item.get("type") == "text":
1836
+ text_val = item.get("text", "")
1837
+ truncated = self._truncate_tool_result_text(text_val)
1838
+ use_color = self._color_enabled()
1839
+ if "\n" in truncated or use_color:
1840
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1841
+ label = self._colorize_result("content:", is_error=bool(is_error))
1842
+ return json.dumps(header, ensure_ascii=False) + "\n" + label + "\n" + colored + "\n"
1843
+ header["content"] = truncated
1844
+ return json.dumps(header, ensure_ascii=False) + "\n"
1845
+ return json.dumps(header, ensure_ascii=False) + "\n"
1846
+
1847
+ if role == "assistant":
1848
+ self.message_counter += 1
1849
+ content = parsed.get("content")
1850
+ if isinstance(content, list):
1851
+ self._strip_thinking_signature(content)
1852
+ header = {"type": "assistant", "datetime": now, "counter": f"#{self.message_counter}"}
1853
+ text_parts = []
1854
+ if isinstance(content, list):
1855
+ for item in content:
1856
+ if isinstance(item, dict):
1857
+ if item.get("type") == "text":
1858
+ text_parts.append(item.get("text", ""))
1859
+ elif item.get("type") == "thinking":
1860
+ text_parts.append(f"[thinking] {item.get('thinking', '')}")
1861
+ elif item.get("type") == "toolCall":
1862
+ name = item.get("name", "")
1863
+ args = item.get("arguments", {})
1864
+ cmd = args.get("command", "") if isinstance(args, dict) else ""
1865
+ text_parts.append(f"[toolCall] {name}: {cmd}" if cmd else f"[toolCall] {name}")
1866
+ if text_parts:
1867
+ combined = "\n".join(text_parts)
1868
+ if "\n" in combined:
1869
+ return json.dumps(header, ensure_ascii=False) + "\n" + combined + "\n"
1870
+ header["content"] = combined
1871
+ return json.dumps(header, ensure_ascii=False) + "\n"
1872
+
1873
+ if role:
1874
+ # Other roles — minimal JSON header
1875
+ self.message_counter += 1
1876
+ return json.dumps({"type": role, "datetime": now, "counter": f"#{self.message_counter}"}, ensure_ascii=False) + "\n"
1877
+
1878
+ # --- Native Codex events (agent_reasoning, agent_message, exec_command_end, etc.) ---
1879
+ msg_type, payload, outer_type = self._normalize_codex_event(parsed)
1880
+
1881
+ if msg_type in ("agent_reasoning", "reasoning"):
1882
+ self.message_counter += 1
1883
+ content = self._extract_reasoning_text(payload)
1884
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1885
+ if "\n" in content:
1886
+ return json.dumps(header, ensure_ascii=False) + "\ntext:\n" + content + "\n"
1887
+ if content:
1888
+ header["text"] = content
1889
+ return json.dumps(header, ensure_ascii=False) + "\n"
1890
+
1891
+ if msg_type in ("agent_message", "assistant_message"):
1892
+ self.message_counter += 1
1893
+ content = self._extract_message_text_codex(payload)
1894
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1895
+ if "\n" in content:
1896
+ return json.dumps(header, ensure_ascii=False) + "\nmessage:\n" + content + "\n"
1897
+ if content:
1898
+ header["message"] = content
1899
+ return json.dumps(header, ensure_ascii=False) + "\n"
1900
+
1901
+ if msg_type == "exec_command_end":
1902
+ self.message_counter += 1
1903
+ formatted_output = payload.get("formatted_output", "") if isinstance(payload, dict) else ""
1904
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1905
+ if "\n" in formatted_output:
1906
+ return json.dumps(header, ensure_ascii=False) + "\nformatted_output:\n" + formatted_output + "\n"
1907
+ if formatted_output:
1908
+ header["formatted_output"] = formatted_output
1909
+ return json.dumps(header, ensure_ascii=False) + "\n"
1910
+
1911
+ if msg_type == "command_execution":
1912
+ self.message_counter += 1
1913
+ aggregated_output = self._extract_command_output_text(payload)
1914
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1915
+ if "\n" in aggregated_output:
1916
+ return json.dumps(header, ensure_ascii=False) + "\naggregated_output:\n" + aggregated_output + "\n"
1917
+ if aggregated_output:
1918
+ header["aggregated_output"] = aggregated_output
1380
1919
  return json.dumps(header, ensure_ascii=False) + "\n"
1381
1920
 
1382
1921
  # Fallback: not handled
@@ -1392,11 +1931,312 @@ Model shorthands:
1392
1931
  hide_types.update(parts)
1393
1932
  return hide_types
1394
1933
 
1934
+ @staticmethod
1935
+ def _toolcall_end_delay_seconds() -> float:
1936
+ """Return delay for fallback toolcall_end visibility (default 3s)."""
1937
+ raw = os.environ.get("PI_TOOLCALL_END_DELAY_SECONDS", "3")
1938
+ try:
1939
+ delay = float(raw)
1940
+ except (TypeError, ValueError):
1941
+ delay = 3.0
1942
+ return max(0.0, delay)
1943
+
1395
1944
  @staticmethod
1396
1945
  def _sanitize_sub_agent_response(event: dict) -> dict:
1397
1946
  """Strip bulky fields (messages, type) from sub_agent_response to reduce token usage."""
1398
1947
  return {k: v for k, v in event.items() if k not in ("messages", "type")}
1399
1948
 
1949
+ def _reset_run_cost_tracking(self) -> None:
1950
+ """Reset per-run usage/cost accumulation state."""
1951
+ self._run_usage_totals = None
1952
+ self._run_total_cost_usd = None
1953
+ self._run_seen_usage_keys.clear()
1954
+
1955
+ @staticmethod
1956
+ def _is_numeric_value(value: object) -> bool:
1957
+ """True for int/float values (excluding bool)."""
1958
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
1959
+
1960
+ @staticmethod
1961
+ def _normalize_usage_payload(usage: dict) -> Optional[dict]:
1962
+ """Normalize usage payload into numeric totals for accumulation."""
1963
+ if not isinstance(usage, dict):
1964
+ return None
1965
+
1966
+ usage_cost = usage.get("cost")
1967
+ cost_payload = usage_cost if isinstance(usage_cost, dict) else {}
1968
+
1969
+ input_tokens = float(usage.get("input")) if PiService._is_numeric_value(usage.get("input")) else 0.0
1970
+ output_tokens = float(usage.get("output")) if PiService._is_numeric_value(usage.get("output")) else 0.0
1971
+ cache_read_tokens = float(usage.get("cacheRead")) if PiService._is_numeric_value(usage.get("cacheRead")) else 0.0
1972
+ cache_write_tokens = float(usage.get("cacheWrite")) if PiService._is_numeric_value(usage.get("cacheWrite")) else 0.0
1973
+
1974
+ total_tokens_raw = usage.get("totalTokens")
1975
+ total_tokens = (
1976
+ float(total_tokens_raw)
1977
+ if PiService._is_numeric_value(total_tokens_raw)
1978
+ else input_tokens + output_tokens + cache_read_tokens + cache_write_tokens
1979
+ )
1980
+
1981
+ cost_input = float(cost_payload.get("input")) if PiService._is_numeric_value(cost_payload.get("input")) else 0.0
1982
+ cost_output = float(cost_payload.get("output")) if PiService._is_numeric_value(cost_payload.get("output")) else 0.0
1983
+ cost_cache_read = (
1984
+ float(cost_payload.get("cacheRead")) if PiService._is_numeric_value(cost_payload.get("cacheRead")) else 0.0
1985
+ )
1986
+ cost_cache_write = (
1987
+ float(cost_payload.get("cacheWrite")) if PiService._is_numeric_value(cost_payload.get("cacheWrite")) else 0.0
1988
+ )
1989
+
1990
+ cost_total_raw = cost_payload.get("total")
1991
+ cost_total = (
1992
+ float(cost_total_raw)
1993
+ if PiService._is_numeric_value(cost_total_raw)
1994
+ else cost_input + cost_output + cost_cache_read + cost_cache_write
1995
+ )
1996
+
1997
+ has_any_value = any(
1998
+ PiService._is_numeric_value(v)
1999
+ for v in (
2000
+ usage.get("input"),
2001
+ usage.get("output"),
2002
+ usage.get("cacheRead"),
2003
+ usage.get("cacheWrite"),
2004
+ usage.get("totalTokens"),
2005
+ cost_payload.get("input"),
2006
+ cost_payload.get("output"),
2007
+ cost_payload.get("cacheRead"),
2008
+ cost_payload.get("cacheWrite"),
2009
+ cost_payload.get("total"),
2010
+ )
2011
+ )
2012
+
2013
+ if not has_any_value:
2014
+ return None
2015
+
2016
+ return {
2017
+ "input": input_tokens,
2018
+ "output": output_tokens,
2019
+ "cacheRead": cache_read_tokens,
2020
+ "cacheWrite": cache_write_tokens,
2021
+ "totalTokens": total_tokens,
2022
+ "cost": {
2023
+ "input": cost_input,
2024
+ "output": cost_output,
2025
+ "cacheRead": cost_cache_read,
2026
+ "cacheWrite": cost_cache_write,
2027
+ "total": cost_total,
2028
+ },
2029
+ }
2030
+
2031
+ @staticmethod
2032
+ def _merge_usage_payloads(base: Optional[dict], delta: Optional[dict]) -> Optional[dict]:
2033
+ """Merge normalized usage payloads by summing token/cost fields."""
2034
+ if not isinstance(base, dict):
2035
+ return delta
2036
+ if not isinstance(delta, dict):
2037
+ return base
2038
+
2039
+ base_cost = base.get("cost") if isinstance(base.get("cost"), dict) else {}
2040
+ delta_cost = delta.get("cost") if isinstance(delta.get("cost"), dict) else {}
2041
+
2042
+ return {
2043
+ "input": float(base.get("input", 0.0)) + float(delta.get("input", 0.0)),
2044
+ "output": float(base.get("output", 0.0)) + float(delta.get("output", 0.0)),
2045
+ "cacheRead": float(base.get("cacheRead", 0.0)) + float(delta.get("cacheRead", 0.0)),
2046
+ "cacheWrite": float(base.get("cacheWrite", 0.0)) + float(delta.get("cacheWrite", 0.0)),
2047
+ "totalTokens": float(base.get("totalTokens", 0.0)) + float(delta.get("totalTokens", 0.0)),
2048
+ "cost": {
2049
+ "input": float(base_cost.get("input", 0.0)) + float(delta_cost.get("input", 0.0)),
2050
+ "output": float(base_cost.get("output", 0.0)) + float(delta_cost.get("output", 0.0)),
2051
+ "cacheRead": float(base_cost.get("cacheRead", 0.0)) + float(delta_cost.get("cacheRead", 0.0)),
2052
+ "cacheWrite": float(base_cost.get("cacheWrite", 0.0)) + float(delta_cost.get("cacheWrite", 0.0)),
2053
+ "total": float(base_cost.get("total", 0.0)) + float(delta_cost.get("total", 0.0)),
2054
+ },
2055
+ }
2056
+
2057
+ @staticmethod
2058
+ def _aggregate_assistant_usages(messages: list) -> Optional[dict]:
2059
+ """Aggregate assistant usage payloads from an event messages array."""
2060
+ if not isinstance(messages, list):
2061
+ return None
2062
+
2063
+ assistant_usages: List[dict] = []
2064
+ for msg in messages:
2065
+ if isinstance(msg, dict) and msg.get("role") == "assistant":
2066
+ usage = msg.get("usage")
2067
+ if isinstance(usage, dict):
2068
+ assistant_usages.append(usage)
2069
+
2070
+ if not assistant_usages:
2071
+ return None
2072
+ if len(assistant_usages) == 1:
2073
+ return assistant_usages[0]
2074
+
2075
+ totals: Optional[dict] = None
2076
+ for usage in assistant_usages:
2077
+ normalized = PiService._normalize_usage_payload(usage)
2078
+ totals = PiService._merge_usage_payloads(totals, normalized)
2079
+
2080
+ return totals
2081
+
2082
+ def _assistant_usage_dedupe_key(self, message: dict, usage: dict) -> Optional[str]:
2083
+ """Build a stable dedupe key for assistant usage seen across message/turn_end events."""
2084
+ if not isinstance(message, dict) or not isinstance(usage, dict):
2085
+ return None
2086
+
2087
+ for id_key in ("id", "messageId", "message_id"):
2088
+ value = message.get(id_key)
2089
+ if isinstance(value, str) and value.strip():
2090
+ return f"id:{value.strip()}"
2091
+
2092
+ timestamp = message.get("timestamp")
2093
+ if self._is_numeric_value(timestamp):
2094
+ return f"ts:{int(float(timestamp))}"
2095
+ if isinstance(timestamp, str) and timestamp.strip():
2096
+ return f"ts:{timestamp.strip()}"
2097
+
2098
+ usage_cost = usage.get("cost") if isinstance(usage.get("cost"), dict) else {}
2099
+ signature: Dict[str, object] = {
2100
+ "stopReason": message.get("stopReason") if isinstance(message.get("stopReason"), str) else "",
2101
+ "input": usage.get("input", 0.0),
2102
+ "output": usage.get("output", 0.0),
2103
+ "cacheRead": usage.get("cacheRead", 0.0),
2104
+ "cacheWrite": usage.get("cacheWrite", 0.0),
2105
+ "totalTokens": usage.get("totalTokens", 0.0),
2106
+ "costTotal": usage_cost.get("total", 0.0),
2107
+ }
2108
+
2109
+ text = self._extract_text_from_message(message)
2110
+ if text:
2111
+ signature["text"] = text[:120]
2112
+
2113
+ return "sig:" + json.dumps(signature, sort_keys=True, ensure_ascii=False)
2114
+
2115
+ def _track_assistant_usage_from_event(self, event: dict) -> None:
2116
+ """Accumulate per-run assistant usage from stream events."""
2117
+ if not isinstance(event, dict):
2118
+ return
2119
+
2120
+ event_type = event.get("type")
2121
+ if event_type not in ("message", "message_end", "turn_end"):
2122
+ return
2123
+
2124
+ message = event.get("message")
2125
+ if not isinstance(message, dict) or message.get("role") != "assistant":
2126
+ return
2127
+
2128
+ normalized_usage = self._normalize_usage_payload(message.get("usage"))
2129
+ if not isinstance(normalized_usage, dict):
2130
+ return
2131
+
2132
+ usage_key = self._assistant_usage_dedupe_key(message, normalized_usage)
2133
+ if usage_key and usage_key in self._run_seen_usage_keys:
2134
+ return
2135
+ if usage_key:
2136
+ self._run_seen_usage_keys.add(usage_key)
2137
+
2138
+ self._run_usage_totals = self._merge_usage_payloads(self._run_usage_totals, normalized_usage)
2139
+ self._run_total_cost_usd = self._extract_total_cost_usd(
2140
+ {"usage": self._run_usage_totals},
2141
+ self._run_usage_totals,
2142
+ )
2143
+
2144
+ def _get_accumulated_total_cost_usd(self) -> Optional[float]:
2145
+ """Return accumulated per-run total cost when available."""
2146
+ if self._is_numeric_value(self._run_total_cost_usd):
2147
+ return float(self._run_total_cost_usd)
2148
+ if isinstance(self._run_usage_totals, dict):
2149
+ return self._extract_total_cost_usd({"usage": self._run_usage_totals}, self._run_usage_totals)
2150
+ return None
2151
+
2152
+ @staticmethod
2153
+ def _extract_usage_from_event(event: dict) -> Optional[dict]:
2154
+ """Extract usage payload from Pi event shapes (event/message/messages)."""
2155
+ if not isinstance(event, dict):
2156
+ return None
2157
+
2158
+ messages = event.get("messages")
2159
+ if event.get("type") == "agent_end" and isinstance(messages, list):
2160
+ aggregated = PiService._aggregate_assistant_usages(messages)
2161
+ if isinstance(aggregated, dict):
2162
+ return aggregated
2163
+
2164
+ direct_usage = event.get("usage")
2165
+ if isinstance(direct_usage, dict):
2166
+ return direct_usage
2167
+
2168
+ message = event.get("message")
2169
+ if isinstance(message, dict):
2170
+ message_usage = message.get("usage")
2171
+ if isinstance(message_usage, dict):
2172
+ return message_usage
2173
+
2174
+ if isinstance(messages, list):
2175
+ aggregated = PiService._aggregate_assistant_usages(messages)
2176
+ if isinstance(aggregated, dict):
2177
+ return aggregated
2178
+
2179
+ return None
2180
+
2181
+ @staticmethod
2182
+ def _extract_total_cost_usd(event: dict, usage: Optional[dict] = None) -> Optional[float]:
2183
+ """Extract total USD cost from explicit fields or usage.cost.total."""
2184
+ if not isinstance(event, dict):
2185
+ return None
2186
+
2187
+ for key in ("total_cost_usd", "totalCostUsd", "totalCostUSD"):
2188
+ value = event.get(key)
2189
+ if PiService._is_numeric_value(value):
2190
+ return float(value)
2191
+
2192
+ direct_cost = event.get("cost")
2193
+ if PiService._is_numeric_value(direct_cost):
2194
+ return float(direct_cost)
2195
+ if isinstance(direct_cost, dict):
2196
+ total = direct_cost.get("total")
2197
+ if PiService._is_numeric_value(total):
2198
+ return float(total)
2199
+
2200
+ usage_payload = usage if isinstance(usage, dict) else None
2201
+ if usage_payload is None:
2202
+ usage_payload = PiService._extract_usage_from_event(event)
2203
+
2204
+ if isinstance(usage_payload, dict):
2205
+ usage_cost = usage_payload.get("cost")
2206
+ if isinstance(usage_cost, dict):
2207
+ total = usage_cost.get("total")
2208
+ if PiService._is_numeric_value(total):
2209
+ return float(total)
2210
+
2211
+ return None
2212
+
2213
+ def _build_success_result_event(self, text: str, event: dict) -> dict:
2214
+ """Build standardized success envelope for shell-backend capture."""
2215
+ usage = self._extract_usage_from_event(event)
2216
+ if isinstance(self._run_usage_totals, dict):
2217
+ usage = self._run_usage_totals
2218
+
2219
+ total_cost_usd = self._extract_total_cost_usd(event, usage)
2220
+ accumulated_total_cost = self._get_accumulated_total_cost_usd()
2221
+ if accumulated_total_cost is not None:
2222
+ total_cost_usd = accumulated_total_cost
2223
+
2224
+ result_event: Dict = {
2225
+ "type": "result",
2226
+ "subtype": "success",
2227
+ "is_error": False,
2228
+ "result": text,
2229
+ "session_id": self.session_id,
2230
+ "sub_agent_response": self._sanitize_sub_agent_response(event),
2231
+ }
2232
+
2233
+ if isinstance(usage, dict):
2234
+ result_event["usage"] = usage
2235
+ if total_cost_usd is not None:
2236
+ result_event["total_cost_usd"] = total_cost_usd
2237
+
2238
+ return result_event
2239
+
1400
2240
  def _write_capture_file(self, capture_path: Optional[str]) -> None:
1401
2241
  """Write final result event to capture file for shell backend."""
1402
2242
  if not capture_path or not self.last_result_event:
@@ -1423,6 +2263,9 @@ Model shorthands:
1423
2263
  pretty = args.pretty.lower() != "false"
1424
2264
  capture_path = os.environ.get("JUNO_SUBAGENT_CAPTURE_PATH")
1425
2265
  hide_types = self._build_hide_types()
2266
+ self._buffered_tool_stdout_lines.clear()
2267
+ self._reset_run_cost_tracking()
2268
+ cancel_delayed_toolcalls = lambda: None
1426
2269
 
1427
2270
  if verbose:
1428
2271
  # Truncate prompt in display to avoid confusing multi-line output
@@ -1528,7 +2371,264 @@ Model shorthands:
1528
2371
  stderr_thread = threading.Thread(target=_stderr_reader, daemon=True)
1529
2372
  stderr_thread.start()
1530
2373
 
2374
+ cancel_delayed_toolcalls = lambda: None
2375
+
1531
2376
  if process.stdout:
2377
+ pending_tool_execution_end: Optional[dict] = None
2378
+ pending_turn_end_after_tool: Optional[dict] = None
2379
+ toolcall_end_delay_seconds = self._toolcall_end_delay_seconds()
2380
+ pending_delayed_toolcalls: Dict[int, dict] = {}
2381
+ delayed_toolcalls_lock = threading.Lock()
2382
+ delayed_toolcall_seq = 0
2383
+
2384
+ def _extract_fallback_toolcall_name(parsed_event: dict) -> Optional[str]:
2385
+ if parsed_event.get("type") != "message_update":
2386
+ return None
2387
+ assistant_event = parsed_event.get("assistantMessageEvent")
2388
+ if not isinstance(assistant_event, dict) or assistant_event.get("type") != "toolcall_end":
2389
+ return None
2390
+ tool_call = assistant_event.get("toolCall")
2391
+ if not isinstance(tool_call, dict):
2392
+ return None
2393
+ tool_call_id = tool_call.get("toolCallId")
2394
+ if isinstance(tool_call_id, str) and tool_call_id.strip():
2395
+ return None
2396
+ name = tool_call.get("name", "")
2397
+ return name if isinstance(name, str) else ""
2398
+
2399
+ def _format_deferred_toolcall(parsed_event: dict, mode: str) -> Optional[str]:
2400
+ if mode == self.PRETTIFIER_LIVE:
2401
+ return self._format_event_live(parsed_event)
2402
+ if mode == self.PRETTIFIER_CODEX:
2403
+ return self._format_pi_codex_event(parsed_event)
2404
+ if mode == self.PRETTIFIER_CLAUDE:
2405
+ return self._format_event_pretty_claude(parsed_event)
2406
+ return self._format_event_pretty(parsed_event)
2407
+
2408
+ def _emit_stdout(formatted: str, raw: bool = False) -> None:
2409
+ if raw:
2410
+ sys.stdout.write(formatted)
2411
+ sys.stdout.flush()
2412
+ return
2413
+ print(formatted, flush=True)
2414
+
2415
+ def _schedule_delayed_toolcall(parsed_event: dict, tool_name: str, mode: str) -> None:
2416
+ nonlocal delayed_toolcall_seq
2417
+
2418
+ def _emit_delayed_toolcall(event_payload: dict, event_mode: str) -> None:
2419
+ formatted = _format_deferred_toolcall(event_payload, event_mode)
2420
+ if not formatted:
2421
+ return
2422
+ _emit_stdout(formatted, raw=event_mode == self.PRETTIFIER_LIVE)
2423
+
2424
+ if toolcall_end_delay_seconds <= 0:
2425
+ _emit_delayed_toolcall(parsed_event, mode)
2426
+ return
2427
+
2428
+ delayed_toolcall_seq += 1
2429
+ entry_id = delayed_toolcall_seq
2430
+ entry: Dict = {
2431
+ "id": entry_id,
2432
+ "tool": tool_name,
2433
+ "event": parsed_event,
2434
+ "mode": mode,
2435
+ }
2436
+
2437
+ def _timer_emit() -> None:
2438
+ with delayed_toolcalls_lock:
2439
+ pending = pending_delayed_toolcalls.pop(entry_id, None)
2440
+ if not pending:
2441
+ return
2442
+ _emit_delayed_toolcall(pending["event"], pending["mode"])
2443
+
2444
+ timer = threading.Timer(toolcall_end_delay_seconds, _timer_emit)
2445
+ timer.daemon = True
2446
+ entry["timer"] = timer
2447
+ with delayed_toolcalls_lock:
2448
+ pending_delayed_toolcalls[entry_id] = entry
2449
+ timer.start()
2450
+
2451
+ def _cancel_delayed_toolcall(tool_name: str) -> None:
2452
+ with delayed_toolcalls_lock:
2453
+ if not pending_delayed_toolcalls:
2454
+ return
2455
+
2456
+ selected_id: Optional[int] = None
2457
+ if tool_name:
2458
+ for entry_id, entry in pending_delayed_toolcalls.items():
2459
+ if entry.get("tool") == tool_name:
2460
+ selected_id = entry_id
2461
+ break
2462
+
2463
+ if selected_id is None:
2464
+ selected_id = min(pending_delayed_toolcalls.keys())
2465
+
2466
+ pending = pending_delayed_toolcalls.pop(selected_id, None)
2467
+
2468
+ if pending:
2469
+ timer = pending.get("timer")
2470
+ if timer:
2471
+ timer.cancel()
2472
+
2473
+ def _cancel_all_delayed_toolcalls() -> None:
2474
+ with delayed_toolcalls_lock:
2475
+ pending = list(pending_delayed_toolcalls.values())
2476
+ pending_delayed_toolcalls.clear()
2477
+ for entry in pending:
2478
+ timer = entry.get("timer")
2479
+ if timer:
2480
+ timer.cancel()
2481
+
2482
+ cancel_delayed_toolcalls = _cancel_all_delayed_toolcalls
2483
+
2484
+ def _emit_parsed_event(parsed_event: dict, raw_json_line: Optional[str] = None) -> None:
2485
+ event_type = parsed_event.get("type", "")
2486
+
2487
+ # Capture session ID from the session event (sent at stream start)
2488
+ if event_type == "session":
2489
+ self.session_id = parsed_event.get("id")
2490
+
2491
+ # Track per-run assistant usage from stream events.
2492
+ self._track_assistant_usage_from_event(parsed_event)
2493
+
2494
+ # Ensure agent_end reflects cumulative per-run totals when available.
2495
+ if event_type == "agent_end":
2496
+ accumulated_total_cost = self._get_accumulated_total_cost_usd()
2497
+ if accumulated_total_cost is not None:
2498
+ parsed_event["total_cost_usd"] = accumulated_total_cost
2499
+ if isinstance(self._run_usage_totals, dict):
2500
+ parsed_event["usage"] = self._run_usage_totals
2501
+
2502
+ # Capture result event for shell backend
2503
+ if event_type == "agent_end":
2504
+ # agent_end has a 'messages' array; extract final assistant text
2505
+ messages = parsed_event.get("messages", [])
2506
+ text = ""
2507
+ if isinstance(messages, list):
2508
+ # Walk messages in reverse to find last assistant message with text
2509
+ for m in reversed(messages):
2510
+ if isinstance(m, dict) and m.get("role") == "assistant":
2511
+ text = self._extract_text_from_message(m)
2512
+ if text:
2513
+ break
2514
+ if text:
2515
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
2516
+ else:
2517
+ self.last_result_event = parsed_event
2518
+ elif event_type == "message":
2519
+ # OpenAI-compatible format: capture last assistant message
2520
+ msg = parsed_event.get("message", {})
2521
+ if isinstance(msg, dict) and msg.get("role") == "assistant":
2522
+ text = self._extract_text_from_message(msg)
2523
+ if text:
2524
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
2525
+ elif event_type == "turn_end":
2526
+ # turn_end may contain the final assistant message
2527
+ msg = parsed_event.get("message", {})
2528
+ if isinstance(msg, dict):
2529
+ text = self._extract_text_from_message(msg)
2530
+ if text:
2531
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
2532
+
2533
+ # Filter hidden stream types (live mode handles its own filtering)
2534
+ if event_type in hide_types and self.prettifier_mode != self.PRETTIFIER_LIVE:
2535
+ return
2536
+
2537
+ # Fallback toolcall_end events (without toolCallId) are delayed so
2538
+ # short tool executions only show the final combined tool event.
2539
+ if pretty:
2540
+ fallback_tool_name = _extract_fallback_toolcall_name(parsed_event)
2541
+ if fallback_tool_name is not None:
2542
+ _schedule_delayed_toolcall(parsed_event, fallback_tool_name, self.prettifier_mode)
2543
+ return
2544
+
2545
+ # Live stream mode: stream deltas in real-time
2546
+ if self.prettifier_mode == self.PRETTIFIER_LIVE:
2547
+ if event_type in hide_types:
2548
+ # In live mode, still suppress session/compaction/retry events
2549
+ # but NOT message_start/message_end (handled by _format_event_live)
2550
+ if event_type not in ("message_start", "message_end"):
2551
+ return
2552
+ formatted_live = self._format_event_live(parsed_event)
2553
+ if formatted_live is not None:
2554
+ if formatted_live == "":
2555
+ return
2556
+ sys.stdout.write(formatted_live)
2557
+ sys.stdout.flush()
2558
+ else:
2559
+ # Fallback: print raw JSON for unhandled event types
2560
+ print(json.dumps(parsed_event, ensure_ascii=False), flush=True)
2561
+ return
2562
+
2563
+ # Format and print using model-appropriate prettifier
2564
+ if pretty:
2565
+ if self.prettifier_mode == self.PRETTIFIER_CODEX:
2566
+ # Try Pi-wrapped Codex format first (role-based messages)
2567
+ if "role" in parsed_event:
2568
+ formatted = self._format_pi_codex_message(parsed_event)
2569
+ else:
2570
+ # Try Pi event handler (message_update, turn_end, etc.)
2571
+ formatted = self._format_pi_codex_event(parsed_event)
2572
+ if formatted is None:
2573
+ # Try native Codex event handler
2574
+ formatted = self._format_event_pretty_codex(parsed_event)
2575
+ if formatted is None:
2576
+ # Sanitize before raw JSON fallback: strip thinkingSignature,
2577
+ # encrypted_content, and metadata from nested Codex events.
2578
+ self._sanitize_codex_event(parsed_event, strip_metadata=True)
2579
+ formatted = json.dumps(parsed_event, ensure_ascii=False)
2580
+ elif formatted == "":
2581
+ return
2582
+ elif self.prettifier_mode == self.PRETTIFIER_CLAUDE:
2583
+ formatted = self._format_event_pretty_claude(parsed_event)
2584
+ else:
2585
+ formatted = self._format_event_pretty(parsed_event)
2586
+ if formatted is not None:
2587
+ print(formatted, flush=True)
2588
+ else:
2589
+ if raw_json_line is not None:
2590
+ print(raw_json_line, flush=True)
2591
+ else:
2592
+ print(json.dumps(parsed_event, ensure_ascii=False), flush=True)
2593
+
2594
+ def _merge_buffered_tool_stdout_into(event_payload: dict) -> None:
2595
+ buffered_text = "\n".join(self._buffered_tool_stdout_lines).strip()
2596
+ if not buffered_text:
2597
+ self._buffered_tool_stdout_lines.clear()
2598
+ return
2599
+
2600
+ result_val = event_payload.get("result")
2601
+ if result_val in (None, "", [], {}):
2602
+ event_payload["result"] = buffered_text
2603
+ elif isinstance(result_val, str):
2604
+ existing = self._strip_ansi_sequences(result_val)
2605
+ if existing:
2606
+ if not existing.endswith("\n"):
2607
+ existing += "\n"
2608
+ event_payload["result"] = existing + buffered_text
2609
+ else:
2610
+ event_payload["result"] = buffered_text
2611
+ else:
2612
+ # Keep complex result structures untouched; print trailing raw lines
2613
+ # before the next structured event for stable transcript ordering.
2614
+ print(buffered_text, flush=True)
2615
+
2616
+ self._buffered_tool_stdout_lines.clear()
2617
+
2618
+ def _flush_pending_tool_events() -> None:
2619
+ nonlocal pending_tool_execution_end, pending_turn_end_after_tool
2620
+ if pending_tool_execution_end is not None:
2621
+ _merge_buffered_tool_stdout_into(pending_tool_execution_end)
2622
+ _emit_parsed_event(pending_tool_execution_end)
2623
+ pending_tool_execution_end = None
2624
+
2625
+ if pending_turn_end_after_tool is not None:
2626
+ if self._buffered_tool_stdout_lines:
2627
+ print("\n".join(self._buffered_tool_stdout_lines), flush=True)
2628
+ self._buffered_tool_stdout_lines.clear()
2629
+ _emit_parsed_event(pending_turn_end_after_tool)
2630
+ pending_turn_end_after_tool = None
2631
+
1532
2632
  try:
1533
2633
  for raw_line in process.stdout:
1534
2634
  line = raw_line.rstrip("\n\r")
@@ -1539,119 +2639,57 @@ Model shorthands:
1539
2639
  try:
1540
2640
  parsed = json.loads(line)
1541
2641
  except json.JSONDecodeError:
1542
- # Non-JSON output print as-is
2642
+ # Non-JSON output (raw tool stdout). In pretty mode, buffer raw
2643
+ # lines while tool execution events are pending to avoid
2644
+ # interleaving with structured events (e.g. turn_end).
2645
+ if pretty and (
2646
+ self._in_tool_execution
2647
+ or pending_tool_execution_end is not None
2648
+ or pending_turn_end_after_tool is not None
2649
+ ):
2650
+ self._buffered_tool_stdout_lines.append(self._strip_ansi_sequences(line))
2651
+ continue
1543
2652
  print(line, flush=True)
1544
2653
  continue
1545
2654
 
1546
2655
  event_type = parsed.get("type", "")
1547
2656
 
1548
- # Capture session ID from the session event (sent at stream start)
1549
- if event_type == "session":
1550
- self.session_id = parsed.get("id")
1551
-
1552
- # Capture result event for shell backend
1553
- if event_type == "agent_end":
1554
- # agent_end has a 'messages' array; extract final assistant text
1555
- messages = parsed.get("messages", [])
1556
- text = ""
1557
- if isinstance(messages, list):
1558
- # Walk messages in reverse to find last assistant message with text
1559
- for m in reversed(messages):
1560
- if isinstance(m, dict) and m.get("role") == "assistant":
1561
- text = self._extract_text_from_message(m)
1562
- if text:
1563
- break
1564
- if text:
1565
- self.last_result_event = {
1566
- "type": "result",
1567
- "subtype": "success",
1568
- "is_error": False,
1569
- "result": text,
1570
- "session_id": self.session_id,
1571
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1572
- }
1573
- else:
1574
- self.last_result_event = parsed
1575
- elif event_type == "message":
1576
- # OpenAI-compatible format: capture last assistant message
1577
- msg = parsed.get("message", {})
1578
- if isinstance(msg, dict) and msg.get("role") == "assistant":
1579
- text = self._extract_text_from_message(msg)
1580
- if text:
1581
- self.last_result_event = {
1582
- "type": "result",
1583
- "subtype": "success",
1584
- "is_error": False,
1585
- "result": text,
1586
- "session_id": self.session_id,
1587
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1588
- }
1589
- elif event_type == "turn_end":
1590
- # turn_end may contain the final assistant message
1591
- msg = parsed.get("message", {})
1592
- if isinstance(msg, dict):
1593
- text = self._extract_text_from_message(msg)
1594
- if text:
1595
- self.last_result_event = {
1596
- "type": "result",
1597
- "subtype": "success",
1598
- "is_error": False,
1599
- "result": text,
1600
- "session_id": self.session_id,
1601
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1602
- }
1603
-
1604
- # Filter hidden stream types (live mode handles its own filtering)
1605
- if event_type in hide_types and self.prettifier_mode != self.PRETTIFIER_LIVE:
2657
+ if pretty and event_type == "tool_execution_start":
2658
+ # Reset raw tool stdout buffer per tool execution.
2659
+ self._buffered_tool_stdout_lines.clear()
2660
+
2661
+ if pretty and event_type == "tool_execution_end":
2662
+ # Tool finished before the delayed fallback timer fired — suppress
2663
+ # the pending fallback toolcall_end preview.
2664
+ tool_name = parsed.get("toolName", "")
2665
+ _cancel_delayed_toolcall(tool_name if isinstance(tool_name, str) else "")
2666
+
2667
+ # Defer emission so any trailing raw stdout can be grouped before
2668
+ # downstream structured metadata like turn_end.
2669
+ pending_tool_execution_end = parsed
1606
2670
  continue
1607
2671
 
1608
- # Live stream mode: stream deltas in real-time
1609
- if self.prettifier_mode == self.PRETTIFIER_LIVE:
1610
- if event_type in hide_types:
1611
- # In live mode, still suppress session/compaction/retry events
1612
- # but NOT message_start/message_end (handled by _format_event_live)
1613
- if event_type not in ("message_start", "message_end"):
1614
- continue
1615
- formatted = self._format_event_live(parsed)
1616
- if formatted is not None:
1617
- if formatted == "":
1618
- continue
1619
- sys.stdout.write(formatted)
1620
- sys.stdout.flush()
1621
- else:
1622
- # Fallback: print raw JSON for unhandled event types
1623
- print(json.dumps(parsed, ensure_ascii=False), flush=True)
2672
+ if pretty and event_type == "turn_end" and pending_tool_execution_end is not None:
2673
+ # Hold turn_end until buffered trailing raw stdout is flushed with
2674
+ # the pending tool event.
2675
+ pending_turn_end_after_tool = parsed
1624
2676
  continue
1625
2677
 
1626
- # Format and print using model-appropriate prettifier
1627
- if pretty:
1628
- if self.prettifier_mode == self.PRETTIFIER_CODEX:
1629
- # Try Pi-wrapped Codex format first (role-based messages)
1630
- if "role" in parsed:
1631
- formatted = self._format_pi_codex_message(parsed)
1632
- else:
1633
- # Try Pi event handler (message_update, turn_end, etc.)
1634
- formatted = self._format_pi_codex_event(parsed)
1635
- if formatted is not None:
1636
- # Empty string means "suppress this event"
1637
- if formatted == "":
1638
- continue
1639
- else:
1640
- # Try native Codex event handler
1641
- formatted = self._format_event_pretty_codex(parsed)
1642
- if formatted is None:
1643
- # Sanitize before raw JSON fallback: strip thinkingSignature,
1644
- # encrypted_content, and metadata from nested Codex events.
1645
- self._sanitize_codex_event(parsed, strip_metadata=True)
1646
- formatted = json.dumps(parsed, ensure_ascii=False)
1647
- elif self.prettifier_mode == self.PRETTIFIER_CLAUDE:
1648
- formatted = self._format_event_pretty_claude(parsed)
1649
- else:
1650
- formatted = self._format_event_pretty(parsed)
1651
- if formatted is not None:
1652
- print(formatted, flush=True)
1653
- else:
1654
- print(line, flush=True)
2678
+ if pretty and (
2679
+ pending_tool_execution_end is not None or pending_turn_end_after_tool is not None
2680
+ ):
2681
+ _flush_pending_tool_events()
2682
+
2683
+ _emit_parsed_event(parsed, raw_json_line=line)
2684
+
2685
+ # Flush any deferred tool/turn events at end-of-stream.
2686
+ if pretty and (
2687
+ pending_tool_execution_end is not None or pending_turn_end_after_tool is not None
2688
+ ):
2689
+ _flush_pending_tool_events()
2690
+ elif self._buffered_tool_stdout_lines:
2691
+ print("\n".join(self._buffered_tool_stdout_lines), flush=True)
2692
+ self._buffered_tool_stdout_lines.clear()
1655
2693
 
1656
2694
  except ValueError:
1657
2695
  # Watchdog closed stdout — expected when process exits but pipe stays open.
@@ -1659,6 +2697,7 @@ Model shorthands:
1659
2697
 
1660
2698
  # Signal watchdog that output loop is done
1661
2699
  output_done.set()
2700
+ cancel_delayed_toolcalls()
1662
2701
 
1663
2702
  # Write capture file for shell backend
1664
2703
  self._write_capture_file(capture_path)
@@ -1676,6 +2715,7 @@ Model shorthands:
1676
2715
 
1677
2716
  except KeyboardInterrupt:
1678
2717
  print("\nInterrupted by user", file=sys.stderr)
2718
+ cancel_delayed_toolcalls()
1679
2719
  try:
1680
2720
  process.terminate()
1681
2721
  try:
@@ -1690,6 +2730,7 @@ Model shorthands:
1690
2730
 
1691
2731
  except Exception as e:
1692
2732
  print(f"Error executing pi: {e}", file=sys.stderr)
2733
+ cancel_delayed_toolcalls()
1693
2734
  try:
1694
2735
  if process.poll() is None:
1695
2736
  process.terminate()
@@ -1728,7 +2769,9 @@ Model shorthands:
1728
2769
  self.prettifier_mode = self._detect_prettifier_mode(self.model_name)
1729
2770
  self.verbose = args.verbose
1730
2771
 
1731
- # Verbose mode enables live stream prettifier for real-time output
2772
+ # Verbose mode enables live stream prettifier for real-time output.
2773
+ # Codex models already default to LIVE; this ensures all models get
2774
+ # real-time streaming when -v is used.
1732
2775
  if args.verbose:
1733
2776
  self.prettifier_mode = self.PRETTIFIER_LIVE
1734
2777