juno-code 1.0.49 → 1.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +508 -202
  2. package/dist/bin/cli.d.mts +1 -1
  3. package/dist/bin/cli.d.ts +1 -1
  4. package/dist/bin/cli.js +3332 -1421
  5. package/dist/bin/cli.js.map +1 -1
  6. package/dist/bin/cli.mjs +3316 -1405
  7. package/dist/bin/cli.mjs.map +1 -1
  8. package/dist/bin/feedback-collector.js.map +1 -1
  9. package/dist/bin/feedback-collector.mjs.map +1 -1
  10. package/dist/index.d.mts +56 -19
  11. package/dist/index.d.ts +56 -19
  12. package/dist/index.js +240 -36
  13. package/dist/index.js.map +1 -1
  14. package/dist/index.mjs +240 -36
  15. package/dist/index.mjs.map +1 -1
  16. package/dist/templates/scripts/install_requirements.sh +55 -5
  17. package/dist/templates/scripts/kanban.sh +11 -0
  18. package/dist/templates/services/README.md +23 -4
  19. package/dist/templates/services/__pycache__/pi.cpython-313.pyc +0 -0
  20. package/dist/templates/services/pi.py +1933 -262
  21. package/dist/templates/skills/claude/kanban-workflow/SKILL.md +138 -0
  22. package/dist/templates/skills/claude/plan-kanban-tasks/SKILL.md +1 -1
  23. package/dist/templates/skills/claude/ralph-loop/scripts/kanban.sh +11 -0
  24. package/dist/templates/skills/claude/understand-project/SKILL.md +1 -1
  25. package/dist/templates/skills/codex/kanban-workflow/SKILL.md +139 -0
  26. package/dist/templates/skills/codex/plan-kanban-tasks/SKILL.md +32 -0
  27. package/dist/templates/skills/codex/ralph-loop/scripts/kanban.sh +11 -0
  28. package/dist/templates/skills/codex/understand-project/SKILL.md +46 -0
  29. package/dist/templates/skills/pi/kanban-workflow/SKILL.md +139 -0
  30. package/dist/templates/skills/pi/plan-kanban-tasks/SKILL.md +1 -1
  31. package/dist/templates/skills/pi/ralph-loop/SKILL.md +4 -0
  32. package/dist/templates/skills/pi/understand-project/SKILL.md +1 -1
  33. package/package.json +7 -5
@@ -7,13 +7,15 @@ Headless wrapper around the Pi coding agent CLI with JSON streaming and shorthan
7
7
  import argparse
8
8
  import json
9
9
  import os
10
+ import re
10
11
  import subprocess
11
12
  import sys
13
+ import tempfile
12
14
  import threading
13
15
  import time
14
16
  from datetime import datetime
15
17
  from pathlib import Path
16
- from typing import Dict, List, Optional, Tuple
18
+ from typing import Dict, List, Optional, Set, TextIO, Tuple
17
19
 
18
20
 
19
21
  class PiService:
@@ -35,7 +37,10 @@ class PiService:
35
37
  ":gpt-5": "openai/gpt-5",
36
38
  ":gpt-4o": "openai/gpt-4o",
37
39
  ":o3": "openai/o3",
38
- ":codex": "openai/gpt-5.3-codex",
40
+ ":codex": "openai-codex/gpt-5.3-codex",
41
+ ":api-codex": "openai/gpt-5.3-codex",
42
+ ":codex-spark": "openai-codex/gpt-5.3-codex-spark",
43
+ ":api-codex-spark": "openai/gpt-5.3-codex-spark",
39
44
  # Google
40
45
  ":gemini-pro": "google/gemini-2.5-pro",
41
46
  ":gemini-flash": "google/gemini-2.5-flash",
@@ -74,6 +79,17 @@ class PiService:
74
79
  PRETTIFIER_CODEX = "codex"
75
80
  PRETTIFIER_LIVE = "live"
76
81
 
82
+ # ANSI colors for tool prettifier output.
83
+ # - command/args blocks are green for readability
84
+ # - error results are red
85
+ ANSI_GREEN = "\x1b[38;5;40m"
86
+ ANSI_RED = "\x1b[38;5;203m"
87
+ ANSI_RESET = "\x1b[0m"
88
+
89
+ # Keep tool args readable while preventing giant inline payloads.
90
+ TOOL_ARG_STRING_MAX_CHARS = 400
91
+ _ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
92
+
77
93
  def __init__(self):
78
94
  self.model_name = self.DEFAULT_MODEL
79
95
  self.project_path = os.getcwd()
@@ -83,6 +99,18 @@ class PiService:
83
99
  self.session_id: Optional[str] = None
84
100
  self.message_counter = 0
85
101
  self.prettifier_mode = self.PRETTIFIER_PI
102
+ # Tool call grouping: buffer toolcall_end until tool_execution_end arrives
103
+ self._pending_tool_calls: Dict[str, dict] = {} # toolCallId -> {tool, args/command}
104
+ # Buffer tool_execution_start data for fallback + timing (when toolcall_end arrives late)
105
+ self._pending_exec_starts: Dict[str, dict] = {} # toolCallId -> {tool, args/command, started_at}
106
+ # Track whether we're inside a tool execution
107
+ self._in_tool_execution: bool = False
108
+ # Buffer raw non-JSON tool stdout so it doesn't interleave with structured events
109
+ self._buffered_tool_stdout_lines: List[str] = []
110
+ # Per-run usage/cost accumulation (used for result + agent_end total cost visibility)
111
+ self._run_usage_totals: Optional[dict] = None
112
+ self._run_total_cost_usd: Optional[float] = None
113
+ self._run_seen_usage_keys: Set[str] = set()
86
114
  # Claude prettifier state
87
115
  self.user_message_truncate = int(os.environ.get("CLAUDE_USER_MESSAGE_PRETTY_TRUNCATE", "4"))
88
116
  # Codex prettifier state
@@ -92,6 +120,114 @@ class PiService:
92
120
  # Keys to hide from intermediate assistant messages in Codex mode
93
121
  self._codex_metadata_keys = {"api", "provider", "model", "usage", "stopReason", "timestamp"}
94
122
 
123
+ def _color_enabled(self) -> bool:
124
+ """Check if ANSI color output is appropriate (TTY + NO_COLOR not set)."""
125
+ if os.environ.get("NO_COLOR") is not None:
126
+ return False
127
+ return hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
128
+
129
+ def _colorize_lines(self, text: str, color_code: str) -> str:
130
+ """Apply ANSI coloring per line so line-based renderers keep colors stable."""
131
+ if "\n" not in text:
132
+ return f"{color_code}{text}{self.ANSI_RESET}"
133
+ return "\n".join(f"{color_code}{line}{self.ANSI_RESET}" for line in text.split("\n"))
134
+
135
+ def _colorize_result(self, text: str, is_error: bool = False) -> str:
136
+ """Colorize tool output only for errors; success stays terminal-default."""
137
+ if not self._color_enabled():
138
+ return text
139
+ if not is_error:
140
+ return text
141
+ return self._colorize_lines(text, self.ANSI_RED)
142
+
143
+ def _colorize_command(self, text: str) -> str:
144
+ """Colorize tool command/args blocks in green when ANSI color is enabled."""
145
+ if not self._color_enabled():
146
+ return text
147
+ return self._colorize_lines(text, self.ANSI_GREEN)
148
+
149
+ def _normalize_multiline_tool_text(self, text: str) -> str:
150
+ """Render escaped newline sequences as real newlines for tool command/args blocks."""
151
+ if "\n" in text:
152
+ return text
153
+ if "\\n" in text:
154
+ return text.replace("\\n", "\n")
155
+ return text
156
+
157
+ def _format_tool_invocation_header(self, header: Dict) -> str:
158
+ """Serialize a tool header and render multiline command/args as separate readable blocks."""
159
+ metadata = dict(header)
160
+ block_label: Optional[str] = None
161
+ block_text: Optional[str] = None
162
+
163
+ command_val = metadata.get("command")
164
+ if isinstance(command_val, str) and command_val.strip():
165
+ command_text = self._normalize_multiline_tool_text(command_val)
166
+ if "\n" in command_text:
167
+ metadata.pop("command", None)
168
+ block_label = "command:"
169
+ block_text = self._colorize_command(command_text)
170
+
171
+ if block_text is None:
172
+ args_val = metadata.get("args")
173
+ if isinstance(args_val, str) and args_val.strip():
174
+ args_text = self._normalize_multiline_tool_text(args_val)
175
+ if "\n" in args_text:
176
+ metadata.pop("args", None)
177
+ block_label = "args:"
178
+ block_text = self._colorize_command(args_text)
179
+
180
+ output = json.dumps(metadata, ensure_ascii=False)
181
+ if block_text is None:
182
+ return output
183
+ return output + "\n" + block_label + "\n" + block_text
184
+
185
+ def _strip_ansi_sequences(self, text: str) -> str:
186
+ """Remove ANSI escape sequences to prevent color bleed in prettified output."""
187
+ if not isinstance(text, str) or "\x1b" not in text:
188
+ return text
189
+ return self._ANSI_ESCAPE_RE.sub("", text)
190
+
191
+ def _sanitize_tool_argument_value(self, value):
192
+ """Recursively sanitize tool args while preserving JSON structure."""
193
+ if isinstance(value, str):
194
+ clean = self._strip_ansi_sequences(value)
195
+ if len(clean) > self.TOOL_ARG_STRING_MAX_CHARS:
196
+ return clean[:self.TOOL_ARG_STRING_MAX_CHARS] + "..."
197
+ return clean
198
+ if isinstance(value, dict):
199
+ return {k: self._sanitize_tool_argument_value(v) for k, v in value.items()}
200
+ if isinstance(value, list):
201
+ return [self._sanitize_tool_argument_value(v) for v in value]
202
+ return value
203
+
204
+ def _format_execution_time(self, payload: dict, pending: Optional[dict] = None) -> Optional[str]:
205
+ """Return execution time string (e.g. 0.12s) from payload or measured start time."""
206
+ seconds: Optional[float] = None
207
+
208
+ # Prefer explicit durations if Pi adds them in future versions.
209
+ for key in ("executionTimeSeconds", "durationSeconds", "elapsedSeconds"):
210
+ value = payload.get(key)
211
+ if isinstance(value, (int, float)):
212
+ seconds = float(value)
213
+ break
214
+
215
+ if seconds is None:
216
+ for key in ("executionTimeMs", "durationMs", "elapsedMs"):
217
+ value = payload.get(key)
218
+ if isinstance(value, (int, float)):
219
+ seconds = float(value) / 1000.0
220
+ break
221
+
222
+ if seconds is None and isinstance(pending, dict):
223
+ started_at = pending.get("started_at")
224
+ if isinstance(started_at, (int, float)):
225
+ seconds = max(0.0, time.perf_counter() - started_at)
226
+
227
+ if seconds is None:
228
+ return None
229
+ return f"{seconds:.2f}s"
230
+
95
231
  def expand_model_shorthand(self, model: str) -> str:
96
232
  """Expand shorthand model names (colon-prefixed) to full identifiers."""
97
233
  if model.startswith(":"):
@@ -103,13 +239,15 @@ class PiService:
103
239
 
104
240
  Pi CLI always uses its own event protocol (message, turn_end,
105
241
  message_update, agent_end, etc.) regardless of the underlying LLM.
106
- The exception is Codex models where Pi wraps Codex-format events
107
- (agent_reasoning, agent_message, exec_command_end).
242
+ Codex models also use Pi's event protocol but may additionally emit
243
+ native Codex events (agent_reasoning, agent_message, exec_command_end).
244
+ The LIVE prettifier handles both Pi-native and Codex-native events,
245
+ giving real-time streaming output for all model types.
108
246
  Claude models still use Pi's event protocol, NOT Claude CLI events.
109
247
  """
110
248
  model_lower = model.lower()
111
249
  if "codex" in model_lower:
112
- return self.PRETTIFIER_CODEX
250
+ return self.PRETTIFIER_LIVE
113
251
  # All non-Codex models (including Claude) use Pi's native event protocol
114
252
  return self.PRETTIFIER_PI
115
253
 
@@ -147,7 +285,10 @@ Model shorthands:
147
285
  :gpt-5 -> openai/gpt-5
148
286
  :gpt-4o -> openai/gpt-4o
149
287
  :o3 -> openai/o3
150
- :codex -> openai/gpt-5.3-codex
288
+ :codex -> openai-codex/gpt-5.3-codex
289
+ :api-codex -> openai/gpt-5.3-codex
290
+ :codex-spark -> openai-codex/gpt-5.3-codex-spark
291
+ :api-codex-spark -> openai/gpt-5.3-codex-spark
151
292
  :gemini-pro -> google/gemini-2.5-pro
152
293
  :gemini-flash -> google/gemini-2.5-flash
153
294
  :groq -> groq/llama-4-scout-17b-16e-instruct
@@ -259,6 +400,13 @@ Model shorthands:
259
400
  help="Space-separated additional pi CLI arguments to append.",
260
401
  )
261
402
 
403
+ parser.add_argument(
404
+ "--live",
405
+ action="store_true",
406
+ default=os.environ.get("PI_LIVE", "false").lower() == "true",
407
+ help="Run Pi in interactive/live mode (no --mode json). Uses an auto-exit extension to capture agent_end and shutdown cleanly. (env: PI_LIVE)",
408
+ )
409
+
262
410
  parser.add_argument(
263
411
  "--pretty",
264
412
  type=str,
@@ -287,15 +435,21 @@ Model shorthands:
287
435
  print(f"Error reading prompt file: {e}", file=sys.stderr)
288
436
  sys.exit(1)
289
437
 
290
- def build_pi_command(self, args: argparse.Namespace) -> Tuple[List[str], Optional[str]]:
291
- """Construct the Pi CLI command for headless JSON streaming execution.
438
+ def build_pi_command(
439
+ self,
440
+ args: argparse.Namespace,
441
+ live_extension_path: Optional[str] = None,
442
+ ) -> Tuple[List[str], Optional[str]]:
443
+ """Construct the Pi CLI command.
292
444
 
293
- Returns (cmd, stdin_prompt): cmd is the argument list, stdin_prompt is
294
- the prompt text to pipe via stdin (or None to pass as positional arg).
295
- For multiline or large prompts we pipe via stdin so Pi reads it
296
- naturally without command-line quoting issues.
445
+ Non-live mode keeps the existing headless JSON contract.
446
+ Live mode switches to Pi interactive defaults (no --mode json, no -p)
447
+ and passes the initial prompt positionally.
297
448
  """
298
- cmd = ["pi", "--mode", "json"]
449
+ is_live_mode = bool(getattr(args, "live", False))
450
+ cmd = ["pi"]
451
+ if not is_live_mode:
452
+ cmd.extend(["--mode", "json"])
299
453
 
300
454
  # Model: if provider/model format, split and pass separately
301
455
  model = self.model_name
@@ -340,16 +494,33 @@ Model shorthands:
340
494
  elif args.no_session:
341
495
  cmd.append("--no-session")
342
496
 
497
+ # Attach live auto-exit extension when requested.
498
+ if is_live_mode and live_extension_path:
499
+ cmd.extend(["-e", live_extension_path])
500
+
343
501
  # Build prompt with optional auto-instruction
344
502
  full_prompt = self.prompt
345
503
  if args.auto_instruction:
346
504
  full_prompt = f"{args.auto_instruction}\n\n{full_prompt}"
347
505
 
506
+ stdin_prompt: Optional[str] = None
507
+
508
+ if is_live_mode:
509
+ # Live mode uses positional prompt input (no -p and no stdin piping).
510
+ cmd.append(full_prompt)
511
+
512
+ # Additional raw arguments should still be honored; place before the
513
+ # positional prompt so flags remain flags.
514
+ if args.additional_args:
515
+ extra = args.additional_args.strip().split()
516
+ if extra:
517
+ cmd = cmd[:-1] + extra + [cmd[-1]]
518
+ return cmd, None
519
+
348
520
  # For multiline or large prompts, pipe via stdin to avoid command-line
349
521
  # argument issues. Pi CLI reads stdin when isTTY is false and
350
522
  # automatically prepends it to messages in print mode.
351
523
  # For simple single-line prompts, pass as positional arg + -p flag.
352
- stdin_prompt: Optional[str] = None
353
524
  if "\n" in full_prompt or len(full_prompt) > 4096:
354
525
  # Pipe via stdin — Pi auto-enables print mode when stdin has data
355
526
  stdin_prompt = full_prompt
@@ -544,6 +715,7 @@ Model shorthands:
544
715
  return text
545
716
  # Unescape JSON-escaped newlines for human-readable display
546
717
  display_text = text.replace("\\n", "\n").replace("\\t", "\t")
718
+ display_text = self._strip_ansi_sequences(display_text)
547
719
  lines = display_text.split("\n")
548
720
  max_lines = self._codex_tool_result_max_lines
549
721
  if len(lines) <= max_lines:
@@ -643,12 +815,11 @@ Model shorthands:
643
815
  args = item.get("arguments", {})
644
816
  if isinstance(args, dict):
645
817
  cmd = args.get("command", "")
646
- if cmd:
647
- parts.append(f"[toolCall] {name}: {cmd}")
818
+ if isinstance(cmd, str) and cmd:
819
+ parts.append(f"[toolCall] {name}: {self._sanitize_tool_argument_value(cmd)}")
648
820
  else:
649
- args_str = json.dumps(args, ensure_ascii=False)
650
- if len(args_str) > 200:
651
- args_str = args_str[:200] + "..."
821
+ args_clean = self._sanitize_tool_argument_value(args)
822
+ args_str = json.dumps(args_clean, ensure_ascii=False)
652
823
  parts.append(f"[toolCall] {name}: {args_str}")
653
824
  else:
654
825
  parts.append(f"[toolCall] {name}")
@@ -734,10 +905,13 @@ Model shorthands:
734
905
  header["thinking"] = thinking_text
735
906
  return json.dumps(header, ensure_ascii=False)
736
907
 
737
- # toolcall_end: show tool name and arguments
908
+ # toolcall_end: buffer for grouping with tool_execution_end
738
909
  if ame_type == "toolcall_end":
739
- self.message_counter += 1
740
910
  tool_call = ame.get("toolCall", {})
911
+ if self._buffer_tool_call_end(tool_call, now):
912
+ return "" # suppress — will emit combined event on tool_execution_end
913
+ # No toolCallId — fallback to original format
914
+ self.message_counter += 1
741
915
  header = {
742
916
  "type": "toolcall_end",
743
917
  "datetime": now,
@@ -748,14 +922,13 @@ Model shorthands:
748
922
  args = tool_call.get("arguments", {})
749
923
  if isinstance(args, dict):
750
924
  cmd = args.get("command", "")
751
- if cmd:
752
- header["command"] = cmd
925
+ if isinstance(cmd, str) and cmd:
926
+ header["command"] = self._sanitize_tool_argument_value(cmd)
753
927
  else:
754
- args_str = json.dumps(args, ensure_ascii=False)
755
- if len(args_str) > 200:
756
- args_str = args_str[:200] + "..."
757
- header["args"] = args_str if isinstance(args_str, str) else args
758
- return json.dumps(header, ensure_ascii=False)
928
+ header["args"] = self._sanitize_tool_argument_value(args)
929
+ elif isinstance(args, str) and args.strip():
930
+ header["args"] = self._sanitize_tool_argument_value(args)
931
+ return self._format_tool_invocation_header(header)
759
932
 
760
933
  # Other message_update subtypes: suppress by default
761
934
  return ""
@@ -773,14 +946,12 @@ Model shorthands:
773
946
  header["tool_results_count"] = len(tool_results)
774
947
  return json.dumps(header, ensure_ascii=False)
775
948
 
776
- # --- message_start: minimal header ---
949
+ # --- message_start: minimal header (no counter — only *_end events get counters) ---
777
950
  if event_type == "message_start":
778
- self.message_counter += 1
779
951
  message = parsed.get("message", {})
780
952
  header = {
781
953
  "type": "message_start",
782
954
  "datetime": now,
783
- "counter": f"#{self.message_counter}",
784
955
  }
785
956
  if isinstance(message, dict):
786
957
  role = message.get("role")
@@ -798,58 +969,104 @@ Model shorthands:
798
969
  }
799
970
  return json.dumps(header, ensure_ascii=False)
800
971
 
801
- # --- tool_execution_start ---
972
+ # --- tool_execution_start: always suppress, buffer args ---
802
973
  if event_type == "tool_execution_start":
803
- self.message_counter += 1
804
- header = {
805
- "type": "tool_execution_start",
806
- "datetime": now,
807
- "counter": f"#{self.message_counter}",
808
- "tool": parsed.get("toolName", ""),
809
- }
810
- args_val = parsed.get("args")
811
- if isinstance(args_val, dict):
812
- args_str = json.dumps(args_val, ensure_ascii=False)
813
- if len(args_str) > 200:
814
- header["args"] = args_str[:200] + "..."
815
- else:
816
- header["args"] = args_val
817
- return json.dumps(header, ensure_ascii=False)
974
+ self._buffer_exec_start(parsed)
975
+ self._in_tool_execution = True
976
+ return "" # suppress
818
977
 
819
- # --- tool_execution_end ---
978
+ # --- tool_execution_end: combine with buffered data ---
820
979
  if event_type == "tool_execution_end":
980
+ self._in_tool_execution = False
981
+ tool_call_id = parsed.get("toolCallId")
982
+
983
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
984
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
985
+ if pending_tool and pending_exec and "started_at" in pending_exec:
986
+ pending_tool["started_at"] = pending_exec["started_at"]
987
+ pending = pending_tool or pending_exec
988
+
989
+ if pending:
990
+ return self._build_combined_tool_event(pending, parsed, now)
991
+
992
+ # No buffered data — minimal fallback
821
993
  self.message_counter += 1
822
994
  header = {
823
- "type": "tool_execution_end",
995
+ "type": "tool",
824
996
  "datetime": now,
825
997
  "counter": f"#{self.message_counter}",
826
998
  "tool": parsed.get("toolName", ""),
827
999
  }
1000
+ execution_time = self._format_execution_time(parsed)
1001
+ if execution_time:
1002
+ header["execution_time"] = execution_time
1003
+
828
1004
  is_error = parsed.get("isError", False)
829
1005
  if is_error:
830
1006
  header["isError"] = True
1007
+
831
1008
  result_val = parsed.get("result")
1009
+ colorize_error = self._color_enabled() and bool(is_error)
1010
+
1011
+ if isinstance(result_val, str) and result_val.strip():
1012
+ truncated = self._truncate_tool_result_text(result_val)
1013
+ if "\n" in truncated or colorize_error:
1014
+ label = "result:"
1015
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1016
+ if colorize_error:
1017
+ label = self._colorize_result(label, is_error=True)
1018
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1019
+ header["result"] = truncated
1020
+ return self._format_tool_invocation_header(header)
1021
+
832
1022
  if isinstance(result_val, dict):
833
- # Extract text content from result
834
1023
  result_content = result_val.get("content")
835
1024
  if isinstance(result_content, list):
836
1025
  for rc_item in result_content:
837
1026
  if isinstance(rc_item, dict) and rc_item.get("type") == "text":
838
1027
  text = rc_item.get("text", "")
839
1028
  truncated = self._truncate_tool_result_text(text)
840
- if "\n" in truncated:
841
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated
1029
+ if "\n" in truncated or colorize_error:
1030
+ label = "result:"
1031
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1032
+ if colorize_error:
1033
+ label = self._colorize_result(label, is_error=True)
1034
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
842
1035
  header["result"] = truncated
843
- return json.dumps(header, ensure_ascii=False)
844
- return json.dumps(header, ensure_ascii=False)
1036
+ return self._format_tool_invocation_header(header)
1037
+
1038
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1039
+ if "\n" in result_json or colorize_error:
1040
+ label = "result:"
1041
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1042
+ if colorize_error:
1043
+ label = self._colorize_result(label, is_error=True)
1044
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1045
+ header["result"] = result_json
1046
+ return self._format_tool_invocation_header(header)
1047
+
1048
+ if isinstance(result_val, list):
1049
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1050
+ if "\n" in result_json or colorize_error:
1051
+ label = "result:"
1052
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1053
+ if colorize_error:
1054
+ label = self._colorize_result(label, is_error=True)
1055
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1056
+ header["result"] = result_json
1057
+ return self._format_tool_invocation_header(header)
1058
+
1059
+ return self._format_tool_invocation_header(header)
1060
+
1061
+ # --- turn_start: suppress (no user-visible value) ---
1062
+ if event_type == "turn_start":
1063
+ return ""
845
1064
 
846
- # --- agent_start, turn_start: simple headers ---
847
- if event_type in ("agent_start", "turn_start"):
848
- self.message_counter += 1
1065
+ # --- agent_start: simple header (no counter — only *_end events get counters) ---
1066
+ if event_type == "agent_start":
849
1067
  return json.dumps({
850
1068
  "type": event_type,
851
1069
  "datetime": now,
852
- "counter": f"#{self.message_counter}",
853
1070
  }, ensure_ascii=False)
854
1071
 
855
1072
  # --- agent_end: capture and show summary ---
@@ -863,6 +1080,9 @@ Model shorthands:
863
1080
  messages = parsed.get("messages")
864
1081
  if isinstance(messages, list):
865
1082
  header["message_count"] = len(messages)
1083
+ total_cost_usd = self._extract_total_cost_usd(parsed)
1084
+ if total_cost_usd is not None:
1085
+ header["total_cost_usd"] = total_cost_usd
866
1086
  return json.dumps(header, ensure_ascii=False)
867
1087
 
868
1088
  # Not a Pi-wrapped event type we handle
@@ -880,7 +1100,7 @@ Model shorthands:
880
1100
  base_type = header_type or msg_type or "message"
881
1101
 
882
1102
  def make_header(type_value: str):
883
- hdr: Dict = {"type": type_value, "datetime": now}
1103
+ hdr: Dict = {"type": type_value, "datetime": now, "counter": f"#{self.message_counter}"}
884
1104
  if item_id:
885
1105
  hdr["id"] = item_id
886
1106
  if outer_type and msg_type and outer_type != msg_type:
@@ -1091,6 +1311,107 @@ Model shorthands:
1091
1311
 
1092
1312
  return ""
1093
1313
 
1314
+ def _buffer_tool_call_end(self, tool_call: dict, now: str) -> bool:
1315
+ """Buffer toolcall_end info for grouping with tool_execution_end.
1316
+
1317
+ Returns True if successfully buffered (caller should suppress output),
1318
+ False if no toolCallId present (caller should emit normally).
1319
+ """
1320
+ tc_id = tool_call.get("toolCallId", "") if isinstance(tool_call, dict) else ""
1321
+ if not tc_id:
1322
+ return False
1323
+
1324
+ pending: Dict = {"tool": tool_call.get("name", ""), "datetime": now}
1325
+ args = tool_call.get("arguments", {})
1326
+
1327
+ if isinstance(args, dict):
1328
+ cmd = args.get("command", "")
1329
+ if isinstance(cmd, str) and cmd:
1330
+ pending["command"] = self._sanitize_tool_argument_value(cmd)
1331
+ else:
1332
+ pending["args"] = self._sanitize_tool_argument_value(args)
1333
+ elif isinstance(args, str) and args.strip():
1334
+ pending["args"] = self._sanitize_tool_argument_value(args)
1335
+
1336
+ self._pending_tool_calls[tc_id] = pending
1337
+ return True
1338
+
1339
+ def _buffer_exec_start(self, payload: dict) -> None:
1340
+ """Buffer tool_execution_start data for tool_execution_end fallback + timing."""
1341
+ tc_id = payload.get("toolCallId", "")
1342
+ if not tc_id:
1343
+ return
1344
+
1345
+ pending: Dict = {
1346
+ "tool": payload.get("toolName", ""),
1347
+ "started_at": time.perf_counter(),
1348
+ }
1349
+ args_val = payload.get("args")
1350
+ if isinstance(args_val, dict):
1351
+ cmd = args_val.get("command", "")
1352
+ if isinstance(cmd, str) and cmd:
1353
+ pending["command"] = self._sanitize_tool_argument_value(cmd)
1354
+ else:
1355
+ pending["args"] = self._sanitize_tool_argument_value(args_val)
1356
+ elif isinstance(args_val, str) and args_val.strip():
1357
+ pending["args"] = self._sanitize_tool_argument_value(args_val)
1358
+
1359
+ self._pending_exec_starts[tc_id] = pending
1360
+
1361
+ def _build_combined_tool_event(self, pending: dict, payload: dict, now: str) -> str:
1362
+ """Build a combined 'tool' event from buffered toolcall_end + tool_execution_end."""
1363
+ self.message_counter += 1
1364
+ header: Dict = {
1365
+ "type": "tool",
1366
+ "datetime": now,
1367
+ "counter": f"#{self.message_counter}",
1368
+ "tool": pending.get("tool", payload.get("toolName", "")),
1369
+ }
1370
+
1371
+ # Args from buffered toolcall/tool_execution_start
1372
+ if "command" in pending:
1373
+ header["command"] = pending["command"]
1374
+ elif "args" in pending:
1375
+ header["args"] = pending["args"]
1376
+
1377
+ # Execution time (source of truth: tool_execution_start -> tool_execution_end)
1378
+ execution_time = self._format_execution_time(payload, pending)
1379
+ if execution_time:
1380
+ header["execution_time"] = execution_time
1381
+
1382
+ is_error = payload.get("isError", False)
1383
+ if is_error:
1384
+ header["isError"] = True
1385
+
1386
+ # Result extraction (handles string, dict with content array, and list)
1387
+ result_val = payload.get("result")
1388
+ result_text = None
1389
+ if isinstance(result_val, str) and result_val.strip():
1390
+ result_text = self._truncate_tool_result_text(result_val)
1391
+ elif isinstance(result_val, dict):
1392
+ result_content = result_val.get("content")
1393
+ if isinstance(result_content, list):
1394
+ for rc_item in result_content:
1395
+ if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1396
+ result_text = self._truncate_tool_result_text(rc_item.get("text", ""))
1397
+ break
1398
+ if result_text is None:
1399
+ result_text = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1400
+ elif isinstance(result_val, list):
1401
+ result_text = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1402
+
1403
+ if result_text:
1404
+ colorize_error = self._color_enabled() and bool(is_error)
1405
+ if "\n" in result_text or colorize_error:
1406
+ label = "result:"
1407
+ colored_text = self._colorize_result(result_text, is_error=bool(is_error))
1408
+ if colorize_error:
1409
+ label = self._colorize_result(label, is_error=True)
1410
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored_text
1411
+ header["result"] = result_text
1412
+
1413
+ return self._format_tool_invocation_header(header)
1414
+
1094
1415
  def _format_event_pretty(self, payload: dict) -> Optional[str]:
1095
1416
  """
1096
1417
  Format a Pi JSON streaming event for human-readable output.
@@ -1099,31 +1420,41 @@ Model shorthands:
1099
1420
  try:
1100
1421
  event_type = payload.get("type", "")
1101
1422
  now = datetime.now().strftime("%I:%M:%S %p")
1102
- self.message_counter += 1
1103
1423
 
1424
+ # Counter is only added to *_end events (below, per-branch)
1104
1425
  header: Dict = {
1105
1426
  "type": event_type,
1106
1427
  "datetime": now,
1107
- "counter": f"#{self.message_counter}",
1108
1428
  }
1109
1429
 
1110
- # --- Session header ---
1430
+ # --- Session header (no counter) ---
1111
1431
  if event_type == "session":
1112
1432
  header["version"] = payload.get("version")
1113
1433
  header["id"] = payload.get("id")
1114
1434
  return json.dumps(header, ensure_ascii=False)
1115
1435
 
1116
- # --- Agent lifecycle events ---
1117
- if event_type in ("agent_start", "turn_start"):
1436
+ # --- turn_start: suppress (no user-visible value) ---
1437
+ if event_type == "turn_start":
1438
+ return None
1439
+
1440
+ # --- agent_start: simple header (no counter) ---
1441
+ if event_type == "agent_start":
1118
1442
  return json.dumps(header, ensure_ascii=False)
1119
1443
 
1120
1444
  if event_type == "agent_end":
1445
+ self.message_counter += 1
1446
+ header["counter"] = f"#{self.message_counter}"
1121
1447
  messages = payload.get("messages")
1122
1448
  if isinstance(messages, list):
1123
1449
  header["message_count"] = len(messages)
1450
+ total_cost_usd = self._extract_total_cost_usd(payload)
1451
+ if total_cost_usd is not None:
1452
+ header["total_cost_usd"] = total_cost_usd
1124
1453
  return json.dumps(header, ensure_ascii=False)
1125
1454
 
1126
1455
  if event_type == "turn_end":
1456
+ self.message_counter += 1
1457
+ header["counter"] = f"#{self.message_counter}"
1127
1458
  tool_results = payload.get("toolResults")
1128
1459
  if isinstance(tool_results, list):
1129
1460
  header["tool_results_count"] = len(tool_results)
@@ -1146,6 +1477,43 @@ Model shorthands:
1146
1477
  if event_subtype in self._PI_HIDDEN_MESSAGE_UPDATE_EVENTS:
1147
1478
  return None # Suppress noisy streaming deltas
1148
1479
 
1480
+ # toolcall_end: buffer for grouping with tool_execution_end
1481
+ if isinstance(ame, dict) and ame_type == "toolcall_end":
1482
+ tool_call = ame.get("toolCall", {})
1483
+ if self._buffer_tool_call_end(tool_call, now):
1484
+ return None # suppress — will emit combined event on tool_execution_end
1485
+ # No toolCallId — fallback to original format
1486
+ self.message_counter += 1
1487
+ header["counter"] = f"#{self.message_counter}"
1488
+ header["event"] = ame_type
1489
+ if isinstance(tool_call, dict):
1490
+ header["tool"] = tool_call.get("name", "")
1491
+ args = tool_call.get("arguments", {})
1492
+ if isinstance(args, dict):
1493
+ cmd = args.get("command", "")
1494
+ if isinstance(cmd, str) and cmd:
1495
+ header["command"] = self._sanitize_tool_argument_value(cmd)
1496
+ else:
1497
+ header["args"] = self._sanitize_tool_argument_value(args)
1498
+ elif isinstance(args, str) and args.strip():
1499
+ header["args"] = self._sanitize_tool_argument_value(args)
1500
+ return self._format_tool_invocation_header(header)
1501
+
1502
+ # thinking_end: show thinking content (*_end → gets counter)
1503
+ if isinstance(ame, dict) and ame_type == "thinking_end":
1504
+ self.message_counter += 1
1505
+ header["counter"] = f"#{self.message_counter}"
1506
+ header["event"] = ame_type
1507
+ thinking_text = ame.get("thinking", "") or ame.get("content", "") or ame.get("text", "")
1508
+ if isinstance(thinking_text, str) and thinking_text.strip():
1509
+ header["thinking"] = thinking_text
1510
+ return json.dumps(header, ensure_ascii=False)
1511
+
1512
+ # Any other *_end subtypes (e.g. text_end) get counter
1513
+ if isinstance(ame, dict) and ame_type and ame_type.endswith("_end"):
1514
+ self.message_counter += 1
1515
+ header["counter"] = f"#{self.message_counter}"
1516
+
1149
1517
  message = payload.get("message", {})
1150
1518
  text = self._extract_text_from_message(message) if isinstance(message, dict) else ""
1151
1519
 
@@ -1165,61 +1533,103 @@ Model shorthands:
1165
1533
  return json.dumps(header, ensure_ascii=False)
1166
1534
 
1167
1535
  if event_type == "message_end":
1536
+ self.message_counter += 1
1537
+ header["counter"] = f"#{self.message_counter}"
1168
1538
  # Skip message text - already displayed by text_end/thinking_end/toolcall_end
1169
1539
  return json.dumps(header, ensure_ascii=False)
1170
1540
 
1171
1541
  # --- Tool execution events ---
1542
+ # Always suppress tool_execution_start: buffer its args for
1543
+ # tool_execution_end to use. The user sees nothing until the
1544
+ # tool finishes, then gets a single combined "tool" event.
1172
1545
  if event_type == "tool_execution_start":
1173
- header["tool"] = payload.get("toolName", "")
1174
- tool_call_id = payload.get("toolCallId")
1175
- if tool_call_id:
1176
- header["id"] = tool_call_id
1177
- args_val = payload.get("args")
1178
- if isinstance(args_val, dict):
1179
- # Show abbreviated args inline
1180
- args_str = json.dumps(args_val, ensure_ascii=False)
1181
- if len(args_str) > 200:
1182
- # Truncate for readability
1183
- header["args"] = args_str[:200] + "..."
1184
- else:
1185
- header["args"] = args_val
1186
- elif isinstance(args_val, str) and args_val.strip():
1187
- if "\n" in args_val:
1188
- return json.dumps(header, ensure_ascii=False) + "\nargs:\n" + args_val
1189
- header["args"] = args_val
1190
- return json.dumps(header, ensure_ascii=False)
1546
+ self._buffer_exec_start(payload)
1547
+ self._in_tool_execution = True
1548
+ return None
1191
1549
 
1192
1550
  if event_type == "tool_execution_update":
1193
- header["tool"] = payload.get("toolName", "")
1194
- tool_call_id = payload.get("toolCallId")
1195
- if tool_call_id:
1196
- header["id"] = tool_call_id
1197
- partial = payload.get("partialResult")
1198
- if isinstance(partial, str) and partial.strip():
1199
- if "\n" in partial:
1200
- return json.dumps(header, ensure_ascii=False) + "\npartialResult:\n" + partial
1201
- header["partialResult"] = partial
1202
- return json.dumps(header, ensure_ascii=False)
1551
+ # Suppress updates — result will arrive in tool_execution_end
1552
+ return None
1203
1553
 
1204
1554
  if event_type == "tool_execution_end":
1205
- header["tool"] = payload.get("toolName", "")
1555
+ self._in_tool_execution = False
1206
1556
  tool_call_id = payload.get("toolCallId")
1207
- if tool_call_id:
1208
- header["id"] = tool_call_id
1557
+
1558
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
1559
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
1560
+ if pending_tool and pending_exec and "started_at" in pending_exec:
1561
+ pending_tool["started_at"] = pending_exec["started_at"]
1562
+ pending = pending_tool or pending_exec
1563
+
1564
+ if pending:
1565
+ return self._build_combined_tool_event(pending, payload, now)
1566
+
1567
+ # No buffered data at all — minimal fallback
1568
+ self.message_counter += 1
1569
+ header["type"] = "tool"
1570
+ header["counter"] = f"#{self.message_counter}"
1571
+ header["tool"] = payload.get("toolName", "")
1572
+
1573
+ execution_time = self._format_execution_time(payload)
1574
+ if execution_time:
1575
+ header["execution_time"] = execution_time
1576
+
1209
1577
  is_error = payload.get("isError", False)
1210
1578
  if is_error:
1211
1579
  header["isError"] = True
1580
+
1212
1581
  result_val = payload.get("result")
1582
+ colorize_error = self._color_enabled() and bool(is_error)
1583
+
1213
1584
  if isinstance(result_val, str) and result_val.strip():
1214
- if "\n" in result_val:
1215
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + result_val
1216
- header["result"] = result_val
1217
- elif isinstance(result_val, (dict, list)):
1218
- result_str = json.dumps(result_val, ensure_ascii=False)
1219
- if "\n" in result_str or len(result_str) > 200:
1220
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + result_str
1221
- header["result"] = result_val
1222
- return json.dumps(header, ensure_ascii=False)
1585
+ truncated = self._truncate_tool_result_text(result_val)
1586
+ if "\n" in truncated or colorize_error:
1587
+ label = "result:"
1588
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1589
+ if colorize_error:
1590
+ label = self._colorize_result(label, is_error=True)
1591
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1592
+ header["result"] = truncated
1593
+ return self._format_tool_invocation_header(header)
1594
+
1595
+ if isinstance(result_val, dict):
1596
+ result_content = result_val.get("content")
1597
+ if isinstance(result_content, list):
1598
+ for rc_item in result_content:
1599
+ if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1600
+ text = rc_item.get("text", "")
1601
+ truncated = self._truncate_tool_result_text(text)
1602
+ if "\n" in truncated or colorize_error:
1603
+ label = "result:"
1604
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1605
+ if colorize_error:
1606
+ label = self._colorize_result(label, is_error=True)
1607
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1608
+ header["result"] = truncated
1609
+ return self._format_tool_invocation_header(header)
1610
+
1611
+ result_str = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1612
+ if "\n" in result_str or len(result_str) > 200 or colorize_error:
1613
+ label = "result:"
1614
+ colored = self._colorize_result(result_str, is_error=bool(is_error))
1615
+ if colorize_error:
1616
+ label = self._colorize_result(label, is_error=True)
1617
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1618
+ header["result"] = result_str
1619
+ return self._format_tool_invocation_header(header)
1620
+
1621
+ if isinstance(result_val, list):
1622
+ result_str = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1623
+ if "\n" in result_str or len(result_str) > 200 or colorize_error:
1624
+ label = "result:"
1625
+ colored = self._colorize_result(result_str, is_error=bool(is_error))
1626
+ if colorize_error:
1627
+ label = self._colorize_result(label, is_error=True)
1628
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored
1629
+ header["result"] = result_str
1630
+ return self._format_tool_invocation_header(header)
1631
+
1632
+ return self._format_tool_invocation_header(header)
1223
1633
 
1224
1634
  # --- Retry/compaction events ---
1225
1635
  if event_type == "auto_retry_start":
@@ -1232,6 +1642,8 @@ Model shorthands:
1232
1642
  return json.dumps(header, ensure_ascii=False)
1233
1643
 
1234
1644
  if event_type == "auto_retry_end":
1645
+ self.message_counter += 1
1646
+ header["counter"] = f"#{self.message_counter}"
1235
1647
  header["success"] = payload.get("success")
1236
1648
  header["attempt"] = payload.get("attempt")
1237
1649
  final_err = payload.get("finalError")
@@ -1277,7 +1689,7 @@ Model shorthands:
1277
1689
  return delta
1278
1690
  return ""
1279
1691
 
1280
- # Section start markers
1692
+ # Section start markers (no counter — only *_end events get counters)
1281
1693
  if ame_type == "text_start":
1282
1694
  return json.dumps({"type": "text_start", "datetime": now}) + "\n"
1283
1695
 
@@ -1286,26 +1698,33 @@ Model shorthands:
1286
1698
 
1287
1699
  # Section end markers (text was already streamed)
1288
1700
  if ame_type == "text_end":
1289
- return "\n" + json.dumps({"type": "text_end", "datetime": now}) + "\n"
1701
+ self.message_counter += 1
1702
+ return "\n" + json.dumps({"type": "text_end", "datetime": now, "counter": f"#{self.message_counter}"}) + "\n"
1290
1703
 
1291
1704
  if ame_type == "thinking_end":
1292
- return "\n" + json.dumps({"type": "thinking_end", "datetime": now}) + "\n"
1705
+ self.message_counter += 1
1706
+ return "\n" + json.dumps({"type": "thinking_end", "datetime": now, "counter": f"#{self.message_counter}"}) + "\n"
1293
1707
 
1294
- # Tool call end: show tool info
1708
+ # Tool call end: buffer for grouping with tool_execution_end
1295
1709
  if ame_type == "toolcall_end":
1296
1710
  tc = ame.get("toolCall", {})
1297
- header = {"type": "toolcall_end", "datetime": now}
1711
+ if self._buffer_tool_call_end(tc, now):
1712
+ return "" # suppress — will emit combined event on tool_execution_end
1713
+ # No toolCallId — fallback to original format
1714
+ self.message_counter += 1
1715
+ header = {"type": "toolcall_end", "datetime": now, "counter": f"#{self.message_counter}"}
1298
1716
  if isinstance(tc, dict):
1299
1717
  header["tool"] = tc.get("name", "")
1300
1718
  args = tc.get("arguments", {})
1301
1719
  if isinstance(args, dict):
1302
1720
  cmd = args.get("command", "")
1303
- if cmd:
1304
- header["command"] = cmd
1721
+ if isinstance(cmd, str) and cmd:
1722
+ header["command"] = self._sanitize_tool_argument_value(cmd)
1305
1723
  else:
1306
- args_str = json.dumps(args, ensure_ascii=False)
1307
- header["args"] = args_str[:200] + "..." if len(args_str) > 200 else args
1308
- return json.dumps(header, ensure_ascii=False) + "\n"
1724
+ header["args"] = self._sanitize_tool_argument_value(args)
1725
+ elif isinstance(args, str) and args.strip():
1726
+ header["args"] = self._sanitize_tool_argument_value(args)
1727
+ return self._format_tool_invocation_header(header) + "\n"
1309
1728
 
1310
1729
  # Suppress all other message_update subtypes (toolcall_start, toolcall_delta, etc.)
1311
1730
  return ""
@@ -1314,69 +1733,224 @@ Model shorthands:
1314
1733
  if event_type in ("message_start", "message_end"):
1315
1734
  return ""
1316
1735
 
1317
- # tool_execution_start
1736
+ # tool_execution_start: always suppress, buffer args
1318
1737
  if event_type == "tool_execution_start":
1319
- header = {
1320
- "type": "tool_execution_start",
1321
- "datetime": now,
1322
- "tool": parsed.get("toolName", ""),
1323
- }
1324
- args_val = parsed.get("args")
1325
- if isinstance(args_val, dict):
1326
- args_str = json.dumps(args_val, ensure_ascii=False)
1327
- if len(args_str) > 200:
1328
- header["args"] = args_str[:200] + "..."
1329
- else:
1330
- header["args"] = args_val
1331
- return json.dumps(header, ensure_ascii=False) + "\n"
1738
+ self._buffer_exec_start(parsed)
1739
+ self._in_tool_execution = True
1740
+ return "" # suppress
1332
1741
 
1333
- # tool_execution_end
1742
+ # tool_execution_end: combine with buffered data
1334
1743
  if event_type == "tool_execution_end":
1744
+ self._in_tool_execution = False
1745
+ tool_call_id = parsed.get("toolCallId")
1746
+
1747
+ pending_tool = self._pending_tool_calls.pop(tool_call_id, None) if tool_call_id else None
1748
+ pending_exec = self._pending_exec_starts.pop(tool_call_id, None) if tool_call_id else None
1749
+ if pending_tool and pending_exec and "started_at" in pending_exec:
1750
+ pending_tool["started_at"] = pending_exec["started_at"]
1751
+ pending = pending_tool or pending_exec
1752
+
1753
+ if pending:
1754
+ return self._build_combined_tool_event(pending, parsed, now) + "\n"
1755
+
1756
+ # No buffered data — minimal fallback
1757
+ self.message_counter += 1
1335
1758
  header = {
1336
- "type": "tool_execution_end",
1759
+ "type": "tool",
1337
1760
  "datetime": now,
1761
+ "counter": f"#{self.message_counter}",
1338
1762
  "tool": parsed.get("toolName", ""),
1339
1763
  }
1764
+ execution_time = self._format_execution_time(parsed)
1765
+ if execution_time:
1766
+ header["execution_time"] = execution_time
1767
+
1340
1768
  is_error = parsed.get("isError", False)
1341
1769
  if is_error:
1342
1770
  header["isError"] = True
1771
+
1343
1772
  result_val = parsed.get("result")
1773
+ colorize_error = self._color_enabled() and bool(is_error)
1774
+
1344
1775
  if isinstance(result_val, str) and result_val.strip():
1345
1776
  truncated = self._truncate_tool_result_text(result_val)
1346
- if "\n" in truncated:
1347
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated + "\n"
1777
+ if "\n" in truncated or colorize_error:
1778
+ label = "result:"
1779
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1780
+ if colorize_error:
1781
+ label = self._colorize_result(label, is_error=True)
1782
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1348
1783
  header["result"] = truncated
1349
- elif isinstance(result_val, dict):
1784
+ return self._format_tool_invocation_header(header) + "\n"
1785
+
1786
+ if isinstance(result_val, dict):
1350
1787
  result_content = result_val.get("content")
1351
1788
  if isinstance(result_content, list):
1352
1789
  for rc_item in result_content:
1353
1790
  if isinstance(rc_item, dict) and rc_item.get("type") == "text":
1354
1791
  text = rc_item.get("text", "")
1355
1792
  truncated = self._truncate_tool_result_text(text)
1356
- if "\n" in truncated:
1357
- return json.dumps(header, ensure_ascii=False) + "\nresult:\n" + truncated + "\n"
1793
+ if "\n" in truncated or colorize_error:
1794
+ label = "result:"
1795
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1796
+ if colorize_error:
1797
+ label = self._colorize_result(label, is_error=True)
1798
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1358
1799
  header["result"] = truncated
1359
- break
1360
- return json.dumps(header, ensure_ascii=False) + "\n"
1800
+ return self._format_tool_invocation_header(header) + "\n"
1801
+
1802
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1803
+ if "\n" in result_json or colorize_error:
1804
+ label = "result:"
1805
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1806
+ if colorize_error:
1807
+ label = self._colorize_result(label, is_error=True)
1808
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1809
+ header["result"] = result_json
1810
+ return self._format_tool_invocation_header(header) + "\n"
1811
+
1812
+ if isinstance(result_val, list):
1813
+ result_json = self._strip_ansi_sequences(json.dumps(result_val, ensure_ascii=False))
1814
+ if "\n" in result_json or colorize_error:
1815
+ label = "result:"
1816
+ colored = self._colorize_result(result_json, is_error=bool(is_error))
1817
+ if colorize_error:
1818
+ label = self._colorize_result(label, is_error=True)
1819
+ return self._format_tool_invocation_header(header) + "\n" + label + "\n" + colored + "\n"
1820
+ header["result"] = result_json
1821
+ return self._format_tool_invocation_header(header) + "\n"
1822
+
1823
+ return self._format_tool_invocation_header(header) + "\n"
1361
1824
 
1362
1825
  # turn_end: metadata only
1363
1826
  if event_type == "turn_end":
1364
- header = {"type": "turn_end", "datetime": now}
1827
+ self.message_counter += 1
1828
+ header = {"type": "turn_end", "datetime": now, "counter": f"#{self.message_counter}"}
1365
1829
  tool_results = parsed.get("toolResults")
1366
1830
  if isinstance(tool_results, list):
1367
1831
  header["tool_results_count"] = len(tool_results)
1368
1832
  return json.dumps(header, ensure_ascii=False) + "\n"
1369
1833
 
1370
- # agent_start, turn_start
1371
- if event_type in ("agent_start", "turn_start"):
1834
+ # turn_start: suppress (no user-visible value)
1835
+ if event_type == "turn_start":
1836
+ return ""
1837
+
1838
+ # agent_start (no counter — only *_end events get counters)
1839
+ if event_type == "agent_start":
1372
1840
  return json.dumps({"type": event_type, "datetime": now}) + "\n"
1373
1841
 
1374
1842
  # agent_end
1375
1843
  if event_type == "agent_end":
1376
- header = {"type": "agent_end", "datetime": now}
1844
+ self.message_counter += 1
1845
+ header = {"type": "agent_end", "datetime": now, "counter": f"#{self.message_counter}"}
1377
1846
  messages = parsed.get("messages")
1378
1847
  if isinstance(messages, list):
1379
1848
  header["message_count"] = len(messages)
1849
+ total_cost_usd = self._extract_total_cost_usd(parsed)
1850
+ if total_cost_usd is not None:
1851
+ header["total_cost_usd"] = total_cost_usd
1852
+ return json.dumps(header, ensure_ascii=False) + "\n"
1853
+
1854
+ # --- Role-based messages (Pi-wrapped Codex messages) ---
1855
+ role = parsed.get("role", "")
1856
+ if role == "toolResult":
1857
+ self.message_counter += 1
1858
+ header = {
1859
+ "type": "toolResult",
1860
+ "datetime": now,
1861
+ "counter": f"#{self.message_counter}",
1862
+ "toolName": parsed.get("toolName", ""),
1863
+ }
1864
+ is_error = parsed.get("isError", False)
1865
+ if is_error:
1866
+ header["isError"] = True
1867
+ content = parsed.get("content")
1868
+ if isinstance(content, list):
1869
+ for item in content:
1870
+ if isinstance(item, dict) and item.get("type") == "text":
1871
+ text_val = item.get("text", "")
1872
+ truncated = self._truncate_tool_result_text(text_val)
1873
+ use_color = self._color_enabled()
1874
+ if "\n" in truncated or use_color:
1875
+ colored = self._colorize_result(truncated, is_error=bool(is_error))
1876
+ label = self._colorize_result("content:", is_error=bool(is_error))
1877
+ return json.dumps(header, ensure_ascii=False) + "\n" + label + "\n" + colored + "\n"
1878
+ header["content"] = truncated
1879
+ return json.dumps(header, ensure_ascii=False) + "\n"
1880
+ return json.dumps(header, ensure_ascii=False) + "\n"
1881
+
1882
+ if role == "assistant":
1883
+ self.message_counter += 1
1884
+ content = parsed.get("content")
1885
+ if isinstance(content, list):
1886
+ self._strip_thinking_signature(content)
1887
+ header = {"type": "assistant", "datetime": now, "counter": f"#{self.message_counter}"}
1888
+ text_parts = []
1889
+ if isinstance(content, list):
1890
+ for item in content:
1891
+ if isinstance(item, dict):
1892
+ if item.get("type") == "text":
1893
+ text_parts.append(item.get("text", ""))
1894
+ elif item.get("type") == "thinking":
1895
+ text_parts.append(f"[thinking] {item.get('thinking', '')}")
1896
+ elif item.get("type") == "toolCall":
1897
+ name = item.get("name", "")
1898
+ args = item.get("arguments", {})
1899
+ cmd = args.get("command", "") if isinstance(args, dict) else ""
1900
+ text_parts.append(f"[toolCall] {name}: {cmd}" if cmd else f"[toolCall] {name}")
1901
+ if text_parts:
1902
+ combined = "\n".join(text_parts)
1903
+ if "\n" in combined:
1904
+ return json.dumps(header, ensure_ascii=False) + "\n" + combined + "\n"
1905
+ header["content"] = combined
1906
+ return json.dumps(header, ensure_ascii=False) + "\n"
1907
+
1908
+ if role:
1909
+ # Other roles — minimal JSON header
1910
+ self.message_counter += 1
1911
+ return json.dumps({"type": role, "datetime": now, "counter": f"#{self.message_counter}"}, ensure_ascii=False) + "\n"
1912
+
1913
+ # --- Native Codex events (agent_reasoning, agent_message, exec_command_end, etc.) ---
1914
+ msg_type, payload, outer_type = self._normalize_codex_event(parsed)
1915
+
1916
+ if msg_type in ("agent_reasoning", "reasoning"):
1917
+ self.message_counter += 1
1918
+ content = self._extract_reasoning_text(payload)
1919
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1920
+ if "\n" in content:
1921
+ return json.dumps(header, ensure_ascii=False) + "\ntext:\n" + content + "\n"
1922
+ if content:
1923
+ header["text"] = content
1924
+ return json.dumps(header, ensure_ascii=False) + "\n"
1925
+
1926
+ if msg_type in ("agent_message", "assistant_message"):
1927
+ self.message_counter += 1
1928
+ content = self._extract_message_text_codex(payload)
1929
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1930
+ if "\n" in content:
1931
+ return json.dumps(header, ensure_ascii=False) + "\nmessage:\n" + content + "\n"
1932
+ if content:
1933
+ header["message"] = content
1934
+ return json.dumps(header, ensure_ascii=False) + "\n"
1935
+
1936
+ if msg_type == "exec_command_end":
1937
+ self.message_counter += 1
1938
+ formatted_output = payload.get("formatted_output", "") if isinstance(payload, dict) else ""
1939
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1940
+ if "\n" in formatted_output:
1941
+ return json.dumps(header, ensure_ascii=False) + "\nformatted_output:\n" + formatted_output + "\n"
1942
+ if formatted_output:
1943
+ header["formatted_output"] = formatted_output
1944
+ return json.dumps(header, ensure_ascii=False) + "\n"
1945
+
1946
+ if msg_type == "command_execution":
1947
+ self.message_counter += 1
1948
+ aggregated_output = self._extract_command_output_text(payload)
1949
+ header = {"type": msg_type, "datetime": now, "counter": f"#{self.message_counter}"}
1950
+ if "\n" in aggregated_output:
1951
+ return json.dumps(header, ensure_ascii=False) + "\naggregated_output:\n" + aggregated_output + "\n"
1952
+ if aggregated_output:
1953
+ header["aggregated_output"] = aggregated_output
1380
1954
  return json.dumps(header, ensure_ascii=False) + "\n"
1381
1955
 
1382
1956
  # Fallback: not handled
@@ -1392,23 +1966,778 @@ Model shorthands:
1392
1966
  hide_types.update(parts)
1393
1967
  return hide_types
1394
1968
 
1969
+ @staticmethod
1970
+ def _toolcall_end_delay_seconds() -> float:
1971
+ """Return delay for fallback toolcall_end visibility (default 3s)."""
1972
+ raw = os.environ.get("PI_TOOLCALL_END_DELAY_SECONDS", "3")
1973
+ try:
1974
+ delay = float(raw)
1975
+ except (TypeError, ValueError):
1976
+ delay = 3.0
1977
+ return max(0.0, delay)
1978
+
1395
1979
  @staticmethod
1396
1980
  def _sanitize_sub_agent_response(event: dict) -> dict:
1397
1981
  """Strip bulky fields (messages, type) from sub_agent_response to reduce token usage."""
1398
1982
  return {k: v for k, v in event.items() if k not in ("messages", "type")}
1399
1983
 
1984
+ def _reset_run_cost_tracking(self) -> None:
1985
+ """Reset per-run usage/cost accumulation state."""
1986
+ self._run_usage_totals = None
1987
+ self._run_total_cost_usd = None
1988
+ self._run_seen_usage_keys.clear()
1989
+
1990
+ @staticmethod
1991
+ def _is_numeric_value(value: object) -> bool:
1992
+ """True for int/float values (excluding bool)."""
1993
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
1994
+
1995
+ @staticmethod
1996
+ def _normalize_usage_payload(usage: dict) -> Optional[dict]:
1997
+ """Normalize usage payload into numeric totals for accumulation."""
1998
+ if not isinstance(usage, dict):
1999
+ return None
2000
+
2001
+ usage_cost = usage.get("cost")
2002
+ cost_payload = usage_cost if isinstance(usage_cost, dict) else {}
2003
+
2004
+ input_tokens = float(usage.get("input")) if PiService._is_numeric_value(usage.get("input")) else 0.0
2005
+ output_tokens = float(usage.get("output")) if PiService._is_numeric_value(usage.get("output")) else 0.0
2006
+ cache_read_tokens = float(usage.get("cacheRead")) if PiService._is_numeric_value(usage.get("cacheRead")) else 0.0
2007
+ cache_write_tokens = float(usage.get("cacheWrite")) if PiService._is_numeric_value(usage.get("cacheWrite")) else 0.0
2008
+
2009
+ total_tokens_raw = usage.get("totalTokens")
2010
+ total_tokens = (
2011
+ float(total_tokens_raw)
2012
+ if PiService._is_numeric_value(total_tokens_raw)
2013
+ else input_tokens + output_tokens + cache_read_tokens + cache_write_tokens
2014
+ )
2015
+
2016
+ cost_input = float(cost_payload.get("input")) if PiService._is_numeric_value(cost_payload.get("input")) else 0.0
2017
+ cost_output = float(cost_payload.get("output")) if PiService._is_numeric_value(cost_payload.get("output")) else 0.0
2018
+ cost_cache_read = (
2019
+ float(cost_payload.get("cacheRead")) if PiService._is_numeric_value(cost_payload.get("cacheRead")) else 0.0
2020
+ )
2021
+ cost_cache_write = (
2022
+ float(cost_payload.get("cacheWrite")) if PiService._is_numeric_value(cost_payload.get("cacheWrite")) else 0.0
2023
+ )
2024
+
2025
+ cost_total_raw = cost_payload.get("total")
2026
+ cost_total = (
2027
+ float(cost_total_raw)
2028
+ if PiService._is_numeric_value(cost_total_raw)
2029
+ else cost_input + cost_output + cost_cache_read + cost_cache_write
2030
+ )
2031
+
2032
+ has_any_value = any(
2033
+ PiService._is_numeric_value(v)
2034
+ for v in (
2035
+ usage.get("input"),
2036
+ usage.get("output"),
2037
+ usage.get("cacheRead"),
2038
+ usage.get("cacheWrite"),
2039
+ usage.get("totalTokens"),
2040
+ cost_payload.get("input"),
2041
+ cost_payload.get("output"),
2042
+ cost_payload.get("cacheRead"),
2043
+ cost_payload.get("cacheWrite"),
2044
+ cost_payload.get("total"),
2045
+ )
2046
+ )
2047
+
2048
+ if not has_any_value:
2049
+ return None
2050
+
2051
+ return {
2052
+ "input": input_tokens,
2053
+ "output": output_tokens,
2054
+ "cacheRead": cache_read_tokens,
2055
+ "cacheWrite": cache_write_tokens,
2056
+ "totalTokens": total_tokens,
2057
+ "cost": {
2058
+ "input": cost_input,
2059
+ "output": cost_output,
2060
+ "cacheRead": cost_cache_read,
2061
+ "cacheWrite": cost_cache_write,
2062
+ "total": cost_total,
2063
+ },
2064
+ }
2065
+
2066
+ @staticmethod
2067
+ def _merge_usage_payloads(base: Optional[dict], delta: Optional[dict]) -> Optional[dict]:
2068
+ """Merge normalized usage payloads by summing token/cost fields."""
2069
+ if not isinstance(base, dict):
2070
+ return delta
2071
+ if not isinstance(delta, dict):
2072
+ return base
2073
+
2074
+ base_cost = base.get("cost") if isinstance(base.get("cost"), dict) else {}
2075
+ delta_cost = delta.get("cost") if isinstance(delta.get("cost"), dict) else {}
2076
+
2077
+ return {
2078
+ "input": float(base.get("input", 0.0)) + float(delta.get("input", 0.0)),
2079
+ "output": float(base.get("output", 0.0)) + float(delta.get("output", 0.0)),
2080
+ "cacheRead": float(base.get("cacheRead", 0.0)) + float(delta.get("cacheRead", 0.0)),
2081
+ "cacheWrite": float(base.get("cacheWrite", 0.0)) + float(delta.get("cacheWrite", 0.0)),
2082
+ "totalTokens": float(base.get("totalTokens", 0.0)) + float(delta.get("totalTokens", 0.0)),
2083
+ "cost": {
2084
+ "input": float(base_cost.get("input", 0.0)) + float(delta_cost.get("input", 0.0)),
2085
+ "output": float(base_cost.get("output", 0.0)) + float(delta_cost.get("output", 0.0)),
2086
+ "cacheRead": float(base_cost.get("cacheRead", 0.0)) + float(delta_cost.get("cacheRead", 0.0)),
2087
+ "cacheWrite": float(base_cost.get("cacheWrite", 0.0)) + float(delta_cost.get("cacheWrite", 0.0)),
2088
+ "total": float(base_cost.get("total", 0.0)) + float(delta_cost.get("total", 0.0)),
2089
+ },
2090
+ }
2091
+
2092
+ @staticmethod
2093
+ def _aggregate_assistant_usages(messages: list) -> Optional[dict]:
2094
+ """Aggregate assistant usage payloads from an event messages array."""
2095
+ if not isinstance(messages, list):
2096
+ return None
2097
+
2098
+ assistant_usages: List[dict] = []
2099
+ for msg in messages:
2100
+ if isinstance(msg, dict) and msg.get("role") == "assistant":
2101
+ usage = msg.get("usage")
2102
+ if isinstance(usage, dict):
2103
+ assistant_usages.append(usage)
2104
+
2105
+ if not assistant_usages:
2106
+ return None
2107
+ if len(assistant_usages) == 1:
2108
+ return assistant_usages[0]
2109
+
2110
+ totals: Optional[dict] = None
2111
+ for usage in assistant_usages:
2112
+ normalized = PiService._normalize_usage_payload(usage)
2113
+ totals = PiService._merge_usage_payloads(totals, normalized)
2114
+
2115
+ return totals
2116
+
2117
+ def _assistant_usage_dedupe_key(self, message: dict, usage: dict) -> Optional[str]:
2118
+ """Build a stable dedupe key for assistant usage seen across message/turn_end events."""
2119
+ if not isinstance(message, dict) or not isinstance(usage, dict):
2120
+ return None
2121
+
2122
+ for id_key in ("id", "messageId", "message_id"):
2123
+ value = message.get(id_key)
2124
+ if isinstance(value, str) and value.strip():
2125
+ return f"id:{value.strip()}"
2126
+
2127
+ timestamp = message.get("timestamp")
2128
+ if self._is_numeric_value(timestamp):
2129
+ return f"ts:{int(float(timestamp))}"
2130
+ if isinstance(timestamp, str) and timestamp.strip():
2131
+ return f"ts:{timestamp.strip()}"
2132
+
2133
+ usage_cost = usage.get("cost") if isinstance(usage.get("cost"), dict) else {}
2134
+ signature: Dict[str, object] = {
2135
+ "stopReason": message.get("stopReason") if isinstance(message.get("stopReason"), str) else "",
2136
+ "input": usage.get("input", 0.0),
2137
+ "output": usage.get("output", 0.0),
2138
+ "cacheRead": usage.get("cacheRead", 0.0),
2139
+ "cacheWrite": usage.get("cacheWrite", 0.0),
2140
+ "totalTokens": usage.get("totalTokens", 0.0),
2141
+ "costTotal": usage_cost.get("total", 0.0),
2142
+ }
2143
+
2144
+ text = self._extract_text_from_message(message)
2145
+ if text:
2146
+ signature["text"] = text[:120]
2147
+
2148
+ return "sig:" + json.dumps(signature, sort_keys=True, ensure_ascii=False)
2149
+
2150
    def _track_assistant_usage_from_event(self, event: dict) -> None:
        """Accumulate per-run assistant usage from stream events.

        Only ``message``/``message_end``/``turn_end`` events whose message role
        is ``assistant`` are considered. The usage payload is normalized, then
        de-duplicated via a per-run signature set before being merged into
        ``self._run_usage_totals``; the cumulative USD cost is re-derived
        after each merge.
        """
        if not isinstance(event, dict):
            return

        event_type = event.get("type")
        if event_type not in ("message", "message_end", "turn_end"):
            return

        message = event.get("message")
        if not isinstance(message, dict) or message.get("role") != "assistant":
            return

        # Normalize first so equivalent payload shapes produce the same key.
        normalized_usage = self._normalize_usage_payload(message.get("usage"))
        if not isinstance(normalized_usage, dict):
            return

        # Skip usage already counted this run — presumably the same assistant
        # message can surface through more than one event type; TODO confirm.
        usage_key = self._assistant_usage_dedupe_key(message, normalized_usage)
        if usage_key and usage_key in self._run_seen_usage_keys:
            return
        if usage_key:
            self._run_seen_usage_keys.add(usage_key)

        self._run_usage_totals = self._merge_usage_payloads(self._run_usage_totals, normalized_usage)
        # Re-derive the cumulative cost from the merged totals via the shared
        # extractor (wrapping totals in the event shape it expects).
        self._run_total_cost_usd = self._extract_total_cost_usd(
            {"usage": self._run_usage_totals},
            self._run_usage_totals,
        )
2178
+
2179
+ def _get_accumulated_total_cost_usd(self) -> Optional[float]:
2180
+ """Return accumulated per-run total cost when available."""
2181
+ if self._is_numeric_value(self._run_total_cost_usd):
2182
+ return float(self._run_total_cost_usd)
2183
+ if isinstance(self._run_usage_totals, dict):
2184
+ return self._extract_total_cost_usd({"usage": self._run_usage_totals}, self._run_usage_totals)
2185
+ return None
2186
+
2187
+ @staticmethod
2188
+ def _extract_usage_from_event(event: dict) -> Optional[dict]:
2189
+ """Extract usage payload from Pi event shapes (event/message/messages)."""
2190
+ if not isinstance(event, dict):
2191
+ return None
2192
+
2193
+ messages = event.get("messages")
2194
+ if event.get("type") == "agent_end" and isinstance(messages, list):
2195
+ aggregated = PiService._aggregate_assistant_usages(messages)
2196
+ if isinstance(aggregated, dict):
2197
+ return aggregated
2198
+
2199
+ direct_usage = event.get("usage")
2200
+ if isinstance(direct_usage, dict):
2201
+ return direct_usage
2202
+
2203
+ message = event.get("message")
2204
+ if isinstance(message, dict):
2205
+ message_usage = message.get("usage")
2206
+ if isinstance(message_usage, dict):
2207
+ return message_usage
2208
+
2209
+ if isinstance(messages, list):
2210
+ aggregated = PiService._aggregate_assistant_usages(messages)
2211
+ if isinstance(aggregated, dict):
2212
+ return aggregated
2213
+
2214
+ return None
2215
+
2216
+ @staticmethod
2217
+ def _extract_total_cost_usd(event: dict, usage: Optional[dict] = None) -> Optional[float]:
2218
+ """Extract total USD cost from explicit fields or usage.cost.total."""
2219
+ if not isinstance(event, dict):
2220
+ return None
2221
+
2222
+ for key in ("total_cost_usd", "totalCostUsd", "totalCostUSD"):
2223
+ value = event.get(key)
2224
+ if PiService._is_numeric_value(value):
2225
+ return float(value)
2226
+
2227
+ direct_cost = event.get("cost")
2228
+ if PiService._is_numeric_value(direct_cost):
2229
+ return float(direct_cost)
2230
+ if isinstance(direct_cost, dict):
2231
+ total = direct_cost.get("total")
2232
+ if PiService._is_numeric_value(total):
2233
+ return float(total)
2234
+
2235
+ usage_payload = usage if isinstance(usage, dict) else None
2236
+ if usage_payload is None:
2237
+ usage_payload = PiService._extract_usage_from_event(event)
2238
+
2239
+ if isinstance(usage_payload, dict):
2240
+ usage_cost = usage_payload.get("cost")
2241
+ if isinstance(usage_cost, dict):
2242
+ total = usage_cost.get("total")
2243
+ if PiService._is_numeric_value(total):
2244
+ return float(total)
2245
+
2246
+ return None
2247
+
2248
+ @staticmethod
2249
+ def _is_error_result_event(event: Optional[dict]) -> bool:
2250
+ """Return True when event represents a terminal error payload."""
2251
+ if not isinstance(event, dict):
2252
+ return False
2253
+
2254
+ if event.get("is_error") is True:
2255
+ return True
2256
+
2257
+ subtype = event.get("subtype")
2258
+ if isinstance(subtype, str) and subtype.lower() == "error":
2259
+ return True
2260
+
2261
+ event_type = event.get("type")
2262
+ if isinstance(event_type, str) and event_type.lower() in {"error", "turn.failed", "turn_failed"}:
2263
+ return True
2264
+
2265
+ return False
2266
+
2267
+ @staticmethod
2268
+ def _is_success_result_event(event: Optional[dict]) -> bool:
2269
+ """Return True when event is an explicit successful result envelope."""
2270
+ if not isinstance(event, dict):
2271
+ return False
2272
+
2273
+ if PiService._is_error_result_event(event):
2274
+ return False
2275
+
2276
+ subtype = event.get("subtype")
2277
+ if isinstance(subtype, str) and subtype.lower() == "success":
2278
+ return True
2279
+
2280
+ event_type = event.get("type")
2281
+ if isinstance(event_type, str) and event_type.lower() == "result" and event.get("is_error") is False:
2282
+ result_value = event.get("result")
2283
+ if isinstance(result_value, str):
2284
+ return bool(result_value.strip())
2285
+ if result_value not in (None, "", [], {}):
2286
+ return True
2287
+
2288
+ return False
2289
+
2290
+ @staticmethod
2291
+ def _extract_error_message_from_event(event: dict) -> Optional[str]:
2292
+ """Extract a human-readable message from Pi/Codex error event shapes."""
2293
+ if not isinstance(event, dict):
2294
+ return None
2295
+
2296
+ if not PiService._is_error_result_event(event):
2297
+ return None
2298
+
2299
+ def _stringify_error(value: object) -> Optional[str]:
2300
+ if isinstance(value, str):
2301
+ text = value.strip()
2302
+ return text if text else None
2303
+ if isinstance(value, dict):
2304
+ nested_message = value.get("message")
2305
+ if isinstance(nested_message, str) and nested_message.strip():
2306
+ return nested_message.strip()
2307
+ nested_error = value.get("error")
2308
+ if isinstance(nested_error, str) and nested_error.strip():
2309
+ return nested_error.strip()
2310
+ try:
2311
+ return json.dumps(value, ensure_ascii=False)
2312
+ except Exception:
2313
+ return str(value)
2314
+ if value is not None:
2315
+ return str(value)
2316
+ return None
2317
+
2318
+ for key in ("error", "message", "errorMessage", "result"):
2319
+ extracted = _stringify_error(event.get(key))
2320
+ if extracted:
2321
+ return extracted
2322
+
2323
+ return "Unknown Pi error"
2324
+
2325
+ @staticmethod
2326
+ def _extract_error_message_from_text(raw_text: str) -> Optional[str]:
2327
+ """Extract an error message from stderr/plaintext lines."""
2328
+ if not isinstance(raw_text, str):
2329
+ return None
2330
+
2331
+ text = raw_text.strip()
2332
+ if not text:
2333
+ return None
2334
+
2335
+ # Direct JSON line
2336
+ try:
2337
+ parsed = json.loads(text)
2338
+ extracted = PiService._extract_error_message_from_event(parsed)
2339
+ if extracted:
2340
+ return extracted
2341
+ except Exception:
2342
+ pass
2343
+
2344
+ # Prefix + JSON payload pattern (e.g. "Error: Codex error: {...}")
2345
+ json_start = text.find("{")
2346
+ if json_start > 0:
2347
+ json_candidate = text[json_start:]
2348
+ try:
2349
+ parsed = json.loads(json_candidate)
2350
+ extracted = PiService._extract_error_message_from_event(parsed)
2351
+ if extracted:
2352
+ return extracted
2353
+ except Exception:
2354
+ pass
2355
+
2356
+ lowered = text.lower()
2357
+ if lowered.startswith("error:"):
2358
+ message = text.split(":", 1)[1].strip()
2359
+ return message or text
2360
+
2361
+ if "server_error" in lowered or "codex error" in lowered:
2362
+ return text
2363
+
2364
+ return None
2365
+
2366
+ @staticmethod
2367
+ def _extract_provider_error_from_result_text(result_text: str) -> Optional[str]:
2368
+ """Detect provider-level failures that leaked into assistant result text."""
2369
+ if not isinstance(result_text, str):
2370
+ return None
2371
+
2372
+ text = result_text.strip()
2373
+ if not text:
2374
+ return None
2375
+
2376
+ normalized = " ".join(text.split())
2377
+ lowered = normalized.lower()
2378
+
2379
+ provider_signatures = (
2380
+ "chatgpt usage limit",
2381
+ "usage limit",
2382
+ "rate limit",
2383
+ "insufficient_quota",
2384
+ "too many requests",
2385
+ "codex error",
2386
+ "server_error",
2387
+ )
2388
+
2389
+ if lowered.startswith("error:"):
2390
+ payload = normalized.split(":", 1)[1].strip() if ":" in normalized else ""
2391
+ if any(signature in lowered for signature in provider_signatures) or "try again in" in lowered:
2392
+ return payload or normalized
2393
+
2394
+ if any(signature in lowered for signature in provider_signatures):
2395
+ return normalized
2396
+
2397
+ return None
2398
+
2399
+ def _build_success_result_event(self, text: str, event: dict) -> dict:
2400
+ """Build standardized success envelope for shell-backend capture."""
2401
+ usage = self._extract_usage_from_event(event)
2402
+ if isinstance(self._run_usage_totals, dict):
2403
+ usage = self._run_usage_totals
2404
+
2405
+ total_cost_usd = self._extract_total_cost_usd(event, usage)
2406
+ accumulated_total_cost = self._get_accumulated_total_cost_usd()
2407
+ if accumulated_total_cost is not None:
2408
+ total_cost_usd = accumulated_total_cost
2409
+
2410
+ result_event: Dict = {
2411
+ "type": "result",
2412
+ "subtype": "success",
2413
+ "is_error": False,
2414
+ "result": text,
2415
+ "session_id": self.session_id,
2416
+ "sub_agent_response": self._sanitize_sub_agent_response(event),
2417
+ }
2418
+
2419
+ if isinstance(usage, dict):
2420
+ result_event["usage"] = usage
2421
+ if total_cost_usd is not None:
2422
+ result_event["total_cost_usd"] = total_cost_usd
2423
+
2424
+ return result_event
2425
+
2426
+ def _build_error_result_event(self, error_message: str, event: Optional[dict] = None) -> dict:
2427
+ """Build standardized error envelope for shell-backend capture."""
2428
+ message = error_message.strip() if isinstance(error_message, str) else str(error_message)
2429
+
2430
+ result_event: Dict = {
2431
+ "type": "result",
2432
+ "subtype": "error",
2433
+ "is_error": True,
2434
+ "result": message,
2435
+ "error": message,
2436
+ "session_id": self.session_id,
2437
+ }
2438
+
2439
+ if isinstance(event, dict):
2440
+ result_event["sub_agent_response"] = self._sanitize_sub_agent_response(event)
2441
+
2442
+ return result_event
2443
+
1400
2444
    def _write_capture_file(self, capture_path: Optional[str]) -> None:
        """Write final result event to capture file for shell backend.

        No-op when there is no capture path or no result event. Before
        writing, a missing ``session_id`` is backfilled from any previously
        written capture file at the same path (top-level or nested under
        ``sub_agent_response``). Write failures are reported on stderr but
        never raised.
        """
        if not capture_path or not self.last_result_event:
            return

        # Copy so backfilling session_id does not mutate the original event.
        payload = dict(self.last_result_event)

        if not payload.get("session_id"):
            # Best-effort read of an existing capture file to recover the
            # session id (e.g. one written earlier in the same run).
            existing_capture: Optional[dict] = None
            try:
                capture_file = Path(capture_path)
                if capture_file.exists():
                    raw_existing = capture_file.read_text(encoding="utf-8").strip()
                    if raw_existing:
                        parsed_existing = json.loads(raw_existing)
                        if isinstance(parsed_existing, dict):
                            existing_capture = parsed_existing
            except Exception:
                existing_capture = None

            # Accept the session id from the top level first, then from the
            # nested sub_agent_response payload.
            existing_session_id: Optional[str] = None
            if isinstance(existing_capture, dict):
                candidate = existing_capture.get("session_id")
                if isinstance(candidate, str) and candidate.strip():
                    existing_session_id = candidate.strip()
                elif isinstance(existing_capture.get("sub_agent_response"), dict):
                    nested = existing_capture["sub_agent_response"].get("session_id")
                    if isinstance(nested, str) and nested.strip():
                        existing_session_id = nested.strip()

            if existing_session_id:
                payload["session_id"] = existing_session_id
                # Also adopt it on the service when we had none.
                if not self.session_id:
                    self.session_id = existing_session_id

        # Keep the in-memory event in sync with what is written to disk.
        self.last_result_event = payload

        try:
            Path(capture_path).write_text(
                json.dumps(payload, ensure_ascii=False),
                encoding="utf-8",
            )
        except Exception as e:
            print(f"Warning: Could not write capture file: {e}", file=sys.stderr)
1411
2488
 
2489
    def _build_live_auto_exit_extension_source(self, capture_path: Optional[str]) -> str:
        """Build a temporary Pi extension source used by --live mode.

        The extension listens for agent_end, writes a compact result envelope to
        JUNO_SUBAGENT_CAPTURE_PATH-compatible location, then requests
        graceful shutdown via ctx.shutdown().

        Args:
            capture_path: Destination file for the capture payload. None/empty
                bakes in an empty string, which disables writes inside the
                extension (see writeCapturePayload's guard).

        Returns:
            TypeScript source text with the capture path embedded as a JSON
            string literal.
        """
        # json.dumps produces a quoted, escaped literal, so substituting it
        # into the template below is safe for arbitrary path characters.
        capture_literal = json.dumps(capture_path or "")
        # NOTE: __CAPTURE_PATH__ is substituted via str.replace (not str.format)
        # because the TypeScript template is full of literal braces.
        source = """import type { ExtensionAPI } from \"@mariozechner/pi-coding-agent\";
import * as fs from \"node:fs\";

const capturePath = __CAPTURE_PATH__;

function extractTextFromMessages(messages: any[]): string {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (!msg || msg.role !== \"assistant\") continue;

    const content = msg.content;
    if (typeof content === \"string\") {
      if (content.trim()) return content;
      continue;
    }

    if (Array.isArray(content)) {
      const parts: string[] = [];
      for (const item of content) {
        if (typeof item === \"string\" && item.trim()) {
          parts.push(item);
          continue;
        }
        if (item && item.type === \"text\" && typeof item.text === \"string\" && item.text.trim()) {
          parts.push(item.text);
        }
      }
      if (parts.length > 0) return parts.join(\"\\n\");
    }
  }
  return \"\";
}

function isFiniteNumber(value: any): value is number {
  return typeof value === \"number\" && Number.isFinite(value);
}

function normalizeUsage(usage: any): any | undefined {
  if (!usage || typeof usage !== \"object\") return undefined;

  const cost = usage.cost && typeof usage.cost === \"object\" ? usage.cost : {};

  const input = isFiniteNumber(usage.input) ? usage.input : 0;
  const output = isFiniteNumber(usage.output) ? usage.output : 0;
  const cacheRead = isFiniteNumber(usage.cacheRead) ? usage.cacheRead : 0;
  const cacheWrite = isFiniteNumber(usage.cacheWrite) ? usage.cacheWrite : 0;
  const totalTokens = isFiniteNumber(usage.totalTokens)
    ? usage.totalTokens
    : input + output + cacheRead + cacheWrite;

  const costInput = isFiniteNumber(cost.input) ? cost.input : 0;
  const costOutput = isFiniteNumber(cost.output) ? cost.output : 0;
  const costCacheRead = isFiniteNumber(cost.cacheRead) ? cost.cacheRead : 0;
  const costCacheWrite = isFiniteNumber(cost.cacheWrite) ? cost.cacheWrite : 0;
  const costTotal = isFiniteNumber(cost.total)
    ? cost.total
    : costInput + costOutput + costCacheRead + costCacheWrite;

  const hasAnyValue =
    isFiniteNumber(usage.input) ||
    isFiniteNumber(usage.output) ||
    isFiniteNumber(usage.cacheRead) ||
    isFiniteNumber(usage.cacheWrite) ||
    isFiniteNumber(usage.totalTokens) ||
    isFiniteNumber(cost.input) ||
    isFiniteNumber(cost.output) ||
    isFiniteNumber(cost.cacheRead) ||
    isFiniteNumber(cost.cacheWrite) ||
    isFiniteNumber(cost.total);

  if (!hasAnyValue) return undefined;

  return {
    input,
    output,
    cacheRead,
    cacheWrite,
    totalTokens,
    cost: {
      input: costInput,
      output: costOutput,
      cacheRead: costCacheRead,
      cacheWrite: costCacheWrite,
      total: costTotal,
    },
  };
}

function mergeUsage(base: any | undefined, delta: any | undefined): any | undefined {
  if (!base) return delta;
  if (!delta) return base;

  const baseCost = base.cost && typeof base.cost === \"object\" ? base.cost : {};
  const deltaCost = delta.cost && typeof delta.cost === \"object\" ? delta.cost : {};

  return {
    input: (base.input ?? 0) + (delta.input ?? 0),
    output: (base.output ?? 0) + (delta.output ?? 0),
    cacheRead: (base.cacheRead ?? 0) + (delta.cacheRead ?? 0),
    cacheWrite: (base.cacheWrite ?? 0) + (delta.cacheWrite ?? 0),
    totalTokens: (base.totalTokens ?? 0) + (delta.totalTokens ?? 0),
    cost: {
      input: (baseCost.input ?? 0) + (deltaCost.input ?? 0),
      output: (baseCost.output ?? 0) + (deltaCost.output ?? 0),
      cacheRead: (baseCost.cacheRead ?? 0) + (deltaCost.cacheRead ?? 0),
      cacheWrite: (baseCost.cacheWrite ?? 0) + (deltaCost.cacheWrite ?? 0),
      total: (baseCost.total ?? 0) + (deltaCost.total ?? 0),
    },
  };
}

function extractAssistantUsage(messages: any[]): any | undefined {
  let totals: any | undefined;

  for (const msg of messages) {
    if (!msg || msg.role !== \"assistant\") {
      continue;
    }

    const normalized = normalizeUsage(msg.usage);
    if (!normalized) {
      continue;
    }

    totals = mergeUsage(totals, normalized);
  }

  return totals;
}

function extractLatestAssistantStopReason(messages: any[]): string | undefined {
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (!msg || msg.role !== \"assistant\") {
      continue;
    }

    const reason = msg.stopReason;
    return typeof reason === \"string\" && reason ? reason : undefined;
  }

  return undefined;
}

function writeCapturePayload(payload: any): void {
  if (!capturePath) {
    return;
  }

  fs.writeFileSync(capturePath, JSON.stringify(payload), \"utf-8\");
}

function persistSessionSnapshot(sessionId: unknown): void {
  if (typeof sessionId !== \"string\" || !sessionId) {
    return;
  }

  try {
    writeCapturePayload({
      type: \"result\",
      subtype: \"session\",
      is_error: false,
      session_id: sessionId,
    });
  } catch {
    // Non-fatal: runtime capture should continue even if snapshot write fails.
  }
}

export default function (pi: ExtensionAPI) {
  let completed = false;

  pi.on(\"session\", (event, ctx) => {
    const eventSessionId = typeof event?.id === \"string\" ? event.id : undefined;
    const managerSessionId =
      typeof ctx?.sessionManager?.getSessionId === \"function\"
        ? ctx.sessionManager.getSessionId()
        : undefined;

    persistSessionSnapshot(managerSessionId || eventSessionId);
  });

  pi.on(\"agent_end\", async (event, ctx) => {
    const messages = Array.isArray(event?.messages) ? event.messages : [];
    const stopReason = extractLatestAssistantStopReason(messages);

    // Esc-aborted runs should keep Pi open for user interaction.
    if (stopReason === \"aborted\") {
      return;
    }

    if (completed) return;
    completed = true;

    try {
      const usage = extractAssistantUsage(messages);
      const totalCost = typeof usage?.cost?.total === \"number\" ? usage.cost.total : undefined;
      const sessionId =
        typeof ctx?.sessionManager?.getSessionId === \"function\"
          ? ctx.sessionManager.getSessionId()
          : undefined;
      const payload: any = {
        type: \"result\",
        subtype: \"success\",
        is_error: false,
        result: extractTextFromMessages(messages),
        usage,
        total_cost_usd: totalCost,
        sub_agent_response: event,
      };

      if (typeof sessionId === \"string\" && sessionId) {
        payload.session_id = sessionId;
      }

      writeCapturePayload(payload);
    } catch {
      // Keep shutdown behavior even when capture writing fails.
    } finally {
      ctx.shutdown();
    }
  });
}
"""
        return source.replace("__CAPTURE_PATH__", capture_literal)
2722
+
2723
+ def _create_live_auto_exit_extension_file(self, capture_path: Optional[str]) -> Optional[Path]:
2724
+ """Create a temporary live-mode extension file and return its path."""
2725
+ try:
2726
+ fd, temp_path = tempfile.mkstemp(prefix="juno-pi-live-auto-exit-", suffix=".ts")
2727
+ with os.fdopen(fd, "w", encoding="utf-8") as handle:
2728
+ handle.write(self._build_live_auto_exit_extension_source(capture_path))
2729
+ return Path(temp_path)
2730
+ except Exception as exc:
2731
+ print(f"Warning: Failed to create live auto-exit extension: {exc}", file=sys.stderr)
2732
+ return None
2733
+
2734
+ def _open_live_tty_stdin(self) -> Optional[TextIO]:
2735
+ """Open /dev/tty for live-mode stdin fallback when stdin is redirected."""
2736
+ try:
2737
+ return open("/dev/tty", "r", encoding="utf-8", errors="ignore")
2738
+ except OSError:
2739
+ return None
2740
+
1412
2741
  def run_pi(self, cmd: List[str], args: argparse.Namespace,
1413
2742
  stdin_prompt: Optional[str] = None) -> int:
1414
2743
  """Execute the Pi CLI and stream/format its JSON output.
@@ -1423,6 +2752,14 @@ Model shorthands:
1423
2752
  pretty = args.pretty.lower() != "false"
1424
2753
  capture_path = os.environ.get("JUNO_SUBAGENT_CAPTURE_PATH")
1425
2754
  hide_types = self._build_hide_types()
2755
+ self._buffered_tool_stdout_lines.clear()
2756
+ self._reset_run_cost_tracking()
2757
+ cancel_delayed_toolcalls = lambda: None
2758
+ stderr_error_messages: List[str] = []
2759
+
2760
+ resume_session = getattr(args, "resume", None)
2761
+ if isinstance(resume_session, str) and resume_session.strip():
2762
+ self.session_id = resume_session.strip()
1426
2763
 
1427
2764
  if verbose:
1428
2765
  # Truncate prompt in display to avoid confusing multi-line output
@@ -1454,7 +2791,80 @@ Model shorthands:
1454
2791
  print(f"Executing: {' '.join(display_cmd)}", file=sys.stderr)
1455
2792
  print("-" * 80, file=sys.stderr)
1456
2793
 
2794
+ process: Optional[subprocess.Popen] = None
2795
+ live_mode_requested = bool(getattr(args, "live", False))
2796
+ stdin_has_tty = (
2797
+ hasattr(sys.stdin, "isatty")
2798
+ and sys.stdin.isatty()
2799
+ )
2800
+ stdout_has_tty = (
2801
+ hasattr(sys.stdout, "isatty")
2802
+ and sys.stdout.isatty()
2803
+ )
2804
+ live_tty_stdin: Optional[TextIO] = None
2805
+ if live_mode_requested and stdout_has_tty and not stdin_has_tty:
2806
+ live_tty_stdin = self._open_live_tty_stdin()
2807
+
2808
+ is_live_tty_passthrough = (
2809
+ live_mode_requested
2810
+ and stdout_has_tty
2811
+ and (stdin_has_tty or live_tty_stdin is not None)
2812
+ )
2813
+
1457
2814
  try:
2815
+ if is_live_tty_passthrough:
2816
+ # Interactive live mode: attach Pi directly to the current terminal.
2817
+ # Keep stdout inherited for full-screen TUI rendering/input, but
2818
+ # capture stderr so terminal provider errors can still propagate.
2819
+ popen_kwargs = {
2820
+ "cwd": self.project_path,
2821
+ "stderr": subprocess.PIPE,
2822
+ "text": True,
2823
+ "universal_newlines": True,
2824
+ }
2825
+ if live_tty_stdin is not None:
2826
+ popen_kwargs["stdin"] = live_tty_stdin
2827
+
2828
+ try:
2829
+ process = subprocess.Popen(cmd, **popen_kwargs)
2830
+
2831
+ def _live_tty_stderr_reader():
2832
+ """Read stderr during live TTY mode and capture terminal failures."""
2833
+ try:
2834
+ if process.stderr:
2835
+ for stderr_line in process.stderr:
2836
+ print(stderr_line, end="", file=sys.stderr, flush=True)
2837
+ extracted_error = self._extract_error_message_from_text(stderr_line)
2838
+ if extracted_error:
2839
+ stderr_error_messages.append(extracted_error)
2840
+ if not self._is_success_result_event(self.last_result_event):
2841
+ self.last_result_event = self._build_error_result_event(extracted_error)
2842
+ except (ValueError, OSError):
2843
+ pass
2844
+
2845
+ stderr_thread = threading.Thread(target=_live_tty_stderr_reader, daemon=True)
2846
+ stderr_thread.start()
2847
+
2848
+ process.wait()
2849
+ stderr_thread.join(timeout=3)
2850
+
2851
+ if stderr_error_messages and not self._is_success_result_event(self.last_result_event):
2852
+ self.last_result_event = self._build_error_result_event(stderr_error_messages[-1])
2853
+
2854
+ self._write_capture_file(capture_path)
2855
+
2856
+ final_return_code = process.returncode or 0
2857
+ if final_return_code == 0 and self._is_error_result_event(self.last_result_event):
2858
+ final_return_code = 1
2859
+
2860
+ return final_return_code
2861
+ finally:
2862
+ if live_tty_stdin is not None:
2863
+ try:
2864
+ live_tty_stdin.close()
2865
+ except OSError:
2866
+ pass
2867
+
1458
2868
  process = subprocess.Popen(
1459
2869
  cmd,
1460
2870
  stdin=subprocess.PIPE if stdin_prompt else subprocess.DEVNULL,
@@ -1517,18 +2927,304 @@ Model shorthands:
1517
2927
 
1518
2928
  # Stream stderr in a separate thread so Pi diagnostic output is visible
1519
2929
  def _stderr_reader():
1520
- """Read stderr and forward to our stderr for visibility."""
2930
+ """Read stderr, forward to stderr, and capture terminal error signals."""
1521
2931
  try:
1522
2932
  if process.stderr:
1523
2933
  for stderr_line in process.stderr:
1524
2934
  print(stderr_line, end="", file=sys.stderr, flush=True)
2935
+ extracted_error = self._extract_error_message_from_text(stderr_line)
2936
+ if extracted_error:
2937
+ stderr_error_messages.append(extracted_error)
2938
+ if not self._is_success_result_event(self.last_result_event):
2939
+ self.last_result_event = self._build_error_result_event(extracted_error)
1525
2940
  except (ValueError, OSError):
1526
2941
  pass
1527
2942
 
1528
2943
  stderr_thread = threading.Thread(target=_stderr_reader, daemon=True)
1529
2944
  stderr_thread.start()
1530
2945
 
2946
+ cancel_delayed_toolcalls = lambda: None
2947
+
1531
2948
  if process.stdout:
2949
+ pending_tool_execution_end: Optional[dict] = None
2950
+ pending_turn_end_after_tool: Optional[dict] = None
2951
+ toolcall_end_delay_seconds = self._toolcall_end_delay_seconds()
2952
+ pending_delayed_toolcalls: Dict[int, dict] = {}
2953
+ delayed_toolcalls_lock = threading.Lock()
2954
+ delayed_toolcall_seq = 0
2955
+
2956
+ def _extract_fallback_toolcall_name(parsed_event: dict) -> Optional[str]:
2957
+ if parsed_event.get("type") != "message_update":
2958
+ return None
2959
+ assistant_event = parsed_event.get("assistantMessageEvent")
2960
+ if not isinstance(assistant_event, dict) or assistant_event.get("type") != "toolcall_end":
2961
+ return None
2962
+ tool_call = assistant_event.get("toolCall")
2963
+ if not isinstance(tool_call, dict):
2964
+ return None
2965
+ tool_call_id = tool_call.get("toolCallId")
2966
+ if isinstance(tool_call_id, str) and tool_call_id.strip():
2967
+ return None
2968
+ name = tool_call.get("name", "")
2969
+ return name if isinstance(name, str) else ""
2970
+
2971
+ def _format_deferred_toolcall(parsed_event: dict, mode: str) -> Optional[str]:
2972
+ if mode == self.PRETTIFIER_LIVE:
2973
+ return self._format_event_live(parsed_event)
2974
+ if mode == self.PRETTIFIER_CODEX:
2975
+ return self._format_pi_codex_event(parsed_event)
2976
+ if mode == self.PRETTIFIER_CLAUDE:
2977
+ return self._format_event_pretty_claude(parsed_event)
2978
+ return self._format_event_pretty(parsed_event)
2979
+
2980
+ def _emit_stdout(formatted: str, raw: bool = False) -> None:
2981
+ if raw:
2982
+ sys.stdout.write(formatted)
2983
+ sys.stdout.flush()
2984
+ return
2985
+ print(formatted, flush=True)
2986
+
2987
+ def _schedule_delayed_toolcall(parsed_event: dict, tool_name: str, mode: str) -> None:
2988
+ nonlocal delayed_toolcall_seq
2989
+
2990
+ def _emit_delayed_toolcall(event_payload: dict, event_mode: str) -> None:
2991
+ formatted = _format_deferred_toolcall(event_payload, event_mode)
2992
+ if not formatted:
2993
+ return
2994
+ _emit_stdout(formatted, raw=event_mode == self.PRETTIFIER_LIVE)
2995
+
2996
+ if toolcall_end_delay_seconds <= 0:
2997
+ _emit_delayed_toolcall(parsed_event, mode)
2998
+ return
2999
+
3000
+ delayed_toolcall_seq += 1
3001
+ entry_id = delayed_toolcall_seq
3002
+ entry: Dict = {
3003
+ "id": entry_id,
3004
+ "tool": tool_name,
3005
+ "event": parsed_event,
3006
+ "mode": mode,
3007
+ }
3008
+
3009
+ def _timer_emit() -> None:
3010
+ with delayed_toolcalls_lock:
3011
+ pending = pending_delayed_toolcalls.pop(entry_id, None)
3012
+ if not pending:
3013
+ return
3014
+ _emit_delayed_toolcall(pending["event"], pending["mode"])
3015
+
3016
+ timer = threading.Timer(toolcall_end_delay_seconds, _timer_emit)
3017
+ timer.daemon = True
3018
+ entry["timer"] = timer
3019
+ with delayed_toolcalls_lock:
3020
+ pending_delayed_toolcalls[entry_id] = entry
3021
+ timer.start()
3022
+
3023
+ def _cancel_delayed_toolcall(tool_name: str) -> None:
3024
+ with delayed_toolcalls_lock:
3025
+ if not pending_delayed_toolcalls:
3026
+ return
3027
+
3028
+ selected_id: Optional[int] = None
3029
+ if tool_name:
3030
+ for entry_id, entry in pending_delayed_toolcalls.items():
3031
+ if entry.get("tool") == tool_name:
3032
+ selected_id = entry_id
3033
+ break
3034
+
3035
+ if selected_id is None:
3036
+ selected_id = min(pending_delayed_toolcalls.keys())
3037
+
3038
+ pending = pending_delayed_toolcalls.pop(selected_id, None)
3039
+
3040
+ if pending:
3041
+ timer = pending.get("timer")
3042
+ if timer:
3043
+ timer.cancel()
3044
+
3045
+ def _cancel_all_delayed_toolcalls() -> None:
3046
+ with delayed_toolcalls_lock:
3047
+ pending = list(pending_delayed_toolcalls.values())
3048
+ pending_delayed_toolcalls.clear()
3049
+ for entry in pending:
3050
+ timer = entry.get("timer")
3051
+ if timer:
3052
+ timer.cancel()
3053
+
3054
+ cancel_delayed_toolcalls = _cancel_all_delayed_toolcalls
3055
+
3056
+ def _emit_parsed_event(parsed_event: dict, raw_json_line: Optional[str] = None) -> None:
3057
+ event_type = parsed_event.get("type", "")
3058
+
3059
+ # Capture session ID from the session event (sent at stream start)
3060
+ if event_type == "session":
3061
+ self.session_id = parsed_event.get("id")
3062
+ if (
3063
+ isinstance(self.last_result_event, dict)
3064
+ and not self.last_result_event.get("session_id")
3065
+ and isinstance(self.session_id, str)
3066
+ and self.session_id.strip()
3067
+ ):
3068
+ self.last_result_event["session_id"] = self.session_id
3069
+
3070
+ # Capture terminal error events even when upstream exits with code 0.
3071
+ error_message = self._extract_error_message_from_event(parsed_event)
3072
+ if error_message:
3073
+ self.last_result_event = self._build_error_result_event(error_message, parsed_event)
3074
+
3075
+ # Track per-run assistant usage from stream events.
3076
+ self._track_assistant_usage_from_event(parsed_event)
3077
+
3078
+ # Ensure agent_end reflects cumulative per-run totals when available.
3079
+ if event_type == "agent_end":
3080
+ accumulated_total_cost = self._get_accumulated_total_cost_usd()
3081
+ if accumulated_total_cost is not None:
3082
+ parsed_event["total_cost_usd"] = accumulated_total_cost
3083
+ if isinstance(self._run_usage_totals, dict):
3084
+ parsed_event["usage"] = self._run_usage_totals
3085
+
3086
+ # Capture result event for shell backend
3087
+ if event_type == "agent_end":
3088
+ # agent_end has a 'messages' array; extract final assistant text
3089
+ messages = parsed_event.get("messages", [])
3090
+ text = ""
3091
+ if isinstance(messages, list):
3092
+ # Walk messages in reverse to find last assistant message with text
3093
+ for m in reversed(messages):
3094
+ if isinstance(m, dict) and m.get("role") == "assistant":
3095
+ text = self._extract_text_from_message(m)
3096
+ if text:
3097
+ break
3098
+ if text:
3099
+ provider_error = self._extract_provider_error_from_result_text(text)
3100
+ if provider_error:
3101
+ self.last_result_event = self._build_error_result_event(provider_error, parsed_event)
3102
+ else:
3103
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
3104
+ elif not self._is_error_result_event(self.last_result_event):
3105
+ self.last_result_event = parsed_event
3106
+ elif event_type == "message":
3107
+ # OpenAI-compatible format: capture last assistant message
3108
+ msg = parsed_event.get("message", {})
3109
+ if isinstance(msg, dict) and msg.get("role") == "assistant":
3110
+ text = self._extract_text_from_message(msg)
3111
+ if text:
3112
+ provider_error = self._extract_provider_error_from_result_text(text)
3113
+ if provider_error:
3114
+ self.last_result_event = self._build_error_result_event(provider_error, parsed_event)
3115
+ else:
3116
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
3117
+ elif event_type == "turn_end":
3118
+ # turn_end may contain the final assistant message
3119
+ msg = parsed_event.get("message", {})
3120
+ if isinstance(msg, dict):
3121
+ text = self._extract_text_from_message(msg)
3122
+ if text:
3123
+ provider_error = self._extract_provider_error_from_result_text(text)
3124
+ if provider_error:
3125
+ self.last_result_event = self._build_error_result_event(provider_error, parsed_event)
3126
+ else:
3127
+ self.last_result_event = self._build_success_result_event(text, parsed_event)
3128
+
3129
+ # Filter hidden stream types (live mode handles its own filtering)
3130
+ if event_type in hide_types and self.prettifier_mode != self.PRETTIFIER_LIVE:
3131
+ return
3132
+
3133
+ # Fallback toolcall_end events (without toolCallId) are delayed so
3134
+ # short tool executions only show the final combined tool event.
3135
+ if pretty:
3136
+ fallback_tool_name = _extract_fallback_toolcall_name(parsed_event)
3137
+ if fallback_tool_name is not None:
3138
+ _schedule_delayed_toolcall(parsed_event, fallback_tool_name, self.prettifier_mode)
3139
+ return
3140
+
3141
+ # Live stream mode: stream deltas in real-time
3142
+ if self.prettifier_mode == self.PRETTIFIER_LIVE:
3143
+ if event_type in hide_types:
3144
+ # In live mode, still suppress session/compaction/retry events
3145
+ # but NOT message_start/message_end (handled by _format_event_live)
3146
+ if event_type not in ("message_start", "message_end"):
3147
+ return
3148
+ formatted_live = self._format_event_live(parsed_event)
3149
+ if formatted_live is not None:
3150
+ if formatted_live == "":
3151
+ return
3152
+ sys.stdout.write(formatted_live)
3153
+ sys.stdout.flush()
3154
+ else:
3155
+ # Fallback: print raw JSON for unhandled event types
3156
+ print(json.dumps(parsed_event, ensure_ascii=False), flush=True)
3157
+ return
3158
+
3159
+ # Format and print using model-appropriate prettifier
3160
+ if pretty:
3161
+ if self.prettifier_mode == self.PRETTIFIER_CODEX:
3162
+ # Try Pi-wrapped Codex format first (role-based messages)
3163
+ if "role" in parsed_event:
3164
+ formatted = self._format_pi_codex_message(parsed_event)
3165
+ else:
3166
+ # Try Pi event handler (message_update, turn_end, etc.)
3167
+ formatted = self._format_pi_codex_event(parsed_event)
3168
+ if formatted is None:
3169
+ # Try native Codex event handler
3170
+ formatted = self._format_event_pretty_codex(parsed_event)
3171
+ if formatted is None:
3172
+ # Sanitize before raw JSON fallback: strip thinkingSignature,
3173
+ # encrypted_content, and metadata from nested Codex events.
3174
+ self._sanitize_codex_event(parsed_event, strip_metadata=True)
3175
+ formatted = json.dumps(parsed_event, ensure_ascii=False)
3176
+ elif formatted == "":
3177
+ return
3178
+ elif self.prettifier_mode == self.PRETTIFIER_CLAUDE:
3179
+ formatted = self._format_event_pretty_claude(parsed_event)
3180
+ else:
3181
+ formatted = self._format_event_pretty(parsed_event)
3182
+ if formatted is not None:
3183
+ print(formatted, flush=True)
3184
+ else:
3185
+ if raw_json_line is not None:
3186
+ print(raw_json_line, flush=True)
3187
+ else:
3188
+ print(json.dumps(parsed_event, ensure_ascii=False), flush=True)
3189
+
3190
+ def _merge_buffered_tool_stdout_into(event_payload: dict) -> None:
3191
+ buffered_text = "\n".join(self._buffered_tool_stdout_lines).strip()
3192
+ if not buffered_text:
3193
+ self._buffered_tool_stdout_lines.clear()
3194
+ return
3195
+
3196
+ result_val = event_payload.get("result")
3197
+ if result_val in (None, "", [], {}):
3198
+ event_payload["result"] = buffered_text
3199
+ elif isinstance(result_val, str):
3200
+ existing = self._strip_ansi_sequences(result_val)
3201
+ if existing:
3202
+ if not existing.endswith("\n"):
3203
+ existing += "\n"
3204
+ event_payload["result"] = existing + buffered_text
3205
+ else:
3206
+ event_payload["result"] = buffered_text
3207
+ else:
3208
+ # Keep complex result structures untouched; print trailing raw lines
3209
+ # before the next structured event for stable transcript ordering.
3210
+ print(buffered_text, flush=True)
3211
+
3212
+ self._buffered_tool_stdout_lines.clear()
3213
+
3214
+ def _flush_pending_tool_events() -> None:
3215
+ nonlocal pending_tool_execution_end, pending_turn_end_after_tool
3216
+ if pending_tool_execution_end is not None:
3217
+ _merge_buffered_tool_stdout_into(pending_tool_execution_end)
3218
+ _emit_parsed_event(pending_tool_execution_end)
3219
+ pending_tool_execution_end = None
3220
+
3221
+ if pending_turn_end_after_tool is not None:
3222
+ if self._buffered_tool_stdout_lines:
3223
+ print("\n".join(self._buffered_tool_stdout_lines), flush=True)
3224
+ self._buffered_tool_stdout_lines.clear()
3225
+ _emit_parsed_event(pending_turn_end_after_tool)
3226
+ pending_turn_end_after_tool = None
3227
+
1532
3228
  try:
1533
3229
  for raw_line in process.stdout:
1534
3230
  line = raw_line.rstrip("\n\r")
@@ -1539,119 +3235,57 @@ Model shorthands:
1539
3235
  try:
1540
3236
  parsed = json.loads(line)
1541
3237
  except json.JSONDecodeError:
1542
- # Non-JSON output print as-is
3238
+ # Non-JSON output (raw tool stdout). In pretty mode, buffer raw
3239
+ # lines while tool execution events are pending to avoid
3240
+ # interleaving with structured events (e.g. turn_end).
3241
+ if pretty and (
3242
+ self._in_tool_execution
3243
+ or pending_tool_execution_end is not None
3244
+ or pending_turn_end_after_tool is not None
3245
+ ):
3246
+ self._buffered_tool_stdout_lines.append(self._strip_ansi_sequences(line))
3247
+ continue
1543
3248
  print(line, flush=True)
1544
3249
  continue
1545
3250
 
1546
3251
  event_type = parsed.get("type", "")
1547
3252
 
1548
- # Capture session ID from the session event (sent at stream start)
1549
- if event_type == "session":
1550
- self.session_id = parsed.get("id")
1551
-
1552
- # Capture result event for shell backend
1553
- if event_type == "agent_end":
1554
- # agent_end has a 'messages' array; extract final assistant text
1555
- messages = parsed.get("messages", [])
1556
- text = ""
1557
- if isinstance(messages, list):
1558
- # Walk messages in reverse to find last assistant message with text
1559
- for m in reversed(messages):
1560
- if isinstance(m, dict) and m.get("role") == "assistant":
1561
- text = self._extract_text_from_message(m)
1562
- if text:
1563
- break
1564
- if text:
1565
- self.last_result_event = {
1566
- "type": "result",
1567
- "subtype": "success",
1568
- "is_error": False,
1569
- "result": text,
1570
- "session_id": self.session_id,
1571
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1572
- }
1573
- else:
1574
- self.last_result_event = parsed
1575
- elif event_type == "message":
1576
- # OpenAI-compatible format: capture last assistant message
1577
- msg = parsed.get("message", {})
1578
- if isinstance(msg, dict) and msg.get("role") == "assistant":
1579
- text = self._extract_text_from_message(msg)
1580
- if text:
1581
- self.last_result_event = {
1582
- "type": "result",
1583
- "subtype": "success",
1584
- "is_error": False,
1585
- "result": text,
1586
- "session_id": self.session_id,
1587
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1588
- }
1589
- elif event_type == "turn_end":
1590
- # turn_end may contain the final assistant message
1591
- msg = parsed.get("message", {})
1592
- if isinstance(msg, dict):
1593
- text = self._extract_text_from_message(msg)
1594
- if text:
1595
- self.last_result_event = {
1596
- "type": "result",
1597
- "subtype": "success",
1598
- "is_error": False,
1599
- "result": text,
1600
- "session_id": self.session_id,
1601
- "sub_agent_response": self._sanitize_sub_agent_response(parsed),
1602
- }
1603
-
1604
- # Filter hidden stream types (live mode handles its own filtering)
1605
- if event_type in hide_types and self.prettifier_mode != self.PRETTIFIER_LIVE:
3253
+ if pretty and event_type == "tool_execution_start":
3254
+ # Reset raw tool stdout buffer per tool execution.
3255
+ self._buffered_tool_stdout_lines.clear()
3256
+
3257
+ if pretty and event_type == "tool_execution_end":
3258
+ # Tool finished before the delayed fallback timer fired — suppress
3259
+ # the pending fallback toolcall_end preview.
3260
+ tool_name = parsed.get("toolName", "")
3261
+ _cancel_delayed_toolcall(tool_name if isinstance(tool_name, str) else "")
3262
+
3263
+ # Defer emission so any trailing raw stdout can be grouped before
3264
+ # downstream structured metadata like turn_end.
3265
+ pending_tool_execution_end = parsed
1606
3266
  continue
1607
3267
 
1608
- # Live stream mode: stream deltas in real-time
1609
- if self.prettifier_mode == self.PRETTIFIER_LIVE:
1610
- if event_type in hide_types:
1611
- # In live mode, still suppress session/compaction/retry events
1612
- # but NOT message_start/message_end (handled by _format_event_live)
1613
- if event_type not in ("message_start", "message_end"):
1614
- continue
1615
- formatted = self._format_event_live(parsed)
1616
- if formatted is not None:
1617
- if formatted == "":
1618
- continue
1619
- sys.stdout.write(formatted)
1620
- sys.stdout.flush()
1621
- else:
1622
- # Fallback: print raw JSON for unhandled event types
1623
- print(json.dumps(parsed, ensure_ascii=False), flush=True)
3268
+ if pretty and event_type == "turn_end" and pending_tool_execution_end is not None:
3269
+ # Hold turn_end until buffered trailing raw stdout is flushed with
3270
+ # the pending tool event.
3271
+ pending_turn_end_after_tool = parsed
1624
3272
  continue
1625
3273
 
1626
- # Format and print using model-appropriate prettifier
1627
- if pretty:
1628
- if self.prettifier_mode == self.PRETTIFIER_CODEX:
1629
- # Try Pi-wrapped Codex format first (role-based messages)
1630
- if "role" in parsed:
1631
- formatted = self._format_pi_codex_message(parsed)
1632
- else:
1633
- # Try Pi event handler (message_update, turn_end, etc.)
1634
- formatted = self._format_pi_codex_event(parsed)
1635
- if formatted is not None:
1636
- # Empty string means "suppress this event"
1637
- if formatted == "":
1638
- continue
1639
- else:
1640
- # Try native Codex event handler
1641
- formatted = self._format_event_pretty_codex(parsed)
1642
- if formatted is None:
1643
- # Sanitize before raw JSON fallback: strip thinkingSignature,
1644
- # encrypted_content, and metadata from nested Codex events.
1645
- self._sanitize_codex_event(parsed, strip_metadata=True)
1646
- formatted = json.dumps(parsed, ensure_ascii=False)
1647
- elif self.prettifier_mode == self.PRETTIFIER_CLAUDE:
1648
- formatted = self._format_event_pretty_claude(parsed)
1649
- else:
1650
- formatted = self._format_event_pretty(parsed)
1651
- if formatted is not None:
1652
- print(formatted, flush=True)
1653
- else:
1654
- print(line, flush=True)
3274
+ if pretty and (
3275
+ pending_tool_execution_end is not None or pending_turn_end_after_tool is not None
3276
+ ):
3277
+ _flush_pending_tool_events()
3278
+
3279
+ _emit_parsed_event(parsed, raw_json_line=line)
3280
+
3281
+ # Flush any deferred tool/turn events at end-of-stream.
3282
+ if pretty and (
3283
+ pending_tool_execution_end is not None or pending_turn_end_after_tool is not None
3284
+ ):
3285
+ _flush_pending_tool_events()
3286
+ elif self._buffered_tool_stdout_lines:
3287
+ print("\n".join(self._buffered_tool_stdout_lines), flush=True)
3288
+ self._buffered_tool_stdout_lines.clear()
1655
3289
 
1656
3290
  except ValueError:
1657
3291
  # Watchdog closed stdout — expected when process exits but pipe stays open.
@@ -1659,9 +3293,7 @@ Model shorthands:
1659
3293
 
1660
3294
  # Signal watchdog that output loop is done
1661
3295
  output_done.set()
1662
-
1663
- # Write capture file for shell backend
1664
- self._write_capture_file(capture_path)
3296
+ cancel_delayed_toolcalls()
1665
3297
 
1666
3298
  # Wait for process cleanup
1667
3299
  try:
@@ -1669,20 +3301,34 @@ Model shorthands:
1669
3301
  except subprocess.TimeoutExpired:
1670
3302
  pass
1671
3303
 
1672
- # Wait for stderr thread to finish
3304
+ # Wait for stderr thread to finish before deriving fallback errors.
1673
3305
  stderr_thread.join(timeout=3)
1674
3306
 
1675
- return process.returncode or 0
3307
+ # If stderr surfaced a terminal error and we do not already have an
3308
+ # explicit success envelope, persist the failure for shell-backend consumers.
3309
+ if stderr_error_messages and not self._is_success_result_event(self.last_result_event):
3310
+ self.last_result_event = self._build_error_result_event(stderr_error_messages[-1])
3311
+
3312
+ # Write capture file for shell backend
3313
+ self._write_capture_file(capture_path)
3314
+
3315
+ final_return_code = process.returncode or 0
3316
+ if final_return_code == 0 and self._is_error_result_event(self.last_result_event):
3317
+ final_return_code = 1
3318
+
3319
+ return final_return_code
1676
3320
 
1677
3321
  except KeyboardInterrupt:
1678
3322
  print("\nInterrupted by user", file=sys.stderr)
3323
+ cancel_delayed_toolcalls()
1679
3324
  try:
1680
- process.terminate()
1681
- try:
1682
- process.wait(timeout=5)
1683
- except subprocess.TimeoutExpired:
1684
- process.kill()
1685
- process.wait(timeout=5)
3325
+ if process is not None:
3326
+ process.terminate()
3327
+ try:
3328
+ process.wait(timeout=5)
3329
+ except subprocess.TimeoutExpired:
3330
+ process.kill()
3331
+ process.wait(timeout=5)
1686
3332
  except Exception:
1687
3333
  pass
1688
3334
  self._write_capture_file(capture_path)
@@ -1690,8 +3336,9 @@ Model shorthands:
1690
3336
 
1691
3337
  except Exception as e:
1692
3338
  print(f"Error executing pi: {e}", file=sys.stderr)
3339
+ cancel_delayed_toolcalls()
1693
3340
  try:
1694
- if process.poll() is None:
3341
+ if process is not None and process.poll() is None:
1695
3342
  process.terminate()
1696
3343
  process.wait(timeout=5)
1697
3344
  except Exception:
@@ -1728,7 +3375,9 @@ Model shorthands:
1728
3375
  self.prettifier_mode = self._detect_prettifier_mode(self.model_name)
1729
3376
  self.verbose = args.verbose
1730
3377
 
1731
- # Verbose mode enables live stream prettifier for real-time output
3378
+ # Verbose mode enables live stream prettifier for real-time output.
3379
+ # Codex models already default to LIVE; this ensures all models get
3380
+ # real-time streaming when -v is used.
1732
3381
  if args.verbose:
1733
3382
  self.prettifier_mode = self.PRETTIFIER_LIVE
1734
3383
 
@@ -1740,8 +3389,30 @@ Model shorthands:
1740
3389
  else:
1741
3390
  self.prompt = prompt_value
1742
3391
 
1743
- cmd, stdin_prompt = self.build_pi_command(args)
1744
- return self.run_pi(cmd, args, stdin_prompt=stdin_prompt)
3392
+ if args.live and args.no_extensions:
3393
+ print("Error: --live requires extensions enabled (remove --no-extensions).", file=sys.stderr)
3394
+ return 1
3395
+
3396
+ live_extension_file: Optional[Path] = None
3397
+ if args.live:
3398
+ capture_path = os.environ.get("JUNO_SUBAGENT_CAPTURE_PATH")
3399
+ live_extension_file = self._create_live_auto_exit_extension_file(capture_path)
3400
+ if not live_extension_file:
3401
+ print("Error: Could not create live auto-exit extension.", file=sys.stderr)
3402
+ return 1
3403
+
3404
+ try:
3405
+ cmd, stdin_prompt = self.build_pi_command(
3406
+ args,
3407
+ live_extension_path=str(live_extension_file) if live_extension_file else None,
3408
+ )
3409
+ return self.run_pi(cmd, args, stdin_prompt=stdin_prompt)
3410
+ finally:
3411
+ if live_extension_file is not None:
3412
+ try:
3413
+ live_extension_file.unlink(missing_ok=True)
3414
+ except Exception as e:
3415
+ print(f"Warning: Failed to remove temp live extension: {e}", file=sys.stderr)
1745
3416
 
1746
3417
 
1747
3418
  def main():