henosis-cli 0.6.8__py3-none-any.whl → 0.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py CHANGED
@@ -8,6 +8,7 @@
 
 import argparse
 import asyncio
+import copy
 import json
 import os
 import sys
@@ -30,6 +31,7 @@ import importlib
 import importlib.util
 import importlib.metadata
 import re
+import base64
 
 # Optional websockets for Agent Mode (dev-only WS bridge)
 try:
@@ -80,64 +82,53 @@ except Exception:
 Confirm = None
 Text = None
 
+"""prompt_toolkit is intentionally not used.
+
+We previously relied on prompt_toolkit for interactive line editing and menus.
+Copy/selection behavior is terminal- and prompt_toolkit-implementation specific
+and proved unreliable across environments.
+
+The CLI now uses our dependency-free input engine (henosis_cli_tools.input_engine)
+and a dependency-free highlighted menu implementation.
 """
-prompt_toolkit is optional (used for some menus when available). Input editing
-for chat now uses a self-contained cross-platform engine that supports
-Shift+Enter newlines on Windows and on modern POSIX terminals that advertise
-extended keyboard protocols. It falls back to Ctrl+J for newline when
-Shift+Enter cannot be distinguished.
-"""
-try:
-    from prompt_toolkit import PromptSession
-    from prompt_toolkit.completion import WordCompleter
-    from prompt_toolkit.key_binding import KeyBindings
-    from prompt_toolkit.selection import SelectionType
-    from prompt_toolkit.application import Application
-    from prompt_toolkit.application.current import get_app
-    from prompt_toolkit.layout import Layout
-    from prompt_toolkit.layout.containers import HSplit, Window
-    from prompt_toolkit.layout.dimension import Dimension
-    from prompt_toolkit.layout.controls import FormattedTextControl
-    from prompt_toolkit.styles import Style
-    HAS_PT = True
-except Exception:
-    HAS_PT = False
-    PromptSession = None
-    WordCompleter = None
-    KeyBindings = None
-    Application = None
-    get_app = None
-    Layout = None
-    HSplit = None
-    Window = None
-    Dimension = None
-    FormattedTextControl = None
-    Style = None
+
+# Keep these names defined for legacy branches that are guarded by HAS_PT.
+HAS_PT = False
+PromptSession = None
+WordCompleter = None
+KeyBindings = None
+SelectionType = None
+Condition = None
+Application = None
+get_app = None
+Layout = None
+HSplit = None
+Window = None
+Dimension = None
+FormattedTextControl = None
+Style = None
 
 # If optional deps are missing, print a friendly note but continue with fallbacks.
-if not HAS_RICH or not HAS_PT:
-    missing = []
-    if not HAS_RICH:
-        missing.append("rich")
-    if not HAS_PT:
-        missing.append("prompt_toolkit")
-    if missing:
-        msg = (
-            "Note: optional packages missing: "
-            + ", ".join(missing)
-            + "\n- rich enables colorful output\n- prompt_toolkit enables arrow-key menus\n"
+if not HAS_RICH:
+    try:
+        sys.stderr.write(
+            "Note: optional package missing: rich\n"
+            "- rich enables colorful output\n"
         )
-        try:
-            sys.stderr.write(msg)
-        except Exception:
-            pass
+    except Exception:
+        pass
 
 # New: low-level input engine (no third-party deps) for Shift+Enter newlines
+# Also provides a best-effort clipboard helper used for Ctrl+C copy when our
+# prompt_toolkit selection is active.
 try:
-    from henosis_cli_tools.input_engine import make_engine
+    from henosis_cli_tools.input_engine import make_engine, _copy_to_clipboard as _hn_copy_to_clipboard
     HAS_INPUT_ENGINE = True
 except Exception:
     HAS_INPUT_ENGINE = False
+
+    def _hn_copy_to_clipboard(text: str) -> bool:  # type: ignore
+        return False
 DEBUG_SSE = False  # set via --debug-sse
 DEBUG_REQ = False  # set via --debug-req
 # Max number of recent SSE event summaries to retain for diagnostics when a stream
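Note on the fallback stub above: when the `henosis_cli_tools.input_engine` import fails, `_hn_copy_to_clipboard` is redefined as a no-op returning False, so call sites can invoke it unconditionally. A minimal sketch of that guarded call pattern (illustrative only; the surrounding handler is not part of this diff):

    import sys

    def copy_selection(text: str) -> None:
        copied = False
        try:
            # Real clipboard helper when the input engine imported, no-op stub otherwise.
            copied = _hn_copy_to_clipboard(text)
        except Exception:
            pass
        if not copied:
            sys.stderr.write("copy unavailable in this environment\n")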
@@ -683,7 +674,7 @@ class UI:
         for n, ty, sz in rows:
             print(f"{n:<40} {ty:<8} {sz}")
 
-class ChatCLI:
+class ChatCLI:
     def __init__(
         self,
         server: str,
@@ -800,23 +791,25 @@ class ChatCLI:
         # - concise: only model (+thinking level when applicable) and context meter
         # - verbose: full details (current behavior)
         self.usage_info_mode: str = "verbose"
-        # Reasoning effort selector for OpenAI reasoning models (low|medium|high|xhigh). Default: medium
-        # Note: 'xhigh' is only applied by the server for models that support it (e.g., gpt-5.2* and gpt-5.1-codex-max).
-        self.reasoning_effort: str = "medium"
+        # Reasoning effort selector for OpenAI reasoning models (low|medium|high|xhigh). Default: medium
+        # Note: 'xhigh' is only applied by the server for models that support it (e.g., gpt-5.2* and gpt-5.1-codex-max).
+        self.reasoning_effort: str = "medium"
         # Retain provider-native tool results between turns (e.g., Kimi reasoning/tool messages)
         self.retain_native_tool_results: bool = False
         # Anthropic thinking-mode budget tokens (applies to '-thinking' models; None = server default)
-        self.thinking_budget_tokens: Optional[int] = None
-        # Anthropic prompt cache TTL preference: None=server default, or "5m" | "1h"
+        self.thinking_budget_tokens: Optional[int] = None
+        # Anthropic effort (Opus 4.6/4.5): low|medium|high|max. Default: high.
+        self.anthropic_effort: str = "high"
+        # Anthropic prompt cache TTL preference: None=server default, or "5m" | "1h"
         self.anthropic_cache_ttl: Optional[str] = None
-        # Text verbosity selector (UI only; not sent to server requests by default)
-        self.text_verbosity: str = "medium"  # low | medium | high
-        # Tool call preambles (UI toggle only)
-        self.preambles_enabled: bool = False
-        # Codex developer prompt injection (system) for Codex models only
-        self.codex_prompt_enabled: bool = True
-        # Codex Max: allow ALL tools instead of minimal subset
-        self.codex_max_allow_all_tools: bool = False
+        # Text verbosity selector (UI only; not sent to server requests by default)
+        self.text_verbosity: str = "medium"  # low | medium | high
+        # Tool call preambles (UI toggle only)
+        self.preambles_enabled: bool = False
+        # Codex developer prompt injection (system) for Codex models only
+        self.codex_prompt_enabled: bool = True
+        # Codex Max: allow ALL tools instead of minimal subset
+        self.codex_max_allow_all_tools: bool = False
         # Custom first-turn injection (like codebase map) — toggle + editable text
         self.custom_first_turn_enabled: bool = False
         self.custom_first_turn_text: str = ""
  self.custom_first_turn_text: str = ""
@@ -997,10 +990,25 @@ class ChatCLI:
         }
         # Track last used model for display
         self._last_used_model: Optional[str] = None
-        # Provider-native history for Kimi (preserve reasoning_content across turns)
-        self._kimi_raw_history: List[Dict[str, Any]] = []
-        # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
-        self._gemini_raw_history: List[Dict[str, Any]] = []
+        # Provider-native history for Kimi (preserve reasoning_content across turns)
+        self._kimi_raw_history: List[Dict[str, Any]] = []
+        # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
+        self._gemini_raw_history: List[Dict[str, Any]] = []
+        # OpenAI Responses API threading: retain previous response id across turns
+        self._openai_previous_response_id: Optional[str] = None
+        # OpenAI Responses API threading: retain the full chain of response ids across turns
+        # (server will also echo per-turn ids in message.completed.openai_response_ids)
+        self._openai_response_id_history: List[str] = []
+
+        # OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+        # including reasoning items, function_call items, and function_call_output items.
+        self._openai_input_items: List[Dict[str, Any]] = []
+        # For robustness, remember exactly what we sent as openai_input_items for the current turn
+        # so we can append server-provided openai_delta_items deterministically.
+        self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+        # Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+        # Shape: {session_id, call_id, job_token, name}
+        self._inflight_dispatch: Optional[Dict[str, Any]] = None
         # Last server billing info from /api/usage/commit
         self._last_commit_cost_usd: float = 0.0
         self._last_remaining_credits: Optional[float] = None
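The `_openai_input_items` / `_openai_last_sent_input_items` pair added above implements manual, stateless (ZDR-safe) threading for the OpenAI Responses API. A sketch of the bookkeeping the comments describe, assuming the server reports the turn's new items as `openai_delta_items` in its completion payload (the helper name here is hypothetical):

    from typing import Any, Dict, List, Optional

    def advance_openai_thread(
        last_sent: Optional[List[Dict[str, Any]]],
        delta_items: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        # Append the server-provided delta onto exactly what was sent this turn
        # (reasoning, function_call, and function_call_output items included),
        # yielding the deterministic input chain for the next turn.
        return list(last_sent or []) + list(delta_items)

    # e.g. self._openai_input_items = advance_openai_thread(
    #          self._openai_last_sent_input_items,
    #          completed.get("openai_delta_items", []))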
@@ -1054,6 +1062,12 @@ class ChatCLI:
         # Track Ctrl+C timing for double-press-to-exit behavior
         self._last_interrupt_ts: Optional[float] = None
 
+        # Ctrl+C during a running stream should not kill the entire CLI.
+        # Instead, we cancel the in-flight turn and reopen the last user query for editing.
+        # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+        self._pending_user_edit: Optional[str] = None
+        self._pending_turn_snapshot: Optional[Dict[str, Any]] = None
+
         # Timers: session-level and per-turn wall-clock timers
         self._session_started_at: Optional[float] = None  # time.perf_counter() at session start
         self._turn_started_at: Optional[float] = None  # time.perf_counter() per turn start
@@ -1062,35 +1076,8 @@ class ChatCLI:
         self._commands_catalog: List[Dict[str, str]] = self._build_commands_catalog()
         # Low-level input engine (supports Shift+Enter newlines where possible)
         self._input_engine = make_engine() if HAS_INPUT_ENGINE else None
-        # Optional prompt_toolkit session for inline slash-command completion
+        # prompt_toolkit intentionally not used; always rely on the input engine.
         self._pt_session = None
-        if HAS_PT and PromptSession:
-            try:
-                # Build completer and simple key bindings: Enter submits, Ctrl+J inserts newline
-                self._pt_completer = self._commands_word_completer()
-                kb = KeyBindings()
-
-                @kb.add("enter")
-                def _submit(event):
-                    # Submit entire buffer
-                    event.app.exit(result=event.current_buffer.text)
-
-                @kb.add("c-j")
-                def _newline(event):
-                    # Insert literal newline
-                    event.current_buffer.insert_text("\n")
-
-                # Bottom toolbar with quick hints
-                def _toolbar() -> str:
-                    return " Type / then Tab to complete, or Enter on '/' to open the palette. Ctrl+J inserts a newline. "
-
-                # Create session
-                self._pt_session = PromptSession(
-                    key_bindings=kb,
-                    bottom_toolbar=_toolbar,
-                )
-            except Exception:
-                self._pt_session = None
 
     # ----------------------- Provider heuristics -----------------------
     def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
@@ -1339,32 +1326,34 @@ class ChatCLI:
 
     # ----------------------- Pricing + costs -----------------------
 
-    def _pricing_table(self) -> Dict[str, Dict[str, Any]]:
-        # Match server chat_adapter PRICING_PER_MILLION (subset is fine; unknown -> 0)
-        return {
-            # OpenAI
-            "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
-            # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
-            "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
-            "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
+    def _pricing_table(self) -> Dict[str, Dict[str, Any]]:
+        # Match server chat_adapter PRICING_PER_MILLION (subset is fine; unknown -> 0)
+        return {
+            # OpenAI
+            "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+            # New: gpt-5.2-codex
+            # Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+            "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
+            # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
+            "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
+            "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
             "gpt-5-2025-08-07": {"input": 1.75, "output": 14.00, "provider": "openai"},
             "gpt-5-codex": {"input": 1.75, "output": 14.00, "provider": "openai"},
-            "gpt-4o-mini": {"input": 0.21, "output": 0.84, "provider": "openai"},
+            "gpt-4o-mini": {"input": 0.21, "output": 0.84, "provider": "openai"},
             # Codex Mini (fine-tuned o4-mini for CLI). Pricing includes 1.4x margin per codex-mini.txt.
             # Cached input tokens override: $0.375 * 1.4 = $0.525 per 1M (25% of input rate).
             "codex-mini-latest": {"input": 2.10, "output": 8.40, "cached_input": 0.525, "provider": "openai"},
             # Anthropic
             "claude-sonnet-4-20250514": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
             "claude-sonnet-4-20250514-thinking": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
-            "claude-sonnet-4-5-20250929": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
+            "claude-sonnet-4-5-20250929": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
             "claude-sonnet-4-5-20250929-thinking": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
-            # New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
-            "claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
-            "claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
+            # New Opus 4.6 (adaptive thinking + effort; 1M context)
+            "claude-opus-4-6": {"input": 5.25, "output": 25.25, "provider": "anthropic"},
+            "claude-opus-4-6-thinking": {"input": 5.25, "output": 25.25, "provider": "anthropic"},
             # Gemini
-            "gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
-            # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
-            "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
+            # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
+            "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
             # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
             # CLI uses the low-tier 1.4x margin rates for estimates. High-tier
             # pricing based on total_tokens > 200K is applied on the server.
@@ -1374,17 +1363,15 @@ class ChatCLI:
             "grok-4-1-fast-non-reasoning": {"input": 0.28, "output": 0.70, "provider": "xai"},
             "grok-4": {"input": 4.20, "output": 21.00, "provider": "xai"},
             "grok-code-fast-1": {"input": 0.28, "output": 2.10, "provider": "xai"},
-            # DeepSeek V3.2 (+$0.25 per 1M margin)
-            "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-            "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-            "deepseek-3.2-speciale": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            # DeepSeek V3.2 (+$0.25 per 1M margin)
+            "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            # Removed: deepseek speciale (not supported)
             # Kimi
-            "kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
-            "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
-            "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
+            "kimi-k2.5": {"input": 0.85, "output": 3.25, "provider": "kimi"},
             # GLM (Z.AI)
             # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
-            "glm-4.6": {"input": 0.84, "output": 3.08, "provider": "glm"},
+            "glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
         }
 
     def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
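All pricing entries are USD per million tokens, so client-side estimates are straight prorations. A worked example using the new `gpt-5.2-codex` row (token counts invented):

    rate_in, rate_cached, rate_out = 1.75, 0.175, 14.00  # USD per 1M tokens

    prompt_tokens = 120_000
    cached_tokens = 80_000   # portion of the prompt served from cache
    output_tokens = 4_000

    cost = ((prompt_tokens - cached_tokens) / 1_000_000) * rate_in \
        + (cached_tokens / 1_000_000) * rate_cached \
        + (output_tokens / 1_000_000) * rate_out
    print(f"${cost:.4f}")  # -> $0.1400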
@@ -1398,70 +1385,205 @@ class ChatCLI:
             return table.get("gpt-5-2025-08-07", {"input": 0.0, "output": 0.0, "provider": "unknown"})
         return {"input": 0.0, "output": 0.0, "provider": "unknown"}
 
-    def _resolve_model_alias(self, raw_name: Optional[str]) -> Optional[str]:
-        """Normalize model aliases to their canonical server identifiers."""
-        if not raw_name:
-            return None
-        name = raw_name.strip()
-        lower = name.lower()
-        aliases = {
-            "gemini-3": "gemini-3-pro-preview",
-            "gemini-3-pro": "gemini-3-pro-preview",
-            "gemini-3-preview": "gemini-3-pro-preview",
-            "gemini-3-flash": "gemini-3-flash-preview",
-            "gemini-flash-3": "gemini-3-flash-preview",
-            "gemini-new": "gemini-3-pro-preview",
-            "new-gemini": "gemini-3-pro-preview",
-            "gemini-pro-3": "gemini-3-pro-preview",
-            "gpt5": "gpt-5",
-            "gpt4": "gpt-4o",
-            # Anthropic Claude Opus 4.5 (thinking OFF) short aliases
-            # Map common shorthand variants to the canonical non-thinking model id
-            "claude-opus-4-5": "claude-opus-4-5-20251101",
-            "claude-opus-4.5": "claude-opus-4-5-20251101",
-            "opus-4-5": "claude-opus-4-5-20251101",
-            "opus-4.5": "claude-opus-4-5-20251101",
-            "opus45": "claude-opus-4-5-20251101",
-            "claude-opus45": "claude-opus-4-5-20251101",
-        }
-        return aliases.get(lower, name)
-
-    def _apply_model_side_effects(self) -> None:
-        """Adjust related settings when certain models are selected."""
-        try:
-            model_name = (self.model or "").strip().lower()
-        except Exception:
-            model_name = ""
-        try:
-            if model_name in {"gpt-5.2-pro"}:
-                # Default these to high, but don't clobber a user-chosen xhigh.
-                if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
-                    self.reasoning_effort = "high"
-            # Codex family: disable preambles for better behavior
-            if "codex" in model_name:
-                self.preambles_enabled = False
-        except Exception:
-            try:
-                self.reasoning_effort = "high"
-            except Exception:
-                pass
-    def _is_codex_model(self, model: Optional[str]) -> bool:
-        try:
-            return bool(model) and ("codex" in str(model).lower())
-        except Exception:
-            return False
-    def _supports_xhigh_reasoning_effort(self, model: Optional[str]) -> bool:
-        """Return True if the OpenAI model supports reasoning_effort='xhigh'.
-
-        OpenAI supports xhigh on:
-        - gpt-5.1-codex-max
-        - the gpt-5.2* family
-        """
-        try:
-            m = (str(model).strip().lower() if model else "")
-            return m.startswith("gpt-5.2")
-        except Exception:
-            return False
+    def _resolve_model_alias(self, raw_name: Optional[str]) -> Optional[str]:
+        """Normalize model aliases to their canonical server identifiers."""
+        if not raw_name:
+            return None
+        name = raw_name.strip()
+        lower = name.lower()
+        aliases = {
+            "gemini-3": "gemini-3-pro-preview",
+            "gemini-3-pro": "gemini-3-pro-preview",
+            "gemini-3-preview": "gemini-3-pro-preview",
+            "gemini-3-flash": "gemini-3-flash-preview",
+            "gemini-flash-3": "gemini-3-flash-preview",
+            "gemini-new": "gemini-3-pro-preview",
+            "new-gemini": "gemini-3-pro-preview",
+            "gemini-pro-3": "gemini-3-pro-preview",
+            "gpt5": "gpt-5",
+            "gpt4": "gpt-4o",
+            # Anthropic Claude Opus 4.6 short aliases
+            "claude-opus-4-6": "claude-opus-4-6",
+            "claude-opus-4.6": "claude-opus-4-6",
+            "opus-4-6": "claude-opus-4-6",
+            "opus-4.6": "claude-opus-4-6",
+            "opus46": "claude-opus-4-6",
+            "claude-opus46": "claude-opus-4-6",
+        }
+        return aliases.get(lower, name)
+
+    def _apply_model_side_effects(self) -> None:
+        """Adjust related settings when certain models are selected."""
+        try:
+            model_name = (self.model or "").strip().lower()
+        except Exception:
+            model_name = ""
+        try:
+            # Provider-native state resets when switching away from OpenAI.
+            try:
+                if self.model and (not self._is_openai_model(self.model)):
+                    self._openai_previous_response_id = None
+                    self._openai_response_id_history = []
+                    self._openai_input_items = []
+                    self._openai_last_sent_input_items = None
+            except Exception:
+                pass
+            if model_name in {"gpt-5.2-pro"}:
+                # Default these to high, but don't clobber a user-chosen xhigh.
+                if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
+                    self.reasoning_effort = "high"
+            # Codex family: disable preambles for better behavior
+            if "codex" in model_name:
+                self.preambles_enabled = False
+            # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+            # Force-disable for all other models (even if a saved setting had it enabled).
+            if not self._supports_preambles(self.model):
+                self.preambles_enabled = False
+        except Exception:
+            try:
+                self.reasoning_effort = "high"
+            except Exception:
+                pass
+
+    def _supports_preambles(self, model: Optional[str]) -> bool:
+        """Tool-call preambles are a CLI-only UX hint.
+
+        Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+        In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+        """
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            # Only the base GPT-5 line supports this UX toggle.
+            # Allow:
+            #   - "gpt-5"
+            #   - date-pinned variants like "gpt-5-2025-08-07"
+            # Disallow:
+            #   - versioned families like "gpt-5.1*" / "gpt-5.2*"
+            if not (m == "gpt-5" or m.startswith("gpt-5-")):
+                return False
+            if "codex" in m:
+                return False
+            return True
+        except Exception:
+            return False
+
+    def _is_openai_model(self, model: Optional[str]) -> bool:
+        """Best-effort model/provider discriminator for client-side state.
+
+        The server is multi-provider. For the CLI we treat anything that isn't an explicit
+        non-OpenAI provider prefix as OpenAI.
+        """
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            if not m:
+                return False
+            for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+                if m.startswith(pfx):
+                    return False
+            # Everything else defaults to OpenAI in this repo.
+            return True
+        except Exception:
+            return False
+
+    def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+        """Whether this provider has an implemented native tool/thinking retention path."""
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            if m.startswith("gemini-"):
+                return True
+            if m.startswith("kimi-"):
+                return bool(getattr(self, "retain_native_tool_results", False))
+            if self._is_openai_model(model):
+                return True
+            return False
+        except Exception:
+            return False
+
+    def _sanitize_openai_items(self, items: Any) -> Any:
+        """Recursively strip fields from OpenAI output items that cause errors when used as input."""
+        if isinstance(items, list):
+            return [self._sanitize_openai_items(x) for x in items]
+        if isinstance(items, dict):
+            # 'status' is the main offender causing 400s
+            bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+            return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+        return items
+
+    async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+        """If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+        This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+        Best-effort; never raises.
+        """
+        ctx = None
+        try:
+            ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+        except Exception:
+            ctx = None
+        if not ctx:
+            return
+        session_id = ctx.get("session_id")
+        call_id = ctx.get("call_id")
+        job_token = ctx.get("job_token")
+        name = ctx.get("name")
+        if not (session_id and call_id and job_token):
+            return
+        payload_cb = {
+            "session_id": session_id,
+            "call_id": call_id,
+            "name": name,
+            "job_token": job_token,
+            "result": {
+                "ok": False,
+                "cancelled": True,
+                "error": str(reason or "cancelled"),
+            },
+        }
+        try:
+            # Keep it short; we just want to unblock the server.
+            http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+        except Exception:
+            http_timeout = None
+        try:
+            async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+                await client.post(self.tools_callback_url, json=payload_cb)
+        except Exception:
+            pass
+        finally:
+            try:
+                self._inflight_dispatch = None
+            except Exception:
+                pass
+
+    def _is_gpt_model(self, model: Optional[str]) -> bool:
+        """True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+        try:
+            return bool(model) and str(model).strip().lower().startswith("gpt-")
+        except Exception:
+            return False
+    def _is_codex_model(self, model: Optional[str]) -> bool:
+        try:
+            return bool(model) and ("codex" in str(model).lower())
+        except Exception:
+            return False
+    def _supports_xhigh_reasoning_effort(self, model: Optional[str]) -> bool:
+        """Return True if the OpenAI model supports reasoning_effort='xhigh'.
+
+        OpenAI supports xhigh on:
+        - gpt-5.1-codex-max
+        - the gpt-5.2* family
+        """
+        try:
+            m = (str(model).strip().lower() if model else "")
+            return m.startswith("gpt-5.2")
+        except Exception:
+            return False
 
     def _is_deepseek_like(self, model: Optional[str]) -> bool:
         try:
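`_sanitize_openai_items` exists because echoing Responses API output items back as input can fail with a 400 when response-only fields such as `status` are present. A standalone restatement of the recursive strip, with a quick self-check:

    BAD_KEYS = {"status", "usage", "completed_at", "created_at",
                "incomplete_details", "metadata", "parsed_arguments"}

    def sanitize(items):
        if isinstance(items, list):
            return [sanitize(x) for x in items]
        if isinstance(items, dict):
            return {k: sanitize(v) for k, v in items.items() if k not in BAD_KEYS}
        return items

    item = {"type": "function_call", "name": "read_file",
            "arguments": "{}", "status": "completed"}
    assert sanitize([item]) == [
        {"type": "function_call", "name": "read_file", "arguments": "{}"}]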
@@ -1469,87 +1591,87 @@ class ChatCLI:
         except Exception:
             return False
 
-    def compute_cost_usd(self, model: Optional[str], usage: Dict[str, Any]) -> float:
+    def compute_cost_usd(self, model: Optional[str], usage: Dict[str, Any]) -> float:
         price = self._resolve_price(model)
         provider = (price.get("provider") or "").lower()
         # prefer detailed fields when present
         prompt_tokens = int(usage.get("prompt_tokens") or usage.get("turn", {}).get("input_tokens", 0) or 0)
         completion_tokens = int(usage.get("completion_tokens") or usage.get("turn", {}).get("output_tokens", 0) or 0)
-        total_tokens = int(usage.get("total_tokens") or usage.get("turn", {}).get("total_tokens", 0) or (prompt_tokens + completion_tokens) or 0)
-        image_tokens = int(usage.get("image_tokens", 0) or 0)
-        thinking_tokens = int(usage.get("thinking_tokens", 0) or 0)
-        # Reasoning gap: bill as completion-side if total > (prompt + completion)
-        reasoning_gap = 0
-        try:
-            if total_tokens > (prompt_tokens + completion_tokens):
-                reasoning_gap = total_tokens - (prompt_tokens + completion_tokens)
-        except Exception:
-            reasoning_gap = 0
-        # Anthropic: count image tokens as prompt-side
-        if provider == "anthropic" and image_tokens:
-            prompt_tokens += image_tokens
-        # Anthropic prompt caching: pricing logic (reads @ 10%, creation @ 1.25x/2x)
-        if provider == "anthropic":
-            cache_read = int(usage.get("cache_read_input_tokens", 0) or 0)
-            cache_creation = int(usage.get("cache_creation_input_tokens", 0) or 0)
-            # Try to detect creation breakdown if available
-            cc_5m = 0
-            cc_1h = 0
-            try:
-                cc_map = usage.get("cache_creation") if isinstance(usage, dict) else None
-                if isinstance(cc_map, dict):
-                    cc_5m = int(cc_map.get("ephemeral_5m_input_tokens", 0) or 0)
-                    cc_1h = int(cc_map.get("ephemeral_1h_input_tokens", 0) or 0)
-            except Exception:
-                pass
-            # If breakdown is missing but total creation exists, assume 5m (1.25x) as default/safe estimate
-            if cache_creation > 0 and (cc_5m + cc_1h) == 0:
-                cc_5m = cache_creation
-
-            # Only apply special pricing if cache fields are present
-            if cache_read > 0 or cache_creation > 0:
-                in_rate = float(price.get("input", 0.0))
-                out_rate = float(price.get("output", 0.0))
-
-                # Non-cached prompt part
-                non_cached = max(0, int(prompt_tokens) - int(cache_read) - int(cache_creation))
-
-                cost = 0.0
-                # Standard input
-                cost += (non_cached / 1_000_000.0) * in_rate
-                # Cache reads (10% of input rate)
-                cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)
-                # Cache creation (1.25x for 5m, 2.0x for 1h)
-                if cc_5m > 0:
-                    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)
-                if cc_1h > 0:
-                    cost += (cc_1h / 1_000_000.0) * (in_rate * 2.00)
-
-                # Output + reasoning gap
-                completion_total = completion_tokens
-                if total_tokens and (prompt_tokens + completion_tokens) != total_tokens:
-                    completion_total += reasoning_gap
-                else:
-                    if thinking_tokens and not usage.get("total_tokens"):
-                        completion_total += thinking_tokens
-
-                cost += (completion_total / 1_000_000.0) * out_rate
-                return float(cost)
-
-        # reasoning_gap already computed above
+        total_tokens = int(usage.get("total_tokens") or usage.get("turn", {}).get("total_tokens", 0) or (prompt_tokens + completion_tokens) or 0)
+        image_tokens = int(usage.get("image_tokens", 0) or 0)
+        thinking_tokens = int(usage.get("thinking_tokens", 0) or 0)
+        # Reasoning gap: bill as completion-side if total > (prompt + completion)
+        reasoning_gap = 0
+        try:
+            if total_tokens > (prompt_tokens + completion_tokens):
+                reasoning_gap = total_tokens - (prompt_tokens + completion_tokens)
+        except Exception:
+            reasoning_gap = 0
+        # Anthropic: count image tokens as prompt-side
+        if provider == "anthropic" and image_tokens:
+            prompt_tokens += image_tokens
+        # Anthropic prompt caching: pricing logic (reads @ 10%, creation @ 1.25x/2x)
+        if provider == "anthropic":
+            cache_read = int(usage.get("cache_read_input_tokens", 0) or 0)
+            cache_creation = int(usage.get("cache_creation_input_tokens", 0) or 0)
+            # Try to detect creation breakdown if available
+            cc_5m = 0
+            cc_1h = 0
+            try:
+                cc_map = usage.get("cache_creation") if isinstance(usage, dict) else None
+                if isinstance(cc_map, dict):
+                    cc_5m = int(cc_map.get("ephemeral_5m_input_tokens", 0) or 0)
+                    cc_1h = int(cc_map.get("ephemeral_1h_input_tokens", 0) or 0)
+            except Exception:
+                pass
+            # If breakdown is missing but total creation exists, assume 5m (1.25x) as default/safe estimate
+            if cache_creation > 0 and (cc_5m + cc_1h) == 0:
+                cc_5m = cache_creation
+
+            # Only apply special pricing if cache fields are present
+            if cache_read > 0 or cache_creation > 0:
+                in_rate = float(price.get("input", 0.0))
+                out_rate = float(price.get("output", 0.0))
+
+                # Non-cached prompt part
+                non_cached = max(0, int(prompt_tokens) - int(cache_read) - int(cache_creation))
+
+                cost = 0.0
+                # Standard input
+                cost += (non_cached / 1_000_000.0) * in_rate
+                # Cache reads (10% of input rate)
+                cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)
+                # Cache creation (1.25x for 5m, 2.0x for 1h)
+                if cc_5m > 0:
+                    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)
+                if cc_1h > 0:
+                    cost += (cc_1h / 1_000_000.0) * (in_rate * 2.00)
+
+                # Output + reasoning gap
+                completion_total = completion_tokens
+                if total_tokens and (prompt_tokens + completion_tokens) != total_tokens:
+                    completion_total += reasoning_gap
+                else:
+                    if thinking_tokens and not usage.get("total_tokens"):
+                        completion_total += thinking_tokens
+
+                cost += (completion_total / 1_000_000.0) * out_rate
+                return float(cost)
+
+        # reasoning_gap already computed above
         # DeepSeek cache pricing nuance (best-effort; needs provider-specific fields to be precise)
-        if self._is_deepseek_like(model):
-            hit = int(usage.get("prompt_cache_hit_tokens", 0) or 0)
-            miss = int(usage.get("prompt_cache_miss_tokens", 0) or 0)
-            if (hit + miss) <= 0:
-                miss = prompt_tokens
-                hit = 0
-            # V3.2 cache hit pricing per docs with +$0.25 margin -> $0.278 / 1M
-            cache_hit_rate_per_m = 0.278
-            cost = (hit / 1_000_000.0) * cache_hit_rate_per_m
-            cost += (miss / 1_000_000.0) * float(price.get("input", 0.0))
-            cost += ((completion_tokens + reasoning_gap) / 1_000_000.0) * float(price.get("output", 0.0))
-            return float(cost)
+        if self._is_deepseek_like(model):
+            hit = int(usage.get("prompt_cache_hit_tokens", 0) or 0)
+            miss = int(usage.get("prompt_cache_miss_tokens", 0) or 0)
+            if (hit + miss) <= 0:
+                miss = prompt_tokens
+                hit = 0
+            # V3.2 cache hit pricing per docs with +$0.25 margin -> $0.278 / 1M
+            cache_hit_rate_per_m = 0.278
+            cost = (hit / 1_000_000.0) * cache_hit_rate_per_m
+            cost += (miss / 1_000_000.0) * float(price.get("input", 0.0))
+            cost += ((completion_tokens + reasoning_gap) / 1_000_000.0) * float(price.get("output", 0.0))
+            return float(cost)
         # OpenAI prompt caching: cached input tokens billed at 10% of input price by default
         # Allow per-model override via price["cached_input"] when provided
         if provider == "openai":
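For the Anthropic branch of `compute_cost_usd` above: cache reads bill at 10% of the input rate, 5-minute cache writes at 1.25x, and 1-hour writes at 2.0x. A worked example at the `claude-sonnet-4-5` rates from the table (token counts invented):

    in_rate, out_rate = 4.20, 21.00  # USD per 1M tokens
    prompt, cache_read, cache_create_5m, completion = 100_000, 60_000, 20_000, 2_000

    non_cached = prompt - cache_read - cache_create_5m  # 20_000
    cost = (non_cached / 1e6) * in_rate \
        + (cache_read / 1e6) * (in_rate * 0.10) \
        + (cache_create_5m / 1e6) * (in_rate * 1.25) \
        + (completion / 1e6) * out_rate
    print(f"${cost:.4f}")  # -> $0.2562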
@@ -1905,13 +2027,13 @@ class ChatCLI:
         except Exception as e:
             self.ui.warn(f"Failed to load local settings: {e}")
 
-    def _collect_settings_dict(self) -> Dict[str, Any]:
-        data = {
-            "model": self.model,
-            "requested_tools": self.requested_tools,
+    def _collect_settings_dict(self) -> Dict[str, Any]:
+        data = {
+            "model": self.model,
+            "requested_tools": self.requested_tools,
             "fs_scope": self.fs_scope,
             # host_base is per-terminal by default; only persist if explicitly set by the user
-            "save_chat_history": self.save_chat_history,
+            "save_chat_history": self.save_chat_history,
             "fs_host_mode": self.fs_host_mode,
             "system_prompt": self.system_prompt,
             "show_tool_calls": self.show_tool_calls,
@@ -1934,8 +2056,10 @@ class ChatCLI:
             # retain provider-native tool results
             "retain_native_tool_results": self.retain_native_tool_results,
             # Anthropic thinking budget
-            "thinking_budget_tokens": self.thinking_budget_tokens,
-            # Anthropic cache TTL preference
+            "thinking_budget_tokens": self.thinking_budget_tokens,
+            # Anthropic effort (Opus 4.6/4.5). Default: high.
+            "anthropic_effort": getattr(self, "anthropic_effort", None),
+            # Anthropic cache TTL preference
             "anthropic_cache_ttl": self.anthropic_cache_ttl,
             # web search
             "web_search_enabled": self.web_search_enabled,
@@ -1946,10 +2070,10 @@ class ChatCLI:
             "text_verbosity": self.text_verbosity,
             "preambles_enabled": self.preambles_enabled,
             "custom_first_turn_enabled": self.custom_first_turn_enabled,
-            "custom_first_turn_text": self.custom_first_turn_text,
-            "codex_prompt_enabled": self.codex_prompt_enabled,
-            "codex_max_allow_all_tools": self.codex_max_allow_all_tools,
-        }
+            "custom_first_turn_text": self.custom_first_turn_text,
+            "codex_prompt_enabled": self.codex_prompt_enabled,
+            "codex_max_allow_all_tools": self.codex_max_allow_all_tools,
+        }
         try:
             if not getattr(self, "_host_base_ephemeral", False) and self.host_base:
                 data["host_base"] = self.host_base
@@ -1957,15 +2081,15 @@ class ChatCLI:
             pass
         return data
 
-    def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
-        try:
-            old_system_prompt = getattr(self, "system_prompt", None)
-            self.model = data.get("model", self.model)
-            if "save_chat_history" in data:
-                try:
-                    self.save_chat_history = bool(data.get("save_chat_history"))
-                except Exception:
-                    pass
+    def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
+        try:
+            old_system_prompt = getattr(self, "system_prompt", None)
+            self.model = data.get("model", self.model)
+            if "save_chat_history" in data:
+                try:
+                    self.save_chat_history = bool(data.get("save_chat_history"))
+                except Exception:
+                    pass
             self.requested_tools = data.get("requested_tools", self.requested_tools)
             self.fs_scope = data.get("fs_scope", self.fs_scope)
             self.host_base = data.get("host_base", self.host_base)
@@ -2037,13 +2161,25 @@ class ChatCLI:
                     self.usage_info_mode = val
             except Exception:
                 pass
-            # Reasoning effort (default medium if missing/invalid)
-            try:
-                val = data.get("reasoning_effort")
-                if isinstance(val, str) and val in ("low", "medium", "high", "xhigh"):
-                    self.reasoning_effort = val
-            except Exception:
-                pass
+            # Reasoning effort (default medium if missing/invalid)
+            try:
+                val = data.get("reasoning_effort")
+                if isinstance(val, str) and val in ("low", "medium", "high", "xhigh"):
+                    self.reasoning_effort = val
+            except Exception:
+                pass
+
+            # Anthropic effort (Opus 4.6/4.5). Default behavior equals high.
+            try:
+                ae = data.get("anthropic_effort")
+                if isinstance(ae, str):
+                    ae2 = ae.strip().lower()
+                    if ae2 in ("low", "medium", "high", "max"):
+                        self.anthropic_effort = ae2
+                elif ae in (None, "", "default"):
+                    self.anthropic_effort = "high"
+            except Exception:
+                self.anthropic_effort = "high"
             # Text verbosity selector
             try:
                 v = data.get("text_verbosity")
@@ -2052,21 +2188,21 @@ class ChatCLI:
             except Exception:
                 pass
             # Tool preambles toggle
-            if "preambles_enabled" in data:
-                try:
-                    self.preambles_enabled = bool(data.get("preambles_enabled"))
-                except Exception:
-                    self.preambles_enabled = False
-            if "codex_prompt_enabled" in data:
-                try:
-                    self.codex_prompt_enabled = bool(data.get("codex_prompt_enabled"))
-                except Exception:
-                    self.codex_prompt_enabled = True
-            if "codex_max_allow_all_tools" in data:
-                try:
-                    self.codex_max_allow_all_tools = bool(data.get("codex_max_allow_all_tools"))
-                except Exception:
-                    self.codex_max_allow_all_tools = False
+            if "preambles_enabled" in data:
+                try:
+                    self.preambles_enabled = bool(data.get("preambles_enabled"))
+                except Exception:
+                    self.preambles_enabled = False
+            if "codex_prompt_enabled" in data:
+                try:
+                    self.codex_prompt_enabled = bool(data.get("codex_prompt_enabled"))
+                except Exception:
+                    self.codex_prompt_enabled = True
+            if "codex_max_allow_all_tools" in data:
+                try:
+                    self.codex_max_allow_all_tools = bool(data.get("codex_max_allow_all_tools"))
+                except Exception:
+                    self.codex_max_allow_all_tools = False
             # Custom first-turn injection
             if "custom_first_turn_enabled" in data:
                 try:
@@ -2095,39 +2231,47 @@ class ChatCLI:
                     self.thinking_budget_tokens = None
             except Exception:
                 pass
-            # Anthropic cache TTL preference
-            try:
-                ttl = data.get("anthropic_cache_ttl")
-                if isinstance(ttl, str) and ttl.strip() in ("5m", "1h"):
-                    self.anthropic_cache_ttl = ttl.strip()
-                elif ttl in (None, "", "default"):
-                    self.anthropic_cache_ttl = None
-            except Exception:
-                pass
-            # Rebuild history if system prompt changed
-            try:
-                system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
-            except Exception:
-                system_prompt_changed = False
-
-            if system_prompt_changed:
-                # Changing the system prompt can materially alter the behavior of the assistant;
-                # warn the user and reset the current conversation history to avoid mixing contexts.
-                try:
-                    self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
-                except Exception:
-                    pass
-                self.history = []
-                if self.system_prompt:
-                    self.history.append({"role": "system", "content": self.system_prompt})
-            # On settings load, do not assume the custom first-turn was injected yet
-            try:
-                self._did_inject_custom_first_turn = False
-            except Exception:
-                pass
-            self._apply_model_side_effects()
-        except Exception as e:
-            self.ui.warn(f"Failed to apply settings: {e}")
+            # Anthropic cache TTL preference
+            try:
+                ttl = data.get("anthropic_cache_ttl")
+                if isinstance(ttl, str) and ttl.strip() in ("5m", "1h"):
+                    self.anthropic_cache_ttl = ttl.strip()
+                elif ttl in (None, "", "default"):
+                    self.anthropic_cache_ttl = None
+            except Exception:
+                pass
+            # Rebuild history if system prompt changed
+            try:
+                system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
+            except Exception:
+                system_prompt_changed = False
+
+            if system_prompt_changed:
+                # Changing the system prompt can materially alter the behavior of the assistant;
+                # warn the user and reset the current conversation history to avoid mixing contexts.
+                try:
+                    self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
+                except Exception:
+                    pass
+                self.history = []
+                if self.system_prompt:
+                    self.history.append({"role": "system", "content": self.system_prompt})
+                # OpenAI threaded state is invalid once the system prompt changes.
+                try:
+                    self._openai_previous_response_id = None
+                    self._openai_response_id_history = []
+                    self._openai_input_items = []
+                    self._openai_last_sent_input_items = None
+                except Exception:
+                    pass
+            # On settings load, do not assume the custom first-turn was injected yet
+            try:
+                self._did_inject_custom_first_turn = False
+            except Exception:
+                pass
+            self._apply_model_side_effects()
+        except Exception as e:
+            self.ui.warn(f"Failed to apply settings: {e}")
 
     async def _fetch_server_settings(self) -> Optional[Dict[str, Any]]:
         try:
@@ -2329,7 +2473,7 @@ class ChatCLI:
         parts = [
             f"Server: {self.server}",
             f"Model: {self.model or '(server default)'}",
-            f"Tools: {self._tools_label()}",
+            f"Tools: {self._tools_label()}",
             f"History: {'ON' if self.save_chat_history else 'OFF'}",
             f"Scope: {self._fs_label()}",
             f"Agent scope: {self.host_base or '(none)'}",
@@ -2480,12 +2624,12 @@ class ChatCLI:
     def _build_commands_catalog(self) -> List[Dict[str, str]]:
         cmds = [
             {"name": "/settings", "usage": "/settings", "desc": "Open settings menu"},
-            {"name": "/configure", "usage": "/configure", "desc": "Run configuration wizard now"},
+            {"name": "/configure", "usage": "/configure", "desc": "Run configuration wizard now"},
             {"name": "/history", "usage": "/history on|off", "desc": "Toggle saving chat history to unified memory"},
             {"name": "/infomode", "usage": "/infomode concise|verbose", "desc": "Set Usage & Info panel mode"},
             {"name": "/tools", "usage": "/tools on|off|default", "desc": "Toggle per-request tools"},
             {"name": "/websearch", "usage": "/websearch on|off|domains|sources|location", "desc": "Configure OpenAI web search"},
-            {"name": "/reasoning", "usage": "/reasoning low|medium|high|xhigh", "desc": "Set OpenAI reasoning effort (default: medium; xhigh supported on gpt-5.2*)"},
+            {"name": "/reasoning", "usage": "/reasoning low|medium|high|xhigh", "desc": "Set OpenAI reasoning effort (default: medium; xhigh supported on gpt-5.2*)"},
             {"name": "/thinkingbudget", "usage": "/thinkingbudget <tokens>|default", "desc": "Set Anthropic thinking budget tokens for -thinking models"},
             {"name": "/fs", "usage": "/fs workspace|host|default", "desc": "Set filesystem scope"},
             {"name": "/agent-scope", "usage": "/agent-scope <absolute path>", "desc": "Alias for /hostbase (set Agent scope)"},
@@ -2505,31 +2649,28 @@ class ChatCLI:
         ]
         return cmds
 
-    def _model_presets(self) -> List[Tuple[str, str]]:
-        """Shared list of (model, label) used by settings UI and /model menu."""
-        return [
-            ("gpt-5.2", "OpenAI: gpt-5.2"),
-            ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
-            ("gpt-5", "OpenAI: gpt-5"),
-            ("gpt-5-codex", "OpenAI: gpt-5-codex"),
-            ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
-            ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
-            ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
-            ("deepseek-3.2-speciale", "DeepSeek: deepseek 3.2 Speciale (no tools)"),
-            ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
-            ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
-            ("gemini-2.5-pro", "Gemini: gemini-2.5-pro"),
-            ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
-            ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
-            ("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
-            ("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
-            ("grok-4", "xAI: grok-4"),
+    def _model_presets(self) -> List[Tuple[str, str]]:
+        """Shared list of (model, label) used by settings UI and /model menu."""
+        # Ordered in "feelings" order (Recommended first, then Others).
+        # NOTE: We intentionally do not include a "server default" or "custom" option here.
+        return [
+            # Recommended
+            ("gpt-5.2", "OpenAI: gpt-5.2"),
+            ("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
+            ("gpt-5", "OpenAI: gpt-5"),
+            ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+            ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+            ("claude-opus-4-6", "Anthropic: claude-opus-4-6 (adaptive thinking supported)"),
+            ("kimi-k2.5", "Kimi: kimi-k2.5"),
             ("grok-code-fast-1", "xAI: grok-code-fast-1"),
-            ("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
-            ("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
-            ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
-            ("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
-            ("glm-4.6", "GLM: glm-4.6"),
+
+            # Others
+            ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
+            ("gpt-5-codex", "OpenAI: gpt-5-codex"),
+            ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
+            ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
+            ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+            ("glm-4.7", "GLM: glm-4.7"),
         ]
 
     async def open_settings(self, focus: Optional[str] = None) -> None:
@@ -2575,7 +2716,8 @@ class ChatCLI:
             "usage_info_mode": "verbose",
             "reasoning_effort": "medium",
             "retain_native_tool_results": False,
-            "thinking_budget_tokens": None,
+            "thinking_budget_tokens": None,
+            "anthropic_effort": "high",
             "anthropic_cache_ttl": None,
             "web_search_enabled": False,
             "web_search_allowed_domains": [],
@@ -2586,87 +2728,87 @@ class ChatCLI:
 
         # Model presets list (shared)
         model_presets: List[Tuple[str, str]] = self._model_presets()
-        # Reorder with a Recommended section at the top. Avoid decorative symbols; instead,
-        # annotate recommended models with plain text for clarity.
-        # Recommended set per request: opus 4-5 (no thinking), gemini 3, gpt 5, kimi k2 thinking,
-        # grok code fast 1, and deepseek reasoner 3.2
-        rec_keys = {
-            "deepseek-reasoner-3.2",
-            "claude-opus-4-5-20251101",
-            "gemini-3-pro-preview",
-            "gemini-3-flash-preview",
-            "gpt-5",
-            "gpt-5.2",
-            "kimi-k2-thinking",
-            "grok-code-fast-1",
-        }
-        rec_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m in rec_keys]
-        other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_keys]
-        # Build enum options in the order: Server default, Recommended, Others, Custom
-        model_enum_options: List[Optional[str]] = [None] + [m for (m, _l) in rec_list] + [m for (m, _l) in other_list] + ["custom"]
-        # Build render map without any star/marker characters; use a simple "(recommended)" suffix
-        # for recommended models EXCEPT DeepSeek Reasoner 3.2, which should not display the suffix.
-        render_map: Dict[Any, str] = {None: "Server default"}
-        for m, lbl in rec_list:
-            if m == "deepseek-reasoner-3.2":
-                render_map[m] = lbl
-            else:
-                render_map[m] = f"{lbl} (recommended)"
+
+        # Reorder with a Recommended section at the top.
+        # IMPORTANT: remove "server default" and "custom" from Settings UI.
+        rec_keys_ordered = [
+            "gpt-5.2",
+            "gpt-5.2-codex",
+            "gpt-5",
+            "gemini-3-pro-preview",
+            "gemini-3-flash-preview",
+            "claude-opus-4-6",
+            "kimi-k2.5",
+            "grok-code-fast-1",
+        ]
+        rec_set = set(rec_keys_ordered)
+        preset_map = {m: lbl for (m, lbl) in model_presets}
+        rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
+        other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
+
+        # Build enum options in the order: Recommended, Others
+        model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+        render_map: Dict[Any, str] = {}
+        for m, lbl in rec_list:
+            render_map[m] = lbl
         for m, lbl in other_list:
             render_map[m] = lbl
-        render_map["custom"] = "Custom..."
 
         # Build items schema
-        items: List[Dict[str, Any]] = [
-            {"label": "General", "type": "group", "items": [
-                {
-                    "id": "save_chat_history",
-                    "label": "Save to unified memory",
-                    "type": "bool",
-                    "description": "When ON, chats sync to your account and appear in the web portal. When OFF, chats are ephemeral (local only)."
-                },
-                {
-                    "id": "model",
-                    "label": "Model",
-                    "type": "enum",
-                    "options": model_enum_options,
-                    "render": render_map,
-                },
-                {"id": "system_prompt", "label": "System prompt", "type": "multiline"},
-                {"id": "usage_info_mode", "label": "Usage panel", "type": "enum", "options": ["concise", "verbose"], "render": {"concise": "Concise", "verbose": "Verbose"}},
-                {"id": "text_verbosity", "label": "Text verbosity", "type": "enum", "options": ["low", "medium", "high"], "render": {"low": "Low", "medium": "Medium", "high": "High"}},
-            ]},
-            {"label": "Tools & Security", "type": "group", "items": [
-                {
-                    "id": "requested_tools",
-                    "label": "Tools",
-                    "type": "enum",
-                    "options": [None, True, False],
-                    "render": {None: "Server default", True: "ON", False: "OFF"},
-                },
-                {
-                    "id": "control_level",
-                    "label": "Control level",
-                    "type": "enum",
-                    "options": [None, 1, 2, 3],
-                    "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
-                },
-                {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
-                {"id": "show_tool_calls", "label": "Show tool call logs", "type": "bool"},
-                # Note: options are static for this Settings UI session, so include xhigh unconditionally.
-                # The server will safely downgrade xhigh on models that don't support it.
-                {"id": "reasoning_effort", "label": "OpenAI reasoning effort", "type": "enum", "options": ["low", "medium", "high", "xhigh"], "render": {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh (gpt-5.2* / Codex Max; otherwise downgrades)"}},
-                {"id": "codex_max_allow_all_tools", "label": "Codex Max: allow ALL tools", "type": "bool"},
-                {"id": "retain_native_tool_results", "label": "Retain provider-native tool results across turns", "type": "bool"},
-                {"id": "thinking_budget_tokens", "label": "Anthropic thinking budget (tokens)", "type": "int"},
-                {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
-                # Agent scope & filesystem controls
-                {"id": "host_base", "label": "Agent scope directory", "type": "text"},
+        items: List[Dict[str, Any]] = [
+            {"label": "General", "type": "group", "items": [
+                {
+                    "id": "save_chat_history",
+                    "label": "Save to unified memory",
+                    "type": "bool",
+                    "description": "When ON, chats sync to your account and appear in the web portal. When OFF, chats are ephemeral (local only)."
+                },
+                {
+                    "id": "model",
+                    "label": "Model",
+                    "type": "enum",
+                    "options": model_enum_options,
+                    "render": render_map,
+                },
+                {"id": "system_prompt", "label": "System prompt", "type": "multiline"},
+                {"id": "usage_info_mode", "label": "Usage panel", "type": "enum", "options": ["concise", "verbose"], "render": {"concise": "Concise", "verbose": "Verbose"}},
+                {"id": "text_verbosity", "label": "Text verbosity", "type": "enum", "options": ["low", "medium", "high"], "render": {"low": "Low", "medium": "Medium", "high": "High"}},
+            ]},
+            {"label": "Tools & Security", "type": "group", "items": [
+                {
+                    "id": "requested_tools",
+                    "label": "Tools",
+                    "type": "enum",
+                    # Default-first: ON, then OFF, then server default.
+                    "options": [True, False, None],
+                    "render": {None: "Server default", True: "ON", False: "OFF"},
+                },
+                {
+                    "id": "control_level",
+                    "label": "Control level",
+                    "type": "enum",
+                    # Default-first: Level 3, then 2, then 1, then server default.
+                    "options": [3, 2, 1, None],
+                    "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
+                },
+                {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
+                {"id": "show_tool_calls", "label": "Show tool call logs", "type": "bool"},
+                # Note: options are static for this Settings UI session, so include xhigh unconditionally.
+                # The server will safely downgrade xhigh on models that don't support it.
+                {"id": "reasoning_effort", "label": "OpenAI reasoning effort", "type": "enum", "options": ["low", "medium", "high", "xhigh"], "render": {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh (gpt-5.2* / Codex Max; otherwise downgrades)"}},
+                {"id": "codex_max_allow_all_tools", "label": "Codex Max: allow ALL tools", "type": "bool"},
+                {"id": "retain_native_tool_results", "label": "Retain provider-native tool results across turns", "type": "bool"},
+                {"id": "thinking_budget_tokens", "label": "Anthropic thinking budget (tokens)", "type": "int"},
+                {"id": "anthropic_effort", "label": "Anthropic effort (Opus 4.6/4.5)", "type": "enum", "options": ["low", "medium", "high", "max"], "render": {"low": "Low", "medium": "Medium", "high": "High (default)", "max": "Max (Opus 4.6 only)"}},
+                {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
+                # Agent scope & filesystem controls
+                {"id": "host_base", "label": "Agent scope directory", "type": "text"},
             {
                 "id": "fs_scope",
                 "label": "Filesystem scope",
                 "type": "enum",
-                "options": [None, "workspace", "host"],
+                # Default-first: host (Agent scope), then workspace, then server default.
+                "options": ["host", "workspace", None],
                 "render": {
                     None: "Server default",
                     "workspace": "Workspace (sandbox)",
@@ -2677,7 +2819,8 @@ class ChatCLI:
2677
2819
  "id": "fs_host_mode",
2678
2820
  "label": "Host mode",
2679
2821
  "type": "enum",
2680
- "options": [None, "any", "cwd", "custom"],
2822
+ # Default-first: custom (use Agent scope), then cwd, then any, then server default.
2823
+ "options": ["custom", "cwd", "any", None],
2681
2824
  "render": {
2682
2825
  None: "Server default / any",
2683
2826
  "any": "any (no extra client restriction)",
@@ -2689,12 +2832,19 @@ class ChatCLI:
2689
2832
  {"label": "Code Map", "type": "group", "items": [
2690
2833
  {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
2691
2834
  ]},
2692
- {"label": "Preambles & First-turn", "type": "group", "items": [
2693
- {"id": "preambles_enabled", "label": "Enable tool call preambles (supported models only)", "type": "bool"},
2694
- {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
2695
- {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
2696
- {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
2697
- ]},
2835
+ {"label": "Preambles & First-turn", "type": "group", "items": [
2836
+ {
2837
+ "id": "preambles_enabled",
2838
+ "label": "Enable tool call preambles (GPT-5 only)",
2839
+ "type": "bool",
2840
+ # Only show this control when the *currently selected* model supports it.
2841
+ # (This updates live as the Model picker changes.)
2842
+ "visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
2843
+ },
2844
+ {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
2845
+ {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
2846
+ {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
2847
+ ]},
2698
2848
  {"label": "Web search", "type": "group", "items": [
2699
2849
  {"id": "web_search_enabled", "label": "Enable web search (OpenAI)", "type": "bool"},
2700
2850
  {"id": "web_search_allowed_domains", "label": "Allowed domains (comma)", "type": "text"},
@@ -2703,6 +2853,20 @@ class ChatCLI:
  ]},
  ]

+ # Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+ try:
+ if not self._is_gpt_model(self.model):
+ for g in items:
+ if not isinstance(g, dict):
+ continue
+ if (g.get("type") == "group") and (g.get("label") == "General"):
+ for row in (g.get("items") or []):
+ if isinstance(row, dict) and row.get("id") == "text_verbosity":
+ row["options"] = ["medium", "high"]
+ row["render"] = {"medium": "Medium", "high": "High"}
+ except Exception:
+ pass
+
  # Prepare initial values with enum placeholder for model when custom text set
  init_for_ui = dict(initial)
  if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
@@ -2714,10 +2878,10 @@ class ChatCLI:
  try:
  if rid == "model":
  if value == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
+ typed = self.ui.prompt(
+ "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
+ default=self.model or "",
+ )
  working["model"] = typed.strip() or None
  self._apply_model_side_effects()
  elif rid == "text_verbosity" and isinstance(value, str):
@@ -2742,14 +2906,14 @@ class ChatCLI:
  if k.strip() and v.strip():
  kv[k.strip()] = v.strip()
  working[rid] = kv
- elif rid == "auto_approve" and isinstance(value, str):
- working[rid] = [t.strip() for t in value.split(",") if t.strip()]
- elif rid == "anthropic_cache_ttl":
- if value in ("5m", "1h"):
- working[rid] = value
- else:
- working[rid] = None
- self._apply_settings_dict({rid: working.get(rid)})
+ elif rid == "auto_approve" and isinstance(value, str):
+ working[rid] = [t.strip() for t in value.split(",") if t.strip()]
+ elif rid == "anthropic_cache_ttl":
+ if value in ("5m", "1h"):
+ working[rid] = value
+ else:
+ working[rid] = None
+ self._apply_settings_dict({rid: working.get(rid)})
  if rid == "host_base":
  try:
  self._host_base_ephemeral = False
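The on-change handler above coerces raw widget values per row id before applying them. A standalone sketch of the two coercions this hunk shows; `coerce_setting` is an illustrative name:

from typing import Any

def coerce_setting(rid: str, value: Any) -> Any:
    """Mirror the hunk above: comma-separated text -> list, TTL -> known enum or None."""
    if rid == "auto_approve" and isinstance(value, str):
        return [t.strip() for t in value.split(",") if t.strip()]
    if rid == "anthropic_cache_ttl":
        return value if value in ("5m", "1h") else None
    return value

assert coerce_setting("auto_approve", "write_file, run_command,") == ["write_file", "run_command"]
assert coerce_setting("anthropic_cache_ttl", "2h") is None  # unknown TTL falls back to server default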
@@ -3057,9 +3221,9 @@ class ChatCLI:
  "Control Level",
  "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
  [
- ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
- ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
  ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+ ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+ ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
  ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
  ],
  )
@@ -3142,14 +3306,14 @@ class ChatCLI:
  except Exception:
  pass

- # 3) Tool usage preamble (UX hint)
- try:
- if bool(getattr(self, "preambles_enabled", False)) and not self._is_codex_model(self.model):
- blocks.append(
- "Tool usage: when you need to read or modify files or run commands, "
- "explicitly explain why you're using a tool, what you'll do, and how it "
- "advances the user's goal before calling the tool."
- )
+ # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+ try:
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
+ blocks.append(
+ "Tool usage: when you need to read or modify files or run commands, "
+ "explicitly explain why you're using a tool, what you'll do, and how it "
+ "advances the user's goal before calling the tool."
+ )
  except Exception:
  pass
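`_supports_preambles` is referenced in this diff but not defined in it; the labels suggest it gates on GPT-5-family, non-Codex models. A hypothetical sketch consistent with that wording (the real helper may consult server metadata instead):

def _supports_preambles(model: object) -> bool:
    """Hypothetical check: GPT-5-family models emit tool preambles; Codex variants do not."""
    name = str(model or "").strip().lower()
    if not name.startswith("gpt-5"):
        return False
    return "codex" not in name

assert _supports_preambles("gpt-5.2") is True
assert _supports_preambles("gpt-5.2-codex") is False
assert _supports_preambles("claude-opus-4-6") is False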
 
@@ -3171,15 +3335,15 @@ class ChatCLI:

  def _build_messages(self, user_input: str) -> List[Dict[str, str]]:
  msgs: List[Dict[str, str]] = []
- # Inject a concise Codex developer system prompt for Codex models (optional)
- try:
- if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
- msgs.append({"role": "system", "content": self._codex_system_prompt()})
- except Exception:
- pass
- # Always send the system prompt as-is (do NOT inject the code map here)
- if self.system_prompt:
- msgs.append({"role": "system", "content": self.system_prompt})
+ # Inject a concise Codex developer system prompt for Codex models (optional)
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ # Always send the system prompt as-is (do NOT inject the code map here)
+ if self.system_prompt:
+ msgs.append({"role": "system", "content": self.system_prompt})

  # Replay prior conversation (excluding any system message already added)
  for msg in self.history:
@@ -3198,80 +3362,80 @@ class ChatCLI:
  except Exception:
  self._last_built_user_content = user_input

- msgs.append({"role": "user", "content": content})
- return msgs
-
- def _codex_system_prompt(self) -> str:
- """Minimal developer system prompt for GPT-5 Codex family."""
- return (
- "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n"
- "Note: In this CLI, the terminal tool is named 'run_command' (not 'shell'). Use run_command and always set the 'cwd' parameter; avoid using 'cd'.\n\n"
- "## General\n"
- "- Always set the 'cwd' param when using run_command. Do not use 'cd' unless absolutely necessary.\n"
- "- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n"
- "## Editing constraints\n"
- "- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n"
- "- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n"
- "- You may be in a dirty git worktree.\n"
- " * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n"
- " * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n"
- " * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n"
- " * If the changes are in unrelated files, just ignore them and don't revert them.\n"
- "- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n\n"
- "## Plan tool\n"
- "When using the planning tool:\n"
- "- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).\n"
- "- Do not make single-step plans.\n"
- "- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.\n\n"
- "## Codex CLI harness, sandboxing, and approvals\n"
- "The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.\n"
- "Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:\n"
- "- read-only: The sandbox only permits reading files.\n"
- "- workspace-write: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.\n"
- "- danger-full-access: No filesystem sandboxing - all commands are permitted.\n"
- "Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:\n"
- "- restricted: Requires approval\n"
- "- enabled: No approval needed\n"
- "Approvals are your mechanism to get user consent to run shell/terminal commands without the sandbox. Possible configuration options for `approval_policy` are\n"
- "- untrusted: The harness will escalate most commands for user approval, apart from a limited allowlist of safe 'read' commands.\n"
- "- on-failure: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.\n"
- "- on-request: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing.\n"
- "- never: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user.\n\n"
- "When requesting approval to execute a command that will require escalated privileges:\n"
- " - Provide the `with_escalated_permissions` parameter with the boolean value true (when available).\n"
- " - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter.\n\n"
- "## Special user requests\n"
- "- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.\n"
- "- If the user asks for a 'review', default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.\n\n"
- "## Presenting your work and final message\n"
- "You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.\n"
- "- Default: be very concise; friendly coding teammate tone.\n"
- "- Ask only when needed; suggest ideas; mirror the user's style.\n"
- "- For substantial work, summarize clearly; follow final-answer formatting.\n"
- "- Skip heavy formatting for simple confirmations.\n"
- "- Don't dump large files you've written; reference paths only.\n"
- "- No 'save/copy this file' - User is on the same machine.\n"
- "- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.\n"
- "- For code changes:\n"
- " * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with 'summary', just jump right in.\n"
- " * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.\n"
- "- File References: When referencing files in your response, include the relevant start line and follow these rules: use inline code for paths; each reference should have a standalone path; accepted: absolute, workspace-relative, a/ or b/ diff prefixes, or bare filename/suffix; optional line/column uses :line[:column] or #LlineCcolumn; do not use URIs; do not provide line ranges.\n\n"
- "Apply Patch\n"
- "As shared previously in the GPT-5 prompting guide, use apply_patch for file edits to match the training distribution.\n\n"
- "Preambles\n"
- "GPT-5-Codex does not emit preambles. Do not ask for them.\n\n"
- "Frontend Guidance\n"
- "Use the following libraries unless the user or repo specifies otherwise:\n"
- "Framework: React + TypeScript\n"
- "Styling: Tailwind CSS\n"
- "Components: shadcn/ui\n"
- "Icons: lucide-react\n"
- "Animation: Framer Motion\n"
- "Charts: Recharts\n"
- "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
- )
-
- def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
+ msgs.append({"role": "user", "content": content})
+ return msgs
+
+ def _codex_system_prompt(self) -> str:
+ """Minimal developer system prompt for GPT-5 Codex family."""
+ return (
+ "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n"
+ "Note: In this CLI, the terminal tool is named 'run_command' (not 'shell'). Use run_command and always set the 'cwd' parameter; avoid using 'cd'.\n\n"
+ "## General\n"
+ "- Always set the 'cwd' param when using run_command. Do not use 'cd' unless absolutely necessary.\n"
+ "- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n"
+ "## Editing constraints\n"
+ "- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n"
+ "- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n"
+ "- You may be in a dirty git worktree.\n"
+ " * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n"
+ " * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n"
+ " * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n"
+ " * If the changes are in unrelated files, just ignore them and don't revert them.\n"
+ "- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n\n"
+ "## Plan tool\n"
+ "When using the planning tool:\n"
+ "- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).\n"
+ "- Do not make single-step plans.\n"
+ "- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.\n\n"
+ "## Codex CLI harness, sandboxing, and approvals\n"
+ "The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.\n"
+ "Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:\n"
+ "- read-only: The sandbox only permits reading files.\n"
+ "- workspace-write: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.\n"
+ "- danger-full-access: No filesystem sandboxing - all commands are permitted.\n"
+ "Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:\n"
+ "- restricted: Requires approval\n"
+ "- enabled: No approval needed\n"
+ "Approvals are your mechanism to get user consent to run shell/terminal commands without the sandbox. Possible configuration options for `approval_policy` are\n"
+ "- untrusted: The harness will escalate most commands for user approval, apart from a limited allowlist of safe 'read' commands.\n"
+ "- on-failure: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.\n"
+ "- on-request: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing.\n"
+ "- never: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user.\n\n"
+ "When requesting approval to execute a command that will require escalated privileges:\n"
+ " - Provide the `with_escalated_permissions` parameter with the boolean value true (when available).\n"
+ " - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter.\n\n"
+ "## Special user requests\n"
+ "- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.\n"
+ "- If the user asks for a 'review', default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.\n\n"
+ "## Presenting your work and final message\n"
+ "You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.\n"
+ "- Default: be very concise; friendly coding teammate tone.\n"
+ "- Ask only when needed; suggest ideas; mirror the user's style.\n"
+ "- For substantial work, summarize clearly; follow final-answer formatting.\n"
+ "- Skip heavy formatting for simple confirmations.\n"
+ "- Don't dump large files you've written; reference paths only.\n"
+ "- No 'save/copy this file' - User is on the same machine.\n"
+ "- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.\n"
+ "- For code changes:\n"
+ " * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with 'summary', just jump right in.\n"
+ " * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.\n"
+ "- File References: When referencing files in your response, include the relevant start line and follow these rules: use inline code for paths; each reference should have a standalone path; accepted: absolute, workspace-relative, a/ or b/ diff prefixes, or bare filename/suffix; optional line/column uses :line[:column] or #LlineCcolumn; do not use URIs; do not provide line ranges.\n\n"
+ "Apply Patch\n"
+ "As shared previously in the GPT-5 prompting guide, use apply_patch for file edits to match the training distribution.\n\n"
+ "Preambles\n"
+ "GPT-5-Codex does not emit preambles. Do not ask for them.\n\n"
+ "Frontend Guidance\n"
+ "Use the following libraries unless the user or repo specifies otherwise:\n"
+ "Framework: React + TypeScript\n"
+ "Styling: Tailwind CSS\n"
+ "Components: shadcn/ui\n"
+ "Icons: lucide-react\n"
+ "Animation: Framer Motion\n"
+ "Charts: Recharts\n"
+ "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
+ )
+
+ def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
  """Build provider-native messages for Kimi preserving prior assistant reasoning_content.
  Includes prior provider-native turns and the current user message with first-turn injections.
  """
@@ -3289,31 +3453,31 @@ class ChatCLI:
  for m in (self._kimi_raw_history or []):
  raw.append(m)
  # Append current user message
- raw.append({"role": "user", "content": content})
- return raw
-
- def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
- """Normalize Gemini provider-native history.
-
- Ensures we only send a flat list of dicts back to the server.
- This prevents accidental nesting like [[{...}, {...}]] which the
- google-genai SDK rejects with pydantic union validation errors.
- """
- out: List[Dict[str, Any]] = []
- if not isinstance(rpm, list):
- return out
- for item in rpm:
- if item is None:
- continue
- if isinstance(item, list):
- # Flatten one level
- for sub in item:
- if isinstance(sub, dict):
- out.append(dict(sub))
- continue
- if isinstance(item, dict):
- out.append(dict(item))
- return out
+ raw.append({"role": "user", "content": content})
+ return raw
+
+ def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
+ """Normalize Gemini provider-native history.
+
+ Ensures we only send a flat list of dicts back to the server.
+ This prevents accidental nesting like [[{...}, {...}]] which the
+ google-genai SDK rejects with pydantic union validation errors.
+ """
+ out: List[Dict[str, Any]] = []
+ if not isinstance(rpm, list):
+ return out
+ for item in rpm:
+ if item is None:
+ continue
+ if isinstance(item, list):
+ # Flatten one level
+ for sub in item:
+ if isinstance(sub, dict):
+ out.append(dict(sub))
+ continue
+ if isinstance(item, dict):
+ out.append(dict(item))
+ return out

  def _build_working_memory_injection(self) -> Optional[str]:
  try:
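The Gemini normalization above guards against exactly one level of accidental nesting. A standalone copy of the same logic, demonstrating the input and output shapes:

from typing import Any, Dict, List

def flatten_one_level(rpm: Any) -> List[Dict[str, Any]]:
    """Flatten [[{...}], {...}] into [{...}, {...}], dropping None and non-dict items."""
    out: List[Dict[str, Any]] = []
    if not isinstance(rpm, list):
        return out
    for item in rpm:
        if item is None:
            continue
        if isinstance(item, list):
            out.extend(dict(sub) for sub in item if isinstance(sub, dict))
            continue
        if isinstance(item, dict):
            out.append(dict(item))
    return out

# The nested shape the google-genai SDK rejects becomes a flat list:
print(flatten_one_level([[{"role": "user"}], {"role": "model"}, None]))
# -> [{'role': 'user'}, {'role': 'model'}]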
@@ -3483,54 +3647,54 @@ class ChatCLI:
  except Exception:
  return ""

- def _approval_prompt_ui(self, label: str, args: Dict[str, Any]) -> str:
+ def _approval_prompt_ui(self, label: str, args: Dict[str, Any]) -> str:
  """Interactive approval prompt for Level 2.

  Uses the same highlighted, arrow-key-driven menu UX as the rest of the CLI
  when TTY input is available, and falls back to numeric input otherwise.
  Returns one of: "once", "session", "always", "deny".
  """
- self.ui.print(f"\n[Level 2] Approval required for: {label}")
- # Show a compact summary
- summary = self._tool_summary(label.split(":")[0], args)
- self.ui.print(summary, style=self.ui.theme["dim"])
-
- # Show what we're actually approving (key fields), so the user can make an informed decision.
- try:
- base_tool = (label.split(":")[0] if isinstance(label, str) and ":" in label else label) or ""
- except Exception:
- base_tool = label or ""
- bt = str(base_tool).strip().lower()
- try:
- if bt == "run_command":
- cmd = args.get("cmd")
- cwd = args.get("cwd")
- timeout = args.get("timeout")
- if cmd is not None:
- self.ui.print(f"cmd: {self._clip(cmd, 400)}", style=self.ui.theme["dim"])
- if cwd is not None:
- self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
- if timeout is not None:
- self.ui.print(f"timeout: {timeout}", style=self.ui.theme["dim"])
- elif bt in ("write_file", "append_file"):
- path = args.get("path")
- content = args.get("content") or ""
- self.ui.print(f"path: {path}", style=self.ui.theme["dim"])
- try:
- b = len(str(content).encode("utf-8", errors="replace"))
- except Exception:
- b = None
- if b is not None:
- self.ui.print(f"content_bytes: {b}", style=self.ui.theme["dim"])
- elif bt == "apply_patch":
- cwd = args.get("cwd")
- dry = bool(args.get("dry_run", False))
- if cwd is not None:
- self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
- self.ui.print(f"dry_run: {dry}", style=self.ui.theme["dim"])
- except Exception:
- # Never block approvals on formatting
- pass
+ self.ui.print(f"\n[Level 2] Approval required for: {label}")
+ # Show a compact summary
+ summary = self._tool_summary(label.split(":")[0], args)
+ self.ui.print(summary, style=self.ui.theme["dim"])
+
+ # Show what we're actually approving (key fields), so the user can make an informed decision.
+ try:
+ base_tool = (label.split(":")[0] if isinstance(label, str) and ":" in label else label) or ""
+ except Exception:
+ base_tool = label or ""
+ bt = str(base_tool).strip().lower()
+ try:
+ if bt == "run_command":
+ cmd = args.get("cmd")
+ cwd = args.get("cwd")
+ timeout = args.get("timeout")
+ if cmd is not None:
+ self.ui.print(f"cmd: {self._clip(cmd, 400)}", style=self.ui.theme["dim"])
+ if cwd is not None:
+ self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
+ if timeout is not None:
+ self.ui.print(f"timeout: {timeout}", style=self.ui.theme["dim"])
+ elif bt in ("write_file", "append_file"):
+ path = args.get("path")
+ content = args.get("content") or ""
+ self.ui.print(f"path: {path}", style=self.ui.theme["dim"])
+ try:
+ b = len(str(content).encode("utf-8", errors="replace"))
+ except Exception:
+ b = None
+ if b is not None:
+ self.ui.print(f"content_bytes: {b}", style=self.ui.theme["dim"])
+ elif bt == "apply_patch":
+ cwd = args.get("cwd")
+ dry = bool(args.get("dry_run", False))
+ if cwd is not None:
+ self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
+ self.ui.print(f"dry_run: {dry}", style=self.ui.theme["dim"])
+ except Exception:
+ # Never block approvals on formatting
+ pass

  choices: List[Tuple[str, str]] = [
  ("once", "Approve once"),
@@ -3788,6 +3952,39 @@ class ChatCLI:
  if n.lower() in ("context", "to_next"):
  return "Context handoff to next turn"

+ # Universal context handoff tool (server-side). Show what files the model chose to keep.
+ if n.lower() == "context_handoff":
+ try:
+ rd = (result or {}).get("data") if isinstance(result, dict) else None
+ rd = rd if isinstance(rd, dict) else {}
+ reason = (rd.get("reason") if isinstance(rd, dict) else None) or ""
+ mem_path = (rd.get("memory_path") if isinstance(rd, dict) else None) or ""
+ keep_files = rd.get("keep_files") if isinstance(rd, dict) else None
+ files: List[str] = []
+ if isinstance(keep_files, list):
+ for x in keep_files:
+ if isinstance(x, str) and x.strip():
+ files.append(x.strip())
+ # Limit for single-line readability
+ shown = files[:5]
+ more = max(0, len(files) - len(shown))
+ if shown:
+ files_part = ", ".join(shown) + (f" (+{more} more)" if more else "")
+ return (
+ "Context handoff saved"
+ + (f" (reason={reason})" if str(reason).strip() else "")
+ + f". Files kept in context: {files_part}"
+ + (f". memory_path: {mem_path}" if str(mem_path).strip() else "")
+ )
+ return (
+ "Context handoff saved"
+ + (f" (reason={reason})" if str(reason).strip() else "")
+ + ". No files were selected to be kept in context"
+ + (f". memory_path: {mem_path}" if str(mem_path).strip() else "")
+ )
+ except Exception:
+ return "Context handoff saved"
+
  def _arg_path() -> str:
  p = a.get("path") or data.get("path") or ""
  try:
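The summary above assumes a result envelope of the form {"data": {...}}. A hypothetical payload that exercises the keep-files branch; the field names come from the hunk, while the values and path are invented for illustration:

result = {
    "data": {
        "reason": "context_window_pressure",      # illustrative value
        "memory_path": "memory/handoff-0001.md",  # illustrative path
        "keep_files": ["cli.py", "README.md", "", None],  # blanks/non-strings are dropped
    }
}
files = [x.strip() for x in result["data"]["keep_files"] if isinstance(x, str) and x.strip()]
print(files)  # ['cli.py', 'README.md'] -> "Files kept in context: cli.py, README.md"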
@@ -3897,6 +4094,35 @@ class ChatCLI:
  return

  data = result.get("data", {}) or {}
+
+ # Universal context_handoff tool: show where it was saved and what files were chosen.
+ if name == "context_handoff":
+ try:
+ reason = data.get("reason")
+ mem_path = data.get("memory_path")
+ keep_files = data.get("keep_files")
+ self.ui.print(
+ f"⇐ [{self.ui.theme['tool_result']}]✅ Context handoff saved[/{self.ui.theme['tool_result']}]"
+ + (f" (reason={reason})" if isinstance(reason, str) and reason.strip() else "")
+ )
+ if isinstance(mem_path, str) and mem_path.strip():
+ self.ui.print(f"memory_path: {mem_path}", style=self.ui.theme["dim"]) # type: ignore
+ if isinstance(keep_files, list) and keep_files:
+ cleaned = [str(x).strip() for x in keep_files if isinstance(x, (str, int, float)) and str(x).strip()]
+ if cleaned:
+ self.ui.print("Files kept in context:", style=self.ui.theme["dim"]) # type: ignore
+ for p in cleaned[:25]:
+ self.ui.print(f"- {p}", style=self.ui.theme["dim"]) # type: ignore
+ if len(cleaned) > 25:
+ self.ui.print(f"... +{len(cleaned)-25} more", style=self.ui.theme["dim"]) # type: ignore
+ else:
+ self.ui.print("No files were selected to be kept in context.", style=self.ui.theme["dim"]) # type: ignore
+ else:
+ self.ui.print("No files were selected to be kept in context.", style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ # Fall through to default renderer below.
+ pass
+ return
  if name == "read_file":
  path = data.get("path", "")
  content = data.get("content", "") or ""
@@ -4302,25 +4528,25 @@ class ChatCLI:
  await self.open_settings()
  return True

- if cmd.startswith("/history"):
- parts = cmd.split(maxsplit=1)
- if len(parts) == 1:
- self.ui.info("Usage: /history on|off")
- self.ui.info(f"Current: {'ON (synced to unified memory)' if self.save_chat_history else 'OFF (ephemeral)'}")
- return True
- arg = parts[1].strip().lower()
- if arg == "on":
- self.save_chat_history = True
- self.ui.success("Chat history will be saved to unified memory.")
- elif arg == "off":
- self.save_chat_history = False
- self.ui.success("Chat history is now ephemeral (local only, not synced).")
- else:
- self.ui.warn("Usage: /history on|off")
- return True
- self.save_settings()
- return True
-
+ if cmd.startswith("/history"):
+ parts = cmd.split(maxsplit=1)
+ if len(parts) == 1:
+ self.ui.info("Usage: /history on|off")
+ self.ui.info(f"Current: {'ON (synced to unified memory)' if self.save_chat_history else 'OFF (ephemeral)'}")
+ return True
+ arg = parts[1].strip().lower()
+ if arg == "on":
+ self.save_chat_history = True
+ self.ui.success("Chat history will be saved to unified memory.")
+ elif arg == "off":
+ self.save_chat_history = False
+ self.ui.success("Chat history is now ephemeral (local only, not synced).")
+ else:
+ self.ui.warn("Usage: /history on|off")
+ return True
+ self.save_settings()
+ return True
+
  if cmd.startswith("/tools"):
  parts = cmd.split(maxsplit=1)
  if len(parts) == 1:
@@ -4433,25 +4659,25 @@ class ChatCLI:
  self.ui.warn("Unknown /websearch subcommand. Use on, off, domains, sources, or location.")
  return True

- if cmd.startswith("/reasoning"):
- parts = cmd.split(maxsplit=1)
- if len(parts) == 1:
- self.ui.info("Usage: /reasoning low|medium|high|xhigh")
- self.ui.info(f"Current: {self.reasoning_effort}")
- return True
- arg = (parts[1] or "").strip().lower()
- if arg in ("low", "medium", "high", "xhigh"):
- self.reasoning_effort = arg
- if arg == "xhigh" and not self._supports_xhigh_reasoning_effort(self.model):
- # Keep the user's preference, but be explicit about server-side downgrading.
- self.ui.warn(
- "Note: xhigh is only applied on models that support it (e.g., gpt-5.2* / gpt-5.1-codex-max). The server may downgrade it on other models."
- )
- self.ui.success(f"Reasoning effort set to: {self.reasoning_effort}")
- self.save_settings()
- else:
- self.ui.warn("Invalid value. Use: low, medium, high, or xhigh")
- return True
+ if cmd.startswith("/reasoning"):
+ parts = cmd.split(maxsplit=1)
+ if len(parts) == 1:
+ self.ui.info("Usage: /reasoning low|medium|high|xhigh")
+ self.ui.info(f"Current: {self.reasoning_effort}")
+ return True
+ arg = (parts[1] or "").strip().lower()
+ if arg in ("low", "medium", "high", "xhigh"):
+ self.reasoning_effort = arg
+ if arg == "xhigh" and not self._supports_xhigh_reasoning_effort(self.model):
+ # Keep the user's preference, but be explicit about server-side downgrading.
+ self.ui.warn(
+ "Note: xhigh is only applied on models that support it (e.g., gpt-5.2* / gpt-5.1-codex-max). The server may downgrade it on other models."
+ )
+ self.ui.success(f"Reasoning effort set to: {self.reasoning_effort}")
+ self.save_settings()
+ else:
+ self.ui.warn("Invalid value. Use: low, medium, high, or xhigh")
+ return True

  if cmd.startswith("/thinkingbudget"):
  parts = cmd.split(maxsplit=1)
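`_supports_xhigh_reasoning_effort` is referenced but not defined in this diff; the warning text names gpt-5.2* and gpt-5.1-codex-max. A hypothetical predicate matching that message (the real check may consult server-side model metadata instead):

def _supports_xhigh_reasoning_effort(model: object) -> bool:
    """Hypothetical: xhigh applies to gpt-5.2* and gpt-5.1-codex-max; others downgrade server-side."""
    name = str(model or "").strip().lower()
    return name.startswith("gpt-5.2") or name == "gpt-5.1-codex-max"

assert _supports_xhigh_reasoning_effort("gpt-5.2-codex") is True
assert _supports_xhigh_reasoning_effort("gpt-5") is False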
@@ -4628,6 +4854,14 @@ class ChatCLI:
  except Exception:
  pass
  self.ui.success("System prompt set.")
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  self.save_settings()
  return True

@@ -4641,7 +4875,7 @@ class ChatCLI:
  self.ui.success(f"Thread title set to: {self.thread_name}")
  return True

- if cmd == "/clear":
+ if cmd == "/clear":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
@@ -4650,13 +4884,17 @@ class ChatCLI:
  pass
  # Reset provider-native histories
  try:
- self.messages_for_save = []
- if not self.save_chat_history:
- self.thread_uid = None
- self._kimi_raw_history = []
- self._gemini_raw_history = []
- except Exception:
- pass
+ self.messages_for_save = []
+ if not self.save_chat_history:
+ self.thread_uid = None
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -4857,9 +5095,9 @@ class ChatCLI:
  # We have a fallback map (repo copy) but none at host base
  self.ui.print("Code Map: fallback example in use (host base missing CODEBASE_MAP.md). It will be prefixed.", style=self.ui.theme["dim"])
  else:
- self.ui.print("Code Map: missing at host base — toggle with /map on|off", style=self.ui.theme["dim"])
- # History status
- hist_status = "ON (synced)" if self.save_chat_history else "OFF (ephemeral)"
+ self.ui.print("Code Map: missing at host base — toggle with /map on|off", style=self.ui.theme["dim"])
+ # History status
+ hist_status = "ON (synced)" if self.save_chat_history else "OFF (ephemeral)"
  self.ui.print(f"Chat history: {hist_status}", style=self.ui.theme["dim"])
  # If a host base is configured and code map injection is enabled, offer to generate when missing
  try:
@@ -4909,20 +5147,48 @@ class ChatCLI:
  pt_completer = self._commands_word_completer()
  while True:
  try:
- if self._pt_session is not None:
- # Use prompt_toolkit with inline completion when available
- # Pass completer per-prompt to ensure latest catalog
- user_input = await self._pt_session.prompt_async(
- "You: ",
- completer=pt_completer,
- complete_while_typing=True,
- )
- user_input = user_input.strip()
- elif self._input_engine:
- # Do not add continuation prefixes on new lines
- user_input = self._input_engine.read_message("You: ", "")
+ pending_edit = self._pending_user_edit
+ edit_mode = pending_edit is not None
+
+ # prompt_toolkit is intentionally not used.
+ # Always prefer our dependency-free input engine when available.
+ if self._input_engine:
+ if edit_mode:
+ # The low-level input engine doesn't support prefilling.
+ # Show the prior message and let the user paste a replacement.
+ try:
+ self.ui.print(
+ "\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):",
+ style=self.ui.theme["warn"], # type: ignore
+ )
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ # Do not add continuation prefixes on new lines.
+ user_input = self._input_engine.read_message("You: ", "")
  else:
- user_input = self._read_multiline_input("You: ")
+ # Last-resort fallback.
+ if edit_mode:
+ try:
+ self.ui.print(
+ "\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):",
+ style=self.ui.theme["warn"], # type: ignore
+ )
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ user_input = self._read_multiline_input("You: ")
+
+ # Clear pending edit state after we successfully collected input.
+ if edit_mode:
+ self._pending_user_edit = None
+ self._pending_turn_snapshot = None
  # Successful read resets interrupt window
  self._last_interrupt_ts = None
  except KeyboardInterrupt:
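The edit flow above cannot prefill the low-level input engine, so it adopts a convention: empty input resends the interrupted message unchanged. A distilled sketch of that rule, with an illustrative helper name:

from typing import Optional

def resolve_edit(pending_edit: Optional[str], typed: str) -> str:
    """Empty (or whitespace-only) input resends the interrupted message verbatim."""
    if pending_edit is not None and not typed.strip():
        return pending_edit
    return typed

assert resolve_edit("fix the tests", "") == "fix the tests"
assert resolve_edit("fix the tests", "fix the docs") == "fix the docs"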
@@ -4969,6 +5235,29 @@ class ChatCLI:
  continue

  try:
+ # Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+ # This is critical for first-turn injections (code map/custom note/working memory)
+ # which are applied by mutating flags during payload construction.
+ self._pending_turn_snapshot = {
+ "history": copy.deepcopy(self.history),
+ "messages_for_save": copy.deepcopy(self.messages_for_save),
+ "kimi_raw": copy.deepcopy(self._kimi_raw_history),
+ "gemini_raw": copy.deepcopy(self._gemini_raw_history),
+ "openai_prev": getattr(self, "_openai_previous_response_id", None),
+ "openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+ "openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+ "openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+ "inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+ "did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+ "did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+ "did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+ "memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+ "last_built_user_content": getattr(self, "_last_built_user_content", None),
+ }
+
+ # Clear any stale in-flight dispatch context at turn start.
+ self._inflight_dispatch = None
+
  # Record user message for local/server save
  if self.save_chat_history:
  self.messages_for_save.append({
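The snapshot above is deep-copied because the histories are nested lists of dicts that later code mutates in place; a shallow copy would alias the live state. A minimal sketch of the checkpoint/revert pattern (state shape simplified for illustration):

import copy

state = {"history": [{"role": "user", "content": "hi"}], "did_inject": False}
snapshot = copy.deepcopy(state)          # pre-turn checkpoint

state["history"].append({"role": "assistant", "content": "partial..."})
state["did_inject"] = True               # first-turn flag flipped mid-stream

state = copy.deepcopy(snapshot)          # Ctrl+C: revert as if the turn never ran
assert state == {"history": [{"role": "user", "content": "hi"}], "did_inject": False}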
@@ -4992,6 +5281,67 @@ class ChatCLI:
  assistant_text = await self._stream_once(user_input)
  finally:
  self._busy = False
+ except KeyboardInterrupt:
+ # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
+ # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+ try:
+ await self._cancel_inflight_dispatch()
+ except (Exception, BaseException):
+ pass
+
+ # Restore state to *before* this turn started.
+ try:
+ snap = self._pending_turn_snapshot or {}
+ if isinstance(snap.get("history"), list):
+ self.history = snap.get("history")
+ if isinstance(snap.get("messages_for_save"), list):
+ self.messages_for_save = snap.get("messages_for_save")
+ if isinstance(snap.get("kimi_raw"), list):
+ self._kimi_raw_history = snap.get("kimi_raw")
+ if isinstance(snap.get("gemini_raw"), list):
+ self._gemini_raw_history = snap.get("gemini_raw")
+ if "openai_prev" in snap:
+ self._openai_previous_response_id = snap.get("openai_prev")
+ if isinstance(snap.get("openai_ids"), list):
+ self._openai_response_id_history = snap.get("openai_ids")
+ if isinstance(snap.get("openai_input_items"), list):
+ self._openai_input_items = snap.get("openai_input_items")
+ if "openai_last_sent_input_items" in snap:
+ self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
+ if "inflight_dispatch" in snap:
+ self._inflight_dispatch = snap.get("inflight_dispatch")
+ if "did_inject_codebase_map" in snap:
+ self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
+ if "did_inject_custom_first_turn" in snap:
+ self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
+ if "did_inject_working_memory" in snap:
+ self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
+ if "memory_paths_for_first_turn" in snap:
+ self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
+ self._last_built_user_content = snap.get("last_built_user_content")
+ except Exception:
+ pass
+
+ # Clear any transient indicator line and land on a fresh prompt line.
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ try:
+ self.ui.print()
+ except Exception:
+ pass
+
+ try:
+ supports = self._provider_supports_native_retention(self.model)
+ except Exception:
+ supports = False
+ if supports:
+ self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
+ else:
+ self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
+ self._pending_user_edit = user_input
+ continue
  except httpx.HTTPStatusError as he:
  try:
  if he.response is not None:
@@ -5024,14 +5374,14 @@ class ChatCLI:
  auth_action_key = "logout" if self.auth_user else "login"
  auth_action_label = f"🔓 Logout ({self.auth_user})" if self.auth_user else "🔑 Login"
  choices = [
- ("toggle_tools", f"🧰 Toggle Tools ({self._tools_label()}) - Enable/disable file tools per request (ON: request tools, OFF: no tools, DEFAULT: server setting)"),
+ ("toggle_tools", f"🧰 Toggle Tools ({self._tools_label()}) - Enable/disable file tools per request (ON: request tools, OFF: no tools, DEFAULT: server setting)"),
  ("toggle_history", f"🕘 Toggle History ({'ON' if self.save_chat_history else 'OFF'}) - Save chats to unified memory"),
  ("set_scope", f"📦 Set Filesystem Scope (current: {self._fs_label()}) - Choose workspace (sandbox) or host (full filesystem access if allowed)"),
  ("set_host_base", f"🖥️ Set Agent Scope (current: {self.host_base or '(none)'}) - Absolute path the agent can access when host scope is enabled"),
  ("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
  ("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
  (auth_action_key, auth_action_label),
- ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5, gemini-2.5-pro, grok-4, deepseek-chat) or use Change Model to type one"),
+ ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2.5, etc.)"),
  ("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
  ("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
  ("clear_history", "🧹 Clear History - Reset chat history"),
@@ -5067,22 +5417,21 @@ class ChatCLI:
  has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
  is_effectively_free = (self.is_free_tier and not has_credits)

- # Recommended models (ordered list for shuffling)
- # Curated list per request (include Codex Max as recommended)
- rec_keys = [
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
- "gemini-3-pro-preview",
- "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
- "kimi-k2-thinking",
- "grok-code-fast-1",
- ]
-
- # If effectively free, shuffle kimi-k2-thinking to the top
+ # Recommended models ("feelings" order)
+ rec_keys = [
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
+ "gemini-3-pro-preview",
+ "gemini-3-flash-preview",
+ "claude-opus-4-6",
+ "kimi-k2.5",
+ "grok-code-fast-1",
+ ]
+
+ # If effectively free, shuffle kimi-k2.5 to the top
  if is_effectively_free:
- target = "kimi-k2-thinking"
+ target = "kimi-k2.5"
  if target in rec_keys:
  rec_keys.remove(target)
  rec_keys.insert(0, target)
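The reorder above is a simple move-to-front. A standalone sketch showing its effect for the free tier; `promote` is an illustrative name and the list contents are copied from the hunk:

from typing import List

def promote(rec_keys: List[str], target: str) -> List[str]:
    keys = list(rec_keys)
    if target in keys:
        keys.remove(target)
        keys.insert(0, target)  # free-tier users see the free-friendly model first
    return keys

rec = ["gpt-5.2", "gpt-5.2-codex", "gpt-5", "gemini-3-pro-preview",
       "gemini-3-flash-preview", "claude-opus-4-6", "kimi-k2.5", "grok-code-fast-1"]
assert promote(rec, "kimi-k2.5")[0] == "kimi-k2.5"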
@@ -5117,8 +5466,7 @@ class ChatCLI:
  suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
  choices.append((m, f"{lbl}{suffix}"))

- choices.append(("default", "Server Default (no override)"))
- choices.append(("custom", "Custom (enter a model name)"))
+ # Per issue list: do not surface "server default" or "custom" in this picker.

  # Render and select using the unified highlighted picker
  picked: Optional[str] = None
@@ -5133,27 +5481,15 @@ class ChatCLI:
  picked = str(val)

  # Enforce free tier restrictions
- if picked not in ("default", "custom") and is_effectively_free and is_paid_model(picked):
+ if is_effectively_free and is_paid_model(picked):
  self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
  continue

  break

  # Apply selection
- if picked == "default":
- self.model = None
- self.ui.info("Model cleared; server default will be used.")
- elif picked == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
- self.model = self._resolve_model_alias(typed.strip() or None)
- if not self.model:
- self.ui.info("Model cleared; server default will be used.")
- else:
- self.model = picked
- self.ui.success(f"Model set to: {self.model}")
+ self.model = picked
+ self.ui.success(f"Model set to: {self.model}")

  self._apply_model_side_effects()
  self.save_settings()
@@ -5176,12 +5512,12 @@ class ChatCLI:
  self.save_settings()
  return True

- if choice == "toggle_history":
- self.save_chat_history = not self.save_chat_history
- self.ui.success(f"History set to: {'ON' if self.save_chat_history else 'OFF'}")
- self.save_settings()
- return True
-
+ if choice == "toggle_history":
+ self.save_chat_history = not self.save_chat_history
+ self.ui.success(f"History set to: {'ON' if self.save_chat_history else 'OFF'}")
+ self.save_settings()
+ return True
+
  if choice == "set_scope":
  await self.set_scope_menu()
  return True
@@ -5256,31 +5592,38 @@ class ChatCLI:
  pass
  # Clear provider-native histories on system reset
  try:
- self.messages_for_save = []
- if not self.save_chat_history:
- self.thread_uid = None
+ self.messages_for_save = []
+ if not self.save_chat_history:
+ self.thread_uid = None
  self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
  except Exception:
  pass
  self.ui.success("System prompt set.")
  self.save_settings()
  return True

- if choice == "clear_history":
+ if choice == "clear_history":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- try:
- self.messages_for_save = []
- if not self.save_chat_history:
- self.thread_uid = None
- self._kimi_raw_history = []
- self._gemini_raw_history = []
- except Exception:
- pass
+ try:
+ self.messages_for_save = []
+ if not self.save_chat_history:
+ self.thread_uid = None
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -5334,10 +5677,81 @@ class ChatCLI:
5334
5677
 
5335
5678
  # ----------------------- SSE Streaming loop ------------------------
5336
5679
  async def _stream_once(self, user_input: str) -> str:
5337
- # Build request payload
5338
- payload: Dict[str, Any] = {"messages": self._build_messages(user_input)}
5339
- if self.model:
5340
- payload["model"] = self.model
5680
+ # Build request payload.
5681
+ # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
5682
+ # `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
5683
+ if self._is_openai_model(self.model):
5684
+ msgs: List[Dict[str, str]] = []
5685
+ # Codex developer prompt (if enabled) + system prompt
5686
+ try:
5687
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
5688
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
5689
+ except Exception:
5690
+ pass
5691
+ if self.system_prompt:
5692
+ msgs.append({"role": "system", "content": self.system_prompt})
5693
+
5694
+ # Apply first-turn-only injections to the current user content
5695
+ content = user_input
5696
+ prefix = self._build_first_turn_injection(user_input)
5697
+ if prefix:
5698
+ content = f"{prefix}\n\n{user_input}"
5699
+ try:
5700
+ self._last_built_user_content = content
5701
+ except Exception:
5702
+ self._last_built_user_content = user_input
5703
+ msgs.append({"role": "user", "content": content})
5704
+
5705
+ payload: Dict[str, Any] = {"messages": msgs}
5706
+
5707
+ # Build OpenAI native input items (authoritative for the server OpenAI path).
5708
+ try:
5709
+ if isinstance(self._openai_input_items, list) and self._openai_input_items:
5710
+ items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
5711
+ else:
5712
+ # Seed with system prompts for the first OpenAI turn.
5713
+ items = []
5714
+ try:
5715
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
5716
+ items.append({"role": "system", "content": self._codex_system_prompt()})
5717
+ except Exception:
5718
+ pass
5719
+ if self.system_prompt:
5720
+ items.append({"role": "system", "content": self.system_prompt})
5721
+ items.append({"role": "user", "content": content})
5722
+ payload["openai_input_items"] = self._sanitize_openai_items(items)
5723
+ self._openai_last_sent_input_items = copy.deepcopy(items)
5724
+ except Exception:
5725
+ # If this fails for any reason, fall back to normal message-based history.
5726
+ self._openai_last_sent_input_items = None
5727
+
5728
+ # OpenAI Threading: DISABLED. We use full manual input item replay now.
5729
+ # if "openai_input_items" not in payload:
5730
+ # try:
5731
+ # if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
5732
+ # payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
5733
+ # except Exception:
5734
+ # pass
5735
+ try:
5736
+ if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
5737
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
5738
+ except Exception:
5739
+ pass
5740
+ else:
5741
+ payload = {"messages": self._build_messages(user_input)}
5742
+ if self.model:
5743
+ payload["model"] = self.model
5744
+ # OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
5745
+ try:
5746
+ if self._is_openai_model(self.model):
5747
+ if (
5748
+ isinstance(getattr(self, "_openai_response_id_history", None), list)
5749
+ and self._openai_response_id_history
5750
+ and "openai_response_id_history" not in payload
5751
+ ):
5752
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
5753
+ except Exception:
5754
+ pass
5341
5755
  # Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
5342
5756
  try:
5343
5757
  if self.terminal_id:
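The new OpenAI branch above replaces server-side threading with full client-side replay: each turn resends every provider-native input item (system and user messages plus reasoning items, tool calls, and tool outputs), and the per-turn delta reported by the server is folded back in on completion. A minimal sketch of that accumulate-and-replay pattern (class and method names are illustrative, not the package's actual API):

    import copy
    from typing import Any, Dict, List, Optional

    class InputItemReplay:
        """Accumulate provider-native input items and replay them every turn."""

        def __init__(self, system_prompt: Optional[str] = None) -> None:
            self.items: List[Dict[str, Any]] = []
            if system_prompt:
                self.items.append({"role": "system", "content": system_prompt})

        def build(self, user_text: str) -> List[Dict[str, Any]]:
            # Deep-copy so in-flight mutation never corrupts the stored history.
            items = copy.deepcopy(self.items)
            items.append({"role": "user", "content": user_text})
            return items

        def commit(self, sent: List[Dict[str, Any]], delta: List[Dict[str, Any]]) -> None:
            # Persist exactly what was sent plus the server-reported delta
            # (reasoning items, tool calls, tool outputs) for the next turn.
            self.items = copy.deepcopy(sent) + [dict(d) for d in delta if isinstance(d, dict)]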
@@ -5360,60 +5774,69 @@ class ChatCLI:
5360
5774
  payload["host_roots_mode"] = mode
5361
5775
  if mode in ("cwd", "custom") and self.host_base:
5362
5776
  payload["host_allowed_dirs"] = [self.host_base]
5363
- # Controls and approvals
5364
- if self.control_level in (1, 2, 3):
5365
- payload["control_level"] = self.control_level
5366
- # Auto-approve tools at L2: merge explicit auto_approve with trust lists so
5367
- # "trust for this session" / "always trust" choices also suppress repeat server prompts.
5368
- try:
5369
- auto_tools: List[str] = []
5370
- for seq in (self.auto_approve or [], self.trust_tools_session or [], self.trust_tools_always or []):
5371
- for t in seq:
5372
- if isinstance(t, str) and t.strip():
5373
- k = t.strip()
5374
- if k not in auto_tools:
5375
- auto_tools.append(k)
5376
- if auto_tools:
5377
- payload["auto_approve"] = auto_tools
5378
- except Exception:
5379
- if self.auto_approve:
5380
- payload["auto_approve"] = self.auto_approve
5381
-
5382
- # Auto-approve run_command base commands at L2 (hybrid approval + trust).
5383
- try:
5384
- cmd_bases: List[str] = []
5385
- for seq in (self.trust_cmds_session or [], self.trust_cmds_always or []):
5386
- for c in seq:
5387
- if isinstance(c, str) and c.strip():
5388
- k = c.strip().lower()
5389
- if k not in cmd_bases:
5390
- cmd_bases.append(k)
5391
- if cmd_bases:
5392
- payload["auto_approve_command_bases"] = cmd_bases
5393
- except Exception:
5394
- pass
5395
- # Reasoning effort (OpenAI reasoning models only; server will ignore for others).
5396
- # Let the server decide whether xhigh is supported for the selected (or default) model.
5397
- try:
5398
- if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
5399
- payload["reasoning_effort"] = self.reasoning_effort
5400
- else:
5401
- payload["reasoning_effort"] = "medium"
5402
- except Exception:
5403
- payload["reasoning_effort"] = "medium"
5777
+ # Controls and approvals
5778
+ if self.control_level in (1, 2, 3):
5779
+ payload["control_level"] = self.control_level
5780
+ # Auto-approve tools at L2: merge explicit auto_approve with trust lists so
5781
+ # "trust for this session" / "always trust" choices also suppress repeat server prompts.
5782
+ try:
5783
+ auto_tools: List[str] = []
5784
+ for seq in (self.auto_approve or [], self.trust_tools_session or [], self.trust_tools_always or []):
5785
+ for t in seq:
5786
+ if isinstance(t, str) and t.strip():
5787
+ k = t.strip()
5788
+ if k not in auto_tools:
5789
+ auto_tools.append(k)
5790
+ if auto_tools:
5791
+ payload["auto_approve"] = auto_tools
5792
+ except Exception:
5793
+ if self.auto_approve:
5794
+ payload["auto_approve"] = self.auto_approve
5795
+
5796
+ # Auto-approve run_command base commands at L2 (hybrid approval + trust).
5797
+ try:
5798
+ cmd_bases: List[str] = []
5799
+ for seq in (self.trust_cmds_session or [], self.trust_cmds_always or []):
5800
+ for c in seq:
5801
+ if isinstance(c, str) and c.strip():
5802
+ k = c.strip().lower()
5803
+ if k not in cmd_bases:
5804
+ cmd_bases.append(k)
5805
+ if cmd_bases:
5806
+ payload["auto_approve_command_bases"] = cmd_bases
5807
+ except Exception:
5808
+ pass
5809
+ # Reasoning effort (OpenAI reasoning models only; server will ignore for others).
5810
+ # Let the server decide whether xhigh is supported for the selected (or default) model.
5811
+ try:
5812
+ if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
5813
+ payload["reasoning_effort"] = self.reasoning_effort
5814
+ else:
5815
+ payload["reasoning_effort"] = "medium"
5816
+ except Exception:
5817
+ payload["reasoning_effort"] = "medium"
5404
5818
 
5405
5819
  # Anthropic thinking-mode budget (server ignores unless model ends with -thinking)
5406
5820
  try:
5407
5821
  if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
5408
- payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
5409
- except Exception:
5410
- pass
5411
- # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
5412
- try:
5413
- if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
5414
- payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
5415
- except Exception:
5416
- pass
5822
+ payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
5823
+ except Exception:
5824
+ pass
5825
+
5826
+ # Anthropic effort (Opus 4.6/4.5). Default to high.
5827
+ try:
5828
+ ae = getattr(self, "anthropic_effort", None)
5829
+ ae2 = str(ae or "high").strip().lower()
5830
+ if ae2 in ("low", "medium", "high", "max"):
5831
+ payload["anthropic_effort"] = ae2
5832
+ except Exception:
5833
+ payload["anthropic_effort"] = "high"
5834
+ # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
5835
+ try:
5836
+ if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
5837
+ payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
5838
+ except Exception:
5839
+ pass
5417
5840
 
5418
5841
  # Text verbosity and tool preambles preference (UI hints for the server)
5419
5842
  try:
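The anthropic_effort handling above is a normalize-with-default pattern: coerce the stored setting to a lowercase string, validate it against the allowed set, and fall back to "high". A standalone sketch (the allowed values come from the hunk; the helper name is hypothetical):

    from typing import Any

    ALLOWED_EFFORTS = ("low", "medium", "high", "max")

    def normalize_effort(raw: Any, default: str = "high") -> str:
        """Coerce an arbitrary settings value to a valid Anthropic effort level."""
        try:
            value = str(raw or default).strip().lower()
        except Exception:
            return default
        return value if value in ALLOWED_EFFORTS else default

    assert normalize_effort(None) == "high"
    assert normalize_effort(" MAX ") == "max"
    assert normalize_effort("turbo") == "high"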
@@ -5421,8 +5844,10 @@ class ChatCLI:
5421
5844
  payload["text_verbosity"] = self.text_verbosity
5422
5845
  except Exception:
5423
5846
  pass
5847
+ # Preambles are a GPT-5-only UX toggle.
5424
5848
  try:
5425
- payload["preambles_enabled"] = bool(self.preambles_enabled)
5849
+ if self._supports_preambles(self.model):
5850
+ payload["preambles_enabled"] = bool(self.preambles_enabled)
5426
5851
  except Exception:
5427
5852
  pass
5428
5853
 
@@ -5519,23 +5944,23 @@ class ChatCLI:
5519
5944
  headers["X-Request-Timeout"] = str(int(req_timeout_hint))
5520
5945
  except Exception:
5521
5946
  pass
5522
- # If using a Kimi model, include provider-native messages to preserve reasoning_content
5523
- try:
5524
- if isinstance(self.model, str) and self.model.startswith("kimi-"):
5525
- req_payload = dict(req_payload)
5526
- req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
5527
- except Exception:
5528
- pass
5529
- # If using a Gemini model, include provider-native contents to preserve thought signatures
5530
- # and strict tool-call chains across HTTP turns.
5531
- try:
5532
- if isinstance(self.model, str) and self.model.startswith("gemini-"):
5533
- req_payload = dict(req_payload)
5534
- hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
5535
- if hist:
5536
- req_payload["raw_provider_messages"] = hist
5537
- except Exception:
5538
- pass
5947
+ # If using a Kimi model, include provider-native messages to preserve reasoning_content
5948
+ try:
5949
+ if isinstance(self.model, str) and self.model.startswith("kimi-"):
5950
+ req_payload = dict(req_payload)
5951
+ req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
5952
+ except Exception:
5953
+ pass
5954
+ # If using a Gemini model, include provider-native contents to preserve thought signatures
5955
+ # and strict tool-call chains across HTTP turns.
5956
+ try:
5957
+ if isinstance(self.model, str) and self.model.startswith("gemini-"):
5958
+ req_payload = dict(req_payload)
5959
+ hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
5960
+ if hist:
5961
+ req_payload["raw_provider_messages"] = hist
5962
+ except Exception:
5963
+ pass
5539
5964
  async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
5540
5965
  async with client.stream("POST", self.stream_url, json=req_payload, headers=headers, follow_redirects=True) as resp:
5541
5966
  if resp.status_code == 429:
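Both provider branches above share one shape: copy the request dict before mutating it (the dict(req_payload) call), then attach provider-native history keyed off the model-id prefix. Condensed into a standalone sketch (the helper name is assumed, not the package's):

    from typing import Any, Dict, List

    def attach_raw_history(payload: Dict[str, Any], model: str,
                           kimi_history: List[Dict[str, Any]],
                           gemini_history: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a shallow copy of payload with provider-native messages attached."""
        out = dict(payload)  # copy first; the caller's payload is never mutated
        if model.startswith("kimi-") and kimi_history:
            out["raw_provider_messages"] = kimi_history
        elif model.startswith("gemini-") and gemini_history:
            out["raw_provider_messages"] = gemini_history
        return out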
@@ -5639,18 +6064,63 @@ class ChatCLI:
5639
6064
  self._rawlog_write(msg)
5640
6065
  except Exception:
5641
6066
  pass
5642
- # Idle "thinking" indicator shown while waiting for the next event (first tokens or next tool call)
5643
- indicator_task = None
5644
- indicator_active = False
5645
- indicator_started = False # used only to adjust leading newline behavior on first assistant header
5646
- # Track whether we're currently positioned at the start of a fresh line.
5647
- # This prevents double-newlines between back-to-back tool events.
5648
- at_line_start = True
5649
- # Mode: animate or static (default static for stability)
5650
- try:
5651
- _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
5652
- except Exception:
5653
- _animate_indicator = False
6067
+ # Idle "thinking" indicator shown while waiting for the next event (first tokens or next tool call)
6068
+ indicator_task = None
6069
+ indicator_active = False
6070
+ indicator_started = False # used only to adjust leading newline behavior on first assistant header
6071
+ # Track whether we're currently positioned at the start of a fresh line.
6072
+ # This prevents double-newlines between back-to-back tool events.
6073
+ at_line_start = True
6074
+
6075
+ # --- Tool call in-place status (issuelist.md #7) ---
6076
+ # We render a single transient line for the current tool call (no trailing newline)
6077
+ # so the later tool.result SUCCESS/FAILURE line can replace it in-place.
6078
+ tool_status_active = False
6079
+ tool_status_call_id = None
6080
+
6081
+ def _tool_status_clear_line() -> None:
6082
+ """Clear the current line (best-effort) and return to column 0."""
6083
+ nonlocal at_line_start
6084
+ try:
6085
+ sys.stdout.write("\r\x1b[2K")
6086
+ sys.stdout.flush()
6087
+ except Exception:
6088
+ pass
6089
+ at_line_start = True
6090
+
6091
+ def _tool_status_show(call_id: Any, line: str) -> None:
6092
+ """Show the transient tool status line (no newline)."""
6093
+ nonlocal tool_status_active, tool_status_call_id, at_line_start
6094
+ if not self.show_tool_calls:
6095
+ return
6096
+ tool_status_active = True
6097
+ tool_status_call_id = str(call_id) if call_id is not None else None
6098
+ try:
6099
+ if not at_line_start:
6100
+ sys.stdout.write("\n")
6101
+ sys.stdout.write("\r\x1b[2K" + str(line))
6102
+ sys.stdout.flush()
6103
+ at_line_start = False
6104
+ except Exception:
6105
+ # Fallback: degrade to a normal printed line
6106
+ try:
6107
+ self.ui.print(str(line))
6108
+ except Exception:
6109
+ pass
6110
+ at_line_start = True
6111
+
6112
+ def _tool_status_stop() -> None:
6113
+ """Remove the transient tool status line and clear tracking."""
6114
+ nonlocal tool_status_active, tool_status_call_id
6115
+ if tool_status_active:
6116
+ _tool_status_clear_line()
6117
+ tool_status_active = False
6118
+ tool_status_call_id = None
6119
+ # Mode: animate or static (default static for stability)
6120
+ try:
6121
+ _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
6122
+ except Exception:
6123
+ _animate_indicator = False
5654
6124
 
5655
6125
  async def _thinking_indicator_loop(chosen_word: str, spacing: int = 3) -> None:
5656
6126
  """Animate a transient thinking word on a single line until indicator_active becomes False.
@@ -5689,8 +6159,8 @@ class ChatCLI:
5689
6159
  except Exception:
5690
6160
  pass
5691
6161
 
5692
- async def _indicator_start() -> None:
5693
- nonlocal indicator_task, indicator_active, indicator_started, at_line_start
6162
+ async def _indicator_start() -> None:
6163
+ nonlocal indicator_task, indicator_active, indicator_started, at_line_start
5694
6164
  # Choose a random word and spacing each start
5695
6165
  word_bank = list(self._thinking_words or ["thinking", "working..."])
5696
6166
  if not word_bank:
@@ -5718,28 +6188,28 @@ class ChatCLI:
5718
6188
  c = colors[i % len(colors)]
5719
6189
  out_chars.append(f"\x1b[38;5;{c}m{ch}\x1b[0m")
5720
6190
  line = " " + joiner.join(out_chars) + " "
5721
- # Start on a dedicated new line so we never clobber prior output.
5722
- # If we're already at a fresh line, don't emit an extra newline (prevents
5723
- # visible blank lines between back-to-back tool events).
5724
- if not at_line_start:
5725
- sys.stdout.write("\n")
5726
- sys.stdout.write("\r\x1b[2K" + line)
5727
- sys.stdout.flush()
5728
- at_line_start = False
6191
+ # Start on a dedicated new line so we never clobber prior output.
6192
+ # If we're already at a fresh line, don't emit an extra newline (prevents
6193
+ # visible blank lines between back-to-back tool events).
6194
+ if not at_line_start:
6195
+ sys.stdout.write("\n")
6196
+ sys.stdout.write("\r\x1b[2K" + line)
6197
+ sys.stdout.flush()
6198
+ at_line_start = False
5729
6199
  # File debug
5730
6200
  try:
5731
6201
  self.ui.debug_log(f"indicator.start word='{chosen}' animate={_animate_indicator}")
5732
6202
  except Exception:
5733
6203
  pass
5734
- except Exception:
5735
- try:
5736
- if not at_line_start:
5737
- sys.stdout.write("\n")
5738
- sys.stdout.write("\r\x1b[2K" + (" " + joiner.join(list(str(chosen))) + " "))
5739
- sys.stdout.flush()
5740
- at_line_start = False
5741
- except Exception:
5742
- pass
6204
+ except Exception:
6205
+ try:
6206
+ if not at_line_start:
6207
+ sys.stdout.write("\n")
6208
+ sys.stdout.write("\r\x1b[2K" + (" " + joiner.join(list(str(chosen))) + " "))
6209
+ sys.stdout.flush()
6210
+ at_line_start = False
6211
+ except Exception:
6212
+ pass
5743
6213
  indicator_started = True
5744
6214
  if _animate_indicator:
5745
6215
  try:
@@ -5749,8 +6219,8 @@ class ChatCLI:
5749
6219
  indicator_task = None
5750
6220
  indicator_active = False
5751
6221
 
5752
- async def _indicator_stop(clear: bool = False) -> None:
5753
- nonlocal indicator_task, indicator_active, indicator_started, at_line_start
6222
+ async def _indicator_stop(clear: bool = False) -> None:
6223
+ nonlocal indicator_task, indicator_active, indicator_started, at_line_start
5754
6224
  # Only clear the line if an indicator was actually started.
5755
6225
  was_started = bool(indicator_started)
5756
6226
  indicator_active = False
@@ -5768,21 +6238,21 @@ class ChatCLI:
5768
6238
  finally:
5769
6239
  indicator_task = None
5770
6240
  # Default to not clearing to avoid erasing streamed content lines
5771
- if was_started and clear:
5772
- try:
5773
- sys.stdout.write("\r\x1b[2K")
5774
- sys.stdout.flush()
5775
- at_line_start = True
5776
- except Exception:
5777
- pass
5778
- elif was_started:
5779
- # Move to the next line to separate subsequent output
5780
- try:
5781
- sys.stdout.write("\n")
5782
- sys.stdout.flush()
5783
- at_line_start = True
5784
- except Exception:
5785
- pass
6241
+ if was_started and clear:
6242
+ try:
6243
+ sys.stdout.write("\r\x1b[2K")
6244
+ sys.stdout.flush()
6245
+ at_line_start = True
6246
+ except Exception:
6247
+ pass
6248
+ elif was_started:
6249
+ # Move to the next line to separate subsequent output
6250
+ try:
6251
+ sys.stdout.write("\n")
6252
+ sys.stdout.flush()
6253
+ at_line_start = True
6254
+ except Exception:
6255
+ pass
5786
6256
  # Reset started flag after stopping
5787
6257
  indicator_started = False
5788
6258
  try:
@@ -5812,26 +6282,26 @@ class ChatCLI:
5812
6282
  except json.JSONDecodeError:
5813
6283
  data = {"_raw": data_raw}
5814
6284
 
5815
- if event == "session.started":
5816
- # Keep indicator until first token; do not stop here
5817
- session_id = data.get("session_id")
5818
- lvl = data.get("level")
5819
- scope = data.get("fs_scope")
5820
- self.ui.print(f"[session] id={session_id} level={lvl} scope={scope}", style=self.ui.theme["dim"])
5821
- self._log_line({"event": "session.started", "server_session_id": session_id, "level": lvl, "fs_scope": scope})
5822
- # Record the server-authoritative level for this stream so approvals work
5823
- # even when the user left control_level as "server default".
5824
- try:
5825
- if isinstance(lvl, int):
5826
- self._current_turn["level"] = int(lvl)
5827
- elif isinstance(lvl, str) and str(lvl).strip().isdigit():
5828
- self._current_turn["level"] = int(str(lvl).strip())
5829
- except Exception:
5830
- pass
5831
- try:
5832
- await self._ws_broadcast("session.started", data)
5833
- except Exception:
5834
- pass
6285
+ if event == "session.started":
6286
+ # Keep indicator until first token; do not stop here
6287
+ session_id = data.get("session_id")
6288
+ lvl = data.get("level")
6289
+ scope = data.get("fs_scope")
6290
+ self.ui.print(f"[session] id={session_id} level={lvl} scope={scope}", style=self.ui.theme["dim"])
6291
+ self._log_line({"event": "session.started", "server_session_id": session_id, "level": lvl, "fs_scope": scope})
6292
+ # Record the server-authoritative level for this stream so approvals work
6293
+ # even when the user left control_level as "server default".
6294
+ try:
6295
+ if isinstance(lvl, int):
6296
+ self._current_turn["level"] = int(lvl)
6297
+ elif isinstance(lvl, str) and str(lvl).strip().isdigit():
6298
+ self._current_turn["level"] = int(str(lvl).strip())
6299
+ except Exception:
6300
+ pass
6301
+ try:
6302
+ await self._ws_broadcast("session.started", data)
6303
+ except Exception:
6304
+ pass
5835
6305
  try:
5836
6306
  self._current_turn["session_id"] = session_id
5837
6307
  except Exception:
@@ -5842,16 +6312,16 @@ class ChatCLI:
5842
6312
  pass
5843
6313
  continue
5844
6314
 
5845
- elif event == "message.delta":
5846
- # Stop any transient indicator before printing content and clear the line
5847
- try:
5848
- await _indicator_stop(clear=True)
5849
- except Exception:
5850
- pass
5851
- # Indicator line cleared; we're now at the start of a fresh line.
5852
- at_line_start = True
5853
- text = data.get("text", "")
5854
- if text:
6315
+ elif event == "message.delta":
6316
+ # Stop any transient indicator before printing content and clear the line
6317
+ try:
6318
+ await _indicator_stop(clear=True)
6319
+ except Exception:
6320
+ pass
6321
+ # Indicator line cleared; we're now at the start of a fresh line.
6322
+ at_line_start = True
6323
+ text = data.get("text", "")
6324
+ if text:
5855
6325
  try:
5856
6326
  _deltas_total += 1
5857
6327
  except Exception:
@@ -5886,23 +6356,23 @@ class ChatCLI:
5886
6356
  print(str(model_label) + ": ", end="", flush=True)
5887
6357
  except Exception:
5888
6358
  pass
5889
- header_printed = True
5890
- at_line_start = False
5891
- try:
5892
- self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
5893
- except Exception:
5894
- pass
6359
+ header_printed = True
6360
+ at_line_start = False
6361
+ try:
6362
+ self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
6363
+ except Exception:
6364
+ pass
5895
6365
  assistant_buf.append(text)
5896
6366
  # Print the token delta raw to avoid any wrapping/markup side-effects
5897
6367
  try:
5898
6368
  self.ui.print(text, style=self.ui.theme["assistant"], end="")
5899
- except Exception:
5900
- try:
5901
- print(str(text), end="", flush=True)
5902
- except Exception:
5903
- pass
5904
- at_line_start = False
5905
- # Deep debug: show each delta's size/preview
6369
+ except Exception:
6370
+ try:
6371
+ print(str(text), end="", flush=True)
6372
+ except Exception:
6373
+ pass
6374
+ at_line_start = False
6375
+ # Deep debug: show each delta's size/preview
5906
6376
  try:
5907
6377
  if DEBUG_SSE:
5908
6378
  prev = text[:40].replace("\n", "\\n")
@@ -5921,44 +6391,68 @@ class ChatCLI:
5921
6391
  except Exception:
5922
6392
  pass
5923
6393
 
5924
- elif event == "tool.call":
5925
- # Ensure any prior indicator state is reset cleanly, then restart
5926
- # a fresh indicator while waiting for the tool to run.
5927
- try:
5928
- await _indicator_stop(clear=True)
5929
- except Exception:
5930
- pass
5931
-
5932
- # If we were mid-line (e.g., streamed assistant text), break cleanly before
5933
- # showing the transient tool-wait indicator.
5934
- if not at_line_start:
5935
- try:
5936
- self.ui.print()
5937
- except Exception:
5938
- try:
5939
- print()
5940
- except Exception:
5941
- pass
5942
- at_line_start = True
5943
-
5944
- name = data.get("name")
5945
- args = data.get("args", {}) or {}
5946
- call_id = data.get("call_id")
6394
+ elif event == "tool.call":
6395
+ # Ensure any prior indicator state is reset cleanly, then restart
6396
+ # a fresh indicator while waiting for the tool to run.
6397
+ try:
6398
+ await _indicator_stop(clear=True)
6399
+ except Exception:
6400
+ pass
6401
+
6402
+ # If we were mid-line (e.g., streamed assistant text), break cleanly before
6403
+ # showing the transient tool-wait indicator.
6404
+ if not at_line_start:
6405
+ try:
6406
+ self.ui.print()
6407
+ except Exception:
6408
+ try:
6409
+ print()
6410
+ except Exception:
6411
+ pass
6412
+ at_line_start = True
6413
+
6414
+ name = data.get("name")
6415
+ args = data.get("args", {}) or {}
6416
+ call_id = data.get("call_id")
5947
6417
  try:
5948
6418
  self.ui.debug_log(f"tool.call name='{name}' call_id={call_id}")
5949
6419
  except Exception:
5950
6420
  pass
5951
6421
 
5952
- # Do NOT show the initial tool.call line per UX request; results will be
5953
- # rendered on tool.result. We still keep internal state and WS broadcasts.
5954
- # While the tool executes (server or client), show a subtle thinking
5955
- # indicator so users see progress during potentially long operations.
6422
+ # issuelist.md #7:
6423
+ # Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
5956
6424
  try:
5957
- # Do not start the indicator if we're in the middle of assistant token streaming
5958
- if (not streaming_assistant) and bool(getattr(self, "_thinking_indicator_enabled", False)):
5959
- await _indicator_start()
6425
+ # Clear any previous transient status line (there shouldn't be one, but this keeps rendering stable)
6426
+ _tool_status_stop()
5960
6427
  except Exception:
5961
6428
  pass
6429
+ try:
6430
+ tool_name = str(name or "").strip()
6431
+ label = self._tool_concise_label(
6432
+ tool_name,
6433
+ args if isinstance(args, dict) else {},
6434
+ None,
6435
+ )
6436
+ try:
6437
+ model_prefix = (
6438
+ self._current_turn.get("model")
6439
+ or self._last_used_model
6440
+ or self.model
6441
+ or "(server default)"
6442
+ )
6443
+ except Exception:
6444
+ model_prefix = self.model or "(server default)"
6445
+ ORANGE = "\x1b[38;5;214m"
6446
+ WHITE = "\x1b[97m"
6447
+ RESET = "\x1b[0m"
6448
+ status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
6449
+ _tool_status_show(call_id, status_line)
6450
+ except Exception:
6451
+ # Last-resort fallback: print something rather than crash streaming.
6452
+ try:
6453
+ self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
6454
+ except Exception:
6455
+ pass
5962
6456
  # Count tool calls
5963
6457
  try:
5964
6458
  tool_calls += 1
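The hard-coded escapes in the [RUNNING] line above are SGR color codes: 38;5;214 selects orange from the 256-color palette, and 97 is the classic bright white. A small helper showing the same formatting (the function name is illustrative):

    def c256(text: str, color: int) -> str:
        """Wrap text in an 8-bit foreground color (SGR 38;5;n) and reset after."""
        return f"\x1b[38;5;{color}m{text}\x1b[0m"

    ORANGE = 214  # the palette index hard-coded in the hunk
    BRIGHT_WHITE = "\x1b[97m"  # 16-color bright white, as used above
    print(f"{c256('gpt-5.2', ORANGE)}: {c256('[RUNNING]', ORANGE)} {BRIGHT_WHITE}list_dir .\x1b[0m")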
@@ -5982,6 +6476,11 @@ class ChatCLI:
5982
6476
  pass
5983
6477
 
5984
6478
  elif event == "approval.request":
6479
+ # Don't let the transient [RUNNING] line collide with interactive prompts.
6480
+ try:
6481
+ _tool_status_stop()
6482
+ except Exception:
6483
+ pass
5985
6484
  # First reply wins (web or CLI)
5986
6485
  await self._handle_approval_request(client, session_id, data)
5987
6486
  continue
@@ -6030,23 +6529,35 @@ class ChatCLI:
6030
6529
  name = str(data.get("name"))
6031
6530
  result = data.get("result", {}) or {}
6032
6531
  call_id = data.get("call_id")
6033
- # Stop any indicator before rendering results
6034
- try:
6035
- await _indicator_stop(clear=True)
6036
- except Exception:
6037
- pass
6038
- # Ensure tool result starts on a fresh line if assistant text was mid-line.
6039
- # Don't rely on assistant_buf ending with "\n" because UI.ensure_newline()
6040
- # prints without mutating the buffer, which can cause repeated blank lines.
6041
- if not at_line_start:
6042
- try:
6043
- self.ui.print()
6044
- except Exception:
6045
- try:
6046
- print()
6047
- except Exception:
6048
- pass
6049
- at_line_start = True
6532
+ # If we previously rendered a transient [RUNNING] line for this tool call,
6533
+ # clear it now so the SUCCESS/FAILURE line prints in the same place.
6534
+ try:
6535
+ if tool_status_active:
6536
+ # Best-effort match on call_id (some providers may omit it).
6537
+ if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
6538
+ _tool_status_stop()
6539
+ except Exception:
6540
+ try:
6541
+ _tool_status_stop()
6542
+ except Exception:
6543
+ pass
6544
+ # Stop any indicator before rendering results
6545
+ try:
6546
+ await _indicator_stop(clear=True)
6547
+ except Exception:
6548
+ pass
6549
+ # Ensure tool result starts on a fresh line if assistant text was mid-line.
6550
+ # Don't rely on assistant_buf ending with "\n" because UI.ensure_newline()
6551
+ # prints without mutating the buffer, which can cause repeated blank lines.
6552
+ if not at_line_start:
6553
+ try:
6554
+ self.ui.print()
6555
+ except Exception:
6556
+ try:
6557
+ print()
6558
+ except Exception:
6559
+ pass
6560
+ at_line_start = True
6050
6561
  # Concise default: one professional, natural-language line per tool call.
6051
6562
  if not self.ui.verbose:
6052
6563
  try:
@@ -6102,18 +6613,18 @@ class ChatCLI:
6102
6613
  except Exception:
6103
6614
  # Fall back to legacy renderer on unexpected issues
6104
6615
  self._render_tool_result(name, result, call_id=call_id)
6105
- else:
6106
- # Verbose mode retains the richer summary with previews
6107
- self._render_tool_result(name, result, call_id=call_id)
6108
-
6109
- # Tool result output is line-oriented; after rendering we should be positioned
6110
- # at the start of a fresh line so the next tool.call indicator doesn't insert
6111
- # an extra blank line.
6112
- at_line_start = True
6113
- try:
6114
- await self._ws_broadcast("tool.result", {"name": name, "result": result, "call_id": call_id})
6115
- except Exception:
6116
- pass
6616
+ else:
6617
+ # Verbose mode retains the richer summary with previews
6618
+ self._render_tool_result(name, result, call_id=call_id)
6619
+
6620
+ # Tool result output is line-oriented; after rendering we should be positioned
6621
+ # at the start of a fresh line so the next tool.call indicator doesn't insert
6622
+ # an extra blank line.
6623
+ at_line_start = True
6624
+ try:
6625
+ await self._ws_broadcast("tool.result", {"name": name, "result": result, "call_id": call_id})
6626
+ except Exception:
6627
+ pass
6117
6628
  # For Kimi, append provider-native tool result to raw history so it's threaded correctly
6118
6629
  try:
6119
6630
  if bool(getattr(self, "retain_native_tool_results", False)) and isinstance(self.model, str) and self.model.startswith("kimi-") and call_id:
@@ -6136,7 +6647,7 @@ class ChatCLI:
6136
6647
  pass
6137
6648
  # Do not auto-restart the indicator here; wait for the next model event
6138
6649
 
6139
- elif event == "tool.dispatch":
6650
+ elif event == "tool.dispatch":
6140
6651
  # Client-executed tool flow
6141
6652
  if not HAS_LOCAL_TOOLS:
6142
6653
  self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
@@ -6153,6 +6664,17 @@ class ChatCLI:
6153
6664
  args = data.get("args", {}) or {}
6154
6665
  job_token = data.get("job_token")
6155
6666
  reqp = data.get("requested_policy", {}) or {}
6667
+
6668
+ # Track in-flight dispatch so Ctrl+C can cancel quickly.
6669
+ try:
6670
+ self._inflight_dispatch = {
6671
+ "session_id": session_id_d,
6672
+ "call_id": call_id,
6673
+ "job_token": job_token,
6674
+ "name": name,
6675
+ }
6676
+ except Exception:
6677
+ pass
6156
6678
 
6157
6679
  if DEBUG_SSE:
6158
6680
  self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
@@ -6163,16 +6685,16 @@ class ChatCLI:
6163
6685
  except Exception:
6164
6686
  pass
6165
6687
 
6166
- # Level gating and CLI approvals (Level 2)
6167
- try:
6168
- lvl = int(self.control_level) if isinstance(self.control_level, int) else None
6169
- if lvl is None:
6170
- # Prefer the server-reported level from session.started
6171
- sl = self._current_turn.get("level") if isinstance(self._current_turn, dict) else None
6172
- if isinstance(sl, int):
6173
- lvl = int(sl)
6174
- except Exception:
6175
- lvl = None
6688
+ # Level gating and CLI approvals (Level 2)
6689
+ try:
6690
+ lvl = int(self.control_level) if isinstance(self.control_level, int) else None
6691
+ if lvl is None:
6692
+ # Prefer the server-reported level from session.started
6693
+ sl = self._current_turn.get("level") if isinstance(self._current_turn, dict) else None
6694
+ if isinstance(sl, int):
6695
+ lvl = int(sl)
6696
+ except Exception:
6697
+ lvl = None
6176
6698
  # Hard block at Level 1 for anything other than read/list
6177
6699
  if lvl == 1:
6178
6700
  disallowed = str(name) not in ("read_file", "list_dir")
@@ -6313,84 +6835,84 @@ class ChatCLI:
6313
6835
  result = local_append_file(args.get("path", ""), args.get("content", ""), policy)
6314
6836
  elif name == "list_dir":
6315
6837
  result = local_list_dir(args.get("path", ""), policy)
6316
- elif name == "run_command":
6317
- # Command allow policy:
6318
- # - L1: blocked earlier
6319
- # - L2: approval required; once approved, allow any base command
6320
- # - L3: no approval; allow any base command
6321
- # Use '*' wildcard (supported by henosis_cli_tools.run_command).
6322
- if lvl in (2, 3):
6323
- allow_csv = "*"
6324
- else:
6325
- # Legacy: intersect server + local allowlists
6326
- req_allow = (reqp.get("command_allow_csv") or "").strip()
6327
- local_allow = os.getenv("HENOSIS_ALLOW_COMMANDS", "")
6328
- if req_allow and local_allow:
6329
- req_set = {c.strip().lower() for c in req_allow.split(",") if c.strip()}
6330
- loc_set = {c.strip().lower() for c in local_allow.split(",") if c.strip()}
6331
- allow_csv = ",".join(sorted(req_set & loc_set))
6332
- else:
6333
- allow_csv = local_allow or req_allow or ""
6334
- # Include trusted commands from CLI settings (session + always)
6335
- try:
6336
- allow_set = {c.strip().lower() for c in allow_csv.split(",") if c.strip()}
6337
- for k in (self.trust_cmds_session or []):
6338
- allow_set.add(str(k).strip().lower())
6339
- for k in (self.trust_cmds_always or []):
6340
- allow_set.add(str(k).strip().lower())
6341
- allow_csv = ",".join(sorted(allow_set))
6342
- except Exception:
6343
- pass
6344
- timeout = args.get("timeout", None)
6345
- result = local_run_command(args.get("cmd", ""), policy, cwd=args.get("cwd", "."), timeout=timeout, allow_commands_csv=allow_csv)
6346
- # Legacy allowlist retry logic removed for L2/L3 (we allow '*').
6347
- elif name == "apply_patch":
6348
- result = local_apply_patch(
6349
- patch=args.get("patch", ""),
6350
- policy=policy,
6351
- cwd=args.get("cwd", "."),
6352
- lenient=bool(args.get("lenient", True)),
6353
- dry_run=bool(args.get("dry_run", False)),
6354
- backup=bool(args.get("backup", True)),
6355
- safeguard_max_lines=int(args.get("safeguard_max_lines", 3000) or 3000),
6356
- safeguard_confirm=bool(args.get("safeguard_confirm", False)),
6357
- )
6358
- elif name == "planning":
6359
- # Persist plan under plans/ at the current root (workspace or host base)
6360
- try:
6361
- plan_text = str(args.get("plan", "") or "").strip()
6362
- ctx_text = args.get("context")
6363
- if not plan_text:
6364
- result = {"ok": False, "error": "plan is required"}
6365
- else:
6366
- base = policy.workspace_base if policy.scope != "host" else (policy.host_base or Path(os.getcwd()).resolve())
6367
- plans_dir = Path(base) / "plans"
6368
- plans_dir.mkdir(parents=True, exist_ok=True)
6369
- from datetime import datetime as _dt
6370
- import re as _re, uuid as _uuid
6371
- ts = _dt.utcnow().strftime("%Y%m%d-%H%M%S")
6372
- first_line = plan_text.splitlines()[0] if plan_text else "plan"
6373
- slug = _re.sub(r"[^a-zA-Z0-9_-]+", "-", first_line).strip("-") or "plan"
6374
- slug = slug[:40]
6375
- fname = f"plan-{ts}-{_uuid.uuid4().hex[:6]}-{slug}.md"
6376
- fpath = plans_dir / fname
6377
- body_lines = [f"# Plan ({ts} UTC)\n"]
6378
- if ctx_text:
6379
- body_lines.append("## Context\n")
6380
- body_lines.append(str(ctx_text).strip() + "\n\n")
6381
- body_lines.append("## Steps\n")
6382
- body_lines.append(plan_text.rstrip() + "\n")
6383
- content = "\n".join(body_lines)
6384
- with fpath.open("w", encoding="utf-8", newline="") as f:
6385
- f.write(content)
6386
- result = {"ok": True, "data": {"path": str(fpath), "bytes_written": len(content.encode('utf-8'))}}
6387
- except Exception as _pe:
6388
- result = {"ok": False, "error": str(_pe)}
6389
- elif name == "string_replace":
6390
- result = local_string_replace(
6391
- pattern=args.get("pattern", ""),
6392
- replacement=args.get("replacement", ""),
6393
- policy=policy,
6838
+ elif name == "run_command":
6839
+ # Command allow policy:
6840
+ # - L1: blocked earlier
6841
+ # - L2: approval required; once approved, allow any base command
6842
+ # - L3: no approval; allow any base command
6843
+ # Use '*' wildcard (supported by henosis_cli_tools.run_command).
6844
+ if lvl in (2, 3):
6845
+ allow_csv = "*"
6846
+ else:
6847
+ # Legacy: intersect server + local allowlists
6848
+ req_allow = (reqp.get("command_allow_csv") or "").strip()
6849
+ local_allow = os.getenv("HENOSIS_ALLOW_COMMANDS", "")
6850
+ if req_allow and local_allow:
6851
+ req_set = {c.strip().lower() for c in req_allow.split(",") if c.strip()}
6852
+ loc_set = {c.strip().lower() for c in local_allow.split(",") if c.strip()}
6853
+ allow_csv = ",".join(sorted(req_set & loc_set))
6854
+ else:
6855
+ allow_csv = local_allow or req_allow or ""
6856
+ # Include trusted commands from CLI settings (session + always)
6857
+ try:
6858
+ allow_set = {c.strip().lower() for c in allow_csv.split(",") if c.strip()}
6859
+ for k in (self.trust_cmds_session or []):
6860
+ allow_set.add(str(k).strip().lower())
6861
+ for k in (self.trust_cmds_always or []):
6862
+ allow_set.add(str(k).strip().lower())
6863
+ allow_csv = ",".join(sorted(allow_set))
6864
+ except Exception:
6865
+ pass
6866
+ timeout = args.get("timeout", None)
6867
+ result = local_run_command(args.get("cmd", ""), policy, cwd=args.get("cwd", "."), timeout=timeout, allow_commands_csv=allow_csv)
6868
+ # Legacy allowlist retry logic removed for L2/L3 (we allow '*').
6869
+ elif name == "apply_patch":
6870
+ result = local_apply_patch(
6871
+ patch=args.get("patch", ""),
6872
+ policy=policy,
6873
+ cwd=args.get("cwd", "."),
6874
+ lenient=bool(args.get("lenient", True)),
6875
+ dry_run=bool(args.get("dry_run", False)),
6876
+ backup=bool(args.get("backup", True)),
6877
+ safeguard_max_lines=int(args.get("safeguard_max_lines", 3000) or 3000),
6878
+ safeguard_confirm=bool(args.get("safeguard_confirm", False)),
6879
+ )
6880
+ elif name == "planning":
6881
+ # Persist plan under plans/ at the current root (workspace or host base)
6882
+ try:
6883
+ plan_text = str(args.get("plan", "") or "").strip()
6884
+ ctx_text = args.get("context")
6885
+ if not plan_text:
6886
+ result = {"ok": False, "error": "plan is required"}
6887
+ else:
6888
+ base = policy.workspace_base if policy.scope != "host" else (policy.host_base or Path(os.getcwd()).resolve())
6889
+ plans_dir = Path(base) / "plans"
6890
+ plans_dir.mkdir(parents=True, exist_ok=True)
6891
+ from datetime import datetime as _dt
6892
+ import re as _re, uuid as _uuid
6893
+ ts = _dt.utcnow().strftime("%Y%m%d-%H%M%S")
6894
+ first_line = plan_text.splitlines()[0] if plan_text else "plan"
6895
+ slug = _re.sub(r"[^a-zA-Z0-9_-]+", "-", first_line).strip("-") or "plan"
6896
+ slug = slug[:40]
6897
+ fname = f"plan-{ts}-{_uuid.uuid4().hex[:6]}-{slug}.md"
6898
+ fpath = plans_dir / fname
6899
+ body_lines = [f"# Plan ({ts} UTC)\n"]
6900
+ if ctx_text:
6901
+ body_lines.append("## Context\n")
6902
+ body_lines.append(str(ctx_text).strip() + "\n\n")
6903
+ body_lines.append("## Steps\n")
6904
+ body_lines.append(plan_text.rstrip() + "\n")
6905
+ content = "\n".join(body_lines)
6906
+ with fpath.open("w", encoding="utf-8", newline="") as f:
6907
+ f.write(content)
6908
+ result = {"ok": True, "data": {"path": str(fpath), "bytes_written": len(content.encode('utf-8'))}}
6909
+ except Exception as _pe:
6910
+ result = {"ok": False, "error": str(_pe)}
6911
+ elif name == "string_replace":
6912
+ result = local_string_replace(
6913
+ pattern=args.get("pattern", ""),
6914
+ replacement=args.get("replacement", ""),
6915
+ policy=policy,
6394
6916
  cwd=args.get("cwd", "."),
6395
6917
  file_globs=[str(g) for g in (args.get("file_globs") or [])],
6396
6918
  exclude_globs=[str(e) for e in (args.get("exclude_globs") or [])],
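In the legacy (sub-L2) run_command branch above, the effective allowlist is the case-insensitive intersection of the server-requested and local CSV lists, with either side winning outright when the other is empty. A standalone sketch of that merge (the helper name is assumed):

    def merge_allowlists(server_csv: str, local_csv: str) -> str:
        """Intersect two comma-separated command allowlists, case-insensitively.

        When only one side is set, that side wins, matching the hunk's
        `local_allow or req_allow or ""` fallback."""
        server = {c.strip().lower() for c in server_csv.split(",") if c.strip()}
        local = {c.strip().lower() for c in local_csv.split(",") if c.strip()}
        if server and local:
            return ",".join(sorted(server & local))
        return ",".join(sorted(local or server))

    assert merge_allowlists("git,ls,rm", "LS, git") == "git,ls"
    assert merge_allowlists("", "pytest") == "pytest"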
@@ -6442,18 +6964,82 @@ class ChatCLI:
6442
6964
  self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
6443
6965
  except Exception as e:
6444
6966
  self.ui.warn(f"tools.callback error: {e}")
6967
+ finally:
6968
+ try:
6969
+ # Clear in-flight dispatch context when we send a callback.
6970
+ if isinstance(self._inflight_dispatch, dict):
6971
+ if str(self._inflight_dispatch.get("call_id")) == str(call_id):
6972
+ self._inflight_dispatch = None
6973
+ except Exception:
6974
+ pass
6445
6975
 
6446
- elif event == "message.completed":
6976
+ elif event == "message.completed":
6447
6977
  # Safety: this block handles only 'message.completed'.
6448
6978
  usage = data.get("usage", {})
6449
- model_used = data.get("model") or self.model
6450
- # Gemini: server may include an authoritative provider-native history snapshot.
6451
- try:
6452
- if isinstance(model_used, str) and model_used.startswith("gemini-"):
6453
- rpm = data.get("raw_provider_messages")
6454
- self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
6455
- except Exception:
6456
- pass
6979
+ model_used = data.get("model") or self.model
6980
+ # OpenAI: persist the last response id so future turns can use previous_response_id.
6981
+ try:
6982
+ if self._is_openai_model(model_used):
6983
+ # Prefer the explicit per-turn id list when provided by the server.
6984
+ ids = data.get("openai_response_ids")
6985
+ if isinstance(ids, list) and ids:
6986
+ for x in ids:
6987
+ if not isinstance(x, str):
6988
+ continue
6989
+ xs = x.strip()
6990
+ if not xs:
6991
+ continue
6992
+ try:
6993
+ if xs not in self._openai_response_id_history:
6994
+ self._openai_response_id_history.append(xs)
6995
+ except Exception:
6996
+ pass
6997
+ rid = data.get("openai_previous_response_id")
6998
+ if isinstance(rid, str) and rid.strip():
6999
+ self._openai_previous_response_id = rid.strip()
7000
+ try:
7001
+ if rid.strip() not in self._openai_response_id_history:
7002
+ self._openai_response_id_history.append(rid.strip())
7003
+ except Exception:
7004
+ pass
7005
+
7006
+ # OpenAI manual-state replay: server returns the delta items appended
7007
+ # during this turn (reasoning/tool calls/tool outputs). Persist them.
7008
+ try:
7009
+ delta = data.get("openai_delta_items")
7010
+ if isinstance(delta, list):
7011
+ base_items = (
7012
+ self._openai_last_sent_input_items
7013
+ if isinstance(self._openai_last_sent_input_items, list)
7014
+ else copy.deepcopy(self._openai_input_items)
7015
+ )
7016
+ # Normalize to a list of dicts where possible; keep unknown shapes as-is.
7017
+ merged: List[Any] = []
7018
+ try:
7019
+ merged.extend(list(base_items or []))
7020
+ except Exception:
7021
+ merged = list(base_items or []) if base_items is not None else []
7022
+ merged.extend(delta)
7023
+ # Store only dict-like items (server is expected to send dicts)
7024
+ cleaned: List[Dict[str, Any]] = []
7025
+ for it in merged:
7026
+ if isinstance(it, dict):
7027
+ cleaned.append(dict(it))
7028
+ self._openai_input_items = cleaned
7029
+ except Exception:
7030
+ pass
7031
+ finally:
7032
+ # Clear per-turn sent snapshot
7033
+ self._openai_last_sent_input_items = None
7034
+ except Exception:
7035
+ pass
7036
+ # Gemini: server may include an authoritative provider-native history snapshot.
7037
+ try:
7038
+ if isinstance(model_used, str) and model_used.startswith("gemini-"):
7039
+ rpm = data.get("raw_provider_messages")
7040
+ self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
7041
+ except Exception:
7042
+ pass
6457
7043
  # Mark completion for retry controller
6458
7044
  try:
6459
7045
  last_completed = True
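The response-id bookkeeping above is an order-preserving, de-duplicating append that tolerates non-string and blank entries. Extracted as a sketch (the function name is hypothetical):

    from typing import Iterable, List

    def append_unique_ids(history: List[str], new_ids: Iterable[object]) -> List[str]:
        """Append stripped, non-empty string ids in order, skipping duplicates."""
        for raw in new_ids:
            if not isinstance(raw, str):
                continue
            rid = raw.strip()
            if rid and rid not in history:
                history.append(rid)
        return history

    ids: List[str] = []
    append_unique_ids(ids, ["resp_1", " resp_1 ", None, "resp_2"])
    assert ids == ["resp_1", "resp_2"]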
@@ -6749,13 +7335,13 @@ class ChatCLI:
6749
7335
  # Compact style: include reasoning effort inline with model name when applicable
6750
7336
  try:
6751
7337
  effort_seg = ""
6752
- if self._is_openai_reasoning_model(model_used):
6753
- # Convert low|medium|high|xhigh -> Low|Medium|High|XHigh for display
6754
- lvl = str(self.reasoning_effort or "medium").strip().lower()
6755
- if lvl not in ("low", "medium", "high", "xhigh"):
6756
- lvl = "medium"
6757
- disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
6758
- effort_seg = f" {disp}"
7338
+ if self._is_openai_reasoning_model(model_used):
7339
+ # Convert low|medium|high|xhigh -> Low|Medium|High|XHigh for display
7340
+ lvl = str(self.reasoning_effort or "medium").strip().lower()
7341
+ if lvl not in ("low", "medium", "high", "xhigh"):
7342
+ lvl = "medium"
7343
+ disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
7344
+ effort_seg = f" {disp}"
6759
7345
  except Exception:
6760
7346
  effort_seg = ""
6761
7347
  model_only_line = f"model: {model_used or '(unknown)'}{effort_seg}"
@@ -6955,47 +7541,47 @@ class ChatCLI:
6955
7541
  except Exception:
6956
7542
  pass
6957
7543
 
6958
- # Anthropic prompt caching banner when detected (reads @10% input rate; creation billed at TTL multiplier)
6959
- try:
6960
- price = self._resolve_price(model_used)
6961
- provider = (price.get("provider") or "").lower()
6962
- if provider == "anthropic":
6963
- cr = int(usage.get("cache_read_input_tokens", 0) or 0)
6964
- cc = int(usage.get("cache_creation_input_tokens", 0) or 0)
6965
- # Optional breakdown
6966
- cc_5m = 0
6967
- cc_1h = 0
6968
- try:
6969
- ccmap = usage.get("cache_creation") if isinstance(usage, dict) else None
6970
- if isinstance(ccmap, dict):
6971
- cc_5m = int(ccmap.get("ephemeral_5m_input_tokens", 0) or 0)
6972
- cc_1h = int(ccmap.get("ephemeral_1h_input_tokens", 0) or 0)
6973
- except Exception:
6974
- cc_5m = cc_5m or 0
6975
- cc_1h = cc_1h or 0
6976
- if (cr > 0) or (cc > 0) or (cc_5m > 0) or (cc_1h > 0):
6977
- # Build a concise line similar to OpenAI banner
6978
- line = f"Billing: Anthropic prompt cache read {int(cr)} token(s) @10% input rate"
6979
- if (cc_5m > 0) or (cc_1h > 0):
6980
- line += f" | created {int(cc_5m)} @1.25x + {int(cc_1h)} @2x"
6981
- else:
6982
- if cc > 0:
6983
- line += f" | created {int(cc)} token(s) (billed at 1.25x/2x based on TTL)"
6984
- # Calculate savings (reported on a separate line to match OpenAI style)
6985
- saved_line = None
6986
- if cr > 0:
6987
- try:
6988
- in_rate_per_m = float(price.get("input", 0.0))
6989
- # Savings = cache_read * (1.0 - 0.1) * price
6990
- saved_usd = (int(cr) / 1_000_000.0) * in_rate_per_m * 0.90
6991
- saved_line = f"saved ${saved_usd:.2f} with prompt cache"
6992
- except Exception:
6993
- saved_line = None
6994
- box_lines.append(line)
6995
- if saved_line:
6996
- box_lines.append(saved_line)
6997
- except Exception:
6998
- pass
7544
+ # Anthropic prompt caching banner when detected (reads @10% input rate; creation billed at TTL multiplier)
7545
+ try:
7546
+ price = self._resolve_price(model_used)
7547
+ provider = (price.get("provider") or "").lower()
7548
+ if provider == "anthropic":
7549
+ cr = int(usage.get("cache_read_input_tokens", 0) or 0)
7550
+ cc = int(usage.get("cache_creation_input_tokens", 0) or 0)
7551
+ # Optional breakdown
7552
+ cc_5m = 0
7553
+ cc_1h = 0
7554
+ try:
7555
+ ccmap = usage.get("cache_creation") if isinstance(usage, dict) else None
7556
+ if isinstance(ccmap, dict):
7557
+ cc_5m = int(ccmap.get("ephemeral_5m_input_tokens", 0) or 0)
7558
+ cc_1h = int(ccmap.get("ephemeral_1h_input_tokens", 0) or 0)
7559
+ except Exception:
7560
+ cc_5m = cc_5m or 0
7561
+ cc_1h = cc_1h or 0
7562
+ if (cr > 0) or (cc > 0) or (cc_5m > 0) or (cc_1h > 0):
7563
+ # Build a concise line similar to OpenAI banner
7564
+ line = f"Billing: Anthropic prompt cache read {int(cr)} token(s) @10% input rate"
7565
+ if (cc_5m > 0) or (cc_1h > 0):
7566
+ line += f" | created {int(cc_5m)} @1.25x + {int(cc_1h)} @2x"
7567
+ else:
7568
+ if cc > 0:
7569
+ line += f" | created {int(cc)} token(s) (billed at 1.25x/2x based on TTL)"
7570
+ # Calculate savings (reported on a separate line to match OpenAI style)
7571
+ saved_line = None
7572
+ if cr > 0:
7573
+ try:
7574
+ in_rate_per_m = float(price.get("input", 0.0))
7575
+ # Savings = cache_read * (1.0 - 0.1) * price
7576
+ saved_usd = (int(cr) / 1_000_000.0) * in_rate_per_m * 0.90
7577
+ saved_line = f"saved ${saved_usd:.2f} with prompt cache"
7578
+ except Exception:
7579
+ saved_line = None
7580
+ box_lines.append(line)
7581
+ if saved_line:
7582
+ box_lines.append(saved_line)
7583
+ except Exception:
7584
+ pass
6999
7585
 
7000
7586
  # Show consolidated usage summary
7001
7587
  try:
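The savings figure in this banner follows from cache reads billing at 10% of the normal input rate: full price would be tokens/1e6 times the per-million rate, and the cached read costs a tenth of that, so 90% of the full price is saved. A worked sketch of the arithmetic:

    def cache_savings_usd(cache_read_tokens: int, input_rate_per_mtok: float) -> float:
        """Dollars saved by reading tokens from the prompt cache at a 10% rate.

        Full price would be tokens/1e6 * rate; the cached read costs 10% of
        that, so 90% of the full price is the savings reported."""
        return (cache_read_tokens / 1_000_000.0) * input_rate_per_mtok * 0.90

    # Example: 200k cached tokens at $3.00 per million input tokens.
    assert round(cache_savings_usd(200_000, 3.00), 2) == 0.54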
@@ -7295,29 +7881,29 @@ class ChatCLI:
7295
7881
  pass
7296
7882
  return "".join(assistant_buf)
7297
7883
 
7298
- elif event == "provider.message":
7299
- # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7300
- provider = (data.get("provider") or "").lower()
7301
- msg = data.get("message")
7302
- if provider == "gemini":
7303
- # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
7304
- try:
7305
- if isinstance(msg, dict):
7306
- self._gemini_raw_history.append(dict(msg))
7307
- elif isinstance(msg, list):
7308
- self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
7309
- except Exception:
7310
- pass
7311
- if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7312
- # Append as-is to local raw history for the next turn
7313
- try:
7314
- self._kimi_raw_history.append(dict(msg))
7315
- except Exception:
7316
- try:
7317
- self._kimi_raw_history.append(msg) # type: ignore
7318
- except Exception:
7319
- pass
7320
- continue
7884
+ elif event == "provider.message":
7885
+ # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7886
+ provider = (data.get("provider") or "").lower()
7887
+ msg = data.get("message")
7888
+ if provider == "gemini":
7889
+ # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
7890
+ try:
7891
+ if isinstance(msg, dict):
7892
+ self._gemini_raw_history.append(dict(msg))
7893
+ elif isinstance(msg, list):
7894
+ self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
7895
+ except Exception:
7896
+ pass
7897
+ if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7898
+ # Append as-is to local raw history for the next turn
7899
+ try:
7900
+ self._kimi_raw_history.append(dict(msg))
7901
+ except Exception:
7902
+ try:
7903
+ self._kimi_raw_history.append(msg) # type: ignore
7904
+ except Exception:
7905
+ pass
7906
+ continue
7321
7907
 
7322
7908
  else:
7323
7909
  # TEMP DEBUG: show unknown/unhandled events
@@ -7451,12 +8037,12 @@ class ChatCLI:
7451
8037
  # Reasoning effort tag for OpenAI reasoning models
7452
8038
  try:
7453
8039
  effort_seg = ""
7454
- if self._is_openai_reasoning_model(model_label):
7455
- lvl = str(self.reasoning_effort or "medium").strip().lower()
7456
- if lvl not in ("low", "medium", "high", "xhigh"):
7457
- lvl = "medium"
7458
- disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
7459
- effort_seg = f" {disp}"
8040
+ if self._is_openai_reasoning_model(model_label):
8041
+ lvl = str(self.reasoning_effort or "medium").strip().lower()
8042
+ if lvl not in ("low", "medium", "high", "xhigh"):
8043
+ lvl = "medium"
8044
+ disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
8045
+ effort_seg = f" {disp}"
7460
8046
  except Exception:
7461
8047
  effort_seg = ""
7462
8048
  try:
@@ -7545,15 +8131,15 @@ class ChatCLI:
7545
8131
  # Allow codebase map to be injected again
7546
8132
  self._did_inject_codebase_map = False
7547
8133
  # Ensure working-memory first-turn flag remains False so we inject now
7548
- self._did_inject_working_memory = False # Allow custom first-turn text to inject again
7549
- try:
7550
- self._did_inject_custom_first_turn = False
7551
- except Exception:
7552
- pass
7553
- # Reset provider-native histories
7554
- self.messages_for_save = []
7555
- if not self.save_chat_history:
7556
- self.thread_uid = None
8134
+ self._did_inject_working_memory = False # Allow custom first-turn text to inject again
8135
+ try:
8136
+ self._did_inject_custom_first_turn = False
8137
+ except Exception:
8138
+ pass
8139
+ # Reset provider-native histories
8140
+ self.messages_for_save = []
8141
+ if not self.save_chat_history:
8142
+ self.thread_uid = None
7557
8143
  self._kimi_raw_history = []
7558
8144
 
7559
8145
  # Build a fresh payload so the first-turn injections (code map + working memory) are applied
@@ -7582,24 +8168,32 @@ class ChatCLI:
7582
8168
  new_payload["control_level"] = self.control_level
7583
8169
  if self.auto_approve:
7584
8170
  new_payload["auto_approve"] = self.auto_approve
7585
- try:
7586
- if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
7587
- new_payload["reasoning_effort"] = self.reasoning_effort
7588
- else:
7589
- new_payload["reasoning_effort"] = "medium"
7590
- except Exception:
7591
- new_payload["reasoning_effort"] = "medium"
7592
- try:
7593
- if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
7594
- new_payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
7595
- except Exception:
7596
- pass
7597
- # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
7598
- try:
7599
- if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
7600
- new_payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
7601
- except Exception:
7602
- pass
8171
+ try:
8172
+ if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
8173
+ new_payload["reasoning_effort"] = self.reasoning_effort
8174
+ else:
8175
+ new_payload["reasoning_effort"] = "medium"
8176
+ except Exception:
8177
+ new_payload["reasoning_effort"] = "medium"
8178
+ try:
8179
+ if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
8180
+ new_payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
8181
+ except Exception:
8182
+ pass
8183
+ # Anthropic effort (Opus 4.6/4.5)
8184
+ try:
8185
+ ae = getattr(self, "anthropic_effort", None)
8186
+ ae2 = str(ae or "high").strip().lower()
8187
+ if ae2 in ("low", "medium", "high", "max"):
8188
+ new_payload["anthropic_effort"] = ae2
8189
+ except Exception:
8190
+ new_payload["anthropic_effort"] = "high"
8191
+ # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
8192
+ try:
8193
+ if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
8194
+ new_payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
8195
+ except Exception:
8196
+ pass
7603
8197
  if self.web_search_enabled:
7604
8198
  new_payload["enable_web_search"] = True
7605
8199
  if self.web_search_allowed_domains:
@@ -7857,32 +8451,31 @@ class ChatCLI:
7857
8451
  except Exception:
7858
8452
  pass
7859
8453
  # Fallback defaults for common models
7860
- if not ctx_map:
7861
- try:
7862
- ctx_map.update({
7863
- "gpt-5.2": 400000,
7864
- "gpt-5.2-pro": 400000,
7865
- "gpt-5": 400000,
7866
- "gpt-5-2025-08-07": 400000,
7867
- "codex-mini-latest": 200000,
7868
- "gemini-2.5-pro": 1048576,
7869
- "gemini-3-flash-preview": 1048576,
7870
- "gemini-3-pro-preview": 1000000,
8454
+ if not ctx_map:
8455
+ try:
8456
+ ctx_map.update({
8457
+ "gpt-5.2": 400000,
8458
+ "gpt-5.2-pro": 400000,
8459
+ "gpt-5": 400000,
8460
+ "gpt-5-2025-08-07": 400000,
8461
+ "codex-mini-latest": 200000,
8462
+ # (removed gemini-2.5-pro)
8463
+ "gemini-3-flash-preview": 1048576,
8464
+ "gemini-3-pro-preview": 1000000,
7871
8465
  "grok-4-1-fast-reasoning": 2000000,
7872
8466
  "grok-4-1-fast-non-reasoning": 2000000,
7873
8467
  "grok-4": 200000,
7874
8468
  "grok-code-fast-1": 262144,
7875
8469
  "deepseek-chat": 128000,
7876
8470
  "deepseek-reasoner": 128000,
7877
- "kimi-k2-thinking": 262144,
7878
- "kimi-k2-0905-preview": 262144,
8471
+ "kimi-k2.5": 262144,
7879
8472
  "claude-sonnet-4-20250514": 1000000,
7880
8473
  "claude-sonnet-4-20250514-thinking": 1000000,
7881
- "claude-sonnet-4-5-20250929": 1000000,
7882
- "claude-sonnet-4-5-20250929-thinking": 1000000,
7883
- "claude-opus-4-5-20251101": 200000,
7884
- "claude-opus-4-5-20251101-thinking": 200000,
7885
- "glm-4.6": 200000,
8474
+ "claude-sonnet-4-5-20250929": 1000000,
8475
+ "claude-sonnet-4-5-20250929-thinking": 1000000,
8476
+ "claude-opus-4-6": 1000000,
8477
+ "claude-opus-4-6-thinking": 1000000,
8478
+ "glm-4.7": 200000,
7886
8479
  })
7887
8480
  except Exception:
7888
8481
  pass
@@ -7912,19 +8505,19 @@ class ChatCLI:

  # --------------------- Tier-aware defaults -------------------------

- def _recommended_default_model(self) -> str:
- """Return the tier-aware recommended default model.
+ def _recommended_default_model(self) -> str:
+ """Return the tier-aware recommended default model.

- - Free-tier users: recommend Kimi k2-thinking (free-tier friendly reasoning model).
- - All other users: recommend gpt-5.2 (best overall default).
- When tier is unknown, fall back to gpt-5.2.
- """
+ - Free-tier users: recommend Kimi k2.5.
+ - All other users: recommend gpt-5.2 (best overall default).
+ When tier is unknown, fall back to gpt-5.2.
+ """
  try:
  if bool(self.is_free_tier):
- return "kimi-k2-thinking"
+ return "kimi-k2.5"
  except Exception:
  pass
- return "gpt-5.2"
+ return "gpt-5.2"

  # --------------------- Onboarding and Welcome ---------------------
  async def _welcome_flow(self) -> None:
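
The recommendation rule itself reduces to a couple of lines (a sketch mirroring the docstring; an unknown tier behaves like a paid tier):

def recommended_default_model(is_free_tier) -> str:
    return "kimi-k2.5" if is_free_tier else "gpt-5.2"
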
@@ -8155,6 +8748,57 @@ class ChatCLI:
  self.ui.print("Please select a default model for new chats.")
  await self.select_model_menu()

+ async def _wizard_anthropic_effort_step(self) -> None:
+ """First-time wizard: choose Anthropic effort (Opus 4.6/4.5 only).
+
+ Per opus4-6.txt:
+ - default effort is "high"
+ - effort "max" is Opus 4.6 only
+ """
+ try:
+ model = str(self.model or "")
+ except Exception:
+ model = ""
+ base = model[:-9] if model.endswith("-thinking") else model
+ # Only prompt when it matters.
+ if base not in ("claude-opus-4-6",):
+ # Default behavior equals high.
+ try:
+ if not getattr(self, "anthropic_effort", None):
+ self.anthropic_effort = "high"
+ except Exception:
+ self.anthropic_effort = "high"
+ return
+
+ try:
+ cur = str(getattr(self, "anthropic_effort", "high") or "high").strip().lower()
+ except Exception:
+ cur = "high"
+ if cur not in ("low", "medium", "high", "max"):
+ cur = "high"
+
+ choices: List[Tuple[str, str]] = [
+ ("high", "High (default)"),
+ ("medium", "Medium"),
+ ("low", "Low"),
+ ]
+ if base == "claude-opus-4-6":
+ choices.append(("max", "Max (Opus 4.6 only)"))
+
+ sel = await self._menu_choice(
+ "Anthropic effort",
+ "How thoroughly should Claude respond by default?",
+ choices,
+ )
+ if sel in ("low", "medium", "high", "max"):
+ # Guard: max is Opus 4.6 only
+ if sel == "max" and base != "claude-opus-4-6":
+ self.anthropic_effort = "high"
+ else:
+ self.anthropic_effort = sel
+ else:
+ self.anthropic_effort = cur or "high"
+
  async def _wizard_agent_scope_step(self) -> None:
  """First-time wizard: choose Agent scope root and mode via menus.

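
The effort rules above boil down to a small pure function (a sketch; normalize_effort is a hypothetical name, the rules are taken from the wizard step):

def normalize_effort(model: str, effort) -> str:
    base = model[:-9] if model.endswith("-thinking") else model  # len("-thinking") == 9
    e = str(effort or "high").strip().lower()
    if e not in ("low", "medium", "high", "max"):
        return "high"
    if e == "max" and base != "claude-opus-4-6":
        return "high"  # "max" is accepted for Opus 4.6 only
    return e

# normalize_effort("claude-opus-4-6-thinking", "max")    -> "max"
# normalize_effort("claude-sonnet-4-5-20250929", "max")  -> "high"
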
@@ -8272,8 +8916,14 @@ class ChatCLI:
  "We’ll configure a few defaults. You can change these later via /settings.",
  )

- # --- 1) Default model (menu, no Y/N) ---
+ # --- 1) Default model (menu) ---
  await self._wizard_model_step()
+ # If the picker was cancelled (or model still unset), choose a sensible default.
+ if not self.model:
+ self.model = self._recommended_default_model()
+
+ # --- 1b) Anthropic effort (Opus 4.6 / 4.5) ---
+ await self._wizard_anthropic_effort_step()

  # --- 2) Tools (always ON per design) ---
  self.requested_tools = True
@@ -8284,8 +8934,8 @@ class ChatCLI:
  )
  await self.set_level_menu()
  if self.control_level not in (1, 2, 3):
- # Default to Level 2 if user aborted
- self.control_level = 2
+ # Default to Level 3 if user aborted
+ self.control_level = 3

  # --- 4) Agent scope (menus; only type on custom path) ---
  self.ui.print(
@@ -8336,30 +8986,45 @@ class ChatCLI:
  except Exception:
  curv = "medium"

- verb_choice = await self._menu_choice(
- "Text verbosity",
- "How verbose should responses be by default?",
- [
+ verbosity_choices: List[Tuple[str, str]] = []
+ if self._is_gpt_model(self.model):
+ # Default-first: Low for GPT models.
+ verbosity_choices = [
  ("low", "Low – short, to-the-point answers"),
+ ("medium", "Medium – balanced detail"),
+ ("high", "High – more verbose explanations"),
+ ]
+ else:
+ # Default-first: Medium for non-GPT models; do not surface "Low".
+ verbosity_choices = [
  ("medium", "Medium – balanced detail (recommended)"),
  ("high", "High – more verbose explanations"),
- ],
+ ]
+
+ verb_choice = await self._menu_choice(
+ "Text verbosity",
+ "How verbose should responses be by default?",
+ verbosity_choices,
  )
  if verb_choice in ("low", "medium", "high"):
  self.text_verbosity = verb_choice
  else:
  self.text_verbosity = curv or "medium"

- # --- 7) Tool preambles (menu) ---
- preamble_choice = await self._menu_choice(
- "Tool call preambles",
- "Before using tools, the agent can briefly explain what it will do and why (supported models only).",
- [
- ("on", "Enable preambles"),
- ("off", "Disable preambles (default)"),
- ],
- )
- self.preambles_enabled = preamble_choice == "on"
+ # --- 7) Tool preambles (GPT-5 only) ---
+ if self._supports_preambles(self.model):
+ preamble_choice = await self._menu_choice(
+ "Tool call preambles",
+ "Before using tools, the agent can briefly explain what it will do and why.",
+ [
+ ("off", "Disable preambles (default)"),
+ ("on", "Enable preambles"),
+ ],
+ )
+ self.preambles_enabled = preamble_choice == "on"
+ else:
+ # Never enable preambles on unsupported models.
+ self.preambles_enabled = False

  # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
  custom_choice = await self._menu_choice(
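
These menus follow a "default-first" convention: the recommended option is listed first so a bare Enter accepts it. A sketch of the verbosity branch (the is_gpt flag stands in for self._is_gpt_model, whose exact rule is not shown in this hunk):

def verbosity_choices(is_gpt: bool) -> list[tuple[str, str]]:
    if is_gpt:
        # GPT models default to "low".
        return [("low", "Low"), ("medium", "Medium"), ("high", "High")]
    # Other models default to "medium" and do not surface "low".
    return [("medium", "Medium (recommended)"), ("high", "High")]
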
@@ -8433,7 +9098,8 @@ class ChatCLI:
  text = m.get("content", "")
  contents.append({"role": role, "parts": [{"text": text}]})
  # Pick a Gemini model for counting; fall back if current isn't Gemini
- count_model = "gemini-2.5-pro"
+ # (gemini-2.5-pro removed from curated lists)
+ count_model = "gemini-3-flash-preview"
  res = client.models.count_tokens(model=count_model, contents=contents)
  t = int(getattr(res, "total_tokens", 0) or 0)
  if t > 0:
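
Standalone, the counting call above has roughly this shape (a sketch assuming the google-genai client; only the count_tokens call and the content shape appear in this diff):

from google import genai

client = genai.Client()  # reads the API key from the environment
contents = [{"role": "user", "parts": [{"text": "Hello there"}]}]
res = client.models.count_tokens(model="gemini-3-flash-preview", contents=contents)
print(int(getattr(res, "total_tokens", 0) or 0))
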
@@ -8491,9 +9157,9 @@ class ChatCLI:
  blocks.append(txt.strip())
  except Exception:
  pass
- # Tool preamble
+ # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
  try:
- if bool(getattr(self, "preambles_enabled", False)):
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
  "Tool usage: when you need to read or modify files or run commands, "
  "explicitly explain why you're using a tool, what you'll do, and how it "
@@ -8918,11 +9584,11 @@ class ChatCLI:
  await self._ws_broadcast("warning", {"message": f"Unknown inbound type: {mtype}"})

  # Handle approval request: first reply wins (web or CLI), then POST to server
- async def _handle_approval_request(self, client: httpx.AsyncClient, session_id: Optional[str], data: Dict[str, Any]) -> None:
- tool = str(data.get("tool"))
- call_id = data.get("call_id")
- args_prev = data.get("args_preview", {}) or {}
- timeout_sec = int(data.get("timeout_sec", 60) or 60)
+ async def _handle_approval_request(self, client: httpx.AsyncClient, session_id: Optional[str], data: Dict[str, Any]) -> None:
+ tool = str(data.get("tool"))
+ call_id = data.get("call_id")
+ args_prev = data.get("args_preview", {}) or {}
+ timeout_sec = int(data.get("timeout_sec", 60) or 60)
  # Display summary
  self.ui.print(f"⚠ Approval requested for {tool} (call_id={call_id})", style=self.ui.theme["warn"])
  self.ui.print(truncate_json(args_prev, 600), style=self.ui.theme["dim"])
@@ -8947,92 +9613,92 @@ class ChatCLI:

  # Run blocking CLI prompt in thread to avoid blocking event loop
  loop = asyncio.get_event_loop()
- def prompt_cli() -> Tuple[bool, str, Optional[str], Optional[str]]:
- """Return (approved, note, remember, remember_key)."""
- try:
- # Prefer the richer approve-once/session/always UX at L2.
- try:
- lvl = data.get("level")
- lvl_i = int(lvl) if isinstance(lvl, int) or (isinstance(lvl, str) and str(lvl).strip().isdigit()) else None
- except Exception:
- lvl_i = None
-
- if lvl_i == 2:
- t = str(tool or "").strip().lower()
- remember_key = None
- label = t
- if t == "run_command":
- try:
- cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
- except Exception:
- cmd = None
- base = self._base_command(cmd) if cmd is not None else ""
- if base:
- label = f"run_command:{base}"
- remember_key = base
- else:
- label = "run_command"
- else:
- remember_key = t
-
- choice = self._approval_prompt_ui(label, args_prev if isinstance(args_prev, dict) else {})
- if choice == "deny":
- return False, "Denied via CLI", None, remember_key
-
- # Approved; update local trust registries immediately.
- try:
- if t == "run_command" and remember_key:
- if choice == "session":
- if remember_key not in self.trust_cmds_session:
- self.trust_cmds_session.append(remember_key)
- elif choice == "always":
- if remember_key not in self.trust_cmds_always:
- self.trust_cmds_always.append(remember_key)
- self.save_settings()
- elif t in {"write_file", "append_file", "edit_file", "apply_patch", "string_replace"}:
- if choice == "session":
- if t not in self.trust_tools_session:
- self.trust_tools_session.append(t)
- elif choice == "always":
- if t not in self.trust_tools_always:
- self.trust_tools_always.append(t)
- self.save_settings()
- except Exception:
- pass
-
- remember = choice if choice in ("session", "always") else "once"
- return True, "Approved via CLI", remember, remember_key
-
- # Fallback: simple yes/no confirmation.
- default_yes = True if str(tool).strip() == "context.summarize" else False
- prompt = f"Approve {tool} (timeout in {timeout_sec}s)?"
- try:
- if str(tool).strip().lower() == "run_command":
- cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
- if isinstance(cmd, str) and cmd.strip():
- prompt = f"Approve run_command: {self._clip(cmd, 120)} (timeout in {timeout_sec}s)?"
- except Exception:
- pass
- approved = self.ui.confirm(prompt, default=default_yes)
- return bool(approved), ("Approved via CLI" if approved else "Denied via CLI"), None, None
- except Exception:
- return False, "Denied via CLI (error)", None, None
+ def prompt_cli() -> Tuple[bool, str, Optional[str], Optional[str]]:
+ """Return (approved, note, remember, remember_key)."""
+ try:
+ # Prefer the richer approve-once/session/always UX at L2.
+ try:
+ lvl = data.get("level")
+ lvl_i = int(lvl) if isinstance(lvl, int) or (isinstance(lvl, str) and str(lvl).strip().isdigit()) else None
+ except Exception:
+ lvl_i = None
+
+ if lvl_i == 2:
+ t = str(tool or "").strip().lower()
+ remember_key = None
+ label = t
+ if t == "run_command":
+ try:
+ cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
+ except Exception:
+ cmd = None
+ base = self._base_command(cmd) if cmd is not None else ""
+ if base:
+ label = f"run_command:{base}"
+ remember_key = base
+ else:
+ label = "run_command"
+ else:
+ remember_key = t
+
+ choice = self._approval_prompt_ui(label, args_prev if isinstance(args_prev, dict) else {})
+ if choice == "deny":
+ return False, "Denied via CLI", None, remember_key
+
+ # Approved; update local trust registries immediately.
+ try:
+ if t == "run_command" and remember_key:
+ if choice == "session":
+ if remember_key not in self.trust_cmds_session:
+ self.trust_cmds_session.append(remember_key)
+ elif choice == "always":
+ if remember_key not in self.trust_cmds_always:
+ self.trust_cmds_always.append(remember_key)
+ self.save_settings()
+ elif t in {"write_file", "append_file", "edit_file", "apply_patch", "string_replace"}:
+ if choice == "session":
+ if t not in self.trust_tools_session:
+ self.trust_tools_session.append(t)
+ elif choice == "always":
+ if t not in self.trust_tools_always:
+ self.trust_tools_always.append(t)
+ self.save_settings()
+ except Exception:
+ pass
+
+ remember = choice if choice in ("session", "always") else "once"
+ return True, "Approved via CLI", remember, remember_key
+
+ # Fallback: simple yes/no confirmation.
+ default_yes = True if str(tool).strip() == "context.summarize" else False
+ prompt = f"Approve {tool} (timeout in {timeout_sec}s)?"
+ try:
+ if str(tool).strip().lower() == "run_command":
+ cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
+ if isinstance(cmd, str) and cmd.strip():
+ prompt = f"Approve run_command: {self._clip(cmd, 120)} (timeout in {timeout_sec}s)?"
+ except Exception:
+ pass
+ approved = self.ui.confirm(prompt, default=default_yes)
+ return bool(approved), ("Approved via CLI" if approved else "Denied via CLI"), None, None
+ except Exception:
+ return False, "Denied via CLI (error)", None, None

  cli_task = loop.run_in_executor(None, prompt_cli)

- decided: Optional[Tuple[Any, ...]] = None
+ decided: Optional[Tuple[Any, ...]] = None
  try:
  done, pending = await asyncio.wait({fut, asyncio.ensure_future(cli_task)}, timeout=timeout_sec, return_when=asyncio.FIRST_COMPLETED)
  if fut in done and not fut.cancelled():
  try:
- decided = fut.result()
- except Exception:
- decided = (False, "Denied via Web (error)")
- elif cli_task in done: # type: ignore
- try:
- decided = await cli_task # type: ignore
- except Exception:
- decided = (False, "Denied via CLI (error)")
+ decided = fut.result()
+ except Exception:
+ decided = (False, "Denied via Web (error)")
+ elif cli_task in done: # type: ignore
+ try:
+ decided = await cli_task # type: ignore
+ except Exception:
+ decided = (False, "Denied via CLI (error)")
  # If web future not decided, set it so we can cleanly proceed
  if not fut.done():
  try:
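
The control flow here is a "first reply wins" race: a web-side future and a blocking CLI prompt (run in a thread) compete under one timeout. A self-contained sketch of the pattern (names are illustrative):

import asyncio

async def first_reply_wins(fut: asyncio.Future, blocking_prompt, timeout_sec: int):
    loop = asyncio.get_event_loop()
    cli_task = asyncio.ensure_future(loop.run_in_executor(None, blocking_prompt))
    done, _pending = await asyncio.wait(
        {fut, cli_task}, timeout=timeout_sec, return_when=asyncio.FIRST_COMPLETED
    )
    if fut in done and not fut.cancelled():
        return fut.result()       # web side answered first
    if cli_task in done:
        return cli_task.result()  # CLI side answered first
    return None                   # timed out; the caller treats this as a denial
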
@@ -9052,45 +9718,45 @@ class ChatCLI:
  if call_id is not None:
  self._pending_approvals.pop(str(call_id), None)

- # Normalize decision tuple to (approved, note, remember, remember_key)
- approved = False
- note = ""
- remember = None
- remember_key = None
- try:
- if decided is None:
- approved, note = False, ""
- elif isinstance(decided, tuple) and len(decided) >= 4:
- approved, note, remember, remember_key = decided[0], decided[1], decided[2], decided[3]
- elif isinstance(decided, tuple) and len(decided) >= 2:
- approved, note = decided[0], decided[1]
- else:
- approved, note = bool(decided), ""
- except Exception:
- approved, note = False, ""
-
- # Post decision to server
- if session_id:
- try:
- payload = {
- "session_id": session_id,
- "call_id": call_id,
- "approve": bool(approved),
- "note": note,
- }
- # Optional remember semantics (used to suppress repeat approvals within the current stream).
- try:
- if bool(approved) and remember in ("session", "always"):
- payload["remember"] = remember
- if remember_key:
- payload["remember_key"] = str(remember_key)
- except Exception:
- pass
- r = await client.post(self.approvals_url, json=payload, timeout=self.timeout)
- if r.status_code >= 400:
- self.ui.warn(f"Approval POST failed: {r.status_code} {r.text}")
- except Exception as e:
- self.ui.warn(f"Approval POST error: {e}")
+ # Normalize decision tuple to (approved, note, remember, remember_key)
+ approved = False
+ note = ""
+ remember = None
+ remember_key = None
+ try:
+ if decided is None:
+ approved, note = False, ""
+ elif isinstance(decided, tuple) and len(decided) >= 4:
+ approved, note, remember, remember_key = decided[0], decided[1], decided[2], decided[3]
+ elif isinstance(decided, tuple) and len(decided) >= 2:
+ approved, note = decided[0], decided[1]
+ else:
+ approved, note = bool(decided), ""
+ except Exception:
+ approved, note = False, ""
+
+ # Post decision to server
+ if session_id:
+ try:
+ payload = {
+ "session_id": session_id,
+ "call_id": call_id,
+ "approve": bool(approved),
+ "note": note,
+ }
+ # Optional remember semantics (used to suppress repeat approvals within the current stream).
+ try:
+ if bool(approved) and remember in ("session", "always"):
+ payload["remember"] = remember
+ if remember_key:
+ payload["remember_key"] = str(remember_key)
+ except Exception:
+ pass
+ r = await client.post(self.approvals_url, json=payload, timeout=self.timeout)
+ if r.status_code >= 400:
+ self.ui.warn(f"Approval POST failed: {r.status_code} {r.text}")
+ except Exception as e:
+ self.ui.warn(f"Approval POST error: {e}")

  async def amain():
  args = build_arg_parser().parse_args()
  # Set global debug flags from args
9166
9832
  await cli.run()
9167
9833
 
9168
9834
 
9169
- # --- UX Hotfix: Replace menu UI with highlighted cursor picker (no radio buttons) ---
9170
- # The default RadioList menu can be confusing and, on some terminals, non-interactive.
9171
- # We override ChatCLI._menu_choice at runtime with a prompt_toolkit-based list that shows
9172
- # a highlighted bar for the current item; Enter selects; Esc cancels. Falls back to
9173
- # numeric selection when prompt_toolkit is unavailable.
9174
-
9175
- async def _menu_choice_highlight(self, title: str, text: str, choices: list[tuple[str, str]]): # type: ignore
9176
- if HAS_PT and Application and Layout and HSplit and Window and FormattedTextControl and Style and KeyBindings:
9177
- try:
9178
- items = [(val, str(label)) for (val, label) in choices]
9179
- index = 0
9180
- blink_on = [True]
9181
-
9182
- def _lines():
9183
- out = []
9184
- if title:
9185
- out.append(("class:menu.title", f"{title}\n"))
9186
- for i, (_v, _lbl) in enumerate(items):
9187
- if i == index:
9188
- arrow = ">" if blink_on[0] else " "
9189
- out.append(("class:menu.item.selected", f" {arrow} {_lbl}\n"))
9190
- else:
9191
- out.append(("class:menu.item", f" {_lbl}\n"))
9192
- out.append(("class:menu.status", f"({index+1}/{len(items)})"))
9193
- return out
9194
-
9195
- body = FormattedTextControl(_lines)
9196
- hint = FormattedTextControl(lambda: text or "Use ↑/↓, Enter=select, Esc=cancel")
9197
- root = HSplit([
9198
- Window(height=1, content=hint, style="class:menu.hint"),
9199
- Window(content=body),
9200
- ])
9201
- kb = KeyBindings()
9202
-
9203
- @kb.add("up")
9204
- def _up(event):
9205
- nonlocal index
9206
- index = (index - 1) % len(items)
9207
- event.app.invalidate()
9208
-
9209
- @kb.add("down")
9210
- def _down(event):
9211
- nonlocal index
9212
- index = (index + 1) % len(items)
9213
- event.app.invalidate()
9214
-
9215
- @kb.add("pageup")
9216
- def _pgup(event):
9217
- nonlocal index
9218
- index = max(0, index - 7)
9219
- event.app.invalidate()
9220
-
9221
- @kb.add("pagedown")
9222
- def _pgdn(event):
9223
- nonlocal index
9224
- index = min(len(items) - 1, index + 7)
9225
- event.app.invalidate()
9226
-
9227
- @kb.add("home")
9228
- def _home(event):
9229
- nonlocal index
9230
- index = 0
9231
- event.app.invalidate()
9232
-
9233
- @kb.add("end")
9234
- def _end(event):
9235
- nonlocal index
9236
- index = len(items) - 1
9237
- event.app.invalidate()
9238
-
9239
- @kb.add("enter")
9240
- def _enter(event):
9241
- event.app.exit(result=items[index][0])
9242
-
9243
- @kb.add("escape")
9244
- def _esc(event):
9245
- event.app.exit(result=None)
9246
-
9247
- style = Style.from_dict({
9248
- "menu.title": "bold",
9249
- "menu.hint": "fg:#888888",
9250
- "menu.status": "fg:#ff8700",
9251
- "menu.item": "",
9252
- # Bright highlighted selection; blink may be ignored on some terminals
9253
- "menu.item.selected": "fg:#ff8700 reverse",
9254
- })
9255
-
9256
- app = Application(layout=Layout(root), key_bindings=kb, style=style, full_screen=False)
9257
-
9258
- async def _blinker():
9259
- while True:
9260
- await asyncio.sleep(0.6)
9261
- try:
9262
- blink_on[0] = not blink_on[0]
9263
- get_app().invalidate()
9264
- except Exception:
9265
- break
9266
-
9267
- try:
9268
- asyncio.create_task(_blinker())
9269
- except Exception:
9270
- pass
9271
-
9272
- return await app.run_async()
9273
- except Exception:
9274
- pass
9275
- # Fallback: numeric list
9276
- self.ui.header(title, text)
9277
- for i, (_, label) in enumerate(choices, start=1):
9278
- style = None
9279
- try:
9280
- lbl = str(label)
9281
- if ("VERY expensive" in lbl) or ("[DANGER]" in lbl) or ("!!!" in lbl and "expensive" in lbl.lower()):
9282
- style = self.ui.theme.get("err")
9283
- except Exception:
9284
- style = None
9285
- self.ui.print(f"{i}. {label}", style=style)
9286
- self.ui.print()
9287
- while True:
9288
- raw = input("Choose an option: ").strip()
9289
- if raw.lower() in ("q", "quit", "exit"):
9290
- return None
9291
- if not raw.isdigit():
9292
- self.ui.warn("Enter a number from the list.")
9293
- continue
9294
- idx = int(raw)
9295
- if not (1 <= idx <= len(choices)):
9296
- self.ui.warn("Invalid selection.")
9297
- continue
9298
- return choices[idx - 1][0]
9299
-
9300
- # Monkey-patch the method onto ChatCLI
9301
- try:
9302
- ChatCLI._menu_choice = _menu_choice_highlight # type: ignore[attr-defined]
9303
- except Exception:
9304
- pass
9305
-
9306
9835
  # --- UX Hotfix v2: dependency-free highlighted menus (Enter selects) ---
9307
9836
  # This override ensures the settings menu works without RadioList and that Enter
9308
9837
  # activates the currently highlighted option even when prompt_toolkit is absent.