henosis-cli 0.6.7__py3-none-any.whl → 0.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py CHANGED
@@ -6,11 +6,12 @@
  # - Preserves previous behavior and settings
  # - Injects CODEBASE_MAP.md into the first user message (wrapped in <codebase_map>) without manual trimming.

- import argparse
- import asyncio
- import json
- import os
- import sys
+ import argparse
+ import asyncio
+ import copy
+ import json
+ import os
+ import sys
  import socket
  import shutil
  from pathlib import Path
@@ -683,8 +684,8 @@ class UI:
  for n, ty, sz in rows:
  print(f"{n:<40} {ty:<8} {sz}")

- class ChatCLI:
- def __init__(
+ class ChatCLI:
+ def __init__(
  self,
  server: str,
  model: Optional[str],
@@ -997,8 +998,25 @@ class ChatCLI:
  }
  # Track last used model for display
  self._last_used_model: Optional[str] = None
- # Provider-native history for Kimi (preserve reasoning_content across turns)
- self._kimi_raw_history: List[Dict[str, Any]] = []
+ # Provider-native history for Kimi (preserve reasoning_content across turns)
+ self._kimi_raw_history: List[Dict[str, Any]] = []
+ # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
+ self._gemini_raw_history: List[Dict[str, Any]] = []
+ # OpenAI Responses API threading: retain previous response id across turns
+ self._openai_previous_response_id: Optional[str] = None
+ # OpenAI Responses API threading: retain the full chain of response ids across turns
+ # (server will also echo per-turn ids in message.completed.openai_response_ids)
+ self._openai_response_id_history: List[str] = []
+
+ # OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+ # including reasoning items, function_call items, and function_call_output items.
+ self._openai_input_items: List[Dict[str, Any]] = []
+ # For robustness, remember exactly what we sent as openai_input_items for the current turn
+ # so we can append server-provided openai_delta_items deterministically.
+ self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+ # Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+ # Shape: {session_id, call_id, job_token, name}
+ self._inflight_dispatch: Optional[Dict[str, Any]] = None
  # Last server billing info from /api/usage/commit
  self._last_commit_cost_usd: float = 0.0
  self._last_remaining_credits: Optional[float] = None
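
The new `_openai_input_items` state implements manual conversation replay for the OpenAI Responses API: rather than relying on server-side threading via a previous response id, the client re-sends the full item chain (system/user messages, reasoning items, function calls and their outputs) on every turn. A minimal standalone sketch of how such a chain grows across turns; the helper name and item shapes here are illustrative, not the package's API:

    from typing import Any, Dict, List

    def append_turn(input_items: List[Dict[str, Any]],
                    user_text: str,
                    output_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Extend the replayed item chain with one completed turn.

        The chain is what gets re-sent as openai_input_items on the next
        request, so reasoning/function_call items must stay in order.
        """
        chain = list(input_items)
        chain.append({"role": "user", "content": user_text})
        chain.extend(output_items)  # reasoning, function_call, function_call_output, ...
        return chain

    # Turn 1: seed with the system prompt, then fold in the model's output items.
    chain: List[Dict[str, Any]] = [{"role": "system", "content": "You are a coding CLI."}]
    chain = append_turn(chain, "List the repo files.", [
        {"type": "function_call", "name": "list_dir", "call_id": "c1", "arguments": "{}"},
        {"type": "function_call_output", "call_id": "c1", "output": "cli.py"},
    ])
    print(len(chain))  # 4 items replayed on the next turn
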
@@ -1049,8 +1067,14 @@ class ChatCLI:
  self._thinking_indicator_enabled = True
  except Exception:
  self._thinking_indicator_enabled = True
- # Track Ctrl+C timing for double-press-to-exit behavior
- self._last_interrupt_ts: Optional[float] = None
+ # Track Ctrl+C timing for double-press-to-exit behavior
+ self._last_interrupt_ts: Optional[float] = None
+
+ # Ctrl+C during a running stream should not kill the entire CLI.
+ # Instead, we cancel the in-flight turn and reopen the last user query for editing.
+ # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+ self._pending_user_edit: Optional[str] = None
+ self._pending_turn_snapshot: Optional[Dict[str, Any]] = None

  # Timers: session-level and per-turn wall-clock timers
  self._session_started_at: Optional[float] = None # time.perf_counter() at session start
@@ -1091,7 +1115,7 @@
  self._pt_session = None

  # ----------------------- Provider heuristics -----------------------
- def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
+ def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
  """Return True when the model is an OpenAI reasoning-capable model.
  Mirrors server-side heuristic: prefixes 'gpt-5' or 'o4'.
  """
@@ -1342,6 +1366,9 @@ class ChatCLI:
  return {
  # OpenAI
  "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+ # New: gpt-5.2-codex
+ # Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+ "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
  # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
  "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
  "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
@@ -1359,8 +1386,7 @@ class ChatCLI:
  # New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
  "claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
  "claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
- # Gemini
- "gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
+ # Gemini
  # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
  "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
  # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
@@ -1375,15 +1401,15 @@ class ChatCLI:
  # DeepSeek V3.2 (+$0.25 per 1M margin)
  "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
  "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
- "deepseek-3.2-speciale": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+ # Removed: deepseek speciale (not supported)
  # Kimi
  "kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
  "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
  "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
- # GLM (Z.AI)
- # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
- "glm-4.6": {"input": 0.84, "output": 3.08, "provider": "glm"},
- }
+ # GLM (Z.AI)
+ # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
+ "glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
+ }

  def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
  if not model:
@@ -1431,6 +1457,15 @@ class ChatCLI:
  except Exception:
  model_name = ""
  try:
+ # Provider-native state resets when switching away from OpenAI.
+ try:
+ if self.model and (not self._is_openai_model(self.model)):
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  if model_name in {"gpt-5.2-pro"}:
  # Default these to high, but don't clobber a user-chosen xhigh.
  if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
@@ -1438,11 +1473,138 @@ class ChatCLI:
  # Codex family: disable preambles for better behavior
  if "codex" in model_name:
  self.preambles_enabled = False
+ # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+ # Force-disable for all other models (even if a saved setting had it enabled).
+ if not self._supports_preambles(self.model):
+ self.preambles_enabled = False
  except Exception:
  try:
  self.reasoning_effort = "high"
  except Exception:
  pass
+
+ def _supports_preambles(self, model: Optional[str]) -> bool:
+ """Tool-call preambles are a CLI-only UX hint.
+
+ Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+ In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+ """
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ # Only the base GPT-5 line supports this UX toggle.
+ # Allow:
+ # - "gpt-5"
+ # - date-pinned variants like "gpt-5-2025-08-07"
+ # Disallow:
+ # - versioned families like "gpt-5.1*" / "gpt-5.2*"
+ if not (m == "gpt-5" or m.startswith("gpt-5-")):
+ return False
+ if "codex" in m:
+ return False
+ return True
+ except Exception:
+ return False
+
+ def _is_openai_model(self, model: Optional[str]) -> bool:
+ """Best-effort model/provider discriminator for client-side state.
+
+ The server is multi-provider. For the CLI we treat anything that isn't an explicit
+ non-OpenAI provider prefix as OpenAI.
+ """
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ if not m:
+ return False
+ for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+ if m.startswith(pfx):
+ return False
+ # Everything else defaults to OpenAI in this repo.
+ return True
+ except Exception:
+ return False
+
+ def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+ """Whether this provider has an implemented native tool/thinking retention path."""
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ if m.startswith("gemini-"):
+ return True
+ if m.startswith("kimi-"):
+ return bool(getattr(self, "retain_native_tool_results", False))
+ if self._is_openai_model(model):
+ return True
+ return False
+ except Exception:
+ return False
+
+ def _sanitize_openai_items(self, items: Any) -> Any:
+ """Recursively strip fields from OpenAI output items that cause errors when used as input."""
+ if isinstance(items, list):
+ return [self._sanitize_openai_items(x) for x in items]
+ if isinstance(items, dict):
+ # 'status' is the main offender causing 400s
+ bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+ return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+ return items
+
+ async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+ """If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+ This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+ Best-effort; never raises.
+ """
+ ctx = None
+ try:
+ ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+ except Exception:
+ ctx = None
+ if not ctx:
+ return
+ session_id = ctx.get("session_id")
+ call_id = ctx.get("call_id")
+ job_token = ctx.get("job_token")
+ name = ctx.get("name")
+ if not (session_id and call_id and job_token):
+ return
+ payload_cb = {
+ "session_id": session_id,
+ "call_id": call_id,
+ "name": name,
+ "job_token": job_token,
+ "result": {
+ "ok": False,
+ "cancelled": True,
+ "error": str(reason or "cancelled"),
+ },
+ }
+ try:
+ # Keep it short; we just want to unblock the server.
+ http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+ except Exception:
+ http_timeout = None
+ try:
+ async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+ await client.post(self.tools_callback_url, json=payload_cb)
+ except Exception:
+ pass
+ finally:
+ try:
+ self._inflight_dispatch = None
+ except Exception:
+ pass
+
+ def _is_gpt_model(self, model: Optional[str]) -> bool:
+ """True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+ try:
+ return bool(model) and str(model).strip().lower().startswith("gpt-")
+ except Exception:
+ return False
  def _is_codex_model(self, model: Optional[str]) -> bool:
  try:
  return bool(model) and ("codex" in str(model).lower())
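
The new `_supports_preambles` gate is a pure string check, so its behavior can be pinned down in isolation. A standalone restatement of the rule with spot checks, mirroring the allow/disallow comments in the hunk above (not imported from the package):

    from typing import Optional

    def supports_preambles(model: Optional[str]) -> bool:
        """Allow only the base GPT-5 line: 'gpt-5' or date-pinned 'gpt-5-*', never Codex."""
        if not model:
            return False
        m = model.strip().lower()
        if not (m == "gpt-5" or m.startswith("gpt-5-")):
            return False  # rejects gpt-5.1*, gpt-5.2*, and other providers outright
        return "codex" not in m

    assert supports_preambles("gpt-5")
    assert supports_preambles("gpt-5-2025-08-07")     # date-pinned variant
    assert not supports_preambles("gpt-5.2")          # versioned family
    assert not supports_preambles("gpt-5-codex")      # Codex variant
    assert not supports_preambles("gemini-3-pro-preview")
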
@@ -1955,14 +2117,15 @@ class ChatCLI:
  pass
  return data

- def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
- try:
+ def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
+ try:
+ old_system_prompt = getattr(self, "system_prompt", None)
  self.model = data.get("model", self.model)
  if "save_chat_history" in data:
  try:
  self.save_chat_history = bool(data.get("save_chat_history"))
  except Exception:
- pass
+ pass
  self.requested_tools = data.get("requested_tools", self.requested_tools)
  self.fs_scope = data.get("fs_scope", self.fs_scope)
  self.host_base = data.get("host_base", self.host_base)
@@ -2101,18 +2264,38 @@ class ChatCLI:
  self.anthropic_cache_ttl = None
  except Exception:
  pass
- # Rebuild history if system prompt changed
- self.history = []
- if self.system_prompt:
- self.history.append({"role": "system", "content": self.system_prompt})
- # On settings load, do not assume the custom first-turn was injected yet
- try:
- self._did_inject_custom_first_turn = False
- except Exception:
- pass
- self._apply_model_side_effects()
- except Exception as e:
- self.ui.warn(f"Failed to apply settings: {e}")
+ # Rebuild history if system prompt changed
+ try:
+ system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
+ except Exception:
+ system_prompt_changed = False
+
+ if system_prompt_changed:
+ # Changing the system prompt can materially alter the behavior of the assistant;
+ # warn the user and reset the current conversation history to avoid mixing contexts.
+ try:
+ self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
+ except Exception:
+ pass
+ self.history = []
+ if self.system_prompt:
+ self.history.append({"role": "system", "content": self.system_prompt})
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
+ # On settings load, do not assume the custom first-turn was injected yet
+ try:
+ self._did_inject_custom_first_turn = False
+ except Exception:
+ pass
+ self._apply_model_side_effects()
+ except Exception as e:
+ self.ui.warn(f"Failed to apply settings: {e}")

  async def _fetch_server_settings(self) -> Optional[Dict[str, Any]]:
  try:
@@ -2492,30 +2675,28 @@ class ChatCLI:

  def _model_presets(self) -> List[Tuple[str, str]]:
  """Shared list of (model, label) used by settings UI and /model menu."""
+ # Ordered in "feelings" order (Recommended first, then Others).
+ # NOTE: We intentionally do not include a "server default" or "custom" option here.
  return [
+ # Recommended
  ("gpt-5.2", "OpenAI: gpt-5.2"),
- ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
+ ("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
  ("gpt-5", "OpenAI: gpt-5"),
+ ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+ ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+ ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
+ ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
+ ("grok-code-fast-1", "xAI: grok-code-fast-1"),
+
+ # Others
+ ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
  ("gpt-5-codex", "OpenAI: gpt-5-codex"),
  ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
- ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
  ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
- ("deepseek-3.2-speciale", "DeepSeek: deepseek 3.2 Speciale (no tools)"),
- ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
- ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
- ("gemini-2.5-pro", "Gemini: gemini-2.5-pro"),
- ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
- ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
- ("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
- ("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
- ("grok-4", "xAI: grok-4"),
- ("grok-code-fast-1", "xAI: grok-code-fast-1"),
- ("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
- ("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
- ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
- ("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
- ("glm-4.6", "GLM: glm-4.6"),
- ]
+ ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+ ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
+ ("glm-4.7", "GLM: glm-4.7"),
+ ]

  async def open_settings(self, focus: Optional[str] = None) -> None:
  """Open the new dependency-free settings UI. Falls back to legacy only when
@@ -2569,37 +2750,33 @@ class ChatCLI:
  }
  initial = self._collect_settings_dict()

- # Model presets list (shared)
- model_presets: List[Tuple[str, str]] = self._model_presets()
- # Reorder with a Recommended section at the top. Avoid decorative symbols; instead,
- # annotate recommended models with plain text for clarity.
- # Recommended set per request: opus 4-5 (no thinking), gemini 3, gpt 5, kimi k2 thinking,
- # grok code fast 1, and deepseek reasoner 3.2
- rec_keys = {
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
+ # Model presets list (shared)
+ model_presets: List[Tuple[str, str]] = self._model_presets()
+
+ # Reorder with a Recommended section at the top.
+ # IMPORTANT: remove "server default" and "custom" from Settings UI.
+ rec_keys_ordered = [
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
  "gemini-3-pro-preview",
  "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
+ "claude-opus-4-5-20251101",
  "kimi-k2-thinking",
  "grok-code-fast-1",
- }
- rec_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m in rec_keys]
- other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_keys]
- # Build enum options in the order: Server default, Recommended, Others, Custom
- model_enum_options: List[Optional[str]] = [None] + [m for (m, _l) in rec_list] + [m for (m, _l) in other_list] + ["custom"]
- # Build render map without any star/marker characters; use a simple "(recommended)" suffix
- # for recommended models EXCEPT DeepSeek Reasoner 3.2, which should not display the suffix.
- render_map: Dict[Any, str] = {None: "Server default"}
+ ]
+ rec_set = set(rec_keys_ordered)
+ preset_map = {m: lbl for (m, lbl) in model_presets}
+ rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
+ other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
+
+ # Build enum options in the order: Recommended, Others
+ model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+ render_map: Dict[Any, str] = {}
  for m, lbl in rec_list:
- if m == "deepseek-reasoner-3.2":
- render_map[m] = lbl
- else:
- render_map[m] = f"{lbl} (recommended)"
- for m, lbl in other_list:
- render_map[m] = lbl
- render_map["custom"] = "Custom..."
+ render_map[m] = lbl
+ for m, lbl in other_list:
+ render_map[m] = lbl

  # Build items schema
  items: List[Dict[str, Any]] = [
@@ -2626,14 +2803,16 @@ class ChatCLI:
  "id": "requested_tools",
  "label": "Tools",
  "type": "enum",
- "options": [None, True, False],
+ # Default-first: ON, then OFF, then server default.
+ "options": [True, False, None],
  "render": {None: "Server default", True: "ON", False: "OFF"},
  },
  {
  "id": "control_level",
  "label": "Control level",
  "type": "enum",
- "options": [None, 1, 2, 3],
+ # Default-first: Level 3, then 2, then 1, then server default.
+ "options": [3, 2, 1, None],
  "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
  },
  {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
@@ -2647,35 +2826,44 @@ class ChatCLI:
  {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
  # Agent scope & filesystem controls
  {"id": "host_base", "label": "Agent scope directory", "type": "text"},
- {
- "id": "fs_scope",
- "label": "Filesystem scope",
- "type": "enum",
- "options": [None, "workspace", "host"],
- "render": {
- None: "Server default",
- "workspace": "Workspace (sandbox)",
- "host": "Host (Agent scope)",
- },
- },
- {
- "id": "fs_host_mode",
- "label": "Host mode",
- "type": "enum",
- "options": [None, "any", "cwd", "custom"],
- "render": {
- None: "Server default / any",
- "any": "any (no extra client restriction)",
- "cwd": "Current working directory",
- "custom": "Custom (use Agent scope)",
- },
- },
+ {
+ "id": "fs_scope",
+ "label": "Filesystem scope",
+ "type": "enum",
+ # Default-first: host (Agent scope), then workspace, then server default.
+ "options": ["host", "workspace", None],
+ "render": {
+ None: "Server default",
+ "workspace": "Workspace (sandbox)",
+ "host": "Host (Agent scope)",
+ },
+ },
+ {
+ "id": "fs_host_mode",
+ "label": "Host mode",
+ "type": "enum",
+ # Default-first: custom (use Agent scope), then cwd, then any, then server default.
+ "options": ["custom", "cwd", "any", None],
+ "render": {
+ None: "Server default / any",
+ "any": "any (no extra client restriction)",
+ "cwd": "Current working directory",
+ "custom": "Custom (use Agent scope)",
+ },
+ },
  ]},
  {"label": "Code Map", "type": "group", "items": [
  {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
  ]},
  {"label": "Preambles & First-turn", "type": "group", "items": [
- {"id": "preambles_enabled", "label": "Enable tool call preambles (supported models only)", "type": "bool"},
+ {
+ "id": "preambles_enabled",
+ "label": "Enable tool call preambles (GPT-5 only)",
+ "type": "bool",
+ # Only show this control when the *currently selected* model supports it.
+ # (This updates live as the Model picker changes.)
+ "visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
+ },
  {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
  {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
  {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
@@ -2688,8 +2876,22 @@ class ChatCLI:
  ]},
  ]

- # Prepare initial values with enum placeholder for model when custom text set
- init_for_ui = dict(initial)
+ # Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+ try:
+ if not self._is_gpt_model(self.model):
+ for g in items:
+ if not isinstance(g, dict):
+ continue
+ if (g.get("type") == "group") and (g.get("label") == "General"):
+ for row in (g.get("items") or []):
+ if isinstance(row, dict) and row.get("id") == "text_verbosity":
+ row["options"] = ["medium", "high"]
+ row["render"] = {"medium": "Medium", "high": "High"}
+ except Exception:
+ pass
+
+ # Prepare initial values with enum placeholder for model when custom text set
+ init_for_ui = dict(initial)
  if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
  # Represent as 'custom' for cycling, but keep original model in working copy for edit with 'e'
  pass # We'll keep exact model string; enum will show the raw value when not matched
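
The `visible_if` entry added above stores a callable, which the settings renderer presumably evaluates against the current working values so the preambles row appears and disappears as the Model picker changes. A minimal sketch of that contract; the renderer loop and the simplified predicate here are illustrative assumptions, not the package's actual UI code:

    from typing import Any, Callable, Dict, List, Optional

    def visible_rows(rows: List[Dict[str, Any]],
                     working: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Keep a row when it has no visible_if, or its predicate accepts the working dict."""
        out = []
        for row in rows:
            pred: Optional[Callable[[Dict[str, Any]], bool]] = row.get("visible_if")
            if pred is None or pred(working):
                out.append(row)
        return out

    rows = [
        # Simplified stand-in for _supports_preambles: exact match on the base model.
        {"id": "preambles_enabled",
         "visible_if": lambda w: (w or {}).get("model") == "gpt-5"},
        {"id": "custom_first_turn_enabled"},
    ]
    print([r["id"] for r in visible_rows(rows, {"model": "gpt-5"})])            # both rows
    print([r["id"] for r in visible_rows(rows, {"model": "kimi-k2-thinking"})]) # second row only
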
@@ -3037,17 +3239,17 @@ class ChatCLI:
  self.ui.success(f"FS Scope set to: {self._fs_label()}")
  self.save_settings()

- async def set_level_menu(self) -> None:
- val = await self._menu_choice(
- "Control Level",
- "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
- [
- ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
- ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
- ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
- ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
- ],
- )
+ async def set_level_menu(self) -> None:
+ val = await self._menu_choice(
+ "Control Level",
+ "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
+ [
+ ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+ ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+ ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
+ ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
+ ],
+ )
  if val == "default":
  self.control_level = None
  elif val in ("1", "2", "3"):
@@ -3127,16 +3329,16 @@ class ChatCLI:
  except Exception:
  pass

- # 3) Tool usage preamble (UX hint)
+ # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
  try:
- if bool(getattr(self, "preambles_enabled", False)) and not self._is_codex_model(self.model):
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
  "Tool usage: when you need to read or modify files or run commands, "
  "explicitly explain why you're using a tool, what you'll do, and how it "
  "advances the user's goal before calling the tool."
  )
- except Exception:
- pass
+ except Exception:
+ pass

  # 4) Working memory (context-summary file), injected once on fresh session restart
  try:
@@ -3256,7 +3458,7 @@ class ChatCLI:
  "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
  )

- def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
+ def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
  """Build provider-native messages for Kimi preserving prior assistant reasoning_content.
  Includes prior provider-native turns and the current user message with first-turn injections.
  """
@@ -3274,8 +3476,31 @@ class ChatCLI:
  for m in (self._kimi_raw_history or []):
  raw.append(m)
  # Append current user message
- raw.append({"role": "user", "content": content})
- return raw
+ raw.append({"role": "user", "content": content})
+ return raw
+
+ def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
+ """Normalize Gemini provider-native history.
+
+ Ensures we only send a flat list of dicts back to the server.
+ This prevents accidental nesting like [[{...}, {...}]] which the
+ google-genai SDK rejects with pydantic union validation errors.
+ """
+ out: List[Dict[str, Any]] = []
+ if not isinstance(rpm, list):
+ return out
+ for item in rpm:
+ if item is None:
+ continue
+ if isinstance(item, list):
+ # Flatten one level
+ for sub in item:
+ if isinstance(sub, dict):
+ out.append(dict(sub))
+ continue
+ if isinstance(item, dict):
+ out.append(dict(item))
+ return out

  def _build_working_memory_injection(self) -> Optional[str]:
  try:
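
The normalization above guards against one specific failure mode: a turn's history being appended as a nested list, which would reach the google-genai SDK as `[[{...}]]` and fail pydantic union validation. The flattening behavior, restated as a standalone sketch with sample data (content shapes are illustrative):

    from typing import Any, Dict, List

    def normalize(rpm: Any) -> List[Dict[str, Any]]:
        """Flatten one level of accidental nesting and drop non-dict entries."""
        out: List[Dict[str, Any]] = []
        if not isinstance(rpm, list):
            return out
        for item in rpm:
            if isinstance(item, list):            # one nested turn: flatten it
                out.extend(dict(s) for s in item if isinstance(s, dict))
            elif isinstance(item, dict):
                out.append(dict(item))
        return out

    history = [
        {"role": "user", "parts": [{"text": "hi"}]},
        [{"role": "model", "parts": [{"text": "hello"}]}],  # accidentally nested turn
        None,                                               # dropped
    ]
    print(len(normalize(history)))  # 2 flat dicts, safe to send as raw_provider_messages
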
@@ -4577,21 +4802,29 @@ class ChatCLI:
  self.save_settings()
  return True

- if cmd.startswith("/system "):
- self.system_prompt = cmd[len("/system ") :].strip()
- self.history = []
- if self.system_prompt:
- self.history.append({"role": "system", "content": self.system_prompt})
- # Treat as a fresh session; allow map re-injection
- self._did_inject_codebase_map = False
+ if cmd.startswith("/system "):
+ self.system_prompt = cmd[len("/system ") :].strip()
+ self.history = []
+ if self.system_prompt:
+ self.history.append({"role": "system", "content": self.system_prompt})
+ # Treat as a fresh session; allow map re-injection
+ self._did_inject_codebase_map = False
  # Also allow custom first-turn injection again
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- self.ui.success("System prompt set.")
- self.save_settings()
- return True
+ self.ui.success("System prompt set.")
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
+ self.save_settings()
+ return True

  if cmd.startswith("/title "):
  new_title = cmd[len("/title ") :].strip()
@@ -4603,21 +4836,26 @@ class ChatCLI:
  self.ui.success(f"Thread title set to: {self.thread_name}")
  return True

- if cmd == "/clear":
+ if cmd == "/clear":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- # Reset provider-native histories
- try:
+ # Reset provider-native histories
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -4694,7 +4932,7 @@ class ChatCLI:

  # ---------------------------- Run loop ----------------------------

- async def run(self) -> None:
+ async def run(self) -> None:
  # Try persisted auth
  self._load_auth_state_from_disk()

@@ -4868,25 +5106,70 @@ class ChatCLI:
  self._session_started_at = None
  # Prepare completer for slash commands (if prompt_toolkit is available)
  pt_completer = self._commands_word_completer()
- while True:
- try:
- if self._pt_session is not None:
- # Use prompt_toolkit with inline completion when available
- # Pass completer per-prompt to ensure latest catalog
- user_input = await self._pt_session.prompt_async(
- "You: ",
- completer=pt_completer,
- complete_while_typing=True,
- )
- user_input = user_input.strip()
- elif self._input_engine:
- # Do not add continuation prefixes on new lines
- user_input = self._input_engine.read_message("You: ", "")
- else:
- user_input = self._read_multiline_input("You: ")
- # Successful read resets interrupt window
- self._last_interrupt_ts = None
- except KeyboardInterrupt:
+ while True:
+ try:
+ pending_edit = self._pending_user_edit
+ edit_mode = pending_edit is not None
+
+ if self._pt_session is not None:
+ # Use prompt_toolkit with inline completion when available
+ # Pass completer per-prompt to ensure latest catalog
+ try:
+ # prompt_toolkit supports default= on modern versions; fall back gracefully.
+ if edit_mode:
+ user_input = await self._pt_session.prompt_async(
+ "You (edit): ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ default=str(pending_edit),
+ )
+ else:
+ user_input = await self._pt_session.prompt_async(
+ "You: ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ )
+ except TypeError:
+ # Older prompt_toolkit: no default= support
+ user_input = await self._pt_session.prompt_async(
+ "You: ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ )
+ user_input = user_input.strip()
+ elif self._input_engine:
+ if edit_mode:
+ # The low-level input engine currently doesn't support prefill.
+ # Show the previous message and let the user paste a replacement.
+ try:
+ self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ # Do not add continuation prefixes on new lines
+ user_input = self._input_engine.read_message("You: ", "")
+ else:
+ if edit_mode:
+ try:
+ self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ user_input = self._read_multiline_input("You: ")
+
+ # Clear pending edit state after we successfully collected input.
+ if edit_mode:
+ self._pending_user_edit = None
+ self._pending_turn_snapshot = None
+ # Successful read resets interrupt window
+ self._last_interrupt_ts = None
+ except KeyboardInterrupt:
  # First Ctrl+C: interrupt input and warn; second within window exits
  now = time.time()
  try:
@@ -4905,8 +5188,8 @@ class ChatCLI:
  self.ui.print("Goodbye.")
  return

- if not user_input:
- continue
+ if not user_input:
+ continue

  # Command palette if bare '/'
  if user_input == "/":
@@ -4929,12 +5212,35 @@ class ChatCLI:
  if handled:
  continue

- try:
- # Record user message for local/server save
- if self.save_chat_history:
- self.messages_for_save.append({
- "role": "user",
- "content": user_input,
+ try:
+ # Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+ # This is critical for first-turn injections (code map/custom note/working memory)
+ # which are applied by mutating flags during payload construction.
+ self._pending_turn_snapshot = {
+ "history": copy.deepcopy(self.history),
+ "messages_for_save": copy.deepcopy(self.messages_for_save),
+ "kimi_raw": copy.deepcopy(self._kimi_raw_history),
+ "gemini_raw": copy.deepcopy(self._gemini_raw_history),
+ "openai_prev": getattr(self, "_openai_previous_response_id", None),
+ "openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+ "openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+ "openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+ "inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+ "did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+ "did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+ "did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+ "memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+ "last_built_user_content": getattr(self, "_last_built_user_content", None),
+ }
+
+ # Clear any stale in-flight dispatch context at turn start.
+ self._inflight_dispatch = None
+
+ # Record user message for local/server save
+ if self.save_chat_history:
+ self.messages_for_save.append({
+ "role": "user",
+ "content": user_input,
  "model": None,
  "citations": None,
  "last_turn_input_tokens": 0,
@@ -4948,25 +5254,86 @@ class ChatCLI:
  if self._busy:
  self.ui.warn("Agent is busy with another turn. Please wait...")
  continue
- self._busy = True
- try:
- assistant_text = await self._stream_once(user_input)
- finally:
- self._busy = False
- except httpx.HTTPStatusError as he:
- try:
- if he.response is not None:
- await he.response.aread()
- body = he.response.text
+ self._busy = True
+ try:
+ assistant_text = await self._stream_once(user_input)
+ finally:
+ self._busy = False
+ except KeyboardInterrupt:
+ # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
+ # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+ try:
+ await self._cancel_inflight_dispatch()
+ except (Exception, BaseException):
+ pass
+
+ # Restore state to *before* this turn started.
+ try:
+ snap = self._pending_turn_snapshot or {}
+ if isinstance(snap.get("history"), list):
+ self.history = snap.get("history")
+ if isinstance(snap.get("messages_for_save"), list):
+ self.messages_for_save = snap.get("messages_for_save")
+ if isinstance(snap.get("kimi_raw"), list):
+ self._kimi_raw_history = snap.get("kimi_raw")
+ if isinstance(snap.get("gemini_raw"), list):
+ self._gemini_raw_history = snap.get("gemini_raw")
+ if "openai_prev" in snap:
+ self._openai_previous_response_id = snap.get("openai_prev")
+ if isinstance(snap.get("openai_ids"), list):
+ self._openai_response_id_history = snap.get("openai_ids")
+ if isinstance(snap.get("openai_input_items"), list):
+ self._openai_input_items = snap.get("openai_input_items")
+ if "openai_last_sent_input_items" in snap:
+ self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
+ if "inflight_dispatch" in snap:
+ self._inflight_dispatch = snap.get("inflight_dispatch")
+ if "did_inject_codebase_map" in snap:
+ self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
+ if "did_inject_custom_first_turn" in snap:
+ self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
+ if "did_inject_working_memory" in snap:
+ self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
+ if "memory_paths_for_first_turn" in snap:
+ self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
+ self._last_built_user_content = snap.get("last_built_user_content")
+ except Exception:
+ pass
+
+ # Clear any transient indicator line and land on a fresh prompt line.
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ try:
+ self.ui.print()
+ except Exception:
+ pass
+
+ try:
+ supports = self._provider_supports_native_retention(self.model)
+ except Exception:
+ supports = False
+ if supports:
+ self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
+ else:
+ self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
+ self._pending_user_edit = user_input
+ continue
+ except httpx.HTTPStatusError as he:
+ try:
+ if he.response is not None:
+ await he.response.aread()
+ body = he.response.text
  else:
  body = ""
  except Exception:
  body = ""
  self.ui.error(f"[HTTP error] {he.response.status_code} {body}")
  continue
- except Exception as e:
- self.ui.error(f"[Client error] {e}")
- continue
+ except Exception as e:
+ self.ui.error(f"[Client error] {e}")
+ continue

  # Skip appending empty assistant messages to avoid 422 on next request
  if assistant_text.strip():
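
The cancellation path above reuses the same tools callback endpoint the CLI already posts tool results to, but with a `cancelled` result body so the server stops waiting on the delegated job instead of running out its callback timeout. A sketch of just that POST; the payload shape follows `_cancel_inflight_dispatch` in the hunk further up, while the URL and context values are illustrative:

    import asyncio
    import httpx

    async def cancel_dispatch(callback_url: str, ctx: dict,
                              reason: str = "cancelled by user") -> None:
        """Best-effort: unblock the server quickly; never raise on failure."""
        payload = {
            "session_id": ctx["session_id"],
            "call_id": ctx["call_id"],
            "name": ctx.get("name"),
            "job_token": ctx["job_token"],
            "result": {"ok": False, "cancelled": True, "error": reason},
        }
        # Short timeouts on purpose: the user already aborted this turn.
        timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                await client.post(callback_url, json=payload)
        except Exception:
            pass  # a lost callback just falls back to the server-side timeout

    # asyncio.run(cancel_dispatch("https://server.example/api/tools/callback",
    #                             {"session_id": "s1", "call_id": "c1", "job_token": "t1"}))
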
@@ -4992,7 +5359,7 @@ class ChatCLI:
  ("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
  ("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
  (auth_action_key, auth_action_label),
- ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5, gemini-2.5-pro, grok-4, deepseek-chat) or use Change Model to type one"),
+ ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2-thinking, etc.)"),
  ("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
  ("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
  ("clear_history", "🧹 Clear History - Reset chat history"),
@@ -5028,15 +5395,14 @@ class ChatCLI:
  has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
  is_effectively_free = (self.is_free_tier and not has_credits)

- # Recommended models (ordered list for shuffling)
- # Curated list per request (include Codex Max as recommended)
+ # Recommended models ("feelings" order)
  rec_keys = [
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
  "gemini-3-pro-preview",
  "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
+ "claude-opus-4-5-20251101",
  "kimi-k2-thinking",
  "grok-code-fast-1",
  ]
@@ -5078,8 +5444,7 @@ class ChatCLI:
  suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
  choices.append((m, f"{lbl}{suffix}"))

- choices.append(("default", "Server Default (no override)"))
- choices.append(("custom", "Custom (enter a model name)"))
+ # Per issue list: do not surface "server default" or "custom" in this picker.

  # Render and select using the unified highlighted picker
  picked: Optional[str] = None
@@ -5094,27 +5459,15 @@ class ChatCLI:
  picked = str(val)

  # Enforce free tier restrictions
- if picked not in ("default", "custom") and is_effectively_free and is_paid_model(picked):
- self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
- continue
+ if is_effectively_free and is_paid_model(picked):
+ self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
+ continue

  break

- # Apply selection
- if picked == "default":
- self.model = None
- self.ui.info("Model cleared; server default will be used.")
- elif picked == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
- self.model = self._resolve_model_alias(typed.strip() or None)
- if not self.model:
- self.ui.info("Model cleared; server default will be used.")
- else:
- self.model = picked
- self.ui.success(f"Model set to: {self.model}")
+ # Apply selection
+ self.model = picked
+ self.ui.success(f"Model set to: {self.model}")

  self._apply_model_side_effects()
  self.save_settings()
@@ -5203,7 +5556,7 @@ class ChatCLI:
  self.save_settings()
  return True

- if choice == "set_system_prompt":
+ if choice == "set_system_prompt":
  prompt = self.ui.prompt("Enter system prompt", default=self.system_prompt or "")
  self.system_prompt = prompt.strip()
  self.history = []
@@ -5215,32 +5568,40 @@ class ChatCLI:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- # Clear provider-native histories on system reset
- try:
+ # Clear provider-native histories on system reset
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  self.ui.success("System prompt set.")
  self.save_settings()
  return True

- if choice == "clear_history":
+ if choice == "clear_history":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- try:
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -5293,11 +5654,82 @@ class ChatCLI:
  return True

  # ----------------------- SSE Streaming loop ------------------------
- async def _stream_once(self, user_input: str) -> str:
- # Build request payload
- payload: Dict[str, Any] = {"messages": self._build_messages(user_input)}
+ async def _stream_once(self, user_input: str) -> str:
+ # Build request payload.
+ # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
+ # `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
+ if self._is_openai_model(self.model):
+ msgs: List[Dict[str, str]] = []
+ # Codex developer prompt (if enabled) + system prompt
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ if self.system_prompt:
+ msgs.append({"role": "system", "content": self.system_prompt})
+
+ # Apply first-turn-only injections to the current user content
+ content = user_input
+ prefix = self._build_first_turn_injection(user_input)
+ if prefix:
+ content = f"{prefix}\n\n{user_input}"
+ try:
+ self._last_built_user_content = content
+ except Exception:
+ self._last_built_user_content = user_input
+ msgs.append({"role": "user", "content": content})
+
+ payload: Dict[str, Any] = {"messages": msgs}
+
+ # Build OpenAI native input items (authoritative for the server OpenAI path).
+ try:
+ if isinstance(self._openai_input_items, list) and self._openai_input_items:
+ items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
+ else:
+ # Seed with system prompts for the first OpenAI turn.
+ items = []
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ items.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ if self.system_prompt:
+ items.append({"role": "system", "content": self.system_prompt})
+ items.append({"role": "user", "content": content})
+ payload["openai_input_items"] = self._sanitize_openai_items(items)
+ self._openai_last_sent_input_items = copy.deepcopy(items)
+ except Exception:
+ # If this fails for any reason, fall back to normal message-based history.
+ self._openai_last_sent_input_items = None
+
+ # OpenAI Threading: DISABLED. We use full manual input item replay now.
+ # if "openai_input_items" not in payload:
+ # try:
+ # if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
+ # payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
+ # except Exception:
+ # pass
+ try:
+ if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
+ except Exception:
+ pass
+ else:
+ payload = {"messages": self._build_messages(user_input)}
  if self.model:
  payload["model"] = self.model
+ # OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
+ try:
+ if self._is_openai_model(self.model):
+ if (
+ isinstance(getattr(self, "_openai_response_id_history", None), list)
+ and self._openai_response_id_history
+ and "openai_response_id_history" not in payload
+ ):
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
+ except Exception:
+ pass
  # Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
  try:
  if self.terminal_id:
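
Concretely, the OpenAI branch above produces a request whose `openai_input_items` carry the whole conversation, while `messages` only carry the current turn's system and user text. An illustrative sketch of the two payload shapes; the keys match the hunk above, the values are placeholders, and the assumption that the new user message is appended to the replayed items on later turns is ours (the viewer strips indentation, so the exact branch structure is not recoverable):

    # Turn 1: items are seeded from the system prompt plus the injected user content.
    payload_turn_1 = {
        "model": "gpt-5.2",
        "messages": [
            {"role": "system", "content": "You are a coding CLI."},
            {"role": "user", "content": "<codebase_map>...</codebase_map>\n\nAdd a --version flag."},
        ],
        "openai_input_items": [
            {"role": "system", "content": "You are a coding CLI."},
            {"role": "user", "content": "<codebase_map>...</codebase_map>\n\nAdd a --version flag."},
        ],
    }

    # Turn N: items replay every prior OpenAI-native item, sanitized of output-only
    # fields ("status", "usage", ...), presumably ending with the new user message.
    payload_turn_n = {
        "model": "gpt-5.2",
        "messages": [
            {"role": "system", "content": "You are a coding CLI."},
            {"role": "user", "content": "Now add tests."},
        ],
        "openai_response_id_history": ["resp_abc", "resp_def"],  # echoed by the server per turn
        "openai_input_items": [
            # ...prior system/user/reasoning/function_call/function_call_output items...
            {"role": "user", "content": "Now add tests."},
        ],
    }
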
@@ -5381,10 +5813,12 @@ class ChatCLI:
  payload["text_verbosity"] = self.text_verbosity
  except Exception:
  pass
- try:
- payload["preambles_enabled"] = bool(self.preambles_enabled)
- except Exception:
- pass
+ # Preambles are a GPT-5-only UX toggle.
+ try:
+ if self._supports_preambles(self.model):
+ payload["preambles_enabled"] = bool(self.preambles_enabled)
+ except Exception:
+ pass

  if self.web_search_enabled:
  payload["enable_web_search"] = True
@@ -5479,13 +5913,23 @@ class ChatCLI:
  headers["X-Request-Timeout"] = str(int(req_timeout_hint))
  except Exception:
  pass
- # If using a Kimi model, include provider-native messages to preserve reasoning_content
- try:
- if isinstance(self.model, str) and self.model.startswith("kimi-"):
- req_payload = dict(req_payload)
- req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
- except Exception:
- pass
+ # If using a Kimi model, include provider-native messages to preserve reasoning_content
+ try:
+ if isinstance(self.model, str) and self.model.startswith("kimi-"):
+ req_payload = dict(req_payload)
+ req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
+ except Exception:
+ pass
+ # If using a Gemini model, include provider-native contents to preserve thought signatures
+ # and strict tool-call chains across HTTP turns.
+ try:
+ if isinstance(self.model, str) and self.model.startswith("gemini-"):
+ req_payload = dict(req_payload)
+ hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
+ if hist:
+ req_payload["raw_provider_messages"] = hist
+ except Exception:
+ pass
  async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
  async with client.stream("POST", self.stream_url, json=req_payload, headers=headers, follow_redirects=True) as resp:
  if resp.status_code == 429:
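
For Gemini, the same `raw_provider_messages` field carries google-genai-style content dicts, so `thoughtSignature` blobs and functionCall/functionResponse pairs survive the HTTP round trip verbatim. An illustrative request body; the part shapes follow google-genai content dict conventions, and all values are placeholders:

    req_payload = {
        "model": "gemini-3-pro-preview",
        "messages": [{"role": "user", "content": "Continue the refactor."}],
        # Provider-native history, already flattened by _normalize_gemini_raw_messages:
        "raw_provider_messages": [
            {"role": "user", "parts": [{"text": "Rename the helper."}]},
            {"role": "model", "parts": [
                {"functionCall": {"name": "read_file", "args": {"path": "cli.py"}},
                 "thoughtSignature": "sig-..."},  # opaque; must be echoed back unmodified
            ]},
            {"role": "user", "parts": [
                {"functionResponse": {"name": "read_file", "response": {"output": "..."}}},
            ]},
        ],
    }
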
@@ -5596,6 +6040,51 @@ class ChatCLI:
5596
6040
  # Track whether we're currently positioned at the start of a fresh line.
5597
6041
  # This prevents double-newlines between back-to-back tool events.
5598
6042
  at_line_start = True
6043
+
6044
+ # --- Tool call in-place status (issuelist.md #7) ---
6045
+ # We render a single transient line for the current tool call (no trailing newline)
6046
+ # so the later tool.result SUCCESS/FAILURE line can replace it in-place.
6047
+ tool_status_active = False
6048
+ tool_status_call_id = None
6049
+
6050
+ def _tool_status_clear_line() -> None:
6051
+ """Clear the current line (best-effort) and return to column 0."""
6052
+ nonlocal at_line_start
6053
+ try:
6054
+ sys.stdout.write("\r\x1b[2K")
6055
+ sys.stdout.flush()
6056
+ except Exception:
6057
+ pass
6058
+ at_line_start = True
6059
+
6060
+ def _tool_status_show(call_id: Any, line: str) -> None:
6061
+ """Show the transient tool status line (no newline)."""
6062
+ nonlocal tool_status_active, tool_status_call_id, at_line_start
6063
+ if not self.show_tool_calls:
6064
+ return
6065
+ tool_status_active = True
6066
+ tool_status_call_id = str(call_id) if call_id is not None else None
6067
+ try:
6068
+ if not at_line_start:
6069
+ sys.stdout.write("\n")
6070
+ sys.stdout.write("\r\x1b[2K" + str(line))
6071
+ sys.stdout.flush()
6072
+ at_line_start = False
6073
+ except Exception:
6074
+ # Fallback: degrade to a normal printed line
6075
+ try:
6076
+ self.ui.print(str(line))
6077
+ except Exception:
6078
+ pass
6079
+ at_line_start = True
6080
+
6081
+ def _tool_status_stop() -> None:
6082
+ """Remove the transient tool status line and clear tracking."""
6083
+ nonlocal tool_status_active, tool_status_call_id
6084
+ if tool_status_active:
6085
+ _tool_status_clear_line()
6086
+ tool_status_active = False
6087
+ tool_status_call_id = None
5599
6088
  # Mode: animate or static (default static for stability)
5600
6089
  try:
5601
6090
  _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
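
Note: "\r\x1b[2K" is a carriage return followed by the ANSI "erase entire line" control, which is what lets the later SUCCESS/FAILURE line land where the transient status was. A standalone sketch of the replace-in-place technique (assumes an ANSI-capable terminal):

import sys
import time

def show_transient(line: str) -> None:
    # \r moves the cursor to column 0; \x1b[2K erases the whole line.
    # No trailing newline, so the next write can overwrite this status.
    sys.stdout.write("\r\x1b[2K" + line)
    sys.stdout.flush()

def finalize(line: str) -> None:
    # Replace the transient status and commit the final line with a newline.
    sys.stdout.write("\r\x1b[2K" + line + "\n")
    sys.stdout.flush()

show_transient("[RUNNING] read_file cli.py")
time.sleep(1.0)  # stand-in for the tool actually executing
finalize("[SUCCESS] read_file cli.py")
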
@@ -5899,16 +6388,40 @@ class ChatCLI:
5899
6388
  except Exception:
5900
6389
  pass
5901
6390
 
5902
- # Do NOT show the initial tool.call line per UX request; results will be
5903
- # rendered on tool.result. We still keep internal state and WS broadcasts.
5904
- # While the tool executes (server or client), show a subtle thinking
5905
- # indicator so users see progress during potentially long operations.
5906
- try:
5907
- # Do not start the indicator if we're in the middle of assistant token streaming
5908
- if (not streaming_assistant) and bool(getattr(self, "_thinking_indicator_enabled", False)):
5909
- await _indicator_start()
5910
- except Exception:
5911
- pass
6391
+ # issuelist.md #7:
6392
+ # Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
6393
+ try:
6394
+ # Clear any previous transient status line (none should still be active here, but keep output stable)
6395
+ _tool_status_stop()
6396
+ except Exception:
6397
+ pass
6398
+ try:
6399
+ tool_name = str(name or "").strip()
6400
+ label = self._tool_concise_label(
6401
+ tool_name,
6402
+ args if isinstance(args, dict) else {},
6403
+ None,
6404
+ )
6405
+ try:
6406
+ model_prefix = (
6407
+ self._current_turn.get("model")
6408
+ or self._last_used_model
6409
+ or self.model
6410
+ or "(server default)"
6411
+ )
6412
+ except Exception:
6413
+ model_prefix = self.model or "(server default)"
6414
+ ORANGE = "\x1b[38;5;214m"
6415
+ WHITE = "\x1b[97m"
6416
+ RESET = "\x1b[0m"
6417
+ status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
6418
+ _tool_status_show(call_id, status_line)
6419
+ except Exception:
6420
+ # Last-resort fallback: print something rather than crash streaming.
6421
+ try:
6422
+ self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
6423
+ except Exception:
6424
+ pass
5912
6425
  # Count tool calls
5913
6426
  try:
5914
6427
  tool_calls += 1
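
Note: the escape codes above are standard SGR sequences: 38;5;214 selects entry 214 (orange) of the xterm 256-color palette and 97 is bright white. A short sketch composing the same status line:

ORANGE = "\x1b[38;5;214m"  # xterm-256color palette entry 214
WHITE = "\x1b[97m"         # bright white (SGR 97)
RESET = "\x1b[0m"          # reset all attributes

def running_line(model: str, label: str) -> str:
    # Mirrors the diff: "<model>: [RUNNING] <label>", with the model and
    # status in orange and the tool label in bright white.
    return f"{ORANGE}{model}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"

print(running_line("gpt-5", "read_file cli.py"))
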
@@ -5931,10 +6444,15 @@ class ChatCLI:
5931
6444
  except Exception:
5932
6445
  pass
5933
6446
 
5934
- elif event == "approval.request":
5935
- # First reply wins (web or CLI)
5936
- await self._handle_approval_request(client, session_id, data)
5937
- continue
6447
+ elif event == "approval.request":
6448
+ # Don't let the transient [RUNNING] line collide with interactive prompts.
6449
+ try:
6450
+ _tool_status_stop()
6451
+ except Exception:
6452
+ pass
6453
+ # First reply wins (web or CLI)
6454
+ await self._handle_approval_request(client, session_id, data)
6455
+ continue
5938
6456
 
5939
6457
  elif event == "approval.result":
5940
6458
  appr = data.get("approved")
@@ -5976,10 +6494,22 @@ class ChatCLI:
5976
6494
  self.ui.info("Working memory created. Restarting conversation with a fresh first-turn injection...")
5977
6495
  return ""
5978
6496
 
5979
- elif event == "tool.result":
5980
- name = str(data.get("name"))
5981
- result = data.get("result", {}) or {}
5982
- call_id = data.get("call_id")
6497
+ elif event == "tool.result":
6498
+ name = str(data.get("name"))
6499
+ result = data.get("result", {}) or {}
6500
+ call_id = data.get("call_id")
6501
+ # If we previously rendered a transient [RUNNING] line for this tool call,
6502
+ # clear it now so the SUCCESS/FAILURE line prints in the same place.
6503
+ try:
6504
+ if tool_status_active:
6505
+ # Best-effort match on call_id (some providers may omit it).
6506
+ if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
6507
+ _tool_status_stop()
6508
+ except Exception:
6509
+ try:
6510
+ _tool_status_stop()
6511
+ except Exception:
6512
+ pass
5983
6513
  # Stop any indicator before rendering results
5984
6514
  try:
5985
6515
  await _indicator_stop(clear=True)
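
Note: the match rule above is deliberately permissive: the transient line is cleared when either side lacks a call_id, or when the two ids agree. The same rule as a pure function, assuming ids compare as strings:

from typing import Any, Optional

def should_clear(status_call_id: Optional[Any], result_call_id: Optional[Any]) -> bool:
    # Providers may omit call_id on either event; when in doubt, clear the
    # transient line rather than leave a stale [RUNNING] status behind.
    if status_call_id is None or result_call_id is None:
        return True
    return str(result_call_id) == str(status_call_id)

assert should_clear(None, "abc")
assert should_clear("abc", None)
assert should_clear("abc", "abc")
assert not should_clear("abc", "xyz")
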
@@ -6087,7 +6617,7 @@ class ChatCLI:
6087
6617
  # Do not auto-restart the indicator here; wait for the next model event
6088
6618
 
6089
6619
  elif event == "tool.dispatch":
6090
- # Client-executed tool flow
6620
+ # Client-executed tool flow
6091
6621
  if not HAS_LOCAL_TOOLS:
6092
6622
  self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
6093
6623
  continue
@@ -6097,12 +6627,23 @@ class ChatCLI:
6097
6627
  # tool invocation on the corresponding 'tool.call' event. Counting
6098
6628
  # dispatch would double-count a single tool call.
6099
6629
 
6100
- session_id_d = data.get("session_id")
6101
- call_id = data.get("call_id")
6102
- name = data.get("name")
6103
- args = data.get("args", {}) or {}
6104
- job_token = data.get("job_token")
6105
- reqp = data.get("requested_policy", {}) or {}
6630
+ session_id_d = data.get("session_id")
6631
+ call_id = data.get("call_id")
6632
+ name = data.get("name")
6633
+ args = data.get("args", {}) or {}
6634
+ job_token = data.get("job_token")
6635
+ reqp = data.get("requested_policy", {}) or {}
6636
+
6637
+ # Track in-flight dispatch so Ctrl+C can cancel quickly.
6638
+ try:
6639
+ self._inflight_dispatch = {
6640
+ "session_id": session_id_d,
6641
+ "call_id": call_id,
6642
+ "job_token": job_token,
6643
+ "name": name,
6644
+ }
6645
+ except Exception:
6646
+ pass
6106
6647
 
6107
6648
  if DEBUG_SSE:
6108
6649
  self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
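
Note: _inflight_dispatch exists so an interrupt handler can identify the running job. A sketch of how a Ctrl+C path might consume it; the cancel_url endpoint and its payload shape are illustrative assumptions, not an API this diff defines:

import httpx
from typing import Any, Dict, Optional

async def cancel_inflight(client: httpx.AsyncClient,
                          cancel_url: str,  # hypothetical endpoint
                          inflight: Optional[Dict[str, Any]]) -> bool:
    # Nothing to cancel if no dispatch is currently tracked.
    if not isinstance(inflight, dict):
        return False
    payload = {
        "session_id": inflight.get("session_id"),
        "call_id": inflight.get("call_id"),
        "job_token": inflight.get("job_token"),
    }
    try:
        r = await client.post(cancel_url, json=payload, timeout=10.0)
        return r.status_code < 400
    except Exception:
        # Best-effort: a failed cancel must never crash the interrupt path.
        return False
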
@@ -6377,9 +6918,9 @@ class ChatCLI:
6377
6918
  except Exception:
6378
6919
  self._last_dispatch_ctx = None
6379
6920
 
6380
- # POST callback
6381
- try:
6382
- if session_id_d and call_id and job_token:
6921
+ # POST callback
6922
+ try:
6923
+ if session_id_d and call_id and job_token:
6383
6924
  payload_cb = {
6384
6925
  "session_id": session_id_d,
6385
6926
  "call_id": call_id,
@@ -6387,16 +6928,87 @@ class ChatCLI:
6387
6928
  "result": result,
6388
6929
  "job_token": job_token,
6389
6930
  }
6390
- r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
6391
- if r.status_code >= 400:
6392
- self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
6393
- except Exception as e:
6394
- self.ui.warn(f"tools.callback error: {e}")
6931
+ r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
6932
+ if r.status_code >= 400:
6933
+ self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
6934
+ except Exception as e:
6935
+ self.ui.warn(f"tools.callback error: {e}")
6936
+ finally:
6937
+ try:
6938
+ # Clear in-flight dispatch context when we send a callback.
6939
+ if isinstance(self._inflight_dispatch, dict):
6940
+ if str(self._inflight_dispatch.get("call_id")) == str(call_id):
6941
+ self._inflight_dispatch = None
6942
+ except Exception:
6943
+ pass
6395
6944
 
6396
- elif event == "message.completed":
6945
+ elif event == "message.completed":
6397
6946
  # Safety: this block handles only 'message.completed'.
6398
6947
  usage = data.get("usage", {})
6399
- model_used = data.get("model") or self.model
6948
+ model_used = data.get("model") or self.model
6949
+ # OpenAI: persist the last response id so future turns can use previous_response_id.
6950
+ try:
6951
+ if self._is_openai_model(model_used):
6952
+ # Prefer the explicit per-turn id list when provided by the server.
6953
+ ids = data.get("openai_response_ids")
6954
+ if isinstance(ids, list) and ids:
6955
+ for x in ids:
6956
+ if not isinstance(x, str):
6957
+ continue
6958
+ xs = x.strip()
6959
+ if not xs:
6960
+ continue
6961
+ try:
6962
+ if xs not in self._openai_response_id_history:
6963
+ self._openai_response_id_history.append(xs)
6964
+ except Exception:
6965
+ pass
6966
+ rid = data.get("openai_previous_response_id")
6967
+ if isinstance(rid, str) and rid.strip():
6968
+ self._openai_previous_response_id = rid.strip()
6969
+ try:
6970
+ if rid.strip() not in self._openai_response_id_history:
6971
+ self._openai_response_id_history.append(rid.strip())
6972
+ except Exception:
6973
+ pass
6974
+
6975
+ # OpenAI manual-state replay: server returns the delta items appended
6976
+ # during this turn (reasoning/tool calls/tool outputs). Persist them.
6977
+ try:
6978
+ delta = data.get("openai_delta_items")
6979
+ if isinstance(delta, list):
6980
+ base_items = (
6981
+ self._openai_last_sent_input_items
6982
+ if isinstance(self._openai_last_sent_input_items, list)
6983
+ else copy.deepcopy(self._openai_input_items)
6984
+ )
6985
+ # Normalize to a list of dicts where possible; keep unknown shapes as-is.
6986
+ merged: List[Any] = []
6987
+ try:
6988
+ merged.extend(list(base_items or []))
6989
+ except Exception:
6990
+ merged = list(base_items or []) if base_items is not None else []
6991
+ merged.extend(delta)
6992
+ # Store only dict-like items (server is expected to send dicts)
6993
+ cleaned: List[Dict[str, Any]] = []
6994
+ for it in merged:
6995
+ if isinstance(it, dict):
6996
+ cleaned.append(dict(it))
6997
+ self._openai_input_items = cleaned
6998
+ except Exception:
6999
+ pass
7000
+ finally:
7001
+ # Clear per-turn sent snapshot
7002
+ self._openai_last_sent_input_items = None
7003
+ except Exception:
7004
+ pass
7005
+ # Gemini: server may include an authoritative provider-native history snapshot.
7006
+ try:
7007
+ if isinstance(model_used, str) and model_used.startswith("gemini-"):
7008
+ rpm = data.get("raw_provider_messages")
7009
+ self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
7010
+ except Exception:
7011
+ pass
6400
7012
  # Mark completion for retry controller
6401
7013
  try:
6402
7014
  last_completed = True
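
Note: the merge above reads as: start from the exact item list sent this turn (falling back to a deep copy of the stored chain), append the server's delta, and keep only dict-shaped items. The same semantics as a pure function:

import copy
from typing import Any, Dict, List, Optional

def merge_input_items(last_sent: Optional[List[Dict[str, Any]]],
                      stored: List[Dict[str, Any]],
                      delta: List[Any]) -> List[Dict[str, Any]]:
    # Prefer the snapshot actually sent this turn so the delta is appended
    # against the same base the server saw.
    base: List[Any] = list(last_sent) if isinstance(last_sent, list) else copy.deepcopy(stored)
    merged = base + list(delta)
    # Keep only dict-shaped items; the server is expected to send dicts.
    return [dict(it) for it in merged if isinstance(it, dict)]

sent = [{"type": "message", "role": "user"}]
delta = [{"type": "reasoning"}, "noise", {"type": "function_call"}]
assert merge_input_items(sent, [], delta) == [
    {"type": "message", "role": "user"},
    {"type": "reasoning"},
    {"type": "function_call"},
]
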
@@ -7238,20 +7850,29 @@ class ChatCLI:
7238
7850
  pass
7239
7851
  return "".join(assistant_buf)
7240
7852
 
7241
- elif event == "provider.message":
7242
- # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7243
- provider = (data.get("provider") or "").lower()
7244
- msg = data.get("message")
7245
- if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7246
- # Append as-is to local raw history for the next turn
7247
- try:
7248
- self._kimi_raw_history.append(dict(msg))
7249
- except Exception:
7250
- try:
7251
- self._kimi_raw_history.append(msg) # type: ignore
7252
- except Exception:
7253
- pass
7254
- continue
7853
+ elif event == "provider.message":
7854
+ # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7855
+ provider = (data.get("provider") or "").lower()
7856
+ msg = data.get("message")
7857
+ if provider == "gemini":
7858
+ # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
7859
+ try:
7860
+ if isinstance(msg, dict):
7861
+ self._gemini_raw_history.append(dict(msg))
7862
+ elif isinstance(msg, list):
7863
+ self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
7864
+ except Exception:
7865
+ pass
7866
+ if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7867
+ # Append as-is to local raw history for the next turn
7868
+ try:
7869
+ self._kimi_raw_history.append(dict(msg))
7870
+ except Exception:
7871
+ try:
7872
+ self._kimi_raw_history.append(msg) # type: ignore
7873
+ except Exception:
7874
+ pass
7875
+ continue
7255
7876
 
7256
7877
  else:
7257
7878
  # TEMP DEBUG: show unknown/unhandled events
@@ -7799,7 +8420,7 @@ class ChatCLI:
7799
8420
  "gpt-5": 400000,
7800
8421
  "gpt-5-2025-08-07": 400000,
7801
8422
  "codex-mini-latest": 200000,
7802
- "gemini-2.5-pro": 1048576,
8423
+ # (removed gemini-2.5-pro)
7803
8424
  "gemini-3-flash-preview": 1048576,
7804
8425
  "gemini-3-pro-preview": 1000000,
7805
8426
  "grok-4-1-fast-reasoning": 2000000,
@@ -7816,10 +8437,10 @@ class ChatCLI:
7816
8437
  "claude-sonnet-4-5-20250929-thinking": 1000000,
7817
8438
  "claude-opus-4-5-20251101": 200000,
7818
8439
  "claude-opus-4-5-20251101-thinking": 200000,
7819
- "glm-4.6": 200000,
7820
- })
7821
- except Exception:
7822
- pass
8440
+ "glm-4.7": 200000,
8441
+ })
8442
+ except Exception:
8443
+ pass
7823
8444
  self._model_ctx_map = ctx_map
7824
8445
  return ctx_map
7825
8446
 
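Note: consumers of a curated map like this usually want a fallback for unknown model ids. A minimal lookup sketch; the entries are taken from this diff, while the 128k default is an illustrative assumption:

from typing import Dict

CTX_MAP: Dict[str, int] = {
    # Entries as updated in this diff.
    "gpt-5": 400_000,
    "gemini-3-flash-preview": 1_048_576,
    "glm-4.7": 200_000,
}

def context_window(model: str, default: int = 128_000) -> int:
    # The default is a placeholder, not a value defined by cli.py.
    return CTX_MAP.get((model or "").strip(), default)

assert context_window("gpt-5") == 400_000
assert context_window("unknown-model") == 128_000
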
@@ -8206,8 +8827,11 @@ class ChatCLI:
8206
8827
  "We’ll configure a few defaults. You can change these later via /settings.",
8207
8828
  )
8208
8829
 
8209
- # --- 1) Default model (menu, no Y/N) ---
8210
- await self._wizard_model_step()
8830
+ # --- 1) Default model (menu) ---
8831
+ await self._wizard_model_step()
8832
+ # If the picker was cancelled (or model still unset), choose a sensible default.
8833
+ if not self.model:
8834
+ self.model = self._recommended_default_model()
8211
8835
 
8212
8836
  # --- 2) Tools (always ON per design) ---
8213
8837
  self.requested_tools = True
@@ -8217,9 +8841,9 @@ class ChatCLI:
8217
8841
  "Control levels: 1=read-only, 2=approval on write/exec, 3=no approvals"
8218
8842
  )
8219
8843
  await self.set_level_menu()
8220
- if self.control_level not in (1, 2, 3):
8221
- # Default to Level 2 if user aborted
8222
- self.control_level = 2
8844
+ if self.control_level not in (1, 2, 3):
8845
+ # Default to Level 3 if user aborted
8846
+ self.control_level = 3
8223
8847
 
8224
8848
  # --- 4) Agent scope (menus; only type on custom path) ---
8225
8849
  self.ui.print(
@@ -8270,30 +8894,45 @@ class ChatCLI:
8270
8894
  except Exception:
8271
8895
  curv = "medium"
8272
8896
 
8273
- verb_choice = await self._menu_choice(
8274
- "Text verbosity",
8275
- "How verbose should responses be by default?",
8276
- [
8277
- ("low", "Low – short, to-the-point answers"),
8278
- ("medium", "Medium – balanced detail (recommended)"),
8279
- ("high", "High – more verbose explanations"),
8280
- ],
8281
- )
8897
+ verbosity_choices: List[Tuple[str, str]] = []
8898
+ if self._is_gpt_model(self.model):
8899
+ # Default-first: Low for GPT models.
8900
+ verbosity_choices = [
8901
+ ("low", "Low – short, to-the-point answers"),
8902
+ ("medium", "Medium – balanced detail"),
8903
+ ("high", "High – more verbose explanations"),
8904
+ ]
8905
+ else:
8906
+ # Default-first: Medium for non-GPT models; do not surface "Low".
8907
+ verbosity_choices = [
8908
+ ("medium", "Medium – balanced detail (recommended)"),
8909
+ ("high", "High – more verbose explanations"),
8910
+ ]
8911
+
8912
+ verb_choice = await self._menu_choice(
8913
+ "Text verbosity",
8914
+ "How verbose should responses be by default?",
8915
+ verbosity_choices,
8916
+ )
8282
8917
  if verb_choice in ("low", "medium", "high"):
8283
8918
  self.text_verbosity = verb_choice
8284
8919
  else:
8285
8920
  self.text_verbosity = curv or "medium"
8286
8921
 
8287
- # --- 7) Tool preambles (menu) ---
8288
- preamble_choice = await self._menu_choice(
8289
- "Tool call preambles",
8290
- "Before using tools, the agent can briefly explain what it will do and why (supported models only).",
8291
- [
8292
- ("on", "Enable preambles"),
8293
- ("off", "Disable preambles (default)"),
8294
- ],
8295
- )
8296
- self.preambles_enabled = preamble_choice == "on"
8922
+ # --- 7) Tool preambles (GPT-5 only) ---
8923
+ if self._supports_preambles(self.model):
8924
+ preamble_choice = await self._menu_choice(
8925
+ "Tool call preambles",
8926
+ "Before using tools, the agent can briefly explain what it will do and why.",
8927
+ [
8928
+ ("off", "Disable preambles (default)"),
8929
+ ("on", "Enable preambles"),
8930
+ ],
8931
+ )
8932
+ self.preambles_enabled = preamble_choice == "on"
8933
+ else:
8934
+ # Never enable preambles on unsupported models.
8935
+ self.preambles_enabled = False
8297
8936
 
8298
8937
  # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
8299
8938
  custom_choice = await self._menu_choice(
@@ -8367,7 +9006,8 @@ class ChatCLI:
8367
9006
  text = m.get("content", "")
8368
9007
  contents.append({"role": role, "parts": [{"text": text}]})
8369
9008
  # Pick a Gemini model for counting; fall back if current isn't Gemini
8370
- count_model = "gemini-2.5-pro"
9009
+ # (gemini-2.5-pro removed from curated lists)
9010
+ count_model = "gemini-3-flash-preview"
8371
9011
  res = client.models.count_tokens(model=count_model, contents=contents)
8372
9012
  t = int(getattr(res, "total_tokens", 0) or 0)
8373
9013
  if t > 0:
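
Note: the counting call above uses the google-genai client. A self-contained sketch of the same call, assuming a GEMINI_API_KEY is available in the environment (genai.Client() reads it by default):

# pip install google-genai
from google import genai

client = genai.Client()  # picks up GEMINI_API_KEY from the environment
contents = [
    {"role": "user", "parts": [{"text": "Hello there"}]},
    {"role": "model", "parts": [{"text": "Hi! How can I help?"}]},
]
res = client.models.count_tokens(
    model="gemini-3-flash-preview",  # same fallback model as the diff
    contents=contents,
)
print(int(getattr(res, "total_tokens", 0) or 0))
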
@@ -8425,16 +9065,16 @@ class ChatCLI:
8425
9065
  blocks.append(txt.strip())
8426
9066
  except Exception:
8427
9067
  pass
8428
- # Tool preamble
8429
- try:
8430
- if bool(getattr(self, "preambles_enabled", False)):
8431
- blocks.append(
8432
- "Tool usage: when you need to read or modify files or run commands, "
8433
- "explicitly explain why you're using a tool, what you'll do, and how it "
8434
- "advances the user's goal before calling the tool."
8435
- )
8436
- except Exception:
8437
- pass
9068
+ # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
9069
+ try:
9070
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
9071
+ blocks.append(
9072
+ "Tool usage: when you need to read or modify files or run commands, "
9073
+ "explicitly explain why you're using a tool, what you'll do, and how it "
9074
+ "advances the user's goal before calling the tool."
9075
+ )
9076
+ except Exception:
9077
+ pass
8438
9078
  # Working memory preview (does not touch _did_inject_working_memory or paths)
8439
9079
  try:
8440
9080
  if self._memory_paths_for_first_turn: