henosis-cli 0.6.8__py3-none-any.whl → 0.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py CHANGED
@@ -8,6 +8,7 @@
 
 import argparse
 import asyncio
+import copy
 import json
 import os
 import sys
@@ -30,6 +31,7 @@ import importlib
 import importlib.util
 import importlib.metadata
 import re
+import base64
 
 # Optional websockets for Agent Mode (dev-only WS bridge)
 try:
@@ -80,64 +82,53 @@ except Exception:
 Confirm = None
 Text = None
 
+"""prompt_toolkit is intentionally not used.
+
+We previously relied on prompt_toolkit for interactive line editing and menus.
+Copy/selection behavior is terminal- and prompt_toolkit-implementation specific
+and proved unreliable across environments.
+
+The CLI now uses our dependency-free input engine (henosis_cli_tools.input_engine)
+and a dependency-free highlighted menu implementation.
 """
-prompt_toolkit is optional (used for some menus when available). Input editing
-for chat now uses a self-contained cross-platform engine that supports
-Shift+Enter newlines on Windows and on modern POSIX terminals that advertise
-extended keyboard protocols. It falls back to Ctrl+J for newline when
-Shift+Enter cannot be distinguished.
-"""
-try:
-    from prompt_toolkit import PromptSession
-    from prompt_toolkit.completion import WordCompleter
-    from prompt_toolkit.key_binding import KeyBindings
-    from prompt_toolkit.selection import SelectionType
-    from prompt_toolkit.application import Application
-    from prompt_toolkit.application.current import get_app
-    from prompt_toolkit.layout import Layout
-    from prompt_toolkit.layout.containers import HSplit, Window
-    from prompt_toolkit.layout.dimension import Dimension
-    from prompt_toolkit.layout.controls import FormattedTextControl
-    from prompt_toolkit.styles import Style
-    HAS_PT = True
-except Exception:
-    HAS_PT = False
-    PromptSession = None
-    WordCompleter = None
-    KeyBindings = None
-    Application = None
-    get_app = None
-    Layout = None
-    HSplit = None
-    Window = None
-    Dimension = None
-    FormattedTextControl = None
-    Style = None
+
+# Keep these names defined for legacy branches that are guarded by HAS_PT.
+HAS_PT = False
+PromptSession = None
+WordCompleter = None
+KeyBindings = None
+SelectionType = None
+Condition = None
+Application = None
+get_app = None
+Layout = None
+HSplit = None
+Window = None
+Dimension = None
+FormattedTextControl = None
+Style = None
 
 # If optional deps are missing, print a friendly note but continue with fallbacks.
-if not HAS_RICH or not HAS_PT:
-    missing = []
-    if not HAS_RICH:
-        missing.append("rich")
-    if not HAS_PT:
-        missing.append("prompt_toolkit")
-    if missing:
-        msg = (
-            "Note: optional packages missing: "
-            + ", ".join(missing)
-            + "\n- rich enables colorful output\n- prompt_toolkit enables arrow-key menus\n"
+if not HAS_RICH:
+    try:
+        sys.stderr.write(
+            "Note: optional package missing: rich\n"
+            "- rich enables colorful output\n"
         )
-        try:
-            sys.stderr.write(msg)
-        except Exception:
-            pass
+    except Exception:
+        pass
 
 # New: low-level input engine (no third-party deps) for Shift+Enter newlines
+# Also provides a best-effort clipboard helper used for Ctrl+C copy when our
+# prompt_toolkit selection is active.
 try:
-    from henosis_cli_tools.input_engine import make_engine
+    from henosis_cli_tools.input_engine import make_engine, _copy_to_clipboard as _hn_copy_to_clipboard
     HAS_INPUT_ENGINE = True
 except Exception:
     HAS_INPUT_ENGINE = False
+
+    def _hn_copy_to_clipboard(text: str) -> bool:  # type: ignore
+        return False
 DEBUG_SSE = False  # set via --debug-sse
 DEBUG_REQ = False  # set via --debug-req
 # Max number of recent SSE event summaries to retain for diagnostics when a stream
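Note on the fallback stub above: when the `henosis_cli_tools.input_engine` import fails, `_hn_copy_to_clipboard` is redefined as a no-op returning False, so call sites can invoke it unconditionally. A minimal sketch of that guarded call pattern (illustrative only; the surrounding handler is not part of this diff):

    import sys

    def copy_selection(text: str) -> None:
        copied = False
        try:
            # Real clipboard helper when the input engine imported, no-op stub otherwise.
            copied = _hn_copy_to_clipboard(text)
        except Exception:
            pass
        if not copied:
            sys.stderr.write("copy unavailable in this environment\n")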
@@ -683,7 +674,7 @@ class UI:
         for n, ty, sz in rows:
             print(f"{n:<40} {ty:<8} {sz}")
 
-class ChatCLI:
+class ChatCLI:
     def __init__(
         self,
         server: str,
@@ -800,23 +791,25 @@ class ChatCLI:
         # - concise: only model (+thinking level when applicable) and context meter
         # - verbose: full details (current behavior)
         self.usage_info_mode: str = "verbose"
-        # Reasoning effort selector for OpenAI reasoning models (low|medium|high|xhigh). Default: medium
-        # Note: 'xhigh' is only applied by the server for models that support it (e.g., gpt-5.2* and gpt-5.1-codex-max).
-        self.reasoning_effort: str = "medium"
+        # Reasoning effort selector for OpenAI reasoning models (low|medium|high|xhigh). Default: medium
+        # Note: 'xhigh' is only applied by the server for models that support it (e.g., gpt-5.2* and gpt-5.1-codex-max).
+        self.reasoning_effort: str = "medium"
         # Retain provider-native tool results between turns (e.g., Kimi reasoning/tool messages)
         self.retain_native_tool_results: bool = False
         # Anthropic thinking-mode budget tokens (applies to '-thinking' models; None = server default)
-        self.thinking_budget_tokens: Optional[int] = None
-        # Anthropic prompt cache TTL preference: None=server default, or "5m" | "1h"
+        self.thinking_budget_tokens: Optional[int] = None
+        # Anthropic effort (Opus 4.6/4.5): low|medium|high|max. Default: high.
+        self.anthropic_effort: str = "high"
+        # Anthropic prompt cache TTL preference: None=server default, or "5m" | "1h"
         self.anthropic_cache_ttl: Optional[str] = None
-        # Text verbosity selector (UI only; not sent to server requests by default)
-        self.text_verbosity: str = "medium"  # low | medium | high
-        # Tool call preambles (UI toggle only)
-        self.preambles_enabled: bool = False
-        # Codex developer prompt injection (system) for Codex models only
-        self.codex_prompt_enabled: bool = True
-        # Codex Max: allow ALL tools instead of minimal subset
-        self.codex_max_allow_all_tools: bool = False
+        # Text verbosity selector (UI only; not sent to server requests by default)
+        self.text_verbosity: str = "medium"  # low | medium | high
+        # Tool call preambles (UI toggle only)
+        self.preambles_enabled: bool = False
+        # Codex developer prompt injection (system) for Codex models only
+        self.codex_prompt_enabled: bool = True
+        # Codex Max: allow ALL tools instead of minimal subset
+        self.codex_max_allow_all_tools: bool = False
         # Custom first-turn injection (like codebase map) — toggle + editable text
         self.custom_first_turn_enabled: bool = False
         self.custom_first_turn_text: str = ""
  self.custom_first_turn_text: str = ""
@@ -997,10 +990,25 @@ class ChatCLI:
         }
         # Track last used model for display
         self._last_used_model: Optional[str] = None
-        # Provider-native history for Kimi (preserve reasoning_content across turns)
-        self._kimi_raw_history: List[Dict[str, Any]] = []
-        # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
-        self._gemini_raw_history: List[Dict[str, Any]] = []
+        # Provider-native history for Kimi (preserve reasoning_content across turns)
+        self._kimi_raw_history: List[Dict[str, Any]] = []
+        # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
+        self._gemini_raw_history: List[Dict[str, Any]] = []
+        # OpenAI Responses API threading: retain previous response id across turns
+        self._openai_previous_response_id: Optional[str] = None
+        # OpenAI Responses API threading: retain the full chain of response ids across turns
+        # (server will also echo per-turn ids in message.completed.openai_response_ids)
+        self._openai_response_id_history: List[str] = []
+
+        # OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+        # including reasoning items, function_call items, and function_call_output items.
+        self._openai_input_items: List[Dict[str, Any]] = []
+        # For robustness, remember exactly what we sent as openai_input_items for the current turn
+        # so we can append server-provided openai_delta_items deterministically.
+        self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+        # Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+        # Shape: {session_id, call_id, job_token, name}
+        self._inflight_dispatch: Optional[Dict[str, Any]] = None
         # Last server billing info from /api/usage/commit
         self._last_commit_cost_usd: float = 0.0
         self._last_remaining_credits: Optional[float] = None
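The `_openai_input_items` / `_openai_last_sent_input_items` pair added above implements manual, stateless (ZDR-safe) threading for the OpenAI Responses API. A sketch of the bookkeeping the comments describe, assuming the server reports the turn's new items as `openai_delta_items` in its completion payload (the helper name here is hypothetical):

    from typing import Any, Dict, List, Optional

    def advance_openai_thread(
        last_sent: Optional[List[Dict[str, Any]]],
        delta_items: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        # Append the server-provided delta onto exactly what was sent this turn
        # (reasoning, function_call, and function_call_output items included),
        # yielding the deterministic input chain for the next turn.
        return list(last_sent or []) + list(delta_items)

    # e.g. self._openai_input_items = advance_openai_thread(
    #          self._openai_last_sent_input_items,
    #          completed.get("openai_delta_items", []))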
@@ -1054,6 +1062,12 @@ class ChatCLI:
         # Track Ctrl+C timing for double-press-to-exit behavior
         self._last_interrupt_ts: Optional[float] = None
 
+        # Ctrl+C during a running stream should not kill the entire CLI.
+        # Instead, we cancel the in-flight turn and reopen the last user query for editing.
+        # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+        self._pending_user_edit: Optional[str] = None
+        self._pending_turn_snapshot: Optional[Dict[str, Any]] = None
+
         # Timers: session-level and per-turn wall-clock timers
         self._session_started_at: Optional[float] = None  # time.perf_counter() at session start
         self._turn_started_at: Optional[float] = None  # time.perf_counter() per turn start
@@ -1062,35 +1076,8 @@ class ChatCLI:
         self._commands_catalog: List[Dict[str, str]] = self._build_commands_catalog()
         # Low-level input engine (supports Shift+Enter newlines where possible)
         self._input_engine = make_engine() if HAS_INPUT_ENGINE else None
-        # Optional prompt_toolkit session for inline slash-command completion
+        # prompt_toolkit intentionally not used; always rely on the input engine.
         self._pt_session = None
-        if HAS_PT and PromptSession:
-            try:
-                # Build completer and simple key bindings: Enter submits, Ctrl+J inserts newline
-                self._pt_completer = self._commands_word_completer()
-                kb = KeyBindings()
-
-                @kb.add("enter")
-                def _submit(event):
-                    # Submit entire buffer
-                    event.app.exit(result=event.current_buffer.text)
-
-                @kb.add("c-j")
-                def _newline(event):
-                    # Insert literal newline
-                    event.current_buffer.insert_text("\n")
-
-                # Bottom toolbar with quick hints
-                def _toolbar() -> str:
-                    return " Type / then Tab to complete, or Enter on '/' to open the palette. Ctrl+J inserts a newline. "
-
-                # Create session
-                self._pt_session = PromptSession(
-                    key_bindings=kb,
-                    bottom_toolbar=_toolbar,
-                )
-            except Exception:
-                self._pt_session = None
 
     # ----------------------- Provider heuristics -----------------------
     def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
@@ -1339,32 +1326,34 @@ class ChatCLI:
 
     # ----------------------- Pricing + costs -----------------------
 
-    def _pricing_table(self) -> Dict[str, Dict[str, Any]]:
-        # Match server chat_adapter PRICING_PER_MILLION (subset is fine; unknown -> 0)
-        return {
-            # OpenAI
-            "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
-            # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
-            "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
-            "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
+    def _pricing_table(self) -> Dict[str, Dict[str, Any]]:
+        # Match server chat_adapter PRICING_PER_MILLION (subset is fine; unknown -> 0)
+        return {
+            # OpenAI
+            "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+            # New: gpt-5.2-codex
+            # Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+            "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
+            # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
+            "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
+            "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
             "gpt-5-2025-08-07": {"input": 1.75, "output": 14.00, "provider": "openai"},
             "gpt-5-codex": {"input": 1.75, "output": 14.00, "provider": "openai"},
-            "gpt-4o-mini": {"input": 0.21, "output": 0.84, "provider": "openai"},
+            "gpt-4o-mini": {"input": 0.21, "output": 0.84, "provider": "openai"},
             # Codex Mini (fine-tuned o4-mini for CLI). Pricing includes 1.4x margin per codex-mini.txt.
             # Cached input tokens override: $0.375 * 1.4 = $0.525 per 1M (25% of input rate).
             "codex-mini-latest": {"input": 2.10, "output": 8.40, "cached_input": 0.525, "provider": "openai"},
             # Anthropic
             "claude-sonnet-4-20250514": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
             "claude-sonnet-4-20250514-thinking": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
-            "claude-sonnet-4-5-20250929": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
+            "claude-sonnet-4-5-20250929": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
             "claude-sonnet-4-5-20250929-thinking": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
-            # New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
-            "claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
-            "claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
+            # New Opus 4.6 (adaptive thinking + effort; 1M context)
+            "claude-opus-4-6": {"input": 5.25, "output": 25.25, "provider": "anthropic"},
+            "claude-opus-4-6-thinking": {"input": 5.25, "output": 25.25, "provider": "anthropic"},
             # Gemini
-            "gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
-            # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
-            "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
+            # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
+            "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
             # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
             # CLI uses the low-tier 1.4x margin rates for estimates. High-tier
             # pricing based on total_tokens > 200K is applied on the server.
@@ -1374,17 +1363,15 @@ class ChatCLI:
             "grok-4-1-fast-non-reasoning": {"input": 0.28, "output": 0.70, "provider": "xai"},
             "grok-4": {"input": 4.20, "output": 21.00, "provider": "xai"},
             "grok-code-fast-1": {"input": 0.28, "output": 2.10, "provider": "xai"},
-            # DeepSeek V3.2 (+$0.25 per 1M margin)
-            "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-            "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-            "deepseek-3.2-speciale": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            # DeepSeek V3.2 (+$0.25 per 1M margin)
+            "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            # Removed: deepseek speciale (not supported)
             # Kimi
-            "kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
-            "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
-            "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
+            "kimi-k2.5": {"input": 0.85, "output": 3.25, "provider": "kimi"},
             # GLM (Z.AI)
             # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
-            "glm-4.6": {"input": 0.84, "output": 3.08, "provider": "glm"},
+            "glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
         }
 
     def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
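All pricing entries are USD per million tokens, so client-side estimates are straight prorations. A worked example using the new `gpt-5.2-codex` row (token counts invented):

    rate_in, rate_cached, rate_out = 1.75, 0.175, 14.00  # USD per 1M tokens

    prompt_tokens = 120_000
    cached_tokens = 80_000   # portion of the prompt served from cache
    output_tokens = 4_000

    cost = ((prompt_tokens - cached_tokens) / 1_000_000) * rate_in \
        + (cached_tokens / 1_000_000) * rate_cached \
        + (output_tokens / 1_000_000) * rate_out
    print(f"${cost:.4f}")  # -> $0.1400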
@@ -1398,70 +1385,205 @@ class ChatCLI:
             return table.get("gpt-5-2025-08-07", {"input": 0.0, "output": 0.0, "provider": "unknown"})
         return {"input": 0.0, "output": 0.0, "provider": "unknown"}
 
-    def _resolve_model_alias(self, raw_name: Optional[str]) -> Optional[str]:
-        """Normalize model aliases to their canonical server identifiers."""
-        if not raw_name:
-            return None
-        name = raw_name.strip()
-        lower = name.lower()
-        aliases = {
-            "gemini-3": "gemini-3-pro-preview",
-            "gemini-3-pro": "gemini-3-pro-preview",
-            "gemini-3-preview": "gemini-3-pro-preview",
-            "gemini-3-flash": "gemini-3-flash-preview",
-            "gemini-flash-3": "gemini-3-flash-preview",
-            "gemini-new": "gemini-3-pro-preview",
-            "new-gemini": "gemini-3-pro-preview",
-            "gemini-pro-3": "gemini-3-pro-preview",
-            "gpt5": "gpt-5",
-            "gpt4": "gpt-4o",
-            # Anthropic Claude Opus 4.5 (thinking OFF) short aliases
-            # Map common shorthand variants to the canonical non-thinking model id
-            "claude-opus-4-5": "claude-opus-4-5-20251101",
-            "claude-opus-4.5": "claude-opus-4-5-20251101",
-            "opus-4-5": "claude-opus-4-5-20251101",
-            "opus-4.5": "claude-opus-4-5-20251101",
-            "opus45": "claude-opus-4-5-20251101",
-            "claude-opus45": "claude-opus-4-5-20251101",
-        }
-        return aliases.get(lower, name)
-
-    def _apply_model_side_effects(self) -> None:
-        """Adjust related settings when certain models are selected."""
-        try:
-            model_name = (self.model or "").strip().lower()
-        except Exception:
-            model_name = ""
-        try:
-            if model_name in {"gpt-5.2-pro"}:
-                # Default these to high, but don't clobber a user-chosen xhigh.
-                if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
-                    self.reasoning_effort = "high"
-            # Codex family: disable preambles for better behavior
-            if "codex" in model_name:
-                self.preambles_enabled = False
-        except Exception:
-            try:
-                self.reasoning_effort = "high"
-            except Exception:
-                pass
-    def _is_codex_model(self, model: Optional[str]) -> bool:
-        try:
-            return bool(model) and ("codex" in str(model).lower())
-        except Exception:
-            return False
-    def _supports_xhigh_reasoning_effort(self, model: Optional[str]) -> bool:
-        """Return True if the OpenAI model supports reasoning_effort='xhigh'.
-
-        OpenAI supports xhigh on:
-        - gpt-5.1-codex-max
-        - the gpt-5.2* family
-        """
-        try:
-            m = (str(model).strip().lower() if model else "")
-            return m.startswith("gpt-5.2")
-        except Exception:
-            return False
+    def _resolve_model_alias(self, raw_name: Optional[str]) -> Optional[str]:
+        """Normalize model aliases to their canonical server identifiers."""
+        if not raw_name:
+            return None
+        name = raw_name.strip()
+        lower = name.lower()
+        aliases = {
+            "gemini-3": "gemini-3-pro-preview",
+            "gemini-3-pro": "gemini-3-pro-preview",
+            "gemini-3-preview": "gemini-3-pro-preview",
+            "gemini-3-flash": "gemini-3-flash-preview",
+            "gemini-flash-3": "gemini-3-flash-preview",
+            "gemini-new": "gemini-3-pro-preview",
+            "new-gemini": "gemini-3-pro-preview",
+            "gemini-pro-3": "gemini-3-pro-preview",
+            "gpt5": "gpt-5",
+            "gpt4": "gpt-4o",
+            # Anthropic Claude Opus 4.6 short aliases
+            "claude-opus-4-6": "claude-opus-4-6",
+            "claude-opus-4.6": "claude-opus-4-6",
+            "opus-4-6": "claude-opus-4-6",
+            "opus-4.6": "claude-opus-4-6",
+            "opus46": "claude-opus-4-6",
+            "claude-opus46": "claude-opus-4-6",
+        }
+        return aliases.get(lower, name)
+
+    def _apply_model_side_effects(self) -> None:
+        """Adjust related settings when certain models are selected."""
+        try:
+            model_name = (self.model or "").strip().lower()
+        except Exception:
+            model_name = ""
+        try:
+            # Provider-native state resets when switching away from OpenAI.
+            try:
+                if self.model and (not self._is_openai_model(self.model)):
+                    self._openai_previous_response_id = None
+                    self._openai_response_id_history = []
+                    self._openai_input_items = []
+                    self._openai_last_sent_input_items = None
+            except Exception:
+                pass
+            if model_name in {"gpt-5.2-pro"}:
+                # Default these to high, but don't clobber a user-chosen xhigh.
+                if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
+                    self.reasoning_effort = "high"
+            # Codex family: disable preambles for better behavior
+            if "codex" in model_name:
+                self.preambles_enabled = False
+            # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+            # Force-disable for all other models (even if a saved setting had it enabled).
+            if not self._supports_preambles(self.model):
+                self.preambles_enabled = False
+        except Exception:
+            try:
+                self.reasoning_effort = "high"
+            except Exception:
+                pass
+
+    def _supports_preambles(self, model: Optional[str]) -> bool:
+        """Tool-call preambles are a CLI-only UX hint.
+
+        Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+        In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+        """
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            # Only the base GPT-5 line supports this UX toggle.
+            # Allow:
+            #   - "gpt-5"
+            #   - date-pinned variants like "gpt-5-2025-08-07"
+            # Disallow:
+            #   - versioned families like "gpt-5.1*" / "gpt-5.2*"
+            if not (m == "gpt-5" or m.startswith("gpt-5-")):
+                return False
+            if "codex" in m:
+                return False
+            return True
+        except Exception:
+            return False
+
+    def _is_openai_model(self, model: Optional[str]) -> bool:
+        """Best-effort model/provider discriminator for client-side state.
+
+        The server is multi-provider. For the CLI we treat anything that isn't an explicit
+        non-OpenAI provider prefix as OpenAI.
+        """
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            if not m:
+                return False
+            for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+                if m.startswith(pfx):
+                    return False
+            # Everything else defaults to OpenAI in this repo.
+            return True
+        except Exception:
+            return False
+
+    def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+        """Whether this provider has an implemented native tool/thinking retention path."""
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            if m.startswith("gemini-"):
+                return True
+            if m.startswith("kimi-"):
+                return bool(getattr(self, "retain_native_tool_results", False))
+            if self._is_openai_model(model):
+                return True
+            return False
+        except Exception:
+            return False
+
+    def _sanitize_openai_items(self, items: Any) -> Any:
+        """Recursively strip fields from OpenAI output items that cause errors when used as input."""
+        if isinstance(items, list):
+            return [self._sanitize_openai_items(x) for x in items]
+        if isinstance(items, dict):
+            # 'status' is the main offender causing 400s
+            bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+            return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+        return items
+
+    async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+        """If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+        This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+        Best-effort; never raises.
+        """
+        ctx = None
+        try:
+            ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+        except Exception:
+            ctx = None
+        if not ctx:
+            return
+        session_id = ctx.get("session_id")
+        call_id = ctx.get("call_id")
+        job_token = ctx.get("job_token")
+        name = ctx.get("name")
+        if not (session_id and call_id and job_token):
+            return
+        payload_cb = {
+            "session_id": session_id,
+            "call_id": call_id,
+            "name": name,
+            "job_token": job_token,
+            "result": {
+                "ok": False,
+                "cancelled": True,
+                "error": str(reason or "cancelled"),
+            },
+        }
+        try:
+            # Keep it short; we just want to unblock the server.
+            http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+        except Exception:
+            http_timeout = None
+        try:
+            async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+                await client.post(self.tools_callback_url, json=payload_cb)
+        except Exception:
+            pass
+        finally:
+            try:
+                self._inflight_dispatch = None
+            except Exception:
+                pass
+
+    def _is_gpt_model(self, model: Optional[str]) -> bool:
+        """True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+        try:
+            return bool(model) and str(model).strip().lower().startswith("gpt-")
+        except Exception:
+            return False
+    def _is_codex_model(self, model: Optional[str]) -> bool:
+        try:
+            return bool(model) and ("codex" in str(model).lower())
+        except Exception:
+            return False
+    def _supports_xhigh_reasoning_effort(self, model: Optional[str]) -> bool:
+        """Return True if the OpenAI model supports reasoning_effort='xhigh'.
+
+        OpenAI supports xhigh on:
+        - gpt-5.1-codex-max
+        - the gpt-5.2* family
+        """
+        try:
+            m = (str(model).strip().lower() if model else "")
+            return m.startswith("gpt-5.2")
+        except Exception:
+            return False
 
     def _is_deepseek_like(self, model: Optional[str]) -> bool:
         try:
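`_sanitize_openai_items` exists because echoing Responses API output items back as input can fail with a 400 when response-only fields such as `status` are present. A standalone restatement of the recursive strip, with a quick self-check:

    BAD_KEYS = {"status", "usage", "completed_at", "created_at",
                "incomplete_details", "metadata", "parsed_arguments"}

    def sanitize(items):
        if isinstance(items, list):
            return [sanitize(x) for x in items]
        if isinstance(items, dict):
            return {k: sanitize(v) for k, v in items.items() if k not in BAD_KEYS}
        return items

    item = {"type": "function_call", "name": "read_file",
            "arguments": "{}", "status": "completed"}
    assert sanitize([item]) == [
        {"type": "function_call", "name": "read_file", "arguments": "{}"}]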
@@ -1469,87 +1591,87 @@ class ChatCLI:
         except Exception:
             return False
 
-    def compute_cost_usd(self, model: Optional[str], usage: Dict[str, Any]) -> float:
+    def compute_cost_usd(self, model: Optional[str], usage: Dict[str, Any]) -> float:
         price = self._resolve_price(model)
         provider = (price.get("provider") or "").lower()
         # prefer detailed fields when present
         prompt_tokens = int(usage.get("prompt_tokens") or usage.get("turn", {}).get("input_tokens", 0) or 0)
         completion_tokens = int(usage.get("completion_tokens") or usage.get("turn", {}).get("output_tokens", 0) or 0)
-        total_tokens = int(usage.get("total_tokens") or usage.get("turn", {}).get("total_tokens", 0) or (prompt_tokens + completion_tokens) or 0)
-        image_tokens = int(usage.get("image_tokens", 0) or 0)
-        thinking_tokens = int(usage.get("thinking_tokens", 0) or 0)
-        # Reasoning gap: bill as completion-side if total > (prompt + completion)
-        reasoning_gap = 0
-        try:
-            if total_tokens > (prompt_tokens + completion_tokens):
-                reasoning_gap = total_tokens - (prompt_tokens + completion_tokens)
-        except Exception:
-            reasoning_gap = 0
-        # Anthropic: count image tokens as prompt-side
-        if provider == "anthropic" and image_tokens:
-            prompt_tokens += image_tokens
-        # Anthropic prompt caching: pricing logic (reads @ 10%, creation @ 1.25x/2x)
-        if provider == "anthropic":
-            cache_read = int(usage.get("cache_read_input_tokens", 0) or 0)
-            cache_creation = int(usage.get("cache_creation_input_tokens", 0) or 0)
-            # Try to detect creation breakdown if available
-            cc_5m = 0
-            cc_1h = 0
-            try:
-                cc_map = usage.get("cache_creation") if isinstance(usage, dict) else None
-                if isinstance(cc_map, dict):
-                    cc_5m = int(cc_map.get("ephemeral_5m_input_tokens", 0) or 0)
-                    cc_1h = int(cc_map.get("ephemeral_1h_input_tokens", 0) or 0)
-            except Exception:
-                pass
-            # If breakdown is missing but total creation exists, assume 5m (1.25x) as default/safe estimate
-            if cache_creation > 0 and (cc_5m + cc_1h) == 0:
-                cc_5m = cache_creation
-
-            # Only apply special pricing if cache fields are present
-            if cache_read > 0 or cache_creation > 0:
-                in_rate = float(price.get("input", 0.0))
-                out_rate = float(price.get("output", 0.0))
-
-                # Non-cached prompt part
-                non_cached = max(0, int(prompt_tokens) - int(cache_read) - int(cache_creation))
-
-                cost = 0.0
-                # Standard input
-                cost += (non_cached / 1_000_000.0) * in_rate
-                # Cache reads (10% of input rate)
-                cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)
-                # Cache creation (1.25x for 5m, 2.0x for 1h)
-                if cc_5m > 0:
-                    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)
-                if cc_1h > 0:
-                    cost += (cc_1h / 1_000_000.0) * (in_rate * 2.00)
-
-                # Output + reasoning gap
-                completion_total = completion_tokens
-                if total_tokens and (prompt_tokens + completion_tokens) != total_tokens:
-                    completion_total += reasoning_gap
-                else:
-                    if thinking_tokens and not usage.get("total_tokens"):
-                        completion_total += thinking_tokens
-
-                cost += (completion_total / 1_000_000.0) * out_rate
-                return float(cost)
-
-        # reasoning_gap already computed above
+        total_tokens = int(usage.get("total_tokens") or usage.get("turn", {}).get("total_tokens", 0) or (prompt_tokens + completion_tokens) or 0)
+        image_tokens = int(usage.get("image_tokens", 0) or 0)
+        thinking_tokens = int(usage.get("thinking_tokens", 0) or 0)
+        # Reasoning gap: bill as completion-side if total > (prompt + completion)
+        reasoning_gap = 0
+        try:
+            if total_tokens > (prompt_tokens + completion_tokens):
+                reasoning_gap = total_tokens - (prompt_tokens + completion_tokens)
+        except Exception:
+            reasoning_gap = 0
+        # Anthropic: count image tokens as prompt-side
+        if provider == "anthropic" and image_tokens:
+            prompt_tokens += image_tokens
+        # Anthropic prompt caching: pricing logic (reads @ 10%, creation @ 1.25x/2x)
+        if provider == "anthropic":
+            cache_read = int(usage.get("cache_read_input_tokens", 0) or 0)
+            cache_creation = int(usage.get("cache_creation_input_tokens", 0) or 0)
+            # Try to detect creation breakdown if available
+            cc_5m = 0
+            cc_1h = 0
+            try:
+                cc_map = usage.get("cache_creation") if isinstance(usage, dict) else None
+                if isinstance(cc_map, dict):
+                    cc_5m = int(cc_map.get("ephemeral_5m_input_tokens", 0) or 0)
+                    cc_1h = int(cc_map.get("ephemeral_1h_input_tokens", 0) or 0)
+            except Exception:
+                pass
+            # If breakdown is missing but total creation exists, assume 5m (1.25x) as default/safe estimate
+            if cache_creation > 0 and (cc_5m + cc_1h) == 0:
+                cc_5m = cache_creation
+
+            # Only apply special pricing if cache fields are present
+            if cache_read > 0 or cache_creation > 0:
+                in_rate = float(price.get("input", 0.0))
+                out_rate = float(price.get("output", 0.0))
+
+                # Non-cached prompt part
+                non_cached = max(0, int(prompt_tokens) - int(cache_read) - int(cache_creation))
+
+                cost = 0.0
+                # Standard input
+                cost += (non_cached / 1_000_000.0) * in_rate
+                # Cache reads (10% of input rate)
+                cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)
+                # Cache creation (1.25x for 5m, 2.0x for 1h)
+                if cc_5m > 0:
+                    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)
+                if cc_1h > 0:
+                    cost += (cc_1h / 1_000_000.0) * (in_rate * 2.00)
+
+                # Output + reasoning gap
+                completion_total = completion_tokens
+                if total_tokens and (prompt_tokens + completion_tokens) != total_tokens:
+                    completion_total += reasoning_gap
+                else:
+                    if thinking_tokens and not usage.get("total_tokens"):
+                        completion_total += thinking_tokens
+
+                cost += (completion_total / 1_000_000.0) * out_rate
+                return float(cost)
+
+        # reasoning_gap already computed above
         # DeepSeek cache pricing nuance (best-effort; needs provider-specific fields to be precise)
-        if self._is_deepseek_like(model):
-            hit = int(usage.get("prompt_cache_hit_tokens", 0) or 0)
-            miss = int(usage.get("prompt_cache_miss_tokens", 0) or 0)
-            if (hit + miss) <= 0:
-                miss = prompt_tokens
-                hit = 0
-            # V3.2 cache hit pricing per docs with +$0.25 margin -> $0.278 / 1M
-            cache_hit_rate_per_m = 0.278
-            cost = (hit / 1_000_000.0) * cache_hit_rate_per_m
-            cost += (miss / 1_000_000.0) * float(price.get("input", 0.0))
-            cost += ((completion_tokens + reasoning_gap) / 1_000_000.0) * float(price.get("output", 0.0))
-            return float(cost)
+        if self._is_deepseek_like(model):
+            hit = int(usage.get("prompt_cache_hit_tokens", 0) or 0)
+            miss = int(usage.get("prompt_cache_miss_tokens", 0) or 0)
+            if (hit + miss) <= 0:
+                miss = prompt_tokens
+                hit = 0
+            # V3.2 cache hit pricing per docs with +$0.25 margin -> $0.278 / 1M
+            cache_hit_rate_per_m = 0.278
+            cost = (hit / 1_000_000.0) * cache_hit_rate_per_m
+            cost += (miss / 1_000_000.0) * float(price.get("input", 0.0))
+            cost += ((completion_tokens + reasoning_gap) / 1_000_000.0) * float(price.get("output", 0.0))
+            return float(cost)
         # OpenAI prompt caching: cached input tokens billed at 10% of input price by default
         # Allow per-model override via price["cached_input"] when provided
         if provider == "openai":
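For the Anthropic branch of `compute_cost_usd` above: cache reads bill at 10% of the input rate, 5-minute cache writes at 1.25x, and 1-hour writes at 2.0x. A worked example at the `claude-sonnet-4-5` rates from the table (token counts invented):

    in_rate, out_rate = 4.20, 21.00  # USD per 1M tokens
    prompt, cache_read, cache_create_5m, completion = 100_000, 60_000, 20_000, 2_000

    non_cached = prompt - cache_read - cache_create_5m  # 20_000
    cost = (non_cached / 1e6) * in_rate \
        + (cache_read / 1e6) * (in_rate * 0.10) \
        + (cache_create_5m / 1e6) * (in_rate * 1.25) \
        + (completion / 1e6) * out_rate
    print(f"${cost:.4f}")  # -> $0.2562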
@@ -1905,13 +2027,13 @@ class ChatCLI:
         except Exception as e:
             self.ui.warn(f"Failed to load local settings: {e}")
 
-    def _collect_settings_dict(self) -> Dict[str, Any]:
-        data = {
-            "model": self.model,
-            "requested_tools": self.requested_tools,
+    def _collect_settings_dict(self) -> Dict[str, Any]:
+        data = {
+            "model": self.model,
+            "requested_tools": self.requested_tools,
             "fs_scope": self.fs_scope,
             # host_base is per-terminal by default; only persist if explicitly set by the user
-            "save_chat_history": self.save_chat_history,
+            "save_chat_history": self.save_chat_history,
             "fs_host_mode": self.fs_host_mode,
             "system_prompt": self.system_prompt,
             "show_tool_calls": self.show_tool_calls,
@@ -1934,8 +2056,10 @@ class ChatCLI:
             # retain provider-native tool results
             "retain_native_tool_results": self.retain_native_tool_results,
             # Anthropic thinking budget
-            "thinking_budget_tokens": self.thinking_budget_tokens,
-            # Anthropic cache TTL preference
+            "thinking_budget_tokens": self.thinking_budget_tokens,
+            # Anthropic effort (Opus 4.6/4.5). Default: high.
+            "anthropic_effort": getattr(self, "anthropic_effort", None),
+            # Anthropic cache TTL preference
             "anthropic_cache_ttl": self.anthropic_cache_ttl,
             # web search
             "web_search_enabled": self.web_search_enabled,
@@ -1946,10 +2070,10 @@ class ChatCLI:
             "text_verbosity": self.text_verbosity,
             "preambles_enabled": self.preambles_enabled,
             "custom_first_turn_enabled": self.custom_first_turn_enabled,
-            "custom_first_turn_text": self.custom_first_turn_text,
-            "codex_prompt_enabled": self.codex_prompt_enabled,
-            "codex_max_allow_all_tools": self.codex_max_allow_all_tools,
-        }
+            "custom_first_turn_text": self.custom_first_turn_text,
+            "codex_prompt_enabled": self.codex_prompt_enabled,
+            "codex_max_allow_all_tools": self.codex_max_allow_all_tools,
+        }
         try:
             if not getattr(self, "_host_base_ephemeral", False) and self.host_base:
                 data["host_base"] = self.host_base
@@ -1957,15 +2081,15 @@ class ChatCLI:
             pass
         return data
 
-    def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
-        try:
-            old_system_prompt = getattr(self, "system_prompt", None)
-            self.model = data.get("model", self.model)
-            if "save_chat_history" in data:
-                try:
-                    self.save_chat_history = bool(data.get("save_chat_history"))
-                except Exception:
-                    pass
+    def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
+        try:
+            old_system_prompt = getattr(self, "system_prompt", None)
+            self.model = data.get("model", self.model)
+            if "save_chat_history" in data:
+                try:
+                    self.save_chat_history = bool(data.get("save_chat_history"))
+                except Exception:
+                    pass
             self.requested_tools = data.get("requested_tools", self.requested_tools)
             self.fs_scope = data.get("fs_scope", self.fs_scope)
             self.host_base = data.get("host_base", self.host_base)
@@ -2037,13 +2161,25 @@ class ChatCLI:
                     self.usage_info_mode = val
             except Exception:
                 pass
-            # Reasoning effort (default medium if missing/invalid)
-            try:
-                val = data.get("reasoning_effort")
-                if isinstance(val, str) and val in ("low", "medium", "high", "xhigh"):
-                    self.reasoning_effort = val
-            except Exception:
-                pass
+            # Reasoning effort (default medium if missing/invalid)
+            try:
+                val = data.get("reasoning_effort")
+                if isinstance(val, str) and val in ("low", "medium", "high", "xhigh"):
+                    self.reasoning_effort = val
+            except Exception:
+                pass
+
+            # Anthropic effort (Opus 4.6/4.5). Default behavior equals high.
+            try:
+                ae = data.get("anthropic_effort")
+                if isinstance(ae, str):
+                    ae2 = ae.strip().lower()
+                    if ae2 in ("low", "medium", "high", "max"):
+                        self.anthropic_effort = ae2
+                elif ae in (None, "", "default"):
+                    self.anthropic_effort = "high"
+            except Exception:
+                self.anthropic_effort = "high"
             # Text verbosity selector
             try:
                 v = data.get("text_verbosity")
@@ -2052,21 +2188,21 @@ class ChatCLI:
             except Exception:
                 pass
             # Tool preambles toggle
-            if "preambles_enabled" in data:
-                try:
-                    self.preambles_enabled = bool(data.get("preambles_enabled"))
-                except Exception:
-                    self.preambles_enabled = False
-            if "codex_prompt_enabled" in data:
-                try:
-                    self.codex_prompt_enabled = bool(data.get("codex_prompt_enabled"))
-                except Exception:
-                    self.codex_prompt_enabled = True
-            if "codex_max_allow_all_tools" in data:
-                try:
-                    self.codex_max_allow_all_tools = bool(data.get("codex_max_allow_all_tools"))
-                except Exception:
-                    self.codex_max_allow_all_tools = False
+            if "preambles_enabled" in data:
+                try:
+                    self.preambles_enabled = bool(data.get("preambles_enabled"))
+                except Exception:
+                    self.preambles_enabled = False
+            if "codex_prompt_enabled" in data:
+                try:
+                    self.codex_prompt_enabled = bool(data.get("codex_prompt_enabled"))
+                except Exception:
+                    self.codex_prompt_enabled = True
+            if "codex_max_allow_all_tools" in data:
+                try:
+                    self.codex_max_allow_all_tools = bool(data.get("codex_max_allow_all_tools"))
+                except Exception:
+                    self.codex_max_allow_all_tools = False
             # Custom first-turn injection
             if "custom_first_turn_enabled" in data:
                 try:
@@ -2095,39 +2231,47 @@ class ChatCLI:
                     self.thinking_budget_tokens = None
             except Exception:
                 pass
-            # Anthropic cache TTL preference
-            try:
-                ttl = data.get("anthropic_cache_ttl")
-                if isinstance(ttl, str) and ttl.strip() in ("5m", "1h"):
-                    self.anthropic_cache_ttl = ttl.strip()
-                elif ttl in (None, "", "default"):
-                    self.anthropic_cache_ttl = None
-            except Exception:
-                pass
-            # Rebuild history if system prompt changed
-            try:
-                system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
-            except Exception:
-                system_prompt_changed = False
-
-            if system_prompt_changed:
-                # Changing the system prompt can materially alter the behavior of the assistant;
-                # warn the user and reset the current conversation history to avoid mixing contexts.
-                try:
-                    self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
-                except Exception:
-                    pass
-                self.history = []
-                if self.system_prompt:
-                    self.history.append({"role": "system", "content": self.system_prompt})
-            # On settings load, do not assume the custom first-turn was injected yet
-            try:
-                self._did_inject_custom_first_turn = False
-            except Exception:
-                pass
-            self._apply_model_side_effects()
-        except Exception as e:
-            self.ui.warn(f"Failed to apply settings: {e}")
+            # Anthropic cache TTL preference
+            try:
+                ttl = data.get("anthropic_cache_ttl")
+                if isinstance(ttl, str) and ttl.strip() in ("5m", "1h"):
+                    self.anthropic_cache_ttl = ttl.strip()
+                elif ttl in (None, "", "default"):
+                    self.anthropic_cache_ttl = None
+            except Exception:
+                pass
+            # Rebuild history if system prompt changed
+            try:
+                system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
+            except Exception:
+                system_prompt_changed = False
+
+            if system_prompt_changed:
+                # Changing the system prompt can materially alter the behavior of the assistant;
+                # warn the user and reset the current conversation history to avoid mixing contexts.
+                try:
+                    self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
+                except Exception:
+                    pass
+                self.history = []
+                if self.system_prompt:
+                    self.history.append({"role": "system", "content": self.system_prompt})
+                # OpenAI threaded state is invalid once the system prompt changes.
+                try:
+                    self._openai_previous_response_id = None
+                    self._openai_response_id_history = []
+                    self._openai_input_items = []
+                    self._openai_last_sent_input_items = None
+                except Exception:
+                    pass
+            # On settings load, do not assume the custom first-turn was injected yet
+            try:
+                self._did_inject_custom_first_turn = False
+            except Exception:
+                pass
+            self._apply_model_side_effects()
+        except Exception as e:
+            self.ui.warn(f"Failed to apply settings: {e}")
 
     async def _fetch_server_settings(self) -> Optional[Dict[str, Any]]:
         try:
@@ -2329,7 +2473,7 @@ class ChatCLI:
         parts = [
             f"Server: {self.server}",
             f"Model: {self.model or '(server default)'}",
-            f"Tools: {self._tools_label()}",
+            f"Tools: {self._tools_label()}",
             f"History: {'ON' if self.save_chat_history else 'OFF'}",
             f"Scope: {self._fs_label()}",
             f"Agent scope: {self.host_base or '(none)'}",
@@ -2480,12 +2624,12 @@ class ChatCLI:
     def _build_commands_catalog(self) -> List[Dict[str, str]]:
         cmds = [
             {"name": "/settings", "usage": "/settings", "desc": "Open settings menu"},
-            {"name": "/configure", "usage": "/configure", "desc": "Run configuration wizard now"},
+            {"name": "/configure", "usage": "/configure", "desc": "Run configuration wizard now"},
             {"name": "/history", "usage": "/history on|off", "desc": "Toggle saving chat history to unified memory"},
             {"name": "/infomode", "usage": "/infomode concise|verbose", "desc": "Set Usage & Info panel mode"},
             {"name": "/tools", "usage": "/tools on|off|default", "desc": "Toggle per-request tools"},
             {"name": "/websearch", "usage": "/websearch on|off|domains|sources|location", "desc": "Configure OpenAI web search"},
-            {"name": "/reasoning", "usage": "/reasoning low|medium|high|xhigh", "desc": "Set OpenAI reasoning effort (default: medium; xhigh supported on gpt-5.2*)"},
+            {"name": "/reasoning", "usage": "/reasoning low|medium|high|xhigh", "desc": "Set OpenAI reasoning effort (default: medium; xhigh supported on gpt-5.2*)"},
             {"name": "/thinkingbudget", "usage": "/thinkingbudget <tokens>|default", "desc": "Set Anthropic thinking budget tokens for -thinking models"},
             {"name": "/fs", "usage": "/fs workspace|host|default", "desc": "Set filesystem scope"},
             {"name": "/agent-scope", "usage": "/agent-scope <absolute path>", "desc": "Alias for /hostbase (set Agent scope)"},
@@ -2505,31 +2649,28 @@ class ChatCLI:
         ]
         return cmds
 
-    def _model_presets(self) -> List[Tuple[str, str]]:
-        """Shared list of (model, label) used by settings UI and /model menu."""
-        return [
-            ("gpt-5.2", "OpenAI: gpt-5.2"),
-            ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
-            ("gpt-5", "OpenAI: gpt-5"),
-            ("gpt-5-codex", "OpenAI: gpt-5-codex"),
-            ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
-            ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
-            ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
-            ("deepseek-3.2-speciale", "DeepSeek: deepseek 3.2 Speciale (no tools)"),
-            ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
-            ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
-            ("gemini-2.5-pro", "Gemini: gemini-2.5-pro"),
-            ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
-            ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
-            ("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
-            ("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
-            ("grok-4", "xAI: grok-4"),
+    def _model_presets(self) -> List[Tuple[str, str]]:
+        """Shared list of (model, label) used by settings UI and /model menu."""
+        # Ordered in "feelings" order (Recommended first, then Others).
+        # NOTE: We intentionally do not include a "server default" or "custom" option here.
+        return [
+            # Recommended
+            ("gpt-5.2", "OpenAI: gpt-5.2"),
+            ("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
+            ("gpt-5", "OpenAI: gpt-5"),
+            ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+            ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+            ("claude-opus-4-6", "Anthropic: claude-opus-4-6 (adaptive thinking supported)"),
+            ("kimi-k2.5", "Kimi: kimi-k2.5"),
             ("grok-code-fast-1", "xAI: grok-code-fast-1"),
-            ("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
-            ("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
-            ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
-            ("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
-            ("glm-4.6", "GLM: glm-4.6"),
+
+            # Others
+            ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
+            ("gpt-5-codex", "OpenAI: gpt-5-codex"),
+            ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
+            ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
+            ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+            ("glm-4.7", "GLM: glm-4.7"),
         ]
 
     async def open_settings(self, focus: Optional[str] = None) -> None:
@@ -2575,7 +2716,8 @@ class ChatCLI:
             "usage_info_mode": "verbose",
             "reasoning_effort": "medium",
             "retain_native_tool_results": False,
-            "thinking_budget_tokens": None,
+            "thinking_budget_tokens": None,
+            "anthropic_effort": "high",
             "anthropic_cache_ttl": None,
             "web_search_enabled": False,
             "web_search_allowed_domains": [],
@@ -2586,87 +2728,87 @@ class ChatCLI:
 
         # Model presets list (shared)
         model_presets: List[Tuple[str, str]] = self._model_presets()
-        # Reorder with a Recommended section at the top. Avoid decorative symbols; instead,
-        # annotate recommended models with plain text for clarity.
-        # Recommended set per request: opus 4-5 (no thinking), gemini 3, gpt 5, kimi k2 thinking,
-        # grok code fast 1, and deepseek reasoner 3.2
-        rec_keys = {
-            "deepseek-reasoner-3.2",
-            "claude-opus-4-5-20251101",
-            "gemini-3-pro-preview",
-            "gemini-3-flash-preview",
-            "gpt-5",
-            "gpt-5.2",
-            "kimi-k2-thinking",
-            "grok-code-fast-1",
-        }
-        rec_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m in rec_keys]
-        other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_keys]
-        # Build enum options in the order: Server default, Recommended, Others, Custom
-        model_enum_options: List[Optional[str]] = [None] + [m for (m, _l) in rec_list] + [m for (m, _l) in other_list] + ["custom"]
-        # Build render map without any star/marker characters; use a simple "(recommended)" suffix
-        # for recommended models EXCEPT DeepSeek Reasoner 3.2, which should not display the suffix.
-        render_map: Dict[Any, str] = {None: "Server default"}
-        for m, lbl in rec_list:
-            if m == "deepseek-reasoner-3.2":
-                render_map[m] = lbl
-            else:
-                render_map[m] = f"{lbl} (recommended)"
+
+        # Reorder with a Recommended section at the top.
+        # IMPORTANT: remove "server default" and "custom" from Settings UI.
+        rec_keys_ordered = [
+            "gpt-5.2",
+            "gpt-5.2-codex",
+            "gpt-5",
+            "gemini-3-pro-preview",
+            "gemini-3-flash-preview",
+            "claude-opus-4-6",
+            "kimi-k2.5",
+            "grok-code-fast-1",
+        ]
+        rec_set = set(rec_keys_ordered)
+        preset_map = {m: lbl for (m, lbl) in model_presets}
+        rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
+        other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
+
+        # Build enum options in the order: Recommended, Others
+        model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+        render_map: Dict[Any, str] = {}
+        for m, lbl in rec_list:
+            render_map[m] = lbl
         for m, lbl in other_list:
             render_map[m] = lbl
-        render_map["custom"] = "Custom..."
 
         # Build items schema
-        items: List[Dict[str, Any]] = [
-            {"label": "General", "type": "group", "items": [
-                {
-                    "id": "save_chat_history",
-                    "label": "Save to unified memory",
-                    "type": "bool",
-                    "description": "When ON, chats sync to your account and appear in the web portal. When OFF, chats are ephemeral (local only)."
-                },
-                {
-                    "id": "model",
-                    "label": "Model",
-                    "type": "enum",
-                    "options": model_enum_options,
-                    "render": render_map,
-                },
-                {"id": "system_prompt", "label": "System prompt", "type": "multiline"},
-                {"id": "usage_info_mode", "label": "Usage panel", "type": "enum", "options": ["concise", "verbose"], "render": {"concise": "Concise", "verbose": "Verbose"}},
-                {"id": "text_verbosity", "label": "Text verbosity", "type": "enum", "options": ["low", "medium", "high"], "render": {"low": "Low", "medium": "Medium", "high": "High"}},
-            ]},
-            {"label": "Tools & Security", "type": "group", "items": [
-                {
-                    "id": "requested_tools",
-                    "label": "Tools",
-                    "type": "enum",
-                    "options": [None, True, False],
-                    "render": {None: "Server default", True: "ON", False: "OFF"},
-                },
-                {
-                    "id": "control_level",
-                    "label": "Control level",
-                    "type": "enum",
-                    "options": [None, 1, 2, 3],
-                    "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
-                },
-                {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
-                {"id": "show_tool_calls", "label": "Show tool call logs", "type": "bool"},
-                # Note: options are static for this Settings UI session, so include xhigh unconditionally.
-                # The server will safely downgrade xhigh on models that don't support it.
-                {"id": "reasoning_effort", "label": "OpenAI reasoning effort", "type": "enum", "options": ["low", "medium", "high", "xhigh"], "render": {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh (gpt-5.2* / Codex Max; otherwise downgrades)"}},
-                {"id": "codex_max_allow_all_tools", "label": "Codex Max: allow ALL tools", "type": "bool"},
-                {"id": "retain_native_tool_results", "label": "Retain provider-native tool results across turns", "type": "bool"},
-                {"id": "thinking_budget_tokens", "label": "Anthropic thinking budget (tokens)", "type": "int"},
-                {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
-                # Agent scope & filesystem controls
-                {"id": "host_base", "label": "Agent scope directory", "type": "text"},
+        items: List[Dict[str, Any]] = [
+            {"label": "General", "type": "group", "items": [
+                {
+                    "id": "save_chat_history",
+                    "label": "Save to unified memory",
+                    "type": "bool",
+                    "description": "When ON, chats sync to your account and appear in the web portal. When OFF, chats are ephemeral (local only)."
+                },
+                {
+                    "id": "model",
+                    "label": "Model",
+                    "type": "enum",
+                    "options": model_enum_options,
+                    "render": render_map,
+                },
+                {"id": "system_prompt", "label": "System prompt", "type": "multiline"},
+                {"id": "usage_info_mode", "label": "Usage panel", "type": "enum", "options": ["concise", "verbose"], "render": {"concise": "Concise", "verbose": "Verbose"}},
+                {"id": "text_verbosity", "label": "Text verbosity", "type": "enum", "options": ["low", "medium", "high"], "render": {"low": "Low", "medium": "Medium", "high": "High"}},
+            ]},
+            {"label": "Tools & Security", "type": "group", "items": [
+                {
+                    "id": "requested_tools",
+                    "label": "Tools",
+                    "type": "enum",
+                    # Default-first: ON, then OFF, then server default.
+                    "options": [True, False, None],
+                    "render": {None: "Server default", True: "ON", False: "OFF"},
+                },
+                {
+                    "id": "control_level",
+                    "label": "Control level",
+                    "type": "enum",
+                    # Default-first: Level 3, then 2, then 1, then server default.
+                    "options": [3, 2, 1, None],
+                    "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
+                },
+                {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
+                {"id": "show_tool_calls", "label": "Show tool call logs", "type": "bool"},
+                # Note: options are static for this Settings UI session, so include xhigh unconditionally.
+                # The server will safely downgrade xhigh on models that don't support it.
+                {"id": "reasoning_effort", "label": "OpenAI reasoning effort", "type": "enum", "options": ["low", "medium", "high", "xhigh"], "render": {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh (gpt-5.2* / Codex Max; otherwise downgrades)"}},
+                {"id": "codex_max_allow_all_tools", "label": "Codex Max: allow ALL tools", "type": "bool"},
+                {"id": "retain_native_tool_results", "label": "Retain provider-native tool results across turns", "type": "bool"},
+                {"id": "thinking_budget_tokens", "label": "Anthropic thinking budget (tokens)", "type": "int"},
+                {"id": "anthropic_effort", "label": "Anthropic effort (Opus 4.6/4.5)", "type": "enum", "options": ["low", "medium", "high", "max"], "render": {"low": "Low", "medium": "Medium", "high": "High (default)", "max": "Max (Opus 4.6 only)"}},
+                {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
+                # Agent scope & filesystem controls
+                {"id": "host_base", "label": "Agent scope directory", "type": "text"},
             {
                 "id": "fs_scope",
                 "label": "Filesystem scope",
                 "type": "enum",
-                "options": [None, "workspace", "host"],
+                # Default-first: host (Agent scope), then workspace, then server default.
+                "options": ["host", "workspace", None],
                 "render": {
                     None: "Server default",
                     "workspace": "Workspace (sandbox)",
@@ -2677,7 +2819,8 @@ class ChatCLI:
2677
2819
  "id": "fs_host_mode",
2678
2820
  "label": "Host mode",
2679
2821
  "type": "enum",
2680
- "options": [None, "any", "cwd", "custom"],
2822
+ # Default-first: custom (use Agent scope), then cwd, then any, then server default.
2823
+ "options": ["custom", "cwd", "any", None],
2681
2824
  "render": {
2682
2825
  None: "Server default / any",
2683
2826
  "any": "any (no extra client restriction)",
@@ -2689,12 +2832,19 @@ class ChatCLI:
2689
2832
  {"label": "Code Map", "type": "group", "items": [
2690
2833
  {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
2691
2834
  ]},
2692
- {"label": "Preambles & First-turn", "type": "group", "items": [
2693
- {"id": "preambles_enabled", "label": "Enable tool call preambles (supported models only)", "type": "bool"},
2694
- {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
2695
- {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
2696
- {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
2697
- ]},
2835
+ {"label": "Preambles & First-turn", "type": "group", "items": [
2836
+ {
2837
+ "id": "preambles_enabled",
2838
+ "label": "Enable tool call preambles (GPT-5 only)",
2839
+ "type": "bool",
2840
+ # Only show this control when the *currently selected* model supports it.
2841
+ # (This updates live as the Model picker changes.)
2842
+ "visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
2843
+ },
2844
+ {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
2845
+ {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
2846
+ {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
2847
+ ]},
2698
2848
  {"label": "Web search", "type": "group", "items": [
2699
2849
  {"id": "web_search_enabled", "label": "Enable web search (OpenAI)", "type": "bool"},
2700
2850
  {"id": "web_search_allowed_domains", "label": "Allowed domains (comma)", "type": "text"},
@@ -2703,6 +2853,20 @@ class ChatCLI:
  ]},
  ]

+ # Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+ try:
+ if not self._is_gpt_model(self.model):
+ for g in items:
+ if not isinstance(g, dict):
+ continue
+ if (g.get("type") == "group") and (g.get("label") == "General"):
+ for row in (g.get("items") or []):
+ if isinstance(row, dict) and row.get("id") == "text_verbosity":
+ row["options"] = ["medium", "high"]
+ row["render"] = {"medium": "Medium", "high": "High"}
+ except Exception:
+ pass
+
  # Prepare initial values with enum placeholder for model when custom text set
  init_for_ui = dict(initial)
  if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
@@ -2714,10 +2878,10 @@ class ChatCLI:
  try:
  if rid == "model":
  if value == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
+ typed = self.ui.prompt(
+ "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
+ default=self.model or "",
+ )
  working["model"] = typed.strip() or None
  self._apply_model_side_effects()
  elif rid == "text_verbosity" and isinstance(value, str):
@@ -2742,14 +2906,14 @@ class ChatCLI:
  if k.strip() and v.strip():
  kv[k.strip()] = v.strip()
  working[rid] = kv
- elif rid == "auto_approve" and isinstance(value, str):
- working[rid] = [t.strip() for t in value.split(",") if t.strip()]
- elif rid == "anthropic_cache_ttl":
- if value in ("5m", "1h"):
- working[rid] = value
- else:
- working[rid] = None
- self._apply_settings_dict({rid: working.get(rid)})
+ elif rid == "auto_approve" and isinstance(value, str):
+ working[rid] = [t.strip() for t in value.split(",") if t.strip()]
+ elif rid == "anthropic_cache_ttl":
+ if value in ("5m", "1h"):
+ working[rid] = value
+ else:
+ working[rid] = None
+ self._apply_settings_dict({rid: working.get(rid)})
  if rid == "host_base":
  try:
  self._host_base_ephemeral = False
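The on-change handler above coerces raw widget values per row id before applying them. A standalone sketch of the two coercions this hunk shows; `coerce_setting` is an illustrative name:

from typing import Any

def coerce_setting(rid: str, value: Any) -> Any:
    """Mirror the hunk above: comma-separated text -> list, TTL -> known enum or None."""
    if rid == "auto_approve" and isinstance(value, str):
        return [t.strip() for t in value.split(",") if t.strip()]
    if rid == "anthropic_cache_ttl":
        return value if value in ("5m", "1h") else None
    return value

assert coerce_setting("auto_approve", "write_file, run_command,") == ["write_file", "run_command"]
assert coerce_setting("anthropic_cache_ttl", "2h") is None  # unknown TTL falls back to server default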
@@ -3057,9 +3221,9 @@ class ChatCLI:
  "Control Level",
  "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
  [
- ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
- ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
  ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+ ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+ ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
  ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
  ],
  )
@@ -3142,14 +3306,14 @@ class ChatCLI:
  except Exception:
  pass

- # 3) Tool usage preamble (UX hint)
- try:
- if bool(getattr(self, "preambles_enabled", False)) and not self._is_codex_model(self.model):
- blocks.append(
- "Tool usage: when you need to read or modify files or run commands, "
- "explicitly explain why you're using a tool, what you'll do, and how it "
- "advances the user's goal before calling the tool."
- )
+ # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+ try:
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
+ blocks.append(
+ "Tool usage: when you need to read or modify files or run commands, "
+ "explicitly explain why you're using a tool, what you'll do, and how it "
+ "advances the user's goal before calling the tool."
+ )
  except Exception:
  pass
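`_supports_preambles` is referenced in this diff but not defined in it; the labels suggest it gates on GPT-5-family, non-Codex models. A hypothetical sketch consistent with that wording (the real helper may consult server metadata instead):

def _supports_preambles(model: object) -> bool:
    """Hypothetical check: GPT-5-family models emit tool preambles; Codex variants do not."""
    name = str(model or "").strip().lower()
    if not name.startswith("gpt-5"):
        return False
    return "codex" not in name

assert _supports_preambles("gpt-5.2") is True
assert _supports_preambles("gpt-5.2-codex") is False
assert _supports_preambles("claude-opus-4-6") is False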
 
@@ -3171,15 +3335,15 @@ class ChatCLI:

  def _build_messages(self, user_input: str) -> List[Dict[str, str]]:
  msgs: List[Dict[str, str]] = []
- # Inject a concise Codex developer system prompt for Codex models (optional)
- try:
- if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
- msgs.append({"role": "system", "content": self._codex_system_prompt()})
- except Exception:
- pass
- # Always send the system prompt as-is (do NOT inject the code map here)
- if self.system_prompt:
- msgs.append({"role": "system", "content": self.system_prompt})
+ # Inject a concise Codex developer system prompt for Codex models (optional)
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ # Always send the system prompt as-is (do NOT inject the code map here)
+ if self.system_prompt:
+ msgs.append({"role": "system", "content": self.system_prompt})

  # Replay prior conversation (excluding any system message already added)
  for msg in self.history:
@@ -3198,80 +3362,80 @@ class ChatCLI:
  except Exception:
  self._last_built_user_content = user_input

- msgs.append({"role": "user", "content": content})
- return msgs
-
- def _codex_system_prompt(self) -> str:
- """Minimal developer system prompt for GPT-5 Codex family."""
- return (
- "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n"
- "Note: In this CLI, the terminal tool is named 'run_command' (not 'shell'). Use run_command and always set the 'cwd' parameter; avoid using 'cd'.\n\n"
- "## General\n"
- "- Always set the 'cwd' param when using run_command. Do not use 'cd' unless absolutely necessary.\n"
- "- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n"
- "## Editing constraints\n"
- "- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n"
- "- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n"
- "- You may be in a dirty git worktree.\n"
- " * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n"
- " * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n"
- " * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n"
- " * If the changes are in unrelated files, just ignore them and don't revert them.\n"
- "- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n\n"
- "## Plan tool\n"
- "When using the planning tool:\n"
- "- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).\n"
- "- Do not make single-step plans.\n"
- "- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.\n\n"
- "## Codex CLI harness, sandboxing, and approvals\n"
- "The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.\n"
- "Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:\n"
- "- read-only: The sandbox only permits reading files.\n"
- "- workspace-write: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.\n"
- "- danger-full-access: No filesystem sandboxing - all commands are permitted.\n"
- "Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:\n"
- "- restricted: Requires approval\n"
- "- enabled: No approval needed\n"
- "Approvals are your mechanism to get user consent to run shell/terminal commands without the sandbox. Possible configuration options for `approval_policy` are\n"
- "- untrusted: The harness will escalate most commands for user approval, apart from a limited allowlist of safe 'read' commands.\n"
- "- on-failure: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.\n"
- "- on-request: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing.\n"
- "- never: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user.\n\n"
- "When requesting approval to execute a command that will require escalated privileges:\n"
- " - Provide the `with_escalated_permissions` parameter with the boolean value true (when available).\n"
- " - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter.\n\n"
- "## Special user requests\n"
- "- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.\n"
- "- If the user asks for a 'review', default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.\n\n"
- "## Presenting your work and final message\n"
- "You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.\n"
- "- Default: be very concise; friendly coding teammate tone.\n"
- "- Ask only when needed; suggest ideas; mirror the user's style.\n"
- "- For substantial work, summarize clearly; follow final-answer formatting.\n"
- "- Skip heavy formatting for simple confirmations.\n"
- "- Don't dump large files you've written; reference paths only.\n"
- "- No 'save/copy this file' - User is on the same machine.\n"
- "- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.\n"
- "- For code changes:\n"
- " * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with 'summary', just jump right in.\n"
- " * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.\n"
- "- File References: When referencing files in your response, include the relevant start line and follow these rules: use inline code for paths; each reference should have a standalone path; accepted: absolute, workspace-relative, a/ or b/ diff prefixes, or bare filename/suffix; optional line/column uses :line[:column] or #LlineCcolumn; do not use URIs; do not provide line ranges.\n\n"
- "Apply Patch\n"
- "As shared previously in the GPT-5 prompting guide, use apply_patch for file edits to match the training distribution.\n\n"
- "Preambles\n"
- "GPT-5-Codex does not emit preambles. Do not ask for them.\n\n"
- "Frontend Guidance\n"
- "Use the following libraries unless the user or repo specifies otherwise:\n"
- "Framework: React + TypeScript\n"
- "Styling: Tailwind CSS\n"
- "Components: shadcn/ui\n"
- "Icons: lucide-react\n"
- "Animation: Framer Motion\n"
- "Charts: Recharts\n"
- "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
- )
-
- def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
+ msgs.append({"role": "user", "content": content})
+ return msgs
+
+ def _codex_system_prompt(self) -> str:
+ """Minimal developer system prompt for GPT-5 Codex family."""
+ return (
+ "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n"
+ "Note: In this CLI, the terminal tool is named 'run_command' (not 'shell'). Use run_command and always set the 'cwd' parameter; avoid using 'cd'.\n\n"
+ "## General\n"
+ "- Always set the 'cwd' param when using run_command. Do not use 'cd' unless absolutely necessary.\n"
+ "- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n"
+ "## Editing constraints\n"
+ "- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n"
+ "- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n"
+ "- You may be in a dirty git worktree.\n"
+ " * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n"
+ " * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n"
+ " * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n"
+ " * If the changes are in unrelated files, just ignore them and don't revert them.\n"
+ "- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n\n"
+ "## Plan tool\n"
+ "When using the planning tool:\n"
+ "- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).\n"
+ "- Do not make single-step plans.\n"
+ "- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.\n\n"
+ "## Codex CLI harness, sandboxing, and approvals\n"
+ "The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.\n"
+ "Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:\n"
+ "- read-only: The sandbox only permits reading files.\n"
+ "- workspace-write: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.\n"
+ "- danger-full-access: No filesystem sandboxing - all commands are permitted.\n"
+ "Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:\n"
+ "- restricted: Requires approval\n"
+ "- enabled: No approval needed\n"
+ "Approvals are your mechanism to get user consent to run shell/terminal commands without the sandbox. Possible configuration options for `approval_policy` are\n"
+ "- untrusted: The harness will escalate most commands for user approval, apart from a limited allowlist of safe 'read' commands.\n"
+ "- on-failure: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.\n"
+ "- on-request: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing.\n"
+ "- never: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user.\n\n"
+ "When requesting approval to execute a command that will require escalated privileges:\n"
+ " - Provide the `with_escalated_permissions` parameter with the boolean value true (when available).\n"
+ " - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter.\n\n"
+ "## Special user requests\n"
+ "- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.\n"
+ "- If the user asks for a 'review', default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.\n\n"
+ "## Presenting your work and final message\n"
+ "You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.\n"
+ "- Default: be very concise; friendly coding teammate tone.\n"
+ "- Ask only when needed; suggest ideas; mirror the user's style.\n"
+ "- For substantial work, summarize clearly; follow final-answer formatting.\n"
+ "- Skip heavy formatting for simple confirmations.\n"
+ "- Don't dump large files you've written; reference paths only.\n"
+ "- No 'save/copy this file' - User is on the same machine.\n"
+ "- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.\n"
+ "- For code changes:\n"
+ " * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with 'summary', just jump right in.\n"
+ " * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.\n"
+ "- File References: When referencing files in your response, include the relevant start line and follow these rules: use inline code for paths; each reference should have a standalone path; accepted: absolute, workspace-relative, a/ or b/ diff prefixes, or bare filename/suffix; optional line/column uses :line[:column] or #LlineCcolumn; do not use URIs; do not provide line ranges.\n\n"
+ "Apply Patch\n"
+ "As shared previously in the GPT-5 prompting guide, use apply_patch for file edits to match the training distribution.\n\n"
+ "Preambles\n"
+ "GPT-5-Codex does not emit preambles. Do not ask for them.\n\n"
+ "Frontend Guidance\n"
+ "Use the following libraries unless the user or repo specifies otherwise:\n"
+ "Framework: React + TypeScript\n"
+ "Styling: Tailwind CSS\n"
+ "Components: shadcn/ui\n"
+ "Icons: lucide-react\n"
+ "Animation: Framer Motion\n"
+ "Charts: Recharts\n"
+ "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
+ )
+
+ def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
  """Build provider-native messages for Kimi preserving prior assistant reasoning_content.
  Includes prior provider-native turns and the current user message with first-turn injections.
  """
@@ -3289,31 +3453,31 @@ class ChatCLI:
  for m in (self._kimi_raw_history or []):
  raw.append(m)
  # Append current user message
- raw.append({"role": "user", "content": content})
- return raw
-
- def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
- """Normalize Gemini provider-native history.
-
- Ensures we only send a flat list of dicts back to the server.
- This prevents accidental nesting like [[{...}, {...}]] which the
- google-genai SDK rejects with pydantic union validation errors.
- """
- out: List[Dict[str, Any]] = []
- if not isinstance(rpm, list):
- return out
- for item in rpm:
- if item is None:
- continue
- if isinstance(item, list):
- # Flatten one level
- for sub in item:
- if isinstance(sub, dict):
- out.append(dict(sub))
- continue
- if isinstance(item, dict):
- out.append(dict(item))
- return out
+ raw.append({"role": "user", "content": content})
+ return raw
+
+ def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
+ """Normalize Gemini provider-native history.
+
+ Ensures we only send a flat list of dicts back to the server.
+ This prevents accidental nesting like [[{...}, {...}]] which the
+ google-genai SDK rejects with pydantic union validation errors.
+ """
+ out: List[Dict[str, Any]] = []
+ if not isinstance(rpm, list):
+ return out
+ for item in rpm:
+ if item is None:
+ continue
+ if isinstance(item, list):
+ # Flatten one level
+ for sub in item:
+ if isinstance(sub, dict):
+ out.append(dict(sub))
+ continue
+ if isinstance(item, dict):
+ out.append(dict(item))
+ return out

  def _build_working_memory_injection(self) -> Optional[str]:
  try:
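The Gemini normalization above guards against exactly one level of accidental nesting. A standalone copy of the same logic, demonstrating the input and output shapes:

from typing import Any, Dict, List

def flatten_one_level(rpm: Any) -> List[Dict[str, Any]]:
    """Flatten [[{...}], {...}] into [{...}, {...}], dropping None and non-dict items."""
    out: List[Dict[str, Any]] = []
    if not isinstance(rpm, list):
        return out
    for item in rpm:
        if item is None:
            continue
        if isinstance(item, list):
            out.extend(dict(sub) for sub in item if isinstance(sub, dict))
            continue
        if isinstance(item, dict):
            out.append(dict(item))
    return out

# The nested shape the google-genai SDK rejects becomes a flat list:
print(flatten_one_level([[{"role": "user"}], {"role": "model"}, None]))
# -> [{'role': 'user'}, {'role': 'model'}]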
@@ -3483,54 +3647,54 @@ class ChatCLI:
  except Exception:
  return ""

- def _approval_prompt_ui(self, label: str, args: Dict[str, Any]) -> str:
+ def _approval_prompt_ui(self, label: str, args: Dict[str, Any]) -> str:
  """Interactive approval prompt for Level 2.

  Uses the same highlighted, arrow-key-driven menu UX as the rest of the CLI
  when TTY input is available, and falls back to numeric input otherwise.
  Returns one of: "once", "session", "always", "deny".
  """
- self.ui.print(f"\n[Level 2] Approval required for: {label}")
- # Show a compact summary
- summary = self._tool_summary(label.split(":")[0], args)
- self.ui.print(summary, style=self.ui.theme["dim"])
-
- # Show what we're actually approving (key fields), so the user can make an informed decision.
- try:
- base_tool = (label.split(":")[0] if isinstance(label, str) and ":" in label else label) or ""
- except Exception:
- base_tool = label or ""
- bt = str(base_tool).strip().lower()
- try:
- if bt == "run_command":
- cmd = args.get("cmd")
- cwd = args.get("cwd")
- timeout = args.get("timeout")
- if cmd is not None:
- self.ui.print(f"cmd: {self._clip(cmd, 400)}", style=self.ui.theme["dim"])
- if cwd is not None:
- self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
- if timeout is not None:
- self.ui.print(f"timeout: {timeout}", style=self.ui.theme["dim"])
- elif bt in ("write_file", "append_file"):
- path = args.get("path")
- content = args.get("content") or ""
- self.ui.print(f"path: {path}", style=self.ui.theme["dim"])
- try:
- b = len(str(content).encode("utf-8", errors="replace"))
- except Exception:
- b = None
- if b is not None:
- self.ui.print(f"content_bytes: {b}", style=self.ui.theme["dim"])
- elif bt == "apply_patch":
- cwd = args.get("cwd")
- dry = bool(args.get("dry_run", False))
- if cwd is not None:
- self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
- self.ui.print(f"dry_run: {dry}", style=self.ui.theme["dim"])
- except Exception:
- # Never block approvals on formatting
- pass
+ self.ui.print(f"\n[Level 2] Approval required for: {label}")
+ # Show a compact summary
+ summary = self._tool_summary(label.split(":")[0], args)
+ self.ui.print(summary, style=self.ui.theme["dim"])
+
+ # Show what we're actually approving (key fields), so the user can make an informed decision.
+ try:
+ base_tool = (label.split(":")[0] if isinstance(label, str) and ":" in label else label) or ""
+ except Exception:
+ base_tool = label or ""
+ bt = str(base_tool).strip().lower()
+ try:
+ if bt == "run_command":
+ cmd = args.get("cmd")
+ cwd = args.get("cwd")
+ timeout = args.get("timeout")
+ if cmd is not None:
+ self.ui.print(f"cmd: {self._clip(cmd, 400)}", style=self.ui.theme["dim"])
+ if cwd is not None:
+ self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
+ if timeout is not None:
+ self.ui.print(f"timeout: {timeout}", style=self.ui.theme["dim"])
+ elif bt in ("write_file", "append_file"):
+ path = args.get("path")
+ content = args.get("content") or ""
+ self.ui.print(f"path: {path}", style=self.ui.theme["dim"])
+ try:
+ b = len(str(content).encode("utf-8", errors="replace"))
+ except Exception:
+ b = None
+ if b is not None:
+ self.ui.print(f"content_bytes: {b}", style=self.ui.theme["dim"])
+ elif bt == "apply_patch":
+ cwd = args.get("cwd")
+ dry = bool(args.get("dry_run", False))
+ if cwd is not None:
+ self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
+ self.ui.print(f"dry_run: {dry}", style=self.ui.theme["dim"])
+ except Exception:
+ # Never block approvals on formatting
+ pass

  choices: List[Tuple[str, str]] = [
  ("once", "Approve once"),
@@ -3788,6 +3952,39 @@ class ChatCLI:
  if n.lower() in ("context", "to_next"):
  return "Context handoff to next turn"

+ # Universal context handoff tool (server-side). Show what files the model chose to keep.
+ if n.lower() == "context_handoff":
+ try:
+ rd = (result or {}).get("data") if isinstance(result, dict) else None
+ rd = rd if isinstance(rd, dict) else {}
+ reason = (rd.get("reason") if isinstance(rd, dict) else None) or ""
+ mem_path = (rd.get("memory_path") if isinstance(rd, dict) else None) or ""
+ keep_files = rd.get("keep_files") if isinstance(rd, dict) else None
+ files: List[str] = []
+ if isinstance(keep_files, list):
+ for x in keep_files:
+ if isinstance(x, str) and x.strip():
+ files.append(x.strip())
+ # Limit for single-line readability
+ shown = files[:5]
+ more = max(0, len(files) - len(shown))
+ if shown:
+ files_part = ", ".join(shown) + (f" (+{more} more)" if more else "")
+ return (
+ "Context handoff saved"
+ + (f" (reason={reason})" if str(reason).strip() else "")
+ + f". Files kept in context: {files_part}"
+ + (f". memory_path: {mem_path}" if str(mem_path).strip() else "")
+ )
+ return (
+ "Context handoff saved"
+ + (f" (reason={reason})" if str(reason).strip() else "")
+ + ". No files were selected to be kept in context"
+ + (f". memory_path: {mem_path}" if str(mem_path).strip() else "")
+ )
+ except Exception:
+ return "Context handoff saved"
+
  def _arg_path() -> str:
  p = a.get("path") or data.get("path") or ""
  try:
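The summary above assumes a result envelope of the form {"data": {...}}. A hypothetical payload that exercises the keep-files branch; the field names come from the hunk, while the values and path are invented for illustration:

result = {
    "data": {
        "reason": "context_window_pressure",      # illustrative value
        "memory_path": "memory/handoff-0001.md",  # illustrative path
        "keep_files": ["cli.py", "README.md", "", None],  # blanks/non-strings are dropped
    }
}
files = [x.strip() for x in result["data"]["keep_files"] if isinstance(x, str) and x.strip()]
print(files)  # ['cli.py', 'README.md'] -> "Files kept in context: cli.py, README.md"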
@@ -3897,6 +4094,35 @@ class ChatCLI:
  return

  data = result.get("data", {}) or {}
+
+ # Universal context_handoff tool: show where it was saved and what files were chosen.
+ if name == "context_handoff":
+ try:
+ reason = data.get("reason")
+ mem_path = data.get("memory_path")
+ keep_files = data.get("keep_files")
+ self.ui.print(
+ f"⇐ [{self.ui.theme['tool_result']}]✅ Context handoff saved[/{self.ui.theme['tool_result']}]"
+ + (f" (reason={reason})" if isinstance(reason, str) and reason.strip() else "")
+ )
+ if isinstance(mem_path, str) and mem_path.strip():
+ self.ui.print(f"memory_path: {mem_path}", style=self.ui.theme["dim"]) # type: ignore
+ if isinstance(keep_files, list) and keep_files:
+ cleaned = [str(x).strip() for x in keep_files if isinstance(x, (str, int, float)) and str(x).strip()]
+ if cleaned:
+ self.ui.print("Files kept in context:", style=self.ui.theme["dim"]) # type: ignore
+ for p in cleaned[:25]:
+ self.ui.print(f"- {p}", style=self.ui.theme["dim"]) # type: ignore
+ if len(cleaned) > 25:
+ self.ui.print(f"... +{len(cleaned)-25} more", style=self.ui.theme["dim"]) # type: ignore
+ else:
+ self.ui.print("No files were selected to be kept in context.", style=self.ui.theme["dim"]) # type: ignore
+ else:
+ self.ui.print("No files were selected to be kept in context.", style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ # Fall through to default renderer below.
+ pass
+ return
  if name == "read_file":
  path = data.get("path", "")
  content = data.get("content", "") or ""
@@ -4302,25 +4528,25 @@ class ChatCLI:
  await self.open_settings()
  return True

- if cmd.startswith("/history"):
- parts = cmd.split(maxsplit=1)
- if len(parts) == 1:
- self.ui.info("Usage: /history on|off")
- self.ui.info(f"Current: {'ON (synced to unified memory)' if self.save_chat_history else 'OFF (ephemeral)'}")
- return True
- arg = parts[1].strip().lower()
- if arg == "on":
- self.save_chat_history = True
- self.ui.success("Chat history will be saved to unified memory.")
- elif arg == "off":
- self.save_chat_history = False
- self.ui.success("Chat history is now ephemeral (local only, not synced).")
- else:
- self.ui.warn("Usage: /history on|off")
- return True
- self.save_settings()
- return True
-
+ if cmd.startswith("/history"):
+ parts = cmd.split(maxsplit=1)
+ if len(parts) == 1:
+ self.ui.info("Usage: /history on|off")
+ self.ui.info(f"Current: {'ON (synced to unified memory)' if self.save_chat_history else 'OFF (ephemeral)'}")
+ return True
+ arg = parts[1].strip().lower()
+ if arg == "on":
+ self.save_chat_history = True
+ self.ui.success("Chat history will be saved to unified memory.")
+ elif arg == "off":
+ self.save_chat_history = False
+ self.ui.success("Chat history is now ephemeral (local only, not synced).")
+ else:
+ self.ui.warn("Usage: /history on|off")
+ return True
+ self.save_settings()
+ return True
+
  if cmd.startswith("/tools"):
  parts = cmd.split(maxsplit=1)
  if len(parts) == 1:
@@ -4433,25 +4659,25 @@ class ChatCLI:
  self.ui.warn("Unknown /websearch subcommand. Use on, off, domains, sources, or location.")
  return True

- if cmd.startswith("/reasoning"):
- parts = cmd.split(maxsplit=1)
- if len(parts) == 1:
- self.ui.info("Usage: /reasoning low|medium|high|xhigh")
- self.ui.info(f"Current: {self.reasoning_effort}")
- return True
- arg = (parts[1] or "").strip().lower()
- if arg in ("low", "medium", "high", "xhigh"):
- self.reasoning_effort = arg
- if arg == "xhigh" and not self._supports_xhigh_reasoning_effort(self.model):
- # Keep the user's preference, but be explicit about server-side downgrading.
- self.ui.warn(
- "Note: xhigh is only applied on models that support it (e.g., gpt-5.2* / gpt-5.1-codex-max). The server may downgrade it on other models."
- )
- self.ui.success(f"Reasoning effort set to: {self.reasoning_effort}")
- self.save_settings()
- else:
- self.ui.warn("Invalid value. Use: low, medium, high, or xhigh")
- return True
+ if cmd.startswith("/reasoning"):
+ parts = cmd.split(maxsplit=1)
+ if len(parts) == 1:
+ self.ui.info("Usage: /reasoning low|medium|high|xhigh")
+ self.ui.info(f"Current: {self.reasoning_effort}")
+ return True
+ arg = (parts[1] or "").strip().lower()
+ if arg in ("low", "medium", "high", "xhigh"):
+ self.reasoning_effort = arg
+ if arg == "xhigh" and not self._supports_xhigh_reasoning_effort(self.model):
+ # Keep the user's preference, but be explicit about server-side downgrading.
+ self.ui.warn(
+ "Note: xhigh is only applied on models that support it (e.g., gpt-5.2* / gpt-5.1-codex-max). The server may downgrade it on other models."
+ )
+ self.ui.success(f"Reasoning effort set to: {self.reasoning_effort}")
+ self.save_settings()
+ else:
+ self.ui.warn("Invalid value. Use: low, medium, high, or xhigh")
+ return True

  if cmd.startswith("/thinkingbudget"):
  parts = cmd.split(maxsplit=1)
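`_supports_xhigh_reasoning_effort` is referenced but not defined in this diff; the warning text names gpt-5.2* and gpt-5.1-codex-max. A hypothetical predicate matching that message (the real check may consult server-side model metadata instead):

def _supports_xhigh_reasoning_effort(model: object) -> bool:
    """Hypothetical: xhigh applies to gpt-5.2* and gpt-5.1-codex-max; others downgrade server-side."""
    name = str(model or "").strip().lower()
    return name.startswith("gpt-5.2") or name == "gpt-5.1-codex-max"

assert _supports_xhigh_reasoning_effort("gpt-5.2-codex") is True
assert _supports_xhigh_reasoning_effort("gpt-5") is False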
@@ -4628,6 +4854,14 @@ class ChatCLI:
  except Exception:
  pass
  self.ui.success("System prompt set.")
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  self.save_settings()
  return True

@@ -4641,7 +4875,7 @@ class ChatCLI:
  self.ui.success(f"Thread title set to: {self.thread_name}")
  return True

- if cmd == "/clear":
+ if cmd == "/clear":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
@@ -4650,13 +4884,17 @@ class ChatCLI:
  pass
  # Reset provider-native histories
  try:
- self.messages_for_save = []
- if not self.save_chat_history:
- self.thread_uid = None
- self._kimi_raw_history = []
- self._gemini_raw_history = []
- except Exception:
- pass
+ self.messages_for_save = []
+ if not self.save_chat_history:
+ self.thread_uid = None
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -4857,9 +5095,9 @@ class ChatCLI:
  # We have a fallback map (repo copy) but none at host base
  self.ui.print("Code Map: fallback example in use (host base missing CODEBASE_MAP.md). It will be prefixed.", style=self.ui.theme["dim"])
  else:
- self.ui.print("Code Map: missing at host base — toggle with /map on|off", style=self.ui.theme["dim"])
- # History status
- hist_status = "ON (synced)" if self.save_chat_history else "OFF (ephemeral)"
+ self.ui.print("Code Map: missing at host base — toggle with /map on|off", style=self.ui.theme["dim"])
+ # History status
+ hist_status = "ON (synced)" if self.save_chat_history else "OFF (ephemeral)"
  self.ui.print(f"Chat history: {hist_status}", style=self.ui.theme["dim"])
  # If a host base is configured and code map injection is enabled, offer to generate when missing
  try:
@@ -4909,20 +5147,48 @@ class ChatCLI:
  pt_completer = self._commands_word_completer()
  while True:
  try:
- if self._pt_session is not None:
- # Use prompt_toolkit with inline completion when available
- # Pass completer per-prompt to ensure latest catalog
- user_input = await self._pt_session.prompt_async(
- "You: ",
- completer=pt_completer,
- complete_while_typing=True,
- )
- user_input = user_input.strip()
- elif self._input_engine:
- # Do not add continuation prefixes on new lines
- user_input = self._input_engine.read_message("You: ", "")
+ pending_edit = self._pending_user_edit
+ edit_mode = pending_edit is not None
+
+ # prompt_toolkit is intentionally not used.
+ # Always prefer our dependency-free input engine when available.
+ if self._input_engine:
+ if edit_mode:
+ # The low-level input engine doesn't support prefilling.
+ # Show the prior message and let the user paste a replacement.
+ try:
+ self.ui.print(
+ "\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):",
+ style=self.ui.theme["warn"], # type: ignore
+ )
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ # Do not add continuation prefixes on new lines.
+ user_input = self._input_engine.read_message("You: ", "")
  else:
- user_input = self._read_multiline_input("You: ")
+ # Last-resort fallback.
+ if edit_mode:
+ try:
+ self.ui.print(
+ "\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):",
+ style=self.ui.theme["warn"], # type: ignore
+ )
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ user_input = self._read_multiline_input("You: ")
+
+ # Clear pending edit state after we successfully collected input.
+ if edit_mode:
+ self._pending_user_edit = None
+ self._pending_turn_snapshot = None
  # Successful read resets interrupt window
  self._last_interrupt_ts = None
  except KeyboardInterrupt:
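The edit flow above cannot prefill the low-level input engine, so it adopts a convention: empty input resends the interrupted message unchanged. A distilled sketch of that rule, with an illustrative helper name:

from typing import Optional

def resolve_edit(pending_edit: Optional[str], typed: str) -> str:
    """Empty (or whitespace-only) input resends the interrupted message verbatim."""
    if pending_edit is not None and not typed.strip():
        return pending_edit
    return typed

assert resolve_edit("fix the tests", "") == "fix the tests"
assert resolve_edit("fix the tests", "fix the docs") == "fix the docs"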
@@ -4969,6 +5235,29 @@ class ChatCLI:
  continue

  try:
+ # Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+ # This is critical for first-turn injections (code map/custom note/working memory)
+ # which are applied by mutating flags during payload construction.
+ self._pending_turn_snapshot = {
+ "history": copy.deepcopy(self.history),
+ "messages_for_save": copy.deepcopy(self.messages_for_save),
+ "kimi_raw": copy.deepcopy(self._kimi_raw_history),
+ "gemini_raw": copy.deepcopy(self._gemini_raw_history),
+ "openai_prev": getattr(self, "_openai_previous_response_id", None),
+ "openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+ "openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+ "openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+ "inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+ "did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+ "did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+ "did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+ "memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+ "last_built_user_content": getattr(self, "_last_built_user_content", None),
+ }
+
+ # Clear any stale in-flight dispatch context at turn start.
+ self._inflight_dispatch = None
+
  # Record user message for local/server save
  if self.save_chat_history:
  self.messages_for_save.append({
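The snapshot above is deep-copied because the histories are nested lists of dicts that later code mutates in place; a shallow copy would alias the live state. A minimal sketch of the checkpoint/revert pattern (state shape simplified for illustration):

import copy

state = {"history": [{"role": "user", "content": "hi"}], "did_inject": False}
snapshot = copy.deepcopy(state)          # pre-turn checkpoint

state["history"].append({"role": "assistant", "content": "partial..."})
state["did_inject"] = True               # first-turn flag flipped mid-stream

state = copy.deepcopy(snapshot)          # Ctrl+C: revert as if the turn never ran
assert state == {"history": [{"role": "user", "content": "hi"}], "did_inject": False}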
@@ -4992,6 +5281,67 @@ class ChatCLI:
  assistant_text = await self._stream_once(user_input)
  finally:
  self._busy = False
+ except KeyboardInterrupt:
+ # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
+ # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+ try:
+ await self._cancel_inflight_dispatch()
+ except (Exception, BaseException):
+ pass
+
+ # Restore state to *before* this turn started.
+ try:
+ snap = self._pending_turn_snapshot or {}
+ if isinstance(snap.get("history"), list):
+ self.history = snap.get("history")
+ if isinstance(snap.get("messages_for_save"), list):
+ self.messages_for_save = snap.get("messages_for_save")
+ if isinstance(snap.get("kimi_raw"), list):
+ self._kimi_raw_history = snap.get("kimi_raw")
+ if isinstance(snap.get("gemini_raw"), list):
+ self._gemini_raw_history = snap.get("gemini_raw")
+ if "openai_prev" in snap:
+ self._openai_previous_response_id = snap.get("openai_prev")
+ if isinstance(snap.get("openai_ids"), list):
+ self._openai_response_id_history = snap.get("openai_ids")
+ if isinstance(snap.get("openai_input_items"), list):
+ self._openai_input_items = snap.get("openai_input_items")
+ if "openai_last_sent_input_items" in snap:
+ self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
+ if "inflight_dispatch" in snap:
+ self._inflight_dispatch = snap.get("inflight_dispatch")
+ if "did_inject_codebase_map" in snap:
+ self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
+ if "did_inject_custom_first_turn" in snap:
+ self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
+ if "did_inject_working_memory" in snap:
+ self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
+ if "memory_paths_for_first_turn" in snap:
+ self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
+ self._last_built_user_content = snap.get("last_built_user_content")
+ except Exception:
+ pass
+
+ # Clear any transient indicator line and land on a fresh prompt line.
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ try:
+ self.ui.print()
+ except Exception:
+ pass
+
+ try:
+ supports = self._provider_supports_native_retention(self.model)
+ except Exception:
+ supports = False
+ if supports:
+ self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
+ else:
+ self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
+ self._pending_user_edit = user_input
+ continue
  except httpx.HTTPStatusError as he:
  try:
  if he.response is not None:
@@ -5024,14 +5374,14 @@ class ChatCLI:
  auth_action_key = "logout" if self.auth_user else "login"
  auth_action_label = f"🔓 Logout ({self.auth_user})" if self.auth_user else "🔑 Login"
  choices = [
- ("toggle_tools", f"🧰 Toggle Tools ({self._tools_label()}) - Enable/disable file tools per request (ON: request tools, OFF: no tools, DEFAULT: server setting)"),
+ ("toggle_tools", f"🧰 Toggle Tools ({self._tools_label()}) - Enable/disable file tools per request (ON: request tools, OFF: no tools, DEFAULT: server setting)"),
  ("toggle_history", f"🕘 Toggle History ({'ON' if self.save_chat_history else 'OFF'}) - Save chats to unified memory"),
  ("set_scope", f"📦 Set Filesystem Scope (current: {self._fs_label()}) - Choose workspace (sandbox) or host (full filesystem access if allowed)"),
  ("set_host_base", f"🖥️ Set Agent Scope (current: {self.host_base or '(none)'}) - Absolute path the agent can access when host scope is enabled"),
  ("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
  ("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
  (auth_action_key, auth_action_label),
- ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5, gemini-2.5-pro, grok-4, deepseek-chat) or use Change Model to type one"),
+ ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2.5, etc.)"),
  ("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
  ("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
  ("clear_history", "🧹 Clear History - Reset chat history"),
@@ -5067,22 +5417,21 @@ class ChatCLI:
  has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
  is_effectively_free = (self.is_free_tier and not has_credits)

- # Recommended models (ordered list for shuffling)
- # Curated list per request (include Codex Max as recommended)
- rec_keys = [
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
- "gemini-3-pro-preview",
- "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
- "kimi-k2-thinking",
- "grok-code-fast-1",
- ]
-
- # If effectively free, shuffle kimi-k2-thinking to the top
+ # Recommended models ("feelings" order)
+ rec_keys = [
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
+ "gemini-3-pro-preview",
+ "gemini-3-flash-preview",
+ "claude-opus-4-6",
+ "kimi-k2.5",
+ "grok-code-fast-1",
+ ]
+
+ # If effectively free, shuffle kimi-k2.5 to the top
  if is_effectively_free:
- target = "kimi-k2-thinking"
+ target = "kimi-k2.5"
  if target in rec_keys:
  rec_keys.remove(target)
  rec_keys.insert(0, target)
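The reorder above is a simple move-to-front. A standalone sketch showing its effect for the free tier; `promote` is an illustrative name and the list contents are copied from the hunk:

from typing import List

def promote(rec_keys: List[str], target: str) -> List[str]:
    keys = list(rec_keys)
    if target in keys:
        keys.remove(target)
        keys.insert(0, target)  # free-tier users see the free-friendly model first
    return keys

rec = ["gpt-5.2", "gpt-5.2-codex", "gpt-5", "gemini-3-pro-preview",
       "gemini-3-flash-preview", "claude-opus-4-6", "kimi-k2.5", "grok-code-fast-1"]
assert promote(rec, "kimi-k2.5")[0] == "kimi-k2.5"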
@@ -5117,8 +5466,7 @@ class ChatCLI:
  suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
  choices.append((m, f"{lbl}{suffix}"))

- choices.append(("default", "Server Default (no override)"))
- choices.append(("custom", "Custom (enter a model name)"))
+ # Per issue list: do not surface "server default" or "custom" in this picker.

  # Render and select using the unified highlighted picker
  picked: Optional[str] = None
@@ -5133,27 +5481,15 @@ class ChatCLI:
  picked = str(val)

  # Enforce free tier restrictions
- if picked not in ("default", "custom") and is_effectively_free and is_paid_model(picked):
+ if is_effectively_free and is_paid_model(picked):
  self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
  continue

  break

  # Apply selection
- if picked == "default":
- self.model = None
- self.ui.info("Model cleared; server default will be used.")
- elif picked == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
- self.model = self._resolve_model_alias(typed.strip() or None)
- if not self.model:
- self.ui.info("Model cleared; server default will be used.")
- else:
- self.model = picked
- self.ui.success(f"Model set to: {self.model}")
+ self.model = picked
+ self.ui.success(f"Model set to: {self.model}")

  self._apply_model_side_effects()
  self.save_settings()
@@ -5176,12 +5512,12 @@ class ChatCLI:
  self.save_settings()
  return True

- if choice == "toggle_history":
- self.save_chat_history = not self.save_chat_history
- self.ui.success(f"History set to: {'ON' if self.save_chat_history else 'OFF'}")
- self.save_settings()
- return True
-
+ if choice == "toggle_history":
+ self.save_chat_history = not self.save_chat_history
+ self.ui.success(f"History set to: {'ON' if self.save_chat_history else 'OFF'}")
+ self.save_settings()
+ return True
+
  if choice == "set_scope":
  await self.set_scope_menu()
  return True
@@ -5256,31 +5592,38 @@ class ChatCLI:
  pass
  # Clear provider-native histories on system reset
  try:
- self.messages_for_save = []
- if not self.save_chat_history:
- self.thread_uid = None
+ self.messages_for_save = []
+ if not self.save_chat_history:
+ self.thread_uid = None
  self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
  except Exception:
  pass
  self.ui.success("System prompt set.")
  self.save_settings()
  return True

- if choice == "clear_history":
+ if choice == "clear_history":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- try:
- self.messages_for_save = []
- if not self.save_chat_history:
- self.thread_uid = None
- self._kimi_raw_history = []
- self._gemini_raw_history = []
- except Exception:
- pass
+ try:
+ self.messages_for_save = []
+ if not self.save_chat_history:
+ self.thread_uid = None
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -5334,10 +5677,81 @@ class ChatCLI:
5334
5677
 
5335
5678
  # ----------------------- SSE Streaming loop ------------------------
5336
5679
  async def _stream_once(self, user_input: str) -> str:
5337
- # Build request payload
5338
- payload: Dict[str, Any] = {"messages": self._build_messages(user_input)}
5339
- if self.model:
5340
- payload["model"] = self.model
5680
+ # Build request payload.
5681
+ # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
5682
+ # `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
5683
+ if self._is_openai_model(self.model):
5684
+ msgs: List[Dict[str, str]] = []
5685
+ # Codex developer prompt (if enabled) + system prompt
5686
+ try:
5687
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
5688
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
5689
+ except Exception:
5690
+ pass
5691
+ if self.system_prompt:
5692
+ msgs.append({"role": "system", "content": self.system_prompt})
5693
+
5694
+ # Apply first-turn-only injections to the current user content
5695
+ content = user_input
5696
+ prefix = self._build_first_turn_injection(user_input)
5697
+ if prefix:
5698
+ content = f"{prefix}\n\n{user_input}"
5699
+ try:
5700
+ self._last_built_user_content = content
5701
+ except Exception:
5702
+ self._last_built_user_content = user_input
5703
+ msgs.append({"role": "user", "content": content})
5704
+
5705
+ payload: Dict[str, Any] = {"messages": msgs}
5706
+
5707
+ # Build OpenAI native input items (authoritative for the server OpenAI path).
5708
+ try:
5709
+ if isinstance(self._openai_input_items, list) and self._openai_input_items:
5710
+ items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
5711
+ else:
5712
+ # Seed with system prompts for the first OpenAI turn.
5713
+ items = []
5714
+ try:
5715
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
5716
+ items.append({"role": "system", "content": self._codex_system_prompt()})
5717
+ except Exception:
5718
+ pass
5719
+ if self.system_prompt:
5720
+ items.append({"role": "system", "content": self.system_prompt})
5721
+ items.append({"role": "user", "content": content})
5722
+ payload["openai_input_items"] = self._sanitize_openai_items(items)
5723
+ self._openai_last_sent_input_items = copy.deepcopy(items)
5724
+ except Exception:
5725
+ # If this fails for any reason, fall back to normal message-based history.
5726
+ self._openai_last_sent_input_items = None
5727
+
5728
+ # OpenAI Threading: DISABLED. We use full manual input item replay now.
5729
+ # if "openai_input_items" not in payload:
5730
+ # try:
5731
+ # if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
5732
+ # payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
5733
+ # except Exception:
5734
+ # pass
5735
+ try:
5736
+ if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
5737
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
5738
+ except Exception:
5739
+ pass
5740
+ else:
5741
+ payload = {"messages": self._build_messages(user_input)}
5742
+ if self.model:
5743
+ payload["model"] = self.model
5744
+ # OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
5745
+ try:
5746
+ if self._is_openai_model(self.model):
5747
+ if (
5748
+ isinstance(getattr(self, "_openai_response_id_history", None), list)
5749
+ and self._openai_response_id_history
5750
+ and "openai_response_id_history" not in payload
5751
+ ):
5752
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
5753
+ except Exception:
5754
+ pass
5341
5755
  # Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
5342
5756
  try:
5343
5757
  if self.terminal_id:
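The new OpenAI branch above replaces server-side threading with full client-side replay: each turn resends every provider-native input item (system and user messages plus reasoning items, tool calls, and tool outputs), and the per-turn delta reported by the server is folded back in on completion. A minimal sketch of that accumulate-and-replay pattern (class and method names are illustrative, not the package's actual API):

    import copy
    from typing import Any, Dict, List, Optional

    class InputItemReplay:
        """Accumulate provider-native input items and replay them every turn."""

        def __init__(self, system_prompt: Optional[str] = None) -> None:
            self.items: List[Dict[str, Any]] = []
            if system_prompt:
                self.items.append({"role": "system", "content": system_prompt})

        def build(self, user_text: str) -> List[Dict[str, Any]]:
            # Deep-copy so in-flight mutation never corrupts the stored history.
            items = copy.deepcopy(self.items)
            items.append({"role": "user", "content": user_text})
            return items

        def commit(self, sent: List[Dict[str, Any]], delta: List[Dict[str, Any]]) -> None:
            # Persist exactly what was sent plus the server-reported delta
            # (reasoning items, tool calls, tool outputs) for the next turn.
            self.items = copy.deepcopy(sent) + [dict(d) for d in delta if isinstance(d, dict)]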
@@ -5360,60 +5774,69 @@ class ChatCLI:
5360
5774
  payload["host_roots_mode"] = mode
5361
5775
  if mode in ("cwd", "custom") and self.host_base:
5362
5776
  payload["host_allowed_dirs"] = [self.host_base]
5363
- # Controls and approvals
5364
- if self.control_level in (1, 2, 3):
5365
- payload["control_level"] = self.control_level
5366
- # Auto-approve tools at L2: merge explicit auto_approve with trust lists so
5367
- # "trust for this session" / "always trust" choices also suppress repeat server prompts.
5368
- try:
5369
- auto_tools: List[str] = []
5370
- for seq in (self.auto_approve or [], self.trust_tools_session or [], self.trust_tools_always or []):
5371
- for t in seq:
5372
- if isinstance(t, str) and t.strip():
5373
- k = t.strip()
5374
- if k not in auto_tools:
5375
- auto_tools.append(k)
5376
- if auto_tools:
5377
- payload["auto_approve"] = auto_tools
5378
- except Exception:
5379
- if self.auto_approve:
5380
- payload["auto_approve"] = self.auto_approve
5381
-
5382
- # Auto-approve run_command base commands at L2 (hybrid approval + trust).
5383
- try:
5384
- cmd_bases: List[str] = []
5385
- for seq in (self.trust_cmds_session or [], self.trust_cmds_always or []):
5386
- for c in seq:
5387
- if isinstance(c, str) and c.strip():
5388
- k = c.strip().lower()
5389
- if k not in cmd_bases:
5390
- cmd_bases.append(k)
5391
- if cmd_bases:
5392
- payload["auto_approve_command_bases"] = cmd_bases
5393
- except Exception:
5394
- pass
5395
- # Reasoning effort (OpenAI reasoning models only; server will ignore for others).
5396
- # Let the server decide whether xhigh is supported for the selected (or default) model.
5397
- try:
5398
- if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
5399
- payload["reasoning_effort"] = self.reasoning_effort
5400
- else:
5401
- payload["reasoning_effort"] = "medium"
5402
- except Exception:
5403
- payload["reasoning_effort"] = "medium"
5777
+ # Controls and approvals
5778
+ if self.control_level in (1, 2, 3):
5779
+ payload["control_level"] = self.control_level
5780
+ # Auto-approve tools at L2: merge explicit auto_approve with trust lists so
5781
+ # "trust for this session" / "always trust" choices also suppress repeat server prompts.
5782
+ try:
5783
+ auto_tools: List[str] = []
5784
+ for seq in (self.auto_approve or [], self.trust_tools_session or [], self.trust_tools_always or []):
5785
+ for t in seq:
5786
+ if isinstance(t, str) and t.strip():
5787
+ k = t.strip()
5788
+ if k not in auto_tools:
5789
+ auto_tools.append(k)
5790
+ if auto_tools:
5791
+ payload["auto_approve"] = auto_tools
5792
+ except Exception:
5793
+ if self.auto_approve:
5794
+ payload["auto_approve"] = self.auto_approve
5795
+
5796
+ # Auto-approve run_command base commands at L2 (hybrid approval + trust).
5797
+ try:
5798
+ cmd_bases: List[str] = []
5799
+ for seq in (self.trust_cmds_session or [], self.trust_cmds_always or []):
5800
+ for c in seq:
5801
+ if isinstance(c, str) and c.strip():
5802
+ k = c.strip().lower()
5803
+ if k not in cmd_bases:
5804
+ cmd_bases.append(k)
5805
+ if cmd_bases:
5806
+ payload["auto_approve_command_bases"] = cmd_bases
5807
+ except Exception:
5808
+ pass
5809
+ # Reasoning effort (OpenAI reasoning models only; server will ignore for others).
5810
+ # Let the server decide whether xhigh is supported for the selected (or default) model.
5811
+ try:
5812
+ if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
5813
+ payload["reasoning_effort"] = self.reasoning_effort
5814
+ else:
5815
+ payload["reasoning_effort"] = "medium"
5816
+ except Exception:
5817
+ payload["reasoning_effort"] = "medium"
5404
5818
 
5405
5819
  # Anthropic thinking-mode budget (server ignores unless model ends with -thinking)
5406
5820
  try:
5407
5821
  if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
5408
- payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
5409
- except Exception:
5410
- pass
5411
- # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
5412
- try:
5413
- if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
5414
- payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
5415
- except Exception:
5416
- pass
5822
+ payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
5823
+ except Exception:
5824
+ pass
5825
+
5826
+ # Anthropic effort (Opus 4.6/4.5). Default to high.
5827
+ try:
5828
+ ae = getattr(self, "anthropic_effort", None)
5829
+ ae2 = str(ae or "high").strip().lower()
5830
+ if ae2 in ("low", "medium", "high", "max"):
5831
+ payload["anthropic_effort"] = ae2
5832
+ except Exception:
5833
+ payload["anthropic_effort"] = "high"
5834
+ # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
5835
+ try:
5836
+ if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
5837
+ payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
5838
+ except Exception:
5839
+ pass
5417
5840
 
5418
5841
  # Text verbosity and tool preambles preference (UI hints for the server)
5419
5842
  try:
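The anthropic_effort handling above is a normalize-with-default pattern: coerce the stored setting to a lowercase string, validate it against the allowed set, and fall back to "high". A standalone sketch (the allowed values come from the hunk; the helper name is hypothetical):

    from typing import Any

    ALLOWED_EFFORTS = ("low", "medium", "high", "max")

    def normalize_effort(raw: Any, default: str = "high") -> str:
        """Coerce an arbitrary settings value to a valid Anthropic effort level."""
        try:
            value = str(raw or default).strip().lower()
        except Exception:
            return default
        return value if value in ALLOWED_EFFORTS else default

    assert normalize_effort(None) == "high"
    assert normalize_effort(" MAX ") == "max"
    assert normalize_effort("turbo") == "high"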
@@ -5421,8 +5844,10 @@ class ChatCLI:
5421
5844
  payload["text_verbosity"] = self.text_verbosity
5422
5845
  except Exception:
5423
5846
  pass
5847
+ # Preambles are a GPT-5-only UX toggle.
5424
5848
  try:
5425
- payload["preambles_enabled"] = bool(self.preambles_enabled)
5849
+ if self._supports_preambles(self.model):
5850
+ payload["preambles_enabled"] = bool(self.preambles_enabled)
5426
5851
  except Exception:
5427
5852
  pass
5428
5853
 
@@ -5519,23 +5944,23 @@ class ChatCLI:
5519
5944
  headers["X-Request-Timeout"] = str(int(req_timeout_hint))
5520
5945
  except Exception:
5521
5946
  pass
5522
- # If using a Kimi model, include provider-native messages to preserve reasoning_content
5523
- try:
5524
- if isinstance(self.model, str) and self.model.startswith("kimi-"):
5525
- req_payload = dict(req_payload)
5526
- req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
5527
- except Exception:
5528
- pass
5529
- # If using a Gemini model, include provider-native contents to preserve thought signatures
5530
- # and strict tool-call chains across HTTP turns.
5531
- try:
5532
- if isinstance(self.model, str) and self.model.startswith("gemini-"):
5533
- req_payload = dict(req_payload)
5534
- hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
5535
- if hist:
5536
- req_payload["raw_provider_messages"] = hist
5537
- except Exception:
5538
- pass
5947
+ # If using a Kimi model, include provider-native messages to preserve reasoning_content
5948
+ try:
5949
+ if isinstance(self.model, str) and self.model.startswith("kimi-"):
5950
+ req_payload = dict(req_payload)
5951
+ req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
5952
+ except Exception:
5953
+ pass
5954
+ # If using a Gemini model, include provider-native contents to preserve thought signatures
5955
+ # and strict tool-call chains across HTTP turns.
5956
+ try:
5957
+ if isinstance(self.model, str) and self.model.startswith("gemini-"):
5958
+ req_payload = dict(req_payload)
5959
+ hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
5960
+ if hist:
5961
+ req_payload["raw_provider_messages"] = hist
5962
+ except Exception:
5963
+ pass
5539
5964
  async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
5540
5965
  async with client.stream("POST", self.stream_url, json=req_payload, headers=headers, follow_redirects=True) as resp:
5541
5966
  if resp.status_code == 429:
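Both provider branches above share one shape: copy the request dict before mutating it (the dict(req_payload) call), then attach provider-native history keyed off the model-id prefix. Condensed into a standalone sketch (the helper name is assumed, not the package's):

    from typing import Any, Dict, List

    def attach_raw_history(payload: Dict[str, Any], model: str,
                           kimi_history: List[Dict[str, Any]],
                           gemini_history: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a shallow copy of payload with provider-native messages attached."""
        out = dict(payload)  # copy first; the caller's payload is never mutated
        if model.startswith("kimi-") and kimi_history:
            out["raw_provider_messages"] = kimi_history
        elif model.startswith("gemini-") and gemini_history:
            out["raw_provider_messages"] = gemini_history
        return out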
@@ -5639,18 +6064,63 @@ class ChatCLI:
5639
6064
  self._rawlog_write(msg)
5640
6065
  except Exception:
5641
6066
  pass
5642
- # Idle "thinking" indicator shown while waiting for the next event (first tokens or next tool call)
5643
- indicator_task = None
5644
- indicator_active = False
5645
- indicator_started = False # used only to adjust leading newline behavior on first assistant header
5646
- # Track whether we're currently positioned at the start of a fresh line.
5647
- # This prevents double-newlines between back-to-back tool events.
5648
- at_line_start = True
5649
- # Mode: animate or static (default static for stability)
5650
- try:
5651
- _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
5652
- except Exception:
5653
- _animate_indicator = False
6067
+ # Idle "thinking" indicator shown while waiting for the next event (first tokens or next tool call)
6068
+ indicator_task = None
6069
+ indicator_active = False
6070
+ indicator_started = False # used only to adjust leading newline behavior on first assistant header
6071
+ # Track whether we're currently positioned at the start of a fresh line.
6072
+ # This prevents double-newlines between back-to-back tool events.
6073
+ at_line_start = True
6074
+
6075
+ # --- Tool call in-place status (issuelist.md #7) ---
6076
+ # We render a single transient line for the current tool call (no trailing newline)
6077
+ # so the later tool.result SUCCESS/FAILURE line can replace it in-place.
6078
+ tool_status_active = False
6079
+ tool_status_call_id = None
6080
+
6081
+ def _tool_status_clear_line() -> None:
6082
+ """Clear the current line (best-effort) and return to column 0."""
6083
+ nonlocal at_line_start
6084
+ try:
6085
+ sys.stdout.write("\r\x1b[2K")
6086
+ sys.stdout.flush()
6087
+ except Exception:
6088
+ pass
6089
+ at_line_start = True
6090
+
6091
+ def _tool_status_show(call_id: Any, line: str) -> None:
6092
+ """Show the transient tool status line (no newline)."""
6093
+ nonlocal tool_status_active, tool_status_call_id, at_line_start
6094
+ if not self.show_tool_calls:
6095
+ return
6096
+ tool_status_active = True
6097
+ tool_status_call_id = str(call_id) if call_id is not None else None
6098
+ try:
6099
+ if not at_line_start:
6100
+ sys.stdout.write("\n")
6101
+ sys.stdout.write("\r\x1b[2K" + str(line))
6102
+ sys.stdout.flush()
6103
+ at_line_start = False
6104
+ except Exception:
6105
+ # Fallback: degrade to a normal printed line
6106
+ try:
6107
+ self.ui.print(str(line))
6108
+ except Exception:
6109
+ pass
6110
+ at_line_start = True
6111
+
6112
+ def _tool_status_stop() -> None:
6113
+ """Remove the transient tool status line and clear tracking."""
6114
+ nonlocal tool_status_active, tool_status_call_id
6115
+ if tool_status_active:
6116
+ _tool_status_clear_line()
6117
+ tool_status_active = False
6118
+ tool_status_call_id = None
6119
+ # Mode: animate or static (default static for stability)
6120
+ try:
6121
+ _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
6122
+ except Exception:
6123
+ _animate_indicator = False
5654
6124
 
5655
6125
  async def _thinking_indicator_loop(chosen_word: str, spacing: int = 3) -> None:
5656
6126
  """Animate a transient thinking word on a single line until indicator_active becomes False.
@@ -5689,8 +6159,8 @@ class ChatCLI:
5689
6159
  except Exception:
5690
6160
  pass
5691
6161
 
5692
- async def _indicator_start() -> None:
5693
- nonlocal indicator_task, indicator_active, indicator_started, at_line_start
6162
+ async def _indicator_start() -> None:
6163
+ nonlocal indicator_task, indicator_active, indicator_started, at_line_start
5694
6164
  # Choose a random word and spacing each start
5695
6165
  word_bank = list(self._thinking_words or ["thinking", "working..."])
5696
6166
  if not word_bank:
@@ -5718,28 +6188,28 @@ class ChatCLI:
5718
6188
  c = colors[i % len(colors)]
5719
6189
  out_chars.append(f"\x1b[38;5;{c}m{ch}\x1b[0m")
5720
6190
  line = " " + joiner.join(out_chars) + " "
5721
- # Start on a dedicated new line so we never clobber prior output.
5722
- # If we're already at a fresh line, don't emit an extra newline (prevents
5723
- # visible blank lines between back-to-back tool events).
5724
- if not at_line_start:
5725
- sys.stdout.write("\n")
5726
- sys.stdout.write("\r\x1b[2K" + line)
5727
- sys.stdout.flush()
5728
- at_line_start = False
6191
+ # Start on a dedicated new line so we never clobber prior output.
6192
+ # If we're already at a fresh line, don't emit an extra newline (prevents
6193
+ # visible blank lines between back-to-back tool events).
6194
+ if not at_line_start:
6195
+ sys.stdout.write("\n")
6196
+ sys.stdout.write("\r\x1b[2K" + line)
6197
+ sys.stdout.flush()
6198
+ at_line_start = False
5729
6199
  # File debug
5730
6200
  try:
5731
6201
  self.ui.debug_log(f"indicator.start word='{chosen}' animate={_animate_indicator}")
5732
6202
  except Exception:
5733
6203
  pass
5734
- except Exception:
5735
- try:
5736
- if not at_line_start:
5737
- sys.stdout.write("\n")
5738
- sys.stdout.write("\r\x1b[2K" + (" " + joiner.join(list(str(chosen))) + " "))
5739
- sys.stdout.flush()
5740
- at_line_start = False
5741
- except Exception:
5742
- pass
6204
+ except Exception:
6205
+ try:
6206
+ if not at_line_start:
6207
+ sys.stdout.write("\n")
6208
+ sys.stdout.write("\r\x1b[2K" + (" " + joiner.join(list(str(chosen))) + " "))
6209
+ sys.stdout.flush()
6210
+ at_line_start = False
6211
+ except Exception:
6212
+ pass
5743
6213
  indicator_started = True
5744
6214
  if _animate_indicator:
5745
6215
  try:
@@ -5749,8 +6219,8 @@ class ChatCLI:
5749
6219
  indicator_task = None
5750
6220
  indicator_active = False
5751
6221
 
5752
- async def _indicator_stop(clear: bool = False) -> None:
5753
- nonlocal indicator_task, indicator_active, indicator_started, at_line_start
6222
+ async def _indicator_stop(clear: bool = False) -> None:
6223
+ nonlocal indicator_task, indicator_active, indicator_started, at_line_start
5754
6224
  # Only clear the line if an indicator was actually started.
5755
6225
  was_started = bool(indicator_started)
5756
6226
  indicator_active = False
@@ -5768,21 +6238,21 @@ class ChatCLI:
5768
6238
  finally:
5769
6239
  indicator_task = None
5770
6240
  # Default to not clearing to avoid erasing streamed content lines
5771
- if was_started and clear:
5772
- try:
5773
- sys.stdout.write("\r\x1b[2K")
5774
- sys.stdout.flush()
5775
- at_line_start = True
5776
- except Exception:
5777
- pass
5778
- elif was_started:
5779
- # Move to the next line to separate subsequent output
5780
- try:
5781
- sys.stdout.write("\n")
5782
- sys.stdout.flush()
5783
- at_line_start = True
5784
- except Exception:
5785
- pass
6241
+ if was_started and clear:
6242
+ try:
6243
+ sys.stdout.write("\r\x1b[2K")
6244
+ sys.stdout.flush()
6245
+ at_line_start = True
6246
+ except Exception:
6247
+ pass
6248
+ elif was_started:
6249
+ # Move to the next line to separate subsequent output
6250
+ try:
6251
+ sys.stdout.write("\n")
6252
+ sys.stdout.flush()
6253
+ at_line_start = True
6254
+ except Exception:
6255
+ pass
5786
6256
  # Reset started flag after stopping
5787
6257
  indicator_started = False
5788
6258
  try:
@@ -5812,26 +6282,26 @@ class ChatCLI:
5812
6282
  except json.JSONDecodeError:
5813
6283
  data = {"_raw": data_raw}
5814
6284
 
5815
- if event == "session.started":
5816
- # Keep indicator until first token; do not stop here
5817
- session_id = data.get("session_id")
5818
- lvl = data.get("level")
5819
- scope = data.get("fs_scope")
5820
- self.ui.print(f"[session] id={session_id} level={lvl} scope={scope}", style=self.ui.theme["dim"])
5821
- self._log_line({"event": "session.started", "server_session_id": session_id, "level": lvl, "fs_scope": scope})
5822
- # Record the server-authoritative level for this stream so approvals work
5823
- # even when the user left control_level as "server default".
5824
- try:
5825
- if isinstance(lvl, int):
5826
- self._current_turn["level"] = int(lvl)
5827
- elif isinstance(lvl, str) and str(lvl).strip().isdigit():
5828
- self._current_turn["level"] = int(str(lvl).strip())
5829
- except Exception:
5830
- pass
5831
- try:
5832
- await self._ws_broadcast("session.started", data)
5833
- except Exception:
5834
- pass
6285
+ if event == "session.started":
6286
+ # Keep indicator until first token; do not stop here
6287
+ session_id = data.get("session_id")
6288
+ lvl = data.get("level")
6289
+ scope = data.get("fs_scope")
6290
+ self.ui.print(f"[session] id={session_id} level={lvl} scope={scope}", style=self.ui.theme["dim"])
6291
+ self._log_line({"event": "session.started", "server_session_id": session_id, "level": lvl, "fs_scope": scope})
6292
+ # Record the server-authoritative level for this stream so approvals work
6293
+ # even when the user left control_level as "server default".
6294
+ try:
6295
+ if isinstance(lvl, int):
6296
+ self._current_turn["level"] = int(lvl)
6297
+ elif isinstance(lvl, str) and str(lvl).strip().isdigit():
6298
+ self._current_turn["level"] = int(str(lvl).strip())
6299
+ except Exception:
6300
+ pass
6301
+ try:
6302
+ await self._ws_broadcast("session.started", data)
6303
+ except Exception:
6304
+ pass
5835
6305
  try:
5836
6306
  self._current_turn["session_id"] = session_id
5837
6307
  except Exception:
@@ -5842,16 +6312,16 @@ class ChatCLI:
5842
6312
  pass
5843
6313
  continue
5844
6314
 
5845
- elif event == "message.delta":
5846
- # Stop any transient indicator before printing content and clear the line
5847
- try:
5848
- await _indicator_stop(clear=True)
5849
- except Exception:
5850
- pass
5851
- # Indicator line cleared; we're now at the start of a fresh line.
5852
- at_line_start = True
5853
- text = data.get("text", "")
5854
- if text:
6315
+ elif event == "message.delta":
6316
+ # Stop any transient indicator before printing content and clear the line
6317
+ try:
6318
+ await _indicator_stop(clear=True)
6319
+ except Exception:
6320
+ pass
6321
+ # Indicator line cleared; we're now at the start of a fresh line.
6322
+ at_line_start = True
6323
+ text = data.get("text", "")
6324
+ if text:
5855
6325
  try:
5856
6326
  _deltas_total += 1
5857
6327
  except Exception:
@@ -5886,23 +6356,23 @@ class ChatCLI:
5886
6356
  print(str(model_label) + ": ", end="", flush=True)
5887
6357
  except Exception:
5888
6358
  pass
5889
- header_printed = True
5890
- at_line_start = False
5891
- try:
5892
- self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
5893
- except Exception:
5894
- pass
6359
+ header_printed = True
6360
+ at_line_start = False
6361
+ try:
6362
+ self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
6363
+ except Exception:
6364
+ pass
5895
6365
  assistant_buf.append(text)
5896
6366
  # Print the token delta raw to avoid any wrapping/markup side-effects
5897
6367
  try:
5898
6368
  self.ui.print(text, style=self.ui.theme["assistant"], end="")
5899
- except Exception:
5900
- try:
5901
- print(str(text), end="", flush=True)
5902
- except Exception:
5903
- pass
5904
- at_line_start = False
5905
- # Deep debug: show each delta's size/preview
6369
+ except Exception:
6370
+ try:
6371
+ print(str(text), end="", flush=True)
6372
+ except Exception:
6373
+ pass
6374
+ at_line_start = False
6375
+ # Deep debug: show each delta's size/preview
5906
6376
  try:
5907
6377
  if DEBUG_SSE:
5908
6378
  prev = text[:40].replace("\n", "\\n")
@@ -5921,44 +6391,68 @@ class ChatCLI:
5921
6391
  except Exception:
5922
6392
  pass
5923
6393
 
5924
- elif event == "tool.call":
5925
- # Ensure any prior indicator state is reset cleanly, then restart
5926
- # a fresh indicator while waiting for the tool to run.
5927
- try:
5928
- await _indicator_stop(clear=True)
5929
- except Exception:
5930
- pass
5931
-
5932
- # If we were mid-line (e.g., streamed assistant text), break cleanly before
5933
- # showing the transient tool-wait indicator.
5934
- if not at_line_start:
5935
- try:
5936
- self.ui.print()
5937
- except Exception:
5938
- try:
5939
- print()
5940
- except Exception:
5941
- pass
5942
- at_line_start = True
5943
-
5944
- name = data.get("name")
5945
- args = data.get("args", {}) or {}
5946
- call_id = data.get("call_id")
6394
+ elif event == "tool.call":
6395
+ # Ensure any prior indicator state is reset cleanly, then restart
6396
+ # a fresh indicator while waiting for the tool to run.
6397
+ try:
6398
+ await _indicator_stop(clear=True)
6399
+ except Exception:
6400
+ pass
6401
+
6402
+ # If we were mid-line (e.g., streamed assistant text), break cleanly before
6403
+ # showing the transient tool-wait indicator.
6404
+ if not at_line_start:
6405
+ try:
6406
+ self.ui.print()
6407
+ except Exception:
6408
+ try:
6409
+ print()
6410
+ except Exception:
6411
+ pass
6412
+ at_line_start = True
6413
+
6414
+ name = data.get("name")
6415
+ args = data.get("args", {}) or {}
6416
+ call_id = data.get("call_id")
5947
6417
  try:
5948
6418
  self.ui.debug_log(f"tool.call name='{name}' call_id={call_id}")
5949
6419
  except Exception:
5950
6420
  pass
5951
6421
 
5952
- # Do NOT show the initial tool.call line per UX request; results will be
5953
- # rendered on tool.result. We still keep internal state and WS broadcasts.
5954
- # While the tool executes (server or client), show a subtle thinking
5955
- # indicator so users see progress during potentially long operations.
6422
+ # issuelist.md #7:
6423
+ # Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
5956
6424
  try:
5957
- # Do not start the indicator if we're in the middle of assistant token streaming
5958
- if (not streaming_assistant) and bool(getattr(self, "_thinking_indicator_enabled", False)):
5959
- await _indicator_start()
6425
+ # Clear any previous transient status line (there shouldn't be one, but this keeps rendering stable)
6426
+ _tool_status_stop()
5960
6427
  except Exception:
5961
6428
  pass
6429
+ try:
6430
+ tool_name = str(name or "").strip()
6431
+ label = self._tool_concise_label(
6432
+ tool_name,
6433
+ args if isinstance(args, dict) else {},
6434
+ None,
6435
+ )
6436
+ try:
6437
+ model_prefix = (
6438
+ self._current_turn.get("model")
6439
+ or self._last_used_model
6440
+ or self.model
6441
+ or "(server default)"
6442
+ )
6443
+ except Exception:
6444
+ model_prefix = self.model or "(server default)"
6445
+ ORANGE = "\x1b[38;5;214m"
6446
+ WHITE = "\x1b[97m"
6447
+ RESET = "\x1b[0m"
6448
+ status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
6449
+ _tool_status_show(call_id, status_line)
6450
+ except Exception:
6451
+ # Last-resort fallback: print something rather than crash streaming.
6452
+ try:
6453
+ self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
6454
+ except Exception:
6455
+ pass
5962
6456
  # Count tool calls
5963
6457
  try:
5964
6458
  tool_calls += 1
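The hard-coded escapes in the [RUNNING] line above are SGR color codes: 38;5;214 selects orange from the 256-color palette, and 97 is the classic bright white. A small helper showing the same formatting (the function name is illustrative):

    def c256(text: str, color: int) -> str:
        """Wrap text in an 8-bit foreground color (SGR 38;5;n) and reset after."""
        return f"\x1b[38;5;{color}m{text}\x1b[0m"

    ORANGE = 214  # the palette index hard-coded in the hunk
    BRIGHT_WHITE = "\x1b[97m"  # 16-color bright white, as used above
    print(f"{c256('gpt-5.2', ORANGE)}: {c256('[RUNNING]', ORANGE)} {BRIGHT_WHITE}list_dir .\x1b[0m")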
@@ -5982,6 +6476,11 @@ class ChatCLI:
5982
6476
  pass
5983
6477
 
5984
6478
  elif event == "approval.request":
6479
+ # Don't let the transient [RUNNING] line collide with interactive prompts.
6480
+ try:
6481
+ _tool_status_stop()
6482
+ except Exception:
6483
+ pass
5985
6484
  # First reply wins (web or CLI)
5986
6485
  await self._handle_approval_request(client, session_id, data)
5987
6486
  continue
@@ -6030,23 +6529,35 @@ class ChatCLI:
6030
6529
  name = str(data.get("name"))
6031
6530
  result = data.get("result", {}) or {}
6032
6531
  call_id = data.get("call_id")
6033
- # Stop any indicator before rendering results
6034
- try:
6035
- await _indicator_stop(clear=True)
6036
- except Exception:
6037
- pass
6038
- # Ensure tool result starts on a fresh line if assistant text was mid-line.
6039
- # Don't rely on assistant_buf ending with "\n" because UI.ensure_newline()
6040
- # prints without mutating the buffer, which can cause repeated blank lines.
6041
- if not at_line_start:
6042
- try:
6043
- self.ui.print()
6044
- except Exception:
6045
- try:
6046
- print()
6047
- except Exception:
6048
- pass
6049
- at_line_start = True
6532
+ # If we previously rendered a transient [RUNNING] line for this tool call,
6533
+ # clear it now so the SUCCESS/FAILURE line prints in the same place.
6534
+ try:
6535
+ if tool_status_active:
6536
+ # Best-effort match on call_id (some providers may omit it).
6537
+ if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
6538
+ _tool_status_stop()
6539
+ except Exception:
6540
+ try:
6541
+ _tool_status_stop()
6542
+ except Exception:
6543
+ pass
6544
+ # Stop any indicator before rendering results
6545
+ try:
6546
+ await _indicator_stop(clear=True)
6547
+ except Exception:
6548
+ pass
6549
+ # Ensure tool result starts on a fresh line if assistant text was mid-line.
6550
+ # Don't rely on assistant_buf ending with "\n" because UI.ensure_newline()
6551
+ # prints without mutating the buffer, which can cause repeated blank lines.
6552
+ if not at_line_start:
6553
+ try:
6554
+ self.ui.print()
6555
+ except Exception:
6556
+ try:
6557
+ print()
6558
+ except Exception:
6559
+ pass
6560
+ at_line_start = True
6050
6561
  # Concise default: one professional, natural-language line per tool call.
6051
6562
  if not self.ui.verbose:
6052
6563
  try:
@@ -6102,18 +6613,18 @@ class ChatCLI:
6102
6613
  except Exception:
6103
6614
  # Fall back to legacy renderer on unexpected issues
6104
6615
  self._render_tool_result(name, result, call_id=call_id)
6105
- else:
6106
- # Verbose mode retains the richer summary with previews
6107
- self._render_tool_result(name, result, call_id=call_id)
6108
-
6109
- # Tool result output is line-oriented; after rendering we should be positioned
6110
- # at the start of a fresh line so the next tool.call indicator doesn't insert
6111
- # an extra blank line.
6112
- at_line_start = True
6113
- try:
6114
- await self._ws_broadcast("tool.result", {"name": name, "result": result, "call_id": call_id})
6115
- except Exception:
6116
- pass
6616
+ else:
6617
+ # Verbose mode retains the richer summary with previews
6618
+ self._render_tool_result(name, result, call_id=call_id)
6619
+
6620
+ # Tool result output is line-oriented; after rendering we should be positioned
6621
+ # at the start of a fresh line so the next tool.call indicator doesn't insert
6622
+ # an extra blank line.
6623
+ at_line_start = True
6624
+ try:
6625
+ await self._ws_broadcast("tool.result", {"name": name, "result": result, "call_id": call_id})
6626
+ except Exception:
6627
+ pass
6117
6628
  # For Kimi, append provider-native tool result to raw history so it's threaded correctly
6118
6629
  try:
6119
6630
  if bool(getattr(self, "retain_native_tool_results", False)) and isinstance(self.model, str) and self.model.startswith("kimi-") and call_id:
@@ -6136,7 +6647,7 @@ class ChatCLI:
6136
6647
  pass
6137
6648
  # Do not auto-restart the indicator here; wait for the next model event
6138
6649
 
6139
- elif event == "tool.dispatch":
6650
+ elif event == "tool.dispatch":
6140
6651
  # Client-executed tool flow
6141
6652
  if not HAS_LOCAL_TOOLS:
6142
6653
  self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
@@ -6153,6 +6664,17 @@ class ChatCLI:
6153
6664
  args = data.get("args", {}) or {}
6154
6665
  job_token = data.get("job_token")
6155
6666
  reqp = data.get("requested_policy", {}) or {}
6667
+
6668
+ # Track in-flight dispatch so Ctrl+C can cancel quickly.
6669
+ try:
6670
+ self._inflight_dispatch = {
6671
+ "session_id": session_id_d,
6672
+ "call_id": call_id,
6673
+ "job_token": job_token,
6674
+ "name": name,
6675
+ }
6676
+ except Exception:
6677
+ pass
6156
6678
 
6157
6679
  if DEBUG_SSE:
6158
6680
  self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
@@ -6163,16 +6685,16 @@ class ChatCLI:
6163
6685
  except Exception:
6164
6686
  pass
6165
6687
 
6166
- # Level gating and CLI approvals (Level 2)
6167
- try:
6168
- lvl = int(self.control_level) if isinstance(self.control_level, int) else None
6169
- if lvl is None:
6170
- # Prefer the server-reported level from session.started
6171
- sl = self._current_turn.get("level") if isinstance(self._current_turn, dict) else None
6172
- if isinstance(sl, int):
6173
- lvl = int(sl)
6174
- except Exception:
6175
- lvl = None
6688
+ # Level gating and CLI approvals (Level 2)
6689
+ try:
6690
+ lvl = int(self.control_level) if isinstance(self.control_level, int) else None
6691
+ if lvl is None:
6692
+ # Prefer the server-reported level from session.started
6693
+ sl = self._current_turn.get("level") if isinstance(self._current_turn, dict) else None
6694
+ if isinstance(sl, int):
6695
+ lvl = int(sl)
6696
+ except Exception:
6697
+ lvl = None
6176
6698
  # Hard block at Level 1 for anything other than read/list
6177
6699
  if lvl == 1:
6178
6700
  disallowed = str(name) not in ("read_file", "list_dir")
@@ -6313,84 +6835,84 @@ class ChatCLI:
6313
6835
  result = local_append_file(args.get("path", ""), args.get("content", ""), policy)
6314
6836
  elif name == "list_dir":
6315
6837
  result = local_list_dir(args.get("path", ""), policy)
6316
- elif name == "run_command":
6317
- # Command allow policy:
6318
- # - L1: blocked earlier
6319
- # - L2: approval required; once approved, allow any base command
6320
- # - L3: no approval; allow any base command
6321
- # Use '*' wildcard (supported by henosis_cli_tools.run_command).
6322
- if lvl in (2, 3):
6323
- allow_csv = "*"
6324
- else:
6325
- # Legacy: intersect server + local allowlists
6326
- req_allow = (reqp.get("command_allow_csv") or "").strip()
6327
- local_allow = os.getenv("HENOSIS_ALLOW_COMMANDS", "")
6328
- if req_allow and local_allow:
6329
- req_set = {c.strip().lower() for c in req_allow.split(",") if c.strip()}
6330
- loc_set = {c.strip().lower() for c in local_allow.split(",") if c.strip()}
6331
- allow_csv = ",".join(sorted(req_set & loc_set))
6332
- else:
6333
- allow_csv = local_allow or req_allow or ""
6334
- # Include trusted commands from CLI settings (session + always)
6335
- try:
6336
- allow_set = {c.strip().lower() for c in allow_csv.split(",") if c.strip()}
6337
- for k in (self.trust_cmds_session or []):
6338
- allow_set.add(str(k).strip().lower())
6339
- for k in (self.trust_cmds_always or []):
6340
- allow_set.add(str(k).strip().lower())
6341
- allow_csv = ",".join(sorted(allow_set))
6342
- except Exception:
6343
- pass
6344
- timeout = args.get("timeout", None)
6345
- result = local_run_command(args.get("cmd", ""), policy, cwd=args.get("cwd", "."), timeout=timeout, allow_commands_csv=allow_csv)
6346
- # Legacy allowlist retry logic removed for L2/L3 (we allow '*').
6347
- elif name == "apply_patch":
6348
- result = local_apply_patch(
6349
- patch=args.get("patch", ""),
6350
- policy=policy,
6351
- cwd=args.get("cwd", "."),
6352
- lenient=bool(args.get("lenient", True)),
6353
- dry_run=bool(args.get("dry_run", False)),
6354
- backup=bool(args.get("backup", True)),
6355
- safeguard_max_lines=int(args.get("safeguard_max_lines", 3000) or 3000),
6356
- safeguard_confirm=bool(args.get("safeguard_confirm", False)),
6357
- )
6358
- elif name == "planning":
6359
- # Persist plan under plans/ at the current root (workspace or host base)
6360
- try:
6361
- plan_text = str(args.get("plan", "") or "").strip()
6362
- ctx_text = args.get("context")
6363
- if not plan_text:
6364
- result = {"ok": False, "error": "plan is required"}
6365
- else:
6366
- base = policy.workspace_base if policy.scope != "host" else (policy.host_base or Path(os.getcwd()).resolve())
6367
- plans_dir = Path(base) / "plans"
6368
- plans_dir.mkdir(parents=True, exist_ok=True)
6369
- from datetime import datetime as _dt
6370
- import re as _re, uuid as _uuid
6371
- ts = _dt.utcnow().strftime("%Y%m%d-%H%M%S")
6372
- first_line = plan_text.splitlines()[0] if plan_text else "plan"
6373
- slug = _re.sub(r"[^a-zA-Z0-9_-]+", "-", first_line).strip("-") or "plan"
6374
- slug = slug[:40]
6375
- fname = f"plan-{ts}-{_uuid.uuid4().hex[:6]}-{slug}.md"
6376
- fpath = plans_dir / fname
6377
- body_lines = [f"# Plan ({ts} UTC)\n"]
6378
- if ctx_text:
6379
- body_lines.append("## Context\n")
6380
- body_lines.append(str(ctx_text).strip() + "\n\n")
6381
- body_lines.append("## Steps\n")
6382
- body_lines.append(plan_text.rstrip() + "\n")
6383
- content = "\n".join(body_lines)
6384
- with fpath.open("w", encoding="utf-8", newline="") as f:
6385
- f.write(content)
6386
- result = {"ok": True, "data": {"path": str(fpath), "bytes_written": len(content.encode('utf-8'))}}
6387
- except Exception as _pe:
6388
- result = {"ok": False, "error": str(_pe)}
6389
- elif name == "string_replace":
6390
- result = local_string_replace(
6391
- pattern=args.get("pattern", ""),
6392
- replacement=args.get("replacement", ""),
6393
- policy=policy,
6838
+ elif name == "run_command":
6839
+ # Command allow policy:
6840
+ # - L1: blocked earlier
6841
+ # - L2: approval required; once approved, allow any base command
6842
+ # - L3: no approval; allow any base command
6843
+ # Use '*' wildcard (supported by henosis_cli_tools.run_command).
6844
+ if lvl in (2, 3):
6845
+ allow_csv = "*"
6846
+ else:
6847
+ # Legacy: intersect server + local allowlists
6848
+ req_allow = (reqp.get("command_allow_csv") or "").strip()
6849
+ local_allow = os.getenv("HENOSIS_ALLOW_COMMANDS", "")
6850
+ if req_allow and local_allow:
6851
+ req_set = {c.strip().lower() for c in req_allow.split(",") if c.strip()}
6852
+ loc_set = {c.strip().lower() for c in local_allow.split(",") if c.strip()}
6853
+ allow_csv = ",".join(sorted(req_set & loc_set))
6854
+ else:
6855
+ allow_csv = local_allow or req_allow or ""
6856
+ # Include trusted commands from CLI settings (session + always)
6857
+ try:
6858
+ allow_set = {c.strip().lower() for c in allow_csv.split(",") if c.strip()}
6859
+ for k in (self.trust_cmds_session or []):
6860
+ allow_set.add(str(k).strip().lower())
6861
+ for k in (self.trust_cmds_always or []):
6862
+ allow_set.add(str(k).strip().lower())
6863
+ allow_csv = ",".join(sorted(allow_set))
6864
+ except Exception:
6865
+ pass
6866
+ timeout = args.get("timeout", None)
6867
+ result = local_run_command(args.get("cmd", ""), policy, cwd=args.get("cwd", "."), timeout=timeout, allow_commands_csv=allow_csv)
6868
+ # Legacy allowlist retry logic removed for L2/L3 (we allow '*').
6869
+ elif name == "apply_patch":
6870
+ result = local_apply_patch(
6871
+ patch=args.get("patch", ""),
6872
+ policy=policy,
6873
+ cwd=args.get("cwd", "."),
6874
+ lenient=bool(args.get("lenient", True)),
6875
+ dry_run=bool(args.get("dry_run", False)),
6876
+ backup=bool(args.get("backup", True)),
6877
+ safeguard_max_lines=int(args.get("safeguard_max_lines", 3000) or 3000),
6878
+ safeguard_confirm=bool(args.get("safeguard_confirm", False)),
6879
+ )
6880
+ elif name == "planning":
6881
+ # Persist plan under plans/ at the current root (workspace or host base)
6882
+ try:
6883
+ plan_text = str(args.get("plan", "") or "").strip()
6884
+ ctx_text = args.get("context")
6885
+ if not plan_text:
6886
+ result = {"ok": False, "error": "plan is required"}
6887
+ else:
6888
+ base = policy.workspace_base if policy.scope != "host" else (policy.host_base or Path(os.getcwd()).resolve())
6889
+ plans_dir = Path(base) / "plans"
6890
+ plans_dir.mkdir(parents=True, exist_ok=True)
6891
+ from datetime import datetime as _dt
6892
+ import re as _re, uuid as _uuid
6893
+ ts = _dt.utcnow().strftime("%Y%m%d-%H%M%S")
6894
+ first_line = plan_text.splitlines()[0] if plan_text else "plan"
6895
+ slug = _re.sub(r"[^a-zA-Z0-9_-]+", "-", first_line).strip("-") or "plan"
6896
+ slug = slug[:40]
6897
+ fname = f"plan-{ts}-{_uuid.uuid4().hex[:6]}-{slug}.md"
6898
+ fpath = plans_dir / fname
6899
+ body_lines = [f"# Plan ({ts} UTC)\n"]
6900
+ if ctx_text:
6901
+ body_lines.append("## Context\n")
6902
+ body_lines.append(str(ctx_text).strip() + "\n\n")
6903
+ body_lines.append("## Steps\n")
6904
+ body_lines.append(plan_text.rstrip() + "\n")
6905
+ content = "\n".join(body_lines)
6906
+ with fpath.open("w", encoding="utf-8", newline="") as f:
6907
+ f.write(content)
6908
+ result = {"ok": True, "data": {"path": str(fpath), "bytes_written": len(content.encode('utf-8'))}}
6909
+ except Exception as _pe:
6910
+ result = {"ok": False, "error": str(_pe)}
6911
+ elif name == "string_replace":
6912
+ result = local_string_replace(
6913
+ pattern=args.get("pattern", ""),
6914
+ replacement=args.get("replacement", ""),
6915
+ policy=policy,
6394
6916
  cwd=args.get("cwd", "."),
6395
6917
  file_globs=[str(g) for g in (args.get("file_globs") or [])],
6396
6918
  exclude_globs=[str(e) for e in (args.get("exclude_globs") or [])],
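In the legacy (sub-L2) run_command branch above, the effective allowlist is the case-insensitive intersection of the server-requested and local CSV lists, with either side winning outright when the other is empty. A standalone sketch of that merge (the helper name is assumed):

    def merge_allowlists(server_csv: str, local_csv: str) -> str:
        """Intersect two comma-separated command allowlists, case-insensitively.

        When only one side is set, that side wins, matching the hunk's
        `local_allow or req_allow or ""` fallback."""
        server = {c.strip().lower() for c in server_csv.split(",") if c.strip()}
        local = {c.strip().lower() for c in local_csv.split(",") if c.strip()}
        if server and local:
            return ",".join(sorted(server & local))
        return ",".join(sorted(local or server))

    assert merge_allowlists("git,ls,rm", "LS, git") == "git,ls"
    assert merge_allowlists("", "pytest") == "pytest"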
@@ -6442,18 +6964,82 @@ class ChatCLI:
6442
6964
  self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
6443
6965
  except Exception as e:
6444
6966
  self.ui.warn(f"tools.callback error: {e}")
6967
+ finally:
6968
+ try:
6969
+ # Clear in-flight dispatch context when we send a callback.
6970
+ if isinstance(self._inflight_dispatch, dict):
6971
+ if str(self._inflight_dispatch.get("call_id")) == str(call_id):
6972
+ self._inflight_dispatch = None
6973
+ except Exception:
6974
+ pass
6445
6975
 
6446
- elif event == "message.completed":
6976
+ elif event == "message.completed":
6447
6977
  # Safety: this block handles only 'message.completed'.
6448
6978
  usage = data.get("usage", {})
6449
- model_used = data.get("model") or self.model
6450
- # Gemini: server may include an authoritative provider-native history snapshot.
6451
- try:
6452
- if isinstance(model_used, str) and model_used.startswith("gemini-"):
6453
- rpm = data.get("raw_provider_messages")
6454
- self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
6455
- except Exception:
6456
- pass
6979
+ model_used = data.get("model") or self.model
6980
+ # OpenAI: persist the last response id so future turns can use previous_response_id.
6981
+ try:
6982
+ if self._is_openai_model(model_used):
6983
+ # Prefer the explicit per-turn id list when provided by the server.
6984
+ ids = data.get("openai_response_ids")
6985
+ if isinstance(ids, list) and ids:
6986
+ for x in ids:
6987
+ if not isinstance(x, str):
6988
+ continue
6989
+ xs = x.strip()
6990
+ if not xs:
6991
+ continue
6992
+ try:
6993
+ if xs not in self._openai_response_id_history:
6994
+ self._openai_response_id_history.append(xs)
6995
+ except Exception:
6996
+ pass
6997
+ rid = data.get("openai_previous_response_id")
6998
+ if isinstance(rid, str) and rid.strip():
6999
+ self._openai_previous_response_id = rid.strip()
7000
+ try:
7001
+ if rid.strip() not in self._openai_response_id_history:
7002
+ self._openai_response_id_history.append(rid.strip())
7003
+ except Exception:
7004
+ pass
7005
+
7006
+ # OpenAI manual-state replay: server returns the delta items appended
7007
+ # during this turn (reasoning/tool calls/tool outputs). Persist them.
7008
+ try:
7009
+ delta = data.get("openai_delta_items")
7010
+ if isinstance(delta, list):
7011
+ base_items = (
7012
+ self._openai_last_sent_input_items
7013
+ if isinstance(self._openai_last_sent_input_items, list)
7014
+ else copy.deepcopy(self._openai_input_items)
7015
+ )
7016
+ # Normalize to a list of dicts where possible; keep unknown shapes as-is.
7017
+ merged: List[Any] = []
7018
+ try:
7019
+ merged.extend(list(base_items or []))
7020
+ except Exception:
7021
+ merged = list(base_items or []) if base_items is not None else []
7022
+ merged.extend(delta)
7023
+ # Store only dict-like items (server is expected to send dicts)
7024
+ cleaned: List[Dict[str, Any]] = []
7025
+ for it in merged:
7026
+ if isinstance(it, dict):
7027
+ cleaned.append(dict(it))
7028
+ self._openai_input_items = cleaned
7029
+ except Exception:
7030
+ pass
7031
+ finally:
7032
+ # Clear per-turn sent snapshot
7033
+ self._openai_last_sent_input_items = None
7034
+ except Exception:
7035
+ pass
7036
+ # Gemini: server may include an authoritative provider-native history snapshot.
7037
+ try:
7038
+ if isinstance(model_used, str) and model_used.startswith("gemini-"):
7039
+ rpm = data.get("raw_provider_messages")
7040
+ self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
7041
+ except Exception:
7042
+ pass
6457
7043
  # Mark completion for retry controller
6458
7044
  try:
6459
7045
  last_completed = True
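The response-id bookkeeping above is an order-preserving, de-duplicating append that tolerates non-string and blank entries. Extracted as a sketch (the function name is hypothetical):

    from typing import Iterable, List

    def append_unique_ids(history: List[str], new_ids: Iterable[object]) -> List[str]:
        """Append stripped, non-empty string ids in order, skipping duplicates."""
        for raw in new_ids:
            if not isinstance(raw, str):
                continue
            rid = raw.strip()
            if rid and rid not in history:
                history.append(rid)
        return history

    ids: List[str] = []
    append_unique_ids(ids, ["resp_1", " resp_1 ", None, "resp_2"])
    assert ids == ["resp_1", "resp_2"]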
@@ -6749,13 +7335,13 @@ class ChatCLI:
6749
7335
  # Compact style: include reasoning effort inline with model name when applicable
6750
7336
  try:
6751
7337
  effort_seg = ""
6752
- if self._is_openai_reasoning_model(model_used):
6753
- # Convert low|medium|high|xhigh -> Low|Medium|High|XHigh for display
6754
- lvl = str(self.reasoning_effort or "medium").strip().lower()
6755
- if lvl not in ("low", "medium", "high", "xhigh"):
6756
- lvl = "medium"
6757
- disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
6758
- effort_seg = f" {disp}"
7338
+ if self._is_openai_reasoning_model(model_used):
7339
+ # Convert low|medium|high|xhigh -> Low|Medium|High|XHigh for display
7340
+ lvl = str(self.reasoning_effort or "medium").strip().lower()
7341
+ if lvl not in ("low", "medium", "high", "xhigh"):
7342
+ lvl = "medium"
7343
+ disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
7344
+ effort_seg = f" {disp}"
6759
7345
  except Exception:
6760
7346
  effort_seg = ""
6761
7347
  model_only_line = f"model: {model_used or '(unknown)'}{effort_seg}"
@@ -6955,47 +7541,47 @@ class ChatCLI:
6955
7541
  except Exception:
6956
7542
  pass
6957
7543
 
6958
- # Anthropic prompt caching banner when detected (reads @10% input rate; creation billed at TTL multiplier)
6959
- try:
6960
- price = self._resolve_price(model_used)
6961
- provider = (price.get("provider") or "").lower()
6962
- if provider == "anthropic":
6963
- cr = int(usage.get("cache_read_input_tokens", 0) or 0)
6964
- cc = int(usage.get("cache_creation_input_tokens", 0) or 0)
6965
- # Optional breakdown
6966
- cc_5m = 0
6967
- cc_1h = 0
6968
- try:
6969
- ccmap = usage.get("cache_creation") if isinstance(usage, dict) else None
6970
- if isinstance(ccmap, dict):
6971
- cc_5m = int(ccmap.get("ephemeral_5m_input_tokens", 0) or 0)
6972
- cc_1h = int(ccmap.get("ephemeral_1h_input_tokens", 0) or 0)
6973
- except Exception:
6974
- cc_5m = cc_5m or 0
6975
- cc_1h = cc_1h or 0
6976
- if (cr > 0) or (cc > 0) or (cc_5m > 0) or (cc_1h > 0):
6977
- # Build a concise line similar to OpenAI banner
6978
- line = f"Billing: Anthropic prompt cache read {int(cr)} token(s) @10% input rate"
6979
- if (cc_5m > 0) or (cc_1h > 0):
6980
- line += f" | created {int(cc_5m)} @1.25x + {int(cc_1h)} @2x"
6981
- else:
6982
- if cc > 0:
6983
- line += f" | created {int(cc)} token(s) (billed at 1.25x/2x based on TTL)"
6984
- # Calculate savings (reported on a separate line to match OpenAI style)
6985
- saved_line = None
6986
- if cr > 0:
6987
- try:
6988
- in_rate_per_m = float(price.get("input", 0.0))
6989
- # Savings = cache_read * (1.0 - 0.1) * price
6990
- saved_usd = (int(cr) / 1_000_000.0) * in_rate_per_m * 0.90
6991
- saved_line = f"saved ${saved_usd:.2f} with prompt cache"
6992
- except Exception:
6993
- saved_line = None
6994
- box_lines.append(line)
6995
- if saved_line:
6996
- box_lines.append(saved_line)
6997
- except Exception:
6998
- pass
7544
+ # Anthropic prompt caching banner when detected (reads @10% input rate; creation billed at TTL multiplier)
7545
+ try:
7546
+ price = self._resolve_price(model_used)
7547
+ provider = (price.get("provider") or "").lower()
7548
+ if provider == "anthropic":
7549
+ cr = int(usage.get("cache_read_input_tokens", 0) or 0)
7550
+ cc = int(usage.get("cache_creation_input_tokens", 0) or 0)
7551
+ # Optional breakdown
7552
+ cc_5m = 0
7553
+ cc_1h = 0
7554
+ try:
7555
+ ccmap = usage.get("cache_creation") if isinstance(usage, dict) else None
7556
+ if isinstance(ccmap, dict):
7557
+ cc_5m = int(ccmap.get("ephemeral_5m_input_tokens", 0) or 0)
7558
+ cc_1h = int(ccmap.get("ephemeral_1h_input_tokens", 0) or 0)
7559
+ except Exception:
7560
+ cc_5m = cc_5m or 0
7561
+ cc_1h = cc_1h or 0
7562
+ if (cr > 0) or (cc > 0) or (cc_5m > 0) or (cc_1h > 0):
7563
+ # Build a concise line similar to OpenAI banner
7564
+ line = f"Billing: Anthropic prompt cache read {int(cr)} token(s) @10% input rate"
7565
+ if (cc_5m > 0) or (cc_1h > 0):
7566
+ line += f" | created {int(cc_5m)} @1.25x + {int(cc_1h)} @2x"
7567
+ else:
7568
+ if cc > 0:
7569
+ line += f" | created {int(cc)} token(s) (billed at 1.25x/2x based on TTL)"
7570
+ # Calculate savings (reported on a separate line to match OpenAI style)
7571
+ saved_line = None
7572
+ if cr > 0:
7573
+ try:
7574
+ in_rate_per_m = float(price.get("input", 0.0))
7575
+ # Savings = cache_read * (1.0 - 0.1) * price
7576
+ saved_usd = (int(cr) / 1_000_000.0) * in_rate_per_m * 0.90
7577
+ saved_line = f"saved ${saved_usd:.2f} with prompt cache"
7578
+ except Exception:
7579
+ saved_line = None
7580
+ box_lines.append(line)
7581
+ if saved_line:
7582
+ box_lines.append(saved_line)
7583
+ except Exception:
7584
+ pass
6999
7585
 
7000
7586
  # Show consolidated usage summary
7001
7587
  try:
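The savings figure in this banner follows from cache reads billing at 10% of the normal input rate: full price would be tokens/1e6 times the per-million rate, and the cached read costs a tenth of that, so 90% of the full price is saved. A worked sketch of the arithmetic:

    def cache_savings_usd(cache_read_tokens: int, input_rate_per_mtok: float) -> float:
        """Dollars saved by reading tokens from the prompt cache at a 10% rate.

        Full price would be tokens/1e6 * rate; the cached read costs 10% of
        that, so 90% of the full price is the savings reported."""
        return (cache_read_tokens / 1_000_000.0) * input_rate_per_mtok * 0.90

    # Example: 200k cached tokens at $3.00 per million input tokens.
    assert round(cache_savings_usd(200_000, 3.00), 2) == 0.54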
@@ -7295,29 +7881,29 @@ class ChatCLI:
7295
7881
  pass
7296
7882
  return "".join(assistant_buf)
7297
7883
 
7298
- elif event == "provider.message":
7299
- # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7300
- provider = (data.get("provider") or "").lower()
7301
- msg = data.get("message")
7302
- if provider == "gemini":
7303
- # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
7304
- try:
7305
- if isinstance(msg, dict):
7306
- self._gemini_raw_history.append(dict(msg))
7307
- elif isinstance(msg, list):
7308
- self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
7309
- except Exception:
7310
- pass
7311
- if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7312
- # Append as-is to local raw history for the next turn
7313
- try:
7314
- self._kimi_raw_history.append(dict(msg))
7315
- except Exception:
7316
- try:
7317
- self._kimi_raw_history.append(msg) # type: ignore
7318
- except Exception:
7319
- pass
7320
- continue
7884
+ elif event == "provider.message":
7885
+ # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7886
+ provider = (data.get("provider") or "").lower()
7887
+ msg = data.get("message")
7888
+ if provider == "gemini":
7889
+ # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
7890
+ try:
7891
+ if isinstance(msg, dict):
7892
+ self._gemini_raw_history.append(dict(msg))
7893
+ elif isinstance(msg, list):
7894
+ self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
7895
+ except Exception:
7896
+ pass
7897
+ if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7898
+ # Append as-is to local raw history for the next turn
7899
+ try:
7900
+ self._kimi_raw_history.append(dict(msg))
7901
+ except Exception:
7902
+ try:
7903
+ self._kimi_raw_history.append(msg) # type: ignore
7904
+ except Exception:
7905
+ pass
7906
+ continue
7321
7907
 
7322
7908
  else:
7323
7909
  # TEMP DEBUG: show unknown/unhandled events
@@ -7451,12 +8037,12 @@ class ChatCLI:
7451
8037
  # Reasoning effort tag for OpenAI reasoning models
7452
8038
  try:
7453
8039
  effort_seg = ""
7454
- if self._is_openai_reasoning_model(model_label):
7455
- lvl = str(self.reasoning_effort or "medium").strip().lower()
7456
- if lvl not in ("low", "medium", "high", "xhigh"):
7457
- lvl = "medium"
7458
- disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
7459
- effort_seg = f" {disp}"
8040
+ if self._is_openai_reasoning_model(model_label):
8041
+ lvl = str(self.reasoning_effort or "medium").strip().lower()
8042
+ if lvl not in ("low", "medium", "high", "xhigh"):
8043
+ lvl = "medium"
8044
+ disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
8045
+ effort_seg = f" {disp}"
7460
8046
  except Exception:
7461
8047
  effort_seg = ""
7462
8048
  try:
@@ -7545,15 +8131,15 @@ class ChatCLI:
7545
8131
  # Allow codebase map to be injected again
7546
8132
  self._did_inject_codebase_map = False
7547
8133
  # Ensure working-memory first-turn flag remains False so we inject now
7548
- self._did_inject_working_memory = False # Allow custom first-turn text to inject again
7549
- try:
7550
- self._did_inject_custom_first_turn = False
7551
- except Exception:
7552
- pass
7553
- # Reset provider-native histories
7554
- self.messages_for_save = []
7555
- if not self.save_chat_history:
7556
- self.thread_uid = None
8134
+ self._did_inject_working_memory = False # Allow custom first-turn text to inject again
8135
+ try:
8136
+ self._did_inject_custom_first_turn = False
8137
+ except Exception:
8138
+ pass
8139
+ # Reset provider-native histories
8140
+ self.messages_for_save = []
8141
+ if not self.save_chat_history:
8142
+ self.thread_uid = None
7557
8143
  self._kimi_raw_history = []
7558
8144
 
7559
8145
  # Build a fresh payload so the first-turn injections (code map + working memory) are applied
@@ -7582,24 +8168,32 @@ class ChatCLI:
7582
8168
  new_payload["control_level"] = self.control_level
7583
8169
  if self.auto_approve:
7584
8170
  new_payload["auto_approve"] = self.auto_approve
7585
- try:
7586
- if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
7587
- new_payload["reasoning_effort"] = self.reasoning_effort
7588
- else:
7589
- new_payload["reasoning_effort"] = "medium"
7590
- except Exception:
7591
- new_payload["reasoning_effort"] = "medium"
7592
- try:
7593
- if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
7594
- new_payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
7595
- except Exception:
7596
- pass
7597
- # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
7598
- try:
7599
- if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
7600
- new_payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
7601
- except Exception:
7602
- pass
8171
+ try:
8172
+ if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
8173
+ new_payload["reasoning_effort"] = self.reasoning_effort
8174
+ else:
8175
+ new_payload["reasoning_effort"] = "medium"
8176
+ except Exception:
8177
+ new_payload["reasoning_effort"] = "medium"
8178
+ try:
8179
+ if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
8180
+ new_payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
8181
+ except Exception:
8182
+ pass
8183
+ # Anthropic effort (Opus 4.6/4.5)
8184
+ try:
8185
+ ae = getattr(self, "anthropic_effort", None)
8186
+ ae2 = str(ae or "high").strip().lower()
8187
+ if ae2 in ("low", "medium", "high", "max"):
8188
+ new_payload["anthropic_effort"] = ae2
8189
+ except Exception:
8190
+ new_payload["anthropic_effort"] = "high"
8191
+ # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
8192
+ try:
8193
+ if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
8194
+ new_payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
8195
+ except Exception:
8196
+ pass
7603
8197
  if self.web_search_enabled:
7604
8198
  new_payload["enable_web_search"] = True
7605
8199
  if self.web_search_allowed_domains:
@@ -7857,32 +8451,31 @@ class ChatCLI:
7857
8451
  except Exception:
7858
8452
  pass
7859
8453
  # Fallback defaults for common models
7860
- if not ctx_map:
7861
- try:
7862
- ctx_map.update({
7863
- "gpt-5.2": 400000,
7864
- "gpt-5.2-pro": 400000,
7865
- "gpt-5": 400000,
7866
- "gpt-5-2025-08-07": 400000,
7867
- "codex-mini-latest": 200000,
7868
- "gemini-2.5-pro": 1048576,
7869
- "gemini-3-flash-preview": 1048576,
7870
- "gemini-3-pro-preview": 1000000,
8454
+ if not ctx_map:
8455
+ try:
8456
+ ctx_map.update({
8457
+ "gpt-5.2": 400000,
8458
+ "gpt-5.2-pro": 400000,
8459
+ "gpt-5": 400000,
8460
+ "gpt-5-2025-08-07": 400000,
8461
+ "codex-mini-latest": 200000,
8462
+ # (removed gemini-2.5-pro)
8463
+ "gemini-3-flash-preview": 1048576,
8464
+ "gemini-3-pro-preview": 1000000,
7871
8465
  "grok-4-1-fast-reasoning": 2000000,
7872
8466
  "grok-4-1-fast-non-reasoning": 2000000,
7873
8467
  "grok-4": 200000,
7874
8468
  "grok-code-fast-1": 262144,
7875
8469
  "deepseek-chat": 128000,
7876
8470
  "deepseek-reasoner": 128000,
7877
- "kimi-k2-thinking": 262144,
7878
- "kimi-k2-0905-preview": 262144,
8471
+ "kimi-k2.5": 262144,
7879
8472
  "claude-sonnet-4-20250514": 1000000,
7880
8473
  "claude-sonnet-4-20250514-thinking": 1000000,
7881
- "claude-sonnet-4-5-20250929": 1000000,
7882
- "claude-sonnet-4-5-20250929-thinking": 1000000,
7883
- "claude-opus-4-5-20251101": 200000,
7884
- "claude-opus-4-5-20251101-thinking": 200000,
7885
- "glm-4.6": 200000,
8474
+ "claude-sonnet-4-5-20250929": 1000000,
8475
+ "claude-sonnet-4-5-20250929-thinking": 1000000,
8476
+ "claude-opus-4-6": 1000000,
8477
+ "claude-opus-4-6-thinking": 1000000,
8478
+ "glm-4.7": 200000,
7886
8479
  })
7887
8480
  except Exception:
7888
8481
  pass
@@ -7912,19 +8505,19 @@ class ChatCLI:

  # --------------------- Tier-aware defaults -------------------------

- def _recommended_default_model(self) -> str:
- """Return the tier-aware recommended default model.
+ def _recommended_default_model(self) -> str:
+ """Return the tier-aware recommended default model.

- - Free-tier users: recommend Kimi k2-thinking (free-tier friendly reasoning model).
- - All other users: recommend gpt-5.2 (best overall default).
- When tier is unknown, fall back to gpt-5.2.
- """
+ - Free-tier users: recommend Kimi k2.5.
+ - All other users: recommend gpt-5.2 (best overall default).
+ When tier is unknown, fall back to gpt-5.2.
+ """
  try:
  if bool(self.is_free_tier):
- return "kimi-k2-thinking"
+ return "kimi-k2.5"
  except Exception:
  pass
- return "gpt-5.2"
+ return "gpt-5.2"

  # --------------------- Onboarding and Welcome ---------------------
  async def _welcome_flow(self) -> None:
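
The recommendation rule itself reduces to a couple of lines (a sketch mirroring the docstring; an unknown tier behaves like a paid tier):

def recommended_default_model(is_free_tier) -> str:
    return "kimi-k2.5" if is_free_tier else "gpt-5.2"
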
@@ -8155,6 +8748,57 @@ class ChatCLI:
  self.ui.print("Please select a default model for new chats.")
  await self.select_model_menu()

+ async def _wizard_anthropic_effort_step(self) -> None:
+ """First-time wizard: choose Anthropic effort (Opus 4.6/4.5 only).
+
+ Per opus4-6.txt:
+ - default effort is "high"
+ - effort "max" is Opus 4.6 only
+ """
+ try:
+ model = str(self.model or "")
+ except Exception:
+ model = ""
+ base = model[:-9] if model.endswith("-thinking") else model
+ # Only prompt when it matters.
+ if base not in ("claude-opus-4-6",):
+ # Default behavior equals high.
+ try:
+ if not getattr(self, "anthropic_effort", None):
+ self.anthropic_effort = "high"
+ except Exception:
+ self.anthropic_effort = "high"
+ return
+
+ try:
+ cur = str(getattr(self, "anthropic_effort", "high") or "high").strip().lower()
+ except Exception:
+ cur = "high"
+ if cur not in ("low", "medium", "high", "max"):
+ cur = "high"
+
+ choices: List[Tuple[str, str]] = [
+ ("high", "High (default)"),
+ ("medium", "Medium"),
+ ("low", "Low"),
+ ]
+ if base == "claude-opus-4-6":
+ choices.append(("max", "Max (Opus 4.6 only)"))
+
+ sel = await self._menu_choice(
+ "Anthropic effort",
+ "How thoroughly should Claude respond by default?",
+ choices,
+ )
+ if sel in ("low", "medium", "high", "max"):
+ # Guard: max is Opus 4.6 only
+ if sel == "max" and base != "claude-opus-4-6":
+ self.anthropic_effort = "high"
+ else:
+ self.anthropic_effort = sel
+ else:
+ self.anthropic_effort = cur or "high"
+
  async def _wizard_agent_scope_step(self) -> None:
  """First-time wizard: choose Agent scope root and mode via menus.

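
The effort rules above boil down to a small pure function (a sketch; normalize_effort is a hypothetical name, the rules are taken from the wizard step):

def normalize_effort(model: str, effort) -> str:
    base = model[:-9] if model.endswith("-thinking") else model  # len("-thinking") == 9
    e = str(effort or "high").strip().lower()
    if e not in ("low", "medium", "high", "max"):
        return "high"
    if e == "max" and base != "claude-opus-4-6":
        return "high"  # "max" is accepted for Opus 4.6 only
    return e

# normalize_effort("claude-opus-4-6-thinking", "max")    -> "max"
# normalize_effort("claude-sonnet-4-5-20250929", "max")  -> "high"
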
@@ -8272,8 +8916,14 @@ class ChatCLI:
  "We’ll configure a few defaults. You can change these later via /settings.",
  )

- # --- 1) Default model (menu, no Y/N) ---
+ # --- 1) Default model (menu) ---
  await self._wizard_model_step()
+ # If the picker was cancelled (or model still unset), choose a sensible default.
+ if not self.model:
+ self.model = self._recommended_default_model()
+
+ # --- 1b) Anthropic effort (Opus 4.6 / 4.5) ---
+ await self._wizard_anthropic_effort_step()

  # --- 2) Tools (always ON per design) ---
  self.requested_tools = True
@@ -8284,8 +8934,8 @@ class ChatCLI:
  )
  await self.set_level_menu()
  if self.control_level not in (1, 2, 3):
- # Default to Level 2 if user aborted
- self.control_level = 2
+ # Default to Level 3 if user aborted
+ self.control_level = 3

  # --- 4) Agent scope (menus; only type on custom path) ---
  self.ui.print(
@@ -8336,30 +8986,45 @@ class ChatCLI:
  except Exception:
  curv = "medium"

- verb_choice = await self._menu_choice(
- "Text verbosity",
- "How verbose should responses be by default?",
- [
+ verbosity_choices: List[Tuple[str, str]] = []
+ if self._is_gpt_model(self.model):
+ # Default-first: Low for GPT models.
+ verbosity_choices = [
  ("low", "Low – short, to-the-point answers"),
+ ("medium", "Medium – balanced detail"),
+ ("high", "High – more verbose explanations"),
+ ]
+ else:
+ # Default-first: Medium for non-GPT models; do not surface "Low".
+ verbosity_choices = [
  ("medium", "Medium – balanced detail (recommended)"),
  ("high", "High – more verbose explanations"),
- ],
+ ]
+
+ verb_choice = await self._menu_choice(
+ "Text verbosity",
+ "How verbose should responses be by default?",
+ verbosity_choices,
  )
  if verb_choice in ("low", "medium", "high"):
  self.text_verbosity = verb_choice
  else:
  self.text_verbosity = curv or "medium"

- # --- 7) Tool preambles (menu) ---
- preamble_choice = await self._menu_choice(
- "Tool call preambles",
- "Before using tools, the agent can briefly explain what it will do and why (supported models only).",
- [
- ("on", "Enable preambles"),
- ("off", "Disable preambles (default)"),
- ],
- )
- self.preambles_enabled = preamble_choice == "on"
+ # --- 7) Tool preambles (GPT-5 only) ---
+ if self._supports_preambles(self.model):
+ preamble_choice = await self._menu_choice(
+ "Tool call preambles",
+ "Before using tools, the agent can briefly explain what it will do and why.",
+ [
+ ("off", "Disable preambles (default)"),
+ ("on", "Enable preambles"),
+ ],
+ )
+ self.preambles_enabled = preamble_choice == "on"
+ else:
+ # Never enable preambles on unsupported models.
+ self.preambles_enabled = False

  # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
  custom_choice = await self._menu_choice(
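
These menus follow a "default-first" convention: the recommended option is listed first so a bare Enter accepts it. A sketch of the verbosity branch (the is_gpt flag stands in for self._is_gpt_model, whose exact rule is not shown in this hunk):

def verbosity_choices(is_gpt: bool) -> list[tuple[str, str]]:
    if is_gpt:
        # GPT models default to "low".
        return [("low", "Low"), ("medium", "Medium"), ("high", "High")]
    # Other models default to "medium" and do not surface "low".
    return [("medium", "Medium (recommended)"), ("high", "High")]
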
@@ -8433,7 +9098,8 @@ class ChatCLI:
  text = m.get("content", "")
  contents.append({"role": role, "parts": [{"text": text}]})
  # Pick a Gemini model for counting; fall back if current isn't Gemini
- count_model = "gemini-2.5-pro"
+ # (gemini-2.5-pro removed from curated lists)
+ count_model = "gemini-3-flash-preview"
  res = client.models.count_tokens(model=count_model, contents=contents)
  t = int(getattr(res, "total_tokens", 0) or 0)
  if t > 0:
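
Standalone, the counting call above has roughly this shape (a sketch assuming the google-genai client; only the count_tokens call and the content shape appear in this diff):

from google import genai

client = genai.Client()  # reads the API key from the environment
contents = [{"role": "user", "parts": [{"text": "Hello there"}]}]
res = client.models.count_tokens(model="gemini-3-flash-preview", contents=contents)
print(int(getattr(res, "total_tokens", 0) or 0))
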
@@ -8491,9 +9157,9 @@ class ChatCLI:
  blocks.append(txt.strip())
  except Exception:
  pass
- # Tool preamble
+ # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
  try:
- if bool(getattr(self, "preambles_enabled", False)):
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
  "Tool usage: when you need to read or modify files or run commands, "
  "explicitly explain why you're using a tool, what you'll do, and how it "
@@ -8918,11 +9584,11 @@ class ChatCLI:
  await self._ws_broadcast("warning", {"message": f"Unknown inbound type: {mtype}"})

  # Handle approval request: first reply wins (web or CLI), then POST to server
- async def _handle_approval_request(self, client: httpx.AsyncClient, session_id: Optional[str], data: Dict[str, Any]) -> None:
- tool = str(data.get("tool"))
- call_id = data.get("call_id")
- args_prev = data.get("args_preview", {}) or {}
- timeout_sec = int(data.get("timeout_sec", 60) or 60)
+ async def _handle_approval_request(self, client: httpx.AsyncClient, session_id: Optional[str], data: Dict[str, Any]) -> None:
+ tool = str(data.get("tool"))
+ call_id = data.get("call_id")
+ args_prev = data.get("args_preview", {}) or {}
+ timeout_sec = int(data.get("timeout_sec", 60) or 60)
  # Display summary
  self.ui.print(f"⚠ Approval requested for {tool} (call_id={call_id})", style=self.ui.theme["warn"])
  self.ui.print(truncate_json(args_prev, 600), style=self.ui.theme["dim"])
@@ -8947,92 +9613,92 @@ class ChatCLI:

  # Run blocking CLI prompt in thread to avoid blocking event loop
  loop = asyncio.get_event_loop()
- def prompt_cli() -> Tuple[bool, str, Optional[str], Optional[str]]:
- """Return (approved, note, remember, remember_key)."""
- try:
- # Prefer the richer approve-once/session/always UX at L2.
- try:
- lvl = data.get("level")
- lvl_i = int(lvl) if isinstance(lvl, int) or (isinstance(lvl, str) and str(lvl).strip().isdigit()) else None
- except Exception:
- lvl_i = None
-
- if lvl_i == 2:
- t = str(tool or "").strip().lower()
- remember_key = None
- label = t
- if t == "run_command":
- try:
- cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
- except Exception:
- cmd = None
- base = self._base_command(cmd) if cmd is not None else ""
- if base:
- label = f"run_command:{base}"
- remember_key = base
- else:
- label = "run_command"
- else:
- remember_key = t
-
- choice = self._approval_prompt_ui(label, args_prev if isinstance(args_prev, dict) else {})
- if choice == "deny":
- return False, "Denied via CLI", None, remember_key
-
- # Approved; update local trust registries immediately.
- try:
- if t == "run_command" and remember_key:
- if choice == "session":
- if remember_key not in self.trust_cmds_session:
- self.trust_cmds_session.append(remember_key)
- elif choice == "always":
- if remember_key not in self.trust_cmds_always:
- self.trust_cmds_always.append(remember_key)
- self.save_settings()
- elif t in {"write_file", "append_file", "edit_file", "apply_patch", "string_replace"}:
- if choice == "session":
- if t not in self.trust_tools_session:
- self.trust_tools_session.append(t)
- elif choice == "always":
- if t not in self.trust_tools_always:
- self.trust_tools_always.append(t)
- self.save_settings()
- except Exception:
- pass
-
- remember = choice if choice in ("session", "always") else "once"
- return True, "Approved via CLI", remember, remember_key
-
- # Fallback: simple yes/no confirmation.
- default_yes = True if str(tool).strip() == "context.summarize" else False
- prompt = f"Approve {tool} (timeout in {timeout_sec}s)?"
- try:
- if str(tool).strip().lower() == "run_command":
- cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
- if isinstance(cmd, str) and cmd.strip():
- prompt = f"Approve run_command: {self._clip(cmd, 120)} (timeout in {timeout_sec}s)?"
- except Exception:
- pass
- approved = self.ui.confirm(prompt, default=default_yes)
- return bool(approved), ("Approved via CLI" if approved else "Denied via CLI"), None, None
- except Exception:
- return False, "Denied via CLI (error)", None, None
+ def prompt_cli() -> Tuple[bool, str, Optional[str], Optional[str]]:
+ """Return (approved, note, remember, remember_key)."""
+ try:
+ # Prefer the richer approve-once/session/always UX at L2.
+ try:
+ lvl = data.get("level")
+ lvl_i = int(lvl) if isinstance(lvl, int) or (isinstance(lvl, str) and str(lvl).strip().isdigit()) else None
+ except Exception:
+ lvl_i = None
+
+ if lvl_i == 2:
+ t = str(tool or "").strip().lower()
+ remember_key = None
+ label = t
+ if t == "run_command":
+ try:
+ cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
+ except Exception:
+ cmd = None
+ base = self._base_command(cmd) if cmd is not None else ""
+ if base:
+ label = f"run_command:{base}"
+ remember_key = base
+ else:
+ label = "run_command"
+ else:
+ remember_key = t
+
+ choice = self._approval_prompt_ui(label, args_prev if isinstance(args_prev, dict) else {})
+ if choice == "deny":
+ return False, "Denied via CLI", None, remember_key
+
+ # Approved; update local trust registries immediately.
+ try:
+ if t == "run_command" and remember_key:
+ if choice == "session":
+ if remember_key not in self.trust_cmds_session:
+ self.trust_cmds_session.append(remember_key)
+ elif choice == "always":
+ if remember_key not in self.trust_cmds_always:
+ self.trust_cmds_always.append(remember_key)
+ self.save_settings()
+ elif t in {"write_file", "append_file", "edit_file", "apply_patch", "string_replace"}:
+ if choice == "session":
+ if t not in self.trust_tools_session:
+ self.trust_tools_session.append(t)
+ elif choice == "always":
+ if t not in self.trust_tools_always:
+ self.trust_tools_always.append(t)
+ self.save_settings()
+ except Exception:
+ pass
+
+ remember = choice if choice in ("session", "always") else "once"
+ return True, "Approved via CLI", remember, remember_key
+
+ # Fallback: simple yes/no confirmation.
+ default_yes = True if str(tool).strip() == "context.summarize" else False
+ prompt = f"Approve {tool} (timeout in {timeout_sec}s)?"
+ try:
+ if str(tool).strip().lower() == "run_command":
+ cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
+ if isinstance(cmd, str) and cmd.strip():
+ prompt = f"Approve run_command: {self._clip(cmd, 120)} (timeout in {timeout_sec}s)?"
+ except Exception:
+ pass
+ approved = self.ui.confirm(prompt, default=default_yes)
+ return bool(approved), ("Approved via CLI" if approved else "Denied via CLI"), None, None
+ except Exception:
+ return False, "Denied via CLI (error)", None, None

  cli_task = loop.run_in_executor(None, prompt_cli)

- decided: Optional[Tuple[Any, ...]] = None
+ decided: Optional[Tuple[Any, ...]] = None
  try:
  done, pending = await asyncio.wait({fut, asyncio.ensure_future(cli_task)}, timeout=timeout_sec, return_when=asyncio.FIRST_COMPLETED)
  if fut in done and not fut.cancelled():
  try:
- decided = fut.result()
- except Exception:
- decided = (False, "Denied via Web (error)")
- elif cli_task in done: # type: ignore
- try:
- decided = await cli_task # type: ignore
- except Exception:
- decided = (False, "Denied via CLI (error)")
+ decided = fut.result()
+ except Exception:
+ decided = (False, "Denied via Web (error)")
+ elif cli_task in done: # type: ignore
+ try:
+ decided = await cli_task # type: ignore
+ except Exception:
+ decided = (False, "Denied via CLI (error)")
  # If web future not decided, set it so we can cleanly proceed
  if not fut.done():
  try:
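
The control flow here is a "first reply wins" race: a web-side future and a blocking CLI prompt (run in a thread) compete under one timeout. A self-contained sketch of the pattern (names are illustrative):

import asyncio

async def first_reply_wins(fut: asyncio.Future, blocking_prompt, timeout_sec: int):
    loop = asyncio.get_event_loop()
    cli_task = asyncio.ensure_future(loop.run_in_executor(None, blocking_prompt))
    done, _pending = await asyncio.wait(
        {fut, cli_task}, timeout=timeout_sec, return_when=asyncio.FIRST_COMPLETED
    )
    if fut in done and not fut.cancelled():
        return fut.result()       # web side answered first
    if cli_task in done:
        return cli_task.result()  # CLI side answered first
    return None                   # timed out; the caller treats this as a denial
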
@@ -9052,45 +9718,45 @@ class ChatCLI:
  if call_id is not None:
  self._pending_approvals.pop(str(call_id), None)

- # Normalize decision tuple to (approved, note, remember, remember_key)
- approved = False
- note = ""
- remember = None
- remember_key = None
- try:
- if decided is None:
- approved, note = False, ""
- elif isinstance(decided, tuple) and len(decided) >= 4:
- approved, note, remember, remember_key = decided[0], decided[1], decided[2], decided[3]
- elif isinstance(decided, tuple) and len(decided) >= 2:
- approved, note = decided[0], decided[1]
- else:
- approved, note = bool(decided), ""
- except Exception:
- approved, note = False, ""
-
- # Post decision to server
- if session_id:
- try:
- payload = {
- "session_id": session_id,
- "call_id": call_id,
- "approve": bool(approved),
- "note": note,
- }
- # Optional remember semantics (used to suppress repeat approvals within the current stream).
- try:
- if bool(approved) and remember in ("session", "always"):
- payload["remember"] = remember
- if remember_key:
- payload["remember_key"] = str(remember_key)
- except Exception:
- pass
- r = await client.post(self.approvals_url, json=payload, timeout=self.timeout)
- if r.status_code >= 400:
- self.ui.warn(f"Approval POST failed: {r.status_code} {r.text}")
- except Exception as e:
- self.ui.warn(f"Approval POST error: {e}")
+ # Normalize decision tuple to (approved, note, remember, remember_key)
+ approved = False
+ note = ""
+ remember = None
+ remember_key = None
+ try:
+ if decided is None:
+ approved, note = False, ""
+ elif isinstance(decided, tuple) and len(decided) >= 4:
+ approved, note, remember, remember_key = decided[0], decided[1], decided[2], decided[3]
+ elif isinstance(decided, tuple) and len(decided) >= 2:
+ approved, note = decided[0], decided[1]
+ else:
+ approved, note = bool(decided), ""
+ except Exception:
+ approved, note = False, ""
+
+ # Post decision to server
+ if session_id:
+ try:
+ payload = {
+ "session_id": session_id,
+ "call_id": call_id,
+ "approve": bool(approved),
+ "note": note,
+ }
+ # Optional remember semantics (used to suppress repeat approvals within the current stream).
+ try:
+ if bool(approved) and remember in ("session", "always"):
+ payload["remember"] = remember
+ if remember_key:
+ payload["remember_key"] = str(remember_key)
+ except Exception:
+ pass
+ r = await client.post(self.approvals_url, json=payload, timeout=self.timeout)
+ if r.status_code >= 400:
+ self.ui.warn(f"Approval POST failed: {r.status_code} {r.text}")
+ except Exception as e:
+ self.ui.warn(f"Approval POST error: {e}")

  async def amain():
  args = build_arg_parser().parse_args()
  # Set global debug flags from args
9166
9832
  await cli.run()
9167
9833
 
9168
9834
 
9169
- # --- UX Hotfix: Replace menu UI with highlighted cursor picker (no radio buttons) ---
9170
- # The default RadioList menu can be confusing and, on some terminals, non-interactive.
9171
- # We override ChatCLI._menu_choice at runtime with a prompt_toolkit-based list that shows
9172
- # a highlighted bar for the current item; Enter selects; Esc cancels. Falls back to
9173
- # numeric selection when prompt_toolkit is unavailable.
9174
-
9175
- async def _menu_choice_highlight(self, title: str, text: str, choices: list[tuple[str, str]]): # type: ignore
9176
- if HAS_PT and Application and Layout and HSplit and Window and FormattedTextControl and Style and KeyBindings:
9177
- try:
9178
- items = [(val, str(label)) for (val, label) in choices]
9179
- index = 0
9180
- blink_on = [True]
9181
-
9182
- def _lines():
9183
- out = []
9184
- if title:
9185
- out.append(("class:menu.title", f"{title}\n"))
9186
- for i, (_v, _lbl) in enumerate(items):
9187
- if i == index:
9188
- arrow = ">" if blink_on[0] else " "
9189
- out.append(("class:menu.item.selected", f" {arrow} {_lbl}\n"))
9190
- else:
9191
- out.append(("class:menu.item", f" {_lbl}\n"))
9192
- out.append(("class:menu.status", f"({index+1}/{len(items)})"))
9193
- return out
9194
-
9195
- body = FormattedTextControl(_lines)
9196
- hint = FormattedTextControl(lambda: text or "Use ↑/↓, Enter=select, Esc=cancel")
9197
- root = HSplit([
9198
- Window(height=1, content=hint, style="class:menu.hint"),
9199
- Window(content=body),
9200
- ])
9201
- kb = KeyBindings()
9202
-
9203
- @kb.add("up")
9204
- def _up(event):
9205
- nonlocal index
9206
- index = (index - 1) % len(items)
9207
- event.app.invalidate()
9208
-
9209
- @kb.add("down")
9210
- def _down(event):
9211
- nonlocal index
9212
- index = (index + 1) % len(items)
9213
- event.app.invalidate()
9214
-
9215
- @kb.add("pageup")
9216
- def _pgup(event):
9217
- nonlocal index
9218
- index = max(0, index - 7)
9219
- event.app.invalidate()
9220
-
9221
- @kb.add("pagedown")
9222
- def _pgdn(event):
9223
- nonlocal index
9224
- index = min(len(items) - 1, index + 7)
9225
- event.app.invalidate()
9226
-
9227
- @kb.add("home")
9228
- def _home(event):
9229
- nonlocal index
9230
- index = 0
9231
- event.app.invalidate()
9232
-
9233
- @kb.add("end")
9234
- def _end(event):
9235
- nonlocal index
9236
- index = len(items) - 1
9237
- event.app.invalidate()
9238
-
9239
- @kb.add("enter")
9240
- def _enter(event):
9241
- event.app.exit(result=items[index][0])
9242
-
9243
- @kb.add("escape")
9244
- def _esc(event):
9245
- event.app.exit(result=None)
9246
-
9247
- style = Style.from_dict({
9248
- "menu.title": "bold",
9249
- "menu.hint": "fg:#888888",
9250
- "menu.status": "fg:#ff8700",
9251
- "menu.item": "",
9252
- # Bright highlighted selection; blink may be ignored on some terminals
9253
- "menu.item.selected": "fg:#ff8700 reverse",
9254
- })
9255
-
9256
- app = Application(layout=Layout(root), key_bindings=kb, style=style, full_screen=False)
9257
-
9258
- async def _blinker():
9259
- while True:
9260
- await asyncio.sleep(0.6)
9261
- try:
9262
- blink_on[0] = not blink_on[0]
9263
- get_app().invalidate()
9264
- except Exception:
9265
- break
9266
-
9267
- try:
9268
- asyncio.create_task(_blinker())
9269
- except Exception:
9270
- pass
9271
-
9272
- return await app.run_async()
9273
- except Exception:
9274
- pass
9275
- # Fallback: numeric list
9276
- self.ui.header(title, text)
9277
- for i, (_, label) in enumerate(choices, start=1):
9278
- style = None
9279
- try:
9280
- lbl = str(label)
9281
- if ("VERY expensive" in lbl) or ("[DANGER]" in lbl) or ("!!!" in lbl and "expensive" in lbl.lower()):
9282
- style = self.ui.theme.get("err")
9283
- except Exception:
9284
- style = None
9285
- self.ui.print(f"{i}. {label}", style=style)
9286
- self.ui.print()
9287
- while True:
9288
- raw = input("Choose an option: ").strip()
9289
- if raw.lower() in ("q", "quit", "exit"):
9290
- return None
9291
- if not raw.isdigit():
9292
- self.ui.warn("Enter a number from the list.")
9293
- continue
9294
- idx = int(raw)
9295
- if not (1 <= idx <= len(choices)):
9296
- self.ui.warn("Invalid selection.")
9297
- continue
9298
- return choices[idx - 1][0]
9299
-
9300
- # Monkey-patch the method onto ChatCLI
9301
- try:
9302
- ChatCLI._menu_choice = _menu_choice_highlight # type: ignore[attr-defined]
9303
- except Exception:
9304
- pass
9305
-
9306
9835
  # --- UX Hotfix v2: dependency-free highlighted menus (Enter selects) ---
9307
9836
  # This override ensures the settings menu works without RadioList and that Enter
9308
9837
  # activates the currently highlighted option even when prompt_toolkit is absent.