henosis-cli 0.6.7__py3-none-any.whl → 0.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py CHANGED
@@ -6,11 +6,12 @@
  # - Preserves previous behavior and settings
  # - Injects CODEBASE_MAP.md into the first user message (wrapped in <codebase_map>) without manual trimming.

- import argparse
- import asyncio
- import json
- import os
- import sys
+ import argparse
+ import asyncio
+ import copy
+ import json
+ import os
+ import sys
  import socket
  import shutil
  from pathlib import Path
@@ -683,8 +684,8 @@ class UI:
  for n, ty, sz in rows:
  print(f"{n:<40} {ty:<8} {sz}")

- class ChatCLI:
- def __init__(
+ class ChatCLI:
+ def __init__(
  self,
  server: str,
  model: Optional[str],
@@ -997,8 +998,25 @@ class ChatCLI:
  }
  # Track last used model for display
  self._last_used_model: Optional[str] = None
- # Provider-native history for Kimi (preserve reasoning_content across turns)
- self._kimi_raw_history: List[Dict[str, Any]] = []
+ # Provider-native history for Kimi (preserve reasoning_content across turns)
+ self._kimi_raw_history: List[Dict[str, Any]] = []
+ # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
+ self._gemini_raw_history: List[Dict[str, Any]] = []
+ # OpenAI Responses API threading: retain previous response id across turns
+ self._openai_previous_response_id: Optional[str] = None
+ # OpenAI Responses API threading: retain the full chain of response ids across turns
+ # (server will also echo per-turn ids in message.completed.openai_response_ids)
+ self._openai_response_id_history: List[str] = []
+
+ # OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+ # including reasoning items, function_call items, and function_call_output items.
+ self._openai_input_items: List[Dict[str, Any]] = []
+ # For robustness, remember exactly what we sent as openai_input_items for the current turn
+ # so we can append server-provided openai_delta_items deterministically.
+ self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+ # Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+ # Shape: {session_id, call_id, job_token, name}
+ self._inflight_dispatch: Optional[Dict[str, Any]] = None
  # Last server billing info from /api/usage/commit
  self._last_commit_cost_usd: float = 0.0
  self._last_remaining_credits: Optional[float] = None
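
The new `_openai_input_items` state implements manual conversation replay for the OpenAI Responses API: rather than relying on server-side threading via a previous response id, the client re-sends the full item chain (system/user messages, reasoning items, function calls and their outputs) on every turn. A minimal standalone sketch of how such a chain grows across turns; the helper name and item shapes here are illustrative, not the package's API:

    from typing import Any, Dict, List

    def append_turn(input_items: List[Dict[str, Any]],
                    user_text: str,
                    output_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Extend the replayed item chain with one completed turn.

        The chain is what gets re-sent as openai_input_items on the next
        request, so reasoning/function_call items must stay in order.
        """
        chain = list(input_items)
        chain.append({"role": "user", "content": user_text})
        chain.extend(output_items)  # reasoning, function_call, function_call_output, ...
        return chain

    # Turn 1: seed with the system prompt, then fold in the model's output items.
    chain: List[Dict[str, Any]] = [{"role": "system", "content": "You are a coding CLI."}]
    chain = append_turn(chain, "List the repo files.", [
        {"type": "function_call", "name": "list_dir", "call_id": "c1", "arguments": "{}"},
        {"type": "function_call_output", "call_id": "c1", "output": "cli.py"},
    ])
    print(len(chain))  # 4 items replayed on the next turn
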
@@ -1049,8 +1067,14 @@ class ChatCLI:
  self._thinking_indicator_enabled = True
  except Exception:
  self._thinking_indicator_enabled = True
- # Track Ctrl+C timing for double-press-to-exit behavior
- self._last_interrupt_ts: Optional[float] = None
+ # Track Ctrl+C timing for double-press-to-exit behavior
+ self._last_interrupt_ts: Optional[float] = None
+
+ # Ctrl+C during a running stream should not kill the entire CLI.
+ # Instead, we cancel the in-flight turn and reopen the last user query for editing.
+ # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+ self._pending_user_edit: Optional[str] = None
+ self._pending_turn_snapshot: Optional[Dict[str, Any]] = None

  # Timers: session-level and per-turn wall-clock timers
  self._session_started_at: Optional[float] = None # time.perf_counter() at session start
@@ -1091,7 +1115,7 @@
  self._pt_session = None

  # ----------------------- Provider heuristics -----------------------
- def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
+ def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
  """Return True when the model is an OpenAI reasoning-capable model.
  Mirrors server-side heuristic: prefixes 'gpt-5' or 'o4'.
  """
@@ -1342,6 +1366,9 @@ class ChatCLI:
  return {
  # OpenAI
  "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+ # New: gpt-5.2-codex
+ # Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+ "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
  # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
  "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
  "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
@@ -1359,8 +1386,7 @@ class ChatCLI:
  # New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
  "claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
  "claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
- # Gemini
- "gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
+ # Gemini
  # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
  "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
  # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
@@ -1375,15 +1401,15 @@ class ChatCLI:
  # DeepSeek V3.2 (+$0.25 per 1M margin)
  "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
  "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
- "deepseek-3.2-speciale": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+ # Removed: deepseek speciale (not supported)
  # Kimi
  "kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
  "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
  "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
- # GLM (Z.AI)
- # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
- "glm-4.6": {"input": 0.84, "output": 3.08, "provider": "glm"},
- }
+ # GLM (Z.AI)
+ # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
+ "glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
+ }

  def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
  if not model:
@@ -1431,6 +1457,15 @@ class ChatCLI:
  except Exception:
  model_name = ""
  try:
+ # Provider-native state resets when switching away from OpenAI.
+ try:
+ if self.model and (not self._is_openai_model(self.model)):
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  if model_name in {"gpt-5.2-pro"}:
  # Default these to high, but don't clobber a user-chosen xhigh.
  if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
@@ -1438,11 +1473,138 @@ class ChatCLI:
  # Codex family: disable preambles for better behavior
  if "codex" in model_name:
  self.preambles_enabled = False
+ # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+ # Force-disable for all other models (even if a saved setting had it enabled).
+ if not self._supports_preambles(self.model):
+ self.preambles_enabled = False
  except Exception:
  try:
  self.reasoning_effort = "high"
  except Exception:
  pass
+
+ def _supports_preambles(self, model: Optional[str]) -> bool:
+ """Tool-call preambles are a CLI-only UX hint.
+
+ Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+ In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+ """
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ # Only the base GPT-5 line supports this UX toggle.
+ # Allow:
+ # - "gpt-5"
+ # - date-pinned variants like "gpt-5-2025-08-07"
+ # Disallow:
+ # - versioned families like "gpt-5.1*" / "gpt-5.2*"
+ if not (m == "gpt-5" or m.startswith("gpt-5-")):
+ return False
+ if "codex" in m:
+ return False
+ return True
+ except Exception:
+ return False
+
+ def _is_openai_model(self, model: Optional[str]) -> bool:
+ """Best-effort model/provider discriminator for client-side state.
+
+ The server is multi-provider. For the CLI we treat anything that isn't an explicit
+ non-OpenAI provider prefix as OpenAI.
+ """
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ if not m:
+ return False
+ for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+ if m.startswith(pfx):
+ return False
+ # Everything else defaults to OpenAI in this repo.
+ return True
+ except Exception:
+ return False
+
+ def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+ """Whether this provider has an implemented native tool/thinking retention path."""
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ if m.startswith("gemini-"):
+ return True
+ if m.startswith("kimi-"):
+ return bool(getattr(self, "retain_native_tool_results", False))
+ if self._is_openai_model(model):
+ return True
+ return False
+ except Exception:
+ return False
+
+ def _sanitize_openai_items(self, items: Any) -> Any:
+ """Recursively strip fields from OpenAI output items that cause errors when used as input."""
+ if isinstance(items, list):
+ return [self._sanitize_openai_items(x) for x in items]
+ if isinstance(items, dict):
+ # 'status' is the main offender causing 400s
+ bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+ return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+ return items
+
+ async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+ """If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+ This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+ Best-effort; never raises.
+ """
+ ctx = None
+ try:
+ ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+ except Exception:
+ ctx = None
+ if not ctx:
+ return
+ session_id = ctx.get("session_id")
+ call_id = ctx.get("call_id")
+ job_token = ctx.get("job_token")
+ name = ctx.get("name")
+ if not (session_id and call_id and job_token):
+ return
+ payload_cb = {
+ "session_id": session_id,
+ "call_id": call_id,
+ "name": name,
+ "job_token": job_token,
+ "result": {
+ "ok": False,
+ "cancelled": True,
+ "error": str(reason or "cancelled"),
+ },
+ }
+ try:
+ # Keep it short; we just want to unblock the server.
+ http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+ except Exception:
+ http_timeout = None
+ try:
+ async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+ await client.post(self.tools_callback_url, json=payload_cb)
+ except Exception:
+ pass
+ finally:
+ try:
+ self._inflight_dispatch = None
+ except Exception:
+ pass
+
+ def _is_gpt_model(self, model: Optional[str]) -> bool:
+ """True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+ try:
+ return bool(model) and str(model).strip().lower().startswith("gpt-")
+ except Exception:
+ return False
  def _is_codex_model(self, model: Optional[str]) -> bool:
  try:
  return bool(model) and ("codex" in str(model).lower())
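
The new `_supports_preambles` gate is a pure string check, so its behavior can be pinned down in isolation. A standalone restatement of the rule with spot checks, mirroring the allow/disallow comments in the hunk above (not imported from the package):

    from typing import Optional

    def supports_preambles(model: Optional[str]) -> bool:
        """Allow only the base GPT-5 line: 'gpt-5' or date-pinned 'gpt-5-*', never Codex."""
        if not model:
            return False
        m = model.strip().lower()
        if not (m == "gpt-5" or m.startswith("gpt-5-")):
            return False  # rejects gpt-5.1*, gpt-5.2*, and other providers outright
        return "codex" not in m

    assert supports_preambles("gpt-5")
    assert supports_preambles("gpt-5-2025-08-07")     # date-pinned variant
    assert not supports_preambles("gpt-5.2")          # versioned family
    assert not supports_preambles("gpt-5-codex")      # Codex variant
    assert not supports_preambles("gemini-3-pro-preview")
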
@@ -1955,14 +2117,15 @@ class ChatCLI:
  pass
  return data

- def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
- try:
+ def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
+ try:
+ old_system_prompt = getattr(self, "system_prompt", None)
  self.model = data.get("model", self.model)
  if "save_chat_history" in data:
  try:
  self.save_chat_history = bool(data.get("save_chat_history"))
  except Exception:
- pass
+ pass
  self.requested_tools = data.get("requested_tools", self.requested_tools)
  self.fs_scope = data.get("fs_scope", self.fs_scope)
  self.host_base = data.get("host_base", self.host_base)
@@ -2101,18 +2264,38 @@ class ChatCLI:
  self.anthropic_cache_ttl = None
  except Exception:
  pass
- # Rebuild history if system prompt changed
- self.history = []
- if self.system_prompt:
- self.history.append({"role": "system", "content": self.system_prompt})
- # On settings load, do not assume the custom first-turn was injected yet
- try:
- self._did_inject_custom_first_turn = False
- except Exception:
- pass
- self._apply_model_side_effects()
- except Exception as e:
- self.ui.warn(f"Failed to apply settings: {e}")
+ # Rebuild history if system prompt changed
+ try:
+ system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
+ except Exception:
+ system_prompt_changed = False
+
+ if system_prompt_changed:
+ # Changing the system prompt can materially alter the behavior of the assistant;
+ # warn the user and reset the current conversation history to avoid mixing contexts.
+ try:
+ self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
+ except Exception:
+ pass
+ self.history = []
+ if self.system_prompt:
+ self.history.append({"role": "system", "content": self.system_prompt})
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
+ # On settings load, do not assume the custom first-turn was injected yet
+ try:
+ self._did_inject_custom_first_turn = False
+ except Exception:
+ pass
+ self._apply_model_side_effects()
+ except Exception as e:
+ self.ui.warn(f"Failed to apply settings: {e}")

  async def _fetch_server_settings(self) -> Optional[Dict[str, Any]]:
  try:
@@ -2492,30 +2675,28 @@ class ChatCLI:

  def _model_presets(self) -> List[Tuple[str, str]]:
  """Shared list of (model, label) used by settings UI and /model menu."""
+ # Ordered in "feelings" order (Recommended first, then Others).
+ # NOTE: We intentionally do not include a "server default" or "custom" option here.
  return [
+ # Recommended
  ("gpt-5.2", "OpenAI: gpt-5.2"),
- ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
+ ("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
  ("gpt-5", "OpenAI: gpt-5"),
+ ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+ ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+ ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
+ ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
+ ("grok-code-fast-1", "xAI: grok-code-fast-1"),
+
+ # Others
+ ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
  ("gpt-5-codex", "OpenAI: gpt-5-codex"),
  ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
- ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
  ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
- ("deepseek-3.2-speciale", "DeepSeek: deepseek 3.2 Speciale (no tools)"),
- ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
- ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
- ("gemini-2.5-pro", "Gemini: gemini-2.5-pro"),
- ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
- ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
- ("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
- ("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
- ("grok-4", "xAI: grok-4"),
- ("grok-code-fast-1", "xAI: grok-code-fast-1"),
- ("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
- ("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
- ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
- ("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
- ("glm-4.6", "GLM: glm-4.6"),
- ]
+ ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+ ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
+ ("glm-4.7", "GLM: glm-4.7"),
+ ]

  async def open_settings(self, focus: Optional[str] = None) -> None:
  """Open the new dependency-free settings UI. Falls back to legacy only when
@@ -2569,37 +2750,33 @@ class ChatCLI:
  }
  initial = self._collect_settings_dict()

- # Model presets list (shared)
- model_presets: List[Tuple[str, str]] = self._model_presets()
- # Reorder with a Recommended section at the top. Avoid decorative symbols; instead,
- # annotate recommended models with plain text for clarity.
- # Recommended set per request: opus 4-5 (no thinking), gemini 3, gpt 5, kimi k2 thinking,
- # grok code fast 1, and deepseek reasoner 3.2
- rec_keys = {
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
+ # Model presets list (shared)
+ model_presets: List[Tuple[str, str]] = self._model_presets()
+
+ # Reorder with a Recommended section at the top.
+ # IMPORTANT: remove "server default" and "custom" from Settings UI.
+ rec_keys_ordered = [
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
  "gemini-3-pro-preview",
  "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
+ "claude-opus-4-5-20251101",
  "kimi-k2-thinking",
  "grok-code-fast-1",
- }
- rec_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m in rec_keys]
- other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_keys]
- # Build enum options in the order: Server default, Recommended, Others, Custom
- model_enum_options: List[Optional[str]] = [None] + [m for (m, _l) in rec_list] + [m for (m, _l) in other_list] + ["custom"]
- # Build render map without any star/marker characters; use a simple "(recommended)" suffix
- # for recommended models EXCEPT DeepSeek Reasoner 3.2, which should not display the suffix.
- render_map: Dict[Any, str] = {None: "Server default"}
+ ]
+ rec_set = set(rec_keys_ordered)
+ preset_map = {m: lbl for (m, lbl) in model_presets}
+ rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
+ other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
+
+ # Build enum options in the order: Recommended, Others
+ model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+ render_map: Dict[Any, str] = {}
  for m, lbl in rec_list:
- if m == "deepseek-reasoner-3.2":
- render_map[m] = lbl
- else:
- render_map[m] = f"{lbl} (recommended)"
- for m, lbl in other_list:
- render_map[m] = lbl
- render_map["custom"] = "Custom..."
+ render_map[m] = lbl
+ for m, lbl in other_list:
+ render_map[m] = lbl

  # Build items schema
  items: List[Dict[str, Any]] = [
@@ -2626,14 +2803,16 @@ class ChatCLI:
  "id": "requested_tools",
  "label": "Tools",
  "type": "enum",
- "options": [None, True, False],
+ # Default-first: ON, then OFF, then server default.
+ "options": [True, False, None],
  "render": {None: "Server default", True: "ON", False: "OFF"},
  },
  {
  "id": "control_level",
  "label": "Control level",
  "type": "enum",
- "options": [None, 1, 2, 3],
+ # Default-first: Level 3, then 2, then 1, then server default.
+ "options": [3, 2, 1, None],
  "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
  },
  {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
@@ -2647,35 +2826,44 @@ class ChatCLI:
  {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
  # Agent scope & filesystem controls
  {"id": "host_base", "label": "Agent scope directory", "type": "text"},
- {
- "id": "fs_scope",
- "label": "Filesystem scope",
- "type": "enum",
- "options": [None, "workspace", "host"],
- "render": {
- None: "Server default",
- "workspace": "Workspace (sandbox)",
- "host": "Host (Agent scope)",
- },
- },
- {
- "id": "fs_host_mode",
- "label": "Host mode",
- "type": "enum",
- "options": [None, "any", "cwd", "custom"],
- "render": {
- None: "Server default / any",
- "any": "any (no extra client restriction)",
- "cwd": "Current working directory",
- "custom": "Custom (use Agent scope)",
- },
- },
+ {
+ "id": "fs_scope",
+ "label": "Filesystem scope",
+ "type": "enum",
+ # Default-first: host (Agent scope), then workspace, then server default.
+ "options": ["host", "workspace", None],
+ "render": {
+ None: "Server default",
+ "workspace": "Workspace (sandbox)",
+ "host": "Host (Agent scope)",
+ },
+ },
+ {
+ "id": "fs_host_mode",
+ "label": "Host mode",
+ "type": "enum",
+ # Default-first: custom (use Agent scope), then cwd, then any, then server default.
+ "options": ["custom", "cwd", "any", None],
+ "render": {
+ None: "Server default / any",
+ "any": "any (no extra client restriction)",
+ "cwd": "Current working directory",
+ "custom": "Custom (use Agent scope)",
+ },
+ },
  ]},
  {"label": "Code Map", "type": "group", "items": [
  {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
  ]},
  {"label": "Preambles & First-turn", "type": "group", "items": [
- {"id": "preambles_enabled", "label": "Enable tool call preambles (supported models only)", "type": "bool"},
+ {
+ "id": "preambles_enabled",
+ "label": "Enable tool call preambles (GPT-5 only)",
+ "type": "bool",
+ # Only show this control when the *currently selected* model supports it.
+ # (This updates live as the Model picker changes.)
+ "visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
+ },
  {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
  {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
  {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
@@ -2688,8 +2876,22 @@ class ChatCLI:
  ]},
  ]

- # Prepare initial values with enum placeholder for model when custom text set
- init_for_ui = dict(initial)
+ # Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+ try:
+ if not self._is_gpt_model(self.model):
+ for g in items:
+ if not isinstance(g, dict):
+ continue
+ if (g.get("type") == "group") and (g.get("label") == "General"):
+ for row in (g.get("items") or []):
+ if isinstance(row, dict) and row.get("id") == "text_verbosity":
+ row["options"] = ["medium", "high"]
+ row["render"] = {"medium": "Medium", "high": "High"}
+ except Exception:
+ pass
+
+ # Prepare initial values with enum placeholder for model when custom text set
+ init_for_ui = dict(initial)
  if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
  # Represent as 'custom' for cycling, but keep original model in working copy for edit with 'e'
  pass # We'll keep exact model string; enum will show the raw value when not matched
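
The `visible_if` entry added above stores a callable, which the settings renderer presumably evaluates against the current working values so the preambles row appears and disappears as the Model picker changes. A minimal sketch of that contract; the renderer loop and the simplified predicate here are illustrative assumptions, not the package's actual UI code:

    from typing import Any, Callable, Dict, List, Optional

    def visible_rows(rows: List[Dict[str, Any]],
                     working: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Keep a row when it has no visible_if, or its predicate accepts the working dict."""
        out = []
        for row in rows:
            pred: Optional[Callable[[Dict[str, Any]], bool]] = row.get("visible_if")
            if pred is None or pred(working):
                out.append(row)
        return out

    rows = [
        # Simplified stand-in for _supports_preambles: exact match on the base model.
        {"id": "preambles_enabled",
         "visible_if": lambda w: (w or {}).get("model") == "gpt-5"},
        {"id": "custom_first_turn_enabled"},
    ]
    print([r["id"] for r in visible_rows(rows, {"model": "gpt-5"})])            # both rows
    print([r["id"] for r in visible_rows(rows, {"model": "kimi-k2-thinking"})]) # second row only
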
@@ -3037,17 +3239,17 @@ class ChatCLI:
  self.ui.success(f"FS Scope set to: {self._fs_label()}")
  self.save_settings()

- async def set_level_menu(self) -> None:
- val = await self._menu_choice(
- "Control Level",
- "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
- [
- ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
- ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
- ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
- ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
- ],
- )
+ async def set_level_menu(self) -> None:
+ val = await self._menu_choice(
+ "Control Level",
+ "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
+ [
+ ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+ ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+ ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
+ ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
+ ],
+ )
  if val == "default":
  self.control_level = None
  elif val in ("1", "2", "3"):
@@ -3127,16 +3329,16 @@ class ChatCLI:
  except Exception:
  pass

- # 3) Tool usage preamble (UX hint)
+ # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
  try:
- if bool(getattr(self, "preambles_enabled", False)) and not self._is_codex_model(self.model):
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
  "Tool usage: when you need to read or modify files or run commands, "
  "explicitly explain why you're using a tool, what you'll do, and how it "
  "advances the user's goal before calling the tool."
  )
- except Exception:
- pass
+ except Exception:
+ pass

  # 4) Working memory (context-summary file), injected once on fresh session restart
  try:
@@ -3256,7 +3458,7 @@ class ChatCLI:
  "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
  )

- def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
+ def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
  """Build provider-native messages for Kimi preserving prior assistant reasoning_content.
  Includes prior provider-native turns and the current user message with first-turn injections.
  """
@@ -3274,8 +3476,31 @@ class ChatCLI:
  for m in (self._kimi_raw_history or []):
  raw.append(m)
  # Append current user message
- raw.append({"role": "user", "content": content})
- return raw
+ raw.append({"role": "user", "content": content})
+ return raw
+
+ def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
+ """Normalize Gemini provider-native history.
+
+ Ensures we only send a flat list of dicts back to the server.
+ This prevents accidental nesting like [[{...}, {...}]] which the
+ google-genai SDK rejects with pydantic union validation errors.
+ """
+ out: List[Dict[str, Any]] = []
+ if not isinstance(rpm, list):
+ return out
+ for item in rpm:
+ if item is None:
+ continue
+ if isinstance(item, list):
+ # Flatten one level
+ for sub in item:
+ if isinstance(sub, dict):
+ out.append(dict(sub))
+ continue
+ if isinstance(item, dict):
+ out.append(dict(item))
+ return out

  def _build_working_memory_injection(self) -> Optional[str]:
  try:
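
The normalization above guards against one specific failure mode: a turn's history being appended as a nested list, which would reach the google-genai SDK as `[[{...}]]` and fail pydantic union validation. The flattening behavior, restated as a standalone sketch with sample data (content shapes are illustrative):

    from typing import Any, Dict, List

    def normalize(rpm: Any) -> List[Dict[str, Any]]:
        """Flatten one level of accidental nesting and drop non-dict entries."""
        out: List[Dict[str, Any]] = []
        if not isinstance(rpm, list):
            return out
        for item in rpm:
            if isinstance(item, list):            # one nested turn: flatten it
                out.extend(dict(s) for s in item if isinstance(s, dict))
            elif isinstance(item, dict):
                out.append(dict(item))
        return out

    history = [
        {"role": "user", "parts": [{"text": "hi"}]},
        [{"role": "model", "parts": [{"text": "hello"}]}],  # accidentally nested turn
        None,                                               # dropped
    ]
    print(len(normalize(history)))  # 2 flat dicts, safe to send as raw_provider_messages
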
@@ -4577,21 +4802,29 @@ class ChatCLI:
  self.save_settings()
  return True

- if cmd.startswith("/system "):
- self.system_prompt = cmd[len("/system ") :].strip()
- self.history = []
- if self.system_prompt:
- self.history.append({"role": "system", "content": self.system_prompt})
- # Treat as a fresh session; allow map re-injection
- self._did_inject_codebase_map = False
+ if cmd.startswith("/system "):
+ self.system_prompt = cmd[len("/system ") :].strip()
+ self.history = []
+ if self.system_prompt:
+ self.history.append({"role": "system", "content": self.system_prompt})
+ # Treat as a fresh session; allow map re-injection
+ self._did_inject_codebase_map = False
  # Also allow custom first-turn injection again
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- self.ui.success("System prompt set.")
- self.save_settings()
- return True
+ self.ui.success("System prompt set.")
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
+ self.save_settings()
+ return True

  if cmd.startswith("/title "):
  new_title = cmd[len("/title ") :].strip()
@@ -4603,21 +4836,26 @@ class ChatCLI:
  self.ui.success(f"Thread title set to: {self.thread_name}")
  return True

- if cmd == "/clear":
+ if cmd == "/clear":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- # Reset provider-native histories
- try:
+ # Reset provider-native histories
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -4694,7 +4932,7 @@ class ChatCLI:

  # ---------------------------- Run loop ----------------------------

- async def run(self) -> None:
+ async def run(self) -> None:
  # Try persisted auth
  self._load_auth_state_from_disk()

@@ -4868,25 +5106,70 @@ class ChatCLI:
  self._session_started_at = None
  # Prepare completer for slash commands (if prompt_toolkit is available)
  pt_completer = self._commands_word_completer()
- while True:
- try:
- if self._pt_session is not None:
- # Use prompt_toolkit with inline completion when available
- # Pass completer per-prompt to ensure latest catalog
- user_input = await self._pt_session.prompt_async(
- "You: ",
- completer=pt_completer,
- complete_while_typing=True,
- )
- user_input = user_input.strip()
- elif self._input_engine:
- # Do not add continuation prefixes on new lines
- user_input = self._input_engine.read_message("You: ", "")
- else:
- user_input = self._read_multiline_input("You: ")
- # Successful read resets interrupt window
- self._last_interrupt_ts = None
- except KeyboardInterrupt:
+ while True:
+ try:
+ pending_edit = self._pending_user_edit
+ edit_mode = pending_edit is not None
+
+ if self._pt_session is not None:
+ # Use prompt_toolkit with inline completion when available
+ # Pass completer per-prompt to ensure latest catalog
+ try:
+ # prompt_toolkit supports default= on modern versions; fall back gracefully.
+ if edit_mode:
+ user_input = await self._pt_session.prompt_async(
+ "You (edit): ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ default=str(pending_edit),
+ )
+ else:
+ user_input = await self._pt_session.prompt_async(
+ "You: ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ )
+ except TypeError:
+ # Older prompt_toolkit: no default= support
+ user_input = await self._pt_session.prompt_async(
+ "You: ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ )
+ user_input = user_input.strip()
+ elif self._input_engine:
+ if edit_mode:
+ # The low-level input engine currently doesn't support prefill.
+ # Show the previous message and let the user paste a replacement.
+ try:
+ self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ # Do not add continuation prefixes on new lines
+ user_input = self._input_engine.read_message("You: ", "")
+ else:
+ if edit_mode:
+ try:
+ self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ user_input = self._read_multiline_input("You: ")
+
+ # Clear pending edit state after we successfully collected input.
+ if edit_mode:
+ self._pending_user_edit = None
+ self._pending_turn_snapshot = None
+ # Successful read resets interrupt window
+ self._last_interrupt_ts = None
+ except KeyboardInterrupt:
  # First Ctrl+C: interrupt input and warn; second within window exits
  now = time.time()
  try:
@@ -4905,8 +5188,8 @@ class ChatCLI:
  self.ui.print("Goodbye.")
  return

- if not user_input:
- continue
+ if not user_input:
+ continue

  # Command palette if bare '/'
  if user_input == "/":
@@ -4929,12 +5212,35 @@ class ChatCLI:
  if handled:
  continue

- try:
- # Record user message for local/server save
- if self.save_chat_history:
- self.messages_for_save.append({
- "role": "user",
- "content": user_input,
+ try:
+ # Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+ # This is critical for first-turn injections (code map/custom note/working memory)
+ # which are applied by mutating flags during payload construction.
+ self._pending_turn_snapshot = {
+ "history": copy.deepcopy(self.history),
+ "messages_for_save": copy.deepcopy(self.messages_for_save),
+ "kimi_raw": copy.deepcopy(self._kimi_raw_history),
+ "gemini_raw": copy.deepcopy(self._gemini_raw_history),
+ "openai_prev": getattr(self, "_openai_previous_response_id", None),
+ "openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+ "openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+ "openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+ "inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+ "did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+ "did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+ "did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+ "memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+ "last_built_user_content": getattr(self, "_last_built_user_content", None),
+ }
+
+ # Clear any stale in-flight dispatch context at turn start.
+ self._inflight_dispatch = None
+
+ # Record user message for local/server save
+ if self.save_chat_history:
+ self.messages_for_save.append({
+ "role": "user",
+ "content": user_input,
  "model": None,
  "citations": None,
  "last_turn_input_tokens": 0,
@@ -4948,25 +5254,86 @@ class ChatCLI:
  if self._busy:
  self.ui.warn("Agent is busy with another turn. Please wait...")
  continue
- self._busy = True
- try:
- assistant_text = await self._stream_once(user_input)
- finally:
- self._busy = False
- except httpx.HTTPStatusError as he:
- try:
- if he.response is not None:
- await he.response.aread()
- body = he.response.text
+ self._busy = True
+ try:
+ assistant_text = await self._stream_once(user_input)
+ finally:
+ self._busy = False
+ except KeyboardInterrupt:
+ # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
+ # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+ try:
+ await self._cancel_inflight_dispatch()
+ except (Exception, BaseException):
+ pass
+
+ # Restore state to *before* this turn started.
+ try:
+ snap = self._pending_turn_snapshot or {}
+ if isinstance(snap.get("history"), list):
+ self.history = snap.get("history")
+ if isinstance(snap.get("messages_for_save"), list):
+ self.messages_for_save = snap.get("messages_for_save")
+ if isinstance(snap.get("kimi_raw"), list):
+ self._kimi_raw_history = snap.get("kimi_raw")
+ if isinstance(snap.get("gemini_raw"), list):
+ self._gemini_raw_history = snap.get("gemini_raw")
+ if "openai_prev" in snap:
+ self._openai_previous_response_id = snap.get("openai_prev")
+ if isinstance(snap.get("openai_ids"), list):
+ self._openai_response_id_history = snap.get("openai_ids")
+ if isinstance(snap.get("openai_input_items"), list):
+ self._openai_input_items = snap.get("openai_input_items")
+ if "openai_last_sent_input_items" in snap:
+ self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
+ if "inflight_dispatch" in snap:
+ self._inflight_dispatch = snap.get("inflight_dispatch")
+ if "did_inject_codebase_map" in snap:
+ self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
+ if "did_inject_custom_first_turn" in snap:
+ self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
+ if "did_inject_working_memory" in snap:
+ self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
+ if "memory_paths_for_first_turn" in snap:
+ self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
+ self._last_built_user_content = snap.get("last_built_user_content")
+ except Exception:
+ pass
+
+ # Clear any transient indicator line and land on a fresh prompt line.
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ try:
+ self.ui.print()
+ except Exception:
+ pass
+
+ try:
+ supports = self._provider_supports_native_retention(self.model)
+ except Exception:
+ supports = False
+ if supports:
+ self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
+ else:
+ self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
+ self._pending_user_edit = user_input
+ continue
+ except httpx.HTTPStatusError as he:
+ try:
+ if he.response is not None:
+ await he.response.aread()
+ body = he.response.text
  else:
  body = ""
  except Exception:
  body = ""
  self.ui.error(f"[HTTP error] {he.response.status_code} {body}")
  continue
- except Exception as e:
- self.ui.error(f"[Client error] {e}")
- continue
+ except Exception as e:
+ self.ui.error(f"[Client error] {e}")
+ continue

  # Skip appending empty assistant messages to avoid 422 on next request
  if assistant_text.strip():
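
The cancellation path above reuses the same tools callback endpoint the CLI already posts tool results to, but with a `cancelled` result body so the server stops waiting on the delegated job instead of running out its callback timeout. A sketch of just that POST; the payload shape follows `_cancel_inflight_dispatch` in the hunk further up, while the URL and context values are illustrative:

    import asyncio
    import httpx

    async def cancel_dispatch(callback_url: str, ctx: dict,
                              reason: str = "cancelled by user") -> None:
        """Best-effort: unblock the server quickly; never raise on failure."""
        payload = {
            "session_id": ctx["session_id"],
            "call_id": ctx["call_id"],
            "name": ctx.get("name"),
            "job_token": ctx["job_token"],
            "result": {"ok": False, "cancelled": True, "error": reason},
        }
        # Short timeouts on purpose: the user already aborted this turn.
        timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                await client.post(callback_url, json=payload)
        except Exception:
            pass  # a lost callback just falls back to the server-side timeout

    # asyncio.run(cancel_dispatch("https://server.example/api/tools/callback",
    #                             {"session_id": "s1", "call_id": "c1", "job_token": "t1"}))
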
@@ -4992,7 +5359,7 @@ class ChatCLI:
  ("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
  ("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
  (auth_action_key, auth_action_label),
- ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5, gemini-2.5-pro, grok-4, deepseek-chat) or use Change Model to type one"),
+ ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2-thinking, etc.)"),
  ("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
  ("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
  ("clear_history", "🧹 Clear History - Reset chat history"),
@@ -5028,15 +5395,14 @@ class ChatCLI:
  has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
  is_effectively_free = (self.is_free_tier and not has_credits)

- # Recommended models (ordered list for shuffling)
- # Curated list per request (include Codex Max as recommended)
+ # Recommended models ("feelings" order)
  rec_keys = [
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
  "gemini-3-pro-preview",
  "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
+ "claude-opus-4-5-20251101",
  "kimi-k2-thinking",
  "grok-code-fast-1",
  ]
@@ -5078,8 +5444,7 @@ class ChatCLI:
  suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
  choices.append((m, f"{lbl}{suffix}"))

- choices.append(("default", "Server Default (no override)"))
- choices.append(("custom", "Custom (enter a model name)"))
+ # Per issue list: do not surface "server default" or "custom" in this picker.

  # Render and select using the unified highlighted picker
  picked: Optional[str] = None
@@ -5094,27 +5459,15 @@ class ChatCLI:
  picked = str(val)

  # Enforce free tier restrictions
- if picked not in ("default", "custom") and is_effectively_free and is_paid_model(picked):
- self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
- continue
+ if is_effectively_free and is_paid_model(picked):
+ self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
+ continue

  break

- # Apply selection
- if picked == "default":
- self.model = None
- self.ui.info("Model cleared; server default will be used.")
- elif picked == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
- self.model = self._resolve_model_alias(typed.strip() or None)
- if not self.model:
- self.ui.info("Model cleared; server default will be used.")
- else:
- self.model = picked
- self.ui.success(f"Model set to: {self.model}")
+ # Apply selection
+ self.model = picked
+ self.ui.success(f"Model set to: {self.model}")

  self._apply_model_side_effects()
  self.save_settings()
@@ -5203,7 +5556,7 @@ class ChatCLI:
  self.save_settings()
  return True

- if choice == "set_system_prompt":
+ if choice == "set_system_prompt":
  prompt = self.ui.prompt("Enter system prompt", default=self.system_prompt or "")
  self.system_prompt = prompt.strip()
  self.history = []
@@ -5215,32 +5568,40 @@ class ChatCLI:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- # Clear provider-native histories on system reset
- try:
+ # Clear provider-native histories on system reset
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  self.ui.success("System prompt set.")
  self.save_settings()
  return True

- if choice == "clear_history":
+ if choice == "clear_history":
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
  self._did_inject_codebase_map = False
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- try:
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ except Exception:
+ pass
  # Reset local cumulative token counters on session clear
  self._cum_input_tokens = 0
  self._cum_output_tokens = 0
@@ -5293,11 +5654,82 @@ class ChatCLI:
  return True

  # ----------------------- SSE Streaming loop ------------------------
- async def _stream_once(self, user_input: str) -> str:
- # Build request payload
- payload: Dict[str, Any] = {"messages": self._build_messages(user_input)}
+ async def _stream_once(self, user_input: str) -> str:
+ # Build request payload.
+ # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
+ # `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
+ if self._is_openai_model(self.model):
+ msgs: List[Dict[str, str]] = []
+ # Codex developer prompt (if enabled) + system prompt
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ if self.system_prompt:
+ msgs.append({"role": "system", "content": self.system_prompt})
+
+ # Apply first-turn-only injections to the current user content
+ content = user_input
+ prefix = self._build_first_turn_injection(user_input)
+ if prefix:
+ content = f"{prefix}\n\n{user_input}"
+ try:
+ self._last_built_user_content = content
+ except Exception:
+ self._last_built_user_content = user_input
+ msgs.append({"role": "user", "content": content})
+
+ payload: Dict[str, Any] = {"messages": msgs}
+
+ # Build OpenAI native input items (authoritative for the server OpenAI path).
+ try:
+ if isinstance(self._openai_input_items, list) and self._openai_input_items:
+ items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
+ else:
+ # Seed with system prompts for the first OpenAI turn.
+ items = []
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ items.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ if self.system_prompt:
+ items.append({"role": "system", "content": self.system_prompt})
+ items.append({"role": "user", "content": content})
+ payload["openai_input_items"] = self._sanitize_openai_items(items)
+ self._openai_last_sent_input_items = copy.deepcopy(items)
+ except Exception:
+ # If this fails for any reason, fall back to normal message-based history.
+ self._openai_last_sent_input_items = None
+
+ # OpenAI Threading: DISABLED. We use full manual input item replay now.
+ # if "openai_input_items" not in payload:
+ # try:
+ # if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
+ # payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
+ # except Exception:
+ # pass
+ try:
+ if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
+ except Exception:
+ pass
+ else:
+ payload = {"messages": self._build_messages(user_input)}
  if self.model:
  payload["model"] = self.model
+ # OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
+ try:
+ if self._is_openai_model(self.model):
+ if (
+ isinstance(getattr(self, "_openai_response_id_history", None), list)
+ and self._openai_response_id_history
+ and "openai_response_id_history" not in payload
+ ):
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
+ except Exception:
+ pass
  # Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
  try:
  if self.terminal_id:
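
Concretely, the OpenAI branch above produces a request whose `openai_input_items` carry the whole conversation, while `messages` only carry the current turn's system and user text. An illustrative sketch of the two payload shapes; the keys match the hunk above, the values are placeholders, and the assumption that the new user message is appended to the replayed items on later turns is ours (the viewer strips indentation, so the exact branch structure is not recoverable):

    # Turn 1: items are seeded from the system prompt plus the injected user content.
    payload_turn_1 = {
        "model": "gpt-5.2",
        "messages": [
            {"role": "system", "content": "You are a coding CLI."},
            {"role": "user", "content": "<codebase_map>...</codebase_map>\n\nAdd a --version flag."},
        ],
        "openai_input_items": [
            {"role": "system", "content": "You are a coding CLI."},
            {"role": "user", "content": "<codebase_map>...</codebase_map>\n\nAdd a --version flag."},
        ],
    }

    # Turn N: items replay every prior OpenAI-native item, sanitized of output-only
    # fields ("status", "usage", ...), presumably ending with the new user message.
    payload_turn_n = {
        "model": "gpt-5.2",
        "messages": [
            {"role": "system", "content": "You are a coding CLI."},
            {"role": "user", "content": "Now add tests."},
        ],
        "openai_response_id_history": ["resp_abc", "resp_def"],  # echoed by the server per turn
        "openai_input_items": [
            # ...prior system/user/reasoning/function_call/function_call_output items...
            {"role": "user", "content": "Now add tests."},
        ],
    }
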
@@ -5381,10 +5813,12 @@ class ChatCLI:
  payload["text_verbosity"] = self.text_verbosity
  except Exception:
  pass
- try:
- payload["preambles_enabled"] = bool(self.preambles_enabled)
- except Exception:
- pass
+ # Preambles are a GPT-5-only UX toggle.
+ try:
+ if self._supports_preambles(self.model):
+ payload["preambles_enabled"] = bool(self.preambles_enabled)
+ except Exception:
+ pass

  if self.web_search_enabled:
  payload["enable_web_search"] = True
@@ -5479,13 +5913,23 @@ class ChatCLI:
  headers["X-Request-Timeout"] = str(int(req_timeout_hint))
  except Exception:
  pass
- # If using a Kimi model, include provider-native messages to preserve reasoning_content
- try:
- if isinstance(self.model, str) and self.model.startswith("kimi-"):
- req_payload = dict(req_payload)
- req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
- except Exception:
- pass
+ # If using a Kimi model, include provider-native messages to preserve reasoning_content
+ try:
+ if isinstance(self.model, str) and self.model.startswith("kimi-"):
+ req_payload = dict(req_payload)
+ req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
+ except Exception:
+ pass
+ # If using a Gemini model, include provider-native contents to preserve thought signatures
+ # and strict tool-call chains across HTTP turns.
+ try:
+ if isinstance(self.model, str) and self.model.startswith("gemini-"):
+ req_payload = dict(req_payload)
+ hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
+ if hist:
+ req_payload["raw_provider_messages"] = hist
+ except Exception:
+ pass
  async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
  async with client.stream("POST", self.stream_url, json=req_payload, headers=headers, follow_redirects=True) as resp:
  if resp.status_code == 429:
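
For Gemini, the same `raw_provider_messages` field carries google-genai-style content dicts, so `thoughtSignature` blobs and functionCall/functionResponse pairs survive the HTTP round trip verbatim. An illustrative request body; the part shapes follow google-genai content dict conventions, and all values are placeholders:

    req_payload = {
        "model": "gemini-3-pro-preview",
        "messages": [{"role": "user", "content": "Continue the refactor."}],
        # Provider-native history, already flattened by _normalize_gemini_raw_messages:
        "raw_provider_messages": [
            {"role": "user", "parts": [{"text": "Rename the helper."}]},
            {"role": "model", "parts": [
                {"functionCall": {"name": "read_file", "args": {"path": "cli.py"}},
                 "thoughtSignature": "sig-..."},  # opaque; must be echoed back unmodified
            ]},
            {"role": "user", "parts": [
                {"functionResponse": {"name": "read_file", "response": {"output": "..."}}},
            ]},
        ],
    }
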
@@ -5596,6 +6040,51 @@ class ChatCLI:
5596
6040
  # Track whether we're currently positioned at the start of a fresh line.
5597
6041
  # This prevents double-newlines between back-to-back tool events.
5598
6042
  at_line_start = True
6043
+
6044
+ # --- Tool call in-place status (issuelist.md #7) ---
6045
+ # We render a single transient line for the current tool call (no trailing newline)
6046
+ # so the later tool.result SUCCESS/FAILURE line can replace it in-place.
6047
+ tool_status_active = False
6048
+ tool_status_call_id = None
6049
+
6050
+ def _tool_status_clear_line() -> None:
6051
+ """Clear the current line (best-effort) and return to column 0."""
6052
+ nonlocal at_line_start
6053
+ try:
6054
+ sys.stdout.write("\r\x1b[2K")
6055
+ sys.stdout.flush()
6056
+ except Exception:
6057
+ pass
6058
+ at_line_start = True
6059
+
6060
+ def _tool_status_show(call_id: Any, line: str) -> None:
6061
+ """Show the transient tool status line (no newline)."""
6062
+ nonlocal tool_status_active, tool_status_call_id, at_line_start
6063
+ if not self.show_tool_calls:
6064
+ return
6065
+ tool_status_active = True
6066
+ tool_status_call_id = str(call_id) if call_id is not None else None
6067
+ try:
6068
+ if not at_line_start:
6069
+ sys.stdout.write("\n")
6070
+ sys.stdout.write("\r\x1b[2K" + str(line))
6071
+ sys.stdout.flush()
6072
+ at_line_start = False
6073
+ except Exception:
6074
+ # Fallback: degrade to a normal printed line
6075
+ try:
6076
+ self.ui.print(str(line))
6077
+ except Exception:
6078
+ pass
6079
+ at_line_start = True
6080
+
6081
+ def _tool_status_stop() -> None:
6082
+ """Remove the transient tool status line and clear tracking."""
6083
+ nonlocal tool_status_active, tool_status_call_id
6084
+ if tool_status_active:
6085
+ _tool_status_clear_line()
6086
+ tool_status_active = False
6087
+ tool_status_call_id = None
5599
6088
  # Mode: animate or static (default static for stability)
5600
6089
  try:
5601
6090
  _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
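
Note: "\r\x1b[2K" is a carriage return followed by the ANSI "erase entire line" control, which is what lets the later SUCCESS/FAILURE line land where the transient status was. A standalone sketch of the replace-in-place technique (assumes an ANSI-capable terminal):

import sys
import time

def show_transient(line: str) -> None:
    # \r moves the cursor to column 0; \x1b[2K erases the whole line.
    # No trailing newline, so the next write can overwrite this status.
    sys.stdout.write("\r\x1b[2K" + line)
    sys.stdout.flush()

def finalize(line: str) -> None:
    # Replace the transient status and commit the final line with a newline.
    sys.stdout.write("\r\x1b[2K" + line + "\n")
    sys.stdout.flush()

show_transient("[RUNNING] read_file cli.py")
time.sleep(1.0)  # stand-in for the tool actually executing
finalize("[SUCCESS] read_file cli.py")
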
@@ -5899,16 +6388,40 @@ class ChatCLI:
5899
6388
  except Exception:
5900
6389
  pass
5901
6390
 
5902
- # Do NOT show the initial tool.call line per UX request; results will be
5903
- # rendered on tool.result. We still keep internal state and WS broadcasts.
5904
- # While the tool executes (server or client), show a subtle thinking
5905
- # indicator so users see progress during potentially long operations.
5906
- try:
5907
- # Do not start the indicator if we're in the middle of assistant token streaming
5908
- if (not streaming_assistant) and bool(getattr(self, "_thinking_indicator_enabled", False)):
5909
- await _indicator_start()
5910
- except Exception:
5911
- pass
6391
+ # issuelist.md #7:
6392
+ # Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
6393
+ try:
6394
+ # Clear any previous transient status line (none should still be active here, but keep output stable)
6395
+ _tool_status_stop()
6396
+ except Exception:
6397
+ pass
6398
+ try:
6399
+ tool_name = str(name or "").strip()
6400
+ label = self._tool_concise_label(
6401
+ tool_name,
6402
+ args if isinstance(args, dict) else {},
6403
+ None,
6404
+ )
6405
+ try:
6406
+ model_prefix = (
6407
+ self._current_turn.get("model")
6408
+ or self._last_used_model
6409
+ or self.model
6410
+ or "(server default)"
6411
+ )
6412
+ except Exception:
6413
+ model_prefix = self.model or "(server default)"
6414
+ ORANGE = "\x1b[38;5;214m"
6415
+ WHITE = "\x1b[97m"
6416
+ RESET = "\x1b[0m"
6417
+ status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
6418
+ _tool_status_show(call_id, status_line)
6419
+ except Exception:
6420
+ # Last-resort fallback: print something rather than crash streaming.
6421
+ try:
6422
+ self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
6423
+ except Exception:
6424
+ pass
5912
6425
  # Count tool calls
5913
6426
  try:
5914
6427
  tool_calls += 1
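
Note: the escape codes above are standard SGR sequences: 38;5;214 selects entry 214 (orange) of the xterm 256-color palette and 97 is bright white. A short sketch composing the same status line:

ORANGE = "\x1b[38;5;214m"  # xterm-256color palette entry 214
WHITE = "\x1b[97m"         # bright white (SGR 97)
RESET = "\x1b[0m"          # reset all attributes

def running_line(model: str, label: str) -> str:
    # Mirrors the diff: "<model>: [RUNNING] <label>", with the model and
    # status in orange and the tool label in bright white.
    return f"{ORANGE}{model}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"

print(running_line("gpt-5", "read_file cli.py"))
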
@@ -5931,10 +6444,15 @@ class ChatCLI:
5931
6444
  except Exception:
5932
6445
  pass
5933
6446
 
5934
- elif event == "approval.request":
5935
- # First reply wins (web or CLI)
5936
- await self._handle_approval_request(client, session_id, data)
5937
- continue
6447
+ elif event == "approval.request":
6448
+ # Don't let the transient [RUNNING] line collide with interactive prompts.
6449
+ try:
6450
+ _tool_status_stop()
6451
+ except Exception:
6452
+ pass
6453
+ # First reply wins (web or CLI)
6454
+ await self._handle_approval_request(client, session_id, data)
6455
+ continue
5938
6456
 
5939
6457
  elif event == "approval.result":
5940
6458
  appr = data.get("approved")
@@ -5976,10 +6494,22 @@ class ChatCLI:
5976
6494
  self.ui.info("Working memory created. Restarting conversation with a fresh first-turn injection...")
5977
6495
  return ""
5978
6496
 
5979
- elif event == "tool.result":
5980
- name = str(data.get("name"))
5981
- result = data.get("result", {}) or {}
5982
- call_id = data.get("call_id")
6497
+ elif event == "tool.result":
6498
+ name = str(data.get("name"))
6499
+ result = data.get("result", {}) or {}
6500
+ call_id = data.get("call_id")
6501
+ # If we previously rendered a transient [RUNNING] line for this tool call,
6502
+ # clear it now so the SUCCESS/FAILURE line prints in the same place.
6503
+ try:
6504
+ if tool_status_active:
6505
+ # Best-effort match on call_id (some providers may omit it).
6506
+ if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
6507
+ _tool_status_stop()
6508
+ except Exception:
6509
+ try:
6510
+ _tool_status_stop()
6511
+ except Exception:
6512
+ pass
5983
6513
  # Stop any indicator before rendering results
5984
6514
  try:
5985
6515
  await _indicator_stop(clear=True)
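
Note: the match rule above is deliberately permissive: the transient line is cleared when either side lacks a call_id, or when the two ids agree. The same rule as a pure function, assuming ids compare as strings:

from typing import Any, Optional

def should_clear(status_call_id: Optional[Any], result_call_id: Optional[Any]) -> bool:
    # Providers may omit call_id on either event; when in doubt, clear the
    # transient line rather than leave a stale [RUNNING] status behind.
    if status_call_id is None or result_call_id is None:
        return True
    return str(result_call_id) == str(status_call_id)

assert should_clear(None, "abc")
assert should_clear("abc", None)
assert should_clear("abc", "abc")
assert not should_clear("abc", "xyz")
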
@@ -6087,7 +6617,7 @@ class ChatCLI:
6087
6617
  # Do not auto-restart the indicator here; wait for the next model event
6088
6618
 
6089
6619
  elif event == "tool.dispatch":
6090
- # Client-executed tool flow
6620
+ # Client-executed tool flow
6091
6621
  if not HAS_LOCAL_TOOLS:
6092
6622
  self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
6093
6623
  continue
@@ -6097,12 +6627,23 @@ class ChatCLI:
6097
6627
  # tool invocation on the corresponding 'tool.call' event. Counting
6098
6628
  # dispatch would double-count a single tool call.
6099
6629
 
6100
- session_id_d = data.get("session_id")
6101
- call_id = data.get("call_id")
6102
- name = data.get("name")
6103
- args = data.get("args", {}) or {}
6104
- job_token = data.get("job_token")
6105
- reqp = data.get("requested_policy", {}) or {}
6630
+ session_id_d = data.get("session_id")
6631
+ call_id = data.get("call_id")
6632
+ name = data.get("name")
6633
+ args = data.get("args", {}) or {}
6634
+ job_token = data.get("job_token")
6635
+ reqp = data.get("requested_policy", {}) or {}
6636
+
6637
+ # Track in-flight dispatch so Ctrl+C can cancel quickly.
6638
+ try:
6639
+ self._inflight_dispatch = {
6640
+ "session_id": session_id_d,
6641
+ "call_id": call_id,
6642
+ "job_token": job_token,
6643
+ "name": name,
6644
+ }
6645
+ except Exception:
6646
+ pass
6106
6647
 
6107
6648
  if DEBUG_SSE:
6108
6649
  self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
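
Note: _inflight_dispatch exists so an interrupt handler can identify the running job. A sketch of how a Ctrl+C path might consume it; the cancel_url endpoint and its payload shape are illustrative assumptions, not an API this diff defines:

import httpx
from typing import Any, Dict, Optional

async def cancel_inflight(client: httpx.AsyncClient,
                          cancel_url: str,  # hypothetical endpoint
                          inflight: Optional[Dict[str, Any]]) -> bool:
    # Nothing to cancel if no dispatch is currently tracked.
    if not isinstance(inflight, dict):
        return False
    payload = {
        "session_id": inflight.get("session_id"),
        "call_id": inflight.get("call_id"),
        "job_token": inflight.get("job_token"),
    }
    try:
        r = await client.post(cancel_url, json=payload, timeout=10.0)
        return r.status_code < 400
    except Exception:
        # Best-effort: a failed cancel must never crash the interrupt path.
        return False
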
@@ -6377,9 +6918,9 @@ class ChatCLI:
6377
6918
  except Exception:
6378
6919
  self._last_dispatch_ctx = None
6379
6920
 
6380
- # POST callback
6381
- try:
6382
- if session_id_d and call_id and job_token:
6921
+ # POST callback
6922
+ try:
6923
+ if session_id_d and call_id and job_token:
6383
6924
  payload_cb = {
6384
6925
  "session_id": session_id_d,
6385
6926
  "call_id": call_id,
@@ -6387,16 +6928,87 @@ class ChatCLI:
6387
6928
  "result": result,
6388
6929
  "job_token": job_token,
6389
6930
  }
6390
- r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
6391
- if r.status_code >= 400:
6392
- self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
6393
- except Exception as e:
6394
- self.ui.warn(f"tools.callback error: {e}")
6931
+ r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
6932
+ if r.status_code >= 400:
6933
+ self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
6934
+ except Exception as e:
6935
+ self.ui.warn(f"tools.callback error: {e}")
6936
+ finally:
6937
+ try:
6938
+ # Clear in-flight dispatch context when we send a callback.
6939
+ if isinstance(self._inflight_dispatch, dict):
6940
+ if str(self._inflight_dispatch.get("call_id")) == str(call_id):
6941
+ self._inflight_dispatch = None
6942
+ except Exception:
6943
+ pass
6395
6944
 
6396
- elif event == "message.completed":
6945
+ elif event == "message.completed":
6397
6946
  # Safety: this block handles only 'message.completed'.
6398
6947
  usage = data.get("usage", {})
6399
- model_used = data.get("model") or self.model
6948
+ model_used = data.get("model") or self.model
6949
+ # OpenAI: persist the last response id so future turns can use previous_response_id.
6950
+ try:
6951
+ if self._is_openai_model(model_used):
6952
+ # Prefer the explicit per-turn id list when provided by the server.
6953
+ ids = data.get("openai_response_ids")
6954
+ if isinstance(ids, list) and ids:
6955
+ for x in ids:
6956
+ if not isinstance(x, str):
6957
+ continue
6958
+ xs = x.strip()
6959
+ if not xs:
6960
+ continue
6961
+ try:
6962
+ if xs not in self._openai_response_id_history:
6963
+ self._openai_response_id_history.append(xs)
6964
+ except Exception:
6965
+ pass
6966
+ rid = data.get("openai_previous_response_id")
6967
+ if isinstance(rid, str) and rid.strip():
6968
+ self._openai_previous_response_id = rid.strip()
6969
+ try:
6970
+ if rid.strip() not in self._openai_response_id_history:
6971
+ self._openai_response_id_history.append(rid.strip())
6972
+ except Exception:
6973
+ pass
6974
+
6975
+ # OpenAI manual-state replay: server returns the delta items appended
6976
+ # during this turn (reasoning/tool calls/tool outputs). Persist them.
6977
+ try:
6978
+ delta = data.get("openai_delta_items")
6979
+ if isinstance(delta, list):
6980
+ base_items = (
6981
+ self._openai_last_sent_input_items
6982
+ if isinstance(self._openai_last_sent_input_items, list)
6983
+ else copy.deepcopy(self._openai_input_items)
6984
+ )
6985
+ # Normalize to a list of dicts where possible; keep unknown shapes as-is.
6986
+ merged: List[Any] = []
6987
+ try:
6988
+ merged.extend(list(base_items or []))
6989
+ except Exception:
6990
+ merged = list(base_items or []) if base_items is not None else []
6991
+ merged.extend(delta)
6992
+ # Store only dict-like items (server is expected to send dicts)
6993
+ cleaned: List[Dict[str, Any]] = []
6994
+ for it in merged:
6995
+ if isinstance(it, dict):
6996
+ cleaned.append(dict(it))
6997
+ self._openai_input_items = cleaned
6998
+ except Exception:
6999
+ pass
7000
+ finally:
7001
+ # Clear per-turn sent snapshot
7002
+ self._openai_last_sent_input_items = None
7003
+ except Exception:
7004
+ pass
7005
+ # Gemini: server may include an authoritative provider-native history snapshot.
7006
+ try:
7007
+ if isinstance(model_used, str) and model_used.startswith("gemini-"):
7008
+ rpm = data.get("raw_provider_messages")
7009
+ self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
7010
+ except Exception:
7011
+ pass
6400
7012
  # Mark completion for retry controller
6401
7013
  try:
6402
7014
  last_completed = True
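
Note: the merge above reads as: start from the exact item list sent this turn (falling back to a deep copy of the stored chain), append the server's delta, and keep only dict-shaped items. The same semantics as a pure function:

import copy
from typing import Any, Dict, List, Optional

def merge_input_items(last_sent: Optional[List[Dict[str, Any]]],
                      stored: List[Dict[str, Any]],
                      delta: List[Any]) -> List[Dict[str, Any]]:
    # Prefer the snapshot actually sent this turn so the delta is appended
    # against the same base the server saw.
    base: List[Any] = list(last_sent) if isinstance(last_sent, list) else copy.deepcopy(stored)
    merged = base + list(delta)
    # Keep only dict-shaped items; the server is expected to send dicts.
    return [dict(it) for it in merged if isinstance(it, dict)]

sent = [{"type": "message", "role": "user"}]
delta = [{"type": "reasoning"}, "noise", {"type": "function_call"}]
assert merge_input_items(sent, [], delta) == [
    {"type": "message", "role": "user"},
    {"type": "reasoning"},
    {"type": "function_call"},
]
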
@@ -7238,20 +7850,29 @@ class ChatCLI:
7238
7850
  pass
7239
7851
  return "".join(assistant_buf)
7240
7852
 
7241
- elif event == "provider.message":
7242
- # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7243
- provider = (data.get("provider") or "").lower()
7244
- msg = data.get("message")
7245
- if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7246
- # Append as-is to local raw history for the next turn
7247
- try:
7248
- self._kimi_raw_history.append(dict(msg))
7249
- except Exception:
7250
- try:
7251
- self._kimi_raw_history.append(msg) # type: ignore
7252
- except Exception:
7253
- pass
7254
- continue
7853
+ elif event == "provider.message":
7854
+ # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
7855
+ provider = (data.get("provider") or "").lower()
7856
+ msg = data.get("message")
7857
+ if provider == "gemini":
7858
+ # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
7859
+ try:
7860
+ if isinstance(msg, dict):
7861
+ self._gemini_raw_history.append(dict(msg))
7862
+ elif isinstance(msg, list):
7863
+ self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
7864
+ except Exception:
7865
+ pass
7866
+ if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
7867
+ # Append as-is to local raw history for the next turn
7868
+ try:
7869
+ self._kimi_raw_history.append(dict(msg))
7870
+ except Exception:
7871
+ try:
7872
+ self._kimi_raw_history.append(msg) # type: ignore
7873
+ except Exception:
7874
+ pass
7875
+ continue
7255
7876
 
7256
7877
  else:
7257
7878
  # TEMP DEBUG: show unknown/unhandled events
@@ -7799,7 +8420,7 @@ class ChatCLI:
7799
8420
  "gpt-5": 400000,
7800
8421
  "gpt-5-2025-08-07": 400000,
7801
8422
  "codex-mini-latest": 200000,
7802
- "gemini-2.5-pro": 1048576,
8423
+ # (removed gemini-2.5-pro)
7803
8424
  "gemini-3-flash-preview": 1048576,
7804
8425
  "gemini-3-pro-preview": 1000000,
7805
8426
  "grok-4-1-fast-reasoning": 2000000,
@@ -7816,10 +8437,10 @@ class ChatCLI:
7816
8437
  "claude-sonnet-4-5-20250929-thinking": 1000000,
7817
8438
  "claude-opus-4-5-20251101": 200000,
7818
8439
  "claude-opus-4-5-20251101-thinking": 200000,
7819
- "glm-4.6": 200000,
7820
- })
7821
- except Exception:
7822
- pass
8440
+ "glm-4.7": 200000,
8441
+ })
8442
+ except Exception:
8443
+ pass
7823
8444
  self._model_ctx_map = ctx_map
7824
8445
  return ctx_map
7825
8446
 
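Note: consumers of a curated map like this usually want a fallback for unknown model ids. A minimal lookup sketch; the entries are taken from this diff, while the 128k default is an illustrative assumption:

from typing import Dict

CTX_MAP: Dict[str, int] = {
    # Entries as updated in this diff.
    "gpt-5": 400_000,
    "gemini-3-flash-preview": 1_048_576,
    "glm-4.7": 200_000,
}

def context_window(model: str, default: int = 128_000) -> int:
    # The default is a placeholder, not a value defined by cli.py.
    return CTX_MAP.get((model or "").strip(), default)

assert context_window("gpt-5") == 400_000
assert context_window("unknown-model") == 128_000
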
@@ -8206,8 +8827,11 @@ class ChatCLI:
8206
8827
  "We’ll configure a few defaults. You can change these later via /settings.",
8207
8828
  )
8208
8829
 
8209
- # --- 1) Default model (menu, no Y/N) ---
8210
- await self._wizard_model_step()
8830
+ # --- 1) Default model (menu) ---
8831
+ await self._wizard_model_step()
8832
+ # If the picker was cancelled (or model still unset), choose a sensible default.
8833
+ if not self.model:
8834
+ self.model = self._recommended_default_model()
8211
8835
 
8212
8836
  # --- 2) Tools (always ON per design) ---
8213
8837
  self.requested_tools = True
@@ -8217,9 +8841,9 @@ class ChatCLI:
8217
8841
  "Control levels: 1=read-only, 2=approval on write/exec, 3=no approvals"
8218
8842
  )
8219
8843
  await self.set_level_menu()
8220
- if self.control_level not in (1, 2, 3):
8221
- # Default to Level 2 if user aborted
8222
- self.control_level = 2
8844
+ if self.control_level not in (1, 2, 3):
8845
+ # Default to Level 3 if user aborted
8846
+ self.control_level = 3
8223
8847
 
8224
8848
  # --- 4) Agent scope (menus; only type on custom path) ---
8225
8849
  self.ui.print(
@@ -8270,30 +8894,45 @@ class ChatCLI:
8270
8894
  except Exception:
8271
8895
  curv = "medium"
8272
8896
 
8273
- verb_choice = await self._menu_choice(
8274
- "Text verbosity",
8275
- "How verbose should responses be by default?",
8276
- [
8277
- ("low", "Low – short, to-the-point answers"),
8278
- ("medium", "Medium – balanced detail (recommended)"),
8279
- ("high", "High – more verbose explanations"),
8280
- ],
8281
- )
8897
+ verbosity_choices: List[Tuple[str, str]] = []
8898
+ if self._is_gpt_model(self.model):
8899
+ # Default-first: Low for GPT models.
8900
+ verbosity_choices = [
8901
+ ("low", "Low – short, to-the-point answers"),
8902
+ ("medium", "Medium – balanced detail"),
8903
+ ("high", "High – more verbose explanations"),
8904
+ ]
8905
+ else:
8906
+ # Default-first: Medium for non-GPT models; do not surface "Low".
8907
+ verbosity_choices = [
8908
+ ("medium", "Medium – balanced detail (recommended)"),
8909
+ ("high", "High – more verbose explanations"),
8910
+ ]
8911
+
8912
+ verb_choice = await self._menu_choice(
8913
+ "Text verbosity",
8914
+ "How verbose should responses be by default?",
8915
+ verbosity_choices,
8916
+ )
8282
8917
  if verb_choice in ("low", "medium", "high"):
8283
8918
  self.text_verbosity = verb_choice
8284
8919
  else:
8285
8920
  self.text_verbosity = curv or "medium"
8286
8921
 
8287
- # --- 7) Tool preambles (menu) ---
8288
- preamble_choice = await self._menu_choice(
8289
- "Tool call preambles",
8290
- "Before using tools, the agent can briefly explain what it will do and why (supported models only).",
8291
- [
8292
- ("on", "Enable preambles"),
8293
- ("off", "Disable preambles (default)"),
8294
- ],
8295
- )
8296
- self.preambles_enabled = preamble_choice == "on"
8922
+ # --- 7) Tool preambles (GPT-5 only) ---
8923
+ if self._supports_preambles(self.model):
8924
+ preamble_choice = await self._menu_choice(
8925
+ "Tool call preambles",
8926
+ "Before using tools, the agent can briefly explain what it will do and why.",
8927
+ [
8928
+ ("off", "Disable preambles (default)"),
8929
+ ("on", "Enable preambles"),
8930
+ ],
8931
+ )
8932
+ self.preambles_enabled = preamble_choice == "on"
8933
+ else:
8934
+ # Never enable preambles on unsupported models.
8935
+ self.preambles_enabled = False
8297
8936
 
8298
8937
  # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
8299
8938
  custom_choice = await self._menu_choice(
@@ -8367,7 +9006,8 @@ class ChatCLI:
8367
9006
  text = m.get("content", "")
8368
9007
  contents.append({"role": role, "parts": [{"text": text}]})
8369
9008
  # Pick a Gemini model for counting; fall back if current isn't Gemini
8370
- count_model = "gemini-2.5-pro"
9009
+ # (gemini-2.5-pro removed from curated lists)
9010
+ count_model = "gemini-3-flash-preview"
8371
9011
  res = client.models.count_tokens(model=count_model, contents=contents)
8372
9012
  t = int(getattr(res, "total_tokens", 0) or 0)
8373
9013
  if t > 0:
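
Note: the counting call above uses the google-genai client. A self-contained sketch of the same call, assuming a GEMINI_API_KEY is available in the environment (genai.Client() reads it by default):

# pip install google-genai
from google import genai

client = genai.Client()  # picks up GEMINI_API_KEY from the environment
contents = [
    {"role": "user", "parts": [{"text": "Hello there"}]},
    {"role": "model", "parts": [{"text": "Hi! How can I help?"}]},
]
res = client.models.count_tokens(
    model="gemini-3-flash-preview",  # same fallback model as the diff
    contents=contents,
)
print(int(getattr(res, "total_tokens", 0) or 0))
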
@@ -8425,16 +9065,16 @@ class ChatCLI:
8425
9065
  blocks.append(txt.strip())
8426
9066
  except Exception:
8427
9067
  pass
8428
- # Tool preamble
8429
- try:
8430
- if bool(getattr(self, "preambles_enabled", False)):
8431
- blocks.append(
8432
- "Tool usage: when you need to read or modify files or run commands, "
8433
- "explicitly explain why you're using a tool, what you'll do, and how it "
8434
- "advances the user's goal before calling the tool."
8435
- )
8436
- except Exception:
8437
- pass
9068
+ # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
9069
+ try:
9070
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
9071
+ blocks.append(
9072
+ "Tool usage: when you need to read or modify files or run commands, "
9073
+ "explicitly explain why you're using a tool, what you'll do, and how it "
9074
+ "advances the user's goal before calling the tool."
9075
+ )
9076
+ except Exception:
9077
+ pass
8438
9078
  # Working memory preview (does not touch _did_inject_working_memory or paths)
8439
9079
  try:
8440
9080
  if self._memory_paths_for_first_turn: