henosis-cli 0.6.8__py3-none-any.whl → 0.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli.py CHANGED
@@ -6,11 +6,12 @@
  # - Preserves previous behavior and settings
  # - Injects CODEBASE_MAP.md into the first user message (wrapped in <codebase_map>) without manual trimming.
 
- import argparse
- import asyncio
- import json
- import os
- import sys
+ import argparse
+ import asyncio
+ import copy
+ import json
+ import os
+ import sys
  import socket
  import shutil
  from pathlib import Path
@@ -684,7 +685,7 @@ class UI:
  print(f"{n:<40} {ty:<8} {sz}")
 
  class ChatCLI:
- def __init__(
+ def __init__(
  self,
  server: str,
  model: Optional[str],
@@ -1001,6 +1002,21 @@ class ChatCLI:
  self._kimi_raw_history: List[Dict[str, Any]] = []
  # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
  self._gemini_raw_history: List[Dict[str, Any]] = []
+ # OpenAI Responses API threading: retain previous response id across turns
+ self._openai_previous_response_id: Optional[str] = None
+ # OpenAI Responses API threading: retain the full chain of response ids across turns
+ # (server will also echo per-turn ids in message.completed.openai_response_ids)
+ self._openai_response_id_history: List[str] = []
+
+ # OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+ # including reasoning items, function_call items, and function_call_output items.
+ self._openai_input_items: List[Dict[str, Any]] = []
+ # For robustness, remember exactly what we sent as openai_input_items for the current turn
+ # so we can append server-provided openai_delta_items deterministically.
+ self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+ # Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+ # Shape: {session_id, call_id, job_token, name}
+ self._inflight_dispatch: Optional[Dict[str, Any]] = None
  # Last server billing info from /api/usage/commit
  self._last_commit_cost_usd: float = 0.0
  self._last_remaining_credits: Optional[float] = None
@@ -1051,8 +1067,14 @@ class ChatCLI:
  self._thinking_indicator_enabled = True
  except Exception:
  self._thinking_indicator_enabled = True
- # Track Ctrl+C timing for double-press-to-exit behavior
- self._last_interrupt_ts: Optional[float] = None
+ # Track Ctrl+C timing for double-press-to-exit behavior
+ self._last_interrupt_ts: Optional[float] = None
+
+ # Ctrl+C during a running stream should not kill the entire CLI.
+ # Instead, we cancel the in-flight turn and reopen the last user query for editing.
+ # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+ self._pending_user_edit: Optional[str] = None
+ self._pending_turn_snapshot: Optional[Dict[str, Any]] = None
 
  # Timers: session-level and per-turn wall-clock timers
  self._session_started_at: Optional[float] = None # time.perf_counter() at session start
@@ -1093,7 +1115,7 @@ class ChatCLI:
  self._pt_session = None
 
  # ----------------------- Provider heuristics -----------------------
- def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
+ def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
  """Return True when the model is an OpenAI reasoning-capable model.
  Mirrors server-side heuristic: prefixes 'gpt-5' or 'o4'.
  """
@@ -1344,6 +1366,9 @@ class ChatCLI:
  return {
  # OpenAI
  "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+ # New: gpt-5.2-codex
+ # Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+ "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
  # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
  "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
  "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
@@ -1361,8 +1386,7 @@ class ChatCLI:
  # New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
  "claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
  "claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
- # Gemini
- "gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
+ # Gemini
  # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
  "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
  # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
@@ -1377,15 +1401,15 @@ class ChatCLI:
  # DeepSeek V3.2 (+$0.25 per 1M margin)
  "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
  "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
- "deepseek-3.2-speciale": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+ # Removed: deepseek speciale (not supported)
  # Kimi
  "kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
  "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
  "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
- # GLM (Z.AI)
- # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
- "glm-4.6": {"input": 0.84, "output": 3.08, "provider": "glm"},
- }
+ # GLM (Z.AI)
+ # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
+ "glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
+ }
 
  def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
  if not model:
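The entries above are USD per 1M tokens, with `cached_input` charged at a reduced rate when present. `_resolve_price` itself is not shown in this hunk, so here is a minimal sketch of how such a map is typically applied; the `turn_cost_usd` helper and the token counts are hypothetical, not part of cli.py:

```python
# Hypothetical illustration of applying a per-1M-token price map like the one
# above. Field names mirror the diff ("input", "output", "cached_input");
# the helper and token counts are invented for the example.
PRICES = {
    "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175},
}

def turn_cost_usd(model: str, in_tok: int, out_tok: int, cached_tok: int = 0) -> float:
    p = PRICES[model]
    cached_rate = p.get("cached_input", p["input"])  # fall back to the full input rate
    billable_in = max(in_tok - cached_tok, 0)
    return (
        billable_in * p["input"] / 1_000_000
        + cached_tok * cached_rate / 1_000_000
        + out_tok * p["output"] / 1_000_000
    )

# e.g. 100k input (40k cached) + 5k output on gpt-5.2-codex:
# 60_000*1.75/1e6 + 40_000*0.175/1e6 + 5_000*14.00/1e6 = 0.105 + 0.007 + 0.07 = $0.182
print(round(turn_cost_usd("gpt-5.2-codex", 100_000, 5_000, cached_tok=40_000), 3))
```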
@@ -1433,6 +1457,15 @@ class ChatCLI:
  except Exception:
  model_name = ""
  try:
+ # Provider-native state resets when switching away from OpenAI.
+ try:
+ if self.model and (not self._is_openai_model(self.model)):
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  if model_name in {"gpt-5.2-pro"}:
  # Default these to high, but don't clobber a user-chosen xhigh.
  if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
@@ -1440,11 +1473,138 @@ class ChatCLI:
  # Codex family: disable preambles for better behavior
  if "codex" in model_name:
  self.preambles_enabled = False
+ # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+ # Force-disable for all other models (even if a saved setting had it enabled).
+ if not self._supports_preambles(self.model):
+ self.preambles_enabled = False
  except Exception:
  try:
  self.reasoning_effort = "high"
  except Exception:
  pass
+
+ def _supports_preambles(self, model: Optional[str]) -> bool:
+ """Tool-call preambles are a CLI-only UX hint.
+
+ Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+ In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+ """
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ # Only the base GPT-5 line supports this UX toggle.
+ # Allow:
+ # - "gpt-5"
+ # - date-pinned variants like "gpt-5-2025-08-07"
+ # Disallow:
+ # - versioned families like "gpt-5.1*" / "gpt-5.2*"
+ if not (m == "gpt-5" or m.startswith("gpt-5-")):
+ return False
+ if "codex" in m:
+ return False
+ return True
+ except Exception:
+ return False
+
+ def _is_openai_model(self, model: Optional[str]) -> bool:
+ """Best-effort model/provider discriminator for client-side state.
+
+ The server is multi-provider. For the CLI we treat anything that isn't an explicit
+ non-OpenAI provider prefix as OpenAI.
+ """
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ if not m:
+ return False
+ for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+ if m.startswith(pfx):
+ return False
+ # Everything else defaults to OpenAI in this repo.
+ return True
+ except Exception:
+ return False
+
+ def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+ """Whether this provider has an implemented native tool/thinking retention path."""
+ try:
+ if not model:
+ return False
+ m = str(model).strip().lower()
+ if m.startswith("gemini-"):
+ return True
+ if m.startswith("kimi-"):
+ return bool(getattr(self, "retain_native_tool_results", False))
+ if self._is_openai_model(model):
+ return True
+ return False
+ except Exception:
+ return False
+
+ def _sanitize_openai_items(self, items: Any) -> Any:
+ """Recursively strip fields from OpenAI output items that cause errors when used as input."""
+ if isinstance(items, list):
+ return [self._sanitize_openai_items(x) for x in items]
+ if isinstance(items, dict):
+ # 'status' is the main offender causing 400s
+ bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+ return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+ return items
+
+ async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+ """If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+ This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+ Best-effort; never raises.
+ """
+ ctx = None
+ try:
+ ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+ except Exception:
+ ctx = None
+ if not ctx:
+ return
+ session_id = ctx.get("session_id")
+ call_id = ctx.get("call_id")
+ job_token = ctx.get("job_token")
+ name = ctx.get("name")
+ if not (session_id and call_id and job_token):
+ return
+ payload_cb = {
+ "session_id": session_id,
+ "call_id": call_id,
+ "name": name,
+ "job_token": job_token,
+ "result": {
+ "ok": False,
+ "cancelled": True,
+ "error": str(reason or "cancelled"),
+ },
+ }
+ try:
+ # Keep it short; we just want to unblock the server.
+ http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+ except Exception:
+ http_timeout = None
+ try:
+ async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+ await client.post(self.tools_callback_url, json=payload_cb)
+ except Exception:
+ pass
+ finally:
+ try:
+ self._inflight_dispatch = None
+ except Exception:
+ pass
+
+ def _is_gpt_model(self, model: Optional[str]) -> bool:
+ """True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+ try:
+ return bool(model) and str(model).strip().lower().startswith("gpt-")
+ except Exception:
+ return False
  def _is_codex_model(self, model: Optional[str]) -> bool:
  try:
  return bool(model) and ("codex" in str(model).lower())
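The `_supports_preambles` gate is the subtlest heuristic in this hunk: `gpt-5.2` fails the prefix test because it starts with `gpt-5.` rather than `gpt-5-`. A standalone restatement with concrete cases (the assertion set is assumed, not taken from the package):

```python
# Restatement of the _supports_preambles gate added above, to make the
# allow/deny behavior concrete. Test values are illustrative.
from typing import Optional

def supports_preambles(model: Optional[str]) -> bool:
    if not model:
        return False
    m = model.strip().lower()
    if not (m == "gpt-5" or m.startswith("gpt-5-")):
        return False  # excludes gpt-5.1*, gpt-5.2*, and all non-GPT-5 models
    return "codex" not in m  # excludes Codex variants

assert supports_preambles("gpt-5")
assert supports_preambles("gpt-5-2025-08-07")          # date-pinned variant allowed
assert not supports_preambles("gpt-5.2")               # "gpt-5." prefix, not "gpt-5-"
assert not supports_preambles("gpt-5-codex")           # Codex excluded
assert not supports_preambles("claude-opus-4-5-20251101")
```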
@@ -2120,6 +2280,14 @@ class ChatCLI:
  self.history = []
  if self.system_prompt:
  self.history.append({"role": "system", "content": self.system_prompt})
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  # On settings load, do not assume the custom first-turn was injected yet
  try:
  self._did_inject_custom_first_turn = False
@@ -2507,30 +2675,28 @@ class ChatCLI:
 
  def _model_presets(self) -> List[Tuple[str, str]]:
  """Shared list of (model, label) used by settings UI and /model menu."""
+ # Ordered in "feelings" order (Recommended first, then Others).
+ # NOTE: We intentionally do not include a "server default" or "custom" option here.
  return [
+ # Recommended
  ("gpt-5.2", "OpenAI: gpt-5.2"),
- ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
+ ("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
  ("gpt-5", "OpenAI: gpt-5"),
+ ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+ ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+ ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
+ ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
+ ("grok-code-fast-1", "xAI: grok-code-fast-1"),
+
+ # Others
+ ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
  ("gpt-5-codex", "OpenAI: gpt-5-codex"),
  ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
- ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
  ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
- ("deepseek-3.2-speciale", "DeepSeek: deepseek 3.2 Speciale (no tools)"),
- ("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
- ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
- ("gemini-2.5-pro", "Gemini: gemini-2.5-pro"),
- ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
- ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
- ("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
- ("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
- ("grok-4", "xAI: grok-4"),
- ("grok-code-fast-1", "xAI: grok-code-fast-1"),
- ("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
- ("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
- ("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
- ("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
- ("glm-4.6", "GLM: glm-4.6"),
- ]
+ ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+ ("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
+ ("glm-4.7", "GLM: glm-4.7"),
+ ]
 
  async def open_settings(self, focus: Optional[str] = None) -> None:
  """Open the new dependency-free settings UI. Falls back to legacy only when
@@ -2584,37 +2750,33 @@ class ChatCLI:
  }
  initial = self._collect_settings_dict()
 
- # Model presets list (shared)
- model_presets: List[Tuple[str, str]] = self._model_presets()
- # Reorder with a Recommended section at the top. Avoid decorative symbols; instead,
- # annotate recommended models with plain text for clarity.
- # Recommended set per request: opus 4-5 (no thinking), gemini 3, gpt 5, kimi k2 thinking,
- # grok code fast 1, and deepseek reasoner 3.2
- rec_keys = {
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
+ # Model presets list (shared)
+ model_presets: List[Tuple[str, str]] = self._model_presets()
+
+ # Reorder with a Recommended section at the top.
+ # IMPORTANT: remove "server default" and "custom" from Settings UI.
+ rec_keys_ordered = [
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
  "gemini-3-pro-preview",
  "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
+ "claude-opus-4-5-20251101",
  "kimi-k2-thinking",
  "grok-code-fast-1",
- }
- rec_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m in rec_keys]
- other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_keys]
- # Build enum options in the order: Server default, Recommended, Others, Custom
- model_enum_options: List[Optional[str]] = [None] + [m for (m, _l) in rec_list] + [m for (m, _l) in other_list] + ["custom"]
- # Build render map without any star/marker characters; use a simple "(recommended)" suffix
- # for recommended models EXCEPT DeepSeek Reasoner 3.2, which should not display the suffix.
- render_map: Dict[Any, str] = {None: "Server default"}
+ ]
+ rec_set = set(rec_keys_ordered)
+ preset_map = {m: lbl for (m, lbl) in model_presets}
+ rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
+ other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
+
+ # Build enum options in the order: Recommended, Others
+ model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+ render_map: Dict[Any, str] = {}
  for m, lbl in rec_list:
- if m == "deepseek-reasoner-3.2":
- render_map[m] = lbl
- else:
- render_map[m] = f"{lbl} (recommended)"
- for m, lbl in other_list:
- render_map[m] = lbl
- render_map["custom"] = "Custom..."
+ render_map[m] = lbl
+ for m, lbl in other_list:
+ render_map[m] = lbl
 
  # Build items schema
  items: List[Dict[str, Any]] = [
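The reorder above is a stable partition keyed by an ordered allow-list: recommended entries take their order from `rec_keys_ordered`, and everything else keeps preset order. A compact restatement of the idiom (data values invented for illustration):

```python
# Stable partition by an ordered allow-list, as used in the hunk above.
presets = [("gpt-5.2", "OpenAI: gpt-5.2"), ("glm-4.7", "GLM: glm-4.7"),
           ("gpt-5", "OpenAI: gpt-5")]
rec_order = ["gpt-5.2", "gpt-5"]

by_key = dict(presets)
rec = [(k, by_key[k]) for k in rec_order if k in by_key]          # allow-list order
others = [(k, v) for k, v in presets if k not in set(rec_order)]  # original order
assert [k for k, _ in rec + others] == ["gpt-5.2", "gpt-5", "glm-4.7"]
```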
@@ -2641,14 +2803,16 @@ class ChatCLI:
  "id": "requested_tools",
  "label": "Tools",
  "type": "enum",
- "options": [None, True, False],
+ # Default-first: ON, then OFF, then server default.
+ "options": [True, False, None],
  "render": {None: "Server default", True: "ON", False: "OFF"},
  },
  {
  "id": "control_level",
  "label": "Control level",
  "type": "enum",
- "options": [None, 1, 2, 3],
+ # Default-first: Level 3, then 2, then 1, then server default.
+ "options": [3, 2, 1, None],
  "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
  },
  {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
@@ -2662,35 +2826,44 @@ class ChatCLI:
  {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
  # Agent scope & filesystem controls
  {"id": "host_base", "label": "Agent scope directory", "type": "text"},
- {
- "id": "fs_scope",
- "label": "Filesystem scope",
- "type": "enum",
- "options": [None, "workspace", "host"],
- "render": {
- None: "Server default",
- "workspace": "Workspace (sandbox)",
- "host": "Host (Agent scope)",
- },
- },
- {
- "id": "fs_host_mode",
- "label": "Host mode",
- "type": "enum",
- "options": [None, "any", "cwd", "custom"],
- "render": {
- None: "Server default / any",
- "any": "any (no extra client restriction)",
- "cwd": "Current working directory",
- "custom": "Custom (use Agent scope)",
- },
- },
+ {
+ "id": "fs_scope",
+ "label": "Filesystem scope",
+ "type": "enum",
+ # Default-first: host (Agent scope), then workspace, then server default.
+ "options": ["host", "workspace", None],
+ "render": {
+ None: "Server default",
+ "workspace": "Workspace (sandbox)",
+ "host": "Host (Agent scope)",
+ },
+ },
+ {
+ "id": "fs_host_mode",
+ "label": "Host mode",
+ "type": "enum",
+ # Default-first: custom (use Agent scope), then cwd, then any, then server default.
+ "options": ["custom", "cwd", "any", None],
+ "render": {
+ None: "Server default / any",
+ "any": "any (no extra client restriction)",
+ "cwd": "Current working directory",
+ "custom": "Custom (use Agent scope)",
+ },
+ },
  ]},
  {"label": "Code Map", "type": "group", "items": [
  {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
  ]},
  {"label": "Preambles & First-turn", "type": "group", "items": [
- {"id": "preambles_enabled", "label": "Enable tool call preambles (supported models only)", "type": "bool"},
+ {
+ "id": "preambles_enabled",
+ "label": "Enable tool call preambles (GPT-5 only)",
+ "type": "bool",
+ # Only show this control when the *currently selected* model supports it.
+ # (This updates live as the Model picker changes.)
+ "visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
+ },
  {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
  {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
  {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
@@ -2703,8 +2876,22 @@ class ChatCLI:
  ]},
  ]
 
- # Prepare initial values with enum placeholder for model when custom text set
- init_for_ui = dict(initial)
+ # Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+ try:
+ if not self._is_gpt_model(self.model):
+ for g in items:
+ if not isinstance(g, dict):
+ continue
+ if (g.get("type") == "group") and (g.get("label") == "General"):
+ for row in (g.get("items") or []):
+ if isinstance(row, dict) and row.get("id") == "text_verbosity":
+ row["options"] = ["medium", "high"]
+ row["render"] = {"medium": "Medium", "high": "High"}
+ except Exception:
+ pass
+
+ # Prepare initial values with enum placeholder for model when custom text set
+ init_for_ui = dict(initial)
  if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
  # Represent as 'custom' for cycling, but keep original model in working copy for edit with 'e'
  pass # We'll keep exact model string; enum will show the raw value when not matched
@@ -3052,17 +3239,17 @@ class ChatCLI:
  self.ui.success(f"FS Scope set to: {self._fs_label()}")
  self.save_settings()
 
- async def set_level_menu(self) -> None:
- val = await self._menu_choice(
- "Control Level",
- "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
- [
- ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
- ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
- ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
- ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
- ],
- )
+ async def set_level_menu(self) -> None:
+ val = await self._menu_choice(
+ "Control Level",
+ "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
+ [
+ ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+ ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+ ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
+ ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
+ ],
+ )
  if val == "default":
  self.control_level = None
  elif val in ("1", "2", "3"):
@@ -3142,16 +3329,16 @@ class ChatCLI:
  except Exception:
  pass
 
- # 3) Tool usage preamble (UX hint)
+ # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
  try:
- if bool(getattr(self, "preambles_enabled", False)) and not self._is_codex_model(self.model):
+ if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
  "Tool usage: when you need to read or modify files or run commands, "
  "explicitly explain why you're using a tool, what you'll do, and how it "
  "advances the user's goal before calling the tool."
  )
- except Exception:
- pass
+ except Exception:
+ pass
 
  # 4) Working memory (context-summary file), injected once on fresh session restart
  try:
@@ -4615,21 +4802,29 @@ class ChatCLI:
  self.save_settings()
  return True
 
- if cmd.startswith("/system "):
- self.system_prompt = cmd[len("/system ") :].strip()
- self.history = []
- if self.system_prompt:
- self.history.append({"role": "system", "content": self.system_prompt})
- # Treat as a fresh session; allow map re-injection
- self._did_inject_codebase_map = False
+ if cmd.startswith("/system "):
+ self.system_prompt = cmd[len("/system ") :].strip()
+ self.history = []
+ if self.system_prompt:
+ self.history.append({"role": "system", "content": self.system_prompt})
+ # Treat as a fresh session; allow map re-injection
+ self._did_inject_codebase_map = False
  # Also allow custom first-turn injection again
  try:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- self.ui.success("System prompt set.")
- self.save_settings()
- return True
+ self.ui.success("System prompt set.")
+ # OpenAI threaded state is invalid once the system prompt changes.
+ try:
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
+ self.save_settings()
+ return True
 
  if cmd.startswith("/title "):
  new_title = cmd[len("/title ") :].strip()
@@ -4648,13 +4843,17 @@ class ChatCLI:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- # Reset provider-native histories
- try:
+ # Reset provider-native histories
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
  self._kimi_raw_history = []
  self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
  except Exception:
  pass
  # Reset local cumulative token counters on session clear
@@ -4733,7 +4932,7 @@ class ChatCLI:
 
  # ---------------------------- Run loop ----------------------------
 
- async def run(self) -> None:
+ async def run(self) -> None:
  # Try persisted auth
  self._load_auth_state_from_disk()
 
@@ -4907,25 +5106,70 @@ class ChatCLI:
  self._session_started_at = None
  # Prepare completer for slash commands (if prompt_toolkit is available)
  pt_completer = self._commands_word_completer()
- while True:
- try:
- if self._pt_session is not None:
- # Use prompt_toolkit with inline completion when available
- # Pass completer per-prompt to ensure latest catalog
- user_input = await self._pt_session.prompt_async(
- "You: ",
- completer=pt_completer,
- complete_while_typing=True,
- )
- user_input = user_input.strip()
- elif self._input_engine:
- # Do not add continuation prefixes on new lines
- user_input = self._input_engine.read_message("You: ", "")
- else:
- user_input = self._read_multiline_input("You: ")
- # Successful read resets interrupt window
- self._last_interrupt_ts = None
- except KeyboardInterrupt:
+ while True:
+ try:
+ pending_edit = self._pending_user_edit
+ edit_mode = pending_edit is not None
+
+ if self._pt_session is not None:
+ # Use prompt_toolkit with inline completion when available
+ # Pass completer per-prompt to ensure latest catalog
+ try:
+ # prompt_toolkit supports default= on modern versions; fall back gracefully.
+ if edit_mode:
+ user_input = await self._pt_session.prompt_async(
+ "You (edit): ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ default=str(pending_edit),
+ )
+ else:
+ user_input = await self._pt_session.prompt_async(
+ "You: ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ )
+ except TypeError:
+ # Older prompt_toolkit: no default= support
+ user_input = await self._pt_session.prompt_async(
+ "You: ",
+ completer=pt_completer,
+ complete_while_typing=True,
+ )
+ user_input = user_input.strip()
+ elif self._input_engine:
+ if edit_mode:
+ # The low-level input engine currently doesn't support prefill.
+ # Show the previous message and let the user paste a replacement.
+ try:
+ self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ # Do not add continuation prefixes on new lines
+ user_input = self._input_engine.read_message("You: ", "")
+ else:
+ if edit_mode:
+ try:
+ self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
+ self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
+ except Exception:
+ pass
+ new_txt = self._read_multiline_input("Edit> ")
+ user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+ else:
+ user_input = self._read_multiline_input("You: ")
+
+ # Clear pending edit state after we successfully collected input.
+ if edit_mode:
+ self._pending_user_edit = None
+ self._pending_turn_snapshot = None
+ # Successful read resets interrupt window
+ self._last_interrupt_ts = None
+ except KeyboardInterrupt:
  # First Ctrl+C: interrupt input and warn; second within window exits
  now = time.time()
  try:
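The nested `try/except TypeError` above is feature detection by call: attempt `default=` first and retry without it on older prompt_toolkit releases. The same pattern in isolation (`default=` is a real prompt_toolkit keyword; the prompt text and fallback behavior here are illustrative):

```python
# Feature detection by call, as used in the hunk above: try the modern
# keyword first, retry without it when the installed library is too old.
from prompt_toolkit import PromptSession

async def read_with_prefill(session: PromptSession, prefill: str) -> str:
    try:
        # Modern prompt_toolkit: the buffer opens pre-filled with `prefill`.
        return await session.prompt_async("You (edit): ", default=prefill)
    except TypeError:
        # Older prompt_toolkit without default=; degrade to an empty prompt.
        return await session.prompt_async("You: ")
```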
@@ -4944,8 +5188,8 @@ class ChatCLI:
  self.ui.print("Goodbye.")
  return
 
- if not user_input:
- continue
+ if not user_input:
+ continue
 
  # Command palette if bare '/'
  if user_input == "/":
@@ -4968,12 +5212,35 @@ class ChatCLI:
  if handled:
  continue
 
- try:
- # Record user message for local/server save
- if self.save_chat_history:
- self.messages_for_save.append({
- "role": "user",
- "content": user_input,
+ try:
+ # Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+ # This is critical for first-turn injections (code map/custom note/working memory)
+ # which are applied by mutating flags during payload construction.
+ self._pending_turn_snapshot = {
+ "history": copy.deepcopy(self.history),
+ "messages_for_save": copy.deepcopy(self.messages_for_save),
+ "kimi_raw": copy.deepcopy(self._kimi_raw_history),
+ "gemini_raw": copy.deepcopy(self._gemini_raw_history),
+ "openai_prev": getattr(self, "_openai_previous_response_id", None),
+ "openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+ "openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+ "openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+ "inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+ "did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+ "did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+ "did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+ "memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+ "last_built_user_content": getattr(self, "_last_built_user_content", None),
+ }
+
+ # Clear any stale in-flight dispatch context at turn start.
+ self._inflight_dispatch = None
+
+ # Record user message for local/server save
+ if self.save_chat_history:
+ self.messages_for_save.append({
+ "role": "user",
+ "content": user_input,
  "model": None,
  "citations": None,
  "last_turn_input_tokens": 0,
@@ -4987,25 +5254,86 @@ class ChatCLI:
  if self._busy:
  self.ui.warn("Agent is busy with another turn. Please wait...")
  continue
- self._busy = True
- try:
- assistant_text = await self._stream_once(user_input)
- finally:
- self._busy = False
- except httpx.HTTPStatusError as he:
- try:
- if he.response is not None:
- await he.response.aread()
- body = he.response.text
+ self._busy = True
+ try:
+ assistant_text = await self._stream_once(user_input)
+ finally:
+ self._busy = False
+ except KeyboardInterrupt:
+ # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
+ # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+ try:
+ await self._cancel_inflight_dispatch()
+ except (Exception, BaseException):
+ pass
+
+ # Restore state to *before* this turn started.
+ try:
+ snap = self._pending_turn_snapshot or {}
+ if isinstance(snap.get("history"), list):
+ self.history = snap.get("history")
+ if isinstance(snap.get("messages_for_save"), list):
+ self.messages_for_save = snap.get("messages_for_save")
+ if isinstance(snap.get("kimi_raw"), list):
+ self._kimi_raw_history = snap.get("kimi_raw")
+ if isinstance(snap.get("gemini_raw"), list):
+ self._gemini_raw_history = snap.get("gemini_raw")
+ if "openai_prev" in snap:
+ self._openai_previous_response_id = snap.get("openai_prev")
+ if isinstance(snap.get("openai_ids"), list):
+ self._openai_response_id_history = snap.get("openai_ids")
+ if isinstance(snap.get("openai_input_items"), list):
+ self._openai_input_items = snap.get("openai_input_items")
+ if "openai_last_sent_input_items" in snap:
+ self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
+ if "inflight_dispatch" in snap:
+ self._inflight_dispatch = snap.get("inflight_dispatch")
+ if "did_inject_codebase_map" in snap:
+ self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
+ if "did_inject_custom_first_turn" in snap:
+ self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
+ if "did_inject_working_memory" in snap:
+ self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
+ if "memory_paths_for_first_turn" in snap:
+ self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
+ self._last_built_user_content = snap.get("last_built_user_content")
+ except Exception:
+ pass
+
+ # Clear any transient indicator line and land on a fresh prompt line.
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ try:
+ self.ui.print()
+ except Exception:
+ pass
+
+ try:
+ supports = self._provider_supports_native_retention(self.model)
+ except Exception:
+ supports = False
+ if supports:
+ self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
+ else:
+ self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
+ self._pending_user_edit = user_input
+ continue
+ except httpx.HTTPStatusError as he:
+ try:
+ if he.response is not None:
+ await he.response.aread()
+ body = he.response.text
  else:
  body = ""
  except Exception:
  body = ""
  self.ui.error(f"[HTTP error] {he.response.status_code} {body}")
  continue
- except Exception as e:
- self.ui.error(f"[Client error] {e}")
- continue
+ except Exception as e:
+ self.ui.error(f"[Client error] {e}")
+ continue
 
  # Skip appending empty assistant messages to avoid 422 on next request
  if assistant_text.strip():
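The KeyboardInterrupt arm leans on `_cancel_inflight_dispatch` (added earlier in this diff) to unblock the server quickly. Its essence is one short, bounded POST; a self-contained sketch using the same payload fields, with an invented URL parameter:

```python
# Best-effort cancellation callback, mirroring _cancel_inflight_dispatch.
# The payload fields follow the diff; callback_url/ctx are illustrative.
import httpx

async def cancel_tool_job(callback_url: str, ctx: dict) -> None:
    payload = {
        "session_id": ctx["session_id"],
        "call_id": ctx["call_id"],
        "name": ctx.get("name"),
        "job_token": ctx["job_token"],
        "result": {"ok": False, "cancelled": True, "error": "cancelled by user"},
    }
    # Short, explicit timeouts: the only goal is to unblock the server fast.
    timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            await client.post(callback_url, json=payload)
    except httpx.HTTPError:
        pass  # best-effort: the CLI is abandoning the turn either way
```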
@@ -5031,7 +5359,7 @@ class ChatCLI:
  ("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
  ("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
  (auth_action_key, auth_action_label),
- ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5, gemini-2.5-pro, grok-4, deepseek-chat) or use Change Model to type one"),
+ ("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2-thinking, etc.)"),
  ("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
  ("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
  ("clear_history", "🧹 Clear History - Reset chat history"),
@@ -5067,15 +5395,14 @@ class ChatCLI:
  has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
  is_effectively_free = (self.is_free_tier and not has_credits)
 
- # Recommended models (ordered list for shuffling)
- # Curated list per request (include Codex Max as recommended)
+ # Recommended models ("feelings" order)
  rec_keys = [
- "deepseek-reasoner-3.2",
- "claude-opus-4-5-20251101",
+ "gpt-5.2",
+ "gpt-5.2-codex",
+ "gpt-5",
  "gemini-3-pro-preview",
  "gemini-3-flash-preview",
- "gpt-5",
- "gpt-5.2",
+ "claude-opus-4-5-20251101",
  "kimi-k2-thinking",
  "grok-code-fast-1",
  ]
@@ -5117,8 +5444,7 @@ class ChatCLI:
  suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
  choices.append((m, f"{lbl}{suffix}"))
 
- choices.append(("default", "Server Default (no override)"))
- choices.append(("custom", "Custom (enter a model name)"))
+ # Per issue list: do not surface "server default" or "custom" in this picker.
 
  # Render and select using the unified highlighted picker
  picked: Optional[str] = None
@@ -5133,27 +5459,15 @@ class ChatCLI:
  picked = str(val)
 
  # Enforce free tier restrictions
- if picked not in ("default", "custom") and is_effectively_free and is_paid_model(picked):
- self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
- continue
+ if is_effectively_free and is_paid_model(picked):
+ self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
+ continue
 
  break
 
- # Apply selection
- if picked == "default":
- self.model = None
- self.ui.info("Model cleared; server default will be used.")
- elif picked == "custom":
- typed = self.ui.prompt(
- "Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
- default=self.model or "",
- )
- self.model = self._resolve_model_alias(typed.strip() or None)
- if not self.model:
- self.ui.info("Model cleared; server default will be used.")
- else:
- self.model = picked
- self.ui.success(f"Model set to: {self.model}")
+ # Apply selection
+ self.model = picked
+ self.ui.success(f"Model set to: {self.model}")
 
  self._apply_model_side_effects()
  self.save_settings()
@@ -5242,7 +5556,7 @@ class ChatCLI:
  self.save_settings()
  return True
 
- if choice == "set_system_prompt":
+ if choice == "set_system_prompt":
  prompt = self.ui.prompt("Enter system prompt", default=self.system_prompt or "")
  self.system_prompt = prompt.strip()
  self.history = []
@@ -5254,14 +5568,19 @@ class ChatCLI:
  self._did_inject_custom_first_turn = False
  except Exception:
  pass
- # Clear provider-native histories on system reset
- try:
+ # Clear provider-native histories on system reset
+ try:
  self.messages_for_save = []
  if not self.save_chat_history:
  self.thread_uid = None
- self._kimi_raw_history = []
- except Exception:
- pass
+ self._kimi_raw_history = []
+ self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
+ self._openai_input_items = []
+ self._openai_last_sent_input_items = None
+ except Exception:
+ pass
  self.ui.success("System prompt set.")
  self.save_settings()
  return True
@@ -5279,6 +5598,8 @@ class ChatCLI:
  self.thread_uid = None
  self._kimi_raw_history = []
  self._gemini_raw_history = []
+ self._openai_previous_response_id = None
+ self._openai_response_id_history = []
  except Exception:
  pass
  # Reset local cumulative token counters on session clear
@@ -5333,11 +5654,82 @@ class ChatCLI:
  return True
 
  # ----------------------- SSE Streaming loop ------------------------
- async def _stream_once(self, user_input: str) -> str:
- # Build request payload
- payload: Dict[str, Any] = {"messages": self._build_messages(user_input)}
+ async def _stream_once(self, user_input: str) -> str:
+ # Build request payload.
+ # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
+ # `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
+ if self._is_openai_model(self.model):
+ msgs: List[Dict[str, str]] = []
+ # Codex developer prompt (if enabled) + system prompt
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ msgs.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ if self.system_prompt:
+ msgs.append({"role": "system", "content": self.system_prompt})
+
+ # Apply first-turn-only injections to the current user content
+ content = user_input
+ prefix = self._build_first_turn_injection(user_input)
+ if prefix:
+ content = f"{prefix}\n\n{user_input}"
+ try:
+ self._last_built_user_content = content
+ except Exception:
+ self._last_built_user_content = user_input
+ msgs.append({"role": "user", "content": content})
+
+ payload: Dict[str, Any] = {"messages": msgs}
+
+ # Build OpenAI native input items (authoritative for the server OpenAI path).
+ try:
+ if isinstance(self._openai_input_items, list) and self._openai_input_items:
+ items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
+ else:
+ # Seed with system prompts for the first OpenAI turn.
+ items = []
+ try:
+ if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+ items.append({"role": "system", "content": self._codex_system_prompt()})
+ except Exception:
+ pass
+ if self.system_prompt:
+ items.append({"role": "system", "content": self.system_prompt})
+ items.append({"role": "user", "content": content})
+ payload["openai_input_items"] = self._sanitize_openai_items(items)
+ self._openai_last_sent_input_items = copy.deepcopy(items)
+ except Exception:
+ # If this fails for any reason, fall back to normal message-based history.
+ self._openai_last_sent_input_items = None
+
+ # OpenAI Threading: DISABLED. We use full manual input item replay now.
+ # if "openai_input_items" not in payload:
+ # try:
+ # if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
+ # payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
+ # except Exception:
+ # pass
+ try:
+ if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
+ except Exception:
+ pass
+ else:
+ payload = {"messages": self._build_messages(user_input)}
  if self.model:
  payload["model"] = self.model
+ # OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
+ try:
+ if self._is_openai_model(self.model):
+ if (
+ isinstance(getattr(self, "_openai_response_id_history", None), list)
+ and self._openai_response_id_history
+ and "openai_response_id_history" not in payload
+ ):
+ payload["openai_response_id_history"] = list(self._openai_response_id_history)
+ except Exception:
+ pass
  # Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
  try:
  if self.terminal_id:
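The payload logic above implements client-owned conversation state: the CLI re-sends the full Responses-API item chain every turn and appends the server-reported delta afterwards. Reduced to its skeleton (the `openai_input_items` / `openai_delta_items` field names follow the diff; the loop and item contents are illustrative):

```python
# Skeleton of the stateless replay the CLI performs. Since the client replays
# the ENTIRE chain each turn (reasoning items included), the server needs no
# stored conversation state (ZDR-safe).
from typing import Any, Dict, List

items: List[Dict[str, Any]] = [{"role": "system", "content": "You are a CLI agent."}]

def build_payload(user_text: str) -> Dict[str, Any]:
    items.append({"role": "user", "content": user_text})
    return {"openai_input_items": [dict(it) for it in items]}

def on_message_completed(event: Dict[str, Any]) -> None:
    # Server echoes whatever items this turn appended (assistant message,
    # function_call / function_call_output, reasoning); keep them for next turn.
    for it in event.get("openai_delta_items") or []:
        if isinstance(it, dict):
            items.append(dict(it))
```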
@@ -5421,10 +5813,12 @@ class ChatCLI:
  payload["text_verbosity"] = self.text_verbosity
  except Exception:
  pass
- try:
- payload["preambles_enabled"] = bool(self.preambles_enabled)
- except Exception:
- pass
+ # Preambles are a GPT-5-only UX toggle.
+ try:
+ if self._supports_preambles(self.model):
+ payload["preambles_enabled"] = bool(self.preambles_enabled)
+ except Exception:
+ pass
 
  if self.web_search_enabled:
  payload["enable_web_search"] = True
@@ -5646,6 +6040,51 @@ class ChatCLI:
  # Track whether we're currently positioned at the start of a fresh line.
  # This prevents double-newlines between back-to-back tool events.
  at_line_start = True
+
+ # --- Tool call in-place status (issuelist.md #7) ---
+ # We render a single transient line for the current tool call (no trailing newline)
+ # so the later tool.result SUCCESS/FAILURE line can replace it in-place.
+ tool_status_active = False
+ tool_status_call_id = None
+
+ def _tool_status_clear_line() -> None:
+ """Clear the current line (best-effort) and return to column 0."""
+ nonlocal at_line_start
+ try:
+ sys.stdout.write("\r\x1b[2K")
+ sys.stdout.flush()
+ except Exception:
+ pass
+ at_line_start = True
+
+ def _tool_status_show(call_id: Any, line: str) -> None:
+ """Show the transient tool status line (no newline)."""
+ nonlocal tool_status_active, tool_status_call_id, at_line_start
+ if not self.show_tool_calls:
+ return
+ tool_status_active = True
+ tool_status_call_id = str(call_id) if call_id is not None else None
+ try:
+ if not at_line_start:
+ sys.stdout.write("\n")
+ sys.stdout.write("\r\x1b[2K" + str(line))
+ sys.stdout.flush()
+ at_line_start = False
+ except Exception:
+ # Fallback: degrade to a normal printed line
+ try:
+ self.ui.print(str(line))
+ except Exception:
+ pass
+ at_line_start = True
+
+ def _tool_status_stop() -> None:
+ """Remove the transient tool status line and clear tracking."""
+ nonlocal tool_status_active, tool_status_call_id
+ if tool_status_active:
+ _tool_status_clear_line()
+ tool_status_active = False
+ tool_status_call_id = None
  # Mode: animate or static (default static for stability)
  try:
  _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
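The three helpers above reduce to two ANSI escapes: `\r` returns the cursor to column 0 and `\x1b[2K` erases the whole line, so writing without a trailing newline lets the next write replace the text in place. A self-contained demo of the same technique (tool name and sleep are illustrative):

```python
# In-place status line via "\r" (carriage return) + "\x1b[2K" (erase line),
# the same escapes used by _tool_status_show/_tool_status_clear_line above.
import sys
import time

def show_status(line: str) -> None:
    sys.stdout.write("\r\x1b[2K" + line)  # overwrite current line, no newline
    sys.stdout.flush()

def finish(line: str) -> None:
    sys.stdout.write("\r\x1b[2K" + line + "\n")  # final line keeps its newline
    sys.stdout.flush()

show_status("[RUNNING] read_file cli.py")
time.sleep(0.5)  # stand-in for the actual tool execution
finish("[SUCCESS] read_file cli.py")
```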
@@ -5949,16 +6388,40 @@ class ChatCLI:
  except Exception:
  pass
 
- # Do NOT show the initial tool.call line per UX request; results will be
- # rendered on tool.result. We still keep internal state and WS broadcasts.
- # While the tool executes (server or client), show a subtle thinking
- # indicator so users see progress during potentially long operations.
- try:
- # Do not start the indicator if we're in the middle of assistant token streaming
- if (not streaming_assistant) and bool(getattr(self, "_thinking_indicator_enabled", False)):
- await _indicator_start()
- except Exception:
- pass
+ # issuelist.md #7:
+ # Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
+ try:
+ # Clear any previous transient status line (shouldn't happen, but keep stable)
+ _tool_status_stop()
+ except Exception:
+ pass
+ try:
+ tool_name = str(name or "").strip()
+ label = self._tool_concise_label(
+ tool_name,
+ args if isinstance(args, dict) else {},
+ None,
+ )
+ try:
+ model_prefix = (
+ self._current_turn.get("model")
+ or self._last_used_model
+ or self.model
+ or "(server default)"
+ )
+ except Exception:
+ model_prefix = self.model or "(server default)"
+ ORANGE = "\x1b[38;5;214m"
+ WHITE = "\x1b[97m"
+ RESET = "\x1b[0m"
+ status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
+ _tool_status_show(call_id, status_line)
+ except Exception:
+ # Last-resort fallback: print something rather than crash streaming.
+ try:
+ self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
+ except Exception:
+ pass
  # Count tool calls
  try:
  tool_calls += 1
@@ -5981,10 +6444,15 @@ class ChatCLI:
  except Exception:
  pass
 
- elif event == "approval.request":
- # First reply wins (web or CLI)
- await self._handle_approval_request(client, session_id, data)
- continue
+ elif event == "approval.request":
+ # Don't let the transient [RUNNING] line collide with interactive prompts.
+ try:
+ _tool_status_stop()
+ except Exception:
+ pass
+ # First reply wins (web or CLI)
+ await self._handle_approval_request(client, session_id, data)
+ continue
 
  elif event == "approval.result":
  appr = data.get("approved")
@@ -6026,10 +6494,22 @@ class ChatCLI:
  self.ui.info("Working memory created. Restarting conversation with a fresh first-turn injection...")
  return ""
 
- elif event == "tool.result":
- name = str(data.get("name"))
- result = data.get("result", {}) or {}
- call_id = data.get("call_id")
+ elif event == "tool.result":
+ name = str(data.get("name"))
+ result = data.get("result", {}) or {}
+ call_id = data.get("call_id")
+ # If we previously rendered a transient [RUNNING] line for this tool call,
+ # clear it now so the SUCCESS/FAILURE line prints in the same place.
+ try:
+ if tool_status_active:
+ # Best-effort match on call_id (some providers may omit it).
+ if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
+ _tool_status_stop()
+ except Exception:
+ try:
+ _tool_status_stop()
+ except Exception:
+ pass
  # Stop any indicator before rendering results
  try:
  await _indicator_stop(clear=True)
@@ -6137,7 +6617,7 @@ class ChatCLI:
  # Do not auto-restart the indicator here; wait for the next model event
 
  elif event == "tool.dispatch":
- # Client-executed tool flow
+ # Client-executed tool flow
  if not HAS_LOCAL_TOOLS:
  self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
  continue
@@ -6147,12 +6627,23 @@ class ChatCLI:
  # tool invocation on the corresponding 'tool.call' event. Counting
  # dispatch would double-count a single tool call.
 
- session_id_d = data.get("session_id")
- call_id = data.get("call_id")
- name = data.get("name")
- args = data.get("args", {}) or {}
- job_token = data.get("job_token")
- reqp = data.get("requested_policy", {}) or {}
+ session_id_d = data.get("session_id")
+ call_id = data.get("call_id")
+ name = data.get("name")
+ args = data.get("args", {}) or {}
+ job_token = data.get("job_token")
+ reqp = data.get("requested_policy", {}) or {}
+
+ # Track in-flight dispatch so Ctrl+C can cancel quickly.
+ try:
+ self._inflight_dispatch = {
+ "session_id": session_id_d,
+ "call_id": call_id,
+ "job_token": job_token,
+ "name": name,
+ }
+ except Exception:
+ pass
 
  if DEBUG_SSE:
  self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
@@ -6427,9 +6918,9 @@ class ChatCLI:
  except Exception:
  self._last_dispatch_ctx = None
 
- # POST callback
- try:
- if session_id_d and call_id and job_token:
+ # POST callback
+ try:
+ if session_id_d and call_id and job_token:
  payload_cb = {
  "session_id": session_id_d,
  "call_id": call_id,
@@ -6437,16 +6928,80 @@ class ChatCLI:
                             "result": result,
                             "job_token": job_token,
                         }
-                        r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
-                        if r.status_code >= 400:
-                            self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
-                except Exception as e:
-                    self.ui.warn(f"tools.callback error: {e}")
+                        r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
+                        if r.status_code >= 400:
+                            self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
+                except Exception as e:
+                    self.ui.warn(f"tools.callback error: {e}")
+                finally:
+                    try:
+                        # Clear in-flight dispatch context when we send a callback.
+                        if isinstance(self._inflight_dispatch, dict):
+                            if str(self._inflight_dispatch.get("call_id")) == str(call_id):
+                                self._inflight_dispatch = None
+                    except Exception:
+                        pass

             elif event == "message.completed":
                 # Safety: this block handles only 'message.completed'.
                 usage = data.get("usage", {})
                 model_used = data.get("model") or self.model
+                # OpenAI: persist the last response id so future turns can use previous_response_id.
+                try:
+                    if self._is_openai_model(model_used):
+                        # Prefer the explicit per-turn id list when provided by the server.
+                        ids = data.get("openai_response_ids")
+                        if isinstance(ids, list) and ids:
+                            for x in ids:
+                                if not isinstance(x, str):
+                                    continue
+                                xs = x.strip()
+                                if not xs:
+                                    continue
+                                try:
+                                    if xs not in self._openai_response_id_history:
+                                        self._openai_response_id_history.append(xs)
+                                except Exception:
+                                    pass
+                        rid = data.get("openai_previous_response_id")
+                        if isinstance(rid, str) and rid.strip():
+                            self._openai_previous_response_id = rid.strip()
+                            try:
+                                if rid.strip() not in self._openai_response_id_history:
+                                    self._openai_response_id_history.append(rid.strip())
+                            except Exception:
+                                pass
+
+                        # OpenAI manual-state replay: server returns the delta items appended
+                        # during this turn (reasoning/tool calls/tool outputs). Persist them.
+                        try:
+                            delta = data.get("openai_delta_items")
+                            if isinstance(delta, list):
+                                base_items = (
+                                    self._openai_last_sent_input_items
+                                    if isinstance(self._openai_last_sent_input_items, list)
+                                    else copy.deepcopy(self._openai_input_items)
+                                )
+                                # Normalize to a list of dicts where possible; keep unknown shapes as-is.
+                                merged: List[Any] = []
+                                try:
+                                    merged.extend(list(base_items or []))
+                                except Exception:
+                                    merged = list(base_items or []) if base_items is not None else []
+                                merged.extend(delta)
+                                # Store only dict-like items (server is expected to send dicts)
+                                cleaned: List[Dict[str, Any]] = []
+                                for it in merged:
+                                    if isinstance(it, dict):
+                                        cleaned.append(dict(it))
+                                self._openai_input_items = cleaned
+                        except Exception:
+                            pass
+                        finally:
+                            # Clear per-turn sent snapshot
+                            self._openai_last_sent_input_items = None
+                except Exception:
+                    pass
                 # Gemini: server may include an authoritative provider-native history snapshot.
                 try:
                     if isinstance(model_used, str) and model_used.startswith("gemini-"):
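The manual-state merge above maintains a simple invariant: next turn's input chain equals exactly what was sent this turn plus the server-reported openai_delta_items, with non-dict items dropped. A standalone sketch of that merge under the same assumptions (the server sends dicts; the last-sent snapshot may be absent):

import copy
from typing import Any, Dict, List, Optional

def merge_input_items(
    persisted: List[Dict[str, Any]],
    last_sent: Optional[List[Dict[str, Any]]],
    delta: List[Any],
) -> List[Dict[str, Any]]:
    """Mirror the message.completed merge: prefer the exact per-turn
    snapshot as the base, fall back to a deep copy of persisted state,
    append the server's delta, and keep only dict-shaped items."""
    base = last_sent if isinstance(last_sent, list) else copy.deepcopy(persisted)
    merged: List[Any] = list(base or [])
    merged.extend(delta)
    return [dict(it) for it in merged if isinstance(it, dict)]

# One turn: a user message was sent; the server echoes the reasoning and
# function-call items it appended during the turn.
sent = [{"type": "message", "role": "user", "content": "hi"}]
delta = [
    {"type": "reasoning", "id": "rs_1"},
    {"type": "function_call", "call_id": "c1", "name": "read_file"},
    "unexpected-shape",  # non-dict items are dropped
]
print(merge_input_items([], sent, delta))  # three dicts, in send order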
@@ -7865,7 +8420,7 @@ class ChatCLI:
                 "gpt-5": 400000,
                 "gpt-5-2025-08-07": 400000,
                 "codex-mini-latest": 200000,
-                "gemini-2.5-pro": 1048576,
+                # (removed gemini-2.5-pro)
                 "gemini-3-flash-preview": 1048576,
                 "gemini-3-pro-preview": 1000000,
                 "grok-4-1-fast-reasoning": 2000000,
@@ -7882,10 +8437,10 @@ class ChatCLI:
                 "claude-sonnet-4-5-20250929-thinking": 1000000,
                 "claude-opus-4-5-20251101": 200000,
                 "claude-opus-4-5-20251101-thinking": 200000,
-                "glm-4.6": 200000,
-            })
-        except Exception:
-            pass
+                "glm-4.7": 200000,
+            })
+        except Exception:
+            pass
        self._model_ctx_map = ctx_map
        return ctx_map

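The curated map is consulted per model id when budgeting context. A hedged sketch of the lookup side, with window sizes taken from the two hunks above; the 200k fallback for unknown ids is an assumption, not the CLI's documented behavior:

from typing import Dict

MODEL_CTX: Dict[str, int] = {
    "gpt-5": 400_000,
    "codex-mini-latest": 200_000,
    "gemini-3-flash-preview": 1_048_576,
    "gemini-3-pro-preview": 1_000_000,
    "grok-4-1-fast-reasoning": 2_000_000,
    "glm-4.7": 200_000,
}

def context_window(model: str, default: int = 200_000) -> int:
    """Resolve a model's context window; unknown ids fall back to a
    conservative default (the fallback value is assumed)."""
    return MODEL_CTX.get(model, default)

print(context_window("glm-4.7"))        # 200000
print(context_window("unknown-model"))  # 200000 (assumed fallback)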
@@ -8272,8 +8827,11 @@ class ChatCLI:
            "We’ll configure a few defaults. You can change these later via /settings.",
        )

-        # --- 1) Default model (menu, no Y/N) ---
-        await self._wizard_model_step()
+        # --- 1) Default model (menu) ---
+        await self._wizard_model_step()
+        # If the picker was cancelled (or model still unset), choose a sensible default.
+        if not self.model:
+            self.model = self._recommended_default_model()

        # --- 2) Tools (always ON per design) ---
        self.requested_tools = True
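The fallback keeps self.model always set after the wizard, even if the picker is aborted. The diff does not show _recommended_default_model itself; one plausible shape, with a purely illustrative preference order:

from typing import Optional, Sequence

def recommended_default_model(
    available: Sequence[str],
    preferred: Sequence[str] = ("gpt-5", "gemini-3-pro-preview", "claude-opus-4-5-20251101"),
) -> Optional[str]:
    """Hypothetical policy: first preferred model that is actually
    available, else the first available model at all."""
    for m in preferred:
        if m in available:
            return m
    return available[0] if available else None

print(recommended_default_model(["gemini-3-pro-preview", "glm-4.7"]))  # gemini-3-pro-preview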
@@ -8283,9 +8841,9 @@ class ChatCLI:
            "Control levels: 1=read-only, 2=approval on write/exec, 3=no approvals"
        )
        await self.set_level_menu()
-        if self.control_level not in (1, 2, 3):
-            # Default to Level 2 if user aborted
-            self.control_level = 2
+        if self.control_level not in (1, 2, 3):
+            # Default to Level 3 if user aborted
+            self.control_level = 3

        # --- 4) Agent scope (menus; only type on custom path) ---
        self.ui.print(
@@ -8336,30 +8894,45 @@ class ChatCLI:
        except Exception:
            curv = "medium"

-        verb_choice = await self._menu_choice(
-            "Text verbosity",
-            "How verbose should responses be by default?",
-            [
-                ("low", "Low – short, to-the-point answers"),
-                ("medium", "Medium – balanced detail (recommended)"),
-                ("high", "High – more verbose explanations"),
-            ],
-        )
+        verbosity_choices: List[Tuple[str, str]] = []
+        if self._is_gpt_model(self.model):
+            # Default-first: Low for GPT models.
+            verbosity_choices = [
+                ("low", "Low – short, to-the-point answers"),
+                ("medium", "Medium – balanced detail"),
+                ("high", "High – more verbose explanations"),
+            ]
+        else:
+            # Default-first: Medium for non-GPT models; do not surface "Low".
+            verbosity_choices = [
+                ("medium", "Medium – balanced detail (recommended)"),
+                ("high", "High – more verbose explanations"),
+            ]
+
+        verb_choice = await self._menu_choice(
+            "Text verbosity",
+            "How verbose should responses be by default?",
+            verbosity_choices,
+        )
        if verb_choice in ("low", "medium", "high"):
            self.text_verbosity = verb_choice
        else:
            self.text_verbosity = curv or "medium"

-        # --- 7) Tool preambles (menu) ---
-        preamble_choice = await self._menu_choice(
-            "Tool call preambles",
-            "Before using tools, the agent can briefly explain what it will do and why (supported models only).",
-            [
-                ("on", "Enable preambles"),
-                ("off", "Disable preambles (default)"),
-            ],
-        )
-        self.preambles_enabled = preamble_choice == "on"
+        # --- 7) Tool preambles (GPT-5 only) ---
+        if self._supports_preambles(self.model):
+            preamble_choice = await self._menu_choice(
+                "Tool call preambles",
+                "Before using tools, the agent can briefly explain what it will do and why.",
+                [
+                    ("off", "Disable preambles (default)"),
+                    ("on", "Enable preambles"),
+                ],
+            )
+            self.preambles_enabled = preamble_choice == "on"
+        else:
+            # Never enable preambles on unsupported models.
+            self.preambles_enabled = False

        # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
        custom_choice = await self._menu_choice(
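Both menus in this hunk follow the same "default-first" convention: the option listed first is what a bare Enter selects, so the list is built per model family before it is shown. A condensed sketch of that shape; is_gpt_model here is a stand-in for the CLI's own _is_gpt_model predicate:

from typing import List, Tuple

def is_gpt_model(model: str) -> bool:
    # Stand-in predicate (assumption); the real check lives in ChatCLI.
    return model.startswith(("gpt-", "codex-"))

def verbosity_choices(model: str) -> List[Tuple[str, str]]:
    """Default-first option list: Low leads for GPT models; other
    models start at Medium and never surface Low."""
    if is_gpt_model(model):
        return [
            ("low", "Low – short, to-the-point answers"),
            ("medium", "Medium – balanced detail"),
            ("high", "High – more verbose explanations"),
        ]
    return [
        ("medium", "Medium – balanced detail (recommended)"),
        ("high", "High – more verbose explanations"),
    ]

print([k for k, _ in verbosity_choices("gpt-5")])                # ['low', 'medium', 'high']
print([k for k, _ in verbosity_choices("gemini-3-pro-preview")]) # ['medium', 'high']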
@@ -8433,7 +9006,8 @@ class ChatCLI:
                text = m.get("content", "")
                contents.append({"role": role, "parts": [{"text": text}]})
            # Pick a Gemini model for counting; fall back if current isn't Gemini
-            count_model = "gemini-2.5-pro"
+            # (gemini-2.5-pro removed from curated lists)
+            count_model = "gemini-3-flash-preview"
            res = client.models.count_tokens(model=count_model, contents=contents)
            t = int(getattr(res, "total_tokens", 0) or 0)
            if t > 0:
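Counting against a fixed Gemini model works because the count only needs to be approximate for budgeting. A hedged sketch of that helper in isolation; the count_tokens call matches the one in the hunk, while the assistant-to-"model" role mapping and the chars/4 fallback heuristic are assumptions:

def estimate_tokens(messages, client=None, count_model="gemini-3-flash-preview"):
    """Prefer an exact count via the google-genai SDK; otherwise fall
    back to a rough chars/4 heuristic (assumed, not the CLI's rule)."""
    contents = [
        {
            "role": ("model" if m.get("role") == "assistant" else "user"),
            "parts": [{"text": m.get("content", "")}],
        }
        for m in messages
    ]
    if client is not None:
        try:
            res = client.models.count_tokens(model=count_model, contents=contents)
            t = int(getattr(res, "total_tokens", 0) or 0)
            if t > 0:
                return t
        except Exception:
            pass
    return sum(len(m.get("content", "")) for m in messages) // 4

print(estimate_tokens([{"role": "user", "content": "hello world"}]))  # 2 via fallback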
@@ -8491,16 +9065,16 @@ class ChatCLI:
                blocks.append(txt.strip())
        except Exception:
            pass
-        # Tool preamble
-        try:
-            if bool(getattr(self, "preambles_enabled", False)):
-                blocks.append(
-                    "Tool usage: when you need to read or modify files or run commands, "
-                    "explicitly explain why you're using a tool, what you'll do, and how it "
-                    "advances the user's goal before calling the tool."
-                )
-        except Exception:
-            pass
+        # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+        try:
+            if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
+                blocks.append(
+                    "Tool usage: when you need to read or modify files or run commands, "
+                    "explicitly explain why you're using a tool, what you'll do, and how it "
+                    "advances the user's goal before calling the tool."
+                )
+        except Exception:
+            pass
        # Working memory preview (does not touch _did_inject_working_memory or paths)
        try:
            if self._memory_paths_for_first_turn:
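The gate referenced in both preamble hunks centralizes the "GPT-5 only (non-Codex)" rule in a single predicate. Its real definition is not shown in this diff; a hypothetical reconstruction consistent with the comments:

def supports_preambles(model: str) -> bool:
    """Hypothetical sketch: allow tool-call preambles only on GPT-5
    family models, excluding Codex variants (per the hunk comments)."""
    m = (model or "").lower()
    return m.startswith("gpt-5") and "codex" not in m

assert supports_preambles("gpt-5")
assert supports_preambles("gpt-5-2025-08-07")
assert not supports_preambles("codex-mini-latest")
assert not supports_preambles("gemini-3-pro-preview")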