henosis-cli: henosis_cli-0.6.7-py3-none-any.whl → henosis_cli-0.6.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli.py +962 -322
- {henosis_cli-0.6.7.dist-info → henosis_cli-0.6.9.dist-info}/METADATA +1 -1
- henosis_cli-0.6.9.dist-info/RECORD +11 -0
- {henosis_cli-0.6.7.dist-info → henosis_cli-0.6.9.dist-info}/WHEEL +1 -1
- henosis_cli_tools/settings_ui.py +77 -38
- henosis_cli-0.6.7.dist-info/RECORD +0 -11
- {henosis_cli-0.6.7.dist-info → henosis_cli-0.6.9.dist-info}/entry_points.txt +0 -0
- {henosis_cli-0.6.7.dist-info → henosis_cli-0.6.9.dist-info}/top_level.txt +0 -0
cli.py
CHANGED
@@ -6,11 +6,12 @@
 # - Preserves previous behavior and settings
 # - Injects CODEBASE_MAP.md into the first user message (wrapped in <codebase_map>) without manual trimming.

-import argparse
-import asyncio
-import
-import
-import
+import argparse
+import asyncio
+import copy
+import json
+import os
+import sys
 import socket
 import shutil
 from pathlib import Path
@@ -683,8 +684,8 @@ class UI:
 for n, ty, sz in rows:
 print(f"{n:<40} {ty:<8} {sz}")

-class ChatCLI:
-def __init__(
+class ChatCLI:
+def __init__(
 self,
 server: str,
 model: Optional[str],
@@ -997,8 +998,25 @@ class ChatCLI:
 }
 # Track last used model for display
 self._last_used_model: Optional[str] = None
-# Provider-native history for Kimi (preserve reasoning_content across turns)
-self._kimi_raw_history: List[Dict[str, Any]] = []
+# Provider-native history for Kimi (preserve reasoning_content across turns)
+self._kimi_raw_history: List[Dict[str, Any]] = []
+# Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
+self._gemini_raw_history: List[Dict[str, Any]] = []
+# OpenAI Responses API threading: retain previous response id across turns
+self._openai_previous_response_id: Optional[str] = None
+# OpenAI Responses API threading: retain the full chain of response ids across turns
+# (server will also echo per-turn ids in message.completed.openai_response_ids)
+self._openai_response_id_history: List[str] = []
+
+# OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+# including reasoning items, function_call items, and function_call_output items.
+self._openai_input_items: List[Dict[str, Any]] = []
+# For robustness, remember exactly what we sent as openai_input_items for the current turn
+# so we can append server-provided openai_delta_items deterministically.
+self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+# Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+# Shape: {session_id, call_id, job_token, name}
+self._inflight_dispatch: Optional[Dict[str, Any]] = None
 # Last server billing info from /api/usage/commit
 self._last_commit_cost_usd: float = 0.0
 self._last_remaining_credits: Optional[float] = None
@@ -1049,8 +1067,14 @@ class ChatCLI:
 self._thinking_indicator_enabled = True
 except Exception:
 self._thinking_indicator_enabled = True
-# Track Ctrl+C timing for double-press-to-exit behavior
-self._last_interrupt_ts: Optional[float] = None
+# Track Ctrl+C timing for double-press-to-exit behavior
+self._last_interrupt_ts: Optional[float] = None
+
+# Ctrl+C during a running stream should not kill the entire CLI.
+# Instead, we cancel the in-flight turn and reopen the last user query for editing.
+# NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+self._pending_user_edit: Optional[str] = None
+self._pending_turn_snapshot: Optional[Dict[str, Any]] = None

 # Timers: session-level and per-turn wall-clock timers
 self._session_started_at: Optional[float] = None  # time.perf_counter() at session start
@@ -1091,7 +1115,7 @@ class ChatCLI:
 self._pt_session = None

 # ----------------------- Provider heuristics -----------------------
-def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
+def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
 """Return True when the model is an OpenAI reasoning-capable model.
 Mirrors server-side heuristic: prefixes 'gpt-5' or 'o4'.
 """
@@ -1342,6 +1366,9 @@ class ChatCLI:
 return {
 # OpenAI
 "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+# New: gpt-5.2-codex
+# Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+"gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
 # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
 "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
 "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
@@ -1359,8 +1386,7 @@ class ChatCLI:
 # New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
 "claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
 "claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
-# Gemini
-"gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
+# Gemini
 # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
 "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
 # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
@@ -1375,15 +1401,15 @@ class ChatCLI:
 # DeepSeek V3.2 (+$0.25 per 1M margin)
 "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
 "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-
+# Removed: deepseek speciale (not supported)
 # Kimi
 "kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
 "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
 "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
-# GLM (Z.AI)
-# Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
-"glm-4.
-}
+# GLM (Z.AI)
+# Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
+"glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
+}

 def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
 if not model:
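The price table above is keyed by model name, with "input"/"output" (and, for gpt-5.2-codex, "cached_input") expressed in USD per 1M tokens, per the inline comments. A minimal sketch of how such a table can be turned into a per-turn cost estimate (illustrative only; the helper name and call pattern below are assumptions, not code from the package):

# Illustrative sketch, not part of the wheel. Assumes prices are USD per 1M tokens.
def estimate_turn_cost(prices: dict, model: str, input_tokens: int, output_tokens: int,
                       cached_input_tokens: int = 0) -> float:
    p = prices.get(model, {})
    cost = (input_tokens / 1_000_000) * p.get("input", 0.0)
    cost += (output_tokens / 1_000_000) * p.get("output", 0.0)
    # Entries like "gpt-5.2-codex" carry a discounted rate for cached input tokens.
    cost += (cached_input_tokens / 1_000_000) * p.get("cached_input", p.get("input", 0.0))
    return cost

# Example: 120k input + 8k output on gpt-5.2-codex
# 0.12 * 1.75 + 0.008 * 14.00 = 0.322 USD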
@@ -1431,6 +1457,15 @@ class ChatCLI:
 except Exception:
 model_name = ""
 try:
+# Provider-native state resets when switching away from OpenAI.
+try:
+if self.model and (not self._is_openai_model(self.model)):
+self._openai_previous_response_id = None
+self._openai_response_id_history = []
+self._openai_input_items = []
+self._openai_last_sent_input_items = None
+except Exception:
+pass
 if model_name in {"gpt-5.2-pro"}:
 # Default these to high, but don't clobber a user-chosen xhigh.
 if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
@@ -1438,11 +1473,138 @@ class ChatCLI:
 # Codex family: disable preambles for better behavior
 if "codex" in model_name:
 self.preambles_enabled = False
+# Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+# Force-disable for all other models (even if a saved setting had it enabled).
+if not self._supports_preambles(self.model):
+self.preambles_enabled = False
 except Exception:
 try:
 self.reasoning_effort = "high"
 except Exception:
 pass
+
+def _supports_preambles(self, model: Optional[str]) -> bool:
+"""Tool-call preambles are a CLI-only UX hint.
+
+Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+"""
+try:
+if not model:
+return False
+m = str(model).strip().lower()
+# Only the base GPT-5 line supports this UX toggle.
+# Allow:
+# - "gpt-5"
+# - date-pinned variants like "gpt-5-2025-08-07"
+# Disallow:
+# - versioned families like "gpt-5.1*" / "gpt-5.2*"
+if not (m == "gpt-5" or m.startswith("gpt-5-")):
+return False
+if "codex" in m:
+return False
+return True
+except Exception:
+return False
+
+def _is_openai_model(self, model: Optional[str]) -> bool:
+"""Best-effort model/provider discriminator for client-side state.
+
+The server is multi-provider. For the CLI we treat anything that isn't an explicit
+non-OpenAI provider prefix as OpenAI.
+"""
+try:
+if not model:
+return False
+m = str(model).strip().lower()
+if not m:
+return False
+for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+if m.startswith(pfx):
+return False
+# Everything else defaults to OpenAI in this repo.
+return True
+except Exception:
+return False
+
+def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+"""Whether this provider has an implemented native tool/thinking retention path."""
+try:
+if not model:
+return False
+m = str(model).strip().lower()
+if m.startswith("gemini-"):
+return True
+if m.startswith("kimi-"):
+return bool(getattr(self, "retain_native_tool_results", False))
+if self._is_openai_model(model):
+return True
+return False
+except Exception:
+return False
+
+def _sanitize_openai_items(self, items: Any) -> Any:
+"""Recursively strip fields from OpenAI output items that cause errors when used as input."""
+if isinstance(items, list):
+return [self._sanitize_openai_items(x) for x in items]
+if isinstance(items, dict):
+# 'status' is the main offender causing 400s
+bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+return items
+
+async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+"""If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+Best-effort; never raises.
+"""
+ctx = None
+try:
+ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+except Exception:
+ctx = None
+if not ctx:
+return
+session_id = ctx.get("session_id")
+call_id = ctx.get("call_id")
+job_token = ctx.get("job_token")
+name = ctx.get("name")
+if not (session_id and call_id and job_token):
+return
+payload_cb = {
+"session_id": session_id,
+"call_id": call_id,
+"name": name,
+"job_token": job_token,
+"result": {
+"ok": False,
+"cancelled": True,
+"error": str(reason or "cancelled"),
+},
+}
+try:
+# Keep it short; we just want to unblock the server.
+http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+except Exception:
+http_timeout = None
+try:
+async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+await client.post(self.tools_callback_url, json=payload_cb)
+except Exception:
+pass
+finally:
+try:
+self._inflight_dispatch = None
+except Exception:
+pass
+
+def _is_gpt_model(self, model: Optional[str]) -> bool:
+"""True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+try:
+return bool(model) and str(model).strip().lower().startswith("gpt-")
+except Exception:
+return False
 def _is_codex_model(self, model: Optional[str]) -> bool:
 try:
 return bool(model) and ("codex" in str(model).lower())
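The hunk above adds two model heuristics: _supports_preambles admits only the base gpt-5 line (including date-pinned variants) and rejects Codex plus the versioned gpt-5.1*/gpt-5.2* families, while _is_openai_model treats any name without a known non-OpenAI prefix as OpenAI. A standalone restatement of those checks (illustrative sketch; the real methods live on ChatCLI and wrap everything in try/except):

from typing import Optional

def supports_preambles(model: Optional[str]) -> bool:
    # Only the base GPT-5 line (optionally date-pinned) qualifies; Codex and
    # versioned families such as gpt-5.2* are excluded.
    if not model:
        return False
    m = model.strip().lower()
    return (m == "gpt-5" or m.startswith("gpt-5-")) and "codex" not in m

def is_openai_model(model: Optional[str]) -> bool:
    # Anything that is not an explicit non-OpenAI provider prefix counts as OpenAI.
    if not model:
        return False
    m = model.strip().lower()
    return bool(m) and not m.startswith(("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"))

assert supports_preambles("gpt-5-2025-08-07") is True
assert supports_preambles("gpt-5.2-codex") is False
assert is_openai_model("codex-mini-latest") is True
assert is_openai_model("glm-4.7") is False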
@@ -1955,14 +2117,15 @@ class ChatCLI:
 pass
 return data

-def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
-try:
+def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
+try:
+old_system_prompt = getattr(self, "system_prompt", None)
 self.model = data.get("model", self.model)
 if "save_chat_history" in data:
 try:
 self.save_chat_history = bool(data.get("save_chat_history"))
 except Exception:
-pass
+pass
 self.requested_tools = data.get("requested_tools", self.requested_tools)
 self.fs_scope = data.get("fs_scope", self.fs_scope)
 self.host_base = data.get("host_base", self.host_base)
@@ -2101,18 +2264,38 @@ class ChatCLI:
 self.anthropic_cache_ttl = None
 except Exception:
 pass
-# Rebuild history if system prompt changed
-
-
-
-
-
-
-
-
-
-
-
+# Rebuild history if system prompt changed
+try:
+system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
+except Exception:
+system_prompt_changed = False
+
+if system_prompt_changed:
+# Changing the system prompt can materially alter the behavior of the assistant;
+# warn the user and reset the current conversation history to avoid mixing contexts.
+try:
+self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
+except Exception:
+pass
+self.history = []
+if self.system_prompt:
+self.history.append({"role": "system", "content": self.system_prompt})
+# OpenAI threaded state is invalid once the system prompt changes.
+try:
+self._openai_previous_response_id = None
+self._openai_response_id_history = []
+self._openai_input_items = []
+self._openai_last_sent_input_items = None
+except Exception:
+pass
+# On settings load, do not assume the custom first-turn was injected yet
+try:
+self._did_inject_custom_first_turn = False
+except Exception:
+pass
+self._apply_model_side_effects()
+except Exception as e:
+self.ui.warn(f"Failed to apply settings: {e}")

 async def _fetch_server_settings(self) -> Optional[Dict[str, Any]]:
 try:
@@ -2492,30 +2675,28 @@ class ChatCLI:

 def _model_presets(self) -> List[Tuple[str, str]]:
 """Shared list of (model, label) used by settings UI and /model menu."""
+# Ordered in "feelings" order (Recommended first, then Others).
+# NOTE: We intentionally do not include a "server default" or "custom" option here.
 return [
+# Recommended
 ("gpt-5.2", "OpenAI: gpt-5.2"),
-("gpt-5.2-
+("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
 ("gpt-5", "OpenAI: gpt-5"),
+("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
+("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
+("grok-code-fast-1", "xAI: grok-code-fast-1"),
+
+# Others
+("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
 ("gpt-5-codex", "OpenAI: gpt-5-codex"),
 ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
-("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
 ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
-("deepseek-3.2
-("kimi-k2-
-("
-
-("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
-("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
-("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
-("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
-("grok-4", "xAI: grok-4"),
-("grok-code-fast-1", "xAI: grok-code-fast-1"),
-("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
-("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
-("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
-("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
-("glm-4.6", "GLM: glm-4.6"),
-]
+("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
+("glm-4.7", "GLM: glm-4.7"),
+]

 async def open_settings(self, focus: Optional[str] = None) -> None:
 """Open the new dependency-free settings UI. Falls back to legacy only when
@@ -2569,37 +2750,33 @@ class ChatCLI:
 }
 initial = self._collect_settings_dict()

-# Model presets list (shared)
-model_presets: List[Tuple[str, str]] = self._model_presets()
-
-#
-#
-
-
-"
-"
+# Model presets list (shared)
+model_presets: List[Tuple[str, str]] = self._model_presets()
+
+# Reorder with a Recommended section at the top.
+# IMPORTANT: remove "server default" and "custom" from Settings UI.
+rec_keys_ordered = [
+"gpt-5.2",
+"gpt-5.2-codex",
+"gpt-5",
 "gemini-3-pro-preview",
 "gemini-3-flash-preview",
-"
-"gpt-5.2",
+"claude-opus-4-5-20251101",
 "kimi-k2-thinking",
 "grok-code-fast-1",
-
-
-
-
-
-
-#
-
+]
+rec_set = set(rec_keys_ordered)
+preset_map = {m: lbl for (m, lbl) in model_presets}
+rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
+other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
+
+# Build enum options in the order: Recommended, Others
+model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+render_map: Dict[Any, str] = {}
 for m, lbl in rec_list:
-
-
-
-render_map[m] = f"{lbl} (recommended)"
-for m, lbl in other_list:
-render_map[m] = lbl
-render_map["custom"] = "Custom..."
+render_map[m] = lbl
+for m, lbl in other_list:
+render_map[m] = lbl

 # Build items schema
 items: List[Dict[str, Any]] = [
@@ -2626,14 +2803,16 @@ class ChatCLI:
 "id": "requested_tools",
 "label": "Tools",
 "type": "enum",
-
+# Default-first: ON, then OFF, then server default.
+"options": [True, False, None],
 "render": {None: "Server default", True: "ON", False: "OFF"},
 },
 {
 "id": "control_level",
 "label": "Control level",
 "type": "enum",
-
+# Default-first: Level 3, then 2, then 1, then server default.
+"options": [3, 2, 1, None],
 "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
 },
 {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
@@ -2647,35 +2826,44 @@ class ChatCLI:
 {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
 # Agent scope & filesystem controls
 {"id": "host_base", "label": "Agent scope directory", "type": "text"},
-{
-"id": "fs_scope",
-"label": "Filesystem scope",
-"type": "enum",
-
-"
-
-
-"
-
-
-
-
-"
-"
-"
-
-
-
-
-"
-
-
+{
+"id": "fs_scope",
+"label": "Filesystem scope",
+"type": "enum",
+# Default-first: host (Agent scope), then workspace, then server default.
+"options": ["host", "workspace", None],
+"render": {
+None: "Server default",
+"workspace": "Workspace (sandbox)",
+"host": "Host (Agent scope)",
+},
+},
+{
+"id": "fs_host_mode",
+"label": "Host mode",
+"type": "enum",
+# Default-first: custom (use Agent scope), then cwd, then any, then server default.
+"options": ["custom", "cwd", "any", None],
+"render": {
+None: "Server default / any",
+"any": "any (no extra client restriction)",
+"cwd": "Current working directory",
+"custom": "Custom (use Agent scope)",
+},
+},
 ]},
 {"label": "Code Map", "type": "group", "items": [
 {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
 ]},
 {"label": "Preambles & First-turn", "type": "group", "items": [
-{
+{
+"id": "preambles_enabled",
+"label": "Enable tool call preambles (GPT-5 only)",
+"type": "bool",
+# Only show this control when the *currently selected* model supports it.
+# (This updates live as the Model picker changes.)
+"visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
+},
 {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
 {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
 {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
@@ -2688,8 +2876,22 @@ class ChatCLI:
 ]},
 ]

-#
-
+# Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+try:
+if not self._is_gpt_model(self.model):
+for g in items:
+if not isinstance(g, dict):
+continue
+if (g.get("type") == "group") and (g.get("label") == "General"):
+for row in (g.get("items") or []):
+if isinstance(row, dict) and row.get("id") == "text_verbosity":
+row["options"] = ["medium", "high"]
+row["render"] = {"medium": "Medium", "high": "High"}
+except Exception:
+pass
+
+# Prepare initial values with enum placeholder for model when custom text set
+init_for_ui = dict(initial)
 if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
 # Represent as 'custom' for cycling, but keep original model in working copy for edit with 'e'
 pass  # We'll keep exact model string; enum will show the raw value when not matched
@@ -3037,17 +3239,17 @@ class ChatCLI:
 self.ui.success(f"FS Scope set to: {self._fs_label()}")
 self.save_settings()

-async def set_level_menu(self) -> None:
-val = await self._menu_choice(
-"Control Level",
-"Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
-[
-("
-("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
-("
-("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
-],
-)
+async def set_level_menu(self) -> None:
+val = await self._menu_choice(
+"Control Level",
+"Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
+[
+("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
+("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
+],
+)
 if val == "default":
 self.control_level = None
 elif val in ("1", "2", "3"):
@@ -3127,16 +3329,16 @@ class ChatCLI:
 except Exception:
 pass

-# 3) Tool usage preamble (UX hint)
+# 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
 try:
-if bool(getattr(self, "preambles_enabled", False)) and
+if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
 blocks.append(
 "Tool usage: when you need to read or modify files or run commands, "
 "explicitly explain why you're using a tool, what you'll do, and how it "
 "advances the user's goal before calling the tool."
 )
-except Exception:
-pass
+except Exception:
+pass

 # 4) Working memory (context-summary file), injected once on fresh session restart
 try:
@@ -3256,7 +3458,7 @@ class ChatCLI:
 "Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
 )

-def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
+def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
 """Build provider-native messages for Kimi preserving prior assistant reasoning_content.
 Includes prior provider-native turns and the current user message with first-turn injections.
 """
@@ -3274,8 +3476,31 @@ class ChatCLI:
 for m in (self._kimi_raw_history or []):
 raw.append(m)
 # Append current user message
-raw.append({"role": "user", "content": content})
-return raw
+raw.append({"role": "user", "content": content})
+return raw
+
+def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
+"""Normalize Gemini provider-native history.
+
+Ensures we only send a flat list of dicts back to the server.
+This prevents accidental nesting like [[{...}, {...}]] which the
+google-genai SDK rejects with pydantic union validation errors.
+"""
+out: List[Dict[str, Any]] = []
+if not isinstance(rpm, list):
+return out
+for item in rpm:
+if item is None:
+continue
+if isinstance(item, list):
+# Flatten one level
+for sub in item:
+if isinstance(sub, dict):
+out.append(dict(sub))
+continue
+if isinstance(item, dict):
+out.append(dict(item))
+return out

 def _build_working_memory_injection(self) -> Optional[str]:
 try:
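_normalize_gemini_raw_messages exists to flatten one accidental level of list nesting and drop non-dict entries before the Gemini history is sent back to the server. A runnable standalone sketch of that behavior (illustrative; the real code is the ChatCLI method added above):

from typing import Any, Dict, List

def normalize_gemini_raw_messages(rpm: Any) -> List[Dict[str, Any]]:
    out: List[Dict[str, Any]] = []
    if not isinstance(rpm, list):
        return out
    for item in rpm:
        if isinstance(item, list):
            # Flatten one accidental level of nesting, keeping only dict entries.
            out.extend(dict(sub) for sub in item if isinstance(sub, dict))
        elif isinstance(item, dict):
            out.append(dict(item))
    return out

nested = [[{"role": "user", "parts": [{"text": "hi"}]}],
          {"role": "model", "parts": [{"text": "hello"}]},
          None]
print(normalize_gemini_raw_messages(nested))
# [{'role': 'user', 'parts': [{'text': 'hi'}]}, {'role': 'model', 'parts': [{'text': 'hello'}]}]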
@@ -4577,21 +4802,29 @@ class ChatCLI:
 self.save_settings()
 return True

-if cmd.startswith("/system "):
-self.system_prompt = cmd[len("/system ") :].strip()
-self.history = []
-if self.system_prompt:
-self.history.append({"role": "system", "content": self.system_prompt})
-# Treat as a fresh session; allow map re-injection
-self._did_inject_codebase_map = False
+if cmd.startswith("/system "):
+self.system_prompt = cmd[len("/system ") :].strip()
+self.history = []
+if self.system_prompt:
+self.history.append({"role": "system", "content": self.system_prompt})
+# Treat as a fresh session; allow map re-injection
+self._did_inject_codebase_map = False
 # Also allow custom first-turn injection again
 try:
 self._did_inject_custom_first_turn = False
 except Exception:
 pass
-self.ui.success("System prompt set.")
-
-
+self.ui.success("System prompt set.")
+# OpenAI threaded state is invalid once the system prompt changes.
+try:
+self._openai_previous_response_id = None
+self._openai_response_id_history = []
+self._openai_input_items = []
+self._openai_last_sent_input_items = None
+except Exception:
+pass
+self.save_settings()
+return True

 if cmd.startswith("/title "):
 new_title = cmd[len("/title ") :].strip()
@@ -4603,21 +4836,26 @@ class ChatCLI:
 self.ui.success(f"Thread title set to: {self.thread_name}")
 return True

-if cmd == "/clear":
+if cmd == "/clear":
 self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
 self._did_inject_codebase_map = False
 try:
 self._did_inject_custom_first_turn = False
 except Exception:
 pass
-# Reset provider-native histories
-try:
+# Reset provider-native histories
+try:
 self.messages_for_save = []
 if not self.save_chat_history:
 self.thread_uid = None
-self._kimi_raw_history = []
-
-
+self._kimi_raw_history = []
+self._gemini_raw_history = []
+self._openai_previous_response_id = None
+self._openai_response_id_history = []
+self._openai_input_items = []
+self._openai_last_sent_input_items = None
+except Exception:
+pass
 # Reset local cumulative token counters on session clear
 self._cum_input_tokens = 0
 self._cum_output_tokens = 0
@@ -4694,7 +4932,7 @@ class ChatCLI:

 # ---------------------------- Run loop ----------------------------

-async def run(self) -> None:
+async def run(self) -> None:
 # Try persisted auth
 self._load_auth_state_from_disk()

@@ -4868,25 +5106,70 @@ class ChatCLI:
 self._session_started_at = None
 # Prepare completer for slash commands (if prompt_toolkit is available)
 pt_completer = self._commands_word_completer()
-while True:
-try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+while True:
+try:
+pending_edit = self._pending_user_edit
+edit_mode = pending_edit is not None
+
+if self._pt_session is not None:
+# Use prompt_toolkit with inline completion when available
+# Pass completer per-prompt to ensure latest catalog
+try:
+# prompt_toolkit supports default= on modern versions; fall back gracefully.
+if edit_mode:
+user_input = await self._pt_session.prompt_async(
+"You (edit): ",
+completer=pt_completer,
+complete_while_typing=True,
+default=str(pending_edit),
+)
+else:
+user_input = await self._pt_session.prompt_async(
+"You: ",
+completer=pt_completer,
+complete_while_typing=True,
+)
+except TypeError:
+# Older prompt_toolkit: no default= support
+user_input = await self._pt_session.prompt_async(
+"You: ",
+completer=pt_completer,
+complete_while_typing=True,
+)
+user_input = user_input.strip()
+elif self._input_engine:
+if edit_mode:
+# The low-level input engine currently doesn't support prefill.
+# Show the previous message and let the user paste a replacement.
+try:
+self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"])  # type: ignore
+self.ui.print(str(pending_edit), style=self.ui.theme["dim"])  # type: ignore
+except Exception:
+pass
+new_txt = self._read_multiline_input("Edit> ")
+user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+else:
+# Do not add continuation prefixes on new lines
+user_input = self._input_engine.read_message("You: ", "")
+else:
+if edit_mode:
+try:
+self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"])  # type: ignore
+self.ui.print(str(pending_edit), style=self.ui.theme["dim"])  # type: ignore
+except Exception:
+pass
+new_txt = self._read_multiline_input("Edit> ")
+user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
+else:
+user_input = self._read_multiline_input("You: ")
+
+# Clear pending edit state after we successfully collected input.
+if edit_mode:
+self._pending_user_edit = None
+self._pending_turn_snapshot = None
+# Successful read resets interrupt window
+self._last_interrupt_ts = None
+except KeyboardInterrupt:
 # First Ctrl+C: interrupt input and warn; second within window exits
 now = time.time()
 try:
@@ -4905,8 +5188,8 @@ class ChatCLI:
 self.ui.print("Goodbye.")
 return

-if not user_input:
-continue
+if not user_input:
+continue

 # Command palette if bare '/'
 if user_input == "/":
@@ -4929,12 +5212,35 @@ class ChatCLI:
 if handled:
 continue

-try:
-#
-
-
-
-
+try:
+# Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+# This is critical for first-turn injections (code map/custom note/working memory)
+# which are applied by mutating flags during payload construction.
+self._pending_turn_snapshot = {
+"history": copy.deepcopy(self.history),
+"messages_for_save": copy.deepcopy(self.messages_for_save),
+"kimi_raw": copy.deepcopy(self._kimi_raw_history),
+"gemini_raw": copy.deepcopy(self._gemini_raw_history),
+"openai_prev": getattr(self, "_openai_previous_response_id", None),
+"openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+"openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+"openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+"inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+"did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+"did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+"did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+"memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+"last_built_user_content": getattr(self, "_last_built_user_content", None),
+}
+
+# Clear any stale in-flight dispatch context at turn start.
+self._inflight_dispatch = None
+
+# Record user message for local/server save
+if self.save_chat_history:
+self.messages_for_save.append({
+"role": "user",
+"content": user_input,
 "model": None,
 "citations": None,
 "last_turn_input_tokens": 0,
@@ -4948,25 +5254,86 @@ class ChatCLI:
 if self._busy:
 self.ui.warn("Agent is busy with another turn. Please wait...")
 continue
-self._busy = True
-try:
-assistant_text = await self._stream_once(user_input)
-finally:
-self._busy = False
-except
-
-
-
-
+self._busy = True
+try:
+assistant_text = await self._stream_once(user_input)
+finally:
+self._busy = False
+except KeyboardInterrupt:
+# Ctrl+C mid-stream / mid-tool: do not exit the CLI.
+# Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+try:
+await self._cancel_inflight_dispatch()
+except (Exception, BaseException):
+pass
+
+# Restore state to *before* this turn started.
+try:
+snap = self._pending_turn_snapshot or {}
+if isinstance(snap.get("history"), list):
+self.history = snap.get("history")
+if isinstance(snap.get("messages_for_save"), list):
+self.messages_for_save = snap.get("messages_for_save")
+if isinstance(snap.get("kimi_raw"), list):
+self._kimi_raw_history = snap.get("kimi_raw")
+if isinstance(snap.get("gemini_raw"), list):
+self._gemini_raw_history = snap.get("gemini_raw")
+if "openai_prev" in snap:
+self._openai_previous_response_id = snap.get("openai_prev")
+if isinstance(snap.get("openai_ids"), list):
+self._openai_response_id_history = snap.get("openai_ids")
+if isinstance(snap.get("openai_input_items"), list):
+self._openai_input_items = snap.get("openai_input_items")
+if "openai_last_sent_input_items" in snap:
+self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
+if "inflight_dispatch" in snap:
+self._inflight_dispatch = snap.get("inflight_dispatch")
+if "did_inject_codebase_map" in snap:
+self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
+if "did_inject_custom_first_turn" in snap:
+self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
+if "did_inject_working_memory" in snap:
+self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
+if "memory_paths_for_first_turn" in snap:
+self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
+self._last_built_user_content = snap.get("last_built_user_content")
+except Exception:
+pass
+
+# Clear any transient indicator line and land on a fresh prompt line.
+try:
+sys.stdout.write("\r\x1b[2K\n")
+sys.stdout.flush()
+except Exception:
+try:
+self.ui.print()
+except Exception:
+pass
+
+try:
+supports = self._provider_supports_native_retention(self.model)
+except Exception:
+supports = False
+if supports:
+self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
+else:
+self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
+self._pending_user_edit = user_input
+continue
+except httpx.HTTPStatusError as he:
+try:
+if he.response is not None:
+await he.response.aread()
+body = he.response.text
 else:
 body = ""
 except Exception:
 body = ""
 self.ui.error(f"[HTTP error] {he.response.status_code} {body}")
 continue
-except Exception as e:
-self.ui.error(f"[Client error] {e}")
-continue
+except Exception as e:
+self.ui.error(f"[Client error] {e}")
+continue

 # Skip appending empty assistant messages to avoid 422 on next request
 if assistant_text.strip():
@@ -4992,7 +5359,7 @@ class ChatCLI:
 ("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
 ("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
 (auth_action_key, auth_action_label),
-("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5,
+("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2-thinking, etc.)"),
 ("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
 ("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
 ("clear_history", "🧹 Clear History - Reset chat history"),
@@ -5028,15 +5395,14 @@ class ChatCLI:
 has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
 is_effectively_free = (self.is_free_tier and not has_credits)

-# Recommended models (
-# Curated list per request (include Codex Max as recommended)
+# Recommended models ("feelings" order)
 rec_keys = [
-"
-"
+"gpt-5.2",
+"gpt-5.2-codex",
+"gpt-5",
 "gemini-3-pro-preview",
 "gemini-3-flash-preview",
-"
-"gpt-5.2",
+"claude-opus-4-5-20251101",
 "kimi-k2-thinking",
 "grok-code-fast-1",
 ]
@@ -5078,8 +5444,7 @@ class ChatCLI:
 suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
 choices.append((m, f"{lbl}{suffix}"))

-
-choices.append(("custom", "Custom (enter a model name)"))
+# Per issue list: do not surface "server default" or "custom" in this picker.

 # Render and select using the unified highlighted picker
 picked: Optional[str] = None
@@ -5094,27 +5459,15 @@ class ChatCLI:
 picked = str(val)

 # Enforce free tier restrictions
-if
-self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
-continue
+if is_effectively_free and is_paid_model(picked):
+self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
+continue

 break

-# Apply selection
-
-
-self.ui.info("Model cleared; server default will be used.")
-elif picked == "custom":
-typed = self.ui.prompt(
-"Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
-default=self.model or "",
-)
-self.model = self._resolve_model_alias(typed.strip() or None)
-if not self.model:
-self.ui.info("Model cleared; server default will be used.")
-else:
-self.model = picked
-self.ui.success(f"Model set to: {self.model}")
+# Apply selection
+self.model = picked
+self.ui.success(f"Model set to: {self.model}")

 self._apply_model_side_effects()
 self.save_settings()
@@ -5203,7 +5556,7 @@ class ChatCLI:
 self.save_settings()
 return True

-if choice == "set_system_prompt":
+if choice == "set_system_prompt":
 prompt = self.ui.prompt("Enter system prompt", default=self.system_prompt or "")
 self.system_prompt = prompt.strip()
 self.history = []
@@ -5215,32 +5568,40 @@ class ChatCLI:
 self._did_inject_custom_first_turn = False
 except Exception:
 pass
-# Clear provider-native histories on system reset
-try:
+# Clear provider-native histories on system reset
+try:
 self.messages_for_save = []
 if not self.save_chat_history:
 self.thread_uid = None
-self._kimi_raw_history = []
-
-
+self._kimi_raw_history = []
+self._gemini_raw_history = []
+self._openai_previous_response_id = None
+self._openai_response_id_history = []
+self._openai_input_items = []
+self._openai_last_sent_input_items = None
+except Exception:
+pass
 self.ui.success("System prompt set.")
 self.save_settings()
 return True

-if choice == "clear_history":
+if choice == "clear_history":
 self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
 self._did_inject_codebase_map = False
 try:
 self._did_inject_custom_first_turn = False
 except Exception:
 pass
-try:
+try:
 self.messages_for_save = []
 if not self.save_chat_history:
 self.thread_uid = None
-self._kimi_raw_history = []
-
-
+self._kimi_raw_history = []
+self._gemini_raw_history = []
+self._openai_previous_response_id = None
+self._openai_response_id_history = []
+except Exception:
+pass
 # Reset local cumulative token counters on session clear
 self._cum_input_tokens = 0
 self._cum_output_tokens = 0
@@ -5293,11 +5654,82 @@ class ChatCLI:
 return True

 # ----------------------- SSE Streaming loop ------------------------
-async def _stream_once(self, user_input: str) -> str:
-# Build request payload
-
+async def _stream_once(self, user_input: str) -> str:
+# Build request payload.
+# OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
+# `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
+if self._is_openai_model(self.model):
+msgs: List[Dict[str, str]] = []
+# Codex developer prompt (if enabled) + system prompt
+try:
+if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+msgs.append({"role": "system", "content": self._codex_system_prompt()})
+except Exception:
+pass
+if self.system_prompt:
+msgs.append({"role": "system", "content": self.system_prompt})
+
+# Apply first-turn-only injections to the current user content
+content = user_input
+prefix = self._build_first_turn_injection(user_input)
+if prefix:
+content = f"{prefix}\n\n{user_input}"
+try:
+self._last_built_user_content = content
+except Exception:
+self._last_built_user_content = user_input
+msgs.append({"role": "user", "content": content})
+
+payload: Dict[str, Any] = {"messages": msgs}
+
+# Build OpenAI native input items (authoritative for the server OpenAI path).
+try:
+if isinstance(self._openai_input_items, list) and self._openai_input_items:
+items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
+else:
+# Seed with system prompts for the first OpenAI turn.
+items = []
+try:
+if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
+items.append({"role": "system", "content": self._codex_system_prompt()})
+except Exception:
+pass
+if self.system_prompt:
+items.append({"role": "system", "content": self.system_prompt})
+items.append({"role": "user", "content": content})
+payload["openai_input_items"] = self._sanitize_openai_items(items)
+self._openai_last_sent_input_items = copy.deepcopy(items)
+except Exception:
+# If this fails for any reason, fall back to normal message-based history.
+self._openai_last_sent_input_items = None
+
+# OpenAI Threading: DISABLED. We use full manual input item replay now.
+# if "openai_input_items" not in payload:
+#     try:
+#         if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
+#             payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
+#     except Exception:
+#         pass
+try:
+if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
+payload["openai_response_id_history"] = list(self._openai_response_id_history)
+except Exception:
+pass
+else:
+payload = {"messages": self._build_messages(user_input)}
 if self.model:
 payload["model"] = self.model
+# OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
+try:
+if self._is_openai_model(self.model):
+if (
+isinstance(getattr(self, "_openai_response_id_history", None), list)
+and self._openai_response_id_history
+and "openai_response_id_history" not in payload
+):
+payload["openai_response_id_history"] = list(self._openai_response_id_history)
+except Exception:
+pass
 # Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
 try:
 if self.terminal_id:
@@ -5381,10 +5813,12 @@ class ChatCLI:
 payload["text_verbosity"] = self.text_verbosity
 except Exception:
 pass
-
-
-
-
+# Preambles are a GPT-5-only UX toggle.
+try:
+if self._supports_preambles(self.model):
+payload["preambles_enabled"] = bool(self.preambles_enabled)
+except Exception:
+pass

 if self.web_search_enabled:
 payload["enable_web_search"] = True
@@ -5479,13 +5913,23 @@
     headers["X-Request-Timeout"] = str(int(req_timeout_hint))
 except Exception:
     pass
-# If using a Kimi model, include provider-native messages to preserve reasoning_content
-try:
-    if isinstance(self.model, str) and self.model.startswith("kimi-"):
-        req_payload = dict(req_payload)
-        req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
-except Exception:
-    pass
+# If using a Kimi model, include provider-native messages to preserve reasoning_content
+try:
+    if isinstance(self.model, str) and self.model.startswith("kimi-"):
+        req_payload = dict(req_payload)
+        req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
+except Exception:
+    pass
+# If using a Gemini model, include provider-native contents to preserve thought signatures
+# and strict tool-call chains across HTTP turns.
+try:
+    if isinstance(self.model, str) and self.model.startswith("gemini-"):
+        req_payload = dict(req_payload)
+        hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
+        if hist:
+            req_payload["raw_provider_messages"] = hist
+except Exception:
+    pass
 async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
     async with client.stream("POST", self.stream_url, json=req_payload, headers=headers, follow_redirects=True) as resp:
         if resp.status_code == 429:
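A hedged sketch of the provider branch above: the request payload gains raw_provider_messages only for Kimi or Gemini model names and is left untouched otherwise. The function name and the history arguments are placeholders for the CLI's internal state, not the real helpers:

from typing import Any, Dict, List, Optional

def attach_raw_provider_messages(req_payload: Dict[str, Any],
                                 model: Optional[str],
                                 kimi_history: List[Dict[str, Any]],
                                 gemini_history: List[Dict[str, Any]]) -> Dict[str, Any]:
    out = dict(req_payload)  # copy-on-write, mirroring req_payload = dict(req_payload)
    if not isinstance(model, str):
        return out
    if model.startswith("kimi-") and kimi_history:
        out["raw_provider_messages"] = list(kimi_history)
    elif model.startswith("gemini-") and gemini_history:
        out["raw_provider_messages"] = list(gemini_history)
    return out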
@@ -5596,6 +6040,51 @@
 # Track whether we're currently positioned at the start of a fresh line.
 # This prevents double-newlines between back-to-back tool events.
 at_line_start = True
+
+# --- Tool call in-place status (issuelist.md #7) ---
+# We render a single transient line for the current tool call (no trailing newline)
+# so the later tool.result SUCCESS/FAILURE line can replace it in-place.
+tool_status_active = False
+tool_status_call_id = None
+
+def _tool_status_clear_line() -> None:
+    """Clear the current line (best-effort) and return to column 0."""
+    nonlocal at_line_start
+    try:
+        sys.stdout.write("\r\x1b[2K")
+        sys.stdout.flush()
+    except Exception:
+        pass
+    at_line_start = True
+
+def _tool_status_show(call_id: Any, line: str) -> None:
+    """Show the transient tool status line (no newline)."""
+    nonlocal tool_status_active, tool_status_call_id, at_line_start
+    if not self.show_tool_calls:
+        return
+    tool_status_active = True
+    tool_status_call_id = str(call_id) if call_id is not None else None
+    try:
+        if not at_line_start:
+            sys.stdout.write("\n")
+        sys.stdout.write("\r\x1b[2K" + str(line))
+        sys.stdout.flush()
+        at_line_start = False
+    except Exception:
+        # Fallback: degrade to a normal printed line
+        try:
+            self.ui.print(str(line))
+        except Exception:
+            pass
+        at_line_start = True
+
+def _tool_status_stop() -> None:
+    """Remove the transient tool status line and clear tracking."""
+    nonlocal tool_status_active, tool_status_call_id
+    if tool_status_active:
+        _tool_status_clear_line()
+    tool_status_active = False
+    tool_status_call_id = None
 # Mode: animate or static (default static for stability)
 try:
     _animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
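The helpers above rely on a plain terminal trick: "\r" returns the cursor to column 0 and the ANSI sequence ESC[2K erases the line, so the next write overprints the transient status. A self-contained demo of that mechanism (not part of the package):

import sys
import time

def show_status(line: str) -> None:
    sys.stdout.write("\r\x1b[2K" + line)  # overwrite the current line, no newline
    sys.stdout.flush()

def clear_status() -> None:
    sys.stdout.write("\r\x1b[2K")         # erase the transient line
    sys.stdout.flush()

if __name__ == "__main__":
    show_status("[RUNNING] read_file ...")
    time.sleep(1.0)
    clear_status()
    print("[SUCCESS] read_file")          # lands where the RUNNING line was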
@@ -5899,16 +6388,40 @@
     except Exception:
         pass
 
-    #
-    #
-
-
-
-
-
-
-
-
+    # issuelist.md #7:
+    # Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
+    try:
+        # Clear any previous transient status line (shouldn't happen, but keep stable)
+        _tool_status_stop()
+    except Exception:
+        pass
+    try:
+        tool_name = str(name or "").strip()
+        label = self._tool_concise_label(
+            tool_name,
+            args if isinstance(args, dict) else {},
+            None,
+        )
+        try:
+            model_prefix = (
+                self._current_turn.get("model")
+                or self._last_used_model
+                or self.model
+                or "(server default)"
+            )
+        except Exception:
+            model_prefix = self.model or "(server default)"
+        ORANGE = "\x1b[38;5;214m"
+        WHITE = "\x1b[97m"
+        RESET = "\x1b[0m"
+        status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
+        _tool_status_show(call_id, status_line)
+    except Exception:
+        # Last-resort fallback: print something rather than crash streaming.
+        try:
+            self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
+        except Exception:
+            pass
     # Count tool calls
     try:
         tool_calls += 1
@@ -5931,10 +6444,15 @@
     except Exception:
         pass
 
-elif event == "approval.request":
-    #
-
-
+elif event == "approval.request":
+    # Don't let the transient [RUNNING] line collide with interactive prompts.
+    try:
+        _tool_status_stop()
+    except Exception:
+        pass
+    # First reply wins (web or CLI)
+    await self._handle_approval_request(client, session_id, data)
+    continue
 
 elif event == "approval.result":
     appr = data.get("approved")
@@ -5976,10 +6494,22 @@
     self.ui.info("Working memory created. Restarting conversation with a fresh first-turn injection...")
     return ""
 
-elif event == "tool.result":
-    name = str(data.get("name"))
-    result = data.get("result", {}) or {}
-    call_id = data.get("call_id")
+elif event == "tool.result":
+    name = str(data.get("name"))
+    result = data.get("result", {}) or {}
+    call_id = data.get("call_id")
+    # If we previously rendered a transient [RUNNING] line for this tool call,
+    # clear it now so the SUCCESS/FAILURE line prints in the same place.
+    try:
+        if tool_status_active:
+            # Best-effort match on call_id (some providers may omit it).
+            if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
+                _tool_status_stop()
+    except Exception:
+        try:
+            _tool_status_stop()
+        except Exception:
+            pass
     # Stop any indicator before rendering results
     try:
         await _indicator_stop(clear=True)
@@ -6087,7 +6617,7 @@
     # Do not auto-restart the indicator here; wait for the next model event
 
 elif event == "tool.dispatch":
-    # Client-executed tool flow
+    # Client-executed tool flow
     if not HAS_LOCAL_TOOLS:
         self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
         continue
@@ -6097,12 +6627,23 @@
     # tool invocation on the corresponding 'tool.call' event. Counting
     # dispatch would double-count a single tool call.
 
-    session_id_d = data.get("session_id")
-    call_id = data.get("call_id")
-    name = data.get("name")
-    args = data.get("args", {}) or {}
-    job_token = data.get("job_token")
-    reqp = data.get("requested_policy", {}) or {}
+    session_id_d = data.get("session_id")
+    call_id = data.get("call_id")
+    name = data.get("name")
+    args = data.get("args", {}) or {}
+    job_token = data.get("job_token")
+    reqp = data.get("requested_policy", {}) or {}
+
+    # Track in-flight dispatch so Ctrl+C can cancel quickly.
+    try:
+        self._inflight_dispatch = {
+            "session_id": session_id_d,
+            "call_id": call_id,
+            "job_token": job_token,
+            "name": name,
+        }
+    except Exception:
+        pass
 
     if DEBUG_SSE:
         self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
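Sketch of the in-flight bookkeeping introduced above: record just enough about the dispatched tool job for an interrupt handler to cancel it, and clear the record once the matching callback is sent. The class and the cancel_job callable are hypothetical; the real client keeps a plain dict on self and POSTs to its tools callback endpoint:

from typing import Any, Callable, Dict, Optional

class DispatchTracker:
    def __init__(self) -> None:
        self.inflight: Optional[Dict[str, Any]] = None

    def start(self, session_id: str, call_id: str, job_token: str, name: str) -> None:
        self.inflight = {"session_id": session_id, "call_id": call_id,
                         "job_token": job_token, "name": name}

    def finish(self, call_id: str) -> None:
        # Clear only if the finished call is the one being tracked.
        if self.inflight and str(self.inflight.get("call_id")) == str(call_id):
            self.inflight = None

    def on_interrupt(self, cancel_job: Callable[[Dict[str, Any]], None]) -> None:
        if self.inflight is not None:
            cancel_job(self.inflight)  # e.g. send the stored job_token to a cancel endpoint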
@@ -6377,9 +6918,9 @@
     except Exception:
         self._last_dispatch_ctx = None
 
-    # POST callback
-    try:
-        if session_id_d and call_id and job_token:
+    # POST callback
+    try:
+        if session_id_d and call_id and job_token:
             payload_cb = {
                 "session_id": session_id_d,
                 "call_id": call_id,
@@ -6387,16 +6928,87 @@
                 "result": result,
                 "job_token": job_token,
             }
-            r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
-            if r.status_code >= 400:
-                self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
-    except Exception as e:
-        self.ui.warn(f"tools.callback error: {e}")
+            r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
+            if r.status_code >= 400:
+                self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
+    except Exception as e:
+        self.ui.warn(f"tools.callback error: {e}")
+    finally:
+        try:
+            # Clear in-flight dispatch context when we send a callback.
+            if isinstance(self._inflight_dispatch, dict):
+                if str(self._inflight_dispatch.get("call_id")) == str(call_id):
+                    self._inflight_dispatch = None
+        except Exception:
+            pass
 
-elif event == "message.completed":
+elif event == "message.completed":
     # Safety: this block handles only 'message.completed'.
     usage = data.get("usage", {})
-    model_used = data.get("model") or self.model
+    model_used = data.get("model") or self.model
+    # OpenAI: persist the last response id so future turns can use previous_response_id.
+    try:
+        if self._is_openai_model(model_used):
+            # Prefer the explicit per-turn id list when provided by the server.
+            ids = data.get("openai_response_ids")
+            if isinstance(ids, list) and ids:
+                for x in ids:
+                    if not isinstance(x, str):
+                        continue
+                    xs = x.strip()
+                    if not xs:
+                        continue
+                    try:
+                        if xs not in self._openai_response_id_history:
+                            self._openai_response_id_history.append(xs)
+                    except Exception:
+                        pass
+            rid = data.get("openai_previous_response_id")
+            if isinstance(rid, str) and rid.strip():
+                self._openai_previous_response_id = rid.strip()
+                try:
+                    if rid.strip() not in self._openai_response_id_history:
+                        self._openai_response_id_history.append(rid.strip())
+                except Exception:
+                    pass
+
+            # OpenAI manual-state replay: server returns the delta items appended
+            # during this turn (reasoning/tool calls/tool outputs). Persist them.
+            try:
+                delta = data.get("openai_delta_items")
+                if isinstance(delta, list):
+                    base_items = (
+                        self._openai_last_sent_input_items
+                        if isinstance(self._openai_last_sent_input_items, list)
+                        else copy.deepcopy(self._openai_input_items)
+                    )
+                    # Normalize to a list of dicts where possible; keep unknown shapes as-is.
+                    merged: List[Any] = []
+                    try:
+                        merged.extend(list(base_items or []))
+                    except Exception:
+                        merged = list(base_items or []) if base_items is not None else []
+                    merged.extend(delta)
+                    # Store only dict-like items (server is expected to send dicts)
+                    cleaned: List[Dict[str, Any]] = []
+                    for it in merged:
+                        if isinstance(it, dict):
+                            cleaned.append(dict(it))
+                    self._openai_input_items = cleaned
+            except Exception:
+                pass
+            finally:
+                # Clear per-turn sent snapshot
+                self._openai_last_sent_input_items = None
+    except Exception:
+        pass
+    # Gemini: server may include an authoritative provider-native history snapshot.
+    try:
+        if isinstance(model_used, str) and model_used.startswith("gemini-"):
+            rpm = data.get("raw_provider_messages")
+            self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
+    except Exception:
+        pass
     # Mark completion for retry controller
     try:
         last_completed = True
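The message.completed handler above persists the manual Responses-API state by appending the server-reported delta items to whatever was sent this turn. A minimal sketch of that merge, assuming a free function in place of the handler's inline code (the function name is illustrative):

from typing import Any, Dict, List, Optional

def merge_openai_items(last_sent: Optional[List[Dict[str, Any]]],
                       stored: List[Dict[str, Any]],
                       delta: List[Any]) -> List[Dict[str, Any]]:
    # Prefer the exact items sent this turn; fall back to the stored chain.
    base = last_sent if isinstance(last_sent, list) else list(stored)
    merged = list(base) + list(delta)
    # Keep only dict-shaped items, as the diff does.
    return [dict(it) for it in merged if isinstance(it, dict)]

if __name__ == "__main__":
    sent = [{"role": "user", "content": "hi"}]
    delta = [{"type": "function_call", "name": "read_file"},
             {"type": "function_call_output", "output": "ok"},
             "not-a-dict"]  # dropped by the filter
    print(len(merge_openai_items(sent, [], delta)))  # 3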
@@ -7238,20 +7850,29 @@
         pass
     return "".join(assistant_buf)
 
-elif event == "provider.message":
-    # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
-    provider = (data.get("provider") or "").lower()
-    msg = data.get("message")
-    if
-    #
-    try:
-
-
-
-    self.
-
-
-
+elif event == "provider.message":
+    # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
+    provider = (data.get("provider") or "").lower()
+    msg = data.get("message")
+    if provider == "gemini":
+        # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
+        try:
+            if isinstance(msg, dict):
+                self._gemini_raw_history.append(dict(msg))
+            elif isinstance(msg, list):
+                self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
+        except Exception:
+            pass
+    if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
+        # Append as-is to local raw history for the next turn
+        try:
+            self._kimi_raw_history.append(dict(msg))
+        except Exception:
+            try:
+                self._kimi_raw_history.append(msg)  # type: ignore
+            except Exception:
+                pass
+    continue
 
 else:
     # TEMP DEBUG: show unknown/unhandled events
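Sketch of the provider.message branch above: Gemini snapshots are always retained (dict or list), Kimi snapshots only when native tool results are being kept. The two history lists stand in for _gemini_raw_history and _kimi_raw_history; the function itself is illustrative:

from typing import Any, Dict, List, Union

def retain_provider_message(provider: str,
                            msg: Union[Dict[str, Any], List[Any], None],
                            gemini_history: List[Dict[str, Any]],
                            kimi_history: List[Dict[str, Any]],
                            retain_native_tool_results: bool = False) -> None:
    provider = (provider or "").lower()
    if provider == "gemini":
        if isinstance(msg, dict):
            gemini_history.append(dict(msg))
        elif isinstance(msg, list):
            gemini_history.extend(m for m in msg if isinstance(m, dict))
    elif provider == "kimi" and retain_native_tool_results and isinstance(msg, dict):
        kimi_history.append(dict(msg))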
@@ -7799,7 +8420,7 @@
     "gpt-5": 400000,
     "gpt-5-2025-08-07": 400000,
     "codex-mini-latest": 200000,
-
+    # (removed gemini-2.5-pro)
     "gemini-3-flash-preview": 1048576,
     "gemini-3-pro-preview": 1000000,
     "grok-4-1-fast-reasoning": 2000000,
@@ -7816,10 +8437,10 @@
     "claude-sonnet-4-5-20250929-thinking": 1000000,
     "claude-opus-4-5-20251101": 200000,
     "claude-opus-4-5-20251101-thinking": 200000,
-    "glm-4.
-    })
-except Exception:
-    pass
+    "glm-4.7": 200000,
+    })
+except Exception:
+    pass
 self._model_ctx_map = ctx_map
 return ctx_map
 
@@ -8206,8 +8827,11 @@ class ChatCLI:
     "We’ll configure a few defaults. You can change these later via /settings.",
 )
 
-# --- 1) Default model (menu
-await self._wizard_model_step()
+# --- 1) Default model (menu) ---
+await self._wizard_model_step()
+# If the picker was cancelled (or model still unset), choose a sensible default.
+if not self.model:
+    self.model = self._recommended_default_model()
 
 # --- 2) Tools (always ON per design) ---
 self.requested_tools = True
@@ -8217,9 +8841,9 @@
     "Control levels: 1=read-only, 2=approval on write/exec, 3=no approvals"
 )
 await self.set_level_menu()
-if self.control_level not in (1, 2, 3):
-    # Default to Level
-    self.control_level =
+if self.control_level not in (1, 2, 3):
+    # Default to Level 3 if user aborted
+    self.control_level = 3
 
 # --- 4) Agent scope (menus; only type on custom path) ---
 self.ui.print(
@@ -8270,30 +8894,45 @@
 except Exception:
     curv = "medium"
 
-
-
-
-    [
-        ("low", "Low – short, to-the-point answers"),
-        ("medium", "Medium – balanced detail
-        ("high", "High – more verbose explanations"),
-    ]
-
+verbosity_choices: List[Tuple[str, str]] = []
+if self._is_gpt_model(self.model):
+    # Default-first: Low for GPT models.
+    verbosity_choices = [
+        ("low", "Low – short, to-the-point answers"),
+        ("medium", "Medium – balanced detail"),
+        ("high", "High – more verbose explanations"),
+    ]
+else:
+    # Default-first: Medium for non-GPT models; do not surface "Low".
+    verbosity_choices = [
+        ("medium", "Medium – balanced detail (recommended)"),
+        ("high", "High – more verbose explanations"),
+    ]
+
+verb_choice = await self._menu_choice(
+    "Text verbosity",
+    "How verbose should responses be by default?",
+    verbosity_choices,
+)
 if verb_choice in ("low", "medium", "high"):
     self.text_verbosity = verb_choice
 else:
     self.text_verbosity = curv or "medium"
 
-# --- 7) Tool preambles (
-
-
-
-
-
-
-
-
-
+# --- 7) Tool preambles (GPT-5 only) ---
+if self._supports_preambles(self.model):
+    preamble_choice = await self._menu_choice(
+        "Tool call preambles",
+        "Before using tools, the agent can briefly explain what it will do and why.",
+        [
+            ("off", "Disable preambles (default)"),
+            ("on", "Enable preambles"),
+        ],
+    )
+    self.preambles_enabled = preamble_choice == "on"
+else:
+    # Never enable preambles on unsupported models.
+    self.preambles_enabled = False
 
 # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
 custom_choice = await self._menu_choice(
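A small sketch of the model-dependent gating used by the wizard above: the verbosity menu differs for GPT models, and the preamble toggle is only honored when the model supports it. is_gpt and supports_preambles are stand-ins for the _is_gpt_model/_supports_preambles helpers:

from typing import List, Tuple

def verbosity_menu(is_gpt: bool) -> List[Tuple[str, str]]:
    if is_gpt:
        return [("low", "Low"), ("medium", "Medium"), ("high", "High")]
    return [("medium", "Medium (recommended)"), ("high", "High")]

def preambles_enabled(supports_preambles: bool, user_choice_on: bool) -> bool:
    # Never enable preambles on unsupported models.
    return bool(supports_preambles and user_choice_on)

if __name__ == "__main__":
    print(verbosity_menu(is_gpt=False))    # no "low" entry for non-GPT models
    print(preambles_enabled(False, True))  # False: gated off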
@@ -8367,7 +9006,8 @@ class ChatCLI:
     text = m.get("content", "")
     contents.append({"role": role, "parts": [{"text": text}]})
 # Pick a Gemini model for counting; fall back if current isn't Gemini
-
+# (gemini-2.5-pro removed from curated lists)
+count_model = "gemini-3-flash-preview"
 res = client.models.count_tokens(model=count_model, contents=contents)
 t = int(getattr(res, "total_tokens", 0) or 0)
 if t > 0:
@@ -8425,16 +9065,16 @@
         blocks.append(txt.strip())
 except Exception:
     pass
-# Tool preamble
-try:
-    if bool(getattr(self, "preambles_enabled", False)):
-        blocks.append(
-            "Tool usage: when you need to read or modify files or run commands, "
-            "explicitly explain why you're using a tool, what you'll do, and how it "
-            "advances the user's goal before calling the tool."
-        )
-except Exception:
-    pass
+# Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+try:
+    if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
+        blocks.append(
+            "Tool usage: when you need to read or modify files or run commands, "
+            "explicitly explain why you're using a tool, what you'll do, and how it "
+            "advances the user's goal before calling the tool."
+        )
+except Exception:
+    pass
 # Working memory preview (does not touch _did_inject_working_memory or paths)
 try:
     if self._memory_paths_for_first_turn: