henosis-cli 0.6.8__py3-none-any.whl → 0.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli.py +843 -269
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.9.dist-info}/METADATA +1 -1
- henosis_cli-0.6.9.dist-info/RECORD +11 -0
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.9.dist-info}/WHEEL +1 -1
- henosis_cli_tools/settings_ui.py +77 -38
- henosis_cli-0.6.8.dist-info/RECORD +0 -11
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.9.dist-info}/entry_points.txt +0 -0
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.9.dist-info}/top_level.txt +0 -0
cli.py
CHANGED
|
@@ -6,11 +6,12 @@
|
|
|
6
6
|
# - Preserves previous behavior and settings
|
|
7
7
|
# - Injects CODEBASE_MAP.md into the first user message (wrapped in <codebase_map>) without manual trimming.
|
|
8
8
|
|
|
9
|
-
import argparse
|
|
10
|
-
import asyncio
|
|
11
|
-
import
|
|
12
|
-
import
|
|
13
|
-
import
|
|
9
|
+
import argparse
|
|
10
|
+
import asyncio
|
|
11
|
+
import copy
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
14
15
|
import socket
|
|
15
16
|
import shutil
|
|
16
17
|
from pathlib import Path
|
|
@@ -684,7 +685,7 @@ class UI:
|
|
|
684
685
|
print(f"{n:<40} {ty:<8} {sz}")
|
|
685
686
|
|
|
686
687
|
class ChatCLI:
|
|
687
|
-
def __init__(
|
|
688
|
+
def __init__(
|
|
688
689
|
self,
|
|
689
690
|
server: str,
|
|
690
691
|
model: Optional[str],
|
|
@@ -1001,6 +1002,21 @@ class ChatCLI:
|
|
|
1001
1002
|
self._kimi_raw_history: List[Dict[str, Any]] = []
|
|
1002
1003
|
# Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
|
|
1003
1004
|
self._gemini_raw_history: List[Dict[str, Any]] = []
|
|
1005
|
+
# OpenAI Responses API threading: retain previous response id across turns
|
|
1006
|
+
self._openai_previous_response_id: Optional[str] = None
|
|
1007
|
+
# OpenAI Responses API threading: retain the full chain of response ids across turns
|
|
1008
|
+
# (server will also echo per-turn ids in message.completed.openai_response_ids)
|
|
1009
|
+
self._openai_response_id_history: List[str] = []
|
|
1010
|
+
|
|
1011
|
+
# OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
|
|
1012
|
+
# including reasoning items, function_call items, and function_call_output items.
|
|
1013
|
+
self._openai_input_items: List[Dict[str, Any]] = []
|
|
1014
|
+
# For robustness, remember exactly what we sent as openai_input_items for the current turn
|
|
1015
|
+
# so we can append server-provided openai_delta_items deterministically.
|
|
1016
|
+
self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
|
|
1017
|
+
# Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
|
|
1018
|
+
# Shape: {session_id, call_id, job_token, name}
|
|
1019
|
+
self._inflight_dispatch: Optional[Dict[str, Any]] = None
|
|
1004
1020
|
# Last server billing info from /api/usage/commit
|
|
1005
1021
|
self._last_commit_cost_usd: float = 0.0
|
|
1006
1022
|
self._last_remaining_credits: Optional[float] = None
|
|
@@ -1051,8 +1067,14 @@ class ChatCLI:
|
|
|
1051
1067
|
self._thinking_indicator_enabled = True
|
|
1052
1068
|
except Exception:
|
|
1053
1069
|
self._thinking_indicator_enabled = True
|
|
1054
|
-
# Track Ctrl+C timing for double-press-to-exit behavior
|
|
1055
|
-
self._last_interrupt_ts: Optional[float] = None
|
|
1070
|
+
# Track Ctrl+C timing for double-press-to-exit behavior
|
|
1071
|
+
self._last_interrupt_ts: Optional[float] = None
|
|
1072
|
+
|
|
1073
|
+
# Ctrl+C during a running stream should not kill the entire CLI.
|
|
1074
|
+
# Instead, we cancel the in-flight turn and reopen the last user query for editing.
|
|
1075
|
+
# NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
|
|
1076
|
+
self._pending_user_edit: Optional[str] = None
|
|
1077
|
+
self._pending_turn_snapshot: Optional[Dict[str, Any]] = None
|
|
1056
1078
|
|
|
1057
1079
|
# Timers: session-level and per-turn wall-clock timers
|
|
1058
1080
|
self._session_started_at: Optional[float] = None # time.perf_counter() at session start
|
|
@@ -1093,7 +1115,7 @@ class ChatCLI:
|
|
|
1093
1115
|
self._pt_session = None
|
|
1094
1116
|
|
|
1095
1117
|
# ----------------------- Provider heuristics -----------------------
|
|
1096
|
-
def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
|
|
1118
|
+
def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
|
|
1097
1119
|
"""Return True when the model is an OpenAI reasoning-capable model.
|
|
1098
1120
|
Mirrors server-side heuristic: prefixes 'gpt-5' or 'o4'.
|
|
1099
1121
|
"""
|
|
@@ -1344,6 +1366,9 @@ class ChatCLI:
|
|
|
1344
1366
|
return {
|
|
1345
1367
|
# OpenAI
|
|
1346
1368
|
"gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
|
|
1369
|
+
# New: gpt-5.2-codex
|
|
1370
|
+
# Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
|
|
1371
|
+
"gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
|
|
1347
1372
|
# From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
|
|
1348
1373
|
"gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
|
|
1349
1374
|
"gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
|
|
@@ -1361,8 +1386,7 @@ class ChatCLI:
|
|
|
1361
1386
|
# New Opus 4.5 (provider base $5/$25 with 1.4x margin -> $7.00/$35.00)
|
|
1362
1387
|
"claude-opus-4-5-20251101": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
|
|
1363
1388
|
"claude-opus-4-5-20251101-thinking": {"input": 7.00, "output": 35.00, "provider": "anthropic"},
|
|
1364
|
-
# Gemini
|
|
1365
|
-
"gemini-2.5-pro": {"input": 1.75, "output": 14.00, "provider": "gemini"},
|
|
1389
|
+
# Gemini
|
|
1366
1390
|
# Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
|
|
1367
1391
|
"gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
|
|
1368
1392
|
# Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
|
|
@@ -1377,15 +1401,15 @@ class ChatCLI:
|
|
|
1377
1401
|
# DeepSeek V3.2 (+$0.25 per 1M margin)
|
|
1378
1402
|
"deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
|
|
1379
1403
|
"deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
|
|
1380
|
-
|
|
1404
|
+
# Removed: deepseek speciale (not supported)
|
|
1381
1405
|
# Kimi
|
|
1382
1406
|
"kimi-k2-0905-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
|
|
1383
1407
|
"kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
|
|
1384
1408
|
"kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
|
|
1385
|
-
# GLM (Z.AI)
|
|
1386
|
-
# Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
|
|
1387
|
-
"glm-4.
|
|
1388
|
-
}
|
|
1409
|
+
# GLM (Z.AI)
|
|
1410
|
+
# Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
|
|
1411
|
+
"glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
|
|
1412
|
+
}
|
|
1389
1413
|
|
|
1390
1414
|
def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
|
|
1391
1415
|
if not model:
|
|
@@ -1433,6 +1457,15 @@ class ChatCLI:
|
|
|
1433
1457
|
except Exception:
|
|
1434
1458
|
model_name = ""
|
|
1435
1459
|
try:
|
|
1460
|
+
# Provider-native state resets when switching away from OpenAI.
|
|
1461
|
+
try:
|
|
1462
|
+
if self.model and (not self._is_openai_model(self.model)):
|
|
1463
|
+
self._openai_previous_response_id = None
|
|
1464
|
+
self._openai_response_id_history = []
|
|
1465
|
+
self._openai_input_items = []
|
|
1466
|
+
self._openai_last_sent_input_items = None
|
|
1467
|
+
except Exception:
|
|
1468
|
+
pass
|
|
1436
1469
|
if model_name in {"gpt-5.2-pro"}:
|
|
1437
1470
|
# Default these to high, but don't clobber a user-chosen xhigh.
|
|
1438
1471
|
if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
|
|
@@ -1440,11 +1473,138 @@ class ChatCLI:
|
|
|
1440
1473
|
# Codex family: disable preambles for better behavior
|
|
1441
1474
|
if "codex" in model_name:
|
|
1442
1475
|
self.preambles_enabled = False
|
|
1476
|
+
# Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
|
|
1477
|
+
# Force-disable for all other models (even if a saved setting had it enabled).
|
|
1478
|
+
if not self._supports_preambles(self.model):
|
|
1479
|
+
self.preambles_enabled = False
|
|
1443
1480
|
except Exception:
|
|
1444
1481
|
try:
|
|
1445
1482
|
self.reasoning_effort = "high"
|
|
1446
1483
|
except Exception:
|
|
1447
1484
|
pass
|
|
1485
|
+
|
|
1486
|
+
def _supports_preambles(self, model: Optional[str]) -> bool:
|
|
1487
|
+
"""Tool-call preambles are a CLI-only UX hint.
|
|
1488
|
+
|
|
1489
|
+
Requirement: disabled for all models except GPT-5 (base model; non-Codex).
|
|
1490
|
+
In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
|
|
1491
|
+
"""
|
|
1492
|
+
try:
|
|
1493
|
+
if not model:
|
|
1494
|
+
return False
|
|
1495
|
+
m = str(model).strip().lower()
|
|
1496
|
+
# Only the base GPT-5 line supports this UX toggle.
|
|
1497
|
+
# Allow:
|
|
1498
|
+
# - "gpt-5"
|
|
1499
|
+
# - date-pinned variants like "gpt-5-2025-08-07"
|
|
1500
|
+
# Disallow:
|
|
1501
|
+
# - versioned families like "gpt-5.1*" / "gpt-5.2*"
|
|
1502
|
+
if not (m == "gpt-5" or m.startswith("gpt-5-")):
|
|
1503
|
+
return False
|
|
1504
|
+
if "codex" in m:
|
|
1505
|
+
return False
|
|
1506
|
+
return True
|
|
1507
|
+
except Exception:
|
|
1508
|
+
return False
|
|
1509
|
+
|
|
1510
|
+
def _is_openai_model(self, model: Optional[str]) -> bool:
|
|
1511
|
+
"""Best-effort model/provider discriminator for client-side state.
|
|
1512
|
+
|
|
1513
|
+
The server is multi-provider. For the CLI we treat anything that isn't an explicit
|
|
1514
|
+
non-OpenAI provider prefix as OpenAI.
|
|
1515
|
+
"""
|
|
1516
|
+
try:
|
|
1517
|
+
if not model:
|
|
1518
|
+
return False
|
|
1519
|
+
m = str(model).strip().lower()
|
|
1520
|
+
if not m:
|
|
1521
|
+
return False
|
|
1522
|
+
for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
|
|
1523
|
+
if m.startswith(pfx):
|
|
1524
|
+
return False
|
|
1525
|
+
# Everything else defaults to OpenAI in this repo.
|
|
1526
|
+
return True
|
|
1527
|
+
except Exception:
|
|
1528
|
+
return False
|
|
1529
|
+
|
|
1530
|
+
def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
|
|
1531
|
+
"""Whether this provider has an implemented native tool/thinking retention path."""
|
|
1532
|
+
try:
|
|
1533
|
+
if not model:
|
|
1534
|
+
return False
|
|
1535
|
+
m = str(model).strip().lower()
|
|
1536
|
+
if m.startswith("gemini-"):
|
|
1537
|
+
return True
|
|
1538
|
+
if m.startswith("kimi-"):
|
|
1539
|
+
return bool(getattr(self, "retain_native_tool_results", False))
|
|
1540
|
+
if self._is_openai_model(model):
|
|
1541
|
+
return True
|
|
1542
|
+
return False
|
|
1543
|
+
except Exception:
|
|
1544
|
+
return False
|
|
1545
|
+
|
|
1546
|
+
def _sanitize_openai_items(self, items: Any) -> Any:
|
|
1547
|
+
"""Recursively strip fields from OpenAI output items that cause errors when used as input."""
|
|
1548
|
+
if isinstance(items, list):
|
|
1549
|
+
return [self._sanitize_openai_items(x) for x in items]
|
|
1550
|
+
if isinstance(items, dict):
|
|
1551
|
+
# 'status' is the main offender causing 400s
|
|
1552
|
+
bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
|
|
1553
|
+
return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
|
|
1554
|
+
return items
|
|
1555
|
+
|
|
1556
|
+
async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
|
|
1557
|
+
"""If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
|
|
1558
|
+
|
|
1559
|
+
This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
|
|
1560
|
+
Best-effort; never raises.
|
|
1561
|
+
"""
|
|
1562
|
+
ctx = None
|
|
1563
|
+
try:
|
|
1564
|
+
ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
|
|
1565
|
+
except Exception:
|
|
1566
|
+
ctx = None
|
|
1567
|
+
if not ctx:
|
|
1568
|
+
return
|
|
1569
|
+
session_id = ctx.get("session_id")
|
|
1570
|
+
call_id = ctx.get("call_id")
|
|
1571
|
+
job_token = ctx.get("job_token")
|
|
1572
|
+
name = ctx.get("name")
|
|
1573
|
+
if not (session_id and call_id and job_token):
|
|
1574
|
+
return
|
|
1575
|
+
payload_cb = {
|
|
1576
|
+
"session_id": session_id,
|
|
1577
|
+
"call_id": call_id,
|
|
1578
|
+
"name": name,
|
|
1579
|
+
"job_token": job_token,
|
|
1580
|
+
"result": {
|
|
1581
|
+
"ok": False,
|
|
1582
|
+
"cancelled": True,
|
|
1583
|
+
"error": str(reason or "cancelled"),
|
|
1584
|
+
},
|
|
1585
|
+
}
|
|
1586
|
+
try:
|
|
1587
|
+
# Keep it short; we just want to unblock the server.
|
|
1588
|
+
http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
|
|
1589
|
+
except Exception:
|
|
1590
|
+
http_timeout = None
|
|
1591
|
+
try:
|
|
1592
|
+
async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
|
|
1593
|
+
await client.post(self.tools_callback_url, json=payload_cb)
|
|
1594
|
+
except Exception:
|
|
1595
|
+
pass
|
|
1596
|
+
finally:
|
|
1597
|
+
try:
|
|
1598
|
+
self._inflight_dispatch = None
|
|
1599
|
+
except Exception:
|
|
1600
|
+
pass
|
|
1601
|
+
|
|
1602
|
+
def _is_gpt_model(self, model: Optional[str]) -> bool:
|
|
1603
|
+
"""True for OpenAI GPT models (used for showing certain UI-only toggles)."""
|
|
1604
|
+
try:
|
|
1605
|
+
return bool(model) and str(model).strip().lower().startswith("gpt-")
|
|
1606
|
+
except Exception:
|
|
1607
|
+
return False
|
|
1448
1608
|
def _is_codex_model(self, model: Optional[str]) -> bool:
|
|
1449
1609
|
try:
|
|
1450
1610
|
return bool(model) and ("codex" in str(model).lower())
|
|
@@ -2120,6 +2280,14 @@ class ChatCLI:
|
|
|
2120
2280
|
self.history = []
|
|
2121
2281
|
if self.system_prompt:
|
|
2122
2282
|
self.history.append({"role": "system", "content": self.system_prompt})
|
|
2283
|
+
# OpenAI threaded state is invalid once the system prompt changes.
|
|
2284
|
+
try:
|
|
2285
|
+
self._openai_previous_response_id = None
|
|
2286
|
+
self._openai_response_id_history = []
|
|
2287
|
+
self._openai_input_items = []
|
|
2288
|
+
self._openai_last_sent_input_items = None
|
|
2289
|
+
except Exception:
|
|
2290
|
+
pass
|
|
2123
2291
|
# On settings load, do not assume the custom first-turn was injected yet
|
|
2124
2292
|
try:
|
|
2125
2293
|
self._did_inject_custom_first_turn = False
|
|
@@ -2507,30 +2675,28 @@ class ChatCLI:
|
|
|
2507
2675
|
|
|
2508
2676
|
def _model_presets(self) -> List[Tuple[str, str]]:
|
|
2509
2677
|
"""Shared list of (model, label) used by settings UI and /model menu."""
|
|
2678
|
+
# Ordered in "feelings" order (Recommended first, then Others).
|
|
2679
|
+
# NOTE: We intentionally do not include a "server default" or "custom" option here.
|
|
2510
2680
|
return [
|
|
2681
|
+
# Recommended
|
|
2511
2682
|
("gpt-5.2", "OpenAI: gpt-5.2"),
|
|
2512
|
-
("gpt-5.2-
|
|
2683
|
+
("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
|
|
2513
2684
|
("gpt-5", "OpenAI: gpt-5"),
|
|
2685
|
+
("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
|
|
2686
|
+
("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
|
|
2687
|
+
("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
|
|
2688
|
+
("kimi-k2-thinking", "Kimi: kimi-k2-thinking"),
|
|
2689
|
+
("grok-code-fast-1", "xAI: grok-code-fast-1"),
|
|
2690
|
+
|
|
2691
|
+
# Others
|
|
2692
|
+
("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
|
|
2514
2693
|
("gpt-5-codex", "OpenAI: gpt-5-codex"),
|
|
2515
2694
|
("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
|
|
2516
|
-
("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
|
|
2517
2695
|
("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
|
|
2518
|
-
("deepseek-3.2
|
|
2519
|
-
("kimi-k2-
|
|
2520
|
-
("
|
|
2521
|
-
|
|
2522
|
-
("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
|
|
2523
|
-
("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
|
|
2524
|
-
("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
|
|
2525
|
-
("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
|
|
2526
|
-
("grok-4", "xAI: grok-4"),
|
|
2527
|
-
("grok-code-fast-1", "xAI: grok-code-fast-1"),
|
|
2528
|
-
("claude-sonnet-4-5-20250929", "Anthropic: claude-sonnet-4-5-20250929 (thinking OFF)"),
|
|
2529
|
-
("claude-sonnet-4-5-20250929-thinking", "Anthropic: claude-sonnet-4-5-20250929 (thinking ON)"),
|
|
2530
|
-
("claude-opus-4-5-20251101", "Anthropic: claude-opus-4-5-20251101 (thinking OFF)"),
|
|
2531
|
-
("claude-opus-4-5-20251101-thinking", "Anthropic: claude-opus-4-5-20251101 (thinking ON)"),
|
|
2532
|
-
("glm-4.6", "GLM: glm-4.6"),
|
|
2533
|
-
]
|
|
2696
|
+
("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
|
|
2697
|
+
("kimi-k2-0905-preview", "Kimi: kimi-k2-0905-preview"),
|
|
2698
|
+
("glm-4.7", "GLM: glm-4.7"),
|
|
2699
|
+
]
|
|
2534
2700
|
|
|
2535
2701
|
async def open_settings(self, focus: Optional[str] = None) -> None:
|
|
2536
2702
|
"""Open the new dependency-free settings UI. Falls back to legacy only when
|
|
@@ -2584,37 +2750,33 @@ class ChatCLI:
|
|
|
2584
2750
|
}
|
|
2585
2751
|
initial = self._collect_settings_dict()
|
|
2586
2752
|
|
|
2587
|
-
# Model presets list (shared)
|
|
2588
|
-
model_presets: List[Tuple[str, str]] = self._model_presets()
|
|
2589
|
-
|
|
2590
|
-
#
|
|
2591
|
-
#
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
"
|
|
2595
|
-
"
|
|
2753
|
+
# Model presets list (shared)
|
|
2754
|
+
model_presets: List[Tuple[str, str]] = self._model_presets()
|
|
2755
|
+
|
|
2756
|
+
# Reorder with a Recommended section at the top.
|
|
2757
|
+
# IMPORTANT: remove "server default" and "custom" from Settings UI.
|
|
2758
|
+
rec_keys_ordered = [
|
|
2759
|
+
"gpt-5.2",
|
|
2760
|
+
"gpt-5.2-codex",
|
|
2761
|
+
"gpt-5",
|
|
2596
2762
|
"gemini-3-pro-preview",
|
|
2597
2763
|
"gemini-3-flash-preview",
|
|
2598
|
-
"
|
|
2599
|
-
"gpt-5.2",
|
|
2764
|
+
"claude-opus-4-5-20251101",
|
|
2600
2765
|
"kimi-k2-thinking",
|
|
2601
2766
|
"grok-code-fast-1",
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
#
|
|
2609
|
-
|
|
2767
|
+
]
|
|
2768
|
+
rec_set = set(rec_keys_ordered)
|
|
2769
|
+
preset_map = {m: lbl for (m, lbl) in model_presets}
|
|
2770
|
+
rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
|
|
2771
|
+
other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
|
|
2772
|
+
|
|
2773
|
+
# Build enum options in the order: Recommended, Others
|
|
2774
|
+
model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
|
|
2775
|
+
render_map: Dict[Any, str] = {}
|
|
2610
2776
|
for m, lbl in rec_list:
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
render_map[m] = f"{lbl} (recommended)"
|
|
2615
|
-
for m, lbl in other_list:
|
|
2616
|
-
render_map[m] = lbl
|
|
2617
|
-
render_map["custom"] = "Custom..."
|
|
2777
|
+
render_map[m] = lbl
|
|
2778
|
+
for m, lbl in other_list:
|
|
2779
|
+
render_map[m] = lbl
|
|
2618
2780
|
|
|
2619
2781
|
# Build items schema
|
|
2620
2782
|
items: List[Dict[str, Any]] = [
|
|
@@ -2641,14 +2803,16 @@ class ChatCLI:
|
|
|
2641
2803
|
"id": "requested_tools",
|
|
2642
2804
|
"label": "Tools",
|
|
2643
2805
|
"type": "enum",
|
|
2644
|
-
|
|
2806
|
+
# Default-first: ON, then OFF, then server default.
|
|
2807
|
+
"options": [True, False, None],
|
|
2645
2808
|
"render": {None: "Server default", True: "ON", False: "OFF"},
|
|
2646
2809
|
},
|
|
2647
2810
|
{
|
|
2648
2811
|
"id": "control_level",
|
|
2649
2812
|
"label": "Control level",
|
|
2650
2813
|
"type": "enum",
|
|
2651
|
-
|
|
2814
|
+
# Default-first: Level 3, then 2, then 1, then server default.
|
|
2815
|
+
"options": [3, 2, 1, None],
|
|
2652
2816
|
"render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
|
|
2653
2817
|
},
|
|
2654
2818
|
{"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
|
|
@@ -2662,35 +2826,44 @@ class ChatCLI:
|
|
|
2662
2826
|
{"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
|
|
2663
2827
|
# Agent scope & filesystem controls
|
|
2664
2828
|
{"id": "host_base", "label": "Agent scope directory", "type": "text"},
|
|
2665
|
-
{
|
|
2666
|
-
"id": "fs_scope",
|
|
2667
|
-
"label": "Filesystem scope",
|
|
2668
|
-
"type": "enum",
|
|
2669
|
-
|
|
2670
|
-
"
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
"
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
"
|
|
2679
|
-
"
|
|
2680
|
-
"
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
"
|
|
2686
|
-
|
|
2687
|
-
|
|
2829
|
+
{
|
|
2830
|
+
"id": "fs_scope",
|
|
2831
|
+
"label": "Filesystem scope",
|
|
2832
|
+
"type": "enum",
|
|
2833
|
+
# Default-first: host (Agent scope), then workspace, then server default.
|
|
2834
|
+
"options": ["host", "workspace", None],
|
|
2835
|
+
"render": {
|
|
2836
|
+
None: "Server default",
|
|
2837
|
+
"workspace": "Workspace (sandbox)",
|
|
2838
|
+
"host": "Host (Agent scope)",
|
|
2839
|
+
},
|
|
2840
|
+
},
|
|
2841
|
+
{
|
|
2842
|
+
"id": "fs_host_mode",
|
|
2843
|
+
"label": "Host mode",
|
|
2844
|
+
"type": "enum",
|
|
2845
|
+
# Default-first: custom (use Agent scope), then cwd, then any, then server default.
|
|
2846
|
+
"options": ["custom", "cwd", "any", None],
|
|
2847
|
+
"render": {
|
|
2848
|
+
None: "Server default / any",
|
|
2849
|
+
"any": "any (no extra client restriction)",
|
|
2850
|
+
"cwd": "Current working directory",
|
|
2851
|
+
"custom": "Custom (use Agent scope)",
|
|
2852
|
+
},
|
|
2853
|
+
},
|
|
2688
2854
|
]},
|
|
2689
2855
|
{"label": "Code Map", "type": "group", "items": [
|
|
2690
2856
|
{"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
|
|
2691
2857
|
]},
|
|
2692
2858
|
{"label": "Preambles & First-turn", "type": "group", "items": [
|
|
2693
|
-
{
|
|
2859
|
+
{
|
|
2860
|
+
"id": "preambles_enabled",
|
|
2861
|
+
"label": "Enable tool call preambles (GPT-5 only)",
|
|
2862
|
+
"type": "bool",
|
|
2863
|
+
# Only show this control when the *currently selected* model supports it.
|
|
2864
|
+
# (This updates live as the Model picker changes.)
|
|
2865
|
+
"visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
|
|
2866
|
+
},
|
|
2694
2867
|
{"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
|
|
2695
2868
|
{"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
|
|
2696
2869
|
{"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
|
|
@@ -2703,8 +2876,22 @@ class ChatCLI:
|
|
|
2703
2876
|
]},
|
|
2704
2877
|
]
|
|
2705
2878
|
|
|
2706
|
-
#
|
|
2707
|
-
|
|
2879
|
+
# Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
|
|
2880
|
+
try:
|
|
2881
|
+
if not self._is_gpt_model(self.model):
|
|
2882
|
+
for g in items:
|
|
2883
|
+
if not isinstance(g, dict):
|
|
2884
|
+
continue
|
|
2885
|
+
if (g.get("type") == "group") and (g.get("label") == "General"):
|
|
2886
|
+
for row in (g.get("items") or []):
|
|
2887
|
+
if isinstance(row, dict) and row.get("id") == "text_verbosity":
|
|
2888
|
+
row["options"] = ["medium", "high"]
|
|
2889
|
+
row["render"] = {"medium": "Medium", "high": "High"}
|
|
2890
|
+
except Exception:
|
|
2891
|
+
pass
|
|
2892
|
+
|
|
2893
|
+
# Prepare initial values with enum placeholder for model when custom text set
|
|
2894
|
+
init_for_ui = dict(initial)
|
|
2708
2895
|
if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
|
|
2709
2896
|
# Represent as 'custom' for cycling, but keep original model in working copy for edit with 'e'
|
|
2710
2897
|
pass # We'll keep exact model string; enum will show the raw value when not matched
|
|
@@ -3052,17 +3239,17 @@ class ChatCLI:
|
|
|
3052
3239
|
self.ui.success(f"FS Scope set to: {self._fs_label()}")
|
|
3053
3240
|
self.save_settings()
|
|
3054
3241
|
|
|
3055
|
-
async def set_level_menu(self) -> None:
|
|
3056
|
-
val = await self._menu_choice(
|
|
3057
|
-
"Control Level",
|
|
3058
|
-
"Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
|
|
3059
|
-
[
|
|
3060
|
-
("
|
|
3061
|
-
("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
|
|
3062
|
-
("
|
|
3063
|
-
("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
|
|
3064
|
-
],
|
|
3065
|
-
)
|
|
3242
|
+
async def set_level_menu(self) -> None:
|
|
3243
|
+
val = await self._menu_choice(
|
|
3244
|
+
"Control Level",
|
|
3245
|
+
"Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
|
|
3246
|
+
[
|
|
3247
|
+
("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
|
|
3248
|
+
("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
|
|
3249
|
+
("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
|
|
3250
|
+
("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
|
|
3251
|
+
],
|
|
3252
|
+
)
|
|
3066
3253
|
if val == "default":
|
|
3067
3254
|
self.control_level = None
|
|
3068
3255
|
elif val in ("1", "2", "3"):
|
|
@@ -3142,16 +3329,16 @@ class ChatCLI:
|
|
|
3142
3329
|
except Exception:
|
|
3143
3330
|
pass
|
|
3144
3331
|
|
|
3145
|
-
# 3) Tool usage preamble (UX hint)
|
|
3332
|
+
# 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
|
|
3146
3333
|
try:
|
|
3147
|
-
if bool(getattr(self, "preambles_enabled", False)) and
|
|
3334
|
+
if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
|
|
3148
3335
|
blocks.append(
|
|
3149
3336
|
"Tool usage: when you need to read or modify files or run commands, "
|
|
3150
3337
|
"explicitly explain why you're using a tool, what you'll do, and how it "
|
|
3151
3338
|
"advances the user's goal before calling the tool."
|
|
3152
3339
|
)
|
|
3153
|
-
except Exception:
|
|
3154
|
-
pass
|
|
3340
|
+
except Exception:
|
|
3341
|
+
pass
|
|
3155
3342
|
|
|
3156
3343
|
# 4) Working memory (context-summary file), injected once on fresh session restart
|
|
3157
3344
|
try:
|
|
@@ -4615,21 +4802,29 @@ class ChatCLI:
|
|
|
4615
4802
|
self.save_settings()
|
|
4616
4803
|
return True
|
|
4617
4804
|
|
|
4618
|
-
if cmd.startswith("/system "):
|
|
4619
|
-
self.system_prompt = cmd[len("/system ") :].strip()
|
|
4620
|
-
self.history = []
|
|
4621
|
-
if self.system_prompt:
|
|
4622
|
-
self.history.append({"role": "system", "content": self.system_prompt})
|
|
4623
|
-
# Treat as a fresh session; allow map re-injection
|
|
4624
|
-
self._did_inject_codebase_map = False
|
|
4805
|
+
if cmd.startswith("/system "):
|
|
4806
|
+
self.system_prompt = cmd[len("/system ") :].strip()
|
|
4807
|
+
self.history = []
|
|
4808
|
+
if self.system_prompt:
|
|
4809
|
+
self.history.append({"role": "system", "content": self.system_prompt})
|
|
4810
|
+
# Treat as a fresh session; allow map re-injection
|
|
4811
|
+
self._did_inject_codebase_map = False
|
|
4625
4812
|
# Also allow custom first-turn injection again
|
|
4626
4813
|
try:
|
|
4627
4814
|
self._did_inject_custom_first_turn = False
|
|
4628
4815
|
except Exception:
|
|
4629
4816
|
pass
|
|
4630
|
-
self.ui.success("System prompt set.")
|
|
4631
|
-
|
|
4632
|
-
|
|
4817
|
+
self.ui.success("System prompt set.")
|
|
4818
|
+
# OpenAI threaded state is invalid once the system prompt changes.
|
|
4819
|
+
try:
|
|
4820
|
+
self._openai_previous_response_id = None
|
|
4821
|
+
self._openai_response_id_history = []
|
|
4822
|
+
self._openai_input_items = []
|
|
4823
|
+
self._openai_last_sent_input_items = None
|
|
4824
|
+
except Exception:
|
|
4825
|
+
pass
|
|
4826
|
+
self.save_settings()
|
|
4827
|
+
return True
|
|
4633
4828
|
|
|
4634
4829
|
if cmd.startswith("/title "):
|
|
4635
4830
|
new_title = cmd[len("/title ") :].strip()
|
|
@@ -4648,13 +4843,17 @@ class ChatCLI:
|
|
|
4648
4843
|
self._did_inject_custom_first_turn = False
|
|
4649
4844
|
except Exception:
|
|
4650
4845
|
pass
|
|
4651
|
-
# Reset provider-native histories
|
|
4652
|
-
try:
|
|
4846
|
+
# Reset provider-native histories
|
|
4847
|
+
try:
|
|
4653
4848
|
self.messages_for_save = []
|
|
4654
4849
|
if not self.save_chat_history:
|
|
4655
4850
|
self.thread_uid = None
|
|
4656
4851
|
self._kimi_raw_history = []
|
|
4657
4852
|
self._gemini_raw_history = []
|
|
4853
|
+
self._openai_previous_response_id = None
|
|
4854
|
+
self._openai_response_id_history = []
|
|
4855
|
+
self._openai_input_items = []
|
|
4856
|
+
self._openai_last_sent_input_items = None
|
|
4658
4857
|
except Exception:
|
|
4659
4858
|
pass
|
|
4660
4859
|
# Reset local cumulative token counters on session clear
|
|
@@ -4733,7 +4932,7 @@ class ChatCLI:
|
|
|
4733
4932
|
|
|
4734
4933
|
# ---------------------------- Run loop ----------------------------
|
|
4735
4934
|
|
|
4736
|
-
async def run(self) -> None:
|
|
4935
|
+
async def run(self) -> None:
|
|
4737
4936
|
# Try persisted auth
|
|
4738
4937
|
self._load_auth_state_from_disk()
|
|
4739
4938
|
|
|
@@ -4907,25 +5106,70 @@ class ChatCLI:
|
|
|
4907
5106
|
self._session_started_at = None
|
|
4908
5107
|
# Prepare completer for slash commands (if prompt_toolkit is available)
|
|
4909
5108
|
pt_completer = self._commands_word_completer()
|
|
4910
|
-
while True:
|
|
4911
|
-
try:
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
|
|
4915
|
-
|
|
4916
|
-
|
|
4917
|
-
|
|
4918
|
-
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
|
|
4924
|
-
|
|
4925
|
-
|
|
4926
|
-
|
|
4927
|
-
|
|
4928
|
-
|
|
5109
|
+
while True:
|
|
5110
|
+
try:
|
|
5111
|
+
pending_edit = self._pending_user_edit
|
|
5112
|
+
edit_mode = pending_edit is not None
|
|
5113
|
+
|
|
5114
|
+
if self._pt_session is not None:
|
|
5115
|
+
# Use prompt_toolkit with inline completion when available
|
|
5116
|
+
# Pass completer per-prompt to ensure latest catalog
|
|
5117
|
+
try:
|
|
5118
|
+
# prompt_toolkit supports default= on modern versions; fall back gracefully.
|
|
5119
|
+
if edit_mode:
|
|
5120
|
+
user_input = await self._pt_session.prompt_async(
|
|
5121
|
+
"You (edit): ",
|
|
5122
|
+
completer=pt_completer,
|
|
5123
|
+
complete_while_typing=True,
|
|
5124
|
+
default=str(pending_edit),
|
|
5125
|
+
)
|
|
5126
|
+
else:
|
|
5127
|
+
user_input = await self._pt_session.prompt_async(
|
|
5128
|
+
"You: ",
|
|
5129
|
+
completer=pt_completer,
|
|
5130
|
+
complete_while_typing=True,
|
|
5131
|
+
)
|
|
5132
|
+
except TypeError:
|
|
5133
|
+
# Older prompt_toolkit: no default= support
|
|
5134
|
+
user_input = await self._pt_session.prompt_async(
|
|
5135
|
+
"You: ",
|
|
5136
|
+
completer=pt_completer,
|
|
5137
|
+
complete_while_typing=True,
|
|
5138
|
+
)
|
|
5139
|
+
user_input = user_input.strip()
|
|
5140
|
+
elif self._input_engine:
|
|
5141
|
+
if edit_mode:
|
|
5142
|
+
# The low-level input engine currently doesn't support prefill.
|
|
5143
|
+
# Show the previous message and let the user paste a replacement.
|
|
5144
|
+
try:
|
|
5145
|
+
self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
|
|
5146
|
+
self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
|
|
5147
|
+
except Exception:
|
|
5148
|
+
pass
|
|
5149
|
+
new_txt = self._read_multiline_input("Edit> ")
|
|
5150
|
+
user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
|
|
5151
|
+
else:
|
|
5152
|
+
# Do not add continuation prefixes on new lines
|
|
5153
|
+
user_input = self._input_engine.read_message("You: ", "")
|
|
5154
|
+
else:
|
|
5155
|
+
if edit_mode:
|
|
5156
|
+
try:
|
|
5157
|
+
self.ui.print("\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):", style=self.ui.theme["warn"]) # type: ignore
|
|
5158
|
+
self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
|
|
5159
|
+
except Exception:
|
|
5160
|
+
pass
|
|
5161
|
+
new_txt = self._read_multiline_input("Edit> ")
|
|
5162
|
+
user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
|
|
5163
|
+
else:
|
|
5164
|
+
user_input = self._read_multiline_input("You: ")
|
|
5165
|
+
|
|
5166
|
+
# Clear pending edit state after we successfully collected input.
|
|
5167
|
+
if edit_mode:
|
|
5168
|
+
self._pending_user_edit = None
|
|
5169
|
+
self._pending_turn_snapshot = None
|
|
5170
|
+
# Successful read resets interrupt window
|
|
5171
|
+
self._last_interrupt_ts = None
|
|
5172
|
+
except KeyboardInterrupt:
|
|
4929
5173
|
# First Ctrl+C: interrupt input and warn; second within window exits
|
|
4930
5174
|
now = time.time()
|
|
4931
5175
|
try:
|
|
@@ -4944,8 +5188,8 @@ class ChatCLI:
|
|
|
4944
5188
|
self.ui.print("Goodbye.")
|
|
4945
5189
|
return
|
|
4946
5190
|
|
|
4947
|
-
if not user_input:
|
|
4948
|
-
continue
|
|
5191
|
+
if not user_input:
|
|
5192
|
+
continue
|
|
4949
5193
|
|
|
4950
5194
|
# Command palette if bare '/'
|
|
4951
5195
|
if user_input == "/":
|
|
@@ -4968,12 +5212,35 @@ class ChatCLI:
|
|
|
4968
5212
|
if handled:
|
|
4969
5213
|
continue
|
|
4970
5214
|
|
|
4971
|
-
try:
|
|
4972
|
-
#
|
|
4973
|
-
|
|
4974
|
-
|
|
4975
|
-
|
|
4976
|
-
|
|
5215
|
+
try:
|
|
5216
|
+
# Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
|
|
5217
|
+
# This is critical for first-turn injections (code map/custom note/working memory)
|
|
5218
|
+
# which are applied by mutating flags during payload construction.
|
|
5219
|
+
self._pending_turn_snapshot = {
|
|
5220
|
+
"history": copy.deepcopy(self.history),
|
|
5221
|
+
"messages_for_save": copy.deepcopy(self.messages_for_save),
|
|
5222
|
+
"kimi_raw": copy.deepcopy(self._kimi_raw_history),
|
|
5223
|
+
"gemini_raw": copy.deepcopy(self._gemini_raw_history),
|
|
5224
|
+
"openai_prev": getattr(self, "_openai_previous_response_id", None),
|
|
5225
|
+
"openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
|
|
5226
|
+
"openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
|
|
5227
|
+
"openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
|
|
5228
|
+
"inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
|
|
5229
|
+
"did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
|
|
5230
|
+
"did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
|
|
5231
|
+
"did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
|
|
5232
|
+
"memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
|
|
5233
|
+
"last_built_user_content": getattr(self, "_last_built_user_content", None),
|
|
5234
|
+
}
|
|
5235
|
+
|
|
5236
|
+
# Clear any stale in-flight dispatch context at turn start.
|
|
5237
|
+
self._inflight_dispatch = None
|
|
5238
|
+
|
|
5239
|
+
# Record user message for local/server save
|
|
5240
|
+
if self.save_chat_history:
|
|
5241
|
+
self.messages_for_save.append({
|
|
5242
|
+
"role": "user",
|
|
5243
|
+
"content": user_input,
|
|
4977
5244
|
"model": None,
|
|
4978
5245
|
"citations": None,
|
|
4979
5246
|
"last_turn_input_tokens": 0,
|
|
@@ -4987,25 +5254,86 @@ class ChatCLI:
|
|
|
4987
5254
|
if self._busy:
|
|
4988
5255
|
self.ui.warn("Agent is busy with another turn. Please wait...")
|
|
4989
5256
|
continue
|
|
4990
|
-
self._busy = True
|
|
4991
|
-
try:
|
|
4992
|
-
assistant_text = await self._stream_once(user_input)
|
|
4993
|
-
finally:
|
|
4994
|
-
self._busy = False
|
|
4995
|
-
except
|
|
4996
|
-
|
|
4997
|
-
|
|
4998
|
-
|
|
4999
|
-
|
|
5257
|
+
self._busy = True
|
|
5258
|
+
try:
|
|
5259
|
+
assistant_text = await self._stream_once(user_input)
|
|
5260
|
+
finally:
|
|
5261
|
+
self._busy = False
|
|
5262
|
+
except KeyboardInterrupt:
|
|
5263
|
+
# Ctrl+C mid-stream / mid-tool: do not exit the CLI.
|
|
5264
|
+
# Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
|
|
5265
|
+
try:
|
|
5266
|
+
await self._cancel_inflight_dispatch()
|
|
5267
|
+
except (Exception, BaseException):
|
|
5268
|
+
pass
|
|
5269
|
+
|
|
5270
|
+
# Restore state to *before* this turn started.
|
|
5271
|
+
try:
|
|
5272
|
+
snap = self._pending_turn_snapshot or {}
|
|
5273
|
+
if isinstance(snap.get("history"), list):
|
|
5274
|
+
self.history = snap.get("history")
|
|
5275
|
+
if isinstance(snap.get("messages_for_save"), list):
|
|
5276
|
+
self.messages_for_save = snap.get("messages_for_save")
|
|
5277
|
+
if isinstance(snap.get("kimi_raw"), list):
|
|
5278
|
+
self._kimi_raw_history = snap.get("kimi_raw")
|
|
5279
|
+
if isinstance(snap.get("gemini_raw"), list):
|
|
5280
|
+
self._gemini_raw_history = snap.get("gemini_raw")
|
|
5281
|
+
if "openai_prev" in snap:
|
|
5282
|
+
self._openai_previous_response_id = snap.get("openai_prev")
|
|
5283
|
+
if isinstance(snap.get("openai_ids"), list):
|
|
5284
|
+
self._openai_response_id_history = snap.get("openai_ids")
|
|
5285
|
+
if isinstance(snap.get("openai_input_items"), list):
|
|
5286
|
+
self._openai_input_items = snap.get("openai_input_items")
|
|
5287
|
+
if "openai_last_sent_input_items" in snap:
|
|
5288
|
+
self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
|
|
5289
|
+
if "inflight_dispatch" in snap:
|
|
5290
|
+
self._inflight_dispatch = snap.get("inflight_dispatch")
|
|
5291
|
+
if "did_inject_codebase_map" in snap:
|
|
5292
|
+
self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
|
|
5293
|
+
if "did_inject_custom_first_turn" in snap:
|
|
5294
|
+
self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
|
|
5295
|
+
if "did_inject_working_memory" in snap:
|
|
5296
|
+
self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
|
|
5297
|
+
if "memory_paths_for_first_turn" in snap:
|
|
5298
|
+
self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
|
|
5299
|
+
self._last_built_user_content = snap.get("last_built_user_content")
|
|
5300
|
+
except Exception:
|
|
5301
|
+
pass
|
|
5302
|
+
|
|
5303
|
+
# Clear any transient indicator line and land on a fresh prompt line.
|
|
5304
|
+
try:
|
|
5305
|
+
sys.stdout.write("\r\x1b[2K\n")
|
|
5306
|
+
sys.stdout.flush()
|
|
5307
|
+
except Exception:
|
|
5308
|
+
try:
|
|
5309
|
+
self.ui.print()
|
|
5310
|
+
except Exception:
|
|
5311
|
+
pass
|
|
5312
|
+
|
|
5313
|
+
try:
|
|
5314
|
+
supports = self._provider_supports_native_retention(self.model)
|
|
5315
|
+
except Exception:
|
|
5316
|
+
supports = False
|
|
5317
|
+
if supports:
|
|
5318
|
+
self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
|
|
5319
|
+
else:
|
|
5320
|
+
self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
|
|
5321
|
+
self._pending_user_edit = user_input
|
|
5322
|
+
continue
|
|
5323
|
+
except httpx.HTTPStatusError as he:
|
|
5324
|
+
try:
|
|
5325
|
+
if he.response is not None:
|
|
5326
|
+
await he.response.aread()
|
|
5327
|
+
body = he.response.text
|
|
5000
5328
|
else:
|
|
5001
5329
|
body = ""
|
|
5002
5330
|
except Exception:
|
|
5003
5331
|
body = ""
|
|
5004
5332
|
self.ui.error(f"[HTTP error] {he.response.status_code} {body}")
|
|
5005
5333
|
continue
|
|
5006
|
-
except Exception as e:
|
|
5007
|
-
self.ui.error(f"[Client error] {e}")
|
|
5008
|
-
continue
|
|
5334
|
+
except Exception as e:
|
|
5335
|
+
self.ui.error(f"[Client error] {e}")
|
|
5336
|
+
continue
|
|
5009
5337
|
|
|
5010
5338
|
# Skip appending empty assistant messages to avoid 422 on next request
|
|
5011
5339
|
if assistant_text.strip():
|
|
@@ -5031,7 +5359,7 @@ class ChatCLI:
|
|
|
5031
5359
|
("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
|
|
5032
5360
|
("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
|
|
5033
5361
|
(auth_action_key, auth_action_label),
|
|
5034
|
-
("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5,
|
|
5362
|
+
("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2-thinking, etc.)"),
|
|
5035
5363
|
("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
|
|
5036
5364
|
("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
|
|
5037
5365
|
("clear_history", "🧹 Clear History - Reset chat history"),
|
|
@@ -5067,15 +5395,14 @@ class ChatCLI:
|
|
|
5067
5395
|
has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
|
|
5068
5396
|
is_effectively_free = (self.is_free_tier and not has_credits)
|
|
5069
5397
|
|
|
5070
|
-
# Recommended models (
|
|
5071
|
-
# Curated list per request (include Codex Max as recommended)
|
|
5398
|
+
# Recommended models ("feelings" order)
|
|
5072
5399
|
rec_keys = [
|
|
5073
|
-
"
|
|
5074
|
-
"
|
|
5400
|
+
"gpt-5.2",
|
|
5401
|
+
"gpt-5.2-codex",
|
|
5402
|
+
"gpt-5",
|
|
5075
5403
|
"gemini-3-pro-preview",
|
|
5076
5404
|
"gemini-3-flash-preview",
|
|
5077
|
-
"
|
|
5078
|
-
"gpt-5.2",
|
|
5405
|
+
"claude-opus-4-5-20251101",
|
|
5079
5406
|
"kimi-k2-thinking",
|
|
5080
5407
|
"grok-code-fast-1",
|
|
5081
5408
|
]
|
|
@@ -5117,8 +5444,7 @@ class ChatCLI:
|
|
|
5117
5444
|
suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
|
|
5118
5445
|
choices.append((m, f"{lbl}{suffix}"))
|
|
5119
5446
|
|
|
5120
|
-
|
|
5121
|
-
choices.append(("custom", "Custom (enter a model name)"))
|
|
5447
|
+
# Per issue list: do not surface "server default" or "custom" in this picker.
|
|
5122
5448
|
|
|
5123
5449
|
# Render and select using the unified highlighted picker
|
|
5124
5450
|
picked: Optional[str] = None
|
|
@@ -5133,27 +5459,15 @@ class ChatCLI:
|
|
|
5133
5459
|
picked = str(val)
|
|
5134
5460
|
|
|
5135
5461
|
# Enforce free tier restrictions
|
|
5136
|
-
if
|
|
5137
|
-
self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
|
|
5138
|
-
continue
|
|
5462
|
+
if is_effectively_free and is_paid_model(picked):
|
|
5463
|
+
self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
|
|
5464
|
+
continue
|
|
5139
5465
|
|
|
5140
5466
|
break
|
|
5141
5467
|
|
|
5142
|
-
# Apply selection
|
|
5143
|
-
|
|
5144
|
-
|
|
5145
|
-
self.ui.info("Model cleared; server default will be used.")
|
|
5146
|
-
elif picked == "custom":
|
|
5147
|
-
typed = self.ui.prompt(
|
|
5148
|
-
"Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
|
|
5149
|
-
default=self.model or "",
|
|
5150
|
-
)
|
|
5151
|
-
self.model = self._resolve_model_alias(typed.strip() or None)
|
|
5152
|
-
if not self.model:
|
|
5153
|
-
self.ui.info("Model cleared; server default will be used.")
|
|
5154
|
-
else:
|
|
5155
|
-
self.model = picked
|
|
5156
|
-
self.ui.success(f"Model set to: {self.model}")
|
|
5468
|
+
# Apply selection
|
|
5469
|
+
self.model = picked
|
|
5470
|
+
self.ui.success(f"Model set to: {self.model}")
|
|
5157
5471
|
|
|
5158
5472
|
self._apply_model_side_effects()
|
|
5159
5473
|
self.save_settings()
|
|
@@ -5242,7 +5556,7 @@ class ChatCLI:
|
|
|
5242
5556
|
self.save_settings()
|
|
5243
5557
|
return True
|
|
5244
5558
|
|
|
5245
|
-
if choice == "set_system_prompt":
|
|
5559
|
+
if choice == "set_system_prompt":
|
|
5246
5560
|
prompt = self.ui.prompt("Enter system prompt", default=self.system_prompt or "")
|
|
5247
5561
|
self.system_prompt = prompt.strip()
|
|
5248
5562
|
self.history = []
|
|
@@ -5254,14 +5568,19 @@ class ChatCLI:
|
|
|
5254
5568
|
self._did_inject_custom_first_turn = False
|
|
5255
5569
|
except Exception:
|
|
5256
5570
|
pass
|
|
5257
|
-
# Clear provider-native histories on system reset
|
|
5258
|
-
try:
|
|
5571
|
+
# Clear provider-native histories on system reset
|
|
5572
|
+
try:
|
|
5259
5573
|
self.messages_for_save = []
|
|
5260
5574
|
if not self.save_chat_history:
|
|
5261
5575
|
self.thread_uid = None
|
|
5262
|
-
self._kimi_raw_history = []
|
|
5263
|
-
|
|
5264
|
-
|
|
5576
|
+
self._kimi_raw_history = []
|
|
5577
|
+
self._gemini_raw_history = []
|
|
5578
|
+
self._openai_previous_response_id = None
|
|
5579
|
+
self._openai_response_id_history = []
|
|
5580
|
+
self._openai_input_items = []
|
|
5581
|
+
self._openai_last_sent_input_items = None
|
|
5582
|
+
except Exception:
|
|
5583
|
+
pass
|
|
5265
5584
|
self.ui.success("System prompt set.")
|
|
5266
5585
|
self.save_settings()
|
|
5267
5586
|
return True
|
|
@@ -5279,6 +5598,8 @@ class ChatCLI:
|
|
|
5279
5598
|
self.thread_uid = None
|
|
5280
5599
|
self._kimi_raw_history = []
|
|
5281
5600
|
self._gemini_raw_history = []
|
|
5601
|
+
self._openai_previous_response_id = None
|
|
5602
|
+
self._openai_response_id_history = []
|
|
5282
5603
|
except Exception:
|
|
5283
5604
|
pass
|
|
5284
5605
|
# Reset local cumulative token counters on session clear
|
|
@@ -5333,11 +5654,82 @@ class ChatCLI:
|
|
|
5333
5654
|
return True
|
|
5334
5655
|
|
|
5335
5656
|
# ----------------------- SSE Streaming loop ------------------------
|
|
5336
|
-
async def _stream_once(self, user_input: str) -> str:
|
|
5337
|
-
# Build request payload
|
|
5338
|
-
|
|
5657
|
+
async def _stream_once(self, user_input: str) -> str:
|
|
5658
|
+
# Build request payload.
|
|
5659
|
+
# OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
|
|
5660
|
+
# `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
|
|
5661
|
+
if self._is_openai_model(self.model):
|
|
5662
|
+
msgs: List[Dict[str, str]] = []
|
|
5663
|
+
# Codex developer prompt (if enabled) + system prompt
|
|
5664
|
+
try:
|
|
5665
|
+
if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
|
|
5666
|
+
msgs.append({"role": "system", "content": self._codex_system_prompt()})
|
|
5667
|
+
except Exception:
|
|
5668
|
+
pass
|
|
5669
|
+
if self.system_prompt:
|
|
5670
|
+
msgs.append({"role": "system", "content": self.system_prompt})
|
|
5671
|
+
|
|
5672
|
+
# Apply first-turn-only injections to the current user content
|
|
5673
|
+
content = user_input
|
|
5674
|
+
prefix = self._build_first_turn_injection(user_input)
|
|
5675
|
+
if prefix:
|
|
5676
|
+
content = f"{prefix}\n\n{user_input}"
|
|
5677
|
+
try:
|
|
5678
|
+
self._last_built_user_content = content
|
|
5679
|
+
except Exception:
|
|
5680
|
+
self._last_built_user_content = user_input
|
|
5681
|
+
msgs.append({"role": "user", "content": content})
|
|
5682
|
+
|
|
5683
|
+
payload: Dict[str, Any] = {"messages": msgs}
|
|
5684
|
+
|
|
5685
|
+
# Build OpenAI native input items (authoritative for the server OpenAI path).
|
|
5686
|
+
try:
|
|
5687
|
+
if isinstance(self._openai_input_items, list) and self._openai_input_items:
|
|
5688
|
+
items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
|
|
5689
|
+
else:
|
|
5690
|
+
# Seed with system prompts for the first OpenAI turn.
|
|
5691
|
+
items = []
|
|
5692
|
+
try:
|
|
5693
|
+
if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
|
|
5694
|
+
items.append({"role": "system", "content": self._codex_system_prompt()})
|
|
5695
|
+
except Exception:
|
|
5696
|
+
pass
|
|
5697
|
+
if self.system_prompt:
|
|
5698
|
+
items.append({"role": "system", "content": self.system_prompt})
|
|
5699
|
+
items.append({"role": "user", "content": content})
|
|
5700
|
+
payload["openai_input_items"] = self._sanitize_openai_items(items)
|
|
5701
|
+
self._openai_last_sent_input_items = copy.deepcopy(items)
|
|
5702
|
+
except Exception:
|
|
5703
|
+
# If this fails for any reason, fall back to normal message-based history.
|
|
5704
|
+
self._openai_last_sent_input_items = None
|
|
5705
|
+
|
|
5706
|
+
# OpenAI Threading: DISABLED. We use full manual input item replay now.
|
|
5707
|
+
# if "openai_input_items" not in payload:
|
|
5708
|
+
# try:
|
|
5709
|
+
# if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
|
|
5710
|
+
# payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
|
|
5711
|
+
# except Exception:
|
|
5712
|
+
# pass
|
|
5713
|
+
try:
|
|
5714
|
+
if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
|
|
5715
|
+
payload["openai_response_id_history"] = list(self._openai_response_id_history)
|
|
5716
|
+
except Exception:
|
|
5717
|
+
pass
|
|
5718
|
+
else:
|
|
5719
|
+
payload = {"messages": self._build_messages(user_input)}
|
|
5339
5720
|
if self.model:
|
|
5340
5721
|
payload["model"] = self.model
|
|
5722
|
+
# OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
|
|
5723
|
+
try:
|
|
5724
|
+
if self._is_openai_model(self.model):
|
|
5725
|
+
if (
|
|
5726
|
+
isinstance(getattr(self, "_openai_response_id_history", None), list)
|
|
5727
|
+
and self._openai_response_id_history
|
|
5728
|
+
and "openai_response_id_history" not in payload
|
|
5729
|
+
):
|
|
5730
|
+
payload["openai_response_id_history"] = list(self._openai_response_id_history)
|
|
5731
|
+
except Exception:
|
|
5732
|
+
pass
|
|
5341
5733
|
# Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
|
|
5342
5734
|
try:
|
|
5343
5735
|
if self.terminal_id:
|
|
@@ -5421,10 +5813,12 @@ class ChatCLI:
|
|
|
5421
5813
|
payload["text_verbosity"] = self.text_verbosity
|
|
5422
5814
|
except Exception:
|
|
5423
5815
|
pass
|
|
5424
|
-
|
|
5425
|
-
|
|
5426
|
-
|
|
5427
|
-
|
|
5816
|
+
# Preambles are a GPT-5-only UX toggle.
|
|
5817
|
+
try:
|
|
5818
|
+
if self._supports_preambles(self.model):
|
|
5819
|
+
payload["preambles_enabled"] = bool(self.preambles_enabled)
|
|
5820
|
+
except Exception:
|
|
5821
|
+
pass
|
|
5428
5822
|
|
|
5429
5823
|
if self.web_search_enabled:
|
|
5430
5824
|
payload["enable_web_search"] = True
|
|
@@ -5646,6 +6040,51 @@ class ChatCLI:
|
|
|
5646
6040
|
# Track whether we're currently positioned at the start of a fresh line.
|
|
5647
6041
|
# This prevents double-newlines between back-to-back tool events.
|
|
5648
6042
|
at_line_start = True
|
|
6043
|
+
|
|
6044
|
+
# --- Tool call in-place status (issuelist.md #7) ---
|
|
6045
|
+
# We render a single transient line for the current tool call (no trailing newline)
|
|
6046
|
+
# so the later tool.result SUCCESS/FAILURE line can replace it in-place.
|
|
6047
|
+
tool_status_active = False
|
|
6048
|
+
tool_status_call_id = None
|
|
6049
|
+
|
|
6050
|
+
def _tool_status_clear_line() -> None:
|
|
6051
|
+
"""Clear the current line (best-effort) and return to column 0."""
|
|
6052
|
+
nonlocal at_line_start
|
|
6053
|
+
try:
|
|
6054
|
+
sys.stdout.write("\r\x1b[2K")
|
|
6055
|
+
sys.stdout.flush()
|
|
6056
|
+
except Exception:
|
|
6057
|
+
pass
|
|
6058
|
+
at_line_start = True
|
|
6059
|
+
|
|
6060
|
+
def _tool_status_show(call_id: Any, line: str) -> None:
|
|
6061
|
+
"""Show the transient tool status line (no newline)."""
|
|
6062
|
+
nonlocal tool_status_active, tool_status_call_id, at_line_start
|
|
6063
|
+
if not self.show_tool_calls:
|
|
6064
|
+
return
|
|
6065
|
+
tool_status_active = True
|
|
6066
|
+
tool_status_call_id = str(call_id) if call_id is not None else None
|
|
6067
|
+
try:
|
|
6068
|
+
if not at_line_start:
|
|
6069
|
+
sys.stdout.write("\n")
|
|
6070
|
+
sys.stdout.write("\r\x1b[2K" + str(line))
|
|
6071
|
+
sys.stdout.flush()
|
|
6072
|
+
at_line_start = False
|
|
6073
|
+
except Exception:
|
|
6074
|
+
# Fallback: degrade to a normal printed line
|
|
6075
|
+
try:
|
|
6076
|
+
self.ui.print(str(line))
|
|
6077
|
+
except Exception:
|
|
6078
|
+
pass
|
|
6079
|
+
at_line_start = True
|
|
6080
|
+
|
|
6081
|
+
def _tool_status_stop() -> None:
|
|
6082
|
+
"""Remove the transient tool status line and clear tracking."""
|
|
6083
|
+
nonlocal tool_status_active, tool_status_call_id
|
|
6084
|
+
if tool_status_active:
|
|
6085
|
+
_tool_status_clear_line()
|
|
6086
|
+
tool_status_active = False
|
|
6087
|
+
tool_status_call_id = None
|
|
5649
6088
|
# Mode: animate or static (default static for stability)
|
|
5650
6089
|
try:
|
|
5651
6090
|
_animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
|
|
@@ -5949,16 +6388,40 @@ class ChatCLI:
|
|
|
5949
6388
|
except Exception:
|
|
5950
6389
|
pass
|
|
5951
6390
|
|
|
5952
|
-
#
|
|
5953
|
-
#
|
|
5954
|
-
|
|
5955
|
-
|
|
5956
|
-
|
|
5957
|
-
|
|
5958
|
-
|
|
5959
|
-
|
|
5960
|
-
|
|
5961
|
-
|
|
6391
|
+
# issuelist.md #7:
|
|
6392
|
+
# Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
|
|
6393
|
+
try:
|
|
6394
|
+
# Clear any previous transient status line (shouldn't happen, but keep stable)
|
|
6395
|
+
_tool_status_stop()
|
|
6396
|
+
except Exception:
|
|
6397
|
+
pass
|
|
6398
|
+
try:
|
|
6399
|
+
tool_name = str(name or "").strip()
|
|
6400
|
+
label = self._tool_concise_label(
|
|
6401
|
+
tool_name,
|
|
6402
|
+
args if isinstance(args, dict) else {},
|
|
6403
|
+
None,
|
|
6404
|
+
)
|
|
6405
|
+
try:
|
|
6406
|
+
model_prefix = (
|
|
6407
|
+
self._current_turn.get("model")
|
|
6408
|
+
or self._last_used_model
|
|
6409
|
+
or self.model
|
|
6410
|
+
or "(server default)"
|
|
6411
|
+
)
|
|
6412
|
+
except Exception:
|
|
6413
|
+
model_prefix = self.model or "(server default)"
|
|
6414
|
+
ORANGE = "\x1b[38;5;214m"
|
|
6415
|
+
WHITE = "\x1b[97m"
|
|
6416
|
+
RESET = "\x1b[0m"
|
|
6417
|
+
status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
|
|
6418
|
+
_tool_status_show(call_id, status_line)
|
|
6419
|
+
except Exception:
|
|
6420
|
+
# Last-resort fallback: print something rather than crash streaming.
|
|
6421
|
+
try:
|
|
6422
|
+
self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
|
|
6423
|
+
except Exception:
|
|
6424
|
+
pass
|
|
5962
6425
|
# Count tool calls
|
|
5963
6426
|
try:
|
|
5964
6427
|
tool_calls += 1
|
|
@@ -5981,10 +6444,15 @@ class ChatCLI:
|
|
|
5981
6444
|
except Exception:
|
|
5982
6445
|
pass
|
|
5983
6446
|
|
|
5984
|
-
elif event == "approval.request":
|
|
5985
|
-
#
|
|
5986
|
-
|
|
5987
|
-
|
|
6447
|
+
elif event == "approval.request":
|
|
6448
|
+
# Don't let the transient [RUNNING] line collide with interactive prompts.
|
|
6449
|
+
try:
|
|
6450
|
+
_tool_status_stop()
|
|
6451
|
+
except Exception:
|
|
6452
|
+
pass
|
|
6453
|
+
# First reply wins (web or CLI)
|
|
6454
|
+
await self._handle_approval_request(client, session_id, data)
|
|
6455
|
+
continue
|
|
5988
6456
|
|
|
5989
6457
|
elif event == "approval.result":
|
|
5990
6458
|
appr = data.get("approved")
|
|
@@ -6026,10 +6494,22 @@ class ChatCLI:
                     self.ui.info("Working memory created. Restarting conversation with a fresh first-turn injection...")
                     return ""

-                elif event == "tool.result":
-                    name = str(data.get("name"))
-                    result = data.get("result", {}) or {}
-                    call_id = data.get("call_id")
+                elif event == "tool.result":
+                    name = str(data.get("name"))
+                    result = data.get("result", {}) or {}
+                    call_id = data.get("call_id")
+                    # If we previously rendered a transient [RUNNING] line for this tool call,
+                    # clear it now so the SUCCESS/FAILURE line prints in the same place.
+                    try:
+                        if tool_status_active:
+                            # Best-effort match on call_id (some providers may omit it).
+                            if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
+                                _tool_status_stop()
+                    except Exception:
+                        try:
+                            _tool_status_stop()
+                        except Exception:
+                            pass
                     # Stop any indicator before rendering results
                     try:
                         await _indicator_stop(clear=True)
@@ -6137,7 +6617,7 @@ class ChatCLI:
                     # Do not auto-restart the indicator here; wait for the next model event

                 elif event == "tool.dispatch":
-                    # Client-executed tool flow
+                    # Client-executed tool flow
                     if not HAS_LOCAL_TOOLS:
                         self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
                         continue
@@ -6147,12 +6627,23 @@ class ChatCLI:
                     # tool invocation on the corresponding 'tool.call' event. Counting
                     # dispatch would double-count a single tool call.

-                    session_id_d = data.get("session_id")
-                    call_id = data.get("call_id")
-                    name = data.get("name")
-                    args = data.get("args", {}) or {}
-                    job_token = data.get("job_token")
-                    reqp = data.get("requested_policy", {}) or {}
+                    session_id_d = data.get("session_id")
+                    call_id = data.get("call_id")
+                    name = data.get("name")
+                    args = data.get("args", {}) or {}
+                    job_token = data.get("job_token")
+                    reqp = data.get("requested_policy", {}) or {}
+
+                    # Track in-flight dispatch so Ctrl+C can cancel quickly.
+                    try:
+                        self._inflight_dispatch = {
+                            "session_id": session_id_d,
+                            "call_id": call_id,
+                            "job_token": job_token,
+                            "name": name,
+                        }
+                    except Exception:
+                        pass

                     if DEBUG_SSE:
                         self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
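
The _inflight_dispatch record added above only needs enough context (session id, call id, job token, tool name) to report a cancellation for the right call if the user presses Ctrl+C while a client-executed tool is still running. Below is a hedged, self-contained sketch of that bookkeeping pattern with hypothetical names; the report callable stands in for whatever the real client does with the result (in this package, a POST to the tools callback endpoint), and none of this is the package's actual code.

from typing import Any, Callable, Dict, Optional

class DispatchTracker:
    def __init__(self) -> None:
        self.inflight: Optional[Dict[str, Any]] = None

    def start(self, session_id: str, call_id: str, job_token: str, name: str) -> None:
        # Remember just enough to report a cancellation for this specific call.
        self.inflight = {"session_id": session_id, "call_id": call_id,
                         "job_token": job_token, "name": name}

    def finish(self, call_id: str) -> None:
        # Clear the record only if it still refers to the same call.
        if self.inflight and str(self.inflight.get("call_id")) == str(call_id):
            self.inflight = None

    def cancel(self, report: Callable[[Dict[str, Any]], None]) -> None:
        # On Ctrl+C, report a synthetic "cancelled" result instead of waiting for the tool.
        if self.inflight:
            report({**self.inflight, "result": {"ok": False, "error": "cancelled by user"}})
            self.inflight = None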
@@ -6427,9 +6918,9 @@ class ChatCLI:
                     except Exception:
                         self._last_dispatch_ctx = None

-                    # POST callback
-                    try:
-                        if session_id_d and call_id and job_token:
+                    # POST callback
+                    try:
+                        if session_id_d and call_id and job_token:
                             payload_cb = {
                                 "session_id": session_id_d,
                                 "call_id": call_id,
@@ -6437,16 +6928,80 @@ class ChatCLI:
                                 "result": result,
                                 "job_token": job_token,
                             }
-                            r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
-                            if r.status_code >= 400:
-                                self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
-                    except Exception as e:
-                        self.ui.warn(f"tools.callback error: {e}")
+                            r = await client.post(self.tools_callback_url, json=payload_cb, timeout=self.timeout)
+                            if r.status_code >= 400:
+                                self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
+                    except Exception as e:
+                        self.ui.warn(f"tools.callback error: {e}")
+                    finally:
+                        try:
+                            # Clear in-flight dispatch context when we send a callback.
+                            if isinstance(self._inflight_dispatch, dict):
+                                if str(self._inflight_dispatch.get("call_id")) == str(call_id):
+                                    self._inflight_dispatch = None
+                        except Exception:
+                            pass

                 elif event == "message.completed":
                     # Safety: this block handles only 'message.completed'.
                     usage = data.get("usage", {})
                     model_used = data.get("model") or self.model
+                    # OpenAI: persist the last response id so future turns can use previous_response_id.
+                    try:
+                        if self._is_openai_model(model_used):
+                            # Prefer the explicit per-turn id list when provided by the server.
+                            ids = data.get("openai_response_ids")
+                            if isinstance(ids, list) and ids:
+                                for x in ids:
+                                    if not isinstance(x, str):
+                                        continue
+                                    xs = x.strip()
+                                    if not xs:
+                                        continue
+                                    try:
+                                        if xs not in self._openai_response_id_history:
+                                            self._openai_response_id_history.append(xs)
+                                    except Exception:
+                                        pass
+                            rid = data.get("openai_previous_response_id")
+                            if isinstance(rid, str) and rid.strip():
+                                self._openai_previous_response_id = rid.strip()
+                                try:
+                                    if rid.strip() not in self._openai_response_id_history:
+                                        self._openai_response_id_history.append(rid.strip())
+                                except Exception:
+                                    pass
+
+                            # OpenAI manual-state replay: server returns the delta items appended
+                            # during this turn (reasoning/tool calls/tool outputs). Persist them.
+                            try:
+                                delta = data.get("openai_delta_items")
+                                if isinstance(delta, list):
+                                    base_items = (
+                                        self._openai_last_sent_input_items
+                                        if isinstance(self._openai_last_sent_input_items, list)
+                                        else copy.deepcopy(self._openai_input_items)
+                                    )
+                                    # Normalize to a list of dicts where possible; keep unknown shapes as-is.
+                                    merged: List[Any] = []
+                                    try:
+                                        merged.extend(list(base_items or []))
+                                    except Exception:
+                                        merged = list(base_items or []) if base_items is not None else []
+                                    merged.extend(delta)
+                                    # Store only dict-like items (server is expected to send dicts)
+                                    cleaned: List[Dict[str, Any]] = []
+                                    for it in merged:
+                                        if isinstance(it, dict):
+                                            cleaned.append(dict(it))
+                                    self._openai_input_items = cleaned
+                            except Exception:
+                                pass
+                            finally:
+                                # Clear per-turn sent snapshot
+                                self._openai_last_sent_input_items = None
+                    except Exception:
+                        pass
                     # Gemini: server may include an authoritative provider-native history snapshot.
                     try:
                         if isinstance(model_used, str) and model_used.startswith("gemini-"):
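
The new message.completed handling persists two kinds of OpenAI Responses API state: a chain of response ids (for server-side threading via previous_response_id) and a manually accumulated list of input items (for stateless/ZDR-safe replay). The following is a minimal sketch of both patterns written directly against the OpenAI Python SDK; it is an illustration of the idea under assumptions, not the request flow the henosis server actually performs, and the model name and prompts are placeholders only.

from openai import OpenAI

client = OpenAI()

# Mode 1: server-side threading. Only the new turn is sent; the provider stitches the
# conversation together from the previous response id (cf. _openai_previous_response_id).
first = client.responses.create(model="gpt-5", input="Summarize the repo layout.")
follow_up = client.responses.create(
    model="gpt-5",
    input="Now list the CLI entry points.",
    previous_response_id=first.id,
)

# Mode 2: stateless replay. Keep the whole input-item chain locally, append whatever the
# previous response produced, and resend it each turn (cf. _openai_input_items and the
# server-echoed openai_delta_items). store=False avoids relying on server-side storage.
items = [{"role": "user", "content": "Summarize the repo layout."}]
resp = client.responses.create(model="gpt-5", input=items, store=False)
items += resp.output  # reasoning / message / tool-call items emitted this turn
items.append({"role": "user", "content": "Now list the CLI entry points."})
resp2 = client.responses.create(model="gpt-5", input=items, store=False)

Mode 2 trades larger requests for independence from provider-side response storage, which is why the CLI keeps the full item chain, including reasoning and tool-call items, rather than only the visible messages.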
@@ -7865,7 +8420,7 @@ class ChatCLI:
                 "gpt-5": 400000,
                 "gpt-5-2025-08-07": 400000,
                 "codex-mini-latest": 200000,
-
+                # (removed gemini-2.5-pro)
                 "gemini-3-flash-preview": 1048576,
                 "gemini-3-pro-preview": 1000000,
                 "grok-4-1-fast-reasoning": 2000000,
@@ -7882,10 +8437,10 @@ class ChatCLI:
                 "claude-sonnet-4-5-20250929-thinking": 1000000,
                 "claude-opus-4-5-20251101": 200000,
                 "claude-opus-4-5-20251101-thinking": 200000,
-                "glm-4.
-            })
-        except Exception:
-            pass
+                "glm-4.7": 200000,
+            })
+        except Exception:
+            pass
         self._model_ctx_map = ctx_map
         return ctx_map

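ctx_map pairs model identifiers with advertised context-window sizes. A small hypothetical helper (the fallback value below is an assumption, not taken from this diff) shows how such a map is typically consulted when deciding how much conversation history still fits:

from typing import Dict

DEFAULT_CTX = 128_000  # assumed conservative fallback for unknown models

def context_window(model: str, ctx_map: Dict[str, int]) -> int:
    """Return the advertised context window for a model, or the fallback."""
    return ctx_map.get(model, DEFAULT_CTX)

def tokens_remaining(model: str, used_tokens: int, ctx_map: Dict[str, int]) -> int:
    """Headroom left before history would need to be trimmed or summarized."""
    return max(context_window(model, ctx_map) - used_tokens, 0)
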
@@ -8272,8 +8827,11 @@ class ChatCLI:
            "We’ll configure a few defaults. You can change these later via /settings.",
        )

-        # --- 1) Default model (menu
-        await self._wizard_model_step()
+        # --- 1) Default model (menu) ---
+        await self._wizard_model_step()
+        # If the picker was cancelled (or model still unset), choose a sensible default.
+        if not self.model:
+            self.model = self._recommended_default_model()

        # --- 2) Tools (always ON per design) ---
        self.requested_tools = True
@@ -8283,9 +8841,9 @@ class ChatCLI:
            "Control levels: 1=read-only, 2=approval on write/exec, 3=no approvals"
        )
        await self.set_level_menu()
-        if self.control_level not in (1, 2, 3):
-            # Default to Level
-            self.control_level =
+        if self.control_level not in (1, 2, 3):
+            # Default to Level 3 if user aborted
+            self.control_level = 3

        # --- 4) Agent scope (menus; only type on custom path) ---
        self.ui.print(
@@ -8336,30 +8894,45 @@ class ChatCLI:
        except Exception:
            curv = "medium"

-
-
-
-        [
-            ("low", "Low – short, to-the-point answers"),
-            ("medium", "Medium – balanced detail
-            ("high", "High – more verbose explanations"),
-        ]
-
+        verbosity_choices: List[Tuple[str, str]] = []
+        if self._is_gpt_model(self.model):
+            # Default-first: Low for GPT models.
+            verbosity_choices = [
+                ("low", "Low – short, to-the-point answers"),
+                ("medium", "Medium – balanced detail"),
+                ("high", "High – more verbose explanations"),
+            ]
+        else:
+            # Default-first: Medium for non-GPT models; do not surface "Low".
+            verbosity_choices = [
+                ("medium", "Medium – balanced detail (recommended)"),
+                ("high", "High – more verbose explanations"),
+            ]
+
+        verb_choice = await self._menu_choice(
+            "Text verbosity",
+            "How verbose should responses be by default?",
+            verbosity_choices,
+        )
        if verb_choice in ("low", "medium", "high"):
            self.text_verbosity = verb_choice
        else:
            self.text_verbosity = curv or "medium"

-        # --- 7) Tool preambles (
-
-
-
-
-
-
-
-
-
+        # --- 7) Tool preambles (GPT-5 only) ---
+        if self._supports_preambles(self.model):
+            preamble_choice = await self._menu_choice(
+                "Tool call preambles",
+                "Before using tools, the agent can briefly explain what it will do and why.",
+                [
+                    ("off", "Disable preambles (default)"),
+                    ("on", "Enable preambles"),
+                ],
+            )
+            self.preambles_enabled = preamble_choice == "on"
+        else:
+            # Never enable preambles on unsupported models.
+            self.preambles_enabled = False

        # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
        custom_choice = await self._menu_choice(
@@ -8433,7 +9006,8 @@ class ChatCLI:
                text = m.get("content", "")
                contents.append({"role": role, "parts": [{"text": text}]})
            # Pick a Gemini model for counting; fall back if current isn't Gemini
-
+            # (gemini-2.5-pro removed from curated lists)
+            count_model = "gemini-3-flash-preview"
            res = client.models.count_tokens(model=count_model, contents=contents)
            t = int(getattr(res, "total_tokens", 0) or 0)
            if t > 0:
@@ -8491,16 +9065,16 @@ class ChatCLI:
                blocks.append(txt.strip())
        except Exception:
            pass
-        # Tool preamble
-        try:
-            if bool(getattr(self, "preambles_enabled", False)):
-                blocks.append(
-                    "Tool usage: when you need to read or modify files or run commands, "
-                    "explicitly explain why you're using a tool, what you'll do, and how it "
-                    "advances the user's goal before calling the tool."
-                )
-        except Exception:
-            pass
+        # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+        try:
+            if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
+                blocks.append(
+                    "Tool usage: when you need to read or modify files or run commands, "
+                    "explicitly explain why you're using a tool, what you'll do, and how it "
+                    "advances the user's goal before calling the tool."
+                )
+        except Exception:
+            pass
        # Working memory preview (does not touch _did_inject_working_memory or paths)
        try:
            if self._memory_paths_for_first_turn:
|