henosis-cli 0.6.8-py3-none-any.whl → 0.6.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli.py +1986 -1457
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.10.dist-info}/METADATA +1 -2
- henosis_cli-0.6.10.dist-info/RECORD +11 -0
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.10.dist-info}/WHEEL +1 -1
- henosis_cli_tools/input_engine.py +228 -4
- henosis_cli_tools/settings_ui.py +77 -38
- henosis_cli_tools/tool_impl.py +291 -102
- henosis_cli-0.6.8.dist-info/RECORD +0 -11
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.10.dist-info}/entry_points.txt +0 -0
- {henosis_cli-0.6.8.dist-info → henosis_cli-0.6.10.dist-info}/top_level.txt +0 -0
cli.py
CHANGED
@@ -8,6 +8,7 @@
 
 import argparse
 import asyncio
+import copy
 import json
 import os
 import sys
@@ -30,6 +31,7 @@ import importlib
 import importlib.util
 import importlib.metadata
 import re
+import base64

 # Optional websockets for Agent Mode (dev-only WS bridge)
 try:
@@ -80,64 +82,53 @@ except Exception:
     Confirm = None
     Text = None

+"""prompt_toolkit is intentionally not used.
+
+We previously relied on prompt_toolkit for interactive line editing and menus.
+Copy/selection behavior is terminal- and prompt_toolkit-implementation specific
+and proved unreliable across environments.
+
+The CLI now uses our dependency-free input engine (henosis_cli_tools.input_engine)
+and a dependency-free highlighted menu implementation.
 """
-… (16 deleted lines not shown) …
-    from prompt_toolkit.layout.controls import FormattedTextControl
-    from prompt_toolkit.styles import Style
-    HAS_PT = True
-except Exception:
-    HAS_PT = False
-    PromptSession = None
-    WordCompleter = None
-    KeyBindings = None
-    Application = None
-    get_app = None
-    Layout = None
-    HSplit = None
-    Window = None
-    Dimension = None
-    FormattedTextControl = None
-    Style = None
+
+# Keep these names defined for legacy branches that are guarded by HAS_PT.
+HAS_PT = False
+PromptSession = None
+WordCompleter = None
+KeyBindings = None
+SelectionType = None
+Condition = None
+Application = None
+get_app = None
+Layout = None
+HSplit = None
+Window = None
+Dimension = None
+FormattedTextControl = None
+Style = None

 # If optional deps are missing, print a friendly note but continue with fallbacks.
-if not HAS_RICH
-
-
-
-
-        missing.append("prompt_toolkit")
-    if missing:
-        msg = (
-            "Note: optional packages missing: "
-            + ", ".join(missing)
-            + "\n- rich enables colorful output\n- prompt_toolkit enables arrow-key menus\n"
+if not HAS_RICH:
+    try:
+        sys.stderr.write(
+            "Note: optional package missing: rich\n"
+            "- rich enables colorful output\n"
         )
-
-
-    except Exception:
-        pass
+    except Exception:
+        pass

 # New: low-level input engine (no third-party deps) for Shift+Enter newlines
+# Also provides a best-effort clipboard helper used for Ctrl+C copy when our
+# prompt_toolkit selection is active.
 try:
-    from henosis_cli_tools.input_engine import make_engine
+    from henosis_cli_tools.input_engine import make_engine, _copy_to_clipboard as _hn_copy_to_clipboard
     HAS_INPUT_ENGINE = True
 except Exception:
     HAS_INPUT_ENGINE = False
+
+    def _hn_copy_to_clipboard(text: str) -> bool:  # type: ignore
+        return False
 DEBUG_SSE = False  # set via --debug-sse
 DEBUG_REQ = False  # set via --debug-req
 # Max number of recent SSE event summaries to retain for diagnostics when a stream
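The fallback branch above guarantees that `_hn_copy_to_clipboard` always exists, even when `henosis_cli_tools.input_engine` cannot be imported, so callers never need to guard the call. A minimal sketch of such a caller (the function below is invented for illustration, not part of the package):

    # Illustrative only: the stub defined above returns False, so a caller can branch on success.
    def copy_selection(text: str) -> None:
        if _hn_copy_to_clipboard(text):
            print("Copied selection to clipboard")
        else:
            print("Clipboard unavailable; selection left in the terminal")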
@@ -683,7 +674,7 @@ class UI:
         for n, ty, sz in rows:
             print(f"{n:<40} {ty:<8} {sz}")

-class ChatCLI:
+class ChatCLI:
     def __init__(
         self,
         server: str,
@@ -800,23 +791,25 @@ class ChatCLI:
         # - concise: only model (+thinking level when applicable) and context meter
         # - verbose: full details (current behavior)
         self.usage_info_mode: str = "verbose"
-        # Reasoning effort selector for OpenAI reasoning models (low|medium|high|xhigh). Default: medium
-        # Note: 'xhigh' is only applied by the server for models that support it (e.g., gpt-5.2* and gpt-5.1-codex-max).
-        self.reasoning_effort: str = "medium"
+        # Reasoning effort selector for OpenAI reasoning models (low|medium|high|xhigh). Default: medium
+        # Note: 'xhigh' is only applied by the server for models that support it (e.g., gpt-5.2* and gpt-5.1-codex-max).
+        self.reasoning_effort: str = "medium"
         # Retain provider-native tool results between turns (e.g., Kimi reasoning/tool messages)
         self.retain_native_tool_results: bool = False
         # Anthropic thinking-mode budget tokens (applies to '-thinking' models; None = server default)
-        self.thinking_budget_tokens: Optional[int] = None
-        # Anthropic
+        self.thinking_budget_tokens: Optional[int] = None
+        # Anthropic effort (Opus 4.6/4.5): low|medium|high|max. Default: high.
+        self.anthropic_effort: str = "high"
+        # Anthropic prompt cache TTL preference: None=server default, or "5m" | "1h"
         self.anthropic_cache_ttl: Optional[str] = None
-        # Text verbosity selector (UI only; not sent to server requests by default)
-        self.text_verbosity: str = "medium"  # low | medium | high
-        # Tool call preambles (UI toggle only)
-        self.preambles_enabled: bool = False
-        # Codex developer prompt injection (system) for Codex models only
-        self.codex_prompt_enabled: bool = True
-        # Codex Max: allow ALL tools instead of minimal subset
-        self.codex_max_allow_all_tools: bool = False
+        # Text verbosity selector (UI only; not sent to server requests by default)
+        self.text_verbosity: str = "medium"  # low | medium | high
+        # Tool call preambles (UI toggle only)
+        self.preambles_enabled: bool = False
+        # Codex developer prompt injection (system) for Codex models only
+        self.codex_prompt_enabled: bool = True
+        # Codex Max: allow ALL tools instead of minimal subset
+        self.codex_max_allow_all_tools: bool = False
         # Custom first-turn injection (like codebase map) — toggle + editable text
         self.custom_first_turn_enabled: bool = False
         self.custom_first_turn_text: str = ""
@@ -997,10 +990,25 @@ class ChatCLI:
         }
         # Track last used model for display
         self._last_used_model: Optional[str] = None
-        # Provider-native history for Kimi (preserve reasoning_content across turns)
-        self._kimi_raw_history: List[Dict[str, Any]] = []
-        # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
-        self._gemini_raw_history: List[Dict[str, Any]] = []
+        # Provider-native history for Kimi (preserve reasoning_content across turns)
+        self._kimi_raw_history: List[Dict[str, Any]] = []
+        # Provider-native history for Gemini (preserve thoughtSignatures + strict tool-call chains across turns)
+        self._gemini_raw_history: List[Dict[str, Any]] = []
+        # OpenAI Responses API threading: retain previous response id across turns
+        self._openai_previous_response_id: Optional[str] = None
+        # OpenAI Responses API threading: retain the full chain of response ids across turns
+        # (server will also echo per-turn ids in message.completed.openai_response_ids)
+        self._openai_response_id_history: List[str] = []
+
+        # OpenAI Responses API manual state (stateless/ZDR-safe): retain the full input item chain
+        # including reasoning items, function_call items, and function_call_output items.
+        self._openai_input_items: List[Dict[str, Any]] = []
+        # For robustness, remember exactly what we sent as openai_input_items for the current turn
+        # so we can append server-provided openai_delta_items deterministically.
+        self._openai_last_sent_input_items: Optional[List[Dict[str, Any]]] = None
+        # Track an in-flight client-dispatched tool job so Ctrl+C can cancel it quickly.
+        # Shape: {session_id, call_id, job_token, name}
+        self._inflight_dispatch: Optional[Dict[str, Any]] = None
         # Last server billing info from /api/usage/commit
         self._last_commit_cost_usd: float = 0.0
         self._last_remaining_credits: Optional[float] = None
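The new attributes above keep a manual OpenAI Responses item chain so a turn can be rebuilt without server-side state. A hedged sketch of the bookkeeping the comments describe, written as a standalone helper because the real method is not shown in this diff (the function name is an assumption):

    from typing import Any, Dict, List, Optional

    def absorb_delta(last_sent: Optional[List[Dict[str, Any]]],
                     full_chain: List[Dict[str, Any]],
                     delta_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        # Append the server-provided delta items onto exactly what was sent this turn,
        # falling back to the retained chain when nothing was recorded for the turn.
        base = list(last_sent if last_sent is not None else full_chain)
        base.extend(delta_items or [])
        return base

    # e.g. self._openai_input_items = absorb_delta(self._openai_last_sent_input_items,
    #                                              self._openai_input_items, delta_items)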
@@ -1054,6 +1062,12 @@ class ChatCLI:
         # Track Ctrl+C timing for double-press-to-exit behavior
         self._last_interrupt_ts: Optional[float] = None

+        # Ctrl+C during a running stream should not kill the entire CLI.
+        # Instead, we cancel the in-flight turn and reopen the last user query for editing.
+        # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+        self._pending_user_edit: Optional[str] = None
+        self._pending_turn_snapshot: Optional[Dict[str, Any]] = None
+
         # Timers: session-level and per-turn wall-clock timers
         self._session_started_at: Optional[float] = None  # time.perf_counter() at session start
         self._turn_started_at: Optional[float] = None  # time.perf_counter() per turn start
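A sketch of how the pending-edit fields above could be consumed at the top of the prompt loop; `_read_user_input` is hypothetical and only illustrates the "reopen the last user query for editing" behavior the comment describes:

    # Hypothetical prompt-loop snippet (not from this release).
    async def next_prompt(cli) -> str:
        prefill = cli._pending_user_edit or ""
        cli._pending_user_edit = None
        # Re-open the interrupted query so the user can edit and resubmit it.
        return await cli._read_user_input(initial_text=prefill)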
@@ -1062,35 +1076,8 @@ class ChatCLI:
         self._commands_catalog: List[Dict[str, str]] = self._build_commands_catalog()
         # Low-level input engine (supports Shift+Enter newlines where possible)
         self._input_engine = make_engine() if HAS_INPUT_ENGINE else None
-        #
+        # prompt_toolkit intentionally not used; always rely on the input engine.
         self._pt_session = None
-        if HAS_PT and PromptSession:
-            try:
-                # Build completer and simple key bindings: Enter submits, Ctrl+J inserts newline
-                self._pt_completer = self._commands_word_completer()
-                kb = KeyBindings()
-
-                @kb.add("enter")
-                def _submit(event):
-                    # Submit entire buffer
-                    event.app.exit(result=event.current_buffer.text)
-
-                @kb.add("c-j")
-                def _newline(event):
-                    # Insert literal newline
-                    event.current_buffer.insert_text("\n")
-
-                # Bottom toolbar with quick hints
-                def _toolbar() -> str:
-                    return " Type / then Tab to complete, or Enter on '/' to open the palette. Ctrl+J inserts a newline. "
-
-                # Create session
-                self._pt_session = PromptSession(
-                    key_bindings=kb,
-                    bottom_toolbar=_toolbar,
-                )
-            except Exception:
-                self._pt_session = None

     # ----------------------- Provider heuristics -----------------------
     def _is_openai_reasoning_model(self, model: Optional[str]) -> bool:
@@ -1339,32 +1326,34 @@ class ChatCLI:

     # ----------------------- Pricing + costs -----------------------

-    def _pricing_table(self) -> Dict[str, Dict[str, Any]]:
-        # Match server chat_adapter PRICING_PER_MILLION (subset is fine; unknown -> 0)
-        return {
-            # OpenAI
-            "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
-            #
-
-            "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
+    def _pricing_table(self) -> Dict[str, Dict[str, Any]]:
+        # Match server chat_adapter PRICING_PER_MILLION (subset is fine; unknown -> 0)
+        return {
+            # OpenAI
+            "gpt-5.2": {"input": 2.00, "output": 14.25, "provider": "openai"},
+            # New: gpt-5.2-codex
+            # Pricing requested: input $1.75 / 1M, cached input $0.175 / 1M, output $14.00 / 1M
+            "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached_input": 0.175, "provider": "openai"},
+            # From gpt5.2.txt: $21/$168 base, plus +$0.25 margin each -> $21.25/$168.25
+            "gpt-5.2-pro": {"input": 21.25, "output": 168.25, "provider": "openai"},
+            "gpt-5": {"input": 1.75, "output": 14.00, "provider": "openai"},
             "gpt-5-2025-08-07": {"input": 1.75, "output": 14.00, "provider": "openai"},
             "gpt-5-codex": {"input": 1.75, "output": 14.00, "provider": "openai"},
-            "gpt-4o-mini": {"input": 0.21, "output": 0.84, "provider": "openai"},
+            "gpt-4o-mini": {"input": 0.21, "output": 0.84, "provider": "openai"},
             # Codex Mini (fine-tuned o4-mini for CLI). Pricing includes 1.4x margin per codex-mini.txt.
             # Cached input tokens override: $0.375 * 1.4 = $0.525 per 1M (25% of input rate).
             "codex-mini-latest": {"input": 2.10, "output": 8.40, "cached_input": 0.525, "provider": "openai"},
             # Anthropic
             "claude-sonnet-4-20250514": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
             "claude-sonnet-4-20250514-thinking": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
-            "claude-sonnet-4-5-20250929": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
+            "claude-sonnet-4-5-20250929": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
             "claude-sonnet-4-5-20250929-thinking": {"input": 4.20, "output": 21.00, "provider": "anthropic"},
-            # New Opus 4.
-            "claude-opus-4-
-            "claude-opus-4-
+            # New Opus 4.6 (adaptive thinking + effort; 1M context)
+            "claude-opus-4-6": {"input": 5.25, "output": 25.25, "provider": "anthropic"},
+            "claude-opus-4-6-thinking": {"input": 5.25, "output": 25.25, "provider": "anthropic"},
             # Gemini
-
-
-            "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
+            # Gemini 3 Flash Preview (priced same as prior Gemini 2.5 Flash per request)
+            "gemini-3-flash-preview": {"input": 0.21, "output": 0.84, "provider": "gemini"},
             # Gemini 3 Pro Preview ("newgem"). Base: $2/$12 and $4/$18 per 1M;
             # CLI uses the low-tier 1.4x margin rates for estimates. High-tier
             # pricing based on total_tokens > 200K is applied on the server.
@@ -1374,17 +1363,15 @@ class ChatCLI:
             "grok-4-1-fast-non-reasoning": {"input": 0.28, "output": 0.70, "provider": "xai"},
             "grok-4": {"input": 4.20, "output": 21.00, "provider": "xai"},
             "grok-code-fast-1": {"input": 0.28, "output": 2.10, "provider": "xai"},
-            # DeepSeek V3.2 (+$0.25 per 1M margin)
-            "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-            "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
-
+            # DeepSeek V3.2 (+$0.25 per 1M margin)
+            "deepseek-chat-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            "deepseek-reasoner-3.2": {"input": 0.53, "output": 0.67, "provider": "deepseek"},
+            # Removed: deepseek speciale (not supported)
             # Kimi
-            "kimi-k2
-            "kimi-k2-0711-preview": {"input": 0.84, "output": 3.50, "provider": "kimi"},
-            "kimi-k2-thinking": {"input": 0.84, "output": 3.50, "provider": "kimi"},
+            "kimi-k2.5": {"input": 0.85, "output": 3.25, "provider": "kimi"},
             # GLM (Z.AI)
             # Pricing with 1.4x margin applied (base: in $0.60, out $2.20)
-            "glm-4.
+            "glm-4.7": {"input": 0.84, "output": 3.08, "provider": "glm"},
         }

     def _resolve_price(self, model: Optional[str]) -> Dict[str, Any]:
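The margin comments in the pricing table can be checked with a line of arithmetic; the figures below come directly from the comments above, nothing new is assumed:

    # gpt-5.2-pro: $21 / $168 base plus a $0.25 margin on each side.
    assert (21.00 + 0.25, 168.00 + 0.25) == (21.25, 168.25)
    # codex-mini-latest cached input: $0.375 base with the 1.4x margin.
    assert abs(0.375 * 1.4 - 0.525) < 1e-9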
@@ -1398,70 +1385,205 @@ class ChatCLI:
             return table.get("gpt-5-2025-08-07", {"input": 0.0, "output": 0.0, "provider": "unknown"})
         return {"input": 0.0, "output": 0.0, "provider": "unknown"}

-    def _resolve_model_alias(self, raw_name: Optional[str]) -> Optional[str]:
-        """Normalize model aliases to their canonical server identifiers."""
-        if not raw_name:
-            return None
-        name = raw_name.strip()
-        lower = name.lower()
-        aliases = {
-            "gemini-3": "gemini-3-pro-preview",
-            "gemini-3-pro": "gemini-3-pro-preview",
-            "gemini-3-preview": "gemini-3-pro-preview",
-            "gemini-3-flash": "gemini-3-flash-preview",
-            "gemini-flash-3": "gemini-3-flash-preview",
-            "gemini-new": "gemini-3-pro-preview",
-            "new-gemini": "gemini-3-pro-preview",
-            "gemini-pro-3": "gemini-3-pro-preview",
-            "gpt5": "gpt-5",
-            "gpt4": "gpt-4o",
-            # Anthropic Claude Opus 4.
-
-            "claude-opus-4
-            "
-            "opus-4
-            "
-            "
-… (14 deleted lines not shown) …
-        self.
-… (25 deleted lines not shown) …
+    def _resolve_model_alias(self, raw_name: Optional[str]) -> Optional[str]:
+        """Normalize model aliases to their canonical server identifiers."""
+        if not raw_name:
+            return None
+        name = raw_name.strip()
+        lower = name.lower()
+        aliases = {
+            "gemini-3": "gemini-3-pro-preview",
+            "gemini-3-pro": "gemini-3-pro-preview",
+            "gemini-3-preview": "gemini-3-pro-preview",
+            "gemini-3-flash": "gemini-3-flash-preview",
+            "gemini-flash-3": "gemini-3-flash-preview",
+            "gemini-new": "gemini-3-pro-preview",
+            "new-gemini": "gemini-3-pro-preview",
+            "gemini-pro-3": "gemini-3-pro-preview",
+            "gpt5": "gpt-5",
+            "gpt4": "gpt-4o",
+            # Anthropic Claude Opus 4.6 short aliases
+            "claude-opus-4-6": "claude-opus-4-6",
+            "claude-opus-4.6": "claude-opus-4-6",
+            "opus-4-6": "claude-opus-4-6",
+            "opus-4.6": "claude-opus-4-6",
+            "opus46": "claude-opus-4-6",
+            "claude-opus46": "claude-opus-4-6",
+        }
+        return aliases.get(lower, name)
+
+    def _apply_model_side_effects(self) -> None:
+        """Adjust related settings when certain models are selected."""
+        try:
+            model_name = (self.model or "").strip().lower()
+        except Exception:
+            model_name = ""
+        try:
+            # Provider-native state resets when switching away from OpenAI.
+            try:
+                if self.model and (not self._is_openai_model(self.model)):
+                    self._openai_previous_response_id = None
+                    self._openai_response_id_history = []
+                    self._openai_input_items = []
+                    self._openai_last_sent_input_items = None
+            except Exception:
+                pass
+            if model_name in {"gpt-5.2-pro"}:
+                # Default these to high, but don't clobber a user-chosen xhigh.
+                if getattr(self, "reasoning_effort", None) not in ("high", "xhigh"):
+                    self.reasoning_effort = "high"
+            # Codex family: disable preambles for better behavior
+            if "codex" in model_name:
+                self.preambles_enabled = False
+            # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+            # Force-disable for all other models (even if a saved setting had it enabled).
+            if not self._supports_preambles(self.model):
+                self.preambles_enabled = False
+        except Exception:
+            try:
+                self.reasoning_effort = "high"
+            except Exception:
+                pass
+
+    def _supports_preambles(self, model: Optional[str]) -> bool:
+        """Tool-call preambles are a CLI-only UX hint.
+
+        Requirement: disabled for all models except GPT-5 (base model; non-Codex).
+        In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+        """
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            # Only the base GPT-5 line supports this UX toggle.
+            # Allow:
+            # - "gpt-5"
+            # - date-pinned variants like "gpt-5-2025-08-07"
+            # Disallow:
+            # - versioned families like "gpt-5.1*" / "gpt-5.2*"
+            if not (m == "gpt-5" or m.startswith("gpt-5-")):
+                return False
+            if "codex" in m:
+                return False
+            return True
+        except Exception:
+            return False
+
+    def _is_openai_model(self, model: Optional[str]) -> bool:
+        """Best-effort model/provider discriminator for client-side state.
+
+        The server is multi-provider. For the CLI we treat anything that isn't an explicit
+        non-OpenAI provider prefix as OpenAI.
+        """
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            if not m:
+                return False
+            for pfx in ("gemini-", "claude-", "grok-", "deepseek-", "kimi-", "glm-"):
+                if m.startswith(pfx):
+                    return False
+            # Everything else defaults to OpenAI in this repo.
+            return True
+        except Exception:
+            return False
+
+    def _provider_supports_native_retention(self, model: Optional[str]) -> bool:
+        """Whether this provider has an implemented native tool/thinking retention path."""
+        try:
+            if not model:
+                return False
+            m = str(model).strip().lower()
+            if m.startswith("gemini-"):
+                return True
+            if m.startswith("kimi-"):
+                return bool(getattr(self, "retain_native_tool_results", False))
+            if self._is_openai_model(model):
+                return True
+            return False
+        except Exception:
+            return False
+
+    def _sanitize_openai_items(self, items: Any) -> Any:
+        """Recursively strip fields from OpenAI output items that cause errors when used as input."""
+        if isinstance(items, list):
+            return [self._sanitize_openai_items(x) for x in items]
+        if isinstance(items, dict):
+            # 'status' is the main offender causing 400s
+            bad_keys = {"status", "usage", "completed_at", "created_at", "incomplete_details", "metadata", "parsed_arguments"}
+            return {k: self._sanitize_openai_items(v) for k, v in items.items() if k not in bad_keys}
+        return items
+
+    async def _cancel_inflight_dispatch(self, reason: str = "cancelled by user") -> None:
+        """If the server delegated a tool to this CLI (tool.dispatch), send a cancellation callback.
+
+        This prevents the server from waiting until TOOLS_CALLBACK_TIMEOUT_SEC when the user aborts.
+        Best-effort; never raises.
+        """
+        ctx = None
+        try:
+            ctx = dict(self._inflight_dispatch) if isinstance(self._inflight_dispatch, dict) else None
+        except Exception:
+            ctx = None
+        if not ctx:
+            return
+        session_id = ctx.get("session_id")
+        call_id = ctx.get("call_id")
+        job_token = ctx.get("job_token")
+        name = ctx.get("name")
+        if not (session_id and call_id and job_token):
+            return
+        payload_cb = {
+            "session_id": session_id,
+            "call_id": call_id,
+            "name": name,
+            "job_token": job_token,
+            "result": {
+                "ok": False,
+                "cancelled": True,
+                "error": str(reason or "cancelled"),
+            },
+        }
+        try:
+            # Keep it short; we just want to unblock the server.
+            http_timeout = httpx.Timeout(connect=2.0, read=3.0, write=2.0, pool=2.0)
+        except Exception:
+            http_timeout = None
+        try:
+            async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
+                await client.post(self.tools_callback_url, json=payload_cb)
+        except Exception:
+            pass
+        finally:
+            try:
+                self._inflight_dispatch = None
+            except Exception:
+                pass
+
+    def _is_gpt_model(self, model: Optional[str]) -> bool:
+        """True for OpenAI GPT models (used for showing certain UI-only toggles)."""
+        try:
+            return bool(model) and str(model).strip().lower().startswith("gpt-")
+        except Exception:
+            return False
+    def _is_codex_model(self, model: Optional[str]) -> bool:
+        try:
+            return bool(model) and ("codex" in str(model).lower())
+        except Exception:
+            return False
+    def _supports_xhigh_reasoning_effort(self, model: Optional[str]) -> bool:
+        """Return True if the OpenAI model supports reasoning_effort='xhigh'.
+
+        OpenAI supports xhigh on:
+        - gpt-5.1-codex-max
+        - the gpt-5.2* family
+        """
+        try:
+            m = (str(model).strip().lower() if model else "")
+            return m.startswith("gpt-5.2")
+        except Exception:
+            return False

     def _is_deepseek_like(self, model: Optional[str]) -> bool:
         try:
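For illustration, how the sanitizer above behaves on a sample item. This is a standalone restatement of `_sanitize_openai_items` with the same key set; the sample values are invented:

    BAD_KEYS = {"status", "usage", "completed_at", "created_at",
                "incomplete_details", "metadata", "parsed_arguments"}

    def sanitize(items):
        # Recursively drop the keys that cause errors when output items are echoed back as input.
        if isinstance(items, list):
            return [sanitize(x) for x in items]
        if isinstance(items, dict):
            return {k: sanitize(v) for k, v in items.items() if k not in BAD_KEYS}
        return items

    sample = {"type": "function_call", "call_id": "call_abc", "name": "read_file",
              "arguments": "{\"path\": \"README.md\"}", "status": "completed"}
    print(sanitize([sample]))  # "status" is dropped; all other fields pass through unchanged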
@@ -1469,87 +1591,87 @@ class ChatCLI:
         except Exception:
             return False

-    def compute_cost_usd(self, model: Optional[str], usage: Dict[str, Any]) -> float:
+    def compute_cost_usd(self, model: Optional[str], usage: Dict[str, Any]) -> float:
         price = self._resolve_price(model)
         provider = (price.get("provider") or "").lower()
         # prefer detailed fields when present
         prompt_tokens = int(usage.get("prompt_tokens") or usage.get("turn", {}).get("input_tokens", 0) or 0)
         completion_tokens = int(usage.get("completion_tokens") or usage.get("turn", {}).get("output_tokens", 0) or 0)
-        total_tokens = int(usage.get("total_tokens") or usage.get("turn", {}).get("total_tokens", 0) or (prompt_tokens + completion_tokens) or 0)
-        image_tokens = int(usage.get("image_tokens", 0) or 0)
-        thinking_tokens = int(usage.get("thinking_tokens", 0) or 0)
-        # Reasoning gap: bill as completion-side if total > (prompt + completion)
-        reasoning_gap = 0
-        try:
-            if total_tokens > (prompt_tokens + completion_tokens):
-                reasoning_gap = total_tokens - (prompt_tokens + completion_tokens)
-        except Exception:
-            reasoning_gap = 0
-        # Anthropic: count image tokens as prompt-side
-        if provider == "anthropic" and image_tokens:
-            prompt_tokens += image_tokens
-        # Anthropic prompt caching: pricing logic (reads @ 10%, creation @ 1.25x/2x)
-        if provider == "anthropic":
-            cache_read = int(usage.get("cache_read_input_tokens", 0) or 0)
-            cache_creation = int(usage.get("cache_creation_input_tokens", 0) or 0)
-            # Try to detect creation breakdown if available
-            cc_5m = 0
-            cc_1h = 0
-            try:
-                cc_map = usage.get("cache_creation") if isinstance(usage, dict) else None
-                if isinstance(cc_map, dict):
-                    cc_5m = int(cc_map.get("ephemeral_5m_input_tokens", 0) or 0)
-                    cc_1h = int(cc_map.get("ephemeral_1h_input_tokens", 0) or 0)
-            except Exception:
-                pass
-            # If breakdown is missing but total creation exists, assume 5m (1.25x) as default/safe estimate
-            if cache_creation > 0 and (cc_5m + cc_1h) == 0:
-                cc_5m = cache_creation
-
-            # Only apply special pricing if cache fields are present
-            if cache_read > 0 or cache_creation > 0:
-                in_rate = float(price.get("input", 0.0))
-                out_rate = float(price.get("output", 0.0))
-
-                # Non-cached prompt part
-                non_cached = max(0, int(prompt_tokens) - int(cache_read) - int(cache_creation))
-
-                cost = 0.0
-                # Standard input
-                cost += (non_cached / 1_000_000.0) * in_rate
-                # Cache reads (10% of input rate)
-                cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)
-                # Cache creation (1.25x for 5m, 2.0x for 1h)
-                if cc_5m > 0:
-                    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)
-                if cc_1h > 0:
-                    cost += (cc_1h / 1_000_000.0) * (in_rate * 2.00)
-
-                # Output + reasoning gap
-                completion_total = completion_tokens
-                if total_tokens and (prompt_tokens + completion_tokens) != total_tokens:
-                    completion_total += reasoning_gap
-                else:
-                    if thinking_tokens and not usage.get("total_tokens"):
-                        completion_total += thinking_tokens
-
-                cost += (completion_total / 1_000_000.0) * out_rate
-                return float(cost)
-
-        # reasoning_gap already computed above
+        total_tokens = int(usage.get("total_tokens") or usage.get("turn", {}).get("total_tokens", 0) or (prompt_tokens + completion_tokens) or 0)
+        image_tokens = int(usage.get("image_tokens", 0) or 0)
+        thinking_tokens = int(usage.get("thinking_tokens", 0) or 0)
+        # Reasoning gap: bill as completion-side if total > (prompt + completion)
+        reasoning_gap = 0
+        try:
+            if total_tokens > (prompt_tokens + completion_tokens):
+                reasoning_gap = total_tokens - (prompt_tokens + completion_tokens)
+        except Exception:
+            reasoning_gap = 0
+        # Anthropic: count image tokens as prompt-side
+        if provider == "anthropic" and image_tokens:
+            prompt_tokens += image_tokens
+        # Anthropic prompt caching: pricing logic (reads @ 10%, creation @ 1.25x/2x)
+        if provider == "anthropic":
+            cache_read = int(usage.get("cache_read_input_tokens", 0) or 0)
+            cache_creation = int(usage.get("cache_creation_input_tokens", 0) or 0)
+            # Try to detect creation breakdown if available
+            cc_5m = 0
+            cc_1h = 0
+            try:
+                cc_map = usage.get("cache_creation") if isinstance(usage, dict) else None
+                if isinstance(cc_map, dict):
+                    cc_5m = int(cc_map.get("ephemeral_5m_input_tokens", 0) or 0)
+                    cc_1h = int(cc_map.get("ephemeral_1h_input_tokens", 0) or 0)
+            except Exception:
+                pass
+            # If breakdown is missing but total creation exists, assume 5m (1.25x) as default/safe estimate
+            if cache_creation > 0 and (cc_5m + cc_1h) == 0:
+                cc_5m = cache_creation
+
+            # Only apply special pricing if cache fields are present
+            if cache_read > 0 or cache_creation > 0:
+                in_rate = float(price.get("input", 0.0))
+                out_rate = float(price.get("output", 0.0))
+
+                # Non-cached prompt part
+                non_cached = max(0, int(prompt_tokens) - int(cache_read) - int(cache_creation))
+
+                cost = 0.0
+                # Standard input
+                cost += (non_cached / 1_000_000.0) * in_rate
+                # Cache reads (10% of input rate)
+                cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)
+                # Cache creation (1.25x for 5m, 2.0x for 1h)
+                if cc_5m > 0:
+                    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)
+                if cc_1h > 0:
+                    cost += (cc_1h / 1_000_000.0) * (in_rate * 2.00)
+
+                # Output + reasoning gap
+                completion_total = completion_tokens
+                if total_tokens and (prompt_tokens + completion_tokens) != total_tokens:
+                    completion_total += reasoning_gap
+                else:
+                    if thinking_tokens and not usage.get("total_tokens"):
+                        completion_total += thinking_tokens
+
+                cost += (completion_total / 1_000_000.0) * out_rate
+                return float(cost)
+
+        # reasoning_gap already computed above
         # DeepSeek cache pricing nuance (best-effort; needs provider-specific fields to be precise)
-        if self._is_deepseek_like(model):
-            hit = int(usage.get("prompt_cache_hit_tokens", 0) or 0)
-            miss = int(usage.get("prompt_cache_miss_tokens", 0) or 0)
-            if (hit + miss) <= 0:
-                miss = prompt_tokens
-                hit = 0
-            # V3.2 cache hit pricing per docs with +$0.25 margin -> $0.278 / 1M
-            cache_hit_rate_per_m = 0.278
-            cost = (hit / 1_000_000.0) * cache_hit_rate_per_m
-            cost += (miss / 1_000_000.0) * float(price.get("input", 0.0))
-            cost += ((completion_tokens + reasoning_gap) / 1_000_000.0) * float(price.get("output", 0.0))
-            return float(cost)
+        if self._is_deepseek_like(model):
+            hit = int(usage.get("prompt_cache_hit_tokens", 0) or 0)
+            miss = int(usage.get("prompt_cache_miss_tokens", 0) or 0)
+            if (hit + miss) <= 0:
+                miss = prompt_tokens
+                hit = 0
+            # V3.2 cache hit pricing per docs with +$0.25 margin -> $0.278 / 1M
+            cache_hit_rate_per_m = 0.278
+            cost = (hit / 1_000_000.0) * cache_hit_rate_per_m
+            cost += (miss / 1_000_000.0) * float(price.get("input", 0.0))
+            cost += ((completion_tokens + reasoning_gap) / 1_000_000.0) * float(price.get("output", 0.0))
+            return float(cost)
         # OpenAI prompt caching: cached input tokens billed at 10% of input price by default
         # Allow per-model override via price["cached_input"] when provided
         if provider == "openai":
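A worked example of the Anthropic cache-aware branch of `compute_cost_usd` above, using the claude-opus-4-6 rates from the pricing table ($5.25 in / $25.25 out per 1M) and invented token counts:

    in_rate, out_rate = 5.25, 25.25                              # $ per 1M tokens, from the table above
    prompt_tokens, cache_read, cc_5m, completion_tokens = 160_000, 100_000, 20_000, 2_000

    non_cached = max(0, prompt_tokens - cache_read - cc_5m)      # 40,000 tokens at the full input rate
    cost = (non_cached / 1_000_000.0) * in_rate                  # 0.2100
    cost += (cache_read / 1_000_000.0) * (in_rate * 0.10)        # 0.0525  cache reads at 10% of input
    cost += (cc_5m / 1_000_000.0) * (in_rate * 1.25)             # 0.1313  5m cache creation at 1.25x
    cost += (completion_tokens / 1_000_000.0) * out_rate         # 0.0505  output tokens
    print(f"{cost:.4f}")                                         # ~0.4443 USD for the turn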
@@ -1905,13 +2027,13 @@ class ChatCLI:
         except Exception as e:
             self.ui.warn(f"Failed to load local settings: {e}")

-    def _collect_settings_dict(self) -> Dict[str, Any]:
-        data = {
-            "model": self.model,
-            "requested_tools": self.requested_tools,
+    def _collect_settings_dict(self) -> Dict[str, Any]:
+        data = {
+            "model": self.model,
+            "requested_tools": self.requested_tools,
             "fs_scope": self.fs_scope,
             # host_base is per-terminal by default; only persist if explicitly set by the user
-            "save_chat_history": self.save_chat_history,
+            "save_chat_history": self.save_chat_history,
             "fs_host_mode": self.fs_host_mode,
             "system_prompt": self.system_prompt,
             "show_tool_calls": self.show_tool_calls,
@@ -1934,8 +2056,10 @@ class ChatCLI:
             # retain provider-native tool results
             "retain_native_tool_results": self.retain_native_tool_results,
             # Anthropic thinking budget
-            "thinking_budget_tokens": self.thinking_budget_tokens,
-            # Anthropic
+            "thinking_budget_tokens": self.thinking_budget_tokens,
+            # Anthropic effort (Opus 4.6/4.5). Default: high.
+            "anthropic_effort": getattr(self, "anthropic_effort", None),
+            # Anthropic cache TTL preference
             "anthropic_cache_ttl": self.anthropic_cache_ttl,
             # web search
             "web_search_enabled": self.web_search_enabled,
@@ -1946,10 +2070,10 @@ class ChatCLI:
             "text_verbosity": self.text_verbosity,
             "preambles_enabled": self.preambles_enabled,
             "custom_first_turn_enabled": self.custom_first_turn_enabled,
-            "custom_first_turn_text": self.custom_first_turn_text,
-            "codex_prompt_enabled": self.codex_prompt_enabled,
-            "codex_max_allow_all_tools": self.codex_max_allow_all_tools,
-        }
+            "custom_first_turn_text": self.custom_first_turn_text,
+            "codex_prompt_enabled": self.codex_prompt_enabled,
+            "codex_max_allow_all_tools": self.codex_max_allow_all_tools,
+        }
         try:
             if not getattr(self, "_host_base_ephemeral", False) and self.host_base:
                 data["host_base"] = self.host_base
@@ -1957,15 +2081,15 @@ class ChatCLI:
             pass
         return data

-    def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
-        try:
-            old_system_prompt = getattr(self, "system_prompt", None)
-            self.model = data.get("model", self.model)
-            if "save_chat_history" in data:
-                try:
-                    self.save_chat_history = bool(data.get("save_chat_history"))
-                except Exception:
-                    pass
+    def _apply_settings_dict(self, data: Dict[str, Any]) -> None:
+        try:
+            old_system_prompt = getattr(self, "system_prompt", None)
+            self.model = data.get("model", self.model)
+            if "save_chat_history" in data:
+                try:
+                    self.save_chat_history = bool(data.get("save_chat_history"))
+                except Exception:
+                    pass
             self.requested_tools = data.get("requested_tools", self.requested_tools)
             self.fs_scope = data.get("fs_scope", self.fs_scope)
             self.host_base = data.get("host_base", self.host_base)
@@ -2037,13 +2161,25 @@ class ChatCLI:
                     self.usage_info_mode = val
             except Exception:
                 pass
-            # Reasoning effort (default medium if missing/invalid)
-            try:
-                val = data.get("reasoning_effort")
-                if isinstance(val, str) and val in ("low", "medium", "high", "xhigh"):
-                    self.reasoning_effort = val
-            except Exception:
-                pass
+            # Reasoning effort (default medium if missing/invalid)
+            try:
+                val = data.get("reasoning_effort")
+                if isinstance(val, str) and val in ("low", "medium", "high", "xhigh"):
+                    self.reasoning_effort = val
+            except Exception:
+                pass
+
+            # Anthropic effort (Opus 4.6/4.5). Default behavior equals high.
+            try:
+                ae = data.get("anthropic_effort")
+                if isinstance(ae, str):
+                    ae2 = ae.strip().lower()
+                    if ae2 in ("low", "medium", "high", "max"):
+                        self.anthropic_effort = ae2
+                elif ae in (None, "", "default"):
+                    self.anthropic_effort = "high"
+            except Exception:
+                self.anthropic_effort = "high"
             # Text verbosity selector
             try:
                 v = data.get("text_verbosity")
@@ -2052,21 +2188,21 @@ class ChatCLI:
             except Exception:
                 pass
             # Tool preambles toggle
-            if "preambles_enabled" in data:
-                try:
-                    self.preambles_enabled = bool(data.get("preambles_enabled"))
-                except Exception:
-                    self.preambles_enabled = False
-            if "codex_prompt_enabled" in data:
-                try:
-                    self.codex_prompt_enabled = bool(data.get("codex_prompt_enabled"))
-                except Exception:
-                    self.codex_prompt_enabled = True
-            if "codex_max_allow_all_tools" in data:
-                try:
-                    self.codex_max_allow_all_tools = bool(data.get("codex_max_allow_all_tools"))
-                except Exception:
-                    self.codex_max_allow_all_tools = False
+            if "preambles_enabled" in data:
+                try:
+                    self.preambles_enabled = bool(data.get("preambles_enabled"))
+                except Exception:
+                    self.preambles_enabled = False
+            if "codex_prompt_enabled" in data:
+                try:
+                    self.codex_prompt_enabled = bool(data.get("codex_prompt_enabled"))
+                except Exception:
+                    self.codex_prompt_enabled = True
+            if "codex_max_allow_all_tools" in data:
+                try:
+                    self.codex_max_allow_all_tools = bool(data.get("codex_max_allow_all_tools"))
+                except Exception:
+                    self.codex_max_allow_all_tools = False
             # Custom first-turn injection
             if "custom_first_turn_enabled" in data:
                 try:
@@ -2095,39 +2231,47 @@ class ChatCLI:
                     self.thinking_budget_tokens = None
             except Exception:
                 pass
-            # Anthropic cache TTL preference
-            try:
-                ttl = data.get("anthropic_cache_ttl")
-                if isinstance(ttl, str) and ttl.strip() in ("5m", "1h"):
-                    self.anthropic_cache_ttl = ttl.strip()
-                elif ttl in (None, "", "default"):
-                    self.anthropic_cache_ttl = None
-            except Exception:
-                pass
-            # Rebuild history if system prompt changed
-            try:
-                system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
-            except Exception:
-                system_prompt_changed = False
-
-            if system_prompt_changed:
-                # Changing the system prompt can materially alter the behavior of the assistant;
-                # warn the user and reset the current conversation history to avoid mixing contexts.
-                try:
-                    self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
-                except Exception:
-                    pass
-                self.history = []
-                if self.system_prompt:
-                    self.history.append({"role": "system", "content": self.system_prompt})
-… (8 deleted lines not shown) …
+            # Anthropic cache TTL preference
+            try:
+                ttl = data.get("anthropic_cache_ttl")
+                if isinstance(ttl, str) and ttl.strip() in ("5m", "1h"):
+                    self.anthropic_cache_ttl = ttl.strip()
+                elif ttl in (None, "", "default"):
+                    self.anthropic_cache_ttl = None
+            except Exception:
+                pass
+            # Rebuild history if system prompt changed
+            try:
+                system_prompt_changed = old_system_prompt != getattr(self, "system_prompt", None)
+            except Exception:
+                system_prompt_changed = False
+
+            if system_prompt_changed:
+                # Changing the system prompt can materially alter the behavior of the assistant;
+                # warn the user and reset the current conversation history to avoid mixing contexts.
+                try:
+                    self.ui.warn("[settings] System prompt changed - clearing current conversation history.")
+                except Exception:
+                    pass
+                self.history = []
+                if self.system_prompt:
+                    self.history.append({"role": "system", "content": self.system_prompt})
+                # OpenAI threaded state is invalid once the system prompt changes.
+                try:
+                    self._openai_previous_response_id = None
+                    self._openai_response_id_history = []
+                    self._openai_input_items = []
+                    self._openai_last_sent_input_items = None
+                except Exception:
+                    pass
+            # On settings load, do not assume the custom first-turn was injected yet
+            try:
+                self._did_inject_custom_first_turn = False
+            except Exception:
+                pass
+            self._apply_model_side_effects()
+        except Exception as e:
+            self.ui.warn(f"Failed to apply settings: {e}")

     async def _fetch_server_settings(self) -> Optional[Dict[str, Any]]:
         try:
@@ -2329,7 +2473,7 @@ class ChatCLI:
         parts = [
             f"Server: {self.server}",
             f"Model: {self.model or '(server default)'}",
-            f"Tools: {self._tools_label()}",
+            f"Tools: {self._tools_label()}",
             f"History: {'ON' if self.save_chat_history else 'OFF'}",
             f"Scope: {self._fs_label()}",
             f"Agent scope: {self.host_base or '(none)'}",
@@ -2480,12 +2624,12 @@ class ChatCLI:
     def _build_commands_catalog(self) -> List[Dict[str, str]]:
         cmds = [
             {"name": "/settings", "usage": "/settings", "desc": "Open settings menu"},
-            {"name": "/configure", "usage": "/configure", "desc": "Run configuration wizard now"},
+            {"name": "/configure", "usage": "/configure", "desc": "Run configuration wizard now"},
             {"name": "/history", "usage": "/history on|off", "desc": "Toggle saving chat history to unified memory"},
             {"name": "/infomode", "usage": "/infomode concise|verbose", "desc": "Set Usage & Info panel mode"},
             {"name": "/tools", "usage": "/tools on|off|default", "desc": "Toggle per-request tools"},
             {"name": "/websearch", "usage": "/websearch on|off|domains|sources|location", "desc": "Configure OpenAI web search"},
-            {"name": "/reasoning", "usage": "/reasoning low|medium|high|xhigh", "desc": "Set OpenAI reasoning effort (default: medium; xhigh supported on gpt-5.2*)"},
+            {"name": "/reasoning", "usage": "/reasoning low|medium|high|xhigh", "desc": "Set OpenAI reasoning effort (default: medium; xhigh supported on gpt-5.2*)"},
             {"name": "/thinkingbudget", "usage": "/thinkingbudget <tokens>|default", "desc": "Set Anthropic thinking budget tokens for -thinking models"},
             {"name": "/fs", "usage": "/fs workspace|host|default", "desc": "Set filesystem scope"},
             {"name": "/agent-scope", "usage": "/agent-scope <absolute path>", "desc": "Alias for /hostbase (set Agent scope)"},
@@ -2505,31 +2649,28 @@ class ChatCLI:
         ]
         return cmds

-    def _model_presets(self) -> List[Tuple[str, str]]:
-        """Shared list of (model, label) used by settings UI and /model menu."""
-
-
-
-
-            ("gpt-5
-            ("
-            ("
-            ("
-            ("
-            ("
-            ("kimi-k2
-            ("gemini-2.5-pro", "Gemini: gemini-2.5-pro"),
-            ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
-            ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
-            ("grok-4-1-fast-reasoning", "xAI: grok-4-1-fast-reasoning"),
-            ("grok-4-1-fast-non-reasoning", "xAI: grok-4-1-fast-non-reasoning"),
-            ("grok-4", "xAI: grok-4"),
+    def _model_presets(self) -> List[Tuple[str, str]]:
+        """Shared list of (model, label) used by settings UI and /model menu."""
+        # Ordered in "feelings" order (Recommended first, then Others).
+        # NOTE: We intentionally do not include a "server default" or "custom" option here.
+        return [
+            # Recommended
+            ("gpt-5.2", "OpenAI: gpt-5.2"),
+            ("gpt-5.2-codex", "OpenAI: gpt-5.2-codex"),
+            ("gpt-5", "OpenAI: gpt-5"),
+            ("gemini-3-pro-preview", "Gemini: gemini-3-pro-preview"),
+            ("gemini-3-flash-preview", "Gemini: gemini-3-flash-preview"),
+            ("claude-opus-4-6", "Anthropic: claude-opus-4-6 (adaptive thinking supported)"),
+            ("kimi-k2.5", "Kimi: kimi-k2.5"),
             ("grok-code-fast-1", "xAI: grok-code-fast-1"),
-
-
-            ("
-            ("
-            ("
+
+            # Others
+            ("gpt-5.2-pro", "OpenAI: gpt-5.2-pro (streaming, very expensive)"),
+            ("gpt-5-codex", "OpenAI: gpt-5-codex"),
+            ("codex-mini-latest", "OpenAI: codex-mini-latest (fast reasoning)"),
+            ("deepseek-reasoner-3.2", "DeepSeek: deepseek-reasoner 3.2"),
+            ("deepseek-chat-3.2", "DeepSeek: deepseek-chat 3.2"),
+            ("glm-4.7", "GLM: glm-4.7"),
         ]

     async def open_settings(self, focus: Optional[str] = None) -> None:
@@ -2575,7 +2716,8 @@ class ChatCLI:
             "usage_info_mode": "verbose",
             "reasoning_effort": "medium",
             "retain_native_tool_results": False,
-            "thinking_budget_tokens": None,
+            "thinking_budget_tokens": None,
+            "anthropic_effort": "high",
             "anthropic_cache_ttl": None,
             "web_search_enabled": False,
             "web_search_allowed_domains": [],
@@ -2586,87 +2728,87 @@ class ChatCLI:
|
|
|
2586
2728
|
|
|
2587
2729
|
# Model presets list (shared)
|
|
2588
2730
|
model_presets: List[Tuple[str, str]] = self._model_presets()
|
|
2589
|
-
|
|
2590
|
-
#
|
|
2591
|
-
#
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
"
|
|
2595
|
-
"
|
|
2596
|
-
"gemini-3-pro-preview",
|
|
2597
|
-
"gemini-3-flash-preview",
|
|
2598
|
-
"
|
|
2599
|
-
"
|
|
2600
|
-
"
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
# Build
|
|
2608
|
-
|
|
2609
|
-
render_map: Dict[Any, str] = {
|
|
2610
|
-
for m, lbl in rec_list:
|
|
2611
|
-
|
|
2612
|
-
render_map[m] = lbl
|
|
2613
|
-
else:
|
|
2614
|
-
render_map[m] = f"{lbl} (recommended)"
|
|
2731
|
+
|
|
2732
|
+
# Reorder with a Recommended section at the top.
|
|
2733
|
+
# IMPORTANT: remove "server default" and "custom" from Settings UI.
|
|
2734
|
+
rec_keys_ordered = [
|
|
2735
|
+
"gpt-5.2",
|
|
2736
|
+
"gpt-5.2-codex",
|
|
2737
|
+
"gpt-5",
|
|
2738
|
+
"gemini-3-pro-preview",
|
|
2739
|
+
"gemini-3-flash-preview",
|
|
2740
|
+
"claude-opus-4-6",
|
|
2741
|
+
"kimi-k2.5",
|
|
2742
|
+
"grok-code-fast-1",
|
|
2743
|
+
]
|
|
2744
|
+
rec_set = set(rec_keys_ordered)
|
|
2745
|
+
preset_map = {m: lbl for (m, lbl) in model_presets}
|
|
2746
|
+
rec_list: List[Tuple[str, str]] = [(m, preset_map[m]) for m in rec_keys_ordered if m in preset_map]
|
|
2747
|
+
other_list: List[Tuple[str, str]] = [(m, lbl) for (m, lbl) in model_presets if m not in rec_set]
|
|
2748
|
+
|
|
2749
|
+
# Build enum options in the order: Recommended, Others
|
|
2750
|
+
+model_enum_options: List[Optional[str]] = [m for (m, _l) in rec_list] + [m for (m, _l) in other_list]
+render_map: Dict[Any, str] = {}
+for m, lbl in rec_list:
+    render_map[m] = lbl
for m, lbl in other_list:
    render_map[m] = lbl
-render_map["custom"] = "Custom..."

# Build items schema
-items: List[Dict[str, Any]] = [
-    {"label": "General", "type": "group", "items": [
-        {
-            "id": "save_chat_history",
-            "label": "Save to unified memory",
-            "type": "bool",
-            "description": "When ON, chats sync to your account and appear in the web portal. When OFF, chats are ephemeral (local only)."
-        },
-        {
-            "id": "model",
-            "label": "Model",
-            "type": "enum",
-            "options": model_enum_options,
-            "render": render_map,
-        },
-        {"id": "system_prompt", "label": "System prompt", "type": "multiline"},
-        {"id": "usage_info_mode", "label": "Usage panel", "type": "enum", "options": ["concise", "verbose"], "render": {"concise": "Concise", "verbose": "Verbose"}},
-        {"id": "text_verbosity", "label": "Text verbosity", "type": "enum", "options": ["low", "medium", "high"], "render": {"low": "Low", "medium": "Medium", "high": "High"}},
-    ]},
-    {"label": "Tools & Security", "type": "group", "items": [
-        {
-            "id": "requested_tools",
-            "label": "Tools",
-            "type": "enum",
-
-            "
-
-
-
-            "
-            "
-            "
-
-
-
-
-
-
-
-
-        {"id": "
-        {"id": "
-        {"id": "
-
-        {"id": "
+items: List[Dict[str, Any]] = [
+    {"label": "General", "type": "group", "items": [
+        {
+            "id": "save_chat_history",
+            "label": "Save to unified memory",
+            "type": "bool",
+            "description": "When ON, chats sync to your account and appear in the web portal. When OFF, chats are ephemeral (local only)."
+        },
+        {
+            "id": "model",
+            "label": "Model",
+            "type": "enum",
+            "options": model_enum_options,
+            "render": render_map,
+        },
+        {"id": "system_prompt", "label": "System prompt", "type": "multiline"},
+        {"id": "usage_info_mode", "label": "Usage panel", "type": "enum", "options": ["concise", "verbose"], "render": {"concise": "Concise", "verbose": "Verbose"}},
+        {"id": "text_verbosity", "label": "Text verbosity", "type": "enum", "options": ["low", "medium", "high"], "render": {"low": "Low", "medium": "Medium", "high": "High"}},
+    ]},
+    {"label": "Tools & Security", "type": "group", "items": [
+        {
+            "id": "requested_tools",
+            "label": "Tools",
+            "type": "enum",
+            # Default-first: ON, then OFF, then server default.
+            "options": [True, False, None],
+            "render": {None: "Server default", True: "ON", False: "OFF"},
+        },
+        {
+            "id": "control_level",
+            "label": "Control level",
+            "type": "enum",
+            # Default-first: Level 3, then 2, then 1, then server default.
+            "options": [3, 2, 1, None],
+            "render": {None: "Server default", 1: "1 (read)", 2: "2 (approval)", 3: "3 (full)"},
+        },
+        {"id": "auto_approve", "label": "Auto-approve tools (comma)", "type": "text"},
+        {"id": "show_tool_calls", "label": "Show tool call logs", "type": "bool"},
+        # Note: options are static for this Settings UI session, so include xhigh unconditionally.
+        # The server will safely downgrade xhigh on models that don't support it.
+        {"id": "reasoning_effort", "label": "OpenAI reasoning effort", "type": "enum", "options": ["low", "medium", "high", "xhigh"], "render": {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh (gpt-5.2* / Codex Max; otherwise downgrades)"}},
+        {"id": "codex_max_allow_all_tools", "label": "Codex Max: allow ALL tools", "type": "bool"},
+        {"id": "retain_native_tool_results", "label": "Retain provider-native tool results across turns", "type": "bool"},
+        {"id": "thinking_budget_tokens", "label": "Anthropic thinking budget (tokens)", "type": "int"},
+        {"id": "anthropic_effort", "label": "Anthropic effort (Opus 4.6/4.5)", "type": "enum", "options": ["low", "medium", "high", "max"], "render": {"low": "Low", "medium": "Medium", "high": "High (default)", "max": "Max (Opus 4.6 only)"}},
+        {"id": "anthropic_cache_ttl", "label": "Anthropic prompt cache TTL", "type": "enum", "options": [None, "5m", "1h"], "render": {None: "Server default (5m)", "5m": "5 minutes (lower write cost)", "1h": "1 hour (higher write cost)"}},
+        # Agent scope & filesystem controls
+        {"id": "host_base", "label": "Agent scope directory", "type": "text"},
        {
            "id": "fs_scope",
            "label": "Filesystem scope",
            "type": "enum",
-
+            # Default-first: host (Agent scope), then workspace, then server default.
+            "options": ["host", "workspace", None],
            "render": {
                None: "Server default",
                "workspace": "Workspace (sandbox)",
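As an illustration of how a default-first enum row with a render map (like the ones added above) could be consumed, here is a minimal, dependency-free sketch. The cycle_option and render_value helpers are assumptions for illustration, not part of henosis-cli:

from typing import Any, Dict, List

def cycle_option(row: Dict[str, Any], current: Any) -> Any:
    # Advance to the next option in the row's default-first list, wrapping around.
    options: List[Any] = row.get("options") or []
    if not options:
        return current
    try:
        idx = (options.index(current) + 1) % len(options)
    except ValueError:
        idx = 0  # unknown current value: fall back to the first (default) option
    return options[idx]

def render_value(row: Dict[str, Any], value: Any) -> str:
    # Prefer the human label from the render map; fall back to str().
    render: Dict[Any, str] = row.get("render") or {}
    return render.get(value, str(value))

# Example: the "requested_tools" row defined above.
row = {"id": "requested_tools", "type": "enum",
       "options": [True, False, None],
       "render": {None: "Server default", True: "ON", False: "OFF"}}
value = cycle_option(row, None)   # None -> True
print(render_value(row, value))   # "ON"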
@@ -2677,7 +2819,8 @@ class ChatCLI:
            "id": "fs_host_mode",
            "label": "Host mode",
            "type": "enum",
-
+            # Default-first: custom (use Agent scope), then cwd, then any, then server default.
+            "options": ["custom", "cwd", "any", None],
            "render": {
                None: "Server default / any",
                "any": "any (no extra client restriction)",
@@ -2689,12 +2832,19 @@ class ChatCLI:
    {"label": "Code Map", "type": "group", "items": [
        {"id": "inject_codebase_map", "label": "Inject codebase map on first turn", "type": "bool"},
    ]},
-    {"label": "Preambles & First-turn", "type": "group", "items": [
-        {
-
-
-
-
+    {"label": "Preambles & First-turn", "type": "group", "items": [
+        {
+            "id": "preambles_enabled",
+            "label": "Enable tool call preambles (GPT-5 only)",
+            "type": "bool",
+            # Only show this control when the *currently selected* model supports it.
+            # (This updates live as the Model picker changes.)
+            "visible_if": (lambda w: self._supports_preambles((w or {}).get("model"))),
+        },
+        {"id": "custom_first_turn_enabled", "label": "Enable custom first-turn injection", "type": "bool"},
+        {"id": "custom_first_turn_text", "label": "Custom first-turn text", "type": "multiline"},
+        {"id": "codex_prompt_enabled", "label": "Inject Codex developer system prompt (Codex models only)", "type": "bool"},
+    ]},
    {"label": "Web search", "type": "group", "items": [
        {"id": "web_search_enabled", "label": "Enable web search (OpenAI)", "type": "bool"},
        {"id": "web_search_allowed_domains", "label": "Allowed domains (comma)", "type": "text"},
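The visible_if hook added above receives the working settings dict, so a control's visibility can track the Model picker live. A rough standalone sketch of how such a predicate might be evaluated; filter_visible and the supports_preambles stand-in are assumptions, not the package's real helpers:

from typing import Any, Callable, Dict, List

def supports_preambles(model: Any) -> bool:
    # Stand-in for self._supports_preambles: GPT-5 family, excluding Codex variants.
    m = str(model or "").lower()
    return m.startswith("gpt-5") and "codex" not in m

def filter_visible(rows: List[Dict[str, Any]], working: Dict[str, Any]) -> List[Dict[str, Any]]:
    visible = []
    for row in rows:
        pred: Callable[[Dict[str, Any]], bool] = row.get("visible_if") or (lambda w: True)
        try:
            if pred(working):
                visible.append(row)
        except Exception:
            visible.append(row)  # never hide a control because a predicate raised
    return visible

rows = [{"id": "preambles_enabled",
         "visible_if": lambda w: supports_preambles((w or {}).get("model"))}]
print(len(filter_visible(rows, {"model": "gpt-5.2"})))        # 1
print(len(filter_visible(rows, {"model": "gpt-5.2-codex"})))  # 0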
@@ -2703,6 +2853,20 @@ class ChatCLI:
    ]},
]

+# Wizard parity: only surface "Low" text verbosity when a GPT model is selected.
+try:
+    if not self._is_gpt_model(self.model):
+        for g in items:
+            if not isinstance(g, dict):
+                continue
+            if (g.get("type") == "group") and (g.get("label") == "General"):
+                for row in (g.get("items") or []):
+                    if isinstance(row, dict) and row.get("id") == "text_verbosity":
+                        row["options"] = ["medium", "high"]
+                        row["render"] = {"medium": "Medium", "high": "High"}
+except Exception:
+    pass
+
# Prepare initial values with enum placeholder for model when custom text set
init_for_ui = dict(initial)
if isinstance(init_for_ui.get("model"), str) and init_for_ui["model"] not in [m for m, _ in model_presets]:
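A small sketch of the same wizard-parity idea in isolation: prune an enum row's options in place when the active model is not a GPT model. The is_gpt_model check here is a simplified stand-in for the CLI's own helper:

from typing import Any, Dict, List

def is_gpt_model(model: Any) -> bool:
    return str(model or "").lower().startswith("gpt-")

def prune_text_verbosity(items: List[Dict[str, Any]], model: Any) -> None:
    # Mutates the schema in place, mirroring the in-place edit performed above.
    if is_gpt_model(model):
        return
    for group in items:
        if not (isinstance(group, dict) and group.get("type") == "group"):
            continue
        for row in (group.get("items") or []):
            if isinstance(row, dict) and row.get("id") == "text_verbosity":
                row["options"] = ["medium", "high"]
                row["render"] = {"medium": "Medium", "high": "High"}

schema = [{"label": "General", "type": "group",
           "items": [{"id": "text_verbosity", "options": ["low", "medium", "high"]}]}]
prune_text_verbosity(schema, "claude-opus-4-6")
print(schema[0]["items"][0]["options"])  # ['medium', 'high']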
@@ -2714,10 +2878,10 @@ class ChatCLI:
|
|
|
2714
2878
|
try:
|
|
2715
2879
|
if rid == "model":
|
|
2716
2880
|
if value == "custom":
|
|
2717
|
-
typed = self.ui.prompt(
|
|
2718
|
-
"Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
|
|
2719
|
-
default=self.model or "",
|
|
2720
|
-
)
|
|
2881
|
+
typed = self.ui.prompt(
|
|
2882
|
+
"Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
|
|
2883
|
+
default=self.model or "",
|
|
2884
|
+
)
|
|
2721
2885
|
working["model"] = typed.strip() or None
|
|
2722
2886
|
self._apply_model_side_effects()
|
|
2723
2887
|
elif rid == "text_verbosity" and isinstance(value, str):
|
|
@@ -2742,14 +2906,14 @@ class ChatCLI:
|
|
|
2742
2906
|
if k.strip() and v.strip():
|
|
2743
2907
|
kv[k.strip()] = v.strip()
|
|
2744
2908
|
working[rid] = kv
|
|
2745
|
-
elif rid == "auto_approve" and isinstance(value, str):
|
|
2746
|
-
working[rid] = [t.strip() for t in value.split(",") if t.strip()]
|
|
2747
|
-
elif rid == "anthropic_cache_ttl":
|
|
2748
|
-
if value in ("5m", "1h"):
|
|
2749
|
-
working[rid] = value
|
|
2750
|
-
else:
|
|
2751
|
-
working[rid] = None
|
|
2752
|
-
self._apply_settings_dict({rid: working.get(rid)})
|
|
2909
|
+
elif rid == "auto_approve" and isinstance(value, str):
|
|
2910
|
+
working[rid] = [t.strip() for t in value.split(",") if t.strip()]
|
|
2911
|
+
elif rid == "anthropic_cache_ttl":
|
|
2912
|
+
if value in ("5m", "1h"):
|
|
2913
|
+
working[rid] = value
|
|
2914
|
+
else:
|
|
2915
|
+
working[rid] = None
|
|
2916
|
+
self._apply_settings_dict({rid: working.get(rid)})
|
|
2753
2917
|
if rid == "host_base":
|
|
2754
2918
|
try:
|
|
2755
2919
|
self._host_base_ephemeral = False
|
|
@@ -3057,9 +3221,9 @@ class ChatCLI:
    "Control Level",
    "Choose control level (1=read-only, 2=approval on write/exec, 3=unrestricted within sandbox):",
    [
-        ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
-        ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
        ("3", "Level 3: Full Access - No approvals needed, all tools unrestricted"),
+        ("2", "Level 2: Approval Required - Write/edit/exec tools require user approval"),
+        ("1", "Level 1: Read-Only - Only read_file and list_dir available, no writes or executions"),
        ("default", "Server Default - Use server's CONTROL_LEVEL_DEFAULT setting"),
    ],
)
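The reorder above puts the recommended level first. A minimal sketch of a numeric fallback picker over such (key, label) choices, for terminals without arrow-key support; choose_numeric is illustrative only and not part of the package:

from typing import List, Tuple

def choose_numeric(title: str, choices: List[Tuple[str, str]], default_index: int = 0) -> str:
    # Print the choices default-first and accept a 1-based number; empty input keeps the default.
    print(title)
    for i, (_key, label) in enumerate(choices, start=1):
        print(f"  {i}. {label}")
    raw = input(f"Select [1-{len(choices)}] (Enter = {default_index + 1}): ").strip()
    if not raw:
        return choices[default_index][0]
    try:
        idx = int(raw) - 1
    except ValueError:
        return choices[default_index][0]
    return choices[idx][0] if 0 <= idx < len(choices) else choices[default_index][0]

level = choose_numeric("Choose control level:", [
    ("3", "Level 3: Full Access"),
    ("2", "Level 2: Approval Required"),
    ("1", "Level 1: Read-Only"),
    ("default", "Server Default"),
])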
@@ -3142,14 +3306,14 @@ class ChatCLI:
except Exception:
    pass

-# 3) Tool usage preamble (UX hint)
-try:
-    if bool(getattr(self, "preambles_enabled", False)) and
-        blocks.append(
-            "Tool usage: when you need to read or modify files or run commands, "
-            "explicitly explain why you're using a tool, what you'll do, and how it "
-            "advances the user's goal before calling the tool."
-        )
+# 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+try:
+    if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
+        blocks.append(
+            "Tool usage: when you need to read or modify files or run commands, "
+            "explicitly explain why you're using a tool, what you'll do, and how it "
+            "advances the user's goal before calling the tool."
+        )
except Exception:
    pass

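In isolation, the gate above amounts to appending the hint block only when both the user toggle and the model check pass. A sketch under that reading; build_system_blocks and the nested model check are simplified assumptions:

from typing import List, Optional

def build_system_blocks(preambles_enabled: bool, model: Optional[str]) -> List[str]:
    blocks: List[str] = []
    def supports_preambles(m: Optional[str]) -> bool:
        name = (m or "").lower()
        return name.startswith("gpt-5") and "codex" not in name
    if preambles_enabled and supports_preambles(model):
        blocks.append(
            "Tool usage: when you need to read or modify files or run commands, "
            "explicitly explain why you're using a tool, what you'll do, and how it "
            "advances the user's goal before calling the tool."
        )
    return blocks

print(len(build_system_blocks(True, "gpt-5.2")))        # 1
print(len(build_system_blocks(True, "gpt-5.2-codex")))  # 0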
@@ -3171,15 +3335,15 @@ class ChatCLI:
|
|
|
3171
3335
|
|
|
3172
3336
|
def _build_messages(self, user_input: str) -> List[Dict[str, str]]:
|
|
3173
3337
|
msgs: List[Dict[str, str]] = []
|
|
3174
|
-
# Inject a concise Codex developer system prompt for Codex models (optional)
|
|
3175
|
-
try:
|
|
3176
|
-
if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
|
|
3177
|
-
msgs.append({"role": "system", "content": self._codex_system_prompt()})
|
|
3178
|
-
except Exception:
|
|
3179
|
-
pass
|
|
3180
|
-
# Always send the system prompt as-is (do NOT inject the code map here)
|
|
3181
|
-
if self.system_prompt:
|
|
3182
|
-
msgs.append({"role": "system", "content": self.system_prompt})
|
|
3338
|
+
# Inject a concise Codex developer system prompt for Codex models (optional)
|
|
3339
|
+
try:
|
|
3340
|
+
if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
|
|
3341
|
+
msgs.append({"role": "system", "content": self._codex_system_prompt()})
|
|
3342
|
+
except Exception:
|
|
3343
|
+
pass
|
|
3344
|
+
# Always send the system prompt as-is (do NOT inject the code map here)
|
|
3345
|
+
if self.system_prompt:
|
|
3346
|
+
msgs.append({"role": "system", "content": self.system_prompt})
|
|
3183
3347
|
|
|
3184
3348
|
# Replay prior conversation (excluding any system message already added)
|
|
3185
3349
|
for msg in self.history:
|
|
@@ -3198,80 +3362,80 @@ class ChatCLI:
|
|
|
3198
3362
|
except Exception:
|
|
3199
3363
|
self._last_built_user_content = user_input
|
|
3200
3364
|
|
|
3201
|
-
msgs.append({"role": "user", "content": content})
|
|
3202
|
-
return msgs
|
|
3203
|
-
|
|
3204
|
-
def _codex_system_prompt(self) -> str:
|
|
3205
|
-
"""Minimal developer system prompt for GPT-5 Codex family."""
|
|
3206
|
-
return (
|
|
3207
|
-
"You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n"
|
|
3208
|
-
"Note: In this CLI, the terminal tool is named 'run_command' (not 'shell'). Use run_command and always set the 'cwd' parameter; avoid using 'cd'.\n\n"
|
|
3209
|
-
"## General\n"
|
|
3210
|
-
"- Always set the 'cwd' param when using run_command. Do not use 'cd' unless absolutely necessary.\n"
|
|
3211
|
-
"- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n"
|
|
3212
|
-
"## Editing constraints\n"
|
|
3213
|
-
"- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n"
|
|
3214
|
-
"- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n"
|
|
3215
|
-
"- You may be in a dirty git worktree.\n"
|
|
3216
|
-
" * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n"
|
|
3217
|
-
" * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n"
|
|
3218
|
-
" * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n"
|
|
3219
|
-
" * If the changes are in unrelated files, just ignore them and don't revert them.\n"
|
|
3220
|
-
"- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n\n"
|
|
3221
|
-
"## Plan tool\n"
|
|
3222
|
-
"When using the planning tool:\n"
|
|
3223
|
-
"- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).\n"
|
|
3224
|
-
"- Do not make single-step plans.\n"
|
|
3225
|
-
"- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.\n\n"
|
|
3226
|
-
"## Codex CLI harness, sandboxing, and approvals\n"
|
|
3227
|
-
"The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.\n"
|
|
3228
|
-
"Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:\n"
|
|
3229
|
-
"- read-only: The sandbox only permits reading files.\n"
|
|
3230
|
-
"- workspace-write: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.\n"
|
|
3231
|
-
"- danger-full-access: No filesystem sandboxing - all commands are permitted.\n"
|
|
3232
|
-
"Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:\n"
|
|
3233
|
-
"- restricted: Requires approval\n"
|
|
3234
|
-
"- enabled: No approval needed\n"
|
|
3235
|
-
"Approvals are your mechanism to get user consent to run shell/terminal commands without the sandbox. Possible configuration options for `approval_policy` are\n"
|
|
3236
|
-
"- untrusted: The harness will escalate most commands for user approval, apart from a limited allowlist of safe 'read' commands.\n"
|
|
3237
|
-
"- on-failure: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.\n"
|
|
3238
|
-
"- on-request: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing.\n"
|
|
3239
|
-
"- never: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user.\n\n"
|
|
3240
|
-
"When requesting approval to execute a command that will require escalated privileges:\n"
|
|
3241
|
-
" - Provide the `with_escalated_permissions` parameter with the boolean value true (when available).\n"
|
|
3242
|
-
" - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter.\n\n"
|
|
3243
|
-
"## Special user requests\n"
|
|
3244
|
-
"- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.\n"
|
|
3245
|
-
"- If the user asks for a 'review', default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.\n\n"
|
|
3246
|
-
"## Presenting your work and final message\n"
|
|
3247
|
-
"You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.\n"
|
|
3248
|
-
"- Default: be very concise; friendly coding teammate tone.\n"
|
|
3249
|
-
"- Ask only when needed; suggest ideas; mirror the user's style.\n"
|
|
3250
|
-
"- For substantial work, summarize clearly; follow final-answer formatting.\n"
|
|
3251
|
-
"- Skip heavy formatting for simple confirmations.\n"
|
|
3252
|
-
"- Don't dump large files you've written; reference paths only.\n"
|
|
3253
|
-
"- No 'save/copy this file' - User is on the same machine.\n"
|
|
3254
|
-
"- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.\n"
|
|
3255
|
-
"- For code changes:\n"
|
|
3256
|
-
" * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with 'summary', just jump right in.\n"
|
|
3257
|
-
" * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.\n"
|
|
3258
|
-
"- File References: When referencing files in your response, include the relevant start line and follow these rules: use inline code for paths; each reference should have a standalone path; accepted: absolute, workspace-relative, a/ or b/ diff prefixes, or bare filename/suffix; optional line/column uses :line[:column] or #LlineCcolumn; do not use URIs; do not provide line ranges.\n\n"
|
|
3259
|
-
"Apply Patch\n"
|
|
3260
|
-
"As shared previously in the GPT-5 prompting guide, use apply_patch for file edits to match the training distribution.\n\n"
|
|
3261
|
-
"Preambles\n"
|
|
3262
|
-
"GPT-5-Codex does not emit preambles. Do not ask for them.\n\n"
|
|
3263
|
-
"Frontend Guidance\n"
|
|
3264
|
-
"Use the following libraries unless the user or repo specifies otherwise:\n"
|
|
3265
|
-
"Framework: React + TypeScript\n"
|
|
3266
|
-
"Styling: Tailwind CSS\n"
|
|
3267
|
-
"Components: shadcn/ui\n"
|
|
3268
|
-
"Icons: lucide-react\n"
|
|
3269
|
-
"Animation: Framer Motion\n"
|
|
3270
|
-
"Charts: Recharts\n"
|
|
3271
|
-
"Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
|
|
3272
|
-
)
|
|
3273
|
-
|
|
3274
|
-
def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
|
|
3365
|
+
msgs.append({"role": "user", "content": content})
|
|
3366
|
+
return msgs
|
|
3367
|
+
|
|
3368
|
+
def _codex_system_prompt(self) -> str:
|
|
3369
|
+
"""Minimal developer system prompt for GPT-5 Codex family."""
|
|
3370
|
+
return (
|
|
3371
|
+
"You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.\n"
|
|
3372
|
+
"Note: In this CLI, the terminal tool is named 'run_command' (not 'shell'). Use run_command and always set the 'cwd' parameter; avoid using 'cd'.\n\n"
|
|
3373
|
+
"## General\n"
|
|
3374
|
+
"- Always set the 'cwd' param when using run_command. Do not use 'cd' unless absolutely necessary.\n"
|
|
3375
|
+
"- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)\n\n"
|
|
3376
|
+
"## Editing constraints\n"
|
|
3377
|
+
"- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.\n"
|
|
3378
|
+
"- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like \"Assigns the value to the variable\", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.\n"
|
|
3379
|
+
"- You may be in a dirty git worktree.\n"
|
|
3380
|
+
" * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.\n"
|
|
3381
|
+
" * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.\n"
|
|
3382
|
+
" * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.\n"
|
|
3383
|
+
" * If the changes are in unrelated files, just ignore them and don't revert them.\n"
|
|
3384
|
+
"- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.\n\n"
|
|
3385
|
+
"## Plan tool\n"
|
|
3386
|
+
"When using the planning tool:\n"
|
|
3387
|
+
"- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).\n"
|
|
3388
|
+
"- Do not make single-step plans.\n"
|
|
3389
|
+
"- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.\n\n"
|
|
3390
|
+
"## Codex CLI harness, sandboxing, and approvals\n"
|
|
3391
|
+
"The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.\n"
|
|
3392
|
+
"Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:\n"
|
|
3393
|
+
"- read-only: The sandbox only permits reading files.\n"
|
|
3394
|
+
"- workspace-write: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.\n"
|
|
3395
|
+
"- danger-full-access: No filesystem sandboxing - all commands are permitted.\n"
|
|
3396
|
+
"Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:\n"
|
|
3397
|
+
"- restricted: Requires approval\n"
|
|
3398
|
+
"- enabled: No approval needed\n"
|
|
3399
|
+
"Approvals are your mechanism to get user consent to run shell/terminal commands without the sandbox. Possible configuration options for `approval_policy` are\n"
|
|
3400
|
+
"- untrusted: The harness will escalate most commands for user approval, apart from a limited allowlist of safe 'read' commands.\n"
|
|
3401
|
+
"- on-failure: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.\n"
|
|
3402
|
+
"- on-request: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing.\n"
|
|
3403
|
+
"- never: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user.\n\n"
|
|
3404
|
+
"When requesting approval to execute a command that will require escalated privileges:\n"
|
|
3405
|
+
" - Provide the `with_escalated_permissions` parameter with the boolean value true (when available).\n"
|
|
3406
|
+
" - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter.\n\n"
|
|
3407
|
+
"## Special user requests\n"
|
|
3408
|
+
"- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.\n"
|
|
3409
|
+
"- If the user asks for a 'review', default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.\n\n"
|
|
3410
|
+
"## Presenting your work and final message\n"
|
|
3411
|
+
"You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.\n"
|
|
3412
|
+
"- Default: be very concise; friendly coding teammate tone.\n"
|
|
3413
|
+
"- Ask only when needed; suggest ideas; mirror the user's style.\n"
|
|
3414
|
+
"- For substantial work, summarize clearly; follow final-answer formatting.\n"
|
|
3415
|
+
"- Skip heavy formatting for simple confirmations.\n"
|
|
3416
|
+
"- Don't dump large files you've written; reference paths only.\n"
|
|
3417
|
+
"- No 'save/copy this file' - User is on the same machine.\n"
|
|
3418
|
+
"- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.\n"
|
|
3419
|
+
"- For code changes:\n"
|
|
3420
|
+
" * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with 'summary', just jump right in.\n"
|
|
3421
|
+
" * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.\n"
|
|
3422
|
+
"- File References: When referencing files in your response, include the relevant start line and follow these rules: use inline code for paths; each reference should have a standalone path; accepted: absolute, workspace-relative, a/ or b/ diff prefixes, or bare filename/suffix; optional line/column uses :line[:column] or #LlineCcolumn; do not use URIs; do not provide line ranges.\n\n"
|
|
3423
|
+
"Apply Patch\n"
|
|
3424
|
+
"As shared previously in the GPT-5 prompting guide, use apply_patch for file edits to match the training distribution.\n\n"
|
|
3425
|
+
"Preambles\n"
|
|
3426
|
+
"GPT-5-Codex does not emit preambles. Do not ask for them.\n\n"
|
|
3427
|
+
"Frontend Guidance\n"
|
|
3428
|
+
"Use the following libraries unless the user or repo specifies otherwise:\n"
|
|
3429
|
+
"Framework: React + TypeScript\n"
|
|
3430
|
+
"Styling: Tailwind CSS\n"
|
|
3431
|
+
"Components: shadcn/ui\n"
|
|
3432
|
+
"Icons: lucide-react\n"
|
|
3433
|
+
"Animation: Framer Motion\n"
|
|
3434
|
+
"Charts: Recharts\n"
|
|
3435
|
+
"Fonts: San Serif, Inter, Geist, Mona Sans, IBM Plex Sans, Manrope\n"
|
|
3436
|
+
)
|
|
3437
|
+
|
|
3438
|
+
def _build_kimi_raw_messages(self, user_input: str) -> List[Dict[str, Any]]:
|
|
3275
3439
|
"""Build provider-native messages for Kimi preserving prior assistant reasoning_content.
|
|
3276
3440
|
Includes prior provider-native turns and the current user message with first-turn injections.
|
|
3277
3441
|
"""
|
|
@@ -3289,31 +3453,31 @@ class ChatCLI:
|
|
|
3289
3453
|
for m in (self._kimi_raw_history or []):
|
|
3290
3454
|
raw.append(m)
|
|
3291
3455
|
# Append current user message
|
|
3292
|
-
raw.append({"role": "user", "content": content})
|
|
3293
|
-
return raw
|
|
3294
|
-
|
|
3295
|
-
def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
|
|
3296
|
-
"""Normalize Gemini provider-native history.
|
|
3297
|
-
|
|
3298
|
-
Ensures we only send a flat list of dicts back to the server.
|
|
3299
|
-
This prevents accidental nesting like [[{...}, {...}]] which the
|
|
3300
|
-
google-genai SDK rejects with pydantic union validation errors.
|
|
3301
|
-
"""
|
|
3302
|
-
out: List[Dict[str, Any]] = []
|
|
3303
|
-
if not isinstance(rpm, list):
|
|
3304
|
-
return out
|
|
3305
|
-
for item in rpm:
|
|
3306
|
-
if item is None:
|
|
3307
|
-
continue
|
|
3308
|
-
if isinstance(item, list):
|
|
3309
|
-
# Flatten one level
|
|
3310
|
-
for sub in item:
|
|
3311
|
-
if isinstance(sub, dict):
|
|
3312
|
-
out.append(dict(sub))
|
|
3313
|
-
continue
|
|
3314
|
-
if isinstance(item, dict):
|
|
3315
|
-
out.append(dict(item))
|
|
3316
|
-
return out
|
|
3456
|
+
raw.append({"role": "user", "content": content})
|
|
3457
|
+
return raw
|
|
3458
|
+
|
|
3459
|
+
def _normalize_gemini_raw_messages(self, rpm: Any) -> List[Dict[str, Any]]:
|
|
3460
|
+
"""Normalize Gemini provider-native history.
|
|
3461
|
+
|
|
3462
|
+
Ensures we only send a flat list of dicts back to the server.
|
|
3463
|
+
This prevents accidental nesting like [[{...}, {...}]] which the
|
|
3464
|
+
google-genai SDK rejects with pydantic union validation errors.
|
|
3465
|
+
"""
|
|
3466
|
+
out: List[Dict[str, Any]] = []
|
|
3467
|
+
if not isinstance(rpm, list):
|
|
3468
|
+
return out
|
|
3469
|
+
for item in rpm:
|
|
3470
|
+
if item is None:
|
|
3471
|
+
continue
|
|
3472
|
+
if isinstance(item, list):
|
|
3473
|
+
# Flatten one level
|
|
3474
|
+
for sub in item:
|
|
3475
|
+
if isinstance(sub, dict):
|
|
3476
|
+
out.append(dict(sub))
|
|
3477
|
+
continue
|
|
3478
|
+
if isinstance(item, dict):
|
|
3479
|
+
out.append(dict(item))
|
|
3480
|
+
return out
|
|
3317
3481
|
|
|
3318
3482
|
def _build_working_memory_injection(self) -> Optional[str]:
|
|
3319
3483
|
try:
|
|
@@ -3483,54 +3647,54 @@ class ChatCLI:
|
|
|
3483
3647
|
except Exception:
|
|
3484
3648
|
return ""
|
|
3485
3649
|
|
|
3486
|
-
def _approval_prompt_ui(self, label: str, args: Dict[str, Any]) -> str:
|
|
3650
|
+
def _approval_prompt_ui(self, label: str, args: Dict[str, Any]) -> str:
|
|
3487
3651
|
"""Interactive approval prompt for Level 2.
|
|
3488
3652
|
|
|
3489
3653
|
Uses the same highlighted, arrow-key-driven menu UX as the rest of the CLI
|
|
3490
3654
|
when TTY input is available, and falls back to numeric input otherwise.
|
|
3491
3655
|
Returns one of: "once", "session", "always", "deny".
|
|
3492
3656
|
"""
|
|
3493
|
-
self.ui.print(f"\n[Level 2] Approval required for: {label}")
|
|
3494
|
-
# Show a compact summary
|
|
3495
|
-
summary = self._tool_summary(label.split(":")[0], args)
|
|
3496
|
-
self.ui.print(summary, style=self.ui.theme["dim"])
|
|
3497
|
-
|
|
3498
|
-
# Show what we're actually approving (key fields), so the user can make an informed decision.
|
|
3499
|
-
try:
|
|
3500
|
-
base_tool = (label.split(":")[0] if isinstance(label, str) and ":" in label else label) or ""
|
|
3501
|
-
except Exception:
|
|
3502
|
-
base_tool = label or ""
|
|
3503
|
-
bt = str(base_tool).strip().lower()
|
|
3504
|
-
try:
|
|
3505
|
-
if bt == "run_command":
|
|
3506
|
-
cmd = args.get("cmd")
|
|
3507
|
-
cwd = args.get("cwd")
|
|
3508
|
-
timeout = args.get("timeout")
|
|
3509
|
-
if cmd is not None:
|
|
3510
|
-
self.ui.print(f"cmd: {self._clip(cmd, 400)}", style=self.ui.theme["dim"])
|
|
3511
|
-
if cwd is not None:
|
|
3512
|
-
self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
|
|
3513
|
-
if timeout is not None:
|
|
3514
|
-
self.ui.print(f"timeout: {timeout}", style=self.ui.theme["dim"])
|
|
3515
|
-
elif bt in ("write_file", "append_file"):
|
|
3516
|
-
path = args.get("path")
|
|
3517
|
-
content = args.get("content") or ""
|
|
3518
|
-
self.ui.print(f"path: {path}", style=self.ui.theme["dim"])
|
|
3519
|
-
try:
|
|
3520
|
-
b = len(str(content).encode("utf-8", errors="replace"))
|
|
3521
|
-
except Exception:
|
|
3522
|
-
b = None
|
|
3523
|
-
if b is not None:
|
|
3524
|
-
self.ui.print(f"content_bytes: {b}", style=self.ui.theme["dim"])
|
|
3525
|
-
elif bt == "apply_patch":
|
|
3526
|
-
cwd = args.get("cwd")
|
|
3527
|
-
dry = bool(args.get("dry_run", False))
|
|
3528
|
-
if cwd is not None:
|
|
3529
|
-
self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
|
|
3530
|
-
self.ui.print(f"dry_run: {dry}", style=self.ui.theme["dim"])
|
|
3531
|
-
except Exception:
|
|
3532
|
-
# Never block approvals on formatting
|
|
3533
|
-
pass
|
|
3657
|
+
self.ui.print(f"\n[Level 2] Approval required for: {label}")
|
|
3658
|
+
# Show a compact summary
|
|
3659
|
+
summary = self._tool_summary(label.split(":")[0], args)
|
|
3660
|
+
self.ui.print(summary, style=self.ui.theme["dim"])
|
|
3661
|
+
|
|
3662
|
+
# Show what we're actually approving (key fields), so the user can make an informed decision.
|
|
3663
|
+
try:
|
|
3664
|
+
base_tool = (label.split(":")[0] if isinstance(label, str) and ":" in label else label) or ""
|
|
3665
|
+
except Exception:
|
|
3666
|
+
base_tool = label or ""
|
|
3667
|
+
bt = str(base_tool).strip().lower()
|
|
3668
|
+
try:
|
|
3669
|
+
if bt == "run_command":
|
|
3670
|
+
cmd = args.get("cmd")
|
|
3671
|
+
cwd = args.get("cwd")
|
|
3672
|
+
timeout = args.get("timeout")
|
|
3673
|
+
if cmd is not None:
|
|
3674
|
+
self.ui.print(f"cmd: {self._clip(cmd, 400)}", style=self.ui.theme["dim"])
|
|
3675
|
+
if cwd is not None:
|
|
3676
|
+
self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
|
|
3677
|
+
if timeout is not None:
|
|
3678
|
+
self.ui.print(f"timeout: {timeout}", style=self.ui.theme["dim"])
|
|
3679
|
+
elif bt in ("write_file", "append_file"):
|
|
3680
|
+
path = args.get("path")
|
|
3681
|
+
content = args.get("content") or ""
|
|
3682
|
+
self.ui.print(f"path: {path}", style=self.ui.theme["dim"])
|
|
3683
|
+
try:
|
|
3684
|
+
b = len(str(content).encode("utf-8", errors="replace"))
|
|
3685
|
+
except Exception:
|
|
3686
|
+
b = None
|
|
3687
|
+
if b is not None:
|
|
3688
|
+
self.ui.print(f"content_bytes: {b}", style=self.ui.theme["dim"])
|
|
3689
|
+
elif bt == "apply_patch":
|
|
3690
|
+
cwd = args.get("cwd")
|
|
3691
|
+
dry = bool(args.get("dry_run", False))
|
|
3692
|
+
if cwd is not None:
|
|
3693
|
+
self.ui.print(f"cwd: {cwd}", style=self.ui.theme["dim"])
|
|
3694
|
+
self.ui.print(f"dry_run: {dry}", style=self.ui.theme["dim"])
|
|
3695
|
+
except Exception:
|
|
3696
|
+
# Never block approvals on formatting
|
|
3697
|
+
pass
|
|
3534
3698
|
|
|
3535
3699
|
choices: List[Tuple[str, str]] = [
|
|
3536
3700
|
("once", "Approve once"),
|
|
@@ -3788,6 +3952,39 @@ class ChatCLI:
if n.lower() in ("context", "to_next"):
    return "Context handoff to next turn"

+# Universal context handoff tool (server-side). Show what files the model chose to keep.
+if n.lower() == "context_handoff":
+    try:
+        rd = (result or {}).get("data") if isinstance(result, dict) else None
+        rd = rd if isinstance(rd, dict) else {}
+        reason = (rd.get("reason") if isinstance(rd, dict) else None) or ""
+        mem_path = (rd.get("memory_path") if isinstance(rd, dict) else None) or ""
+        keep_files = rd.get("keep_files") if isinstance(rd, dict) else None
+        files: List[str] = []
+        if isinstance(keep_files, list):
+            for x in keep_files:
+                if isinstance(x, str) and x.strip():
+                    files.append(x.strip())
+        # Limit for single-line readability
+        shown = files[:5]
+        more = max(0, len(files) - len(shown))
+        if shown:
+            files_part = ", ".join(shown) + (f" (+{more} more)" if more else "")
+            return (
+                "Context handoff saved"
+                + (f" (reason={reason})" if str(reason).strip() else "")
+                + f". Files kept in context: {files_part}"
+                + (f". memory_path: {mem_path}" if str(mem_path).strip() else "")
+            )
+        return (
+            "Context handoff saved"
+            + (f" (reason={reason})" if str(reason).strip() else "")
+            + ". No files were selected to be kept in context"
+            + (f". memory_path: {mem_path}" if str(mem_path).strip() else "")
+        )
+    except Exception:
+        return "Context handoff saved"
+
def _arg_path() -> str:
    p = a.get("path") or data.get("path") or ""
    try:
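The single-line summary above caps the file list at five entries and appends a "+N more" suffix. Equivalent standalone logic as a sketch; summarize_handoff is a hypothetical helper and the example values (reason, memory_path, file names) are made up:

from typing import Any, Dict, List

def summarize_handoff(data: Dict[str, Any], limit: int = 5) -> str:
    reason = str(data.get("reason") or "").strip()
    mem_path = str(data.get("memory_path") or "").strip()
    files: List[str] = [str(x).strip() for x in (data.get("keep_files") or []) if str(x).strip()]
    shown, more = files[:limit], max(0, len(files) - limit)
    msg = "Context handoff saved"
    if reason:
        msg += f" (reason={reason})"
    if shown:
        msg += ". Files kept in context: " + ", ".join(shown) + (f" (+{more} more)" if more else "")
    else:
        msg += ". No files were selected to be kept in context"
    if mem_path:
        msg += f". memory_path: {mem_path}"
    return msg

print(summarize_handoff({"reason": "context_full",
                         "memory_path": "memory/handoff.md",
                         "keep_files": [f"src/f{i}.py" for i in range(8)]}))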
@@ -3897,6 +4094,35 @@ class ChatCLI:
|
|
|
3897
4094
|
return
|
|
3898
4095
|
|
|
3899
4096
|
data = result.get("data", {}) or {}
|
|
4097
|
+
|
|
4098
|
+
# Universal context_handoff tool: show where it was saved and what files were chosen.
|
|
4099
|
+
if name == "context_handoff":
|
|
4100
|
+
try:
|
|
4101
|
+
reason = data.get("reason")
|
|
4102
|
+
mem_path = data.get("memory_path")
|
|
4103
|
+
keep_files = data.get("keep_files")
|
|
4104
|
+
self.ui.print(
|
|
4105
|
+
f"⇐ [{self.ui.theme['tool_result']}]✅ Context handoff saved[/{self.ui.theme['tool_result']}]"
|
|
4106
|
+
+ (f" (reason={reason})" if isinstance(reason, str) and reason.strip() else "")
|
|
4107
|
+
)
|
|
4108
|
+
if isinstance(mem_path, str) and mem_path.strip():
|
|
4109
|
+
self.ui.print(f"memory_path: {mem_path}", style=self.ui.theme["dim"]) # type: ignore
|
|
4110
|
+
if isinstance(keep_files, list) and keep_files:
|
|
4111
|
+
cleaned = [str(x).strip() for x in keep_files if isinstance(x, (str, int, float)) and str(x).strip()]
|
|
4112
|
+
if cleaned:
|
|
4113
|
+
self.ui.print("Files kept in context:", style=self.ui.theme["dim"]) # type: ignore
|
|
4114
|
+
for p in cleaned[:25]:
|
|
4115
|
+
self.ui.print(f"- {p}", style=self.ui.theme["dim"]) # type: ignore
|
|
4116
|
+
if len(cleaned) > 25:
|
|
4117
|
+
self.ui.print(f"... +{len(cleaned)-25} more", style=self.ui.theme["dim"]) # type: ignore
|
|
4118
|
+
else:
|
|
4119
|
+
self.ui.print("No files were selected to be kept in context.", style=self.ui.theme["dim"]) # type: ignore
|
|
4120
|
+
else:
|
|
4121
|
+
self.ui.print("No files were selected to be kept in context.", style=self.ui.theme["dim"]) # type: ignore
|
|
4122
|
+
except Exception:
|
|
4123
|
+
# Fall through to default renderer below.
|
|
4124
|
+
pass
|
|
4125
|
+
return
|
|
3900
4126
|
if name == "read_file":
|
|
3901
4127
|
path = data.get("path", "")
|
|
3902
4128
|
content = data.get("content", "") or ""
|
|
@@ -4302,25 +4528,25 @@ class ChatCLI:
|
|
|
4302
4528
|
await self.open_settings()
|
|
4303
4529
|
return True
|
|
4304
4530
|
|
|
4305
|
-
if cmd.startswith("/history"):
|
|
4306
|
-
parts = cmd.split(maxsplit=1)
|
|
4307
|
-
if len(parts) == 1:
|
|
4308
|
-
self.ui.info("Usage: /history on|off")
|
|
4309
|
-
self.ui.info(f"Current: {'ON (synced to unified memory)' if self.save_chat_history else 'OFF (ephemeral)'}")
|
|
4310
|
-
return True
|
|
4311
|
-
arg = parts[1].strip().lower()
|
|
4312
|
-
if arg == "on":
|
|
4313
|
-
self.save_chat_history = True
|
|
4314
|
-
self.ui.success("Chat history will be saved to unified memory.")
|
|
4315
|
-
elif arg == "off":
|
|
4316
|
-
self.save_chat_history = False
|
|
4317
|
-
self.ui.success("Chat history is now ephemeral (local only, not synced).")
|
|
4318
|
-
else:
|
|
4319
|
-
self.ui.warn("Usage: /history on|off")
|
|
4320
|
-
return True
|
|
4321
|
-
self.save_settings()
|
|
4322
|
-
return True
|
|
4323
|
-
|
|
4531
|
+
if cmd.startswith("/history"):
|
|
4532
|
+
parts = cmd.split(maxsplit=1)
|
|
4533
|
+
if len(parts) == 1:
|
|
4534
|
+
self.ui.info("Usage: /history on|off")
|
|
4535
|
+
self.ui.info(f"Current: {'ON (synced to unified memory)' if self.save_chat_history else 'OFF (ephemeral)'}")
|
|
4536
|
+
return True
|
|
4537
|
+
arg = parts[1].strip().lower()
|
|
4538
|
+
if arg == "on":
|
|
4539
|
+
self.save_chat_history = True
|
|
4540
|
+
self.ui.success("Chat history will be saved to unified memory.")
|
|
4541
|
+
elif arg == "off":
|
|
4542
|
+
self.save_chat_history = False
|
|
4543
|
+
self.ui.success("Chat history is now ephemeral (local only, not synced).")
|
|
4544
|
+
else:
|
|
4545
|
+
self.ui.warn("Usage: /history on|off")
|
|
4546
|
+
return True
|
|
4547
|
+
self.save_settings()
|
|
4548
|
+
return True
|
|
4549
|
+
|
|
4324
4550
|
if cmd.startswith("/tools"):
|
|
4325
4551
|
parts = cmd.split(maxsplit=1)
|
|
4326
4552
|
if len(parts) == 1:
|
|
@@ -4433,25 +4659,25 @@ class ChatCLI:
|
|
|
4433
4659
|
self.ui.warn("Unknown /websearch subcommand. Use on, off, domains, sources, or location.")
|
|
4434
4660
|
return True
|
|
4435
4661
|
|
|
4436
|
-
if cmd.startswith("/reasoning"):
|
|
4437
|
-
parts = cmd.split(maxsplit=1)
|
|
4438
|
-
if len(parts) == 1:
|
|
4439
|
-
self.ui.info("Usage: /reasoning low|medium|high|xhigh")
|
|
4440
|
-
self.ui.info(f"Current: {self.reasoning_effort}")
|
|
4441
|
-
return True
|
|
4442
|
-
arg = (parts[1] or "").strip().lower()
|
|
4443
|
-
if arg in ("low", "medium", "high", "xhigh"):
|
|
4444
|
-
self.reasoning_effort = arg
|
|
4445
|
-
if arg == "xhigh" and not self._supports_xhigh_reasoning_effort(self.model):
|
|
4446
|
-
# Keep the user's preference, but be explicit about server-side downgrading.
|
|
4447
|
-
self.ui.warn(
|
|
4448
|
-
"Note: xhigh is only applied on models that support it (e.g., gpt-5.2* / gpt-5.1-codex-max). The server may downgrade it on other models."
|
|
4449
|
-
)
|
|
4450
|
-
self.ui.success(f"Reasoning effort set to: {self.reasoning_effort}")
|
|
4451
|
-
self.save_settings()
|
|
4452
|
-
else:
|
|
4453
|
-
self.ui.warn("Invalid value. Use: low, medium, high, or xhigh")
|
|
4454
|
-
return True
|
|
4662
|
+
if cmd.startswith("/reasoning"):
|
|
4663
|
+
parts = cmd.split(maxsplit=1)
|
|
4664
|
+
if len(parts) == 1:
|
|
4665
|
+
self.ui.info("Usage: /reasoning low|medium|high|xhigh")
|
|
4666
|
+
self.ui.info(f"Current: {self.reasoning_effort}")
|
|
4667
|
+
return True
|
|
4668
|
+
arg = (parts[1] or "").strip().lower()
|
|
4669
|
+
if arg in ("low", "medium", "high", "xhigh"):
|
|
4670
|
+
self.reasoning_effort = arg
|
|
4671
|
+
if arg == "xhigh" and not self._supports_xhigh_reasoning_effort(self.model):
|
|
4672
|
+
# Keep the user's preference, but be explicit about server-side downgrading.
|
|
4673
|
+
self.ui.warn(
|
|
4674
|
+
"Note: xhigh is only applied on models that support it (e.g., gpt-5.2* / gpt-5.1-codex-max). The server may downgrade it on other models."
|
|
4675
|
+
)
|
|
4676
|
+
self.ui.success(f"Reasoning effort set to: {self.reasoning_effort}")
|
|
4677
|
+
self.save_settings()
|
|
4678
|
+
else:
|
|
4679
|
+
self.ui.warn("Invalid value. Use: low, medium, high, or xhigh")
|
|
4680
|
+
return True
|
|
4455
4681
|
|
|
4456
4682
|
if cmd.startswith("/thinkingbudget"):
|
|
4457
4683
|
parts = cmd.split(maxsplit=1)
|
|
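It may help to see the /reasoning validation from the hunk above in isolation: accept only the four known levels, and warn when xhigh is requested on a model that cannot honor it. set_reasoning_effort and supports_xhigh are illustrative stand-ins, not the CLI's real methods:

from typing import Optional, Tuple

VALID_EFFORTS = ("low", "medium", "high", "xhigh")

def supports_xhigh(model: Optional[str]) -> bool:
    m = (model or "").lower()
    return m.startswith("gpt-5.2") or m == "gpt-5.1-codex-max"

def set_reasoning_effort(arg: str, model: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    # Returns (new_effort, warning). Invalid input leaves the effort unchanged (None).
    value = (arg or "").strip().lower()
    if value not in VALID_EFFORTS:
        return None, "Invalid value. Use: low, medium, high, or xhigh"
    warning = None
    if value == "xhigh" and not supports_xhigh(model):
        warning = "xhigh is kept as your preference but the server may downgrade it on this model."
    return value, warning

print(set_reasoning_effort("xhigh", "gpt-5"))   # ('xhigh', 'xhigh is kept as your preference ...')
print(set_reasoning_effort("fast", "gpt-5.2"))  # (None, 'Invalid value. Use: low, medium, high, or xhigh')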
@@ -4628,6 +4854,14 @@ class ChatCLI:
except Exception:
    pass
self.ui.success("System prompt set.")
+# OpenAI threaded state is invalid once the system prompt changes.
+try:
+    self._openai_previous_response_id = None
+    self._openai_response_id_history = []
+    self._openai_input_items = []
+    self._openai_last_sent_input_items = None
+except Exception:
+    pass
self.save_settings()
return True

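A compact sketch of the same invalidation idea: when the system prompt changes, any previous_response_id chaining must be dropped so the next request starts a fresh thread. ThreadState and its field names here are illustrative, not the package's real attributes:

from dataclasses import dataclass, field
from typing import Any, List, Optional

@dataclass
class ThreadState:
    # Mirrors the per-session fields cleared above, under assumed names.
    previous_response_id: Optional[str] = None
    response_id_history: List[str] = field(default_factory=list)
    input_items: List[Any] = field(default_factory=list)
    last_sent_input_items: Optional[List[Any]] = None

def reset_openai_thread(state: ThreadState) -> None:
    # Chained responses embed the old system prompt, so the chain must be abandoned.
    state.previous_response_id = None
    state.response_id_history = []
    state.input_items = []
    state.last_sent_input_items = None

state = ThreadState(previous_response_id="resp_123", response_id_history=["resp_123"])
reset_openai_thread(state)
print(state.previous_response_id)  # None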
@@ -4641,7 +4875,7 @@ class ChatCLI:
|
|
|
4641
4875
|
self.ui.success(f"Thread title set to: {self.thread_name}")
|
|
4642
4876
|
return True
|
|
4643
4877
|
|
|
4644
|
-
if cmd == "/clear":
|
|
4878
|
+
if cmd == "/clear":
|
|
4645
4879
|
self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
|
|
4646
4880
|
self._did_inject_codebase_map = False
|
|
4647
4881
|
try:
|
|
@@ -4650,13 +4884,17 @@ class ChatCLI:
|
|
|
4650
4884
|
pass
|
|
4651
4885
|
# Reset provider-native histories
|
|
4652
4886
|
try:
|
|
4653
|
-
self.messages_for_save = []
|
|
4654
|
-
if not self.save_chat_history:
|
|
4655
|
-
self.thread_uid = None
|
|
4656
|
-
self._kimi_raw_history = []
|
|
4657
|
-
self._gemini_raw_history = []
|
|
4658
|
-
|
|
4659
|
-
|
|
4887
|
+
self.messages_for_save = []
|
|
4888
|
+
if not self.save_chat_history:
|
|
4889
|
+
self.thread_uid = None
|
|
4890
|
+
self._kimi_raw_history = []
|
|
4891
|
+
self._gemini_raw_history = []
|
|
4892
|
+
self._openai_previous_response_id = None
|
|
4893
|
+
self._openai_response_id_history = []
|
|
4894
|
+
self._openai_input_items = []
|
|
4895
|
+
self._openai_last_sent_input_items = None
|
|
4896
|
+
except Exception:
|
|
4897
|
+
pass
|
|
4660
4898
|
# Reset local cumulative token counters on session clear
|
|
4661
4899
|
self._cum_input_tokens = 0
|
|
4662
4900
|
self._cum_output_tokens = 0
|
|
@@ -4857,9 +5095,9 @@ class ChatCLI:
|
|
|
4857
5095
|
# We have a fallback map (repo copy) but none at host base
|
|
4858
5096
|
self.ui.print("Code Map: fallback example in use (host base missing CODEBASE_MAP.md). It will be prefixed.", style=self.ui.theme["dim"])
|
|
4859
5097
|
else:
|
|
4860
|
-
self.ui.print("Code Map: missing at host base — toggle with /map on|off", style=self.ui.theme["dim"])
|
|
4861
|
-
# History status
|
|
4862
|
-
hist_status = "ON (synced)" if self.save_chat_history else "OFF (ephemeral)"
|
|
5098
|
+
self.ui.print("Code Map: missing at host base — toggle with /map on|off", style=self.ui.theme["dim"])
|
|
5099
|
+
# History status
|
|
5100
|
+
hist_status = "ON (synced)" if self.save_chat_history else "OFF (ephemeral)"
|
|
4863
5101
|
self.ui.print(f"Chat history: {hist_status}", style=self.ui.theme["dim"])
|
|
4864
5102
|
# If a host base is configured and code map injection is enabled, offer to generate when missing
|
|
4865
5103
|
try:
|
|
@@ -4909,20 +5147,48 @@ class ChatCLI:
|
|
|
4909
5147
|
pt_completer = self._commands_word_completer()
|
|
4910
5148
|
while True:
|
|
4911
5149
|
try:
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
|
|
4915
|
-
|
|
4916
|
-
|
|
4917
|
-
|
|
4918
|
-
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
|
|
5150
|
+
pending_edit = self._pending_user_edit
|
|
5151
|
+
edit_mode = pending_edit is not None
|
|
5152
|
+
|
|
5153
|
+
# prompt_toolkit is intentionally not used.
|
|
5154
|
+
# Always prefer our dependency-free input engine when available.
|
|
5155
|
+
if self._input_engine:
|
|
5156
|
+
if edit_mode:
|
|
5157
|
+
# The low-level input engine doesn't support prefilling.
|
|
5158
|
+
# Show the prior message and let the user paste a replacement.
|
|
5159
|
+
try:
|
|
5160
|
+
self.ui.print(
|
|
5161
|
+
"\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):",
|
|
5162
|
+
style=self.ui.theme["warn"], # type: ignore
|
|
5163
|
+
)
|
|
5164
|
+
self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
|
|
5165
|
+
except Exception:
|
|
5166
|
+
pass
|
|
5167
|
+
new_txt = self._read_multiline_input("Edit> ")
|
|
5168
|
+
user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
|
|
5169
|
+
else:
|
|
5170
|
+
# Do not add continuation prefixes on new lines.
|
|
5171
|
+
user_input = self._input_engine.read_message("You: ", "")
|
|
4924
5172
|
else:
|
|
4925
|
-
|
|
5173
|
+
# Last-resort fallback.
|
|
5174
|
+
if edit_mode:
|
|
5175
|
+
try:
|
|
5176
|
+
self.ui.print(
|
|
5177
|
+
"\nInterrupted. Edit last message (press Enter on an empty line to resend unchanged):",
|
|
5178
|
+
style=self.ui.theme["warn"], # type: ignore
|
|
5179
|
+
)
|
|
5180
|
+
self.ui.print(str(pending_edit), style=self.ui.theme["dim"]) # type: ignore
|
|
5181
|
+
except Exception:
|
|
5182
|
+
pass
|
|
5183
|
+
new_txt = self._read_multiline_input("Edit> ")
|
|
5184
|
+
user_input = (str(pending_edit) if not new_txt.strip() else new_txt)
|
|
5185
|
+
else:
|
|
5186
|
+
user_input = self._read_multiline_input("You: ")
|
|
5187
|
+
|
|
5188
|
+
# Clear pending edit state after we successfully collected input.
|
|
5189
|
+
if edit_mode:
|
|
5190
|
+
self._pending_user_edit = None
|
|
5191
|
+
self._pending_turn_snapshot = None
|
|
4926
5192
|
# Successful read resets interrupt window
|
|
4927
5193
|
self._last_interrupt_ts = None
|
|
4928
5194
|
except KeyboardInterrupt:
|
|
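Since the low-level input engine cannot prefill the prior text, the edit flow above shows the old message and treats an empty reply as "resend unchanged". A stripped-down sketch of that convention; read_edit is illustrative only:

from typing import Callable

def read_edit(previous: str, read_line: Callable[[str], str]) -> str:
    # Show the prior message; an empty reply means "resend unchanged".
    print("Interrupted. Edit last message (press Enter on an empty line to resend unchanged):")
    print(previous)
    new_text = read_line("Edit> ")
    return previous if not new_text.strip() else new_text

# Example with a canned input source instead of a real terminal.
print(read_edit("fix the failing test", lambda prompt: ""))            # resends unchanged
print(read_edit("fix the failing test", lambda prompt: "add a test"))  # uses the edit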
@@ -4969,6 +5235,29 @@ class ChatCLI:
    continue

try:
+    # Snapshot pre-turn state so Ctrl+C during streaming can revert cleanly.
+    # This is critical for first-turn injections (code map/custom note/working memory)
+    # which are applied by mutating flags during payload construction.
+    self._pending_turn_snapshot = {
+        "history": copy.deepcopy(self.history),
+        "messages_for_save": copy.deepcopy(self.messages_for_save),
+        "kimi_raw": copy.deepcopy(self._kimi_raw_history),
+        "gemini_raw": copy.deepcopy(self._gemini_raw_history),
+        "openai_prev": getattr(self, "_openai_previous_response_id", None),
+        "openai_ids": copy.deepcopy(getattr(self, "_openai_response_id_history", [])),
+        "openai_input_items": copy.deepcopy(getattr(self, "_openai_input_items", [])),
+        "openai_last_sent_input_items": copy.deepcopy(getattr(self, "_openai_last_sent_input_items", None)),
+        "inflight_dispatch": copy.deepcopy(getattr(self, "_inflight_dispatch", None)),
+        "did_inject_codebase_map": bool(getattr(self, "_did_inject_codebase_map", False)),
+        "did_inject_custom_first_turn": bool(getattr(self, "_did_inject_custom_first_turn", False)),
+        "did_inject_working_memory": bool(getattr(self, "_did_inject_working_memory", False)),
+        "memory_paths_for_first_turn": copy.deepcopy(getattr(self, "_memory_paths_for_first_turn", [])),
+        "last_built_user_content": getattr(self, "_last_built_user_content", None),
+    }
+
+    # Clear any stale in-flight dispatch context at turn start.
+    self._inflight_dispatch = None
+
    # Record user message for local/server save
    if self.save_chat_history:
        self.messages_for_save.append({
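The snapshot above is what makes a clean rollback possible when a turn is interrupted mid-stream. A reduced sketch of the same pattern; the field names on Session are assumptions rather than the package's real attributes:

import copy
from typing import Any, Dict, List

class Session:
    def __init__(self) -> None:
        self.history: List[Dict[str, Any]] = []
        self.did_inject_codebase_map = False
        self._snapshot: Dict[str, Any] = {}

    def begin_turn(self) -> None:
        # Deep-copy everything a turn may mutate so Ctrl+C can revert it wholesale.
        self._snapshot = {
            "history": copy.deepcopy(self.history),
            "did_inject_codebase_map": self.did_inject_codebase_map,
        }

    def rollback_turn(self) -> None:
        self.history = self._snapshot.get("history", self.history)
        self.did_inject_codebase_map = bool(self._snapshot.get("did_inject_codebase_map", False))

s = Session()
s.begin_turn()
s.history.append({"role": "user", "content": "hello"})
s.did_inject_codebase_map = True
s.rollback_turn()
print(len(s.history), s.did_inject_codebase_map)  # 0 False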
@@ -4992,6 +5281,67 @@ class ChatCLI:
|
|
|
4992
5281
|
assistant_text = await self._stream_once(user_input)
|
|
4993
5282
|
finally:
|
|
4994
5283
|
self._busy = False
|
|
5284
|
+
except KeyboardInterrupt:
|
|
5285
|
+
# Ctrl+C mid-stream / mid-tool: do not exit the CLI.
|
|
5286
|
+
# Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
|
|
5287
|
+
try:
|
|
5288
|
+
await self._cancel_inflight_dispatch()
|
|
5289
|
+
except (Exception, BaseException):
|
|
5290
|
+
pass
|
|
5291
|
+
|
|
5292
|
+
# Restore state to *before* this turn started.
|
|
5293
|
+
try:
|
|
5294
|
+
snap = self._pending_turn_snapshot or {}
|
|
5295
|
+
if isinstance(snap.get("history"), list):
|
|
5296
|
+
self.history = snap.get("history")
|
|
5297
|
+
if isinstance(snap.get("messages_for_save"), list):
|
|
5298
|
+
self.messages_for_save = snap.get("messages_for_save")
|
|
5299
|
+
if isinstance(snap.get("kimi_raw"), list):
|
|
5300
|
+
self._kimi_raw_history = snap.get("kimi_raw")
|
|
5301
|
+
if isinstance(snap.get("gemini_raw"), list):
|
|
5302
|
+
self._gemini_raw_history = snap.get("gemini_raw")
|
|
5303
|
+
if "openai_prev" in snap:
|
|
5304
|
+
self._openai_previous_response_id = snap.get("openai_prev")
|
|
5305
|
+
if isinstance(snap.get("openai_ids"), list):
|
|
5306
|
+
self._openai_response_id_history = snap.get("openai_ids")
|
|
5307
|
+
if isinstance(snap.get("openai_input_items"), list):
|
|
5308
|
+
self._openai_input_items = snap.get("openai_input_items")
|
|
5309
|
+
if "openai_last_sent_input_items" in snap:
|
|
5310
|
+
self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
|
|
5311
|
+
if "inflight_dispatch" in snap:
|
|
5312
|
+
self._inflight_dispatch = snap.get("inflight_dispatch")
|
|
5313
|
+
if "did_inject_codebase_map" in snap:
|
|
5314
|
+
self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
|
|
5315
|
+
if "did_inject_custom_first_turn" in snap:
|
|
5316
|
+
self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
|
|
5317
|
+
if "did_inject_working_memory" in snap:
|
|
5318
|
+
self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
|
|
5319
|
+
if "memory_paths_for_first_turn" in snap:
|
|
5320
|
+
self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
|
|
5321
|
+
self._last_built_user_content = snap.get("last_built_user_content")
|
|
5322
|
+
except Exception:
|
|
5323
|
+
pass
|
|
5324
|
+
|
|
5325
|
+
# Clear any transient indicator line and land on a fresh prompt line.
|
|
5326
|
+
try:
|
|
5327
|
+
sys.stdout.write("\r\x1b[2K\n")
|
|
5328
|
+
sys.stdout.flush()
|
|
5329
|
+
except Exception:
|
|
5330
|
+
try:
|
|
5331
|
+
self.ui.print()
|
|
5332
|
+
except Exception:
|
|
5333
|
+
pass
|
|
5334
|
+
|
|
5335
|
+
try:
|
|
5336
|
+
supports = self._provider_supports_native_retention(self.model)
|
|
5337
|
+
except Exception:
|
|
5338
|
+
supports = False
|
|
5339
|
+
if supports:
|
|
5340
|
+
self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
|
|
5341
|
+
else:
|
|
5342
|
+
self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
|
|
5343
|
+
self._pending_user_edit = user_input
|
|
5344
|
+
continue
|
|
4995
5345
|
except httpx.HTTPStatusError as he:
|
|
4996
5346
|
try:
|
|
4997
5347
|
if he.response is not None:
|
|
@@ -5024,14 +5374,14 @@ class ChatCLI:
|
|
|
5024
5374
|
auth_action_key = "logout" if self.auth_user else "login"
|
|
5025
5375
|
auth_action_label = f"🔓 Logout ({self.auth_user})" if self.auth_user else "🔑 Login"
|
|
5026
5376
|
choices = [
|
|
5027
|
-
("toggle_tools", f"🧰 Toggle Tools ({self._tools_label()}) - Enable/disable file tools per request (ON: request tools, OFF: no tools, DEFAULT: server setting)"),
|
|
5377
|
+
("toggle_tools", f"🧰 Toggle Tools ({self._tools_label()}) - Enable/disable file tools per request (ON: request tools, OFF: no tools, DEFAULT: server setting)"),
|
|
5028
5378
|
("toggle_history", f"🕘 Toggle History ({'ON' if self.save_chat_history else 'OFF'}) - Save chats to unified memory"),
|
|
5029
5379
|
("set_scope", f"📦 Set Filesystem Scope (current: {self._fs_label()}) - Choose workspace (sandbox) or host (full filesystem access if allowed)"),
|
|
5030
5380
|
("set_host_base", f"🖥️ Set Agent Scope (current: {self.host_base or '(none)'}) - Absolute path the agent can access when host scope is enabled"),
|
|
5031
5381
|
("set_level", f"🔒 Set Control Level (current: {self.control_level or 'server default'}) - Security level: 1=read-only, 2=write/exec with approval, 3=full access"),
|
|
5032
5382
|
("set_auto_approve", f"⚙️ Set Auto-approve Tools (current: {','.join(self.auto_approve) if self.auto_approve else '(none)'}) - Tools to auto-approve at Level 2 (e.g., write_file)"),
|
|
5033
5383
|
(auth_action_key, auth_action_label),
|
|
5034
|
-
("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5,
|
|
5384
|
+
("select_model", f"📋 Select Model (current: {self.model or 'server default'}) - Pick from presets (gpt-5.2, gpt-5.2-codex, gemini-3-pro-preview, kimi-k2.5, etc.)"),
|
|
5035
5385
|
("change_model", f"🤖 Change Model (current: {self.model or 'server default'}) - Manually type a model name"),
|
|
5036
5386
|
("set_system_prompt", "📝 Set System Prompt - Add initial instructions for the AI"),
|
|
5037
5387
|
("clear_history", "🧹 Clear History - Reset chat history"),
|
|
@@ -5067,22 +5417,21 @@ class ChatCLI:
has_credits = (self._last_remaining_credits is not None and self._last_remaining_credits > 0)
is_effectively_free = (self.is_free_tier and not has_credits)

-# Recommended models (
-
-
-    "
-    "
-    "gemini-3-pro-preview",
-    "gemini-3-flash-preview",
-    "
-    "
-    "
-
-
-
-# If effectively free, shuffle kimi-k2-thinking to the top
+# Recommended models ("feelings" order)
+rec_keys = [
+    "gpt-5.2",
+    "gpt-5.2-codex",
+    "gpt-5",
+    "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
+    "claude-opus-4-6",
+    "kimi-k2.5",
+    "grok-code-fast-1",
+]
+
+# If effectively free, shuffle kimi-k2.5 to the top
if is_effectively_free:
-    target = "kimi-k2
+    target = "kimi-k2.5"
    if target in rec_keys:
        rec_keys.remove(target)
        rec_keys.insert(0, target)
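The free-tier reorder above is a simple promote-to-front operation; standalone it looks like the sketch below (promote_to_front is illustrative, not part of the package):

from typing import List

def promote_to_front(keys: List[str], target: str) -> List[str]:
    # Return a copy with target moved to index 0 if present; otherwise unchanged.
    out = [k for k in keys if k != target]
    if target in keys:
        out.insert(0, target)
    return out

rec_keys = ["gpt-5.2", "gpt-5.2-codex", "gpt-5", "gemini-3-pro-preview",
            "gemini-3-flash-preview", "claude-opus-4-6", "kimi-k2.5", "grok-code-fast-1"]
print(promote_to_front(rec_keys, "kimi-k2.5")[0])  # kimi-k2.5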
@@ -5117,8 +5466,7 @@ class ChatCLI:
    suffix = " [PAID]" if (is_effectively_free and is_paid_model(m)) else ""
    choices.append((m, f"{lbl}{suffix}"))

-
-choices.append(("custom", "Custom (enter a model name)"))
+# Per issue list: do not surface "server default" or "custom" in this picker.

# Render and select using the unified highlighted picker
picked: Optional[str] = None
@@ -5133,27 +5481,15 @@ class ChatCLI:
|
|
|
5133
5481
|
picked = str(val)
|
|
5134
5482
|
|
|
5135
5483
|
# Enforce free tier restrictions
|
|
5136
|
-
if
|
|
5484
|
+
if is_effectively_free and is_paid_model(picked):
|
|
5137
5485
|
self.ui.warn(f"Model '{picked}' is a paid tier model. Access is restricted on the free tier without credits.")
|
|
5138
5486
|
continue
|
|
5139
5487
|
|
|
5140
5488
|
break
|
|
5141
5489
|
|
|
5142
5490
|
# Apply selection
|
|
5143
|
-
|
|
5144
|
-
|
|
5145
|
-
self.ui.info("Model cleared; server default will be used.")
|
|
5146
|
-
elif picked == "custom":
|
|
5147
|
-
typed = self.ui.prompt(
|
|
5148
|
-
"Enter model name (e.g., deepseek-chat, gpt-5, gemini-3-flash-preview)",
|
|
5149
|
-
default=self.model or "",
|
|
5150
|
-
)
|
|
5151
|
-
self.model = self._resolve_model_alias(typed.strip() or None)
|
|
5152
|
-
if not self.model:
|
|
5153
|
-
self.ui.info("Model cleared; server default will be used.")
|
|
5154
|
-
else:
|
|
5155
|
-
self.model = picked
|
|
5156
|
-
self.ui.success(f"Model set to: {self.model}")
|
|
5491
|
+
self.model = picked
|
|
5492
|
+
self.ui.success(f"Model set to: {self.model}")
|
|
5157
5493
|
|
|
5158
5494
|
self._apply_model_side_effects()
|
|
5159
5495
|
self.save_settings()
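The hunk above reorders the recommended presets and blocks paid models for effectively-free accounts. A minimal standalone sketch of that gating logic follows; the preset set and the is_paid_model predicate here are illustrative placeholders, not the CLI's actual pricing table.

from typing import List, Optional, Tuple

PAID_MODELS = {"gpt-5.2", "gpt-5.2-codex", "claude-opus-4-6"}  # illustrative only

def is_paid_model(name: str) -> bool:
    return name in PAID_MODELS

def build_model_choices(rec_keys: List[str], effectively_free: bool,
                        free_target: str = "kimi-k2.5") -> List[Tuple[str, str]]:
    """Reorder presets for free users and tag paid models, mirroring the hunk above."""
    keys = list(rec_keys)
    if effectively_free and free_target in keys:
        keys.remove(free_target)
        keys.insert(0, free_target)            # free-friendly model first
    choices = []
    for m in keys:
        suffix = " [PAID]" if (effectively_free and is_paid_model(m)) else ""
        choices.append((m, f"{m}{suffix}"))
    return choices

def accept_selection(picked: str, effectively_free: bool) -> Optional[str]:
    """Return the model to apply, or None when the free tier blocks it."""
    if effectively_free and is_paid_model(picked):
        return None                             # caller warns and re-prompts
    return picked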
|
|
@@ -5176,12 +5512,12 @@ class ChatCLI:
|
|
|
5176
5512
|
self.save_settings()
|
|
5177
5513
|
return True
|
|
5178
5514
|
|
|
5179
|
-
if choice == "toggle_history":
|
|
5180
|
-
self.save_chat_history = not self.save_chat_history
|
|
5181
|
-
self.ui.success(f"History set to: {'ON' if self.save_chat_history else 'OFF'}")
|
|
5182
|
-
self.save_settings()
|
|
5183
|
-
return True
|
|
5184
|
-
|
|
5515
|
+
if choice == "toggle_history":
|
|
5516
|
+
self.save_chat_history = not self.save_chat_history
|
|
5517
|
+
self.ui.success(f"History set to: {'ON' if self.save_chat_history else 'OFF'}")
|
|
5518
|
+
self.save_settings()
|
|
5519
|
+
return True
|
|
5520
|
+
|
|
5185
5521
|
if choice == "set_scope":
|
|
5186
5522
|
await self.set_scope_menu()
|
|
5187
5523
|
return True
|
|
@@ -5256,31 +5592,38 @@ class ChatCLI:
|
|
|
5256
5592
|
pass
|
|
5257
5593
|
# Clear provider-native histories on system reset
|
|
5258
5594
|
try:
|
|
5259
|
-
self.messages_for_save = []
|
|
5260
|
-
if not self.save_chat_history:
|
|
5261
|
-
self.thread_uid = None
|
|
5595
|
+
self.messages_for_save = []
|
|
5596
|
+
if not self.save_chat_history:
|
|
5597
|
+
self.thread_uid = None
|
|
5262
5598
|
self._kimi_raw_history = []
|
|
5599
|
+
self._gemini_raw_history = []
|
|
5600
|
+
self._openai_previous_response_id = None
|
|
5601
|
+
self._openai_response_id_history = []
|
|
5602
|
+
self._openai_input_items = []
|
|
5603
|
+
self._openai_last_sent_input_items = None
|
|
5263
5604
|
except Exception:
|
|
5264
5605
|
pass
|
|
5265
5606
|
self.ui.success("System prompt set.")
|
|
5266
5607
|
self.save_settings()
|
|
5267
5608
|
return True
|
|
5268
5609
|
|
|
5269
|
-
if choice == "clear_history":
|
|
5610
|
+
if choice == "clear_history":
|
|
5270
5611
|
self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
|
|
5271
5612
|
self._did_inject_codebase_map = False
|
|
5272
5613
|
try:
|
|
5273
5614
|
self._did_inject_custom_first_turn = False
|
|
5274
5615
|
except Exception:
|
|
5275
5616
|
pass
|
|
5276
|
-
try:
|
|
5277
|
-
self.messages_for_save = []
|
|
5278
|
-
if not self.save_chat_history:
|
|
5279
|
-
self.thread_uid = None
|
|
5280
|
-
self._kimi_raw_history = []
|
|
5281
|
-
self._gemini_raw_history = []
|
|
5282
|
-
|
|
5283
|
-
|
|
5617
|
+
try:
|
|
5618
|
+
self.messages_for_save = []
|
|
5619
|
+
if not self.save_chat_history:
|
|
5620
|
+
self.thread_uid = None
|
|
5621
|
+
self._kimi_raw_history = []
|
|
5622
|
+
self._gemini_raw_history = []
|
|
5623
|
+
self._openai_previous_response_id = None
|
|
5624
|
+
self._openai_response_id_history = []
|
|
5625
|
+
except Exception:
|
|
5626
|
+
pass
|
|
5284
5627
|
# Reset local cumulative token counters on session clear
|
|
5285
5628
|
self._cum_input_tokens = 0
|
|
5286
5629
|
self._cum_output_tokens = 0
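Both the system-prompt reset and the clear_history branch now wipe every provider-native continuation handle along with the token counters. The sketch below gathers the same fields into one helper; the attribute names match what the hunk clears, but packaging them as a reset() method is my own suggestion, not how the CLI is structured.

class ConversationState:
    """Per-provider continuation state; reset() mirrors the fields cleared above."""

    def __init__(self) -> None:
        self.messages_for_save = []
        self.thread_uid = None
        self._kimi_raw_history = []
        self._gemini_raw_history = []
        self._openai_previous_response_id = None
        self._openai_response_id_history = []
        self._openai_input_items = []
        self._openai_last_sent_input_items = None
        self._cum_input_tokens = 0
        self._cum_output_tokens = 0

    def reset(self, keep_thread: bool = False) -> None:
        self.messages_for_save = []
        if not keep_thread:
            self.thread_uid = None
        self._kimi_raw_history = []
        self._gemini_raw_history = []
        self._openai_previous_response_id = None
        self._openai_response_id_history = []
        self._openai_input_items = []
        self._openai_last_sent_input_items = None
        self._cum_input_tokens = 0
        self._cum_output_tokens = 0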
|
|
@@ -5334,10 +5677,81 @@ class ChatCLI:
|
|
|
5334
5677
|
|
|
5335
5678
|
# ----------------------- SSE Streaming loop ------------------------
|
|
5336
5679
|
async def _stream_once(self, user_input: str) -> str:
|
|
5337
|
-
# Build request payload
|
|
5338
|
-
|
|
5339
|
-
|
|
5340
|
-
|
|
5680
|
+
# Build request payload.
|
|
5681
|
+
# OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
|
|
5682
|
+
# `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
|
|
5683
|
+
if self._is_openai_model(self.model):
|
|
5684
|
+
msgs: List[Dict[str, str]] = []
|
|
5685
|
+
# Codex developer prompt (if enabled) + system prompt
|
|
5686
|
+
try:
|
|
5687
|
+
if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
|
|
5688
|
+
msgs.append({"role": "system", "content": self._codex_system_prompt()})
|
|
5689
|
+
except Exception:
|
|
5690
|
+
pass
|
|
5691
|
+
if self.system_prompt:
|
|
5692
|
+
msgs.append({"role": "system", "content": self.system_prompt})
|
|
5693
|
+
|
|
5694
|
+
# Apply first-turn-only injections to the current user content
|
|
5695
|
+
content = user_input
|
|
5696
|
+
prefix = self._build_first_turn_injection(user_input)
|
|
5697
|
+
if prefix:
|
|
5698
|
+
content = f"{prefix}\n\n{user_input}"
|
|
5699
|
+
try:
|
|
5700
|
+
self._last_built_user_content = content
|
|
5701
|
+
except Exception:
|
|
5702
|
+
self._last_built_user_content = user_input
|
|
5703
|
+
msgs.append({"role": "user", "content": content})
|
|
5704
|
+
|
|
5705
|
+
payload: Dict[str, Any] = {"messages": msgs}
|
|
5706
|
+
|
|
5707
|
+
# Build OpenAI native input items (authoritative for the server OpenAI path).
|
|
5708
|
+
try:
|
|
5709
|
+
if isinstance(self._openai_input_items, list) and self._openai_input_items:
|
|
5710
|
+
items: List[Dict[str, Any]] = copy.deepcopy(self._openai_input_items)
|
|
5711
|
+
else:
|
|
5712
|
+
# Seed with system prompts for the first OpenAI turn.
|
|
5713
|
+
items = []
|
|
5714
|
+
try:
|
|
5715
|
+
if self._is_codex_model(self.model) and bool(getattr(self, "codex_prompt_enabled", True)):
|
|
5716
|
+
items.append({"role": "system", "content": self._codex_system_prompt()})
|
|
5717
|
+
except Exception:
|
|
5718
|
+
pass
|
|
5719
|
+
if self.system_prompt:
|
|
5720
|
+
items.append({"role": "system", "content": self.system_prompt})
|
|
5721
|
+
items.append({"role": "user", "content": content})
|
|
5722
|
+
payload["openai_input_items"] = self._sanitize_openai_items(items)
|
|
5723
|
+
self._openai_last_sent_input_items = copy.deepcopy(items)
|
|
5724
|
+
except Exception:
|
|
5725
|
+
# If this fails for any reason, fall back to normal message-based history.
|
|
5726
|
+
self._openai_last_sent_input_items = None
|
|
5727
|
+
|
|
5728
|
+
# OpenAI Threading: DISABLED. We use full manual input item replay now.
|
|
5729
|
+
# if "openai_input_items" not in payload:
|
|
5730
|
+
# try:
|
|
5731
|
+
# if isinstance(self._openai_previous_response_id, str) and self._openai_previous_response_id.strip():
|
|
5732
|
+
# payload["openai_previous_response_id"] = self._openai_previous_response_id.strip()
|
|
5733
|
+
# except Exception:
|
|
5734
|
+
# pass
|
|
5735
|
+
try:
|
|
5736
|
+
if isinstance(self._openai_response_id_history, list) and self._openai_response_id_history:
|
|
5737
|
+
payload["openai_response_id_history"] = list(self._openai_response_id_history)
|
|
5738
|
+
except Exception:
|
|
5739
|
+
pass
|
|
5740
|
+
else:
|
|
5741
|
+
payload = {"messages": self._build_messages(user_input)}
|
|
5742
|
+
if self.model:
|
|
5743
|
+
payload["model"] = self.model
|
|
5744
|
+
# OpenAI: include id chain even when not using previous_response_id yet (e.g. first turn)
|
|
5745
|
+
try:
|
|
5746
|
+
if self._is_openai_model(self.model):
|
|
5747
|
+
if (
|
|
5748
|
+
isinstance(getattr(self, "_openai_response_id_history", None), list)
|
|
5749
|
+
and self._openai_response_id_history
|
|
5750
|
+
and "openai_response_id_history" not in payload
|
|
5751
|
+
):
|
|
5752
|
+
payload["openai_response_id_history"] = list(self._openai_response_id_history)
|
|
5753
|
+
except Exception:
|
|
5754
|
+
pass
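For OpenAI models the request now carries the full native item chain each turn instead of threading on previous_response_id. A condensed sketch of that payload construction is below; it reuses the field names the hunk sends (openai_input_items, openai_response_id_history), and the inline dict filter stands in for the real _sanitize_openai_items.

import copy
from typing import Any, Dict, List, Optional

def build_openai_payload(user_text: str,
                         prior_items: List[Dict[str, Any]],
                         system_prompt: Optional[str],
                         response_ids: List[str]) -> Dict[str, Any]:
    """Stateless replay: resend every prior native item plus the new user turn."""
    msgs: List[Dict[str, str]] = []
    if system_prompt:
        msgs.append({"role": "system", "content": system_prompt})
    msgs.append({"role": "user", "content": user_text})

    if prior_items:
        items = copy.deepcopy(prior_items)                    # full native chain from earlier turns
    else:
        items = [m for m in msgs if m["role"] == "system"]    # first turn: seed with system prompt
    items.append({"role": "user", "content": user_text})

    payload: Dict[str, Any] = {
        "messages": msgs,
        # Stand-in for _sanitize_openai_items: keep dict-shaped items only.
        "openai_input_items": [dict(i) for i in items if isinstance(i, dict)],
    }
    if response_ids:
        payload["openai_response_id_history"] = list(response_ids)
    return payload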
|
|
5341
5755
|
# Include terminal identifier so the server can isolate per-terminal workspace if it executes tools
|
|
5342
5756
|
try:
|
|
5343
5757
|
if self.terminal_id:
|
|
@@ -5360,60 +5774,69 @@ class ChatCLI:
|
|
|
5360
5774
|
payload["host_roots_mode"] = mode
|
|
5361
5775
|
if mode in ("cwd", "custom") and self.host_base:
|
|
5362
5776
|
payload["host_allowed_dirs"] = [self.host_base]
|
|
5363
|
-
# Controls and approvals
|
|
5364
|
-
if self.control_level in (1, 2, 3):
|
|
5365
|
-
payload["control_level"] = self.control_level
|
|
5366
|
-
# Auto-approve tools at L2: merge explicit auto_approve with trust lists so
|
|
5367
|
-
# "trust for this session" / "always trust" choices also suppress repeat server prompts.
|
|
5368
|
-
try:
|
|
5369
|
-
auto_tools: List[str] = []
|
|
5370
|
-
for seq in (self.auto_approve or [], self.trust_tools_session or [], self.trust_tools_always or []):
|
|
5371
|
-
for t in seq:
|
|
5372
|
-
if isinstance(t, str) and t.strip():
|
|
5373
|
-
k = t.strip()
|
|
5374
|
-
if k not in auto_tools:
|
|
5375
|
-
auto_tools.append(k)
|
|
5376
|
-
if auto_tools:
|
|
5377
|
-
payload["auto_approve"] = auto_tools
|
|
5378
|
-
except Exception:
|
|
5379
|
-
if self.auto_approve:
|
|
5380
|
-
payload["auto_approve"] = self.auto_approve
|
|
5381
|
-
|
|
5382
|
-
# Auto-approve run_command base commands at L2 (hybrid approval + trust).
|
|
5383
|
-
try:
|
|
5384
|
-
cmd_bases: List[str] = []
|
|
5385
|
-
for seq in (self.trust_cmds_session or [], self.trust_cmds_always or []):
|
|
5386
|
-
for c in seq:
|
|
5387
|
-
if isinstance(c, str) and c.strip():
|
|
5388
|
-
k = c.strip().lower()
|
|
5389
|
-
if k not in cmd_bases:
|
|
5390
|
-
cmd_bases.append(k)
|
|
5391
|
-
if cmd_bases:
|
|
5392
|
-
payload["auto_approve_command_bases"] = cmd_bases
|
|
5393
|
-
except Exception:
|
|
5394
|
-
pass
|
|
5395
|
-
# Reasoning effort (OpenAI reasoning models only; server will ignore for others).
|
|
5396
|
-
# Let the server decide whether xhigh is supported for the selected (or default) model.
|
|
5397
|
-
try:
|
|
5398
|
-
if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
|
|
5399
|
-
payload["reasoning_effort"] = self.reasoning_effort
|
|
5400
|
-
else:
|
|
5401
|
-
payload["reasoning_effort"] = "medium"
|
|
5402
|
-
except Exception:
|
|
5403
|
-
payload["reasoning_effort"] = "medium"
|
|
5777
|
+
# Controls and approvals
|
|
5778
|
+
if self.control_level in (1, 2, 3):
|
|
5779
|
+
payload["control_level"] = self.control_level
|
|
5780
|
+
# Auto-approve tools at L2: merge explicit auto_approve with trust lists so
|
|
5781
|
+
# "trust for this session" / "always trust" choices also suppress repeat server prompts.
|
|
5782
|
+
try:
|
|
5783
|
+
auto_tools: List[str] = []
|
|
5784
|
+
for seq in (self.auto_approve or [], self.trust_tools_session or [], self.trust_tools_always or []):
|
|
5785
|
+
for t in seq:
|
|
5786
|
+
if isinstance(t, str) and t.strip():
|
|
5787
|
+
k = t.strip()
|
|
5788
|
+
if k not in auto_tools:
|
|
5789
|
+
auto_tools.append(k)
|
|
5790
|
+
if auto_tools:
|
|
5791
|
+
payload["auto_approve"] = auto_tools
|
|
5792
|
+
except Exception:
|
|
5793
|
+
if self.auto_approve:
|
|
5794
|
+
payload["auto_approve"] = self.auto_approve
|
|
5795
|
+
|
|
5796
|
+
# Auto-approve run_command base commands at L2 (hybrid approval + trust).
|
|
5797
|
+
try:
|
|
5798
|
+
cmd_bases: List[str] = []
|
|
5799
|
+
for seq in (self.trust_cmds_session or [], self.trust_cmds_always or []):
|
|
5800
|
+
for c in seq:
|
|
5801
|
+
if isinstance(c, str) and c.strip():
|
|
5802
|
+
k = c.strip().lower()
|
|
5803
|
+
if k not in cmd_bases:
|
|
5804
|
+
cmd_bases.append(k)
|
|
5805
|
+
if cmd_bases:
|
|
5806
|
+
payload["auto_approve_command_bases"] = cmd_bases
|
|
5807
|
+
except Exception:
|
|
5808
|
+
pass
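The approval plumbing above merges the explicit auto_approve list with both trust lists while preserving order, and separately lower-cases run_command base commands. A small sketch of that merge; the function names are mine.

from typing import Iterable, List

def merge_auto_approve(*sources: Iterable[str]) -> List[str]:
    """Order-preserving, de-duplicated merge of tool names (case kept as-is)."""
    merged: List[str] = []
    for seq in sources:
        for item in seq or []:
            if isinstance(item, str) and item.strip():
                key = item.strip()
                if key not in merged:
                    merged.append(key)
    return merged

def merge_command_bases(*sources: Iterable[str]) -> List[str]:
    """Same idea for run_command base commands, normalized to lower case."""
    merged: List[str] = []
    for seq in sources:
        for item in seq or []:
            if isinstance(item, str) and item.strip():
                key = item.strip().lower()
                if key not in merged:
                    merged.append(key)
    return merged

# Example: merge_auto_approve(["write_file"], ["read_file", "write_file"]) -> ["write_file", "read_file"]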
|
|
5809
|
+
# Reasoning effort (OpenAI reasoning models only; server will ignore for others).
|
|
5810
|
+
# Let the server decide whether xhigh is supported for the selected (or default) model.
|
|
5811
|
+
try:
|
|
5812
|
+
if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
|
|
5813
|
+
payload["reasoning_effort"] = self.reasoning_effort
|
|
5814
|
+
else:
|
|
5815
|
+
payload["reasoning_effort"] = "medium"
|
|
5816
|
+
except Exception:
|
|
5817
|
+
payload["reasoning_effort"] = "medium"
|
|
5404
5818
|
|
|
5405
5819
|
# Anthropic thinking-mode budget (server ignores unless model ends with -thinking)
|
|
5406
5820
|
try:
|
|
5407
5821
|
if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
|
|
5408
|
-
payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
|
|
5409
|
-
except Exception:
|
|
5410
|
-
pass
|
|
5411
|
-
|
|
5412
|
-
|
|
5413
|
-
|
|
5414
|
-
|
|
5415
|
-
|
|
5416
|
-
|
|
5822
|
+
payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
|
|
5823
|
+
except Exception:
|
|
5824
|
+
pass
|
|
5825
|
+
|
|
5826
|
+
# Anthropic effort (Opus 4.6/4.5). Default to high.
|
|
5827
|
+
try:
|
|
5828
|
+
ae = getattr(self, "anthropic_effort", None)
|
|
5829
|
+
ae2 = str(ae or "high").strip().lower()
|
|
5830
|
+
if ae2 in ("low", "medium", "high", "max"):
|
|
5831
|
+
payload["anthropic_effort"] = ae2
|
|
5832
|
+
except Exception:
|
|
5833
|
+
payload["anthropic_effort"] = "high"
|
|
5834
|
+
# Anthropic prompt cache TTL (server override): send when set to 5m or 1h
|
|
5835
|
+
try:
|
|
5836
|
+
if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
|
|
5837
|
+
payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
|
|
5838
|
+
except Exception:
|
|
5839
|
+
pass
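The new Anthropic knobs are validated before being copied into the payload: effort is only sent when it is one of the known values (the exception path defaults to "high"), and the cache TTL is only sent when it is exactly "5m" or "1h". A tiny standalone sketch of that normalization, with hypothetical argument names.

from typing import Dict, Optional

def anthropic_payload_fields(effort: Optional[str], cache_ttl: Optional[str]) -> Dict[str, str]:
    """Return only validated Anthropic fields; unrecognized values are simply dropped."""
    fields: Dict[str, str] = {}
    eff = str(effort or "high").strip().lower()
    if eff in ("low", "medium", "high", "max"):
        fields["anthropic_effort"] = eff
    if isinstance(cache_ttl, str) and cache_ttl in ("5m", "1h"):
        fields["anthropic_cache_ttl"] = cache_ttl
    return fields

# anthropic_payload_fields("MAX", None) -> {"anthropic_effort": "max"}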
|
|
5417
5840
|
|
|
5418
5841
|
# Text verbosity and tool preambles preference (UI hints for the server)
|
|
5419
5842
|
try:
|
|
@@ -5421,8 +5844,10 @@ class ChatCLI:
|
|
|
5421
5844
|
payload["text_verbosity"] = self.text_verbosity
|
|
5422
5845
|
except Exception:
|
|
5423
5846
|
pass
|
|
5847
|
+
# Preambles are a GPT-5-only UX toggle.
|
|
5424
5848
|
try:
|
|
5425
|
-
|
|
5849
|
+
if self._supports_preambles(self.model):
|
|
5850
|
+
payload["preambles_enabled"] = bool(self.preambles_enabled)
|
|
5426
5851
|
except Exception:
|
|
5427
5852
|
pass
|
|
5428
5853
|
|
|
@@ -5519,23 +5944,23 @@ class ChatCLI:
|
|
|
5519
5944
|
headers["X-Request-Timeout"] = str(int(req_timeout_hint))
|
|
5520
5945
|
except Exception:
|
|
5521
5946
|
pass
|
|
5522
|
-
# If using a Kimi model, include provider-native messages to preserve reasoning_content
|
|
5523
|
-
try:
|
|
5524
|
-
if isinstance(self.model, str) and self.model.startswith("kimi-"):
|
|
5525
|
-
req_payload = dict(req_payload)
|
|
5526
|
-
req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
|
|
5527
|
-
except Exception:
|
|
5528
|
-
pass
|
|
5529
|
-
# If using a Gemini model, include provider-native contents to preserve thought signatures
|
|
5530
|
-
# and strict tool-call chains across HTTP turns.
|
|
5531
|
-
try:
|
|
5532
|
-
if isinstance(self.model, str) and self.model.startswith("gemini-"):
|
|
5533
|
-
req_payload = dict(req_payload)
|
|
5534
|
-
hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
|
|
5535
|
-
if hist:
|
|
5536
|
-
req_payload["raw_provider_messages"] = hist
|
|
5537
|
-
except Exception:
|
|
5538
|
-
pass
|
|
5947
|
+
# If using a Kimi model, include provider-native messages to preserve reasoning_content
|
|
5948
|
+
try:
|
|
5949
|
+
if isinstance(self.model, str) and self.model.startswith("kimi-"):
|
|
5950
|
+
req_payload = dict(req_payload)
|
|
5951
|
+
req_payload["raw_provider_messages"] = self._build_kimi_raw_messages(user_input)
|
|
5952
|
+
except Exception:
|
|
5953
|
+
pass
|
|
5954
|
+
# If using a Gemini model, include provider-native contents to preserve thought signatures
|
|
5955
|
+
# and strict tool-call chains across HTTP turns.
|
|
5956
|
+
try:
|
|
5957
|
+
if isinstance(self.model, str) and self.model.startswith("gemini-"):
|
|
5958
|
+
req_payload = dict(req_payload)
|
|
5959
|
+
hist = self._normalize_gemini_raw_messages(self._gemini_raw_history)
|
|
5960
|
+
if hist:
|
|
5961
|
+
req_payload["raw_provider_messages"] = hist
|
|
5962
|
+
except Exception:
|
|
5963
|
+
pass
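Kimi and Gemini keep their own provider-native transcripts, which are attached to the request as raw_provider_messages so the server can replay reasoning_content and thought signatures across turns. A rough sketch of that dispatch follows; the real Kimi branch builds its list via _build_kimi_raw_messages(user_input) and the Gemini branch normalizes via _normalize_gemini_raw_messages, both abstracted to plain lists here.

from typing import Any, Dict, List, Optional

def attach_raw_history(payload: Dict[str, Any], model: Optional[str],
                       kimi_messages: List[Dict[str, Any]],
                       gemini_messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Copy the request payload and add provider-native history when applicable."""
    out = dict(payload)
    if isinstance(model, str) and model.startswith("kimi-") and kimi_messages:
        out["raw_provider_messages"] = [dict(m) for m in kimi_messages]
    elif isinstance(model, str) and model.startswith("gemini-") and gemini_messages:
        out["raw_provider_messages"] = [dict(m) for m in gemini_messages if isinstance(m, dict)]
    return out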
|
|
5539
5964
|
async with httpx.AsyncClient(timeout=http_timeout, cookies=self.cookies) as client:
|
|
5540
5965
|
async with client.stream("POST", self.stream_url, json=req_payload, headers=headers, follow_redirects=True) as resp:
|
|
5541
5966
|
if resp.status_code == 429:
|
|
@@ -5639,18 +6064,63 @@ class ChatCLI:
|
|
|
5639
6064
|
self._rawlog_write(msg)
|
|
5640
6065
|
except Exception:
|
|
5641
6066
|
pass
|
|
5642
|
-
# Idle "thinking" indicator shown while waiting for the next event (first tokens or next tool call)
|
|
5643
|
-
indicator_task = None
|
|
5644
|
-
indicator_active = False
|
|
5645
|
-
indicator_started = False # used only to adjust leading newline behavior on first assistant header
|
|
5646
|
-
# Track whether we're currently positioned at the start of a fresh line.
|
|
5647
|
-
# This prevents double-newlines between back-to-back tool events.
|
|
5648
|
-
at_line_start = True
|
|
5649
|
-
|
|
5650
|
-
|
|
5651
|
-
|
|
5652
|
-
|
|
5653
|
-
|
|
6067
|
+
# Idle "thinking" indicator shown while waiting for the next event (first tokens or next tool call)
|
|
6068
|
+
indicator_task = None
|
|
6069
|
+
indicator_active = False
|
|
6070
|
+
indicator_started = False # used only to adjust leading newline behavior on first assistant header
|
|
6071
|
+
# Track whether we're currently positioned at the start of a fresh line.
|
|
6072
|
+
# This prevents double-newlines between back-to-back tool events.
|
|
6073
|
+
at_line_start = True
|
|
6074
|
+
|
|
6075
|
+
# --- Tool call in-place status (issuelist.md #7) ---
|
|
6076
|
+
# We render a single transient line for the current tool call (no trailing newline)
|
|
6077
|
+
# so the later tool.result SUCCESS/FAILURE line can replace it in-place.
|
|
6078
|
+
tool_status_active = False
|
|
6079
|
+
tool_status_call_id = None
|
|
6080
|
+
|
|
6081
|
+
def _tool_status_clear_line() -> None:
|
|
6082
|
+
"""Clear the current line (best-effort) and return to column 0."""
|
|
6083
|
+
nonlocal at_line_start
|
|
6084
|
+
try:
|
|
6085
|
+
sys.stdout.write("\r\x1b[2K")
|
|
6086
|
+
sys.stdout.flush()
|
|
6087
|
+
except Exception:
|
|
6088
|
+
pass
|
|
6089
|
+
at_line_start = True
|
|
6090
|
+
|
|
6091
|
+
def _tool_status_show(call_id: Any, line: str) -> None:
|
|
6092
|
+
"""Show the transient tool status line (no newline)."""
|
|
6093
|
+
nonlocal tool_status_active, tool_status_call_id, at_line_start
|
|
6094
|
+
if not self.show_tool_calls:
|
|
6095
|
+
return
|
|
6096
|
+
tool_status_active = True
|
|
6097
|
+
tool_status_call_id = str(call_id) if call_id is not None else None
|
|
6098
|
+
try:
|
|
6099
|
+
if not at_line_start:
|
|
6100
|
+
sys.stdout.write("\n")
|
|
6101
|
+
sys.stdout.write("\r\x1b[2K" + str(line))
|
|
6102
|
+
sys.stdout.flush()
|
|
6103
|
+
at_line_start = False
|
|
6104
|
+
except Exception:
|
|
6105
|
+
# Fallback: degrade to a normal printed line
|
|
6106
|
+
try:
|
|
6107
|
+
self.ui.print(str(line))
|
|
6108
|
+
except Exception:
|
|
6109
|
+
pass
|
|
6110
|
+
at_line_start = True
|
|
6111
|
+
|
|
6112
|
+
def _tool_status_stop() -> None:
|
|
6113
|
+
"""Remove the transient tool status line and clear tracking."""
|
|
6114
|
+
nonlocal tool_status_active, tool_status_call_id
|
|
6115
|
+
if tool_status_active:
|
|
6116
|
+
_tool_status_clear_line()
|
|
6117
|
+
tool_status_active = False
|
|
6118
|
+
tool_status_call_id = None
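The transient status helpers rely on two ANSI sequences: "\r" returns the cursor to column 0 and "\x1b[2K" erases the whole line, so a later write replaces the [RUNNING] text in place instead of scrolling. A self-contained sketch of the same pattern, independent of the CLI's UI object.

import sys
import time

def show_status(line: str) -> None:
    """Draw or replace a single status line without emitting a newline."""
    sys.stdout.write("\r\x1b[2K" + line)   # return to col 0, erase line, rewrite
    sys.stdout.flush()

def finish_status(final_line: str) -> None:
    """Replace the transient line with its final form and move to the next line."""
    sys.stdout.write("\r\x1b[2K" + final_line + "\n")
    sys.stdout.flush()

if __name__ == "__main__":
    show_status("[RUNNING] read_file src/cli.py")
    time.sleep(1.0)                        # simulated tool execution
    finish_status("[SUCCESS] read_file src/cli.py")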
|
|
6119
|
+
# Mode: animate or static (default static for stability)
|
|
6120
|
+
try:
|
|
6121
|
+
_animate_indicator = (os.getenv("HENOSIS_THINKING_ANIMATE", "").strip().lower() in ("1", "true", "yes", "on"))
|
|
6122
|
+
except Exception:
|
|
6123
|
+
_animate_indicator = False
|
|
5654
6124
|
|
|
5655
6125
|
async def _thinking_indicator_loop(chosen_word: str, spacing: int = 3) -> None:
|
|
5656
6126
|
"""Animate a transient thinking word on a single line until indicator_active becomes False.
|
|
@@ -5689,8 +6159,8 @@ class ChatCLI:
|
|
|
5689
6159
|
except Exception:
|
|
5690
6160
|
pass
|
|
5691
6161
|
|
|
5692
|
-
async def _indicator_start() -> None:
|
|
5693
|
-
nonlocal indicator_task, indicator_active, indicator_started, at_line_start
|
|
6162
|
+
async def _indicator_start() -> None:
|
|
6163
|
+
nonlocal indicator_task, indicator_active, indicator_started, at_line_start
|
|
5694
6164
|
# Choose a random word and spacing each start
|
|
5695
6165
|
word_bank = list(self._thinking_words or ["thinking", "working..."])
|
|
5696
6166
|
if not word_bank:
|
|
@@ -5718,28 +6188,28 @@ class ChatCLI:
|
|
|
5718
6188
|
c = colors[i % len(colors)]
|
|
5719
6189
|
out_chars.append(f"\x1b[38;5;{c}m{ch}\x1b[0m")
|
|
5720
6190
|
line = " " + joiner.join(out_chars) + " "
|
|
5721
|
-
# Start on a dedicated new line so we never clobber prior output.
|
|
5722
|
-
# If we're already at a fresh line, don't emit an extra newline (prevents
|
|
5723
|
-
# visible blank lines between back-to-back tool events).
|
|
5724
|
-
if not at_line_start:
|
|
5725
|
-
sys.stdout.write("\n")
|
|
5726
|
-
sys.stdout.write("\r\x1b[2K" + line)
|
|
5727
|
-
sys.stdout.flush()
|
|
5728
|
-
at_line_start = False
|
|
6191
|
+
# Start on a dedicated new line so we never clobber prior output.
|
|
6192
|
+
# If we're already at a fresh line, don't emit an extra newline (prevents
|
|
6193
|
+
# visible blank lines between back-to-back tool events).
|
|
6194
|
+
if not at_line_start:
|
|
6195
|
+
sys.stdout.write("\n")
|
|
6196
|
+
sys.stdout.write("\r\x1b[2K" + line)
|
|
6197
|
+
sys.stdout.flush()
|
|
6198
|
+
at_line_start = False
|
|
5729
6199
|
# File debug
|
|
5730
6200
|
try:
|
|
5731
6201
|
self.ui.debug_log(f"indicator.start word='{chosen}' animate={_animate_indicator}")
|
|
5732
6202
|
except Exception:
|
|
5733
6203
|
pass
|
|
5734
|
-
except Exception:
|
|
5735
|
-
try:
|
|
5736
|
-
if not at_line_start:
|
|
5737
|
-
sys.stdout.write("\n")
|
|
5738
|
-
sys.stdout.write("\r\x1b[2K" + (" " + joiner.join(list(str(chosen))) + " "))
|
|
5739
|
-
sys.stdout.flush()
|
|
5740
|
-
at_line_start = False
|
|
5741
|
-
except Exception:
|
|
5742
|
-
pass
|
|
6204
|
+
except Exception:
|
|
6205
|
+
try:
|
|
6206
|
+
if not at_line_start:
|
|
6207
|
+
sys.stdout.write("\n")
|
|
6208
|
+
sys.stdout.write("\r\x1b[2K" + (" " + joiner.join(list(str(chosen))) + " "))
|
|
6209
|
+
sys.stdout.flush()
|
|
6210
|
+
at_line_start = False
|
|
6211
|
+
except Exception:
|
|
6212
|
+
pass
|
|
5743
6213
|
indicator_started = True
|
|
5744
6214
|
if _animate_indicator:
|
|
5745
6215
|
try:
|
|
@@ -5749,8 +6219,8 @@ class ChatCLI:
|
|
|
5749
6219
|
indicator_task = None
|
|
5750
6220
|
indicator_active = False
|
|
5751
6221
|
|
|
5752
|
-
async def _indicator_stop(clear: bool = False) -> None:
|
|
5753
|
-
nonlocal indicator_task, indicator_active, indicator_started, at_line_start
|
|
6222
|
+
async def _indicator_stop(clear: bool = False) -> None:
|
|
6223
|
+
nonlocal indicator_task, indicator_active, indicator_started, at_line_start
|
|
5754
6224
|
# Only clear the line if an indicator was actually started.
|
|
5755
6225
|
was_started = bool(indicator_started)
|
|
5756
6226
|
indicator_active = False
|
|
@@ -5768,21 +6238,21 @@ class ChatCLI:
|
|
|
5768
6238
|
finally:
|
|
5769
6239
|
indicator_task = None
|
|
5770
6240
|
# Default to not clearing to avoid erasing streamed content lines
|
|
5771
|
-
if was_started and clear:
|
|
5772
|
-
try:
|
|
5773
|
-
sys.stdout.write("\r\x1b[2K")
|
|
5774
|
-
sys.stdout.flush()
|
|
5775
|
-
at_line_start = True
|
|
5776
|
-
except Exception:
|
|
5777
|
-
pass
|
|
5778
|
-
elif was_started:
|
|
5779
|
-
# Move to the next line to separate subsequent output
|
|
5780
|
-
try:
|
|
5781
|
-
sys.stdout.write("\n")
|
|
5782
|
-
sys.stdout.flush()
|
|
5783
|
-
at_line_start = True
|
|
5784
|
-
except Exception:
|
|
5785
|
-
pass
|
|
6241
|
+
if was_started and clear:
|
|
6242
|
+
try:
|
|
6243
|
+
sys.stdout.write("\r\x1b[2K")
|
|
6244
|
+
sys.stdout.flush()
|
|
6245
|
+
at_line_start = True
|
|
6246
|
+
except Exception:
|
|
6247
|
+
pass
|
|
6248
|
+
elif was_started:
|
|
6249
|
+
# Move to the next line to separate subsequent output
|
|
6250
|
+
try:
|
|
6251
|
+
sys.stdout.write("\n")
|
|
6252
|
+
sys.stdout.flush()
|
|
6253
|
+
at_line_start = True
|
|
6254
|
+
except Exception:
|
|
6255
|
+
pass
|
|
5786
6256
|
# Reset started flag after stopping
|
|
5787
6257
|
indicator_started = False
|
|
5788
6258
|
try:
|
|
@@ -5812,26 +6282,26 @@ class ChatCLI:
|
|
|
5812
6282
|
except json.JSONDecodeError:
|
|
5813
6283
|
data = {"_raw": data_raw}
|
|
5814
6284
|
|
|
5815
|
-
if event == "session.started":
|
|
5816
|
-
# Keep indicator until first token; do not stop here
|
|
5817
|
-
session_id = data.get("session_id")
|
|
5818
|
-
lvl = data.get("level")
|
|
5819
|
-
scope = data.get("fs_scope")
|
|
5820
|
-
self.ui.print(f"[session] id={session_id} level={lvl} scope={scope}", style=self.ui.theme["dim"])
|
|
5821
|
-
self._log_line({"event": "session.started", "server_session_id": session_id, "level": lvl, "fs_scope": scope})
|
|
5822
|
-
# Record the server-authoritative level for this stream so approvals work
|
|
5823
|
-
# even when the user left control_level as "server default".
|
|
5824
|
-
try:
|
|
5825
|
-
if isinstance(lvl, int):
|
|
5826
|
-
self._current_turn["level"] = int(lvl)
|
|
5827
|
-
elif isinstance(lvl, str) and str(lvl).strip().isdigit():
|
|
5828
|
-
self._current_turn["level"] = int(str(lvl).strip())
|
|
5829
|
-
except Exception:
|
|
5830
|
-
pass
|
|
5831
|
-
try:
|
|
5832
|
-
await self._ws_broadcast("session.started", data)
|
|
5833
|
-
except Exception:
|
|
5834
|
-
pass
|
|
6285
|
+
if event == "session.started":
|
|
6286
|
+
# Keep indicator until first token; do not stop here
|
|
6287
|
+
session_id = data.get("session_id")
|
|
6288
|
+
lvl = data.get("level")
|
|
6289
|
+
scope = data.get("fs_scope")
|
|
6290
|
+
self.ui.print(f"[session] id={session_id} level={lvl} scope={scope}", style=self.ui.theme["dim"])
|
|
6291
|
+
self._log_line({"event": "session.started", "server_session_id": session_id, "level": lvl, "fs_scope": scope})
|
|
6292
|
+
# Record the server-authoritative level for this stream so approvals work
|
|
6293
|
+
# even when the user left control_level as "server default".
|
|
6294
|
+
try:
|
|
6295
|
+
if isinstance(lvl, int):
|
|
6296
|
+
self._current_turn["level"] = int(lvl)
|
|
6297
|
+
elif isinstance(lvl, str) and str(lvl).strip().isdigit():
|
|
6298
|
+
self._current_turn["level"] = int(str(lvl).strip())
|
|
6299
|
+
except Exception:
|
|
6300
|
+
pass
|
|
6301
|
+
try:
|
|
6302
|
+
await self._ws_broadcast("session.started", data)
|
|
6303
|
+
except Exception:
|
|
6304
|
+
pass
|
|
5835
6305
|
try:
|
|
5836
6306
|
self._current_turn["session_id"] = session_id
|
|
5837
6307
|
except Exception:
|
|
@@ -5842,16 +6312,16 @@ class ChatCLI:
|
|
|
5842
6312
|
pass
|
|
5843
6313
|
continue
|
|
5844
6314
|
|
|
5845
|
-
elif event == "message.delta":
|
|
5846
|
-
# Stop any transient indicator before printing content and clear the line
|
|
5847
|
-
try:
|
|
5848
|
-
await _indicator_stop(clear=True)
|
|
5849
|
-
except Exception:
|
|
5850
|
-
pass
|
|
5851
|
-
# Indicator line cleared; we're now at the start of a fresh line.
|
|
5852
|
-
at_line_start = True
|
|
5853
|
-
text = data.get("text", "")
|
|
5854
|
-
if text:
|
|
6315
|
+
elif event == "message.delta":
|
|
6316
|
+
# Stop any transient indicator before printing content and clear the line
|
|
6317
|
+
try:
|
|
6318
|
+
await _indicator_stop(clear=True)
|
|
6319
|
+
except Exception:
|
|
6320
|
+
pass
|
|
6321
|
+
# Indicator line cleared; we're now at the start of a fresh line.
|
|
6322
|
+
at_line_start = True
|
|
6323
|
+
text = data.get("text", "")
|
|
6324
|
+
if text:
|
|
5855
6325
|
try:
|
|
5856
6326
|
_deltas_total += 1
|
|
5857
6327
|
except Exception:
|
|
@@ -5886,23 +6356,23 @@ class ChatCLI:
|
|
|
5886
6356
|
print(str(model_label) + ": ", end="", flush=True)
|
|
5887
6357
|
except Exception:
|
|
5888
6358
|
pass
|
|
5889
|
-
header_printed = True
|
|
5890
|
-
at_line_start = False
|
|
5891
|
-
try:
|
|
5892
|
-
self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
|
|
5893
|
-
except Exception:
|
|
5894
|
-
pass
|
|
6359
|
+
header_printed = True
|
|
6360
|
+
at_line_start = False
|
|
6361
|
+
try:
|
|
6362
|
+
self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
|
|
6363
|
+
except Exception:
|
|
6364
|
+
pass
|
|
5895
6365
|
assistant_buf.append(text)
|
|
5896
6366
|
# Print the token delta raw to avoid any wrapping/markup side-effects
|
|
5897
6367
|
try:
|
|
5898
6368
|
self.ui.print(text, style=self.ui.theme["assistant"], end="")
|
|
5899
|
-
except Exception:
|
|
5900
|
-
try:
|
|
5901
|
-
print(str(text), end="", flush=True)
|
|
5902
|
-
except Exception:
|
|
5903
|
-
pass
|
|
5904
|
-
at_line_start = False
|
|
5905
|
-
# Deep debug: show each delta's size/preview
|
|
6369
|
+
except Exception:
|
|
6370
|
+
try:
|
|
6371
|
+
print(str(text), end="", flush=True)
|
|
6372
|
+
except Exception:
|
|
6373
|
+
pass
|
|
6374
|
+
at_line_start = False
|
|
6375
|
+
# Deep debug: show each delta's size/preview
|
|
5906
6376
|
try:
|
|
5907
6377
|
if DEBUG_SSE:
|
|
5908
6378
|
prev = text[:40].replace("\n", "\\n")
|
|
@@ -5921,44 +6391,68 @@ class ChatCLI:
|
|
|
5921
6391
|
except Exception:
|
|
5922
6392
|
pass
|
|
5923
6393
|
|
|
5924
|
-
elif event == "tool.call":
|
|
5925
|
-
# Ensure any prior indicator state is reset cleanly, then restart
|
|
5926
|
-
# a fresh indicator while waiting for the tool to run.
|
|
5927
|
-
try:
|
|
5928
|
-
await _indicator_stop(clear=True)
|
|
5929
|
-
except Exception:
|
|
5930
|
-
pass
|
|
5931
|
-
|
|
5932
|
-
# If we were mid-line (e.g., streamed assistant text), break cleanly before
|
|
5933
|
-
# showing the transient tool-wait indicator.
|
|
5934
|
-
if not at_line_start:
|
|
5935
|
-
try:
|
|
5936
|
-
self.ui.print()
|
|
5937
|
-
except Exception:
|
|
5938
|
-
try:
|
|
5939
|
-
print()
|
|
5940
|
-
except Exception:
|
|
5941
|
-
pass
|
|
5942
|
-
at_line_start = True
|
|
5943
|
-
|
|
5944
|
-
name = data.get("name")
|
|
5945
|
-
args = data.get("args", {}) or {}
|
|
5946
|
-
call_id = data.get("call_id")
|
|
6394
|
+
elif event == "tool.call":
|
|
6395
|
+
# Ensure any prior indicator state is reset cleanly, then restart
|
|
6396
|
+
# a fresh indicator while waiting for the tool to run.
|
|
6397
|
+
try:
|
|
6398
|
+
await _indicator_stop(clear=True)
|
|
6399
|
+
except Exception:
|
|
6400
|
+
pass
|
|
6401
|
+
|
|
6402
|
+
# If we were mid-line (e.g., streamed assistant text), break cleanly before
|
|
6403
|
+
# showing the transient tool-wait indicator.
|
|
6404
|
+
if not at_line_start:
|
|
6405
|
+
try:
|
|
6406
|
+
self.ui.print()
|
|
6407
|
+
except Exception:
|
|
6408
|
+
try:
|
|
6409
|
+
print()
|
|
6410
|
+
except Exception:
|
|
6411
|
+
pass
|
|
6412
|
+
at_line_start = True
|
|
6413
|
+
|
|
6414
|
+
name = data.get("name")
|
|
6415
|
+
args = data.get("args", {}) or {}
|
|
6416
|
+
call_id = data.get("call_id")
|
|
5947
6417
|
try:
|
|
5948
6418
|
self.ui.debug_log(f"tool.call name='{name}' call_id={call_id}")
|
|
5949
6419
|
except Exception:
|
|
5950
6420
|
pass
|
|
5951
6421
|
|
|
5952
|
-
#
|
|
5953
|
-
#
|
|
5954
|
-
# While the tool executes (server or client), show a subtle thinking
|
|
5955
|
-
# indicator so users see progress during potentially long operations.
|
|
6422
|
+
# issuelist.md #7:
|
|
6423
|
+
# Show a transient [RUNNING] line and replace it in-place when tool.result arrives.
|
|
5956
6424
|
try:
|
|
5957
|
-
#
|
|
5958
|
-
|
|
5959
|
-
await _indicator_start()
|
|
6425
|
+
# Clear any previous transient status line (shouldn't happen, but keep stable)
|
|
6426
|
+
_tool_status_stop()
|
|
5960
6427
|
except Exception:
|
|
5961
6428
|
pass
|
|
6429
|
+
try:
|
|
6430
|
+
tool_name = str(name or "").strip()
|
|
6431
|
+
label = self._tool_concise_label(
|
|
6432
|
+
tool_name,
|
|
6433
|
+
args if isinstance(args, dict) else {},
|
|
6434
|
+
None,
|
|
6435
|
+
)
|
|
6436
|
+
try:
|
|
6437
|
+
model_prefix = (
|
|
6438
|
+
self._current_turn.get("model")
|
|
6439
|
+
or self._last_used_model
|
|
6440
|
+
or self.model
|
|
6441
|
+
or "(server default)"
|
|
6442
|
+
)
|
|
6443
|
+
except Exception:
|
|
6444
|
+
model_prefix = self.model or "(server default)"
|
|
6445
|
+
ORANGE = "\x1b[38;5;214m"
|
|
6446
|
+
WHITE = "\x1b[97m"
|
|
6447
|
+
RESET = "\x1b[0m"
|
|
6448
|
+
status_line = f"{ORANGE}{model_prefix}{RESET}: {ORANGE}[RUNNING]{RESET} {WHITE}{label}{RESET}"
|
|
6449
|
+
_tool_status_show(call_id, status_line)
|
|
6450
|
+
except Exception:
|
|
6451
|
+
# Last-resort fallback: print something rather than crash streaming.
|
|
6452
|
+
try:
|
|
6453
|
+
self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
|
|
6454
|
+
except Exception:
|
|
6455
|
+
pass
|
|
5962
6456
|
# Count tool calls
|
|
5963
6457
|
try:
|
|
5964
6458
|
tool_calls += 1
|
|
@@ -5982,6 +6476,11 @@ class ChatCLI:
|
|
|
5982
6476
|
pass
|
|
5983
6477
|
|
|
5984
6478
|
elif event == "approval.request":
|
|
6479
|
+
# Don't let the transient [RUNNING] line collide with interactive prompts.
|
|
6480
|
+
try:
|
|
6481
|
+
_tool_status_stop()
|
|
6482
|
+
except Exception:
|
|
6483
|
+
pass
|
|
5985
6484
|
# First reply wins (web or CLI)
|
|
5986
6485
|
await self._handle_approval_request(client, session_id, data)
|
|
5987
6486
|
continue
|
|
@@ -6030,23 +6529,35 @@ class ChatCLI:
|
|
|
6030
6529
|
name = str(data.get("name"))
|
|
6031
6530
|
result = data.get("result", {}) or {}
|
|
6032
6531
|
call_id = data.get("call_id")
|
|
6033
|
-
#
|
|
6034
|
-
|
|
6035
|
-
|
|
6036
|
-
|
|
6037
|
-
|
|
6038
|
-
|
|
6039
|
-
|
|
6040
|
-
|
|
6041
|
-
|
|
6042
|
-
|
|
6043
|
-
|
|
6044
|
-
|
|
6045
|
-
|
|
6046
|
-
|
|
6047
|
-
|
|
6048
|
-
|
|
6049
|
-
|
|
6532
|
+
# If we previously rendered a transient [RUNNING] line for this tool call,
|
|
6533
|
+
# clear it now so the SUCCESS/FAILURE line prints in the same place.
|
|
6534
|
+
try:
|
|
6535
|
+
if tool_status_active:
|
|
6536
|
+
# Best-effort match on call_id (some providers may omit it).
|
|
6537
|
+
if (tool_status_call_id is None) or (call_id is None) or (str(call_id) == str(tool_status_call_id)):
|
|
6538
|
+
_tool_status_stop()
|
|
6539
|
+
except Exception:
|
|
6540
|
+
try:
|
|
6541
|
+
_tool_status_stop()
|
|
6542
|
+
except Exception:
|
|
6543
|
+
pass
|
|
6544
|
+
# Stop any indicator before rendering results
|
|
6545
|
+
try:
|
|
6546
|
+
await _indicator_stop(clear=True)
|
|
6547
|
+
except Exception:
|
|
6548
|
+
pass
|
|
6549
|
+
# Ensure tool result starts on a fresh line if assistant text was mid-line.
|
|
6550
|
+
# Don't rely on assistant_buf ending with "\n" because UI.ensure_newline()
|
|
6551
|
+
# prints without mutating the buffer, which can cause repeated blank lines.
|
|
6552
|
+
if not at_line_start:
|
|
6553
|
+
try:
|
|
6554
|
+
self.ui.print()
|
|
6555
|
+
except Exception:
|
|
6556
|
+
try:
|
|
6557
|
+
print()
|
|
6558
|
+
except Exception:
|
|
6559
|
+
pass
|
|
6560
|
+
at_line_start = True
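When tool.result arrives, the transient [RUNNING] line is only cleared when the call_id matches (or when either side omitted it), so a stray result cannot erase the wrong status. A small sketch of that matching rule as a pure function; the names are mine.

from typing import Optional

def should_clear_status(active: bool, shown_call_id: Optional[str],
                        result_call_id: Optional[str]) -> bool:
    """Mirror of the guard above: clear when ids match or when either id is missing."""
    if not active:
        return False
    if shown_call_id is None or result_call_id is None:
        return True
    return str(result_call_id) == str(shown_call_id)

assert should_clear_status(True, "call_1", "call_1")
assert should_clear_status(True, None, "call_2")
assert not should_clear_status(True, "call_1", "call_2")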
|
|
6050
6561
|
# Concise default: one professional, natural-language line per tool call.
|
|
6051
6562
|
if not self.ui.verbose:
|
|
6052
6563
|
try:
|
|
@@ -6102,18 +6613,18 @@ class ChatCLI:
|
|
|
6102
6613
|
except Exception:
|
|
6103
6614
|
# Fall back to legacy renderer on unexpected issues
|
|
6104
6615
|
self._render_tool_result(name, result, call_id=call_id)
|
|
6105
|
-
else:
|
|
6106
|
-
# Verbose mode retains the richer summary with previews
|
|
6107
|
-
self._render_tool_result(name, result, call_id=call_id)
|
|
6108
|
-
|
|
6109
|
-
# Tool result output is line-oriented; after rendering we should be positioned
|
|
6110
|
-
# at the start of a fresh line so the next tool.call indicator doesn't insert
|
|
6111
|
-
# an extra blank line.
|
|
6112
|
-
at_line_start = True
|
|
6113
|
-
try:
|
|
6114
|
-
await self._ws_broadcast("tool.result", {"name": name, "result": result, "call_id": call_id})
|
|
6115
|
-
except Exception:
|
|
6116
|
-
pass
|
|
6616
|
+
else:
|
|
6617
|
+
# Verbose mode retains the richer summary with previews
|
|
6618
|
+
self._render_tool_result(name, result, call_id=call_id)
|
|
6619
|
+
|
|
6620
|
+
# Tool result output is line-oriented; after rendering we should be positioned
|
|
6621
|
+
# at the start of a fresh line so the next tool.call indicator doesn't insert
|
|
6622
|
+
# an extra blank line.
|
|
6623
|
+
at_line_start = True
|
|
6624
|
+
try:
|
|
6625
|
+
await self._ws_broadcast("tool.result", {"name": name, "result": result, "call_id": call_id})
|
|
6626
|
+
except Exception:
|
|
6627
|
+
pass
|
|
6117
6628
|
# For Kimi, append provider-native tool result to raw history so it's threaded correctly
|
|
6118
6629
|
try:
|
|
6119
6630
|
if bool(getattr(self, "retain_native_tool_results", False)) and isinstance(self.model, str) and self.model.startswith("kimi-") and call_id:
|
|
@@ -6136,7 +6647,7 @@ class ChatCLI:
|
|
|
6136
6647
|
pass
|
|
6137
6648
|
# Do not auto-restart the indicator here; wait for the next model event
|
|
6138
6649
|
|
|
6139
|
-
elif event == "tool.dispatch":
|
|
6650
|
+
elif event == "tool.dispatch":
|
|
6140
6651
|
# Client-executed tool flow
|
|
6141
6652
|
if not HAS_LOCAL_TOOLS:
|
|
6142
6653
|
self.ui.warn("Received tool.dispatch but local tools are unavailable (henosis_cli_tools not installed)")
|
|
@@ -6153,6 +6664,17 @@ class ChatCLI:
|
|
|
6153
6664
|
args = data.get("args", {}) or {}
|
|
6154
6665
|
job_token = data.get("job_token")
|
|
6155
6666
|
reqp = data.get("requested_policy", {}) or {}
|
|
6667
|
+
|
|
6668
|
+
# Track in-flight dispatch so Ctrl+C can cancel quickly.
|
|
6669
|
+
try:
|
|
6670
|
+
self._inflight_dispatch = {
|
|
6671
|
+
"session_id": session_id_d,
|
|
6672
|
+
"call_id": call_id,
|
|
6673
|
+
"job_token": job_token,
|
|
6674
|
+
"name": name,
|
|
6675
|
+
}
|
|
6676
|
+
except Exception:
|
|
6677
|
+
pass
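Recording the dispatch context up front lets a Ctrl+C handler know exactly which session/call is still running; the finally block later in this hunk clears the record once a callback has been posted. A sketch of that bookkeeping, with a hypothetical interrupt hook (the actual cancellation path is not shown in this diff).

from typing import Any, Dict, Optional

class DispatchTracker:
    """Track the single in-flight client-side tool dispatch."""

    def __init__(self) -> None:
        self.inflight: Optional[Dict[str, Any]] = None

    def start(self, session_id: str, call_id: str, job_token: str, name: str) -> None:
        self.inflight = {"session_id": session_id, "call_id": call_id,
                         "job_token": job_token, "name": name}

    def finish(self, call_id: str) -> None:
        # Clear only if the finished call is the one we recorded.
        if self.inflight and str(self.inflight.get("call_id")) == str(call_id):
            self.inflight = None

    def on_interrupt(self) -> Optional[Dict[str, Any]]:
        # A Ctrl+C handler would read this to decide what to cancel (hypothetical usage).
        return self.inflight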
|
|
6156
6678
|
|
|
6157
6679
|
if DEBUG_SSE:
|
|
6158
6680
|
self.ui.print(f"[debug] dispatch name={name} call_id={call_id}", style=self.ui.theme["dim"])
|
|
@@ -6163,16 +6685,16 @@ class ChatCLI:
|
|
|
6163
6685
|
except Exception:
|
|
6164
6686
|
pass
|
|
6165
6687
|
|
|
6166
|
-
# Level gating and CLI approvals (Level 2)
|
|
6167
|
-
try:
|
|
6168
|
-
lvl = int(self.control_level) if isinstance(self.control_level, int) else None
|
|
6169
|
-
if lvl is None:
|
|
6170
|
-
# Prefer the server-reported level from session.started
|
|
6171
|
-
sl = self._current_turn.get("level") if isinstance(self._current_turn, dict) else None
|
|
6172
|
-
if isinstance(sl, int):
|
|
6173
|
-
lvl = int(sl)
|
|
6174
|
-
except Exception:
|
|
6175
|
-
lvl = None
|
|
6688
|
+
# Level gating and CLI approvals (Level 2)
|
|
6689
|
+
try:
|
|
6690
|
+
lvl = int(self.control_level) if isinstance(self.control_level, int) else None
|
|
6691
|
+
if lvl is None:
|
|
6692
|
+
# Prefer the server-reported level from session.started
|
|
6693
|
+
sl = self._current_turn.get("level") if isinstance(self._current_turn, dict) else None
|
|
6694
|
+
if isinstance(sl, int):
|
|
6695
|
+
lvl = int(sl)
|
|
6696
|
+
except Exception:
|
|
6697
|
+
lvl = None
|
|
6176
6698
|
# Hard block at Level 1 for anything other than read/list
|
|
6177
6699
|
if lvl == 1:
|
|
6178
6700
|
disallowed = str(name) not in ("read_file", "list_dir")
|
|
@@ -6313,84 +6835,84 @@ class ChatCLI:
|
|
|
6313
6835
|
result = local_append_file(args.get("path", ""), args.get("content", ""), policy)
|
|
6314
6836
|
elif name == "list_dir":
|
|
6315
6837
|
result = local_list_dir(args.get("path", ""), policy)
|
|
6316
|
-
elif name == "run_command":
|
|
6317
|
-
# Command allow policy:
|
|
6318
|
-
# - L1: blocked earlier
|
|
6319
|
-
# - L2: approval required; once approved, allow any base command
|
|
6320
|
-
# - L3: no approval; allow any base command
|
|
6321
|
-
# Use '*' wildcard (supported by henosis_cli_tools.run_command).
|
|
6322
|
-
if lvl in (2, 3):
|
|
6323
|
-
allow_csv = "*"
|
|
6324
|
-
else:
|
|
6325
|
-
# Legacy: intersect server + local allowlists
|
|
6326
|
-
req_allow = (reqp.get("command_allow_csv") or "").strip()
|
|
6327
|
-
local_allow = os.getenv("HENOSIS_ALLOW_COMMANDS", "")
|
|
6328
|
-
if req_allow and local_allow:
|
|
6329
|
-
req_set = {c.strip().lower() for c in req_allow.split(",") if c.strip()}
|
|
6330
|
-
loc_set = {c.strip().lower() for c in local_allow.split(",") if c.strip()}
|
|
6331
|
-
allow_csv = ",".join(sorted(req_set & loc_set))
|
|
6332
|
-
else:
|
|
6333
|
-
allow_csv = local_allow or req_allow or ""
|
|
6334
|
-
# Include trusted commands from CLI settings (session + always)
|
|
6335
|
-
try:
|
|
6336
|
-
allow_set = {c.strip().lower() for c in allow_csv.split(",") if c.strip()}
|
|
6337
|
-
for k in (self.trust_cmds_session or []):
|
|
6338
|
-
allow_set.add(str(k).strip().lower())
|
|
6339
|
-
for k in (self.trust_cmds_always or []):
|
|
6340
|
-
allow_set.add(str(k).strip().lower())
|
|
6341
|
-
allow_csv = ",".join(sorted(allow_set))
|
|
6342
|
-
except Exception:
|
|
6343
|
-
pass
|
|
6344
|
-
timeout = args.get("timeout", None)
|
|
6345
|
-
result = local_run_command(args.get("cmd", ""), policy, cwd=args.get("cwd", "."), timeout=timeout, allow_commands_csv=allow_csv)
|
|
6346
|
-
# Legacy allowlist retry logic removed for L2/L3 (we allow '*').
|
|
6347
|
-
elif name == "apply_patch":
|
|
6348
|
-
result = local_apply_patch(
|
|
6349
|
-
patch=args.get("patch", ""),
|
|
6350
|
-
policy=policy,
|
|
6351
|
-
cwd=args.get("cwd", "."),
|
|
6352
|
-
lenient=bool(args.get("lenient", True)),
|
|
6353
|
-
dry_run=bool(args.get("dry_run", False)),
|
|
6354
|
-
backup=bool(args.get("backup", True)),
|
|
6355
|
-
safeguard_max_lines=int(args.get("safeguard_max_lines", 3000) or 3000),
|
|
6356
|
-
safeguard_confirm=bool(args.get("safeguard_confirm", False)),
|
|
6357
|
-
)
|
|
6358
|
-
elif name == "planning":
|
|
6359
|
-
# Persist plan under plans/ at the current root (workspace or host base)
|
|
6360
|
-
try:
|
|
6361
|
-
plan_text = str(args.get("plan", "") or "").strip()
|
|
6362
|
-
ctx_text = args.get("context")
|
|
6363
|
-
if not plan_text:
|
|
6364
|
-
result = {"ok": False, "error": "plan is required"}
|
|
6365
|
-
else:
|
|
6366
|
-
base = policy.workspace_base if policy.scope != "host" else (policy.host_base or Path(os.getcwd()).resolve())
|
|
6367
|
-
plans_dir = Path(base) / "plans"
|
|
6368
|
-
plans_dir.mkdir(parents=True, exist_ok=True)
|
|
6369
|
-
from datetime import datetime as _dt
|
|
6370
|
-
import re as _re, uuid as _uuid
|
|
6371
|
-
ts = _dt.utcnow().strftime("%Y%m%d-%H%M%S")
|
|
6372
|
-
first_line = plan_text.splitlines()[0] if plan_text else "plan"
|
|
6373
|
-
slug = _re.sub(r"[^a-zA-Z0-9_-]+", "-", first_line).strip("-") or "plan"
|
|
6374
|
-
slug = slug[:40]
|
|
6375
|
-
fname = f"plan-{ts}-{_uuid.uuid4().hex[:6]}-{slug}.md"
|
|
6376
|
-
fpath = plans_dir / fname
|
|
6377
|
-
body_lines = [f"# Plan ({ts} UTC)\n"]
|
|
6378
|
-
if ctx_text:
|
|
6379
|
-
body_lines.append("## Context\n")
|
|
6380
|
-
body_lines.append(str(ctx_text).strip() + "\n\n")
|
|
6381
|
-
body_lines.append("## Steps\n")
|
|
6382
|
-
body_lines.append(plan_text.rstrip() + "\n")
|
|
6383
|
-
content = "\n".join(body_lines)
|
|
6384
|
-
with fpath.open("w", encoding="utf-8", newline="") as f:
|
|
6385
|
-
f.write(content)
|
|
6386
|
-
result = {"ok": True, "data": {"path": str(fpath), "bytes_written": len(content.encode('utf-8'))}}
|
|
6387
|
-
except Exception as _pe:
|
|
6388
|
-
result = {"ok": False, "error": str(_pe)}
|
|
6389
|
-
elif name == "string_replace":
|
|
6390
|
-
result = local_string_replace(
|
|
6391
|
-
pattern=args.get("pattern", ""),
|
|
6392
|
-
replacement=args.get("replacement", ""),
|
|
6393
|
-
policy=policy,
|
|
6838
|
+
elif name == "run_command":
|
|
6839
|
+
# Command allow policy:
|
|
6840
|
+
# - L1: blocked earlier
|
|
6841
|
+
# - L2: approval required; once approved, allow any base command
|
|
6842
|
+
# - L3: no approval; allow any base command
|
|
6843
|
+
# Use '*' wildcard (supported by henosis_cli_tools.run_command).
|
|
6844
|
+
if lvl in (2, 3):
|
|
6845
|
+
allow_csv = "*"
|
|
6846
|
+
else:
|
|
6847
|
+
# Legacy: intersect server + local allowlists
|
|
6848
|
+
req_allow = (reqp.get("command_allow_csv") or "").strip()
|
|
6849
|
+
local_allow = os.getenv("HENOSIS_ALLOW_COMMANDS", "")
|
|
6850
|
+
if req_allow and local_allow:
|
|
6851
|
+
req_set = {c.strip().lower() for c in req_allow.split(",") if c.strip()}
|
|
6852
|
+
loc_set = {c.strip().lower() for c in local_allow.split(",") if c.strip()}
|
|
6853
|
+
allow_csv = ",".join(sorted(req_set & loc_set))
|
|
6854
|
+
else:
|
|
6855
|
+
allow_csv = local_allow or req_allow or ""
|
|
6856
|
+
# Include trusted commands from CLI settings (session + always)
|
|
6857
|
+
try:
|
|
6858
|
+
allow_set = {c.strip().lower() for c in allow_csv.split(",") if c.strip()}
|
|
6859
|
+
for k in (self.trust_cmds_session or []):
|
|
6860
|
+
allow_set.add(str(k).strip().lower())
|
|
6861
|
+
for k in (self.trust_cmds_always or []):
|
|
6862
|
+
allow_set.add(str(k).strip().lower())
|
|
6863
|
+
allow_csv = ",".join(sorted(allow_set))
|
|
6864
|
+
except Exception:
|
|
6865
|
+
pass
|
|
6866
|
+
timeout = args.get("timeout", None)
|
|
6867
|
+
result = local_run_command(args.get("cmd", ""), policy, cwd=args.get("cwd", "."), timeout=timeout, allow_commands_csv=allow_csv)
|
|
6868
|
+
# Legacy allowlist retry logic removed for L2/L3 (we allow '*').
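The allowlist logic above collapses to a wildcard at Levels 2 and 3 and only intersects the server and local allowlists on the legacy path, then folds in locally trusted commands. A standalone sketch of that computation; the variable names follow the hunk but the function itself is illustrative.

from typing import Iterable, Optional

def compute_allow_csv(level: Optional[int], server_csv: str, local_csv: str,
                      trusted: Iterable[str] = ()) -> str:
    """Return the allow_commands CSV passed to run_command."""
    if level in (2, 3):
        return "*"                        # approval (L2) or full access (L3): any base command
    if server_csv.strip() and local_csv.strip():
        server = {c.strip().lower() for c in server_csv.split(",") if c.strip()}
        local = {c.strip().lower() for c in local_csv.split(",") if c.strip()}
        allowed = server & local          # legacy: intersection of both allowlists
    else:
        src = local_csv or server_csv or ""
        allowed = {c.strip().lower() for c in src.split(",") if c.strip()}
    for cmd in trusted:
        allowed.add(str(cmd).strip().lower())
    return ",".join(sorted(allowed))

# compute_allow_csv(2, "git,ls", "ls,cat") -> "*"
# compute_allow_csv(None, "git,ls", "ls,cat", ["pytest"]) -> "ls,pytest"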
|
|
6869
|
+
elif name == "apply_patch":
|
|
6870
|
+
result = local_apply_patch(
|
|
6871
|
+
patch=args.get("patch", ""),
|
|
6872
|
+
policy=policy,
|
|
6873
|
+
cwd=args.get("cwd", "."),
|
|
6874
|
+
lenient=bool(args.get("lenient", True)),
|
|
6875
|
+
dry_run=bool(args.get("dry_run", False)),
|
|
6876
|
+
backup=bool(args.get("backup", True)),
|
|
6877
|
+
safeguard_max_lines=int(args.get("safeguard_max_lines", 3000) or 3000),
|
|
6878
|
+
safeguard_confirm=bool(args.get("safeguard_confirm", False)),
|
|
6879
|
+
)
|
|
6880
|
+
elif name == "planning":
|
|
6881
|
+
# Persist plan under plans/ at the current root (workspace or host base)
|
|
6882
|
+
try:
|
|
6883
|
+
plan_text = str(args.get("plan", "") or "").strip()
|
|
6884
|
+
ctx_text = args.get("context")
|
|
6885
|
+
if not plan_text:
|
|
6886
|
+
result = {"ok": False, "error": "plan is required"}
|
|
6887
|
+
else:
|
|
6888
|
+
base = policy.workspace_base if policy.scope != "host" else (policy.host_base or Path(os.getcwd()).resolve())
|
|
6889
|
+
plans_dir = Path(base) / "plans"
|
|
6890
|
+
plans_dir.mkdir(parents=True, exist_ok=True)
|
|
6891
|
+
from datetime import datetime as _dt
|
|
6892
|
+
import re as _re, uuid as _uuid
|
|
6893
|
+
ts = _dt.utcnow().strftime("%Y%m%d-%H%M%S")
|
|
6894
|
+
first_line = plan_text.splitlines()[0] if plan_text else "plan"
|
|
6895
|
+
slug = _re.sub(r"[^a-zA-Z0-9_-]+", "-", first_line).strip("-") or "plan"
|
|
6896
|
+
slug = slug[:40]
|
|
6897
|
+
fname = f"plan-{ts}-{_uuid.uuid4().hex[:6]}-{slug}.md"
|
|
6898
|
+
fpath = plans_dir / fname
|
|
6899
|
+
body_lines = [f"# Plan ({ts} UTC)\n"]
|
|
6900
|
+
if ctx_text:
|
|
6901
|
+
body_lines.append("## Context\n")
|
|
6902
|
+
body_lines.append(str(ctx_text).strip() + "\n\n")
|
|
6903
|
+
body_lines.append("## Steps\n")
|
|
6904
|
+
body_lines.append(plan_text.rstrip() + "\n")
|
|
6905
|
+
content = "\n".join(body_lines)
|
|
6906
|
+
with fpath.open("w", encoding="utf-8", newline="") as f:
|
|
6907
|
+
f.write(content)
|
|
6908
|
+
result = {"ok": True, "data": {"path": str(fpath), "bytes_written": len(content.encode('utf-8'))}}
|
|
6909
|
+
except Exception as _pe:
|
|
6910
|
+
result = {"ok": False, "error": str(_pe)}
|
|
6911
|
+
elif name == "string_replace":
|
|
6912
|
+
result = local_string_replace(
|
|
6913
|
+
pattern=args.get("pattern", ""),
|
|
6914
|
+
replacement=args.get("replacement", ""),
|
|
6915
|
+
policy=policy,
|
|
6394
6916
|
cwd=args.get("cwd", "."),
|
|
6395
6917
|
file_globs=[str(g) for g in (args.get("file_globs") or [])],
|
|
6396
6918
|
exclude_globs=[str(e) for e in (args.get("exclude_globs") or [])],
|
|
@@ -6442,18 +6964,82 @@ class ChatCLI:
|
|
|
6442
6964
|
self.ui.warn(f"tools.callback POST failed: {r.status_code} {r.text}")
|
|
6443
6965
|
except Exception as e:
|
|
6444
6966
|
self.ui.warn(f"tools.callback error: {e}")
|
|
6967
|
+
finally:
|
|
6968
|
+
try:
|
|
6969
|
+
# Clear in-flight dispatch context when we send a callback.
|
|
6970
|
+
if isinstance(self._inflight_dispatch, dict):
|
|
6971
|
+
if str(self._inflight_dispatch.get("call_id")) == str(call_id):
|
|
6972
|
+
self._inflight_dispatch = None
|
|
6973
|
+
except Exception:
|
|
6974
|
+
pass
|
|
6445
6975
|
|
|
6446
|
-
elif event == "message.completed":
|
|
6976
|
+
elif event == "message.completed":
|
|
6447
6977
|
# Safety: this block handles only 'message.completed'.
|
|
6448
6978
|
usage = data.get("usage", {})
|
|
6449
|
-
model_used = data.get("model") or self.model
|
|
6450
|
-
#
|
|
6451
|
-
try:
|
|
6452
|
-
if
|
|
6453
|
-
|
|
6454
|
-
|
|
6455
|
-
|
|
6456
|
-
|
|
6979
|
+
model_used = data.get("model") or self.model
|
|
6980
|
+
# OpenAI: persist the last response id so future turns can use previous_response_id.
|
|
6981
|
+
try:
|
|
6982
|
+
if self._is_openai_model(model_used):
|
|
6983
|
+
# Prefer the explicit per-turn id list when provided by the server.
|
|
6984
|
+
ids = data.get("openai_response_ids")
|
|
6985
|
+
if isinstance(ids, list) and ids:
|
|
6986
|
+
for x in ids:
|
|
6987
|
+
if not isinstance(x, str):
|
|
6988
|
+
continue
|
|
6989
|
+
xs = x.strip()
|
|
6990
|
+
if not xs:
|
|
6991
|
+
continue
|
|
6992
|
+
try:
|
|
6993
|
+
if xs not in self._openai_response_id_history:
|
|
6994
|
+
self._openai_response_id_history.append(xs)
|
|
6995
|
+
except Exception:
|
|
6996
|
+
pass
|
|
6997
|
+
rid = data.get("openai_previous_response_id")
|
|
6998
|
+
if isinstance(rid, str) and rid.strip():
|
|
6999
|
+
self._openai_previous_response_id = rid.strip()
|
|
7000
|
+
try:
|
|
7001
|
+
if rid.strip() not in self._openai_response_id_history:
|
|
7002
|
+
self._openai_response_id_history.append(rid.strip())
|
|
7003
|
+
except Exception:
|
|
7004
|
+
pass
|
|
7005
|
+
|
|
7006
|
+
# OpenAI manual-state replay: server returns the delta items appended
|
|
7007
|
+
# during this turn (reasoning/tool calls/tool outputs). Persist them.
|
|
7008
|
+
try:
|
|
7009
|
+
delta = data.get("openai_delta_items")
|
|
7010
|
+
if isinstance(delta, list):
|
|
7011
|
+
base_items = (
|
|
7012
|
+
self._openai_last_sent_input_items
|
|
7013
|
+
if isinstance(self._openai_last_sent_input_items, list)
|
|
7014
|
+
else copy.deepcopy(self._openai_input_items)
|
|
7015
|
+
)
|
|
7016
|
+
# Normalize to a list of dicts where possible; keep unknown shapes as-is.
|
|
7017
|
+
merged: List[Any] = []
|
|
7018
|
+
try:
|
|
7019
|
+
merged.extend(list(base_items or []))
|
|
7020
|
+
except Exception:
|
|
7021
|
+
merged = list(base_items or []) if base_items is not None else []
|
|
7022
|
+
merged.extend(delta)
|
|
7023
|
+
# Store only dict-like items (server is expected to send dicts)
|
|
7024
|
+
cleaned: List[Dict[str, Any]] = []
|
|
7025
|
+
for it in merged:
|
|
7026
|
+
if isinstance(it, dict):
|
|
7027
|
+
cleaned.append(dict(it))
|
|
7028
|
+
self._openai_input_items = cleaned
|
|
7029
|
+
except Exception:
|
|
7030
|
+
pass
|
|
7031
|
+
finally:
|
|
7032
|
+
# Clear per-turn sent snapshot
|
|
7033
|
+
self._openai_last_sent_input_items = None
|
|
7034
|
+
except Exception:
|
|
7035
|
+
pass
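After message.completed the client appends the server-returned openai_delta_items to whatever it last sent, keeping only dict-shaped items, so the next turn can replay the full chain through openai_input_items. A compact sketch of the merge; the parameter names mirror the hunk, but this is not the exact method.

import copy
from typing import Any, Dict, List, Optional

def merge_openai_items(last_sent: Optional[List[Dict[str, Any]]],
                       stored: List[Dict[str, Any]],
                       delta: List[Any]) -> List[Dict[str, Any]]:
    """New authoritative item list = items sent this turn + server delta, dicts only."""
    base = last_sent if isinstance(last_sent, list) else copy.deepcopy(stored)
    merged: List[Any] = list(base or [])
    merged.extend(delta or [])
    return [dict(it) for it in merged if isinstance(it, dict)]

# Example: a reasoning item plus a tool call/result pair returned as delta items
# lands after the user turn that was sent, ready for the next stateless replay.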
|
|
7036
|
+
# Gemini: server may include an authoritative provider-native history snapshot.
|
|
7037
|
+
try:
|
|
7038
|
+
if isinstance(model_used, str) and model_used.startswith("gemini-"):
|
|
7039
|
+
rpm = data.get("raw_provider_messages")
|
|
7040
|
+
self._gemini_raw_history = self._normalize_gemini_raw_messages(rpm)
|
|
7041
|
+
except Exception:
|
|
7042
|
+
pass
|
|
6457
7043
|
# Mark completion for retry controller
|
|
6458
7044
|
try:
|
|
6459
7045
|
last_completed = True
|
|
@@ -6749,13 +7335,13 @@ class ChatCLI:
|
|
|
6749
7335
|
# Compact style: include reasoning effort inline with model name when applicable
|
|
6750
7336
|
try:
|
|
6751
7337
|
effort_seg = ""
|
|
6752
|
-
if self._is_openai_reasoning_model(model_used):
|
|
6753
|
-
# Convert low|medium|high|xhigh -> Low|Medium|High|XHigh for display
|
|
6754
|
-
lvl = str(self.reasoning_effort or "medium").strip().lower()
|
|
6755
|
-
if lvl not in ("low", "medium", "high", "xhigh"):
|
|
6756
|
-
lvl = "medium"
|
|
6757
|
-
disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
|
|
6758
|
-
effort_seg = f" {disp}"
|
|
7338
|
+
if self._is_openai_reasoning_model(model_used):
|
|
7339
|
+
# Convert low|medium|high|xhigh -> Low|Medium|High|XHigh for display
|
|
7340
|
+
lvl = str(self.reasoning_effort or "medium").strip().lower()
|
|
7341
|
+
if lvl not in ("low", "medium", "high", "xhigh"):
|
|
7342
|
+
lvl = "medium"
|
|
7343
|
+
disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
|
|
7344
|
+
effort_seg = f" {disp}"
|
|
6759
7345
|
except Exception:
|
|
6760
7346
|
effort_seg = ""
|
|
6761
7347
|
model_only_line = f"model: {model_used or '(unknown)'}{effort_seg}"
|
|
@@ -6955,47 +7541,47 @@ class ChatCLI:
         except Exception:
             pass

         # Anthropic prompt caching banner when detected (reads @10% input rate; creation billed at TTL multiplier)
         try:
             price = self._resolve_price(model_used)
             provider = (price.get("provider") or "").lower()
             if provider == "anthropic":
                 cr = int(usage.get("cache_read_input_tokens", 0) or 0)
                 cc = int(usage.get("cache_creation_input_tokens", 0) or 0)
                 # Optional breakdown
                 cc_5m = 0
                 cc_1h = 0
                 try:
                     ccmap = usage.get("cache_creation") if isinstance(usage, dict) else None
                     if isinstance(ccmap, dict):
                         cc_5m = int(ccmap.get("ephemeral_5m_input_tokens", 0) or 0)
                         cc_1h = int(ccmap.get("ephemeral_1h_input_tokens", 0) or 0)
                 except Exception:
                     cc_5m = cc_5m or 0
                     cc_1h = cc_1h or 0
                 if (cr > 0) or (cc > 0) or (cc_5m > 0) or (cc_1h > 0):
                     # Build a concise line similar to OpenAI banner
                     line = f"Billing: Anthropic prompt cache read {int(cr)} token(s) @10% input rate"
                     if (cc_5m > 0) or (cc_1h > 0):
                         line += f" | created {int(cc_5m)} @1.25x + {int(cc_1h)} @2x"
                     else:
                         if cc > 0:
                             line += f" | created {int(cc)} token(s) (billed at 1.25x/2x based on TTL)"
                     # Calculate savings (reported on a separate line to match OpenAI style)
                     saved_line = None
                     if cr > 0:
                         try:
                             in_rate_per_m = float(price.get("input", 0.0))
                             # Savings = cache_read * (1.0 - 0.1) * price
                             saved_usd = (int(cr) / 1_000_000.0) * in_rate_per_m * 0.90
                             saved_line = f"saved ${saved_usd:.2f} with prompt cache"
                         except Exception:
                             saved_line = None
                     box_lines.append(line)
                     if saved_line:
                         box_lines.append(saved_line)
         except Exception:
             pass

         # Show consolidated usage summary
         try:
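The banner math above can be checked with a small standalone sketch. This is illustrative only (not part of cli.py); the function name is hypothetical, and the multipliers are taken from the hunk's own comments: cache reads billed at 10% of the input rate, 5m-TTL creation at 1.25x, 1h-TTL creation at 2x.

# Illustrative sketch: estimate Anthropic prompt-cache billing the way the
# banner describes it. Rates per the comments in the hunk above.
def estimate_cache_cost(usage: dict, input_rate_per_mtok: float) -> dict:
    cr = int(usage.get("cache_read_input_tokens", 0) or 0)
    cc = usage.get("cache_creation", {}) or {}
    cc_5m = int(cc.get("ephemeral_5m_input_tokens", 0) or 0)
    cc_1h = int(cc.get("ephemeral_1h_input_tokens", 0) or 0)
    per_tok = input_rate_per_mtok / 1_000_000.0
    read_cost = cr * per_tok * 0.10
    create_cost = cc_5m * per_tok * 1.25 + cc_1h * per_tok * 2.0
    # "Saved" mirrors the banner: full input rate minus the 10% actually charged.
    saved = cr * per_tok * 0.90
    return {"read_cost": read_cost, "create_cost": create_cost, "saved": saved}

if __name__ == "__main__":
    usage = {
        "cache_read_input_tokens": 180_000,
        "cache_creation": {"ephemeral_5m_input_tokens": 20_000, "ephemeral_1h_input_tokens": 0},
    }
    print(estimate_cache_cost(usage, input_rate_per_mtok=3.0))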
@@ -7295,29 +7881,29 @@ class ChatCLI:
                         pass
                 return "".join(assistant_buf)

             elif event == "provider.message":
                 # Provider-native message snapshot (e.g., Kimi assistant with reasoning_content)
                 provider = (data.get("provider") or "").lower()
                 msg = data.get("message")
                 if provider == "gemini":
                     # Always retain Gemini provider-native messages (needed for multi-turn tool calling).
                     try:
                         if isinstance(msg, dict):
                             self._gemini_raw_history.append(dict(msg))
                         elif isinstance(msg, list):
                             self._gemini_raw_history.extend(self._normalize_gemini_raw_messages(msg))
                     except Exception:
                         pass
                 if bool(getattr(self, "retain_native_tool_results", False)) and provider == "kimi" and isinstance(msg, dict):
                     # Append as-is to local raw history for the next turn
                     try:
                         self._kimi_raw_history.append(dict(msg))
                     except Exception:
                         try:
                             self._kimi_raw_history.append(msg)  # type: ignore
                         except Exception:
                             pass
                 continue

             else:
                 # TEMP DEBUG: show unknown/unhandled events
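The retention rule above (keep Gemini snapshots unconditionally, keep Kimi snapshots only when retain_native_tool_results is enabled) can be isolated in a few lines. This sketch is illustrative, not the package's code; the function name is hypothetical and the Gemini list handling is simplified relative to _normalize_gemini_raw_messages.

# Illustrative sketch of the provider.message retention rule described above.
def retain_provider_message(event: dict, gemini_hist: list, kimi_hist: list,
                            retain_native_tool_results: bool) -> None:
    provider = (event.get("provider") or "").lower()
    msg = event.get("message")
    if provider == "gemini":
        # Gemini snapshots are always kept (needed for multi-turn tool calling).
        if isinstance(msg, dict):
            gemini_hist.append(dict(msg))
        elif isinstance(msg, list):
            gemini_hist.extend(m for m in msg if isinstance(m, dict))
    elif provider == "kimi" and retain_native_tool_results and isinstance(msg, dict):
        kimi_hist.append(dict(msg))

gem, kimi = [], []
retain_provider_message({"provider": "gemini", "message": {"role": "model", "parts": []}}, gem, kimi, False)
retain_provider_message({"provider": "kimi", "message": {"role": "assistant"}}, gem, kimi, True)
print(len(gem), len(kimi))  # 1 1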
@@ -7451,12 +8037,12 @@ class ChatCLI:
         # Reasoning effort tag for OpenAI reasoning models
         try:
             effort_seg = ""
             if self._is_openai_reasoning_model(model_label):
                 lvl = str(self.reasoning_effort or "medium").strip().lower()
                 if lvl not in ("low", "medium", "high", "xhigh"):
                     lvl = "medium"
                 disp = {"low": "Low", "medium": "Medium", "high": "High", "xhigh": "XHigh"}.get(lvl, "Medium")
                 effort_seg = f" {disp}"
         except Exception:
             effort_seg = ""
         try:
@@ -7545,15 +8131,15 @@ class ChatCLI:
         # Allow codebase map to be injected again
         self._did_inject_codebase_map = False
         # Ensure working-memory first-turn flag remains False so we inject now
         self._did_inject_working_memory = False  # Allow custom first-turn text to inject again
         try:
             self._did_inject_custom_first_turn = False
         except Exception:
             pass
         # Reset provider-native histories
         self.messages_for_save = []
         if not self.save_chat_history:
             self.thread_uid = None
         self._kimi_raw_history = []

         # Build a fresh payload so the first-turn injections (code map + working memory) are applied
@@ -7582,24 +8168,32 @@ class ChatCLI:
         new_payload["control_level"] = self.control_level
         if self.auto_approve:
             new_payload["auto_approve"] = self.auto_approve
-        try:
-            if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
-                new_payload["reasoning_effort"] = self.reasoning_effort
-            else:
-                new_payload["reasoning_effort"] = "medium"
-        except Exception:
-            new_payload["reasoning_effort"] = "medium"
-        try:
-            if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
-                new_payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
-        except Exception:
-            pass
-        # Anthropic
-        try:
-
-
-
-
+        try:
+            if isinstance(self.reasoning_effort, str) and self.reasoning_effort in ("low", "medium", "high", "xhigh"):
+                new_payload["reasoning_effort"] = self.reasoning_effort
+            else:
+                new_payload["reasoning_effort"] = "medium"
+        except Exception:
+            new_payload["reasoning_effort"] = "medium"
+        try:
+            if isinstance(self.thinking_budget_tokens, int) and self.thinking_budget_tokens > 0:
+                new_payload["thinking_budget_tokens"] = int(self.thinking_budget_tokens)
+        except Exception:
+            pass
+        # Anthropic effort (Opus 4.6/4.5)
+        try:
+            ae = getattr(self, "anthropic_effort", None)
+            ae2 = str(ae or "high").strip().lower()
+            if ae2 in ("low", "medium", "high", "max"):
+                new_payload["anthropic_effort"] = ae2
+        except Exception:
+            new_payload["anthropic_effort"] = "high"
+        # Anthropic prompt cache TTL (server override): send when set to 5m or 1h
+        try:
+            if isinstance(self.anthropic_cache_ttl, str) and self.anthropic_cache_ttl in ("5m", "1h"):
+                new_payload["anthropic_cache_ttl"] = self.anthropic_cache_ttl
+        except Exception:
+            pass
         if self.web_search_enabled:
             new_payload["enable_web_search"] = True
         if self.web_search_allowed_domains:
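The new payload fields above are validated before being sent. A minimal sketch of that assembly, assuming nothing beyond the field names and allowed values shown in the hunk (the surrounding function is hypothetical):

# Illustrative sketch: building the optional effort/cache fields added in 0.6.10.
def build_effort_fields(reasoning_effort=None, thinking_budget_tokens=None,
                        anthropic_effort=None, anthropic_cache_ttl=None) -> dict:
    payload = {}
    payload["reasoning_effort"] = (
        reasoning_effort if reasoning_effort in ("low", "medium", "high", "xhigh") else "medium"
    )
    if isinstance(thinking_budget_tokens, int) and thinking_budget_tokens > 0:
        payload["thinking_budget_tokens"] = thinking_budget_tokens
    ae = str(anthropic_effort or "high").strip().lower()
    if ae in ("low", "medium", "high", "max"):
        payload["anthropic_effort"] = ae
    if anthropic_cache_ttl in ("5m", "1h"):
        payload["anthropic_cache_ttl"] = anthropic_cache_ttl
    return payload

print(build_effort_fields(reasoning_effort="high", anthropic_effort="max", anthropic_cache_ttl="1h"))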
@@ -7857,32 +8451,31 @@ class ChatCLI:
         except Exception:
             pass
         # Fallback defaults for common models
-        if not ctx_map:
-            try:
-                ctx_map.update({
-                    "gpt-5.2": 400000,
-                    "gpt-5.2-pro": 400000,
-                    "gpt-5": 400000,
-                    "gpt-5-2025-08-07": 400000,
-                    "codex-mini-latest": 200000,
-
-                    "gemini-3-flash-preview": 1048576,
-                    "gemini-3-pro-preview": 1000000,
+        if not ctx_map:
+            try:
+                ctx_map.update({
+                    "gpt-5.2": 400000,
+                    "gpt-5.2-pro": 400000,
+                    "gpt-5": 400000,
+                    "gpt-5-2025-08-07": 400000,
+                    "codex-mini-latest": 200000,
+                    # (removed gemini-2.5-pro)
+                    "gemini-3-flash-preview": 1048576,
+                    "gemini-3-pro-preview": 1000000,
                     "grok-4-1-fast-reasoning": 2000000,
                     "grok-4-1-fast-non-reasoning": 2000000,
                     "grok-4": 200000,
                     "grok-code-fast-1": 262144,
                     "deepseek-chat": 128000,
                     "deepseek-reasoner": 128000,
-                    "kimi-k2
-                    "kimi-k2-0905-preview": 262144,
+                    "kimi-k2.5": 262144,
                     "claude-sonnet-4-20250514": 1000000,
                     "claude-sonnet-4-20250514-thinking": 1000000,
-                    "claude-sonnet-4-5-20250929": 1000000,
-                    "claude-sonnet-4-5-20250929-thinking": 1000000,
-                    "claude-opus-4-
-                    "claude-opus-4-
-                    "glm-4.
+                    "claude-sonnet-4-5-20250929": 1000000,
+                    "claude-sonnet-4-5-20250929-thinking": 1000000,
+                    "claude-opus-4-6": 1000000,
+                    "claude-opus-4-6-thinking": 1000000,
+                    "glm-4.7": 200000,
                 })
             except Exception:
                 pass
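A lookup against that fallback table is straightforward. The sketch below is illustrative only; the table is abridged from the hunk above, and the 128_000 default for unknown models is an assumption, not something the package specifies.

# Illustrative sketch: context-window lookup with a conservative default.
FALLBACK_CONTEXT_WINDOWS = {
    "gpt-5.2": 400_000,
    "gpt-5.2-pro": 400_000,
    "codex-mini-latest": 200_000,
    "gemini-3-flash-preview": 1_048_576,
    "grok-4-1-fast-reasoning": 2_000_000,
    "kimi-k2.5": 262_144,
    "claude-opus-4-6": 1_000_000,
    "glm-4.7": 200_000,
}

def context_window(model: str, default: int = 128_000) -> int:
    # Strip an Anthropic "-thinking" suffix so variants share the base entry.
    base = model[:-9] if model.endswith("-thinking") else model
    return FALLBACK_CONTEXT_WINDOWS.get(base, default)

print(context_window("claude-opus-4-6-thinking"))  # 1000000
print(context_window("unknown-model"))             # 128000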
@@ -7912,19 +8505,19 @@ class ChatCLI:

     # --------------------- Tier-aware defaults -------------------------

-    def _recommended_default_model(self) -> str:
-        """Return the tier-aware recommended default model.
+    def _recommended_default_model(self) -> str:
+        """Return the tier-aware recommended default model.

-        - Free-tier users: recommend Kimi k2
-        - All other users: recommend gpt-5.2 (best overall default).
-        When tier is unknown, fall back to gpt-5.2.
-        """
+        - Free-tier users: recommend Kimi k2.5.
+        - All other users: recommend gpt-5.2 (best overall default).
+        When tier is unknown, fall back to gpt-5.2.
+        """
         try:
             if bool(self.is_free_tier):
-                return "kimi-k2
+                return "kimi-k2.5"
         except Exception:
             pass
-        return "gpt-5.2"
+        return "gpt-5.2"

     # --------------------- Onboarding and Welcome ---------------------
     async def _welcome_flow(self) -> None:
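The tier rule in that docstring reduces to a one-liner. A minimal, illustrative restatement (standalone function, not the method itself):

# Illustrative sketch of the tier-aware default described above.
def recommended_default_model(is_free_tier) -> str:
    # Free tier -> kimi-k2.5; paid or unknown tier -> gpt-5.2.
    return "kimi-k2.5" if bool(is_free_tier) else "gpt-5.2"

assert recommended_default_model(True) == "kimi-k2.5"
assert recommended_default_model(None) == "gpt-5.2"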
@@ -8155,6 +8748,57 @@ class ChatCLI:
             self.ui.print("Please select a default model for new chats.")
             await self.select_model_menu()

+    async def _wizard_anthropic_effort_step(self) -> None:
+        """First-time wizard: choose Anthropic effort (Opus 4.6/4.5 only).
+
+        Per opus4-6.txt:
+        - default effort is "high"
+        - effort "max" is Opus 4.6 only
+        """
+        try:
+            model = str(self.model or "")
+        except Exception:
+            model = ""
+        base = model[:-9] if model.endswith("-thinking") else model
+        # Only prompt when it matters.
+        if base not in ("claude-opus-4-6",):
+            # Default behavior equals high.
+            try:
+                if not getattr(self, "anthropic_effort", None):
+                    self.anthropic_effort = "high"
+            except Exception:
+                self.anthropic_effort = "high"
+            return
+
+        try:
+            cur = str(getattr(self, "anthropic_effort", "high") or "high").strip().lower()
+        except Exception:
+            cur = "high"
+        if cur not in ("low", "medium", "high", "max"):
+            cur = "high"
+
+        choices: List[Tuple[str, str]] = [
+            ("high", "High (default)"),
+            ("medium", "Medium"),
+            ("low", "Low"),
+        ]
+        if base == "claude-opus-4-6":
+            choices.append(("max", "Max (Opus 4.6 only)"))
+
+        sel = await self._menu_choice(
+            "Anthropic effort",
+            "How thoroughly should Claude respond by default?",
+            choices,
+        )
+        if sel in ("low", "medium", "high", "max"):
+            # Guard: max is Opus 4.6 only
+            if sel == "max" and base != "claude-opus-4-6":
+                self.anthropic_effort = "high"
+            else:
+                self.anthropic_effort = sel
+        else:
+            self.anthropic_effort = cur or "high"
+
     async def _wizard_agent_scope_step(self) -> None:
         """First-time wizard: choose Agent scope root and mode via menus.

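The validation the wizard step enforces (default "high", "max" honoured only on Opus 4.6) can be expressed on its own. This is an illustrative sketch with a hypothetical function name, not the package's API:

# Illustrative sketch of the effort-validation rule used by the wizard step above.
def resolve_anthropic_effort(selection, model: str) -> str:
    base = model[:-9] if model.endswith("-thinking") else model
    sel = str(selection or "high").strip().lower()
    if sel not in ("low", "medium", "high", "max"):
        return "high"
    if sel == "max" and base != "claude-opus-4-6":
        return "high"
    return sel

assert resolve_anthropic_effort("max", "claude-opus-4-6-thinking") == "max"
assert resolve_anthropic_effort("max", "claude-sonnet-4-5-20250929") == "high"
assert resolve_anthropic_effort(None, "claude-opus-4-6") == "high"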
@@ -8272,8 +8916,14 @@ class ChatCLI:
             "We’ll configure a few defaults. You can change these later via /settings.",
         )

-        # --- 1) Default model (menu
+        # --- 1) Default model (menu) ---
         await self._wizard_model_step()
+        # If the picker was cancelled (or model still unset), choose a sensible default.
+        if not self.model:
+            self.model = self._recommended_default_model()
+
+        # --- 1b) Anthropic effort (Opus 4.6 / 4.5) ---
+        await self._wizard_anthropic_effort_step()

         # --- 2) Tools (always ON per design) ---
         self.requested_tools = True
@@ -8284,8 +8934,8 @@ class ChatCLI:
         )
         await self.set_level_menu()
         if self.control_level not in (1, 2, 3):
-            # Default to Level
-            self.control_level =
+            # Default to Level 3 if user aborted
+            self.control_level = 3

         # --- 4) Agent scope (menus; only type on custom path) ---
         self.ui.print(
@@ -8336,30 +8986,45 @@ class ChatCLI:
         except Exception:
             curv = "medium"

-
-
-
-        [
+        verbosity_choices: List[Tuple[str, str]] = []
+        if self._is_gpt_model(self.model):
+            # Default-first: Low for GPT models.
+            verbosity_choices = [
                 ("low", "Low – short, to-the-point answers"),
+                ("medium", "Medium – balanced detail"),
+                ("high", "High – more verbose explanations"),
+            ]
+        else:
+            # Default-first: Medium for non-GPT models; do not surface "Low".
+            verbosity_choices = [
                 ("medium", "Medium – balanced detail (recommended)"),
                 ("high", "High – more verbose explanations"),
-        ]
+            ]
+
+        verb_choice = await self._menu_choice(
+            "Text verbosity",
+            "How verbose should responses be by default?",
+            verbosity_choices,
         )
         if verb_choice in ("low", "medium", "high"):
             self.text_verbosity = verb_choice
         else:
             self.text_verbosity = curv or "medium"

-        # --- 7) Tool preambles (
-
-
-
-
-
-
-
-
-
+        # --- 7) Tool preambles (GPT-5 only) ---
+        if self._supports_preambles(self.model):
+            preamble_choice = await self._menu_choice(
+                "Tool call preambles",
+                "Before using tools, the agent can briefly explain what it will do and why.",
+                [
+                    ("off", "Disable preambles (default)"),
+                    ("on", "Enable preambles"),
+                ],
+            )
+            self.preambles_enabled = preamble_choice == "on"
+        else:
+            # Never enable preambles on unsupported models.
+            self.preambles_enabled = False

         # --- 8) Optional custom first-turn note (menu + text only when chosen) ---
         custom_choice = await self._menu_choice(
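The hunk above builds a different verbosity menu depending on whether the model is a GPT model, listing the default option first and hiding "Low" for non-GPT models. A minimal illustrative sketch of that choice-list builder (the is_gpt predicate is a stand-in for self._is_gpt_model):

# Illustrative sketch of the verbosity-menu construction described above.
def verbosity_choices(model: str, is_gpt) -> list:
    if is_gpt(model):
        return [
            ("low", "Low – short, to-the-point answers"),
            ("medium", "Medium – balanced detail"),
            ("high", "High – more verbose explanations"),
        ]
    return [
        ("medium", "Medium – balanced detail (recommended)"),
        ("high", "High – more verbose explanations"),
    ]

print([v for v, _ in verbosity_choices("gpt-5.2", lambda m: m.startswith("gpt"))])
print([v for v, _ in verbosity_choices("claude-opus-4-6", lambda m: m.startswith("gpt"))])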
@@ -8433,7 +9098,8 @@ class ChatCLI:
                 text = m.get("content", "")
                 contents.append({"role": role, "parts": [{"text": text}]})
             # Pick a Gemini model for counting; fall back if current isn't Gemini
-
+            # (gemini-2.5-pro removed from curated lists)
+            count_model = "gemini-3-flash-preview"
             res = client.models.count_tokens(model=count_model, contents=contents)
             t = int(getattr(res, "total_tokens", 0) or 0)
             if t > 0:
@@ -8491,9 +9157,9 @@ class ChatCLI:
                 blocks.append(txt.strip())
         except Exception:
             pass
-        # Tool preamble
+        # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
         try:
-            if bool(getattr(self, "preambles_enabled", False)):
+            if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
                 blocks.append(
                     "Tool usage: when you need to read or modify files or run commands, "
                     "explicitly explain why you're using a tool, what you'll do, and how it "
@@ -8918,11 +9584,11 @@ class ChatCLI:
                 await self._ws_broadcast("warning", {"message": f"Unknown inbound type: {mtype}"})

     # Handle approval request: first reply wins (web or CLI), then POST to server
     async def _handle_approval_request(self, client: httpx.AsyncClient, session_id: Optional[str], data: Dict[str, Any]) -> None:
         tool = str(data.get("tool"))
         call_id = data.get("call_id")
         args_prev = data.get("args_preview", {}) or {}
         timeout_sec = int(data.get("timeout_sec", 60) or 60)
         # Display summary
         self.ui.print(f"⚠ Approval requested for {tool} (call_id={call_id})", style=self.ui.theme["warn"])
         self.ui.print(truncate_json(args_prev, 600), style=self.ui.theme["dim"])
@@ -8947,92 +9613,92 @@ class ChatCLI:

         # Run blocking CLI prompt in thread to avoid blocking event loop
         loop = asyncio.get_event_loop()
         def prompt_cli() -> Tuple[bool, str, Optional[str], Optional[str]]:
             """Return (approved, note, remember, remember_key)."""
             try:
                 # Prefer the richer approve-once/session/always UX at L2.
                 try:
                     lvl = data.get("level")
                     lvl_i = int(lvl) if isinstance(lvl, int) or (isinstance(lvl, str) and str(lvl).strip().isdigit()) else None
                 except Exception:
                     lvl_i = None

                 if lvl_i == 2:
                     t = str(tool or "").strip().lower()
                     remember_key = None
                     label = t
                     if t == "run_command":
                         try:
                             cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
                         except Exception:
                             cmd = None
                         base = self._base_command(cmd) if cmd is not None else ""
                         if base:
                             label = f"run_command:{base}"
                             remember_key = base
                         else:
                             label = "run_command"
                     else:
                         remember_key = t

                     choice = self._approval_prompt_ui(label, args_prev if isinstance(args_prev, dict) else {})
                     if choice == "deny":
                         return False, "Denied via CLI", None, remember_key

                     # Approved; update local trust registries immediately.
                     try:
                         if t == "run_command" and remember_key:
                             if choice == "session":
                                 if remember_key not in self.trust_cmds_session:
                                     self.trust_cmds_session.append(remember_key)
                             elif choice == "always":
                                 if remember_key not in self.trust_cmds_always:
                                     self.trust_cmds_always.append(remember_key)
                                 self.save_settings()
                         elif t in {"write_file", "append_file", "edit_file", "apply_patch", "string_replace"}:
                             if choice == "session":
                                 if t not in self.trust_tools_session:
                                     self.trust_tools_session.append(t)
                             elif choice == "always":
                                 if t not in self.trust_tools_always:
                                     self.trust_tools_always.append(t)
                                 self.save_settings()
                     except Exception:
                         pass

                     remember = choice if choice in ("session", "always") else "once"
                     return True, "Approved via CLI", remember, remember_key

                 # Fallback: simple yes/no confirmation.
                 default_yes = True if str(tool).strip() == "context.summarize" else False
                 prompt = f"Approve {tool} (timeout in {timeout_sec}s)?"
                 try:
                     if str(tool).strip().lower() == "run_command":
                         cmd = args_prev.get("cmd") if isinstance(args_prev, dict) else None
                         if isinstance(cmd, str) and cmd.strip():
                             prompt = f"Approve run_command: {self._clip(cmd, 120)} (timeout in {timeout_sec}s)?"
                 except Exception:
                     pass
                 approved = self.ui.confirm(prompt, default=default_yes)
                 return bool(approved), ("Approved via CLI" if approved else "Denied via CLI"), None, None
             except Exception:
                 return False, "Denied via CLI (error)", None, None

         cli_task = loop.run_in_executor(None, prompt_cli)

         decided: Optional[Tuple[Any, ...]] = None
         try:
             done, pending = await asyncio.wait({fut, asyncio.ensure_future(cli_task)}, timeout=timeout_sec, return_when=asyncio.FIRST_COMPLETED)
             if fut in done and not fut.cancelled():
                 try:
                     decided = fut.result()
                 except Exception:
                     decided = (False, "Denied via Web (error)")
             elif cli_task in done:  # type: ignore
                 try:
                     decided = await cli_task  # type: ignore
                 except Exception:
                     decided = (False, "Denied via CLI (error)")
             # If web future not decided, set it so we can cleanly proceed
             if not fut.done():
                 try:
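The "first reply wins" behaviour above races a web-side approval future against a blocking CLI prompt run in a thread, using asyncio.wait with FIRST_COMPLETED and a timeout. A minimal, self-contained sketch of the same pattern (inputs simulated, names hypothetical):

# Illustrative sketch of the first-reply-wins approval race.
import asyncio

async def first_reply_wins(timeout_sec: float = 5.0):
    loop = asyncio.get_running_loop()
    web_fut = loop.create_future()               # resolved when the web UI answers

    def prompt_cli():                            # blocking stand-in for a terminal prompt
        import time
        time.sleep(0.1)
        return (True, "Approved via CLI")

    cli_task = loop.run_in_executor(None, prompt_cli)
    done, _pending = await asyncio.wait(
        {web_fut, asyncio.ensure_future(cli_task)},
        timeout=timeout_sec,
        return_when=asyncio.FIRST_COMPLETED,
    )
    if web_fut in done and not web_fut.cancelled():
        return web_fut.result()
    if done:                                     # the CLI prompt answered first
        return await cli_task
    return (False, "Timed out")                  # nobody answered in time

print(asyncio.run(first_reply_wins()))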
@@ -9052,45 +9718,45 @@ class ChatCLI:
             if call_id is not None:
                 self._pending_approvals.pop(str(call_id), None)

         # Normalize decision tuple to (approved, note, remember, remember_key)
         approved = False
         note = ""
         remember = None
         remember_key = None
         try:
             if decided is None:
                 approved, note = False, ""
             elif isinstance(decided, tuple) and len(decided) >= 4:
                 approved, note, remember, remember_key = decided[0], decided[1], decided[2], decided[3]
             elif isinstance(decided, tuple) and len(decided) >= 2:
                 approved, note = decided[0], decided[1]
             else:
                 approved, note = bool(decided), ""
         except Exception:
             approved, note = False, ""

         # Post decision to server
         if session_id:
             try:
                 payload = {
                     "session_id": session_id,
                     "call_id": call_id,
                     "approve": bool(approved),
                     "note": note,
                 }
                 # Optional remember semantics (used to suppress repeat approvals within the current stream).
                 try:
                     if bool(approved) and remember in ("session", "always"):
                         payload["remember"] = remember
                         if remember_key:
                             payload["remember_key"] = str(remember_key)
                 except Exception:
                     pass
                 r = await client.post(self.approvals_url, json=payload, timeout=self.timeout)
                 if r.status_code >= 400:
                     self.ui.warn(f"Approval POST failed: {r.status_code} {r.text}")
             except Exception as e:
                 self.ui.warn(f"Approval POST error: {e}")

 async def amain():
     args = build_arg_parser().parse_args()
     # Set global debug flags from args
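The normalization step above coerces whatever the race produced into a single (approved, note, remember, remember_key) shape before the POST. An illustrative, standalone restatement of that rule (function name hypothetical):

# Illustrative sketch of the decision normalization used before the approval POST.
def normalize_decision(decided):
    approved, note, remember, remember_key = False, "", None, None
    if decided is None:
        pass
    elif isinstance(decided, tuple) and len(decided) >= 4:
        approved, note, remember, remember_key = decided[:4]
    elif isinstance(decided, tuple) and len(decided) >= 2:
        approved, note = decided[:2]
    else:
        approved = bool(decided)
    return bool(approved), note, remember, remember_key

assert normalize_decision((True, "Approved via CLI", "session", "git")) == (True, "Approved via CLI", "session", "git")
assert normalize_decision((False, "Denied via Web (error)")) == (False, "Denied via Web (error)", None, None)
assert normalize_decision(None) == (False, "", None, None)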
@@ -9166,143 +9832,6 @@ async def amain():
     await cli.run()


-# --- UX Hotfix: Replace menu UI with highlighted cursor picker (no radio buttons) ---
-# The default RadioList menu can be confusing and, on some terminals, non-interactive.
-# We override ChatCLI._menu_choice at runtime with a prompt_toolkit-based list that shows
-# a highlighted bar for the current item; Enter selects; Esc cancels. Falls back to
-# numeric selection when prompt_toolkit is unavailable.
-
-async def _menu_choice_highlight(self, title: str, text: str, choices: list[tuple[str, str]]):  # type: ignore
-    if HAS_PT and Application and Layout and HSplit and Window and FormattedTextControl and Style and KeyBindings:
-        try:
-            items = [(val, str(label)) for (val, label) in choices]
-            index = 0
-            blink_on = [True]
-
-            def _lines():
-                out = []
-                if title:
-                    out.append(("class:menu.title", f"{title}\n"))
-                for i, (_v, _lbl) in enumerate(items):
-                    if i == index:
-                        arrow = ">" if blink_on[0] else " "
-                        out.append(("class:menu.item.selected", f" {arrow} {_lbl}\n"))
-                    else:
-                        out.append(("class:menu.item", f"   {_lbl}\n"))
-                out.append(("class:menu.status", f"({index+1}/{len(items)})"))
-                return out
-
-            body = FormattedTextControl(_lines)
-            hint = FormattedTextControl(lambda: text or "Use ↑/↓, Enter=select, Esc=cancel")
-            root = HSplit([
-                Window(height=1, content=hint, style="class:menu.hint"),
-                Window(content=body),
-            ])
-            kb = KeyBindings()
-
-            @kb.add("up")
-            def _up(event):
-                nonlocal index
-                index = (index - 1) % len(items)
-                event.app.invalidate()
-
-            @kb.add("down")
-            def _down(event):
-                nonlocal index
-                index = (index + 1) % len(items)
-                event.app.invalidate()
-
-            @kb.add("pageup")
-            def _pgup(event):
-                nonlocal index
-                index = max(0, index - 7)
-                event.app.invalidate()
-
-            @kb.add("pagedown")
-            def _pgdn(event):
-                nonlocal index
-                index = min(len(items) - 1, index + 7)
-                event.app.invalidate()
-
-            @kb.add("home")
-            def _home(event):
-                nonlocal index
-                index = 0
-                event.app.invalidate()
-
-            @kb.add("end")
-            def _end(event):
-                nonlocal index
-                index = len(items) - 1
-                event.app.invalidate()
-
-            @kb.add("enter")
-            def _enter(event):
-                event.app.exit(result=items[index][0])
-
-            @kb.add("escape")
-            def _esc(event):
-                event.app.exit(result=None)
-
-            style = Style.from_dict({
-                "menu.title": "bold",
-                "menu.hint": "fg:#888888",
-                "menu.status": "fg:#ff8700",
-                "menu.item": "",
-                # Bright highlighted selection; blink may be ignored on some terminals
-                "menu.item.selected": "fg:#ff8700 reverse",
-            })
-
-            app = Application(layout=Layout(root), key_bindings=kb, style=style, full_screen=False)
-
-            async def _blinker():
-                while True:
-                    await asyncio.sleep(0.6)
-                    try:
-                        blink_on[0] = not blink_on[0]
-                        get_app().invalidate()
-                    except Exception:
-                        break
-
-            try:
-                asyncio.create_task(_blinker())
-            except Exception:
-                pass
-
-            return await app.run_async()
-        except Exception:
-            pass
-    # Fallback: numeric list
-    self.ui.header(title, text)
-    for i, (_, label) in enumerate(choices, start=1):
-        style = None
-        try:
-            lbl = str(label)
-            if ("VERY expensive" in lbl) or ("[DANGER]" in lbl) or ("!!!" in lbl and "expensive" in lbl.lower()):
-                style = self.ui.theme.get("err")
-        except Exception:
-            style = None
-        self.ui.print(f"{i}. {label}", style=style)
-    self.ui.print()
-    while True:
-        raw = input("Choose an option: ").strip()
-        if raw.lower() in ("q", "quit", "exit"):
-            return None
-        if not raw.isdigit():
-            self.ui.warn("Enter a number from the list.")
-            continue
-        idx = int(raw)
-        if not (1 <= idx <= len(choices)):
-            self.ui.warn("Invalid selection.")
-            continue
-        return choices[idx - 1][0]
-
-# Monkey-patch the method onto ChatCLI
-try:
-    ChatCLI._menu_choice = _menu_choice_highlight  # type: ignore[attr-defined]
-except Exception:
-    pass
-
 # --- UX Hotfix v2: dependency-free highlighted menus (Enter selects) ---
 # This override ensures the settings menu works without RadioList and that Enter
 # activates the currently highlighted option even when prompt_toolkit is absent.
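With the prompt_toolkit picker removed, 0.6.10 relies on dependency-free menus (see the "UX Hotfix v2" comments above and henosis_cli_tools.input_engine). The removed code's numeric fallback survives in spirit; the sketch below is an illustrative stand-in for that kind of fallback loop, not the package's actual implementation, which is richer.

# Illustrative sketch: a dependency-free numeric menu using only input()/print().
def menu_choice(title: str, choices: list):
    print(title)
    for i, (_value, label) in enumerate(choices, start=1):
        print(f"{i}. {label}")
    while True:
        raw = input("Choose an option: ").strip()
        if raw.lower() in ("q", "quit", "exit"):
            return None
        if raw.isdigit() and 1 <= int(raw) <= len(choices):
            return choices[int(raw) - 1][0]
        print("Enter a number from the list.")

# Example (interactive):
# menu_choice("Anthropic effort", [("high", "High (default)"), ("medium", "Medium")])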