henosis-cli 0.6.10__py3-none-any.whl → 0.6.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli.py +345 -94
- {henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/METADATA +1 -1
- {henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/RECORD +6 -6
- {henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/WHEEL +0 -0
- {henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/entry_points.txt +0 -0
- {henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/top_level.txt +0 -0
cli.py (CHANGED)
```diff
@@ -27,6 +27,7 @@ import getpass
 from urllib.parse import urlparse, urlunparse
 import subprocess
 import shlex
+import signal
 import importlib
 import importlib.util
 import importlib.metadata
```
```diff
@@ -674,7 +675,7 @@ class UI:
         for n, ty, sz in rows:
             print(f"{n:<40} {ty:<8} {sz}")
 
-class ChatCLI:
+class ChatCLI:
     def __init__(
         self,
         server: str,
```
```diff
@@ -1062,9 +1063,15 @@ class ChatCLI:
         # Track Ctrl+C timing for double-press-to-exit behavior
         self._last_interrupt_ts: Optional[float] = None
 
+        # Ctrl+C cancel flag and partial-text accumulator for graceful stream cancel.
+        # When _stream_cancelled is set, the SSE loop breaks early and the partial
+        # assistant text is kept in context (no rollback, no edit mode).
+        self._stream_cancelled: bool = False
+        self._stream_partial_text: str = ""
+
         # Ctrl+C during a running stream should not kill the entire CLI.
         # Instead, we cancel the in-flight turn and reopen the last user query for editing.
-        #
+        # Provider-native tool-chain context is preserved on cancel via _preserve_provider_state_on_cancel().
         self._pending_user_edit: Optional[str] = None
         self._pending_turn_snapshot: Optional[Dict[str, Any]] = None
 
```
```diff
@@ -1320,9 +1327,84 @@ class ChatCLI:
         except Exception:
             return 0
 
-    def _clip(self, s: Any, max_len: int = 300) -> str:
-        s = str(s)
-        return s if len(s) <= max_len else (s[: max_len//2] + " ... (truncated) ... " + s[- max_len//2 :])
+    def _clip(self, s: Any, max_len: int = 300) -> str:
+        s = str(s)
+        return s if len(s) <= max_len else (s[: max_len//2] + " ... (truncated) ... " + s[- max_len//2 :])
+
+    # ----------------------- Cancel/usage helpers -----------------------
+    def _rough_tokens_from_chars(self, chars: int) -> int:
+        """Very rough token estimator used only when a stream is cancelled.
+
+        We use ~4 chars/token as a fallback so we can display *some* token usage
+        even when the server never emits message.completed (usage is then unavailable).
+        """
+        try:
+            n = int(chars or 0)
+        except Exception:
+            n = 0
+        if n <= 0:
+            return 0
+        # ceil(n/4)
+        return (n + 3) // 4
+
+    def _render_cancelled_usage_notice(
+        self,
+        *,
+        model_label: Optional[str],
+        est_completion_chars: int,
+        events_total: int = 0,
+        deltas_total: int = 0,
+        bytes_total: int = 0,
+        tool_calls: int = 0,
+        turn_secs: Optional[float] = None,
+    ) -> None:
+        """Print a small usage notice when Ctrl+C cancels mid-stream.
+
+        The key requirement: do not show "0 tokens"/nothing when we know
+        the model already emitted a tool.call or other output.
+        """
+        try:
+            tok = self._rough_tokens_from_chars(int(est_completion_chars or 0))
+        except Exception:
+            tok = 0
+        label = model_label or self._current_turn.get("model") or self._last_used_model or self.model or "(unknown)"
+        # Always show something human-readable even if estimate is 0.
+        # (0 can happen if cancel occurred before we received any delta/tool.call lines.)
+        lines: List[str] = []
+        if tok > 0:
+            lines.append(f"Cancelled — estimated tokens used so far (completion/tool output): ~{tok}")
+        else:
+            lines.append("Cancelled — usage unavailable (no message.completed); no output received to estimate tokens")
+        try:
+            lines.append(f"Model: {label}")
+        except Exception:
+            pass
+        try:
+            parts = [f"events={int(events_total)}", f"deltas={int(deltas_total)}", f"bytes={int(bytes_total)}", f"tools={int(tool_calls)}"]
+            lines.append("Stream: " + " | ".join(parts))
+        except Exception:
+            pass
+        try:
+            if isinstance(turn_secs, (int, float)) and turn_secs is not None:
+                lines.append(f"Time (turn): {float(turn_secs):.2f}s")
+        except Exception:
+            pass
+
+        try:
+            # Match existing UX patterns: info box in verbose mode; single line in concise mode.
+            if str(getattr(self, "usage_info_mode", "verbose")).lower() == "concise":
+                # One-liner only
+                self.ui.print(lines[0], style=self.ui.theme.get("warn"), force=True)  # type: ignore
+            else:
+                self.ui.info_box("Usage (cancelled)", lines)
+        except Exception:
+            # Last-resort plain prints
+            try:
+                self.ui.warn(lines[0])
+                for ln in lines[1:]:
+                    self.ui.print(ln)
+            except Exception:
+                pass
 
     # ----------------------- Pricing + costs -----------------------
 
```
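The estimator above is plain ceiling division at roughly 4 chars per token. A standalone sketch of the same arithmetic (a free function with a hypothetical name, not imported from cli.py):

```python
def rough_tokens_from_chars(chars: int) -> int:
    """~4 chars/token fallback, as in the diff's _rough_tokens_from_chars."""
    n = max(int(chars or 0), 0)
    return (n + 3) // 4  # integer ceil(n / 4), no float rounding

assert rough_tokens_from_chars(0) == 0
assert rough_tokens_from_chars(1) == 1      # a single char still rounds up
assert rough_tokens_from_chars(4) == 1
assert rough_tokens_from_chars(5) == 2
assert rough_tokens_from_chars(400) == 100  # ~100 tokens for 400 chars
```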
```diff
@@ -1435,7 +1517,7 @@ class ChatCLI:
         # Codex family: disable preambles for better behavior
         if "codex" in model_name:
             self.preambles_enabled = False
-        # Tool-call preambles are
+        # Tool-call preambles are supported for GPT-5 / GPT-5.2 non-Codex models.
         # Force-disable for all other models (even if a saved setting had it enabled).
         if not self._supports_preambles(self.model):
             self.preambles_enabled = False
```
```diff
@@ -1448,23 +1530,25 @@ class ChatCLI:
     def _supports_preambles(self, model: Optional[str]) -> bool:
         """Tool-call preambles are a CLI-only UX hint.
 
-
-
+        Supported (non-Codex only):
+        - GPT-5 base line: "gpt-5" and date-pinned variants like "gpt-5-2025-08-07"
+        - GPT-5.2 family: "gpt-5.2*" (e.g., gpt-5.2, gpt-5.2-pro)
+
+        Not supported:
+        - Any Codex variants ("*codex*")
+        - GPT-5.1* (kept off by default until validated)
         """
         try:
             if not model:
                 return False
             m = str(model).strip().lower()
-            #
-
-            # - "gpt-5"
-            # - date-pinned variants like "gpt-5-2025-08-07"
-            # Disallow:
-            # - versioned families like "gpt-5.1*" / "gpt-5.2*"
-            if not (m == "gpt-5" or m.startswith("gpt-5-")):
+            # Allow GPT-5 base line and GPT-5.2 family.
+            if not (m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5.2")):
                 return False
             if "codex" in m:
                 return False
+            if m.startswith("gpt-5.1"):
+                return False
             return True
         except Exception:
             return False
```
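The rewritten gate is easiest to sanity-check against concrete model names. A minimal standalone re-implementation mirroring the diff's logic (assumed free-function form, for illustration only):

```python
from typing import Optional

def supports_preambles(model: Optional[str]) -> bool:
    """Mirror of the diff's gate: GPT-5 base line + GPT-5.2 family,
    excluding Codex variants and (defensively) the GPT-5.1 family."""
    if not model:
        return False
    m = str(model).strip().lower()
    if not (m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5.2")):
        return False
    if "codex" in m:
        return False
    if m.startswith("gpt-5.1"):
        return False
    return True

assert supports_preambles("gpt-5")
assert supports_preambles("gpt-5-2025-08-07")  # date-pinned variant
assert supports_preambles("gpt-5.2-pro")       # GPT-5.2 family
assert not supports_preambles("gpt-5-codex")   # Codex excluded
assert not supports_preambles("gpt-5.1")       # kept off until validated
assert not supports_preambles(None)
```

Note that "gpt-5.1" already fails the first check (it starts with "gpt-5." rather than "gpt-5-"), so the explicit gpt-5.1 branch is belt-and-suspenders.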
```diff
@@ -2835,7 +2919,7 @@ class ChatCLI:
             {"label": "Preambles & First-turn", "type": "group", "items": [
                 {
                     "id": "preambles_enabled",
-                    "label": "Enable tool call preambles (GPT-5
+                    "label": "Enable tool call preambles (GPT-5 / GPT-5.2; non-Codex)",
                     "type": "bool",
                     # Only show this control when the *currently selected* model supports it.
                     # (This updates live as the Model picker changes.)
```
```diff
@@ -3306,7 +3390,7 @@ class ChatCLI:
         except Exception:
             pass
 
-        # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+        # 3) Tool usage preamble (UX hint) — GPT-5 / GPT-5.2 only (non-Codex)
         try:
             if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
                 blocks.append(
```
```diff
@@ -5278,51 +5362,40 @@ class ChatCLI:
                 continue
             self._busy = True
             try:
-                assistant_text = await self.
+                assistant_text = await self._run_stream_with_cancel(user_input)
             finally:
                 self._busy = False
-
-            #
-            #
+
+            # If the stream was cancelled via Ctrl+C, keep partial text
+            # in context and return to the normal prompt.
+            if self._stream_cancelled:
+                partial = self._stream_partial_text or ""
+                content_sent = self._last_built_user_content or user_input
+                # Always keep the user message in context so it is
+                # not silently lost when cancel arrives before any
+                # assistant tokens were streamed.
+                self.history.append({"role": "user", "content": content_sent})
+                if partial.strip():
+                    self.history.append({"role": "assistant", "content": partial})
+                # Roll forward provider-native replay state so the
+                # next turn can pick up seamlessly.
+                try:
+                    self._preserve_provider_state_on_cancel(
+                        content_sent, partial,
+                    )
+                except Exception:
+                    pass
+                self.ui.warn("Interrupted. Partial response kept in context.")
+                continue
+        except (KeyboardInterrupt, asyncio.CancelledError):
+            # Safety net for Ctrl+C outside of the streaming path
+            # (e.g. during snapshot creation). _run_stream_with_cancel
+            # handles the common streaming case internally.
             try:
                 await self._cancel_inflight_dispatch()
             except (Exception, BaseException):
                 pass
 
-            # Restore state to *before* this turn started.
-            try:
-                snap = self._pending_turn_snapshot or {}
-                if isinstance(snap.get("history"), list):
-                    self.history = snap.get("history")
-                if isinstance(snap.get("messages_for_save"), list):
-                    self.messages_for_save = snap.get("messages_for_save")
-                if isinstance(snap.get("kimi_raw"), list):
-                    self._kimi_raw_history = snap.get("kimi_raw")
-                if isinstance(snap.get("gemini_raw"), list):
-                    self._gemini_raw_history = snap.get("gemini_raw")
-                if "openai_prev" in snap:
-                    self._openai_previous_response_id = snap.get("openai_prev")
-                if isinstance(snap.get("openai_ids"), list):
-                    self._openai_response_id_history = snap.get("openai_ids")
-                if isinstance(snap.get("openai_input_items"), list):
-                    self._openai_input_items = snap.get("openai_input_items")
-                if "openai_last_sent_input_items" in snap:
-                    self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
-                if "inflight_dispatch" in snap:
-                    self._inflight_dispatch = snap.get("inflight_dispatch")
-                if "did_inject_codebase_map" in snap:
-                    self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
-                if "did_inject_custom_first_turn" in snap:
-                    self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
-                if "did_inject_working_memory" in snap:
-                    self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
-                if "memory_paths_for_first_turn" in snap:
-                    self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
-                self._last_built_user_content = snap.get("last_built_user_content")
-            except Exception:
-                pass
-
-            # Clear any transient indicator line and land on a fresh prompt line.
             try:
                 sys.stdout.write("\r\x1b[2K\n")
                 sys.stdout.flush()
```
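The cancel branch reduces to two context rules: the user message is always kept, and the partial assistant reply is kept only when it is non-empty. A minimal sketch of just that bookkeeping (hypothetical helper, assuming the same message-dict shape as the diff):

```python
def keep_partial_in_context(history, user_content: str, partial: str):
    """Hypothetical distillation of the cancel branch's history handling."""
    # The user message is always kept, even if cancel arrived before
    # any assistant tokens were streamed.
    history.append({"role": "user", "content": user_content})
    # The partial assistant reply is kept only when something was received.
    if partial.strip():
        history.append({"role": "assistant", "content": partial})
    return history

h = keep_partial_in_context([], "explain SSE", "SSE is a one-way ")
assert [m["role"] for m in h] == ["user", "assistant"]

h = keep_partial_in_context([], "explain SSE", "")  # cancel before first delta
assert [m["role"] for m in h] == ["user"]           # user turn not silently lost
```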
```diff
@@ -5332,15 +5405,7 @@ class ChatCLI:
             except Exception:
                 pass
 
-
-                supports = self._provider_supports_native_retention(self.model)
-            except Exception:
-                supports = False
-            if supports:
-                self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
-            else:
-                self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
-            self._pending_user_edit = user_input
+            self.ui.warn("Interrupted.")
             continue
         except httpx.HTTPStatusError as he:
             try:
```
```diff
@@ -5676,7 +5741,145 @@ class ChatCLI:
         return True
 
     # ----------------------- SSE Streaming loop ------------------------
-
+    def _preserve_provider_state_on_cancel(
+        self, user_content: str, partial_assistant: str,
+    ) -> None:
+        """Roll forward provider-native replay state on Ctrl+C cancel.
+
+        During a normal turn, ``message.completed`` delivers delta items
+        (OpenAI), an authoritative ``raw_provider_messages`` snapshot
+        (Gemini), and similar per-provider payloads. When the stream is
+        cancelled those events never arrive, so this method patches the
+        minimum state needed for the next turn to continue cleanly.
+
+        * **Gemini / Kimi** -- ``provider.message`` and ``tool.result``
+          SSE events already update ``_gemini_raw_history`` /
+          ``_kimi_raw_history`` during streaming, so those histories are
+          already consistent. No extra work needed.
+        * **OpenAI** -- The manual input-item chain
+          (``_openai_input_items``) is only updated from
+          ``openai_delta_items`` inside ``message.completed``. Here we
+          roll the chain forward to include the user message we sent and
+          any partial assistant text.
+        """
+        model = (
+            self._current_turn.get("model")
+            or self._last_used_model
+            or self.model
+            or ""
+        )
+
+        # --- OpenAI: roll _openai_input_items forward -----------------
+        if self._is_openai_model(model):
+            try:
+                sent = self._openai_last_sent_input_items
+                if isinstance(sent, list) and sent:
+                    items = copy.deepcopy(sent)
+                    # Include partial assistant text so the model knows
+                    # what it already said before the user interrupted.
+                    if partial_assistant.strip():
+                        items.append({
+                            "role": "assistant",
+                            "content": partial_assistant,
+                        })
+                    self._openai_input_items = (
+                        self._sanitize_openai_items(items)
+                    )
+            except Exception:
+                pass
+            finally:
+                self._openai_last_sent_input_items = None
+
+        # --- Gemini / Kimi: already up-to-date (see docstring) --------
+
+    async def _run_stream_with_cancel(self, user_input: str) -> str:
+        """Wrap _stream_once so Ctrl+C cancels the stream gracefully.
+
+        On cancel the SSE loop sees _stream_cancelled and breaks early.
+        Whatever partial assistant text was accumulated is returned so the
+        caller can keep it in conversation context.
+
+        Key design decision: on cancel we KEEP all streamed content in
+        context, append the user message + partial assistant reply to
+        history, and return to the normal ``You>`` prompt -- no edit mode
+        and no snapshot rollback.
+        """
+        self._stream_cancelled = False
+        self._stream_partial_text = ""
+
+        # Install a custom SIGINT handler for the duration of streaming.
+        # First Ctrl+C sets _stream_cancelled so the SSE loop breaks
+        # naturally at the next event; second Ctrl+C force-raises.
+        _original_handler = signal.getsignal(signal.SIGINT)
+        _sigint_count = 0
+
+        def _stream_sigint_handler(signum, frame):
+            nonlocal _sigint_count
+            _sigint_count += 1
+            self._stream_cancelled = True
+            if _sigint_count >= 2:
+                # Restore original handler and hard-interrupt on second press
+                try:
+                    signal.signal(signal.SIGINT, _original_handler or signal.default_int_handler)
+                except Exception:
+                    pass
+                raise KeyboardInterrupt()
+
+        try:
+            signal.signal(signal.SIGINT, _stream_sigint_handler)
+        except (OSError, ValueError):
+            # signal.signal() can only be called from the main thread
+            pass
+
+        try:
+            return await self._stream_once(user_input)
+        except (KeyboardInterrupt, asyncio.CancelledError):
+            self._stream_cancelled = True
+            # Best-effort: cancel any in-flight client-dispatched tool
+            try:
+                await self._cancel_inflight_dispatch("cancelled by user")
+            except Exception:
+                pass
+            # Clear any transient indicator / status line
+            try:
+                sys.stdout.write("\r\x1b[2K\n")
+                sys.stdout.flush()
+            except Exception:
+                pass
+            # Best-effort: show a usage notice even on hard-interrupt (no message.completed).
+            # We can only estimate from what we already buffered locally.
+            try:
+                # Estimate completion/output chars from partial assistant text + tool event payload sizes.
+                tool_chars = 0
+                try:
+                    evs = (self._current_turn.get("tool_events") or []) if isinstance(self._current_turn, dict) else []
+                    if isinstance(evs, list):
+                        # Keep it cheap: only sum a bounded amount.
+                        for ev in evs[-50:]:
+                            try:
+                                tool_chars += len(json.dumps(ev, ensure_ascii=False))
+                            except Exception:
+                                tool_chars += len(str(ev))
+                except Exception:
+                    tool_chars = 0
+                est_chars = len(self._stream_partial_text or "") + tool_chars
+                self._render_cancelled_usage_notice(
+                    model_label=(self._current_turn.get("model") if isinstance(self._current_turn, dict) else None),
+                    est_completion_chars=est_chars,
+                    tool_calls=len((self._current_turn.get("tool_events") or []) if isinstance(self._current_turn, dict) else []),
+                )
+            except Exception:
+                pass
+            return self._stream_partial_text
+        finally:
+            # Restore the original SIGINT handler so normal Ctrl+C
+            # behavior resumes at the input prompt.
+            try:
+                signal.signal(signal.SIGINT, _original_handler or signal.default_int_handler)
+            except (OSError, ValueError):
+                pass
+
     async def _stream_once(self, user_input: str) -> str:
         # Build request payload.
         # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
         # `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
```
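The SIGINT dance in _run_stream_with_cancel is a reusable pattern: the first Ctrl+C only flips a flag so the stream loop can exit cleanly at an event boundary, and the second press restores the previous handler and raises for real. A condensed standalone sketch of that pattern (not the cli.py code itself; signal.signal must run on the main thread):

```python
import signal

def install_stream_sigint(on_cancel):
    """First Ctrl+C calls on_cancel(); second restores the old handler and raises.

    Returns the previous handler so callers can restore it in a finally block.
    """
    original = signal.getsignal(signal.SIGINT)
    presses = 0

    def handler(signum, frame):
        nonlocal presses
        presses += 1
        on_cancel()  # graceful path: the stream loop polls a flag
        if presses >= 2:
            # Hard path: put the old handler back, then interrupt for real.
            signal.signal(signal.SIGINT, original or signal.default_int_handler)
            raise KeyboardInterrupt

    signal.signal(signal.SIGINT, handler)
    return original

# Usage shape (mirrors _run_stream_with_cancel):
#     flags = {"cancelled": False}
#     original = install_stream_sigint(lambda: flags.__setitem__("cancelled", True))
#     try:
#         ...stream, checking flags["cancelled"] per event...
#     finally:
#         signal.signal(signal.SIGINT, original)
```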
```diff
@@ -5844,7 +6047,7 @@ class ChatCLI:
             payload["text_verbosity"] = self.text_verbosity
         except Exception:
             pass
-        # Preambles are a GPT-5-only UX toggle.
+        # Preambles are a GPT-5 / GPT-5.2-only UX toggle.
         try:
             if self._supports_preambles(self.model):
                 payload["preambles_enabled"] = bool(self.preambles_enabled)
```
```diff
@@ -5917,15 +6120,18 @@ class ChatCLI:
         except Exception:
             pass
 
-        async def do_stream(req_payload: Dict[str, Any]) -> str:
+        async def do_stream(req_payload: Dict[str, Any]) -> str:
            nonlocal session_id
            nonlocal header_printed
            # Retry tracking flags (updated per attempt)
            nonlocal last_completed, last_error, last_bytes_total
            # While streaming assistant text, suppress the thinking indicator to avoid clobbering output
            streaming_assistant = False
-            # Initialize per-turn timer and tool call counter
-            tool_calls = 0
+            # Initialize per-turn timer and tool call counter
+            tool_calls = 0
+            # Estimate of model output chars so far (assistant deltas + tool.call payloads).
+            # Used only when Ctrl+C cancels the stream before message.completed.
+            model_output_chars = 0
            # Capture last N SSE events for diagnostics if stream ends without message.completed
            from collections import deque
            last_events = deque(maxlen=SSE_TAIL_MAX)  # keep short, printable summaries
```
```diff
@@ -6270,6 +6476,9 @@ class ChatCLI:
                 pass
 
             async for event, data_raw in parse_sse_lines(resp, debug=_sse_debug):
+                # Graceful cancel: break early when Ctrl+C sets the flag.
+                if self._stream_cancelled:
+                    break
                 try:
                     if isinstance(data_raw, str):
                         _bytes_total += len(data_raw)
```
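Checking the flag at the top of the `async for` body means cancellation takes effect at the next event boundary rather than mid-handler, so no event is half-processed. A runnable toy version, with a fake event source standing in for parse_sse_lines:

```python
import asyncio

async def fake_events():
    """Hypothetical stand-in for parse_sse_lines(resp, ...)."""
    for chunk in ("Hel", "lo ", "wor", "ld"):
        await asyncio.sleep(0)
        yield "message.delta", chunk

async def stream_once() -> str:
    cancelled = False
    buf = []
    async for event, text in fake_events():
        if cancelled:        # graceful cancel: exit at the event boundary
            break
        buf.append(text)
        if len(buf) == 2:    # simulate Ctrl+C flipping the flag mid-stream
            cancelled = True
    return "".join(buf)      # partial text survives the cancel

assert asyncio.run(stream_once()) == "Hello "
```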
```diff
@@ -6312,7 +6521,7 @@ class ChatCLI:
                         pass
                     continue
 
-                elif event == "message.delta":
+                elif event == "message.delta":
                     # Stop any transient indicator before printing content and clear the line
                     try:
                         await _indicator_stop(clear=True)
```
```diff
@@ -6320,8 +6529,8 @@ class ChatCLI:
                         pass
                     # Indicator line cleared; we're now at the start of a fresh line.
                     at_line_start = True
-                    text = data.get("text", "")
-                    if text:
+                    text = data.get("text", "")
+                    if text:
                         try:
                             _deltas_total += 1
                         except Exception:
```
```diff
@@ -6362,7 +6571,16 @@ class ChatCLI:
                                 self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
                             except Exception:
                                 pass
-                        assistant_buf.append(text)
+                        assistant_buf.append(text)
+                        try:
+                            model_output_chars += len(str(text))
+                        except Exception:
+                            pass
+                        # Keep partial text accessible for Ctrl+C cancel.
+                        try:
+                            self._stream_partial_text = "".join(assistant_buf)
+                        except Exception:
+                            pass
                         # Print the token delta raw to avoid any wrapping/markup side-effects
                         try:
                             self.ui.print(text, style=self.ui.theme["assistant"], end="")
```
```diff
@@ -6391,7 +6609,7 @@ class ChatCLI:
                         except Exception:
                             pass
 
-                elif event == "tool.call":
+                elif event == "tool.call":
                     # Ensure any prior indicator state is reset cleanly, then restart
                     # a fresh indicator while waiting for the tool to run.
                     try:
```
```diff
@@ -6411,8 +6629,8 @@ class ChatCLI:
                         pass
                     at_line_start = True
 
-                    name = data.get("name")
-                    args = data.get("args", {}) or {}
+                    name = data.get("name")
+                    args = data.get("args", {}) or {}
                     call_id = data.get("call_id")
                     try:
                         self.ui.debug_log(f"tool.call name='{name}' call_id={call_id}")
```
```diff
@@ -6453,11 +6671,21 @@ class ChatCLI:
                         self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
                     except Exception:
                         pass
-                    # Count tool calls
-                    try:
-                        tool_calls += 1
-                    except Exception:
-                        pass
+                    # Count tool calls
+                    try:
+                        tool_calls += 1
+                    except Exception:
+                        pass
+                    # tool.call is model output too (even when no message.delta happened yet).
+                    try:
+                        # Keep it simple; we only need a non-zero estimate.
+                        model_output_chars += len(str(name or ""))
+                        model_output_chars += len(json.dumps(args, ensure_ascii=False))
+                    except Exception:
+                        try:
+                            model_output_chars += len(str(args))
+                        except Exception:
+                            pass
 
                     # Track args for troubleshooting and broadcast to WS clients
                     if call_id:
```
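Counting the tool name plus the JSON-serialized args means a turn cancelled during a tool chain (before any assistant text) still yields a non-zero estimate. A small sketch of the accounting (hypothetical helper name):

```python
import json

def tool_call_chars(name, args) -> int:
    """Chars attributed to one tool.call, per the diff's accounting."""
    chars = len(str(name or ""))
    try:
        chars += len(json.dumps(args, ensure_ascii=False))
    except (TypeError, ValueError):
        chars += len(str(args))  # non-JSON-serializable args still count
    return chars

n = tool_call_chars("read_file", {"path": "cli.py"})
assert n == len("read_file") + len('{"path": "cli.py"}')  # 9 + 18 = 27
assert (n + 3) // 4 == 7  # -> "~7" tokens in the cancelled-usage notice
```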
```diff
@@ -7910,12 +8138,35 @@ class ChatCLI:
                     if DEBUG_SSE:
                         self.ui.print(f"[debug] unhandled event: {event} payload={truncate_json(data, 800)}", style=self.ui.theme["dim"])
 
-            # If stream ended without a message.completed, render a fallback info box
-            # Ensure the indicator is stopped on abnormal termination and fully cleared
-            try:
-                await _indicator_stop(clear=True)
-            except Exception:
-                pass
+            # If stream ended without a message.completed, render a fallback info box
+            # Ensure the indicator is stopped on abnormal termination and fully cleared
+            try:
+                await _indicator_stop(clear=True)
+            except Exception:
+                pass
+            # If cancelled via Ctrl+C, return *but* still show an estimated token/usage notice.
+            if self._stream_cancelled:
+                try:
+                    # Attempt to compute turn duration if we have a start timestamp.
+                    turn_secs = None
+                    try:
+                        now_pc = time.perf_counter()
+                        if self._turn_started_at is not None:
+                            turn_secs = float(now_pc - float(self._turn_started_at))
+                    except Exception:
+                        turn_secs = None
+                    self._render_cancelled_usage_notice(
+                        model_label=(self._current_turn.get("model") if isinstance(self._current_turn, dict) else None),
+                        est_completion_chars=int(model_output_chars or 0),
+                        events_total=int(_events_total or 0),
+                        deltas_total=int(_deltas_total or 0),
+                        bytes_total=int(_bytes_total or 0),
+                        tool_calls=int(tool_calls or 0),
+                        turn_secs=turn_secs,
+                    )
+                except Exception:
+                    pass
+                return "".join(assistant_buf)
             buf_str2 = "".join(assistant_buf)
             self.ui.ensure_newline(buf_str2)
             # Use a visible notice (non-dim) so users are aware something ended unexpectedly
```
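The turn duration shown in the notice comes from a perf_counter stamp taken when the turn starts; a tiny sketch, assuming a _turn_started_at-style stamp:

```python
import time

turn_started_at = time.perf_counter()   # stamped when the turn begins
time.sleep(0.05)                        # ...streaming, tool calls, etc...
turn_secs = time.perf_counter() - turn_started_at
print(f"Time (turn): {turn_secs:.2f}s") # same formatting as the notice line
```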
```diff
@@ -8124,7 +8375,7 @@ class ChatCLI:
         result_text = await do_stream(payload)
 
         # Auto-restart after summarization: clear conversation and resend same user input with injections
-        if self._restart_after_summary:
+        if self._restart_after_summary and not self._stream_cancelled:
             self._restart_after_summary = False
             # Reset conversation to a fresh session (preserve system prompt)
             self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
```
```diff
@@ -8209,14 +8460,14 @@ class ChatCLI:
             return await do_stream(new_payload)
 
         # If we marked an auto-retry due to provider output size limits, retry once using the same payload
-        if self._auto_retry_after_tailed:
-            self._auto_retry_after_tailed = False
+        if self._auto_retry_after_tailed and not self._stream_cancelled:
+            self._auto_retry_after_tailed = False
             self.ui.warn("Retrying turn with tailed file content due to provider output size limit...")
             return await do_stream(payload)
 
         # Generic retry: when stream ended without a message.completed, retry the last model turn
         max_attempts = 3
-        while (not last_completed) and (attempts_done < max_attempts):
+        while (not last_completed) and (not self._stream_cancelled) and (attempts_done < max_attempts):
             attempts_done += 1
             # Emit CLI/WS notice
             try:
```
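All of the new `not self._stream_cancelled` guards serve one invariant: a user cancel must never trigger an automatic resend or retry. A condensed runnable sketch of the guarded retry loop (simplified signatures, not the cli.py internals):

```python
import asyncio

async def retry_until_completed(do_stream, cancelled, max_attempts: int = 3):
    """Retry the turn unless the user cancelled (the diff's guards, distilled)."""
    attempts, completed = 0, False
    while not completed and not cancelled() and attempts < max_attempts:
        attempts += 1
        completed = await do_stream()  # True once message.completed arrives
    return completed, attempts

async def demo():
    outcomes = iter([False, False, True])  # stream completes on the 3rd try

    async def do_stream():
        return next(outcomes)

    print(await retry_until_completed(do_stream, lambda: False))  # (True, 3)
    print(await retry_until_completed(do_stream, lambda: True))   # (False, 0): no retry

asyncio.run(demo())
```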
```diff
@@ -8250,7 +8501,7 @@ class ChatCLI:
         result_text = await do_stream(payload)
 
         # If still not completed after retries, emit a final failure notice
-        if not last_completed:
+        if not last_completed and not self._stream_cancelled:
             try:
                 self.ui.print(f"[retry] failed after {attempts_done} attempt(s)", style=self.ui.theme["warn"])  # type: ignore
                 await self._ws_broadcast("retry.failed", {"attempts": attempts_done, "max_attempts": max_attempts})
```
```diff
@@ -9011,7 +9262,7 @@ class ChatCLI:
         else:
             self.text_verbosity = curv or "medium"
 
-        # --- 7) Tool preambles (GPT-5 only) ---
+        # --- 7) Tool preambles (GPT-5 / GPT-5.2 only; non-Codex) ---
         if self._supports_preambles(self.model):
             preamble_choice = await self._menu_choice(
                 "Tool call preambles",
```
```diff
@@ -9157,7 +9408,7 @@ class ChatCLI:
                 blocks.append(txt.strip())
             except Exception:
                 pass
-        # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+        # Tool usage preamble (UX hint) — GPT-5 / GPT-5.2 only (non-Codex)
         try:
             if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
                 blocks.append(
```
{henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/METADATA (CHANGED)

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: henosis-cli
-Version: 0.6.10
+Version: 0.6.11
 Summary: henosis-cli — interactive CLI for the Henosis multi-provider streaming chat backend, with optional local tools.
 Author-email: henosis <henosis@henosis.us>
 License-Expression: LicenseRef-Proprietary
```
{henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/RECORD (CHANGED; old-side hashes were truncated in the source listing and are left as-is)

```diff
@@ -1,11 +1,11 @@
-cli.py,sha256=
+cli.py,sha256=rVE8sqFR3bbW4r4NPsqJaYnq_tVMGFokdMyt9ul837M,548601
 henosis_cli_tools/__init__.py,sha256=x3uaN_ub32uALx_oURna0VnuoSsj7i9NYY6uRsc2ZzM,1147
 henosis_cli_tools/cli_entry.py,sha256=OZTe_s9Hfy3mcsYG77T3RTdtCDod-CSwmhskbXjmmqs,1713
 henosis_cli_tools/input_engine.py,sha256=qUCSvTTiqmujELkVbpvMXOpZWxTGDhDTMQccU7yZJto,24126
 henosis_cli_tools/settings_ui.py,sha256=sUlgUIev4BhApgZf80U3GpPUufaMWnguOP8HLgZmjfg,22809
 henosis_cli_tools/tool_impl.py,sha256=iSdkDIAecgphXrS8Nd702SwhZaEJ9zyL4ieeH_mmjJo,46213
-henosis_cli-0.6.
-henosis_cli-0.6.
-henosis_cli-0.6.
-henosis_cli-0.6.
-henosis_cli-0.6.
+henosis_cli-0.6.11.dist-info/METADATA,sha256=2_IkPpWTUEBI_jXrNgzpkj3lRYAWHaToYziQX2clJm8,5749
+henosis_cli-0.6.11.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+henosis_cli-0.6.11.dist-info/entry_points.txt,sha256=KmXDdmIjq1SVMs8FK3wHPA2i89RMaerzZHIetllMLIk,74
+henosis_cli-0.6.11.dist-info/top_level.txt,sha256=u7XMBcJ8Kb0n91WaSU-4Db8yURSUXFuOxGMsXti0a-g,34
+henosis_cli-0.6.11.dist-info/RECORD,,
```
{henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/WHEEL: file without changes
{henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/entry_points.txt: file without changes
{henosis_cli-0.6.10.dist-info → henosis_cli-0.6.11.dist-info}/top_level.txt: file without changes