henosis-cli 0.6.10__py3-none-any.whl → 0.6.11__py3-none-any.whl

This diff compares two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
cli.py CHANGED
@@ -27,6 +27,7 @@ import getpass
  from urllib.parse import urlparse, urlunparse
  import subprocess
  import shlex
+ import signal
  import importlib
  import importlib.util
  import importlib.metadata
@@ -674,7 +675,7 @@ class UI:
  for n, ty, sz in rows:
  print(f"{n:<40} {ty:<8} {sz}")

- class ChatCLI:
+ class ChatCLI:
  def __init__(
  self,
  server: str,
@@ -1062,9 +1063,15 @@ class ChatCLI:
  # Track Ctrl+C timing for double-press-to-exit behavior
  self._last_interrupt_ts: Optional[float] = None

+ # Ctrl+C cancel flag and partial-text accumulator for graceful stream cancel.
+ # When _stream_cancelled is set, the SSE loop breaks early and the partial
+ # assistant text is kept in context (no rollback, no edit mode).
+ self._stream_cancelled: bool = False
+ self._stream_partial_text: str = ""
+
  # Ctrl+C during a running stream should not kill the entire CLI.
  # Instead, we cancel the in-flight turn and reopen the last user query for editing.
- # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+ # Provider-native tool-chain context is preserved on cancel via _preserve_provider_state_on_cancel().
  self._pending_user_edit: Optional[str] = None
  self._pending_turn_snapshot: Optional[Dict[str, Any]] = None

@@ -1320,9 +1327,84 @@ class ChatCLI:
  except Exception:
  return 0

- def _clip(self, s: Any, max_len: int = 300) -> str:
- s = str(s)
- return s if len(s) <= max_len else (s[: max_len//2] + " ... (truncated) ... " + s[- max_len//2 :])
+ def _clip(self, s: Any, max_len: int = 300) -> str:
+ s = str(s)
+ return s if len(s) <= max_len else (s[: max_len//2] + " ... (truncated) ... " + s[- max_len//2 :])
+
+ # ----------------------- Cancel/usage helpers -----------------------
+ def _rough_tokens_from_chars(self, chars: int) -> int:
+ """Very rough token estimator used only when a stream is cancelled.
+
+ We use ~4 chars/token as a fallback so we can display *some* token usage
+ even when the server never emits message.completed (usage is then unavailable).
+ """
+ try:
+ n = int(chars or 0)
+ except Exception:
+ n = 0
+ if n <= 0:
+ return 0
+ # ceil(n/4)
+ return (n + 3) // 4
+
+ def _render_cancelled_usage_notice(
+ self,
+ *,
+ model_label: Optional[str],
+ est_completion_chars: int,
+ events_total: int = 0,
+ deltas_total: int = 0,
+ bytes_total: int = 0,
+ tool_calls: int = 0,
+ turn_secs: Optional[float] = None,
+ ) -> None:
+ """Print a small usage notice when Ctrl+C cancels mid-stream.
+
+ The key requirement: do not show "0 tokens"/nothing when we know
+ the model already emitted a tool.call or other output.
+ """
+ try:
+ tok = self._rough_tokens_from_chars(int(est_completion_chars or 0))
+ except Exception:
+ tok = 0
+ label = model_label or self._current_turn.get("model") or self._last_used_model or self.model or "(unknown)"
+ # Always show something human-readable even if estimate is 0.
+ # (0 can happen if cancel occurred before we received any delta/tool.call lines.)
+ lines: List[str] = []
+ if tok > 0:
+ lines.append(f"Cancelled — estimated tokens used so far (completion/tool output): ~{tok}")
+ else:
+ lines.append("Cancelled — usage unavailable (no message.completed); no output received to estimate tokens")
+ try:
+ lines.append(f"Model: {label}")
+ except Exception:
+ pass
+ try:
+ parts = [f"events={int(events_total)}", f"deltas={int(deltas_total)}", f"bytes={int(bytes_total)}", f"tools={int(tool_calls)}"]
+ lines.append("Stream: " + " | ".join(parts))
+ except Exception:
+ pass
+ try:
+ if isinstance(turn_secs, (int, float)) and turn_secs is not None:
+ lines.append(f"Time (turn): {float(turn_secs):.2f}s")
+ except Exception:
+ pass
+
+ try:
+ # Match existing UX patterns: info box in verbose mode; single line in concise mode.
+ if str(getattr(self, "usage_info_mode", "verbose")).lower() == "concise":
+ # One-liner only
+ self.ui.print(lines[0], style=self.ui.theme.get("warn"), force=True) # type: ignore
+ else:
+ self.ui.info_box("Usage (cancelled)", lines)
+ except Exception:
+ # Last-resort plain prints
+ try:
+ self.ui.warn(lines[0])
+ for ln in lines[1:]:
+ self.ui.print(ln)
+ except Exception:
+ pass

  # ----------------------- Pricing + costs -----------------------

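The new estimator rounds up at roughly 4 characters per token. A minimal standalone sketch of the same arithmetic, for readers who want to sanity-check the fallback (function name and test values are illustrative, not part of the package):

```python
# Minimal sketch of the ~4 chars/token fallback (names illustrative).
def rough_tokens_from_chars(chars: int) -> int:
    n = max(int(chars or 0), 0)
    return (n + 3) // 4  # ceil(n / 4) via integer arithmetic; 0 stays 0

assert rough_tokens_from_chars(0) == 0
assert rough_tokens_from_chars(1) == 1      # a single char still rounds up to one token
assert rough_tokens_from_chars(8) == 2
assert rough_tokens_from_chars(1000) == 250
```
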
@@ -1435,7 +1517,7 @@ class ChatCLI:
  # Codex family: disable preambles for better behavior
  if "codex" in model_name:
  self.preambles_enabled = False
- # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+ # Tool-call preambles are supported for GPT-5 / GPT-5.2 non-Codex models.
  # Force-disable for all other models (even if a saved setting had it enabled).
  if not self._supports_preambles(self.model):
  self.preambles_enabled = False
@@ -1448,23 +1530,25 @@ class ChatCLI:
  def _supports_preambles(self, model: Optional[str]) -> bool:
  """Tool-call preambles are a CLI-only UX hint.

- Requirement: disabled for all models except GPT-5 (base model; non-Codex).
- In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+ Supported (non-Codex only):
+ - GPT-5 base line: "gpt-5" and date-pinned variants like "gpt-5-2025-08-07"
+ - GPT-5.2 family: "gpt-5.2*" (e.g., gpt-5.2, gpt-5.2-pro)
+
+ Not supported:
+ - Any Codex variants ("*codex*")
+ - GPT-5.1* (kept off by default until validated)
  """
  try:
  if not model:
  return False
  m = str(model).strip().lower()
- # Only the base GPT-5 line supports this UX toggle.
- # Allow:
- # - "gpt-5"
- # - date-pinned variants like "gpt-5-2025-08-07"
- # Disallow:
- # - versioned families like "gpt-5.1*" / "gpt-5.2*"
- if not (m == "gpt-5" or m.startswith("gpt-5-")):
+ # Allow GPT-5 base line and GPT-5.2 family.
+ if not (m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5.2")):
  return False
  if "codex" in m:
  return False
+ if m.startswith("gpt-5.1"):
+ return False
  return True
  except Exception:
  return False
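The revised gating reduces to a small pure predicate. A hedged standalone restatement follows, with the docstring's own examples as checks; the function name and harness are illustrative, and the explicit `gpt-5.1` exclusion is kept even though the prefix-allow list already rejects it (matching the belt-and-suspenders style of the original):

```python
from typing import Optional

# Restatement of the _supports_preambles rules (names illustrative).
def supports_preambles(model: Optional[str]) -> bool:
    if not model:
        return False
    m = str(model).strip().lower()
    if "codex" in m:              # all Codex variants are excluded
        return False
    if m.startswith("gpt-5.1"):   # kept off by default until validated
        return False
    # GPT-5 base line (incl. date-pinned "gpt-5-...") and the GPT-5.2 family
    return m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5.2")

cases = {
    "gpt-5": True, "gpt-5-2025-08-07": True,
    "gpt-5.2": True, "gpt-5.2-pro": True,
    "gpt-5.1": False, "gpt-5-codex": False, "gpt-4o": False,
}
for name, want in cases.items():
    assert supports_preambles(name) is want, name
```
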
@@ -2835,7 +2919,7 @@ class ChatCLI:
  {"label": "Preambles & First-turn", "type": "group", "items": [
  {
  "id": "preambles_enabled",
- "label": "Enable tool call preambles (GPT-5 only)",
+ "label": "Enable tool call preambles (GPT-5 / GPT-5.2; non-Codex)",
  "type": "bool",
  # Only show this control when the *currently selected* model supports it.
  # (This updates live as the Model picker changes.)
@@ -3306,7 +3390,7 @@ class ChatCLI:
  except Exception:
  pass

- # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+ # 3) Tool usage preamble (UX hint) — GPT-5 / GPT-5.2 only (non-Codex)
  try:
  if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
@@ -5278,51 +5362,40 @@ class ChatCLI:
  continue
  self._busy = True
  try:
- assistant_text = await self._stream_once(user_input)
+ assistant_text = await self._run_stream_with_cancel(user_input)
  finally:
  self._busy = False
- except KeyboardInterrupt:
- # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
- # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+
+ # If the stream was cancelled via Ctrl+C, keep partial text
+ # in context and return to the normal prompt.
+ if self._stream_cancelled:
+ partial = self._stream_partial_text or ""
+ content_sent = self._last_built_user_content or user_input
+ # Always keep the user message in context so it is
+ # not silently lost when cancel arrives before any
+ # assistant tokens were streamed.
+ self.history.append({"role": "user", "content": content_sent})
+ if partial.strip():
+ self.history.append({"role": "assistant", "content": partial})
+ # Roll forward provider-native replay state so the
+ # next turn can pick up seamlessly.
+ try:
+ self._preserve_provider_state_on_cancel(
+ content_sent, partial,
+ )
+ except Exception:
+ pass
+ self.ui.warn("Interrupted. Partial response kept in context.")
+ continue
+ except (KeyboardInterrupt, asyncio.CancelledError):
+ # Safety net for Ctrl+C outside of the streaming path
+ # (e.g. during snapshot creation). _run_stream_with_cancel
+ # handles the common streaming case internally.
  try:
  await self._cancel_inflight_dispatch()
  except (Exception, BaseException):
  pass

- # Restore state to *before* this turn started.
- try:
- snap = self._pending_turn_snapshot or {}
- if isinstance(snap.get("history"), list):
- self.history = snap.get("history")
- if isinstance(snap.get("messages_for_save"), list):
- self.messages_for_save = snap.get("messages_for_save")
- if isinstance(snap.get("kimi_raw"), list):
- self._kimi_raw_history = snap.get("kimi_raw")
- if isinstance(snap.get("gemini_raw"), list):
- self._gemini_raw_history = snap.get("gemini_raw")
- if "openai_prev" in snap:
- self._openai_previous_response_id = snap.get("openai_prev")
- if isinstance(snap.get("openai_ids"), list):
- self._openai_response_id_history = snap.get("openai_ids")
- if isinstance(snap.get("openai_input_items"), list):
- self._openai_input_items = snap.get("openai_input_items")
- if "openai_last_sent_input_items" in snap:
- self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
- if "inflight_dispatch" in snap:
- self._inflight_dispatch = snap.get("inflight_dispatch")
- if "did_inject_codebase_map" in snap:
- self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
- if "did_inject_custom_first_turn" in snap:
- self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
- if "did_inject_working_memory" in snap:
- self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
- if "memory_paths_for_first_turn" in snap:
- self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
- self._last_built_user_content = snap.get("last_built_user_content")
- except Exception:
- pass
-
- # Clear any transient indicator line and land on a fresh prompt line.
  try:
  sys.stdout.write("\r\x1b[2K\n")
  sys.stdout.flush()
@@ -5332,15 +5405,7 @@ class ChatCLI:
  except Exception:
  pass

- try:
- supports = self._provider_supports_native_retention(self.model)
- except Exception:
- supports = False
- if supports:
- self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
- else:
- self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
- self._pending_user_edit = user_input
+ self.ui.warn("Interrupted.")
  continue
  except httpx.HTTPStatusError as he:
  try:
@@ -5676,7 +5741,145 @@ class ChatCLI:
  return True

  # ----------------------- SSE Streaming loop ------------------------
- async def _stream_once(self, user_input: str) -> str:
+ def _preserve_provider_state_on_cancel(
+ self, user_content: str, partial_assistant: str,
+ ) -> None:
+ """Roll forward provider-native replay state on Ctrl+C cancel.
+
+ During a normal turn, ``message.completed`` delivers delta items
+ (OpenAI), an authoritative ``raw_provider_messages`` snapshot
+ (Gemini), and similar per-provider payloads. When the stream is
+ cancelled those events never arrive, so this method patches the
+ minimum state needed for the next turn to continue cleanly.
+
+ * **Gemini / Kimi** -- ``provider.message`` and ``tool.result``
+ SSE events already update ``_gemini_raw_history`` /
+ ``_kimi_raw_history`` during streaming, so those histories are
+ already consistent. No extra work needed.
+ * **OpenAI** -- The manual input-item chain
+ (``_openai_input_items``) is only updated from
+ ``openai_delta_items`` inside ``message.completed``. Here we
+ roll the chain forward to include the user message we sent and
+ any partial assistant text.
+ """
+ model = (
+ self._current_turn.get("model")
+ or self._last_used_model
+ or self.model
+ or ""
+ )
+
+ # --- OpenAI: roll _openai_input_items forward -----------------
+ if self._is_openai_model(model):
+ try:
+ sent = self._openai_last_sent_input_items
+ if isinstance(sent, list) and sent:
+ items = copy.deepcopy(sent)
+ # Include partial assistant text so the model knows
+ # what it already said before the user interrupted.
+ if partial_assistant.strip():
+ items.append({
+ "role": "assistant",
+ "content": partial_assistant,
+ })
+ self._openai_input_items = (
+ self._sanitize_openai_items(items)
+ )
+ except Exception:
+ pass
+ finally:
+ self._openai_last_sent_input_items = None
+
+ # --- Gemini / Kimi: already up-to-date (see docstring) --------
+
+ async def _run_stream_with_cancel(self, user_input: str) -> str:
+ """Wrap _stream_once so Ctrl+C cancels the stream gracefully.
+
+ On cancel the SSE loop sees _stream_cancelled and breaks early.
+ Whatever partial assistant text was accumulated is returned so the
+ caller can keep it in conversation context.
+
+ Key design decision: on cancel we KEEP all streamed content in
+ context, append the user message + partial assistant reply to
+ history, and return to the normal ``You>`` prompt -- no edit mode
+ and no snapshot rollback.
+ """
+ self._stream_cancelled = False
+ self._stream_partial_text = ""
+
+ # Install a custom SIGINT handler for the duration of streaming.
+ # First Ctrl+C sets _stream_cancelled so the SSE loop breaks
+ # naturally at the next event; second Ctrl+C force-raises.
+ _original_handler = signal.getsignal(signal.SIGINT)
+ _sigint_count = 0
+
+ def _stream_sigint_handler(signum, frame):
+ nonlocal _sigint_count
+ _sigint_count += 1
+ self._stream_cancelled = True
+ if _sigint_count >= 2:
+ # Restore original handler and hard-interrupt on second press
+ try:
+ signal.signal(signal.SIGINT, _original_handler or signal.default_int_handler)
+ except Exception:
+ pass
+ raise KeyboardInterrupt()
+
+ try:
+ signal.signal(signal.SIGINT, _stream_sigint_handler)
+ except (OSError, ValueError):
+ # signal.signal() can only be called from the main thread
+ pass
+
+ try:
+ return await self._stream_once(user_input)
+ except (KeyboardInterrupt, asyncio.CancelledError):
+ self._stream_cancelled = True
+ # Best-effort: cancel any in-flight client-dispatched tool
+ try:
+ await self._cancel_inflight_dispatch("cancelled by user")
+ except Exception:
+ pass
+ # Clear any transient indicator / status line
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ pass
+ # Best-effort: show a usage notice even on hard-interrupt (no message.completed).
+ # We can only estimate from what we already buffered locally.
+ try:
+ # Estimate completion/output chars from partial assistant text + tool event payload sizes.
+ tool_chars = 0
+ try:
+ evs = (self._current_turn.get("tool_events") or []) if isinstance(self._current_turn, dict) else []
+ if isinstance(evs, list):
+ # Keep it cheap: only sum a bounded amount.
+ for ev in evs[-50:]:
+ try:
+ tool_chars += len(json.dumps(ev, ensure_ascii=False))
+ except Exception:
+ tool_chars += len(str(ev))
+ except Exception:
+ tool_chars = 0
+ est_chars = len(self._stream_partial_text or "") + tool_chars
+ self._render_cancelled_usage_notice(
+ model_label=(self._current_turn.get("model") if isinstance(self._current_turn, dict) else None),
+ est_completion_chars=est_chars,
+ tool_calls=len((self._current_turn.get("tool_events") or []) if isinstance(self._current_turn, dict) else []),
+ )
+ except Exception:
+ pass
+ return self._stream_partial_text
+ finally:
+ # Restore the original SIGINT handler so normal Ctrl+C
+ # behavior resumes at the input prompt.
+ try:
+ signal.signal(signal.SIGINT, _original_handler or signal.default_int_handler)
+ except (OSError, ValueError):
+ pass
+
+ async def _stream_once(self, user_input: str) -> str:
  # Build request payload.
  # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
  `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
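The wrapper above implements a two-stage Ctrl+C: the first press sets a cancel flag so the SSE loop can exit at the next event boundary; the second press restores the previous handler and hard-interrupts. A minimal self-contained sketch of that pattern, not the CLI's actual code (all names illustrative):

```python
# Two-stage Ctrl+C sketch: press once to set a cancel flag so the loop can
# exit cleanly; press twice to raise KeyboardInterrupt immediately.
import signal
import time

cancelled = False
presses = 0

def on_sigint(signum, frame):
    global cancelled, presses
    presses += 1
    cancelled = True
    if presses >= 2:
        signal.signal(signal.SIGINT, signal.default_int_handler)
        raise KeyboardInterrupt

previous = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, on_sigint)      # main thread only, as the diff notes
try:
    for _ in range(300):                     # stand-in for the SSE event loop
        if cancelled:
            print("cancelled gracefully; partial output would be kept")
            break
        time.sleep(0.1)
finally:
    signal.signal(signal.SIGINT, previous)   # restore normal Ctrl+C behavior
```
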
@@ -5844,7 +6047,7 @@ class ChatCLI:
  payload["text_verbosity"] = self.text_verbosity
  except Exception:
  pass
- # Preambles are a GPT-5-only UX toggle.
+ # Preambles are a GPT-5 / GPT-5.2-only UX toggle.
  try:
  if self._supports_preambles(self.model):
  payload["preambles_enabled"] = bool(self.preambles_enabled)
@@ -5917,15 +6120,18 @@ class ChatCLI:
  except Exception:
  pass

- async def do_stream(req_payload: Dict[str, Any]) -> str:
+ async def do_stream(req_payload: Dict[str, Any]) -> str:
  nonlocal session_id
  nonlocal header_printed
  # Retry tracking flags (updated per attempt)
  nonlocal last_completed, last_error, last_bytes_total
  # While streaming assistant text, suppress the thinking indicator to avoid clobbering output
  streaming_assistant = False
- # Initialize per-turn timer and tool call counter
- tool_calls = 0
+ # Initialize per-turn timer and tool call counter
+ tool_calls = 0
+ # Estimate of model output chars so far (assistant deltas + tool.call payloads).
+ # Used only when Ctrl+C cancels the stream before message.completed.
+ model_output_chars = 0
  # Capture last N SSE events for diagnostics if stream ends without message.completed
  from collections import deque
  last_events = deque(maxlen=SSE_TAIL_MAX) # keep short, printable summaries
@@ -6270,6 +6476,9 @@ class ChatCLI:
  pass

  async for event, data_raw in parse_sse_lines(resp, debug=_sse_debug):
+ # Graceful cancel: break early when Ctrl+C sets the flag.
+ if self._stream_cancelled:
+ break
  try:
  if isinstance(data_raw, str):
  _bytes_total += len(data_raw)
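The new `if self._stream_cancelled: break` makes cancellation cooperative: the flag is only polled between SSE events, so a half-received event is never torn mid-parse. A toy model of the same shape (all names illustrative):

```python
# Cooperative cancel sketch: the flag is checked once per event, so
# cancellation always lands on an event boundary.
import asyncio

async def fake_events():
    for i in range(100):
        await asyncio.sleep(0.01)
        yield f"delta-{i}"

async def consume(cancel: asyncio.Event) -> list:
    received = []
    async for ev in fake_events():
        if cancel.is_set():  # same shape as `if self._stream_cancelled: break`
            break
        received.append(ev)
    return received

async def main():
    cancel = asyncio.Event()
    asyncio.get_running_loop().call_later(0.05, cancel.set)  # simulated Ctrl+C
    partial = await consume(cancel)
    print(f"kept {len(partial)} events before cancel")

asyncio.run(main())
```
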
@@ -6312,7 +6521,7 @@ class ChatCLI:
  pass
  continue

- elif event == "message.delta":
+ elif event == "message.delta":
  # Stop any transient indicator before printing content and clear the line
  try:
  await _indicator_stop(clear=True)
@@ -6320,8 +6529,8 @@ class ChatCLI:
  pass
  # Indicator line cleared; we're now at the start of a fresh line.
  at_line_start = True
- text = data.get("text", "")
- if text:
+ text = data.get("text", "")
+ if text:
  try:
  _deltas_total += 1
  except Exception:
@@ -6362,7 +6571,16 @@ class ChatCLI:
  self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
  except Exception:
  pass
- assistant_buf.append(text)
+ assistant_buf.append(text)
+ try:
+ model_output_chars += len(str(text))
+ except Exception:
+ pass
+ # Keep partial text accessible for Ctrl+C cancel.
+ try:
+ self._stream_partial_text = "".join(assistant_buf)
+ except Exception:
+ pass
  # Print the token delta raw to avoid any wrapping/markup side-effects
  try:
  self.ui.print(text, style=self.ui.theme["assistant"], end="")
@@ -6391,7 +6609,7 @@ class ChatCLI:
  except Exception:
  pass

- elif event == "tool.call":
+ elif event == "tool.call":
  # Ensure any prior indicator state is reset cleanly, then restart
  # a fresh indicator while waiting for the tool to run.
  try:
@@ -6411,8 +6629,8 @@ class ChatCLI:
  pass
  at_line_start = True

- name = data.get("name")
- args = data.get("args", {}) or {}
+ name = data.get("name")
+ args = data.get("args", {}) or {}
  call_id = data.get("call_id")
  try:
  self.ui.debug_log(f"tool.call name='{name}' call_id={call_id}")
@@ -6453,11 +6671,21 @@ class ChatCLI:
  self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
  except Exception:
  pass
- # Count tool calls
- try:
- tool_calls += 1
- except Exception:
- pass
+ # Count tool calls
+ try:
+ tool_calls += 1
+ except Exception:
+ pass
+ # tool.call is model output too (even when no message.delta happened yet).
+ try:
+ # Keep it simple; we only need a non-zero estimate.
+ model_output_chars += len(str(name or ""))
+ model_output_chars += len(json.dumps(args, ensure_ascii=False))
+ except Exception:
+ try:
+ model_output_chars += len(str(args))
+ except Exception:
+ pass

  # Track args for troubleshooting and broadcast to WS clients
  if call_id:
@@ -7910,12 +8138,35 @@ class ChatCLI:
  if DEBUG_SSE:
  self.ui.print(f"[debug] unhandled event: {event} payload={truncate_json(data, 800)}", style=self.ui.theme["dim"])

- # If stream ended without a message.completed, render a fallback info box
- # Ensure the indicator is stopped on abnormal termination and fully cleared
- try:
- await _indicator_stop(clear=True)
- except Exception:
- pass
+ # If stream ended without a message.completed, render a fallback info box
+ # Ensure the indicator is stopped on abnormal termination and fully cleared
+ try:
+ await _indicator_stop(clear=True)
+ except Exception:
+ pass
+ # If cancelled via Ctrl+C, return *but* still show an estimated token/usage notice.
+ if self._stream_cancelled:
+ try:
+ # Attempt to compute turn duration if we have a start timestamp.
+ turn_secs = None
+ try:
+ now_pc = time.perf_counter()
+ if self._turn_started_at is not None:
+ turn_secs = float(now_pc - float(self._turn_started_at))
+ except Exception:
+ turn_secs = None
+ self._render_cancelled_usage_notice(
+ model_label=(self._current_turn.get("model") if isinstance(self._current_turn, dict) else None),
+ est_completion_chars=int(model_output_chars or 0),
+ events_total=int(_events_total or 0),
+ deltas_total=int(_deltas_total or 0),
+ bytes_total=int(_bytes_total or 0),
+ tool_calls=int(tool_calls or 0),
+ turn_secs=turn_secs,
+ )
+ except Exception:
+ pass
+ return "".join(assistant_buf)
  buf_str2 = "".join(assistant_buf)
  self.ui.ensure_newline(buf_str2)
  # Use a visible notice (non-dim) so users are aware something ended unexpectedly
@@ -8124,7 +8375,7 @@ class ChatCLI:
  result_text = await do_stream(payload)

  # Auto-restart after summarization: clear conversation and resend same user input with injections
- if self._restart_after_summary:
+ if self._restart_after_summary and not self._stream_cancelled:
  self._restart_after_summary = False
  # Reset conversation to a fresh session (preserve system prompt)
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
@@ -8209,14 +8460,14 @@ class ChatCLI:
  return await do_stream(new_payload)

  # If we marked an auto-retry due to provider output size limits, retry once using the same payload
- if self._auto_retry_after_tailed:
- self._auto_retry_after_tailed = False
+ if self._auto_retry_after_tailed and not self._stream_cancelled:
+ self._auto_retry_after_tailed = False
  self.ui.warn("Retrying turn with tailed file content due to provider output size limit...")
  return await do_stream(payload)

  # Generic retry: when stream ended without a message.completed, retry the last model turn
  max_attempts = 3
- while (not last_completed) and (attempts_done < max_attempts):
+ while (not last_completed) and (not self._stream_cancelled) and (attempts_done < max_attempts):
  attempts_done += 1
  # Emit CLI/WS notice
  try:
@@ -8250,7 +8501,7 @@ class ChatCLI:
  result_text = await do_stream(payload)

  # If still not completed after retries, emit a final failure notice
- if not last_completed:
+ if not last_completed and not self._stream_cancelled:
  try:
  self.ui.print(f"[retry] failed after {attempts_done} attempt(s)", style=self.ui.theme["warn"]) # type: ignore
  await self._ws_broadcast("retry.failed", {"attempts": attempts_done, "max_attempts": max_attempts})
@@ -9011,7 +9262,7 @@ class ChatCLI:
  else:
  self.text_verbosity = curv or "medium"

- # --- 7) Tool preambles (GPT-5 only) ---
+ # --- 7) Tool preambles (GPT-5 / GPT-5.2 only; non-Codex) ---
  if self._supports_preambles(self.model):
  preamble_choice = await self._menu_choice(
  "Tool call preambles",
@@ -9157,7 +9408,7 @@ class ChatCLI:
  blocks.append(txt.strip())
  except Exception:
  pass
- # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+ # Tool usage preamble (UX hint) — GPT-5 / GPT-5.2 only (non-Codex)
  try:
  if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: henosis-cli
- Version: 0.6.10
+ Version: 0.6.11
  Summary: henosis-cli — interactive CLI for the Henosis multi-provider streaming chat backend, with optional local tools.
  Author-email: henosis <henosis@henosis.us>
  License-Expression: LicenseRef-Proprietary
RECORD CHANGED
@@ -1,11 +1,11 @@
- cli.py,sha256=syMx_cXQLS_wEsL7OnLc84gWEoAM3NNW2M_ypQSqeTQ,537230
+ cli.py,sha256=rVE8sqFR3bbW4r4NPsqJaYnq_tVMGFokdMyt9ul837M,548601
  henosis_cli_tools/__init__.py,sha256=x3uaN_ub32uALx_oURna0VnuoSsj7i9NYY6uRsc2ZzM,1147
  henosis_cli_tools/cli_entry.py,sha256=OZTe_s9Hfy3mcsYG77T3RTdtCDod-CSwmhskbXjmmqs,1713
  henosis_cli_tools/input_engine.py,sha256=qUCSvTTiqmujELkVbpvMXOpZWxTGDhDTMQccU7yZJto,24126
  henosis_cli_tools/settings_ui.py,sha256=sUlgUIev4BhApgZf80U3GpPUufaMWnguOP8HLgZmjfg,22809
  henosis_cli_tools/tool_impl.py,sha256=iSdkDIAecgphXrS8Nd702SwhZaEJ9zyL4ieeH_mmjJo,46213
- henosis_cli-0.6.10.dist-info/METADATA,sha256=2rol5BYKPEJBWBrdM4Vj-ykAiLUx1UaL_nlUUcBpKvQ,5749
- henosis_cli-0.6.10.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- henosis_cli-0.6.10.dist-info/entry_points.txt,sha256=KmXDdmIjq1SVMs8FK3wHPA2i89RMaerzZHIetllMLIk,74
- henosis_cli-0.6.10.dist-info/top_level.txt,sha256=u7XMBcJ8Kb0n91WaSU-4Db8yURSUXFuOxGMsXti0a-g,34
- henosis_cli-0.6.10.dist-info/RECORD,,
+ henosis_cli-0.6.11.dist-info/METADATA,sha256=2_IkPpWTUEBI_jXrNgzpkj3lRYAWHaToYziQX2clJm8,5749
+ henosis_cli-0.6.11.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ henosis_cli-0.6.11.dist-info/entry_points.txt,sha256=KmXDdmIjq1SVMs8FK3wHPA2i89RMaerzZHIetllMLIk,74
+ henosis_cli-0.6.11.dist-info/top_level.txt,sha256=u7XMBcJ8Kb0n91WaSU-4Db8yURSUXFuOxGMsXti0a-g,34
+ henosis_cli-0.6.11.dist-info/RECORD,,