henosis-cli 0.6.10__py3-none-any.whl → 0.6.11__py3-none-any.whl

This diff compares two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
cli.py CHANGED
@@ -27,6 +27,7 @@ import getpass
  from urllib.parse import urlparse, urlunparse
  import subprocess
  import shlex
+ import signal
  import importlib
  import importlib.util
  import importlib.metadata
@@ -674,7 +675,7 @@ class UI:
  for n, ty, sz in rows:
  print(f"{n:<40} {ty:<8} {sz}")

- class ChatCLI:
+ class ChatCLI:
  def __init__(
  self,
  server: str,
@@ -1062,9 +1063,15 @@ class ChatCLI:
  # Track Ctrl+C timing for double-press-to-exit behavior
  self._last_interrupt_ts: Optional[float] = None

+ # Ctrl+C cancel flag and partial-text accumulator for graceful stream cancel.
+ # When _stream_cancelled is set, the SSE loop breaks early and the partial
+ # assistant text is kept in context (no rollback, no edit mode).
+ self._stream_cancelled: bool = False
+ self._stream_partial_text: str = ""
+
  # Ctrl+C during a running stream should not kill the entire CLI.
  # Instead, we cancel the in-flight turn and reopen the last user query for editing.
- # NOTE: We intentionally do NOT preserve provider tool-chain context yet (see issuelist.md #1).
+ # Provider-native tool-chain context is preserved on cancel via _preserve_provider_state_on_cancel().
  self._pending_user_edit: Optional[str] = None
  self._pending_turn_snapshot: Optional[Dict[str, Any]] = None

@@ -1320,9 +1327,84 @@ class ChatCLI:
  except Exception:
  return 0

- def _clip(self, s: Any, max_len: int = 300) -> str:
- s = str(s)
- return s if len(s) <= max_len else (s[: max_len//2] + " ... (truncated) ... " + s[- max_len//2 :])
+ def _clip(self, s: Any, max_len: int = 300) -> str:
+ s = str(s)
+ return s if len(s) <= max_len else (s[: max_len//2] + " ... (truncated) ... " + s[- max_len//2 :])
+
+ # ----------------------- Cancel/usage helpers -----------------------
+ def _rough_tokens_from_chars(self, chars: int) -> int:
+ """Very rough token estimator used only when a stream is cancelled.
+
+ We use ~4 chars/token as a fallback so we can display *some* token usage
+ even when the server never emits message.completed (usage is then unavailable).
+ """
+ try:
+ n = int(chars or 0)
+ except Exception:
+ n = 0
+ if n <= 0:
+ return 0
+ # ceil(n/4)
+ return (n + 3) // 4
+
+ def _render_cancelled_usage_notice(
+ self,
+ *,
+ model_label: Optional[str],
+ est_completion_chars: int,
+ events_total: int = 0,
+ deltas_total: int = 0,
+ bytes_total: int = 0,
+ tool_calls: int = 0,
+ turn_secs: Optional[float] = None,
+ ) -> None:
+ """Print a small usage notice when Ctrl+C cancels mid-stream.
+
+ The key requirement: do not show "0 tokens"/nothing when we know
+ the model already emitted a tool.call or other output.
+ """
+ try:
+ tok = self._rough_tokens_from_chars(int(est_completion_chars or 0))
+ except Exception:
+ tok = 0
+ label = model_label or self._current_turn.get("model") or self._last_used_model or self.model or "(unknown)"
+ # Always show something human-readable even if estimate is 0.
+ # (0 can happen if cancel occurred before we received any delta/tool.call lines.)
+ lines: List[str] = []
+ if tok > 0:
+ lines.append(f"Cancelled — estimated tokens used so far (completion/tool output): ~{tok}")
+ else:
+ lines.append("Cancelled — usage unavailable (no message.completed); no output received to estimate tokens")
+ try:
+ lines.append(f"Model: {label}")
+ except Exception:
+ pass
+ try:
+ parts = [f"events={int(events_total)}", f"deltas={int(deltas_total)}", f"bytes={int(bytes_total)}", f"tools={int(tool_calls)}"]
+ lines.append("Stream: " + " | ".join(parts))
+ except Exception:
+ pass
+ try:
+ if isinstance(turn_secs, (int, float)) and turn_secs is not None:
+ lines.append(f"Time (turn): {float(turn_secs):.2f}s")
+ except Exception:
+ pass
+
+ try:
+ # Match existing UX patterns: info box in verbose mode; single line in concise mode.
+ if str(getattr(self, "usage_info_mode", "verbose")).lower() == "concise":
+ # One-liner only
+ self.ui.print(lines[0], style=self.ui.theme.get("warn"), force=True) # type: ignore
+ else:
+ self.ui.info_box("Usage (cancelled)", lines)
+ except Exception:
+ # Last-resort plain prints
+ try:
+ self.ui.warn(lines[0])
+ for ln in lines[1:]:
+ self.ui.print(ln)
+ except Exception:
+ pass

  # ----------------------- Pricing + costs -----------------------

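The new estimator rounds up at roughly 4 characters per token. A minimal standalone sketch of the same arithmetic, for readers who want to sanity-check the fallback (function name and test values are illustrative, not part of the package):

```python
# Minimal sketch of the ~4 chars/token fallback (names illustrative).
def rough_tokens_from_chars(chars: int) -> int:
    n = max(int(chars or 0), 0)
    return (n + 3) // 4  # ceil(n / 4) via integer arithmetic; 0 stays 0

assert rough_tokens_from_chars(0) == 0
assert rough_tokens_from_chars(1) == 1      # a single char still rounds up to one token
assert rough_tokens_from_chars(8) == 2
assert rough_tokens_from_chars(1000) == 250
```
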
@@ -1435,7 +1517,7 @@ class ChatCLI:
  # Codex family: disable preambles for better behavior
  if "codex" in model_name:
  self.preambles_enabled = False
- # Tool-call preambles are ONLY supported for GPT-5 non-Codex models.
+ # Tool-call preambles are supported for GPT-5 / GPT-5.2 non-Codex models.
  # Force-disable for all other models (even if a saved setting had it enabled).
  if not self._supports_preambles(self.model):
  self.preambles_enabled = False
@@ -1448,23 +1530,25 @@ class ChatCLI:
  def _supports_preambles(self, model: Optional[str]) -> bool:
  """Tool-call preambles are a CLI-only UX hint.

- Requirement: disabled for all models except GPT-5 (base model; non-Codex).
- In particular, this must be OFF for gpt-5.1*, gpt-5.2*, and all Codex variants.
+ Supported (non-Codex only):
+ - GPT-5 base line: "gpt-5" and date-pinned variants like "gpt-5-2025-08-07"
+ - GPT-5.2 family: "gpt-5.2*" (e.g., gpt-5.2, gpt-5.2-pro)
+
+ Not supported:
+ - Any Codex variants ("*codex*")
+ - GPT-5.1* (kept off by default until validated)
  """
  try:
  if not model:
  return False
  m = str(model).strip().lower()
- # Only the base GPT-5 line supports this UX toggle.
- # Allow:
- # - "gpt-5"
- # - date-pinned variants like "gpt-5-2025-08-07"
- # Disallow:
- # - versioned families like "gpt-5.1*" / "gpt-5.2*"
- if not (m == "gpt-5" or m.startswith("gpt-5-")):
+ # Allow GPT-5 base line and GPT-5.2 family.
+ if not (m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5.2")):
  return False
  if "codex" in m:
  return False
+ if m.startswith("gpt-5.1"):
+ return False
  return True
  except Exception:
  return False
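The revised gating reduces to a small pure predicate. A hedged standalone restatement follows, with the docstring's own examples as checks; the function name and harness are illustrative, and the explicit `gpt-5.1` exclusion is kept even though the prefix-allow list already rejects it (matching the belt-and-suspenders style of the original):

```python
from typing import Optional

# Restatement of the _supports_preambles rules (names illustrative).
def supports_preambles(model: Optional[str]) -> bool:
    if not model:
        return False
    m = str(model).strip().lower()
    if "codex" in m:              # all Codex variants are excluded
        return False
    if m.startswith("gpt-5.1"):   # kept off by default until validated
        return False
    # GPT-5 base line (incl. date-pinned "gpt-5-...") and the GPT-5.2 family
    return m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5.2")

cases = {
    "gpt-5": True, "gpt-5-2025-08-07": True,
    "gpt-5.2": True, "gpt-5.2-pro": True,
    "gpt-5.1": False, "gpt-5-codex": False, "gpt-4o": False,
}
for name, want in cases.items():
    assert supports_preambles(name) is want, name
```
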
@@ -2835,7 +2919,7 @@ class ChatCLI:
  {"label": "Preambles & First-turn", "type": "group", "items": [
  {
  "id": "preambles_enabled",
- "label": "Enable tool call preambles (GPT-5 only)",
+ "label": "Enable tool call preambles (GPT-5 / GPT-5.2; non-Codex)",
  "type": "bool",
  # Only show this control when the *currently selected* model supports it.
  # (This updates live as the Model picker changes.)
@@ -3306,7 +3390,7 @@ class ChatCLI:
  except Exception:
  pass

- # 3) Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+ # 3) Tool usage preamble (UX hint) — GPT-5 / GPT-5.2 only (non-Codex)
  try:
  if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
@@ -5278,51 +5362,40 @@ class ChatCLI:
  continue
  self._busy = True
  try:
- assistant_text = await self._stream_once(user_input)
+ assistant_text = await self._run_stream_with_cancel(user_input)
  finally:
  self._busy = False
- except KeyboardInterrupt:
- # Ctrl+C mid-stream / mid-tool: do not exit the CLI.
- # Best-effort: cancel any in-flight client-dispatched tool so the server unblocks quickly.
+
+ # If the stream was cancelled via Ctrl+C, keep partial text
+ # in context and return to the normal prompt.
+ if self._stream_cancelled:
+ partial = self._stream_partial_text or ""
+ content_sent = self._last_built_user_content or user_input
+ # Always keep the user message in context so it is
+ # not silently lost when cancel arrives before any
+ # assistant tokens were streamed.
+ self.history.append({"role": "user", "content": content_sent})
+ if partial.strip():
+ self.history.append({"role": "assistant", "content": partial})
+ # Roll forward provider-native replay state so the
+ # next turn can pick up seamlessly.
+ try:
+ self._preserve_provider_state_on_cancel(
+ content_sent, partial,
+ )
+ except Exception:
+ pass
+ self.ui.warn("Interrupted. Partial response kept in context.")
+ continue
+ except (KeyboardInterrupt, asyncio.CancelledError):
+ # Safety net for Ctrl+C outside of the streaming path
+ # (e.g. during snapshot creation). _run_stream_with_cancel
+ # handles the common streaming case internally.
  try:
  await self._cancel_inflight_dispatch()
  except (Exception, BaseException):
  pass

- # Restore state to *before* this turn started.
- try:
- snap = self._pending_turn_snapshot or {}
- if isinstance(snap.get("history"), list):
- self.history = snap.get("history")
- if isinstance(snap.get("messages_for_save"), list):
- self.messages_for_save = snap.get("messages_for_save")
- if isinstance(snap.get("kimi_raw"), list):
- self._kimi_raw_history = snap.get("kimi_raw")
- if isinstance(snap.get("gemini_raw"), list):
- self._gemini_raw_history = snap.get("gemini_raw")
- if "openai_prev" in snap:
- self._openai_previous_response_id = snap.get("openai_prev")
- if isinstance(snap.get("openai_ids"), list):
- self._openai_response_id_history = snap.get("openai_ids")
- if isinstance(snap.get("openai_input_items"), list):
- self._openai_input_items = snap.get("openai_input_items")
- if "openai_last_sent_input_items" in snap:
- self._openai_last_sent_input_items = snap.get("openai_last_sent_input_items")
- if "inflight_dispatch" in snap:
- self._inflight_dispatch = snap.get("inflight_dispatch")
- if "did_inject_codebase_map" in snap:
- self._did_inject_codebase_map = bool(snap.get("did_inject_codebase_map"))
- if "did_inject_custom_first_turn" in snap:
- self._did_inject_custom_first_turn = bool(snap.get("did_inject_custom_first_turn"))
- if "did_inject_working_memory" in snap:
- self._did_inject_working_memory = bool(snap.get("did_inject_working_memory"))
- if "memory_paths_for_first_turn" in snap:
- self._memory_paths_for_first_turn = snap.get("memory_paths_for_first_turn") or []
- self._last_built_user_content = snap.get("last_built_user_content")
- except Exception:
- pass
-
- # Clear any transient indicator line and land on a fresh prompt line.
  try:
  sys.stdout.write("\r\x1b[2K\n")
  sys.stdout.flush()
@@ -5332,15 +5405,7 @@ class ChatCLI:
  except Exception:
  pass

- try:
- supports = self._provider_supports_native_retention(self.model)
- except Exception:
- supports = False
- if supports:
- self.ui.warn("Interrupted. Cancelled the in-progress turn. Returning to your last message so you can edit and resend.")
- else:
- self.ui.warn("Interrupted. Returning to your last message so you can edit and resend. (Provider-native tool/thinking retention not implemented for this model yet.)")
- self._pending_user_edit = user_input
+ self.ui.warn("Interrupted.")
  continue
  except httpx.HTTPStatusError as he:
  try:
@@ -5676,7 +5741,145 @@ class ChatCLI:
  return True

  # ----------------------- SSE Streaming loop ------------------------
- async def _stream_once(self, user_input: str) -> str:
+ def _preserve_provider_state_on_cancel(
+ self, user_content: str, partial_assistant: str,
+ ) -> None:
+ """Roll forward provider-native replay state on Ctrl+C cancel.
+
+ During a normal turn, ``message.completed`` delivers delta items
+ (OpenAI), an authoritative ``raw_provider_messages`` snapshot
+ (Gemini), and similar per-provider payloads. When the stream is
+ cancelled those events never arrive, so this method patches the
+ minimum state needed for the next turn to continue cleanly.
+
+ * **Gemini / Kimi** -- ``provider.message`` and ``tool.result``
+ SSE events already update ``_gemini_raw_history`` /
+ ``_kimi_raw_history`` during streaming, so those histories are
+ already consistent. No extra work needed.
+ * **OpenAI** -- The manual input-item chain
+ (``_openai_input_items``) is only updated from
+ ``openai_delta_items`` inside ``message.completed``. Here we
+ roll the chain forward to include the user message we sent and
+ any partial assistant text.
+ """
+ model = (
+ self._current_turn.get("model")
+ or self._last_used_model
+ or self.model
+ or ""
+ )
+
+ # --- OpenAI: roll _openai_input_items forward -----------------
+ if self._is_openai_model(model):
+ try:
+ sent = self._openai_last_sent_input_items
+ if isinstance(sent, list) and sent:
+ items = copy.deepcopy(sent)
+ # Include partial assistant text so the model knows
+ # what it already said before the user interrupted.
+ if partial_assistant.strip():
+ items.append({
+ "role": "assistant",
+ "content": partial_assistant,
+ })
+ self._openai_input_items = (
+ self._sanitize_openai_items(items)
+ )
+ except Exception:
+ pass
+ finally:
+ self._openai_last_sent_input_items = None
+
+ # --- Gemini / Kimi: already up-to-date (see docstring) --------
+
+ async def _run_stream_with_cancel(self, user_input: str) -> str:
+ """Wrap _stream_once so Ctrl+C cancels the stream gracefully.
+
+ On cancel the SSE loop sees _stream_cancelled and breaks early.
+ Whatever partial assistant text was accumulated is returned so the
+ caller can keep it in conversation context.
+
+ Key design decision: on cancel we KEEP all streamed content in
+ context, append the user message + partial assistant reply to
+ history, and return to the normal ``You>`` prompt -- no edit mode
+ and no snapshot rollback.
+ """
+ self._stream_cancelled = False
+ self._stream_partial_text = ""
+
+ # Install a custom SIGINT handler for the duration of streaming.
+ # First Ctrl+C sets _stream_cancelled so the SSE loop breaks
+ # naturally at the next event; second Ctrl+C force-raises.
+ _original_handler = signal.getsignal(signal.SIGINT)
+ _sigint_count = 0
+
+ def _stream_sigint_handler(signum, frame):
+ nonlocal _sigint_count
+ _sigint_count += 1
+ self._stream_cancelled = True
+ if _sigint_count >= 2:
+ # Restore original handler and hard-interrupt on second press
+ try:
+ signal.signal(signal.SIGINT, _original_handler or signal.default_int_handler)
+ except Exception:
+ pass
+ raise KeyboardInterrupt()
+
+ try:
+ signal.signal(signal.SIGINT, _stream_sigint_handler)
+ except (OSError, ValueError):
+ # signal.signal() can only be called from the main thread
+ pass
+
+ try:
+ return await self._stream_once(user_input)
+ except (KeyboardInterrupt, asyncio.CancelledError):
+ self._stream_cancelled = True
+ # Best-effort: cancel any in-flight client-dispatched tool
+ try:
+ await self._cancel_inflight_dispatch("cancelled by user")
+ except Exception:
+ pass
+ # Clear any transient indicator / status line
+ try:
+ sys.stdout.write("\r\x1b[2K\n")
+ sys.stdout.flush()
+ except Exception:
+ pass
+ # Best-effort: show a usage notice even on hard-interrupt (no message.completed).
+ # We can only estimate from what we already buffered locally.
+ try:
+ # Estimate completion/output chars from partial assistant text + tool event payload sizes.
+ tool_chars = 0
+ try:
+ evs = (self._current_turn.get("tool_events") or []) if isinstance(self._current_turn, dict) else []
+ if isinstance(evs, list):
+ # Keep it cheap: only sum a bounded amount.
+ for ev in evs[-50:]:
+ try:
+ tool_chars += len(json.dumps(ev, ensure_ascii=False))
+ except Exception:
+ tool_chars += len(str(ev))
+ except Exception:
+ tool_chars = 0
+ est_chars = len(self._stream_partial_text or "") + tool_chars
+ self._render_cancelled_usage_notice(
+ model_label=(self._current_turn.get("model") if isinstance(self._current_turn, dict) else None),
+ est_completion_chars=est_chars,
+ tool_calls=len((self._current_turn.get("tool_events") or []) if isinstance(self._current_turn, dict) else []),
+ )
+ except Exception:
+ pass
+ return self._stream_partial_text
+ finally:
+ # Restore the original SIGINT handler so normal Ctrl+C
+ # behavior resumes at the input prompt.
+ try:
+ signal.signal(signal.SIGINT, _original_handler or signal.default_int_handler)
+ except (OSError, ValueError):
+ pass
+
+ async def _stream_once(self, user_input: str) -> str:
  # Build request payload.
  # OpenAI: use manual conversation state replay (stateless/ZDR-safe) by sending
  `openai_input_items` that include ALL OpenAI-native items (reasoning/tool calls/tool outputs).
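The wrapper above implements a two-stage Ctrl+C: the first press sets a cancel flag so the SSE loop can exit at the next event boundary; the second press restores the previous handler and hard-interrupts. A minimal self-contained sketch of that pattern, not the CLI's actual code (all names illustrative):

```python
# Two-stage Ctrl+C sketch: press once to set a cancel flag so the loop can
# exit cleanly; press twice to raise KeyboardInterrupt immediately.
import signal
import time

cancelled = False
presses = 0

def on_sigint(signum, frame):
    global cancelled, presses
    presses += 1
    cancelled = True
    if presses >= 2:
        signal.signal(signal.SIGINT, signal.default_int_handler)
        raise KeyboardInterrupt

previous = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, on_sigint)      # main thread only, as the diff notes
try:
    for _ in range(300):                     # stand-in for the SSE event loop
        if cancelled:
            print("cancelled gracefully; partial output would be kept")
            break
        time.sleep(0.1)
finally:
    signal.signal(signal.SIGINT, previous)   # restore normal Ctrl+C behavior
```
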
@@ -5844,7 +6047,7 @@ class ChatCLI:
  payload["text_verbosity"] = self.text_verbosity
  except Exception:
  pass
- # Preambles are a GPT-5-only UX toggle.
+ # Preambles are a GPT-5 / GPT-5.2-only UX toggle.
  try:
  if self._supports_preambles(self.model):
  payload["preambles_enabled"] = bool(self.preambles_enabled)
@@ -5917,15 +6120,18 @@ class ChatCLI:
  except Exception:
  pass

- async def do_stream(req_payload: Dict[str, Any]) -> str:
+ async def do_stream(req_payload: Dict[str, Any]) -> str:
  nonlocal session_id
  nonlocal header_printed
  # Retry tracking flags (updated per attempt)
  nonlocal last_completed, last_error, last_bytes_total
  # While streaming assistant text, suppress the thinking indicator to avoid clobbering output
  streaming_assistant = False
- # Initialize per-turn timer and tool call counter
- tool_calls = 0
+ # Initialize per-turn timer and tool call counter
+ tool_calls = 0
+ # Estimate of model output chars so far (assistant deltas + tool.call payloads).
+ # Used only when Ctrl+C cancels the stream before message.completed.
+ model_output_chars = 0
  # Capture last N SSE events for diagnostics if stream ends without message.completed
  from collections import deque
  last_events = deque(maxlen=SSE_TAIL_MAX) # keep short, printable summaries
@@ -6270,6 +6476,9 @@ class ChatCLI:
  pass

  async for event, data_raw in parse_sse_lines(resp, debug=_sse_debug):
+ # Graceful cancel: break early when Ctrl+C sets the flag.
+ if self._stream_cancelled:
+ break
  try:
  if isinstance(data_raw, str):
  _bytes_total += len(data_raw)
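The new `if self._stream_cancelled: break` makes cancellation cooperative: the flag is only polled between SSE events, so a half-received event is never torn mid-parse. A toy model of the same shape (all names illustrative):

```python
# Cooperative cancel sketch: the flag is checked once per event, so
# cancellation always lands on an event boundary.
import asyncio

async def fake_events():
    for i in range(100):
        await asyncio.sleep(0.01)
        yield f"delta-{i}"

async def consume(cancel: asyncio.Event) -> list:
    received = []
    async for ev in fake_events():
        if cancel.is_set():  # same shape as `if self._stream_cancelled: break`
            break
        received.append(ev)
    return received

async def main():
    cancel = asyncio.Event()
    asyncio.get_running_loop().call_later(0.05, cancel.set)  # simulated Ctrl+C
    partial = await consume(cancel)
    print(f"kept {len(partial)} events before cancel")

asyncio.run(main())
```
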
@@ -6312,7 +6521,7 @@ class ChatCLI:
  pass
  continue

- elif event == "message.delta":
+ elif event == "message.delta":
  # Stop any transient indicator before printing content and clear the line
  try:
  await _indicator_stop(clear=True)
@@ -6320,8 +6529,8 @@ class ChatCLI:
  pass
  # Indicator line cleared; we're now at the start of a fresh line.
  at_line_start = True
- text = data.get("text", "")
- if text:
+ text = data.get("text", "")
+ if text:
  try:
  _deltas_total += 1
  except Exception:
@@ -6362,7 +6571,16 @@ class ChatCLI:
  self.ui.debug_log(f"header.printed model='{model_label}' on_first_delta")
  except Exception:
  pass
- assistant_buf.append(text)
+ assistant_buf.append(text)
+ try:
+ model_output_chars += len(str(text))
+ except Exception:
+ pass
+ # Keep partial text accessible for Ctrl+C cancel.
+ try:
+ self._stream_partial_text = "".join(assistant_buf)
+ except Exception:
+ pass
  # Print the token delta raw to avoid any wrapping/markup side-effects
  try:
  self.ui.print(text, style=self.ui.theme["assistant"], end="")
@@ -6391,7 +6609,7 @@ class ChatCLI:
  except Exception:
  pass

- elif event == "tool.call":
+ elif event == "tool.call":
  # Ensure any prior indicator state is reset cleanly, then restart
  # a fresh indicator while waiting for the tool to run.
  try:
@@ -6411,8 +6629,8 @@ class ChatCLI:
  pass
  at_line_start = True

- name = data.get("name")
- args = data.get("args", {}) or {}
+ name = data.get("name")
+ args = data.get("args", {}) or {}
  call_id = data.get("call_id")
  try:
  self.ui.debug_log(f"tool.call name='{name}' call_id={call_id}")
@@ -6453,11 +6671,21 @@ class ChatCLI:
  self.ui.print(f"[RUNNING] {name}", style=self.ui.theme.get("tool_call"))
  except Exception:
  pass
- # Count tool calls
- try:
- tool_calls += 1
- except Exception:
- pass
+ # Count tool calls
+ try:
+ tool_calls += 1
+ except Exception:
+ pass
+ # tool.call is model output too (even when no message.delta happened yet).
+ try:
+ # Keep it simple; we only need a non-zero estimate.
+ model_output_chars += len(str(name or ""))
+ model_output_chars += len(json.dumps(args, ensure_ascii=False))
+ except Exception:
+ try:
+ model_output_chars += len(str(args))
+ except Exception:
+ pass

  # Track args for troubleshooting and broadcast to WS clients
  if call_id:
@@ -7910,12 +8138,35 @@ class ChatCLI:
  if DEBUG_SSE:
  self.ui.print(f"[debug] unhandled event: {event} payload={truncate_json(data, 800)}", style=self.ui.theme["dim"])

- # If stream ended without a message.completed, render a fallback info box
- # Ensure the indicator is stopped on abnormal termination and fully cleared
- try:
- await _indicator_stop(clear=True)
- except Exception:
- pass
+ # If stream ended without a message.completed, render a fallback info box
+ # Ensure the indicator is stopped on abnormal termination and fully cleared
+ try:
+ await _indicator_stop(clear=True)
+ except Exception:
+ pass
+ # If cancelled via Ctrl+C, return *but* still show an estimated token/usage notice.
+ if self._stream_cancelled:
+ try:
+ # Attempt to compute turn duration if we have a start timestamp.
+ turn_secs = None
+ try:
+ now_pc = time.perf_counter()
+ if self._turn_started_at is not None:
+ turn_secs = float(now_pc - float(self._turn_started_at))
+ except Exception:
+ turn_secs = None
+ self._render_cancelled_usage_notice(
+ model_label=(self._current_turn.get("model") if isinstance(self._current_turn, dict) else None),
+ est_completion_chars=int(model_output_chars or 0),
+ events_total=int(_events_total or 0),
+ deltas_total=int(_deltas_total or 0),
+ bytes_total=int(_bytes_total or 0),
+ tool_calls=int(tool_calls or 0),
+ turn_secs=turn_secs,
+ )
+ except Exception:
+ pass
+ return "".join(assistant_buf)
  buf_str2 = "".join(assistant_buf)
  self.ui.ensure_newline(buf_str2)
  # Use a visible notice (non-dim) so users are aware something ended unexpectedly
@@ -8124,7 +8375,7 @@ class ChatCLI:
  result_text = await do_stream(payload)

  # Auto-restart after summarization: clear conversation and resend same user input with injections
- if self._restart_after_summary:
+ if self._restart_after_summary and not self._stream_cancelled:
  self._restart_after_summary = False
  # Reset conversation to a fresh session (preserve system prompt)
  self.history = [{"role": "system", "content": self.system_prompt}] if self.system_prompt else []
@@ -8209,14 +8460,14 @@ class ChatCLI:
  return await do_stream(new_payload)

  # If we marked an auto-retry due to provider output size limits, retry once using the same payload
- if self._auto_retry_after_tailed:
- self._auto_retry_after_tailed = False
+ if self._auto_retry_after_tailed and not self._stream_cancelled:
+ self._auto_retry_after_tailed = False
  self.ui.warn("Retrying turn with tailed file content due to provider output size limit...")
  return await do_stream(payload)

  # Generic retry: when stream ended without a message.completed, retry the last model turn
  max_attempts = 3
- while (not last_completed) and (attempts_done < max_attempts):
+ while (not last_completed) and (not self._stream_cancelled) and (attempts_done < max_attempts):
  attempts_done += 1
  # Emit CLI/WS notice
  try:
@@ -8250,7 +8501,7 @@ class ChatCLI:
  result_text = await do_stream(payload)

  # If still not completed after retries, emit a final failure notice
- if not last_completed:
+ if not last_completed and not self._stream_cancelled:
  try:
  self.ui.print(f"[retry] failed after {attempts_done} attempt(s)", style=self.ui.theme["warn"]) # type: ignore
  await self._ws_broadcast("retry.failed", {"attempts": attempts_done, "max_attempts": max_attempts})
@@ -9011,7 +9262,7 @@ class ChatCLI:
  else:
  self.text_verbosity = curv or "medium"

- # --- 7) Tool preambles (GPT-5 only) ---
+ # --- 7) Tool preambles (GPT-5 / GPT-5.2 only; non-Codex) ---
  if self._supports_preambles(self.model):
  preamble_choice = await self._menu_choice(
  "Tool call preambles",
@@ -9157,7 +9408,7 @@ class ChatCLI:
  blocks.append(txt.strip())
  except Exception:
  pass
- # Tool usage preamble (UX hint) — GPT-5 only (non-Codex)
+ # Tool usage preamble (UX hint) — GPT-5 / GPT-5.2 only (non-Codex)
  try:
  if bool(getattr(self, "preambles_enabled", False)) and self._supports_preambles(self.model):
  blocks.append(
METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: henosis-cli
- Version: 0.6.10
+ Version: 0.6.11
  Summary: henosis-cli — interactive CLI for the Henosis multi-provider streaming chat backend, with optional local tools.
  Author-email: henosis <henosis@henosis.us>
  License-Expression: LicenseRef-Proprietary
RECORD CHANGED
@@ -1,11 +1,11 @@
- cli.py,sha256=syMx_cXQLS_wEsL7OnLc84gWEoAM3NNW2M_ypQSqeTQ,537230
+ cli.py,sha256=rVE8sqFR3bbW4r4NPsqJaYnq_tVMGFokdMyt9ul837M,548601
  henosis_cli_tools/__init__.py,sha256=x3uaN_ub32uALx_oURna0VnuoSsj7i9NYY6uRsc2ZzM,1147
  henosis_cli_tools/cli_entry.py,sha256=OZTe_s9Hfy3mcsYG77T3RTdtCDod-CSwmhskbXjmmqs,1713
  henosis_cli_tools/input_engine.py,sha256=qUCSvTTiqmujELkVbpvMXOpZWxTGDhDTMQccU7yZJto,24126
  henosis_cli_tools/settings_ui.py,sha256=sUlgUIev4BhApgZf80U3GpPUufaMWnguOP8HLgZmjfg,22809
  henosis_cli_tools/tool_impl.py,sha256=iSdkDIAecgphXrS8Nd702SwhZaEJ9zyL4ieeH_mmjJo,46213
- henosis_cli-0.6.10.dist-info/METADATA,sha256=2rol5BYKPEJBWBrdM4Vj-ykAiLUx1UaL_nlUUcBpKvQ,5749
- henosis_cli-0.6.10.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- henosis_cli-0.6.10.dist-info/entry_points.txt,sha256=KmXDdmIjq1SVMs8FK3wHPA2i89RMaerzZHIetllMLIk,74
- henosis_cli-0.6.10.dist-info/top_level.txt,sha256=u7XMBcJ8Kb0n91WaSU-4Db8yURSUXFuOxGMsXti0a-g,34
- henosis_cli-0.6.10.dist-info/RECORD,,
+ henosis_cli-0.6.11.dist-info/METADATA,sha256=2_IkPpWTUEBI_jXrNgzpkj3lRYAWHaToYziQX2clJm8,5749
+ henosis_cli-0.6.11.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ henosis_cli-0.6.11.dist-info/entry_points.txt,sha256=KmXDdmIjq1SVMs8FK3wHPA2i89RMaerzZHIetllMLIk,74
+ henosis_cli-0.6.11.dist-info/top_level.txt,sha256=u7XMBcJ8Kb0n91WaSU-4Db8yURSUXFuOxGMsXti0a-g,34
+ henosis_cli-0.6.11.dist-info/RECORD,,