npm - @bitseek/hermes-webui - Versions diffs - 0.1.0-beta.0 → 0.1.0-beta.1 - Mend

@bitseek/hermes-webui 0.1.0-beta.0 → 0.1.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

package/vendor/agent-frontend-shell/api/run_journal.py CHANGED Viewed

@@ -262,6 +262,7 @@ def stale_interrupted_event(session_id: str, run_id: str, *, after_seq: int | No
         return None
     payload = {
         "type": "interrupted",
+        "recovery_control": True,
         "message": "The live worker stopped before this run finished.",
         "hint": "The transcript was restored to the last journaled event. Start a new turn if you still need the task to continue.",
         "session_id": session_id,

package/vendor/agent-frontend-shell/api/state_sync.py CHANGED Viewed

@@ -16,11 +16,12 @@ any double-counting risk.
 import logging
 import os
 from pathlib import Path
+from typing import Optional
 logger = logging.getLogger(__name__)
-def _get_state_db(profile: str=None):
+def _get_state_db(profile: Optional[str] = None):
     """Get a SessionDB instance for a profile's state.db.
     When ``profile`` is provided the function resolves *that* profile's
@@ -104,7 +105,7 @@ def _get_state_db(profile: str=None):
         return None
-def sync_session_start(session_id: str, model=None, profile: str=None) -> None:
+def sync_session_start(session_id: str, model=None, profile: Optional[str] = None) -> None:
     """Register a WebUI session in state.db (idempotent).
     Called when a session's first message is sent.
@@ -132,8 +133,8 @@ def sync_session_start(session_id: str, model=None, profile: str=None) -> None:
 def sync_session_usage(session_id: str, input_tokens: int=0, output_tokens: int=0,
-                       estimated_cost=None, model=None, title: str=None,
-                       message_count: int=None, profile: str=None) -> None:
+                       estimated_cost=None, model=None, title: Optional[str] = None,
+                       message_count: Optional[int] = None, profile: Optional[str] = None) -> None:
     """Update token usage and title for a WebUI session in state.db.
     Called after each turn completes. Uses absolute=True to set totals
     (the WebUI Session already accumulates across turns).

package/vendor/agent-frontend-shell/api/streaming.py CHANGED Viewed

@@ -203,7 +203,8 @@ WebUI progress guidance:
 - Each update should say what you are about to check, what you just confirmed, or why the next tool call is needed.
 - Keep updates concise, factual, and in the user's language. One or two short sentences are enough.
 - Do not reveal hidden reasoning, chain-of-thought, private scratchpads, secrets, raw logs, or long tool output.
-- Do not include terse planning fragments or scratchpad shorthand in visible assistant text. Avoid fragments like "Need check logs", "Need inspect email", or "maybe invite"; either omit them or rewrite them as clear user-facing progress.
+- Final visible assistant replies must be clear, user-facing, and in the user's language, not private planning notes.
+- Do not include terse planning fragments or scratchpad shorthand in visible assistant text. Avoid fragments like "Need script", "Need check logs", "Need inspect email", or "maybe invite"; either omit them or rewrite them as clear user-facing progress.
 - For direct answers or very short tasks, skip progress updates and answer normally.
 """.strip()
@@ -244,6 +245,7 @@ def _webui_surface_context_prompt(surface_context: Optional[dict]) -> str:
 def _webui_ephemeral_system_prompt(
     personality_prompt: Optional[str],
     surface_context: Optional[dict] = None,
+    config_data: Optional[dict] = None,
 ) -> str:
     """Build WebUI-only runtime instructions that are not persisted to history."""
     parts = []
@@ -253,6 +255,9 @@ def _webui_ephemeral_system_prompt(
     if surface_prompt:
         parts.append(surface_prompt)
     parts.append(_WEBUI_PROGRESS_PROMPT)
+    delivery_prompt = _webui_delivery_context_prompt(config_data)
+    if delivery_prompt:
+        parts.append(delivery_prompt)
     return "\n\n".join(part for part in parts if part)
@@ -497,48 +502,45 @@ def _public_prefill_context_status(prefill_context: dict) -> dict:
     }
-def _webui_session_context_message(config_data: Optional[dict] = None) -> dict:
-    """Return a compact browser-session context message for WebUI agents.
+def _webui_delivery_context_prompt(config_data: Optional[dict] = None) -> str:
+    """Return platform/delivery context for the ephemeral system prompt.
+    Connected platforms, home channels, and scheduled-task delivery hints
+    are injected into the system prompt (safe for role alternation) rather
+    than as a prefill ``user`` message, which strict chat templates (Mistral,
+    Gemma) reject.
-    Messaging gateway sessions get a small "Current Session Context" block that
-    tells the agent where the turn came from, which platforms are connected, and
-    how scheduled-task delivery should be interpreted. Browser-originated WebUI
-    turns do not have a Gateway ``SessionSource``, but they still benefit from a
-    safe equivalent so the model understands that this is a WebUI session, not a
-    literal Telegram thread, while retaining access to configured messaging
-    delivery targets.
+    NOTE: This function only covers platform/delivery info.  The session
+    framing (\"Source: WebUI\", \"Session ID\", \"Profile\", \"Workspace\") is
+    emitted by ``_webui_surface_context_prompt()``, which is called from
+    ``_webui_ephemeral_system_prompt()`` before this helper.  If you
+    refactor this area, keep that surface call in place — the two helpers
+    together produce the full session context block.
     """
     cfg = config_data if isinstance(config_data, dict) else get_config()
-    lines = [
-        "## Current Session Context",
-        "",
-        "**Source:** WebUI (browser session)",
-        "**Session type:** Browser-originated Hermes WebUI chat. This is a separate WebUI transcript, not the same live Telegram/Discord/other messaging thread.",
-    ]
+    lines: list[str] = []
+    display_hermes_home = None
     try:
-        from api.profiles import get_active_profile_name
-        profile_name = get_active_profile_name() or "default"
+        from hermes_constants import get_hermes_home, display_hermes_home as _dh
+        display_hermes_home = _dh
     except Exception:
-        profile_name = "default"
-    lines.append(f"**Active Hermes profile:** {profile_name}")
+        get_hermes_home = None  # type: ignore[assignment]
     connected = ["local (files on this machine)"]
     try:
-        from hermes_constants import get_hermes_home, display_hermes_home
-        state_path = get_hermes_home() / "gateway_state.json"
-        if state_path.exists():
-            raw_state = json.loads(state_path.read_text(encoding="utf-8"))
-            platforms = raw_state.get("platforms") if isinstance(raw_state, dict) else {}
-            if isinstance(platforms, dict):
-                for name in sorted(platforms):
-                    pdata = platforms.get(name) or {}
-                    if isinstance(pdata, dict) and pdata.get("state") == "connected" and name != "local":
-                        connected.append(f"{name}: Connected ✓")
+        if get_hermes_home is not None:
+            state_path = get_hermes_home() / "gateway_state.json"
+            if state_path.exists():
+                raw_state = json.loads(state_path.read_text(encoding="utf-8"))
+                platforms = raw_state.get("platforms") if isinstance(raw_state, dict) else {}
+                if isinstance(platforms, dict):
+                    for name in sorted(platforms):
+                        pdata = platforms.get(name) or {}
+                        if isinstance(pdata, dict) and pdata.get("state") == "connected" and name != "local":
+                            connected.append(f"{name}: Connected ✓")
     except Exception:
-        display_hermes_home = None  # type: ignore[assignment]
+        pass
     lines.append(f"**Connected Platforms:** {', '.join(connected)}")
     home_channels = {}
@@ -566,7 +568,7 @@ def _webui_session_context_message(config_data: Optional[dict] = None) -> dict:
     lines.append("**Delivery options for scheduled tasks:**")
     lines.append("- `\"origin\"` → Back to this WebUI/browser session when the WebUI runtime supports origin delivery; otherwise prefer an explicit platform target.")
     try:
-        home_display = display_hermes_home() if display_hermes_home else "~/.hermes"  # type: ignore[name-defined]
+        home_display = display_hermes_home() if display_hermes_home else "~/.hermes"
     except Exception:
         home_display = "~/.hermes"
     lines.append(f"- `\"local\"` → Save to local files only ({home_display}/cron/output/)")
@@ -575,14 +577,19 @@ def _webui_session_context_message(config_data: Optional[dict] = None) -> dict:
     lines.append("")
     lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID. Do not invent private IDs.*")
-    return {"role": "user", "content": "\n".join(lines)}
+    return "\n".join(lines)
 def _prefill_messages_with_webui_context(prefill_context: dict, config_data: Optional[dict] = None) -> list[dict]:
-    """Combine recall prefill with WebUI's gateway-like session context."""
-    messages = list(prefill_context.get("messages") or [])
-    messages.append(_webui_session_context_message(config_data))
-    return messages
+    """Combine recall prefill with WebUI session context.
+    The session context (connected platforms, delivery hints) is injected
+    via ``_webui_ephemeral_system_prompt`` / ``ephemeral_system_prompt``
+    instead of as a prefill ``user`` message.  Adding it as a user message
+    creates two consecutive user turns (prefill + actual) which strict chat
+    templates (Mistral, Gemma) reject with a Jinja 500.
+    """
+    return list(prefill_context.get("messages") or [])
 def _has_new_assistant_reply(all_messages: list, prev_count: int) -> bool:
@@ -1569,24 +1576,109 @@ def _detect_title_language(text: str) -> str:
     return ''
+def _script_counts(text: str) -> dict:
+    """Return per-script alphabetic character counts for *text*.
+    Buckets: ``latin``, ``cjk`` (Han/Hiragana/Katakana/Hangul), ``cyrillic``,
+    ``arabic``, ``hebrew``, ``greek``, ``devanagari``. Non-alphabetic and
+    unclassified characters are ignored.
+    """
+    counts: dict[str, int] = {}
+    for ch in str(text or ''):
+        if not ch.isalpha():
+            continue
+        o = ord(ch)
+        if (0x0041 <= o <= 0x024F) or (0x1E00 <= o <= 0x1EFF):
+            bucket = 'latin'
+        elif (
+            (0x4E00 <= o <= 0x9FFF) or (0x3400 <= o <= 0x4DBF)   # Han
+            or (0x3040 <= o <= 0x30FF)                            # Hiragana/Katakana
+            or (0xAC00 <= o <= 0xD7A3) or (0x1100 <= o <= 0x11FF) # Hangul
+        ):
+            bucket = 'cjk'
+        elif 0x0400 <= o <= 0x04FF:
+            bucket = 'cyrillic'
+        elif (0x0600 <= o <= 0x06FF) or (0x0750 <= o <= 0x077F):
+            bucket = 'arabic'
+        elif 0x0590 <= o <= 0x05FF:
+            bucket = 'hebrew'
+        elif 0x0370 <= o <= 0x03FF:
+            bucket = 'greek'
+        elif 0x0900 <= o <= 0x097F:
+            bucket = 'devanagari'
+        else:
+            continue
+        counts[bucket] = counts.get(bucket, 0) + 1
+    return counts
+def _dominant_script(text: str) -> str:
+    """Return a coarse writing-script bucket for *text*, or '' when undecidable.
+    Script-level (not language-level) classification is cheap and dependency-free.
+    Returns the dominant script only when it holds a clear (≥60%) majority of the
+    alphabetic characters, so mixed/borrowed text doesn't flip the bucket. Used
+    to establish the conversation start's expected script for cross-script title
+    drift detection (#3293).
+    """
+    counts = _script_counts(text)
+    total = sum(counts.values())
+    if total < 2:
+        return ''
+    top, top_n = max(counts.items(), key=lambda kv: kv[1])
+    if top_n / total >= 0.6:
+        return top
+    return ''
 def _title_prompt_language_rule(user_text: str) -> str:
     return "Match the language of the user question.\n"
 def _title_language_mismatch(user_text: str, title: str) -> bool:
-    """Reject obvious English titles for German conversation starts."""
-    if _detect_title_language(user_text) != 'de':
-        return False
-    candidate = str(title or '').strip().lower()
+    """Reject titles whose language clearly diverges from the conversation start.
+    Two independent signals:
+    1. Cross-script drift (#3293): when the conversation start has a clear
+       dominant writing script (e.g. latin/English) and the generated title
+       introduces a *substantial* amount of a different script (e.g. CJK or
+       Cyrillic), reject. This is language-agnostic and catches the common
+       "English chat -> Chinese/Spanish/Russian title" drift. Because titles are
+       short and frequently embed a borrowed Latin technical term (e.g. a CJK
+       title containing the word "Python"), the title side uses a proportion
+       threshold (>=35% of the title's alphabetic characters in a non-start
+       script, min 2 chars) rather than a strict majority -- so a CJK title with
+       one English word still trips, while an English title with a single
+       foreign place-name does not.
+    2. The legacy German-start → English-title heuristic, preserved verbatim so
+       the original behavior keeps working for same-script (latin) drift that
+       the script check can't see.
+    """
+    candidate = str(title or '').strip()
     if not candidate:
         return False
-    if _detect_title_language(candidate) == 'de':
+    # (1) Cross-script mismatch — language-agnostic.
+    user_script = _dominant_script(user_text)
+    if user_script:
+        title_counts = _script_counts(candidate)
+        title_total = sum(title_counts.values())
+        if title_total >= 2:
+            for script, n in title_counts.items():
+                if script != user_script and n >= 2 and (n / title_total) >= 0.35:
+                    return True
+    # (2) Legacy same-script German→English heuristic.
+    if _detect_title_language(user_text) != 'de':
+        return False
+    candidate_lower = candidate.lower()
+    if _detect_title_language(candidate_lower) == 'de':
         return False
     english_markers = {
         'old', 'image', 'display', 'issue', 'problem', 'discussion', 'conversation',
         'session', 'title', 'fix', 'bug', 'attachment', 'attachments', 'context',
     }
-    tokens = re.findall(r'[a-z]+', candidate)
+    tokens = re.findall(r'[a-z]+', candidate_lower)
     english_hits = sum(1 for tok in tokens if tok in english_markers)
     return english_hits >= 2
@@ -1794,6 +1886,17 @@ def generate_title_raw_via_aux(
         provider = ''
     model = model or configured.get('model', '') or ''
     base_url = base_url or configured.get('base_url', '') or ''
+    try:
+        from api.profiles import _split_webui_provider_model_value
+        normalized_model, normalized_provider = _split_webui_provider_model_value(
+            model or None,
+            provider or None,
+        )
+        model = normalized_model or ''
+        provider = normalized_provider or ''
+    except ValueError:
+        pass
     api_key = ''
     if not caller_supplied_route:
         api_key = str(configured.get('api_key', '') or '').strip()
@@ -2262,6 +2365,35 @@ def _run_background_title_refresh(session_id: str, user_text: str, assistant_tex
         logger.debug("Background title refresh failed for session %s", session_id, exc_info=True)
+def generate_session_title_for_session(session, *, prefer_latest: bool = False, agent=None) -> tuple[Optional[str], str, str]:
+    """Generate a session title on demand from persisted conversation messages.
+    This helper powers explicit UI title-regeneration controls. It intentionally
+    does not inspect or mutate ``llm_title_generated``; callers decide whether
+    replacing the current title is allowed, then persist the returned title.
+    """
+    messages = getattr(session, 'messages', None) or []
+    if prefer_latest:
+        user_text, assistant_text = _latest_exchange_snippets(messages)
+    else:
+        user_text, assistant_text = _first_exchange_snippets(messages)
+    if not user_text:
+        return None, 'empty_user_message', ''
+    from api import profiles as profiles_api
+    with profiles_api.profile_env_for_background_worker(session, "manual title regeneration", logger_override=logger):
+        next_title, llm_status, raw_preview = _generate_llm_session_title_via_aux(user_text, assistant_text, agent=agent)
+    if next_title:
+        return next_title, llm_status, raw_preview
+    fallback_title = _fallback_title_from_exchange(user_text, assistant_text)
+    if fallback_title and not _is_generic_fallback_title(fallback_title):
+        reason = f'local_summary:{llm_status}' if llm_status else 'local_summary'
+        return fallback_title, reason, raw_preview
+    return None, llm_status or 'empty_title', raw_preview
 def _preserve_pre_compression_snapshot(s, old_sid: str) -> None:
     """Persist old_sid as a read-only pre-compression snapshot.
@@ -3081,14 +3213,25 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
     # match are context-only gaps that get spliced in before the display msg.
     if previous_display and previous_context:
         _display_id_set = {_message_identity(m) for m in previous_display}
-        _context_id_set = {_message_identity(m) for m in previous_context}
+        _context_id_set = {
+            _message_identity(m)
+            for m in previous_context
+            if not _is_context_compression_marker(m)
+        }
         _has_context_only_turns = bool(_context_id_set - _display_id_set)
         if _has_context_only_turns:
             context_keys = [_message_identity(m) for m in previous_context]
             _backfilled = []
-            _emitted = set()
+            # #3300 fix: track ONLY context rows we splice in, so the
+            # visible-display backbone is never suppressed. Sharing one set
+            # between context inserts and display rows (and _message_identity
+            # ignoring timestamps) dropped a legitimate second identical visible
+            # user turn. Display rows are always appended in order; a context
+            # row is backfilled only if it isn't already a display row and
+            # hasn't already been inserted.
+            _context_inserted = set()
             _cursor = 0
-            for _dmsg in previous_display:
+            for _display_idx, _dmsg in enumerate(previous_display):
                 _dkey = _message_identity(_dmsg)
                 if _dkey is not None:
                     _j = _cursor
@@ -3098,21 +3241,32 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
                         for _k in range(_cursor, _j):
                             _ckey = context_keys[_k]
                             _cmsg = previous_context[_k]
-                            if _ckey is not None and _ckey not in _emitted and not _is_context_compression_marker(_cmsg):
+                            if _ckey is not None and _ckey not in _context_inserted and _ckey not in _display_id_set and not _is_context_compression_marker(_cmsg):
                                 _backfilled.append(copy.deepcopy(_cmsg))
-                                _emitted.add(_ckey)
+                                _context_inserted.add(_ckey)
                         _cursor = _j + 1
-                if _dkey not in _emitted:
-                    _backfilled.append(_dmsg)
-                    if _dkey is not None:
-                        _emitted.add(_dkey)
+                    elif not any(
+                        _message_identity(_future_dmsg) in context_keys[_cursor:]
+                        for _future_dmsg in previous_display[_display_idx + 1:]
+                    ):
+                        for _k in range(_cursor, len(context_keys)):
+                            _ckey = context_keys[_k]
+                            _cmsg = previous_context[_k]
+                            if _ckey is not None and _ckey not in _context_inserted and _ckey not in _display_id_set and not _is_context_compression_marker(_cmsg):
+                                _backfilled.append(copy.deepcopy(_cmsg))
+                                _context_inserted.add(_ckey)
+                        _cursor = len(context_keys)
+                # The display row is the visible backbone — always preserve it,
+                # in order, even when an earlier (identical-content) turn or a
+                # backfilled context row shares its timestamp-less identity.
+                _backfilled.append(_dmsg)
             while _cursor < len(context_keys):
                 _ckey = context_keys[_cursor]
                 _cmsg = previous_context[_cursor]
                 _cursor += 1
-                if _ckey is not None and _ckey not in _emitted and not _is_context_compression_marker(_cmsg):
+                if _ckey is not None and _ckey not in _context_inserted and _ckey not in _display_id_set and not _is_context_compression_marker(_cmsg):
                     _backfilled.append(copy.deepcopy(_cmsg))
-                    _emitted.add(_ckey)
+                    _context_inserted.add(_ckey)
             if len(_backfilled) > len(previous_display):
                 logger.debug(
                     "Backfilled %d context-only turns into previous_display (was %d, now %d)",
@@ -5146,6 +5300,7 @@ def _run_agent_streaming(
                     'profile': getattr(s, 'profile', None),
                     'workspace': s.workspace,
                 },
+                config_data=_cfg,
             )
             _pending_started_at = getattr(s, 'pending_started_at', None)
             # Normal chat-start sets pending_started_at before spawning this thread;

package/vendor/agent-frontend-shell/api/todo_state.py ADDED Viewed

@@ -0,0 +1,122 @@
+"""Derive settled todo snapshots for session GET responses.
+The browser's Todos panel currently reconstructs state by reverse-scanning
+loaded tool messages. That works only when the latest todo tool result is inside
+the returned message window. This helper gives ``/api/session`` a compact,
+explicit ``todo_state`` sidecar derived from the full settled transcript.
+The detector intentionally mirrors ``run_agent.AIAgent._hydrate_todo_store``:
+walk messages newest-first and use the first tool message whose JSON content has
+a ``todos`` list. Empty ``todos`` is a valid snapshot so a cleared task list does
+not fall through to an older non-empty write.
+"""
+from __future__ import annotations
+import json
+import logging
+from typing import Any, Iterable, Optional, Sequence
+logger = logging.getLogger(__name__)
+VERSION = 1
+PAYLOAD_KEY = "todo_state"
+def _normalize_snapshot(data: Any) -> Optional[dict]:
+    """Return the canonical todo snapshot shape, or ``None`` for non-todo data."""
+    if not isinstance(data, dict):
+        return None
+    todos = data.get("todos")
+    if not isinstance(todos, list):
+        return None
+    summary = data.get("summary")
+    if not isinstance(summary, dict):
+        summary = {}
+    return {
+        "todos": todos,
+        "summary": summary,
+        "version": VERSION,
+    }
+def parse_todo_tool_result(function_result: Any) -> Optional[dict]:
+    """Parse a todo tool result JSON string or pre-parsed dict into a snapshot."""
+    data: Any = function_result
+    if isinstance(function_result, str):
+        try:
+            data = json.loads(function_result)
+        except (TypeError, ValueError):
+            return None
+    return _normalize_snapshot(data)
+def _message_ts_float(ts_raw: Any) -> float:
+    """Coerce a message ``timestamp`` field to a positive float, or 0.0."""
+    try:
+        return float(ts_raw) if ts_raw is not None else 0.0
+    except (TypeError, ValueError):
+        return 0.0
+def _max_timestamp_through(messages: Sequence[Any], upto_idx: int) -> float:
+    """Return the largest valid timestamp at or before ``upto_idx``."""
+    best = 0.0
+    end = min(upto_idx, len(messages) - 1)
+    for i in range(end, -1, -1):
+        msg = messages[i]
+        if not isinstance(msg, dict):
+            continue
+        best = max(best, _message_ts_float(msg.get("timestamp")))
+    return best
+def derive_todo_state(messages: Optional[Iterable[dict]]) -> Optional[dict]:
+    """Derive the latest settled todo snapshot from conversation history.
+    Returns ``None`` when the session has no todo writes. Malformed or
+    non-string tool contents are skipped so unrelated tool results never break
+    session loading.
+    """
+    if not messages:
+        return None
+    if not isinstance(messages, (list, tuple)):
+        messages = list(messages)
+    for idx in range(len(messages) - 1, -1, -1):
+        msg = messages[idx]
+        if not isinstance(msg, dict) or msg.get("role") != "tool":
+            continue
+        content = msg.get("content", "")
+        if not isinstance(content, str) or '"todos"' not in content:
+            continue
+        snapshot = parse_todo_tool_result(content)
+        if snapshot is None:
+            continue
+        ts_val = _message_ts_float(msg.get("timestamp"))
+        if ts_val <= 0:
+            ts_val = _max_timestamp_through(messages, idx)
+        if ts_val > 0:
+            snapshot["ts"] = ts_val
+        return snapshot
+    return None
+def attach_todo_state(payload: dict, messages: Optional[Iterable[dict]]) -> bool:
+    """Attach ``todo_state`` to a session payload when one can be derived.
+    Mutates ``payload`` in place. Errors are swallowed deliberately: a malformed
+    historical tool message must not make ``/api/session`` fail.
+    """
+    if not messages:
+        return False
+    try:
+        snapshot = derive_todo_state(messages)
+        if snapshot is None:
+            return False
+        payload[PAYLOAD_KEY] = snapshot
+        return True
+    except Exception:
+        logger.debug("todo_state attach failed", exc_info=True)
+        return False

package/vendor/agent-frontend-shell/api/updates.py CHANGED Viewed

@@ -1049,10 +1049,37 @@ def _schedule_restart(delay: float = 2.0) -> None:
         with _apply_lock:
             _wait_until_restart_safe()
             try:
-                os.execv(sys.executable, [sys.executable] + sys.argv)
+                # Re-exec into the just-pulled image.
+                #
+                # sys.argv[0]'s meaning depends on how the server was launched:
+                #
+                #   * Source checkout (`python server.py` via bootstrap.py /
+                #     ctl.sh / start.sh): sys.argv[0] is the SCRIPT path
+                #     (e.g. "/root/hermes-webui/server.py"), sys.executable is
+                #     the interpreter. CPython treats argv[1] as the script to
+                #     run, so we must pass [sys.executable] + sys.argv.
+                #
+                #   * Frozen/packaged build (PyInstaller, embedded zipapp,
+                #     etc.): sys.argv[0] == sys.executable == <binary>. Passing
+                #     [sys.executable] + sys.argv would re-insert the binary as
+                #     argv[1] — the kernel launches it, the interpreter treats
+                #     the binary itself as the "script" to run, and execv
+                #     effectively becomes a recursive no-op that never reaches
+                #     bind(), leaving the WebUI stuck "offline" after every
+                #     self-update. Pass argv as-is instead.
+                #
+                # Distinguish the two cases with sys.frozen (set by
+                # PyInstaller / zipapp / similar). For source checkouts the
+                # `[sys.executable] + sys.argv` form is the canonical CPython
+                # re-exec idiom (same shape Flask/Django reloaders use) and
+                # is the correct path.
+                if getattr(sys, "frozen", False):
+                    os.execv(sys.executable, sys.argv)
+                else:
+                    os.execv(sys.executable, [sys.executable] + sys.argv)
             except Exception:
-                # Last-resort: if execv fails (e.g. frozen binary), just exit
-                # so the process supervisor (start.sh / Docker) restarts us.
+                # Last-resort: if execv fails for any reason, just exit so the
+                # process supervisor (start.sh / Docker) restarts us.
                 os._exit(0)
     threading.Thread(target=_do, daemon=True).start()