npm - switchroom - Versions diffs - 0.12.26 → 0.12.28 - Mend

switchroom 0.12.26 → 0.12.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/dist/agent-scheduler/index.js +80 -80
package/dist/auth-broker/index.js +80 -80
package/dist/cli/drive-write-pretool.mjs +10 -10
package/dist/cli/skill-validate-pretool.mjs +72 -72
package/dist/cli/switchroom.js +359 -357
package/dist/host-control/main.js +99 -99
package/dist/vault/approvals/kernel-server.js +82 -82
package/dist/vault/broker/server.js +83 -83
package/package.json +2 -1
package/telegram-plugin/dist/bridge/bridge.js +112 -112
package/telegram-plugin/dist/gateway/gateway.js +368 -209
package/telegram-plugin/dist/server.js +160 -160
package/telegram-plugin/gateway/gateway.ts +55 -40
package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +188 -0
package/telegram-plugin/stderr-timestamps.ts +106 -0
package/telegram-plugin/tests/inbound-delivery-machine-dispatch.test.ts +240 -0
package/telegram-plugin/tests/stderr-timestamps.test.ts +113 -0
package/vendor/hindsight-memory/.claude-plugin/plugin.json +8 -0
package/vendor/hindsight-memory/CHANGELOG.md +32 -0
package/vendor/hindsight-memory/LICENSE +21 -0
package/vendor/hindsight-memory/README.md +329 -0
package/vendor/hindsight-memory/hooks/hooks.json +49 -0
package/vendor/hindsight-memory/scripts/drain_pending.py +190 -0
package/vendor/hindsight-memory/scripts/lib/__init__.py +0 -0
package/vendor/hindsight-memory/scripts/lib/bank.py +122 -0
package/vendor/hindsight-memory/scripts/lib/client.py +204 -0
package/vendor/hindsight-memory/scripts/lib/config.py +180 -0
package/vendor/hindsight-memory/scripts/lib/content.py +493 -0
package/vendor/hindsight-memory/scripts/lib/daemon.py +334 -0
package/vendor/hindsight-memory/scripts/lib/directives.py +119 -0
package/vendor/hindsight-memory/scripts/lib/gateway_ipc.py +126 -0
package/vendor/hindsight-memory/scripts/lib/llm.py +146 -0
package/vendor/hindsight-memory/scripts/lib/pending.py +218 -0
package/vendor/hindsight-memory/scripts/lib/state.py +196 -0
package/vendor/hindsight-memory/scripts/recall.py +873 -0
package/vendor/hindsight-memory/scripts/retain.py +286 -0
package/vendor/hindsight-memory/scripts/session_end.py +122 -0
package/vendor/hindsight-memory/scripts/session_start.py +76 -0
package/vendor/hindsight-memory/scripts/setup_hooks.py +115 -0
package/vendor/hindsight-memory/scripts/tests/__init__.py +0 -0
package/vendor/hindsight-memory/scripts/tests/test_directives.py +211 -0
package/vendor/hindsight-memory/scripts/tests/test_gateway_ipc.py +205 -0
package/vendor/hindsight-memory/scripts/tests/test_recall_integration.py +621 -0
package/vendor/hindsight-memory/settings.json +37 -0
package/vendor/hindsight-memory/skills/setup.md +24 -0
package/vendor/hindsight-memory/tests/conftest.py +94 -0
package/vendor/hindsight-memory/tests/test_bank.py +142 -0
package/vendor/hindsight-memory/tests/test_client.py +232 -0
package/vendor/hindsight-memory/tests/test_config.py +128 -0
package/vendor/hindsight-memory/tests/test_content.py +471 -0
package/vendor/hindsight-memory/tests/test_drain_pending.py +192 -0
package/vendor/hindsight-memory/tests/test_hooks.py +808 -0
package/vendor/hindsight-memory/tests/test_manifest.py +14 -0
package/vendor/hindsight-memory/tests/test_pending.py +152 -0
package/vendor/hindsight-memory/tests/test_recall_exit_codes.py +325 -0
package/vendor/hindsight-memory/tests/test_session_end_pending.py +205 -0
package/vendor/hindsight-memory/tests/test_state.py +125 -0

package/vendor/hindsight-memory/scripts/recall.py ADDED Viewed

@@ -0,0 +1,873 @@
+#!/usr/bin/env python3
+"""Auto-recall hook for UserPromptSubmit.
+Port of: before_prompt_build handler in Openclaw index.js
+Adapted for Claude Code hooks (ephemeral process, JSON stdin/stdout).
+Flow:
+  1. Read hook input from stdin (prompt, session_id, transcript_path, cwd)
+  2. (switchroom #424 4.1) Check per-session recall cache; on hit, emit
+     cached output and skip the API round-trip.
+  3. Resolve API URL (external, existing local, or auto-start daemon)
+  4. Derive bank ID (static or dynamic from project context)
+  5. Ensure bank mission is set (first use only)
+  6. Compose multi-turn query if recallContextTurns > 1
+  7. Truncate to recallMaxQueryChars
+  8. Call Hindsight recall API
+  9. Format memories and output hookSpecificOutput.additionalContext
+ 10. Persist to per-session cache for the next prompt-equal invocation.
+ 11. Save last recall to state (for PostCompact re-injection)
+Exit codes:
+  0 — normal success (incl. graceful in-flight errors like recall API
+      timeouts where we still produce a valid hookSpecificOutput).
+  0 — uncaught exception in non-debug mode. Switchroom #1070 (redo,
+      after #1085 review): recall.py is registered as a DIRECT Claude
+      Code plugin hook (`vendor/hindsight-memory/hooks/hooks.json`),
+      NOT wrapped by `bin/run-hook.sh`. Per Claude Code's
+      UserPromptSubmit hook contract, exit 2 BLOCKS the user's
+      prompt and surfaces stderr to the user — so a hindsight outage
+      would block every turn. We instead exit 0 (agent prompt
+      assembly proceeds with no memories), emit a bounded stderr
+      line for journald, and shell out directly to `switchroom
+      issues record` so the #424 issue-sink still captures the
+      failure on the operator's issues card. The subprocess call
+      is fault-tolerant — if it fails for any reason, we still
+      exit 0 with the safe stdout shape.
+  2 — debug mode any error. HINDSIGHT_DEBUG=1 operators are
+      live-debugging and want maximum signal — full traceback to
+      stderr and non-zero exit. Existing behaviour.
+"""
+import hashlib
+import json
+import os
+import sys
+import time
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from lib.bank import derive_bank_id, ensure_bank_mission
+from lib.client import HindsightClient
+from lib.config import debug_log, load_config
+from lib.content import (
+    compose_recall_query,
+    format_current_time,
+    format_memories,
+    truncate_recall_query,
+)
+from lib.daemon import get_api_url
+from lib.directives import fetch_active_directives, format_active_directives_block
+from lib.gateway_ipc import extract_chat_id_from_prompt, update_placeholder
+from lib.state import read_state, write_state
# State-file names handed to read_state / write_state.
LAST_RECALL_STATE = "last_recall.json"
RECALL_CACHE_STATE = "recall_cache.json"

# Switchroom #424 phase 4.1 — recall cache.
#
# Opt-in through the environment: HINDSIGHT_RECALL_CACHE_TTL_SECS=N.
# N = 0 (or unset) disables the cache. On a hit the script emits the
# cached `additionalContext` and skips the directive + recall API
# round-trips entirely.
#
# A hit requires (session_id, prompt, bank_id, extra_banks) to match an
# earlier entry that is still within the TTL. Because session_id is part
# of the key, a new session (agent restart, /reset, /new) always starts
# with a miss even if the configured TTL hasn't elapsed.
#
# The expected production hit rate is modest (real users rarely resubmit
# identical prompts); the win is trimming redundant recall traffic on
# session-resume re-processing and retry paths.
CACHE_ENV = "HINDSIGHT_RECALL_CACHE_TTL_SECS"

# Upper bound on the number of entries kept in the cache state file.
# When a store pushes the count past this, the entries with the oldest
# `saved_at` are evicted. 100 is comfortably above the typical session
# size (~30 inbounds) and keeps the state file small.
CACHE_MAX_ENTRIES = 100

# Switchroom #432 phase 4.4 — demote-from-recall tag.
#
# A memory tagged with any of these strings stays in the bank (it can
# still surface via reflect, manual mcp__hindsight__recall, etc.) but is
# left out of the auto-recall block injected on UserPromptSubmit. Useful
# when an over-broad "world fact" memory keeps drowning out more
# relevant recent memories.
DEMOTE_TAG_VARIANTS = ("[demote-from-recall]", "demote-from-recall", "no-recall")

# Switchroom #432 phase 4.3 — recall telemetry log.
#
# A recall that produces output (cache hit, or a fresh recall with at
# least one directive or memory to inject) appends one JSONL record to
# state/recall_log.jsonl: timestamp, session_id, bank, counts, capped
# flag, memory IDs. The file is trimmed back to RECALL_LOG_MAX_LINES
# once it grows large, so it stays bounded on chatty 24/7 agents.
# View via `switchroom memory recall-log <agent>`.
RECALL_LOG_FILE = "recall_log.jsonl"
RECALL_LOG_MAX_LINES = 5000
def _cache_ttl_secs() -> int:
    """Return the recall-cache TTL (seconds) configured via the environment.

    Reads the variable named by CACHE_ENV. An unset/blank value, a value
    that does not parse as an integer, or a value <= 0 all yield 0, which
    callers interpret as "cache disabled".
    """
    configured = os.environ.get(CACHE_ENV, "").strip()
    if configured == "":
        return 0
    try:
        ttl = int(configured)
    except ValueError:
        return 0
    # Clamp negatives to the "disabled" sentinel.
    return max(ttl, 0)
def _cache_key(session_id: str, prompt: str, bank_id: str, extra_banks: list) -> str:
    """Build the SHA-256 hex digest used as the recall-cache key.

    The digest covers session_id, prompt, bank_id and the extra bank
    list, joined with the ASCII unit separator (0x1F). Falsy fields are
    hashed as empty strings. The extra banks are sorted first so their
    order in config does not change the key; including session_id means
    a different session can never hit another session's entry.
    """
    banks_field = ",".join(sorted(extra_banks or []))
    fields = (session_id or "", prompt or "", bank_id or "", banks_field)
    digest = hashlib.sha256()
    digest.update("\x1f".join(fields).encode("utf-8"))
    return digest.hexdigest()
def _cache_lookup(key: str, ttl_secs: int) -> str | None:
    """Return the cached `additionalContext` for `key`, or None on a miss.

    A hit requires the cache state to contain a well-formed entry for
    `key` (a dict with a numeric `saved_at` and a string `context`)
    whose age does not exceed `ttl_secs`. A non-positive `ttl_secs`
    always misses without touching the state file.

    Malformed state (wrong container types, missing or non-numeric
    timestamp, NaN timestamp) is treated as a miss rather than raising,
    so the caller falls through to a fresh recall. Exceptions raised by
    `read_state` itself are not caught here.
    """
    if ttl_secs <= 0:
        return None
    state = read_state(RECALL_CACHE_STATE, {})
    if not isinstance(state, dict):
        return None
    entries = state.get("entries")
    if not isinstance(entries, dict):
        return None
    entry = entries.get(key)
    if not isinstance(entry, dict):
        return None
    saved_at = entry.get("saved_at")
    context = entry.get("context")
    # bool is a subclass of int; a stray true/false is not a timestamp.
    if isinstance(saved_at, bool) or not isinstance(saved_at, (int, float)):
        return None
    if not isinstance(context, str):
        return None
    age = time.time() - saved_at
    # Written as "not <=" so a NaN timestamp (every comparison False)
    # counts as expired instead of living forever.
    if not age <= ttl_secs:
        return None
    return context
def _cache_store(key: str, context: str) -> None:
    """Insert or overwrite the cache entry for `key` and persist the state.

    The entry records `context` together with the current time. If the
    number of entries then exceeds CACHE_MAX_ENTRIES, the entries with
    the smallest `saved_at` are removed until the cap is met. Lookups do
    not refresh `saved_at`, so eviction is oldest-written-first.

    State that is not shaped as expected (non-dict state, non-dict
    `entries`) is replaced by a fresh container instead of raising.
    Entries with a missing or non-numeric `saved_at` sort as oldest and
    are evicted first. Exceptions from `read_state` / `write_state` are
    not caught here.
    """
    state = read_state(RECALL_CACHE_STATE, {})
    if not isinstance(state, dict):
        state = {}
    entries = state.get("entries")
    if not isinstance(entries, dict):
        entries = {}

    now = time.time()
    entries[key] = {"context": context, "saved_at": now}

    def _saved_at_rank(entry_key):
        # Always return a float: mixing None with numbers in the sort
        # key would make sorted() raise TypeError on a damaged file.
        entry = entries[entry_key]
        stamp = entry.get("saved_at") if isinstance(entry, dict) else None
        if isinstance(stamp, bool) or not isinstance(stamp, (int, float)):
            return 0.0
        if stamp != stamp:  # NaN would make the ordering undefined
            return 0.0
        return float(stamp)

    overflow = len(entries) - CACHE_MAX_ENTRIES
    if overflow > 0:
        for stale_key in sorted(entries, key=_saved_at_rank)[:overflow]:
            entries.pop(stale_key, None)

    state["entries"] = entries
    state["updated_at"] = now
    write_state(RECALL_CACHE_STATE, state)
def _emit_cached_context(context: str) -> None:
    """Write a UserPromptSubmit hookSpecificOutput carrying `context` to stdout.

    The JSON shape is the same one the fresh-recall path emits, so a
    cache hit is indistinguishable from a fresh recall on the consuming
    side.
    """
    hook_output = {
        "hookEventName": "UserPromptSubmit",
        "additionalContext": context,
    }
    envelope = {"hookSpecificOutput": hook_output}
    json.dump(envelope, sys.stdout)
def _is_demoted_memory(memory) -> bool:
    """Report whether `memory` carries a demote-from-recall tag.

    Switchroom #432 phase 4.4. A memory counts as demoted when it is a
    dict whose `tags` value is a list containing at least one string
    that, after trimming surrounding whitespace, equals one of
    DEMOTE_TAG_VARIANTS. Matching is case-sensitive. Non-dict memories,
    non-list `tags`, and non-string tag items are treated as untagged.
    """
    if not isinstance(memory, dict):
        return False
    tags = memory.get("tags")
    if not isinstance(tags, list):
        return False
    return any(
        isinstance(tag, str) and tag.strip() in DEMOTE_TAG_VARIANTS
        for tag in tags
    )
+# Switchroom #475 — lexical-overlap relevance gate.
+#
+# Hindsight's HTTP API does not return similarity scores. Without a
+# score the existing `recallMaxMemories` cap acts as a *floor* on
+# low-relevance prompts: weak matches still fill the slot up to N,
+# mis-steering the model. This gate computes Jaccard overlap between
+# the user's query terms and each memory's text terms, and drops
+# memories below a configurable threshold.
+#
+# Threshold default is 0.0 (disabled) so the gate is opt-in initially.
+# Operators tune via `memory.recall.min_overlap` in switchroom.yaml or
+# `HINDSIGHT_RECALL_MIN_OVERLAP=0.15` env. Telemetry surfaces the dropped
+# count via the existing recall_log.jsonl (#432 4.3) under
+# `overlap_dropped`, so the gate's effect is observable per turn from
+# `switchroom memory recall-log <agent>`.
+#
+# A small English stop-word set is removed from both sides before the
+# overlap is computed — common-word coincidence is not a real signal.
+# Token comparison is case-insensitive and strips punctuation. The set
+# is intentionally tight; we'd rather miss a borderline drop than
+# silently throw out a real match.
# English stop words ignored on both sides of the overlap computation.
_OVERLAP_STOPWORDS = frozenset(
    """
    a an and any are as at
    be been being but by
    can could did do does doing
    for from
    had has have having how
    i if in into is it its
    me my
    of on one or
    should so
    that the their them then there these they
    this to
    was we were what when where which who
    why will with would you your
    """.split()
)


def _overlap_tokens(text) -> set:
    """Split `text` into the set of lowercased terms used for overlap scoring.

    A term is a maximal run of alphabetic characters, lowercased; every
    other character (punctuation, digits, whitespace) acts as a
    separator. Terms of length <= 1 and terms listed in
    _OVERLAP_STOPWORDS are discarded. Non-string or empty input yields
    an empty set.
    """
    if not isinstance(text, str) or not text:
        return set()
    # Turn every non-letter into a space so str.split() yields exactly
    # the alphabetic runs.
    normalized = "".join(ch.lower() if ch.isalpha() else " " for ch in text)
    return {
        term
        for term in normalized.split()
        if len(term) > 1 and term not in _OVERLAP_STOPWORDS
    }
def jaccard_overlap(query: str, memory_text: str) -> float:
    """Return the Jaccard similarity of the term sets of two texts.

    Both texts are tokenized with `_overlap_tokens`; the score is
    |intersection| / |union|, a float in [0.0, 1.0]. If either side has
    no terms left after tokenizing, the score is 0.0 — when overlap
    can't be computed the memory is treated as non-matching.
    """
    query_terms = _overlap_tokens(query)
    memory_terms = _overlap_tokens(memory_text)
    if not (query_terms and memory_terms):
        return 0.0
    shared = query_terms.intersection(memory_terms)
    combined = query_terms.union(memory_terms)
    if not combined:
        return 0.0
    return len(shared) / len(combined)
def _filter_by_overlap(results, query: str, threshold: float):
    """Keep only memories whose overlap with `query` reaches `threshold`.

    Each memory's `text` field (empty string for non-dict items) is
    scored against `query` with `jaccard_overlap`. A threshold <= 0
    disables the gate: `results` is returned untouched and nothing is
    scored.

    Returns a `(kept_results, dropped_count)` tuple.
    """
    if threshold <= 0:
        return results, 0
    survivors = []
    rejected = 0
    for memory in results:
        body = memory.get("text", "") if isinstance(memory, dict) else ""
        passes = jaccard_overlap(query, body) >= threshold
        if not passes:
            rejected += 1
            continue
        survivors.append(memory)
    return survivors, rejected
def _write_recall_log(entry: dict) -> None:
    """Append `entry` as one JSON line to the recall telemetry log.

    Switchroom #432 phase 4.3. The log lives at
    $CLAUDE_PLUGIN_DATA/state/<RECALL_LOG_FILE>; when CLAUDE_PLUGIN_DATA
    is unset or empty nothing is written.

    Growth is bounded lazily: only once the file is larger than
    RECALL_LOG_MAX_LINES * 250 bytes is it read back, and if it then
    holds more than RECALL_LOG_MAX_LINES lines it is cut down to the
    last RECALL_LOG_MAX_LINES. The trimmed content is written to a
    temporary sibling file and swapped in with os.replace, so an
    interruption mid-trim leaves the previous log intact rather than a
    truncated one.

    Telemetry must never block recall, so every error (serialization,
    filesystem, decoding) is swallowed and the function returns None.
    """
    try:
        plugin_data = os.environ.get("CLAUDE_PLUGIN_DATA", "")
        if not plugin_data:
            return
        log_dir = os.path.join(plugin_data, "state")
        os.makedirs(log_dir, exist_ok=True)
        log_path = os.path.join(log_dir, RECALL_LOG_FILE)
        line = json.dumps(entry, separators=(",", ":")) + "\n"

        # Append first; trimming is a separate, rarely-taken step.
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(line)

        # Size gate: skip the read-back entirely while the file is
        # below the byte budget.
        if os.path.getsize(log_path) <= RECALL_LOG_MAX_LINES * 250:
            return
        with open(log_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        if len(lines) <= RECALL_LOG_MAX_LINES:
            return

        # Write-then-rename so the live log is never left half-written.
        tmp_path = f"{log_path}.{os.getpid()}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.writelines(lines[-RECALL_LOG_MAX_LINES:])
            os.replace(tmp_path, log_path)
        except OSError:
            # Trim failed: keep the untrimmed log, drop the leftover.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
    except Exception:
        # Deliberate best-effort: telemetry is never load-bearing.
        pass
def read_transcript_messages(transcript_path: str) -> list:
    """Read chat messages from a JSONL transcript for multi-turn context.

    Two line shapes are recognised:

    * Claude Code nested format —
      {type: "user"|"assistant", message: {role: ..., content: ...}, ...};
      the inner `message` dict is returned when it has a truthy `role`.
    * Flat format (testing / future compatibility) —
      {role: ..., content: ...}; the entry itself is returned.

    Everything else is skipped: blank lines, lines that are not valid
    JSON, and lines that decode to something other than a JSON object
    (arrays, numbers, strings, null). Undecodable bytes are replaced
    rather than aborting the read, so one damaged line cannot hide the
    rest of the transcript.

    Returns the messages in file order. An empty/missing path or a file
    that cannot be opened yields whatever was collected so far (an
    empty list if nothing was read).
    """
    if not transcript_path or not os.path.isfile(transcript_path):
        return []
    messages = []
    try:
        with open(transcript_path, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON is not necessarily an object; `.get` below
                # would raise AttributeError on a list / str / number.
                if not isinstance(entry, dict):
                    continue
                # Claude Code nested format: {type: "user", message: {role, content}}
                if entry.get("type") in ("user", "assistant"):
                    msg = entry.get("message", {})
                    if isinstance(msg, dict) and msg.get("role"):
                        messages.append(msg)
                # Flat format (testing / future compatibility)
                elif "role" in entry and "content" in entry:
                    messages.append(entry)
    except OSError:
        pass
    return messages
def main():
    """Run the UserPromptSubmit auto-recall hook once.

    Order of operations as implemented here:
      1. Load config; stop if `autoRecall` is off.
      2. Parse the hook input (a JSON object) from stdin and pull out
         the prompt; stop on unreadable input, a prompt shorter than
         5 characters, or a bare acknowledgement ("ok", "thanks!", ...).
      3. Push a "recalling memories" placeholder to the gateway when the
         prompt carries a chat id.
      4. Resolve the API URL, build the client, derive the bank id.
      5. If the recall cache is enabled, look up the prompt; on a hit
         emit the cached context, log the hit, and stop.
      6. Ensure the bank mission, compose and truncate the query, fetch
         active directives, recall from the primary and additional
         banks.
      7. Apply the demote-tag filter, the lexical-overlap gate and the
         count cap, then format the memories block.
      8. If there is anything to inject: save it to state, store it in
         the cache, append a telemetry record and write the
         hookSpecificOutput JSON to stdout.

    Returns None on every path. Failures of the recall calls are
    reported on stderr and do not stop the directives block from being
    emitted. Exceptions that are not handled inline propagate to the
    caller.
    """
    config = load_config()
    if not config.get("autoRecall"):
        debug_log(config, "Auto-recall disabled, exiting")
        return
    # Read hook input from stdin. ValueError covers both malformed JSON
    # (json.JSONDecodeError) and undecodable input bytes.
    try:
        hook_input = json.load(sys.stdin)
    except (ValueError, EOFError):
        print("[Hindsight] Failed to read hook input", file=sys.stderr)
        return
    # Valid JSON that isn't an object (list, string, null, ...) can't be
    # a hook payload; every access below assumes a dict.
    if not isinstance(hook_input, dict):
        print("[Hindsight] Failed to read hook input", file=sys.stderr)
        return
    debug_log(config, f"Hook input keys: {list(hook_input.keys())}")
    # Extract user query — hooks-reference.md documents "prompt", but some
    # Claude Code sources reference "user_prompt". Accept both defensively.
    raw_prompt = hook_input.get("prompt") or hook_input.get("user_prompt") or ""
    prompt = raw_prompt.strip() if isinstance(raw_prompt, str) else ""
    if not prompt or len(prompt) < 5:
        debug_log(config, "Prompt too short for recall, skipping")
        return
    # Switchroom-local: skip recall on conversational acks.
    #
    # The 5-char short-circuit catches `ok`/`yes`/`no`/`ty` but passes
    # longer acks like `thanks!`, `got it`, `see you tomorrow` that
    # don't benefit from recall. Recall costs ~1-2s (low budget) to
    # ~5s (mid budget) per turn — wasted on "I acknowledge" replies
    # where the model is going to produce a one-liner regardless of
    # what came back.
    #
    # Strip the optional `<channel ...>` wrapper that telegram-plugin
    # prepends on inbound, then trim common trailing punctuation/emoji.
    # Conservative match — we'd rather pay the recall cost on a
    # borderline case than miss memory on a real query.
    _stripped = prompt
    _channel_close = _stripped.find(">")
    if _stripped.startswith("<channel") and _channel_close != -1:
        _stripped = _stripped[_channel_close + 1:]
    _stripped = _stripped.replace("</channel>", "").strip()
    _ack_form = _stripped.lower().strip(" \t\n\r.,!?…👍👌✅🆗🙏")
    ACK_PHRASES = frozenset({
        "ok", "okay", "k", "kk", "yes", "yep", "yup", "yeah", "y",
        "no", "nope", "nah", "n",
        "ty", "thanks", "thank you", "thx", "cheers",
        "got it", "gotcha", "understood", "noted", "roger",
        "sure", "sure thing", "alright", "all right",
        "see you", "see ya", "later", "bye", "good night", "goodnight",
        "great", "nice", "cool", "perfect",
        "👍", "👌", "✅", "🆗", "🙏",
    })
    if _ack_form in ACK_PHRASES:
        debug_log(config, f"Prompt is ack-only ({_ack_form!r}), skipping recall")
        return
    session_id = hook_input.get("session_id") or ""
    # Switchroom #303 — push a "📚 recalling memories" status to the
    # user's pre-allocated Telegram draft so the gap between inbound and
    # the model's first content token isn't 25 s of dead air. No
    # trailing ellipsis: sendMessageDraft already animates a "typing"
    # indicator on the user's client, so a `…` is redundant noise.
    # Best-effort and silent on every failure path; the gateway no-ops
    # the IPC message when there's no draft for this chat (forum topic,
    # fresh session before pre-alloc lands, etc.).
    placeholder_chat_id = extract_chat_id_from_prompt(prompt)
    if placeholder_chat_id:
        update_placeholder(placeholder_chat_id, "📚 recalling memories")

    # Resolve API URL (handles all three connection modes)
    def _dbg(*a):
        debug_log(config, *a)

    try:
        api_url = get_api_url(config, debug_fn=_dbg, allow_daemon_start=False)
    except RuntimeError as e:
        print(f"[Hindsight] {e}", file=sys.stderr)
        return
    api_token = config.get("hindsightApiToken")
    try:
        client = HindsightClient(api_url, api_token)
    except ValueError as e:
        print(f"[Hindsight] Invalid API URL: {e}", file=sys.stderr)
        return
    # Derive bank ID (static or dynamic from project context)
    bank_id = derive_bank_id(hook_input, config)
    additional_banks = config.get("recallAdditionalBanks", []) or []
    # Switchroom #424 phase 4.1 — cache check before the directive and
    # recall API calls. Whole-session-scoped, opt-in via
    # HINDSIGHT_RECALL_CACHE_TTL_SECS.
    cache_ttl = _cache_ttl_secs()
    cache_key = (
        _cache_key(session_id, prompt, bank_id, additional_banks)
        if cache_ttl > 0
        else ""
    )
    if cache_ttl > 0:
        try:
            cached_context = _cache_lookup(cache_key, cache_ttl)
        except Exception as e:
            debug_log(config, f"Recall cache read failed (non-fatal): {e}")
            cached_context = None
        if cached_context is not None:
            debug_log(config, f"Recall cache HIT (key={cache_key[:12]}…) — skipping API call")
            # Same hand-off as the fresh path: recall is finished, so
            # move the draft off "recalling memories" before returning.
            if placeholder_chat_id:
                update_placeholder(placeholder_chat_id, "💭 thinking")
            _emit_cached_context(cached_context)
            _write_recall_log({
                "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                "session_id": (session_id or "")[:32],
                "bank_id": bank_id,
                "additional_banks": additional_banks,
                "query_chars": len(prompt),
                "result_count": None,  # not known on cache hit
                "directive_count": None,
                "demoted_count": 0,
                # Present on both record shapes so log readers see one schema.
                "overlap_dropped": 0,
                "capped": False,
                "cache_hit": True,
            })
            return
        debug_log(config, f"Recall cache MISS (key={cache_key[:12]}…)")
    # Set bank mission on first use
    ensure_bank_mission(client, bank_id, config, debug_fn=_dbg)
    # Multi-turn query composition
    recall_context_turns = config.get("recallContextTurns", 1)
    recall_max_query_chars = config.get("recallMaxQueryChars", 800)
    recall_roles = config.get("recallRoles", ["user", "assistant"])
    if recall_context_turns > 1:
        transcript_path = hook_input.get("transcript_path", "")
        messages = read_transcript_messages(transcript_path)
        debug_log(config, f"Multi-turn context: {recall_context_turns} turns, {len(messages)} messages from transcript")
        query = compose_recall_query(prompt, messages, recall_context_turns, recall_roles)
    else:
        query = prompt
    query = truncate_recall_query(query, prompt, recall_max_query_chars)
    # Final defensive cap (mirrors Openclaw)
    if len(query) > recall_max_query_chars:
        query = query[:recall_max_query_chars]
    debug_log(config, f"Recalling from bank '{bank_id}', query length: {len(query)}")
    # Fetch active directives FIRST (independent of recall — even if recall
    # finds no memories, an agent with active directives still needs them
    # surfaced every turn). Workaround for upstream bug
    # vectorize-io/hindsight#1269 (tagged directives silently dropped from
    # `reflect`); `list_directives` itself works correctly upstream, so this
    # is a pure client-side surface. fetch_active_directives is failure-safe
    # and returns [] on any error.
    directives = fetch_active_directives(client, bank_id)
    directives_block = format_active_directives_block(directives) if directives else None
    if directives_block:
        debug_log(config, f"Injecting {len(directives)} active directives")
    # Call Hindsight recall API
    results = []
    try:
        response = client.recall(
            bank_id=bank_id,
            query=query,
            max_tokens=config.get("recallMaxTokens", 1024),
            budget=config.get("recallBudget", "mid"),
            types=config.get("recallTypes"),
            timeout=10,
        )
        # `"results": null` (or any non-list) must not reach the
        # len()/slicing code below.
        primary_results = response.get("results")
        results = primary_results if isinstance(primary_results, list) else []
    except Exception as e:
        print(f"[Hindsight] Recall failed: {e}", file=sys.stderr)
        # Fall through — we still want to emit the directives block if we
        # have one, so a recall API failure doesn't blind the agent to
        # its own active directives.
    # Also recall from any additional banks (e.g. shared user profile bank).
    # `additional_banks` was already extracted above the cache check so the
    # cache key reflects every bank queried; reuse that local instead of
    # re-reading config.
    for extra_bank_id in additional_banks:
        try:
            extra_response = client.recall(
                bank_id=extra_bank_id,
                query=query,
                max_tokens=config.get("recallMaxTokens", 1024),
                budget=config.get("recallBudget", "mid"),
                types=config.get("recallTypes"),
                timeout=10,
            )
            extra_results = extra_response.get("results") or []
            if extra_results:
                debug_log(config, f"Got {len(extra_results)} memories from additional bank '{extra_bank_id}'")
                results = results + extra_results
        except Exception as e:
            debug_log(config, f"Recall from additional bank '{extra_bank_id}' failed: {e}")
    # Switchroom #432 phase 4.4 — drop demote-tagged memories before
    # the cap. Filtering early means the cap kicks in over the
    # non-demoted set (i.e. the user gets up to N "real" hits,
    # not N including ones they explicitly demoted).
    pre_filter_count = len(results)
    results = [m for m in results if not _is_demoted_memory(m)]
    demoted_count = pre_filter_count - len(results)
    if demoted_count > 0:
        debug_log(config, f"Filtered {demoted_count} demote-from-recall memories")
    # Switchroom #475 — lexical-overlap relevance gate. Drops memories
    # whose Jaccard overlap with the query is below
    # `recallMinOverlap` (default 0.0 = disabled). Runs after the
    # demote filter so the threshold sees the operator-curated set.
    overlap_threshold = config.get("recallMinOverlap", 0.0)
    if isinstance(overlap_threshold, (int, float)) and overlap_threshold > 0:
        pre_overlap_count = len(results)
        results, overlap_dropped = _filter_by_overlap(
            results, query, float(overlap_threshold)
        )
        if overlap_dropped > 0:
            debug_log(
                config,
                f"Overlap gate dropped {overlap_dropped}/{pre_overlap_count} "
                f"memories below threshold {overlap_threshold}",
            )
    else:
        overlap_dropped = 0
    # Switchroom-local: client-side count cap. Plugin v0.4.0 has no
    # `recallTopK` in the Claude Code integration (Openclaw-only), and a
    # token budget alone doesn't bound count — a single long memory can
    # blow past intended caps, while many short ones can flood the prompt.
    # Slice the combined results from primary + additional banks before
    # formatting. <= 0 disables the cap.
    recall_max_memories = config.get("recallMaxMemories", 0)
    pre_cap_count = len(results)
    capped = False
    if (
        isinstance(recall_max_memories, int)
        and recall_max_memories > 0
        and len(results) > recall_max_memories
    ):
        debug_log(
            config,
            f"Capping {len(results)} memories to {recall_max_memories} "
            f"(set HINDSIGHT_RECALL_MAX_MEMORIES=0 to disable)",
        )
        results = results[:recall_max_memories]
        capped = True
    memories_block = None
    if results:
        debug_log(config, f"Injecting {len(results)} memories")
        # Format context message — exact match of Openclaw's format
        memories_formatted = format_memories(results)
        preamble = config.get("recallPromptPreamble", "")
        current_time = format_current_time()
        memories_block = (
            f"<hindsight_memories>\n"
            f"{preamble}\n"
            f"Current time - {current_time}\n\n"
            f"{memories_formatted}\n"
            f"</hindsight_memories>"
        )
    else:
        debug_log(config, "No memories found")
    # Switchroom #303 — recall is done, model is about to start the long
    # TTFT. Update the placeholder so the user doesn't keep staring at
    # `📚 recalling memories` for the next 15–20 s of opus thinking.
    # No trailing ellipsis — sendMessageDraft already animates the
    # "typing" indicator, the `…` is redundant.
    if placeholder_chat_id:
        update_placeholder(placeholder_chat_id, "💭 thinking")
    # If neither block has content, there's nothing to inject — exit
    # silently to avoid emitting an empty hookSpecificOutput.
    if not directives_block and not memories_block:
        return
    # Compose final context. Directives block goes ABOVE memories so the
    # agent reads HARD RULES before low-signal recall traces.
    parts = []
    if directives_block:
        parts.append(directives_block)
    if memories_block:
        parts.append(memories_block)
    context_message = "\n\n".join(parts)
    # Save last recall to state for diagnostics
    write_state(
        LAST_RECALL_STATE,
        {
            "context": context_message,
            "saved_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "bank_id": bank_id,
            "result_count": len(results),
            "directive_count": len(directives),
        },
    )
    # Switchroom #424 phase 4.1 — populate the cache for the next hit.
    # Failure-tolerant: a write error here doesn't mask the recall result.
    if cache_ttl > 0 and cache_key:
        try:
            _cache_store(cache_key, context_message)
        except Exception as e:
            debug_log(config, f"Recall cache write failed (non-fatal): {e}")
    # Switchroom #432 phase 4.3 — telemetry log. memory IDs (when
    # available) let an operator confirm what was injected on a given
    # turn. Failure-tolerant.
    _write_recall_log({
        "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "session_id": (session_id or "")[:32],
        "bank_id": bank_id,
        "additional_banks": additional_banks,
        "query_chars": len(query),
        "result_count": len(results),
        "directive_count": len(directives),
        "demoted_count": demoted_count,
        "overlap_dropped": overlap_dropped,
        "capped": capped,
        "pre_cap_count": pre_cap_count,
        "memory_ids": [
            m.get("id") for m in results
            if isinstance(m, dict) and m.get("id")
        ],
        "cache_hit": False,
    })
    # Output JSON for Claude Code hook system
    output = {
        "hookSpecificOutput": {
            "hookEventName": "UserPromptSubmit",
            "additionalContext": context_message,
        }
    }
    json.dump(output, sys.stdout)
def _redact_secrets(text: str) -> str:
    """Scrub obvious credential shapes from an error message (best effort).

    HTTP error messages can embed the request URL and headers, which may
    carry credentials. There is no Python-callable bridge to the TS
    `secret-detect` module, so this is a small regex pass covering:

      * ``Bearer <token>`` (e.g. in an ``Authorization`` header)
      * ``x-api-key`` / ``api-key`` / ``api_key`` followed by ``:`` or ``=``
      * URL userinfo passwords: ``scheme://user:<password>@host``
      * ``?key=val`` / ``&key=val`` where the key contains
        token|key|secret|auth|password|pass

    Token values are matched with the RFC 6750 ``b64token`` alphabet
    (letters, digits, ``-._~+/``) plus trailing ``=`` padding, so
    base64-style tokens are replaced whole rather than only up to their
    first ``+`` or ``/``.

    Args:
        text: The message to scrub. Callers should bound its length; the
            patterns are simple but are not meant for unbounded input.

    Returns:
        ``text`` with every match replaced by ``<redacted>``, or ``text``
        unchanged when nothing matches. Never raises: if the regex pass
        fails (including on non-``str`` input) the text redacted so far is
        returned as-is. Redaction is best-effort, not a security boundary.
    """
    import re

    # RFC 6750 b64token alphabet, minimum 8 characters, optional padding.
    token = r"[A-Za-z0-9._~+/\-]{8,}=*"
    rules = (
        # Bearer tokens — case-insensitive.
        (r"(?i)(bearer\s+)" + token, r"\1<redacted>"),
        # x-api-key / api-key header values and api_key=... pairs.
        (r"(?i)(x?-?api[-_]?key\s*[:=]\s*)" + token, r"\1<redacted>"),
        # URL userinfo password. The lookahead requires the "@" directly
        # after the password, so "host:8080/path" is never mistaken for it.
        (r"(://[^/\s:@]*:)[^/\s@]+(?=@)", r"\1<redacted>"),
        # Query-string credentials: ?token=…, &api_key=…, ?secret=…
        (
            r"(?i)([?&](?:[a-z0-9_\-]*?(?:token|key|secret|auth|password|pass)"
            r"[a-z0-9_\-]*?)=)([^&\s]{4,})",
            r"\1<redacted>",
        ),
    )
    try:
        for pattern, replacement in rules:
            text = re.sub(pattern, replacement, text)
    except Exception:
        # Deliberate: this runs inside an error handler and must not raise.
        pass
    return text
def _record_issue_safely(detail: str, class_name: str) -> None:
    """Fire-and-forget call into `switchroom issues record`.

    Runs the CLI with a 5-second timeout, feeding `detail` on stdin and
    discarding the child's stdout/stderr. Never raises: the agent's
    responsiveness on a hindsight outage depends on this NOT propagating
    any failure.

    Args:
        detail: Failure description, sent to the CLI via `--detail-stdin`.
        class_name: Exception class name, embedded in the issue summary.
    """
    import subprocess

    command = [
        "switchroom",
        "issues",
        "record",
        "--severity",
        "warn",
        "--source",
        "hindsight.recall",
        "--code",
        "recall_failed",
        "--summary",
        f"Hindsight recall failed: {class_name}",
        "--detail-stdin",
        "--quiet",
    ]
    try:
        subprocess.run(
            command,
            input=detail,
            text=True,
            # Encode stdin explicitly instead of with the locale default.
            # With strict locale encoding, a detail holding a lone
            # surrogate (e.g. a surrogate-escaped filename in an OSError
            # message) or a character the locale cannot represent raised
            # UnicodeEncodeError, which the handler below swallowed — so
            # the issue was silently never recorded.
            encoding="utf-8",
            errors="replace",
            timeout=5,
            check=False,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        # Hard swallow. The agent stays responsive even if the issue
        # sink is wedged, missing, or the CLI isn't on PATH. In that case
        # whatever the caller already wrote to stderr is the operator's
        # only signal.
        pass
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Switchroom #1070 (redo per #1085 review).
        #
        # recall.py is a DIRECT Claude Code plugin hook (see
        # vendor/hindsight-memory/hooks/hooks.json). It is NOT wrapped
        # by bin/run-hook.sh, so the `non-zero exit → record_failure`
        # pipeline does NOT apply here. Per Claude Code's hook
        # contract, exit 2 on UserPromptSubmit BLOCKS the user's
        # prompt and surfaces stderr to them — which would turn a
        # hindsight outage into "every turn blocked".
        #
        # Correct posture: exit 0 with the same safe-empty stdout
        # shape as the no-memories success path (a plain `return`
        # with no JSON dumped), so the agent's prompt assembly
        # proceeds with no memories. Then shell out directly to
        # `switchroom issues record` so the operator still sees the
        # failure on their issues card. The subprocess call is
        # fault-tolerant; if it fails for any reason the agent still
        # stays responsive.
        #
        # Debug mode (HINDSIGHT_DEBUG=1) keeps the legacy posture —
        # traceback + exit 2 — because live-debugging operators want
        # maximum signal and have opted in.

        # Redact BEFORE truncating to the 400-char display limit.
        # Truncating first can cut a credential that straddles the
        # limit down to a fragment too short for the redaction
        # patterns to match, leaking its leading characters. The
        # 4096-char pre-slice only bounds the regex work; any secret
        # that starts inside the first 400 chars and runs past 4096
        # is still long enough to be matched.
        _msg = _redact_secrets(str(e)[:4096])
        if len(_msg) > 400:
            _msg = _msg[:400] + "…"
        _class = type(e).__name__
        _detail = f"{_class}: {_msg}"
        print(
            f"[Hindsight] Unexpected error in recall: {_detail}",
            file=sys.stderr,
        )
        # Decide on debug-branch behaviour. load_config may itself be
        # what failed in main() (it's called early), so guard.
        _is_debug = False
        try:
            from lib.config import load_config
            _is_debug = bool(load_config().get("debug"))
        except Exception:
            pass
        if _is_debug:
            import traceback
            traceback.print_exc(file=sys.stderr)
            # Debug-mode exit 2 is intentional and unchanged —
            # operators with HINDSIGHT_DEBUG=1 are chasing a broken
            # recall and want the hook to surface its failure.
            sys.exit(2)
        # Non-debug: route the failure to the issue-sink, then exit
        # 0 with no stdout (agent's prompt assembly treats absent
        # additionalContext as "no recall this turn").
        _record_issue_safely(_detail, _class)
        sys.exit(0)