switchroom 0.12.26 → 0.12.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/dist/agent-scheduler/index.js +80 -80
  2. package/dist/auth-broker/index.js +80 -80
  3. package/dist/cli/drive-write-pretool.mjs +10 -10
  4. package/dist/cli/skill-validate-pretool.mjs +72 -72
  5. package/dist/cli/switchroom.js +359 -357
  6. package/dist/host-control/main.js +99 -99
  7. package/dist/vault/approvals/kernel-server.js +82 -82
  8. package/dist/vault/broker/server.js +83 -83
  9. package/package.json +2 -1
  10. package/telegram-plugin/dist/bridge/bridge.js +112 -112
  11. package/telegram-plugin/dist/gateway/gateway.js +368 -209
  12. package/telegram-plugin/dist/server.js +160 -160
  13. package/telegram-plugin/gateway/gateway.ts +55 -40
  14. package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +188 -0
  15. package/telegram-plugin/stderr-timestamps.ts +106 -0
  16. package/telegram-plugin/tests/inbound-delivery-machine-dispatch.test.ts +240 -0
  17. package/telegram-plugin/tests/stderr-timestamps.test.ts +113 -0
  18. package/vendor/hindsight-memory/.claude-plugin/plugin.json +8 -0
  19. package/vendor/hindsight-memory/CHANGELOG.md +32 -0
  20. package/vendor/hindsight-memory/LICENSE +21 -0
  21. package/vendor/hindsight-memory/README.md +329 -0
  22. package/vendor/hindsight-memory/hooks/hooks.json +49 -0
  23. package/vendor/hindsight-memory/scripts/drain_pending.py +190 -0
  24. package/vendor/hindsight-memory/scripts/lib/__init__.py +0 -0
  25. package/vendor/hindsight-memory/scripts/lib/bank.py +122 -0
  26. package/vendor/hindsight-memory/scripts/lib/client.py +204 -0
  27. package/vendor/hindsight-memory/scripts/lib/config.py +180 -0
  28. package/vendor/hindsight-memory/scripts/lib/content.py +493 -0
  29. package/vendor/hindsight-memory/scripts/lib/daemon.py +334 -0
  30. package/vendor/hindsight-memory/scripts/lib/directives.py +119 -0
  31. package/vendor/hindsight-memory/scripts/lib/gateway_ipc.py +126 -0
  32. package/vendor/hindsight-memory/scripts/lib/llm.py +146 -0
  33. package/vendor/hindsight-memory/scripts/lib/pending.py +218 -0
  34. package/vendor/hindsight-memory/scripts/lib/state.py +196 -0
  35. package/vendor/hindsight-memory/scripts/recall.py +873 -0
  36. package/vendor/hindsight-memory/scripts/retain.py +286 -0
  37. package/vendor/hindsight-memory/scripts/session_end.py +122 -0
  38. package/vendor/hindsight-memory/scripts/session_start.py +76 -0
  39. package/vendor/hindsight-memory/scripts/setup_hooks.py +115 -0
  40. package/vendor/hindsight-memory/scripts/tests/__init__.py +0 -0
  41. package/vendor/hindsight-memory/scripts/tests/test_directives.py +211 -0
  42. package/vendor/hindsight-memory/scripts/tests/test_gateway_ipc.py +205 -0
  43. package/vendor/hindsight-memory/scripts/tests/test_recall_integration.py +621 -0
  44. package/vendor/hindsight-memory/settings.json +37 -0
  45. package/vendor/hindsight-memory/skills/setup.md +24 -0
  46. package/vendor/hindsight-memory/tests/conftest.py +94 -0
  47. package/vendor/hindsight-memory/tests/test_bank.py +142 -0
  48. package/vendor/hindsight-memory/tests/test_client.py +232 -0
  49. package/vendor/hindsight-memory/tests/test_config.py +128 -0
  50. package/vendor/hindsight-memory/tests/test_content.py +471 -0
  51. package/vendor/hindsight-memory/tests/test_drain_pending.py +192 -0
  52. package/vendor/hindsight-memory/tests/test_hooks.py +808 -0
  53. package/vendor/hindsight-memory/tests/test_manifest.py +14 -0
  54. package/vendor/hindsight-memory/tests/test_pending.py +152 -0
  55. package/vendor/hindsight-memory/tests/test_recall_exit_codes.py +325 -0
  56. package/vendor/hindsight-memory/tests/test_session_end_pending.py +205 -0
  57. package/vendor/hindsight-memory/tests/test_state.py +125 -0
@@ -0,0 +1,873 @@
1
+ #!/usr/bin/env python3
2
+ """Auto-recall hook for UserPromptSubmit.
3
+
4
+ Port of: before_prompt_build handler in Openclaw index.js
5
+ Adapted for Claude Code hooks (ephemeral process, JSON stdin/stdout).
6
+
7
+ Flow:
8
+ 1. Read hook input from stdin (prompt, session_id, transcript_path, cwd)
9
+ 2. (switchroom #424 4.1) Check per-session recall cache; on hit, emit
10
+ cached output and skip the API round-trip.
11
+ 3. Resolve API URL (external, existing local, or auto-start daemon)
12
+ 4. Derive bank ID (static or dynamic from project context)
13
+ 5. Ensure bank mission is set (first use only)
14
+ 6. Compose multi-turn query if recallContextTurns > 1
15
+ 7. Truncate to recallMaxQueryChars
16
+ 8. Call Hindsight recall API
17
+ 9. Format memories and output hookSpecificOutput.additionalContext
18
+ 10. Persist to per-session cache for the next prompt-equal invocation.
19
+ 11. Save last recall to state (for PostCompact re-injection)
20
+
21
+ Exit codes:
22
+ 0 — normal success (incl. graceful in-flight errors like recall API
23
+ timeouts where we still produce a valid hookSpecificOutput).
24
+ 0 — uncaught exception in non-debug mode. Switchroom #1070 (redo,
25
+ after #1085 review): recall.py is registered as a DIRECT Claude
26
+ Code plugin hook (`vendor/hindsight-memory/hooks/hooks.json`),
27
+ NOT wrapped by `bin/run-hook.sh`. Per Claude Code's
28
+ UserPromptSubmit hook contract, exit 2 BLOCKS the user's
29
+ prompt and surfaces stderr to the user — so a hindsight outage
30
+ would block every turn. We instead exit 0 (agent prompt
31
+ assembly proceeds with no memories), emit a bounded stderr
32
+ line for journald, and shell out directly to `switchroom
33
+ issues record` so the #424 issue-sink still captures the
34
+ failure on the operator's issues card. The subprocess call
35
+ is fault-tolerant — if it fails for any reason, we still
36
+ exit 0 with the safe stdout shape.
37
+ 2 — debug mode any error. HINDSIGHT_DEBUG=1 operators are
38
+ live-debugging and want maximum signal — full traceback to
39
+ stderr and non-zero exit. Existing behaviour.
40
+ """
41
+
42
+ import hashlib
43
+ import json
44
+ import os
45
+ import sys
46
+ import time
47
+
48
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
49
+
50
+ from lib.bank import derive_bank_id, ensure_bank_mission
51
+ from lib.client import HindsightClient
52
+ from lib.config import debug_log, load_config
53
+ from lib.content import (
54
+ compose_recall_query,
55
+ format_current_time,
56
+ format_memories,
57
+ truncate_recall_query,
58
+ )
59
+ from lib.daemon import get_api_url
60
+ from lib.directives import fetch_active_directives, format_active_directives_block
61
+ from lib.gateway_ipc import extract_chat_id_from_prompt, update_placeholder
62
+ from lib.state import read_state, write_state
63
+
64
+ LAST_RECALL_STATE = "last_recall.json"
65
+ RECALL_CACHE_STATE = "recall_cache.json"
66
+
67
+ # Switchroom #424 phase 4.1 — per-session recall cache.
68
+ #
69
+ # Caching is opt-in via env var: HINDSIGHT_RECALL_CACHE_TTL_SECS=N. Set N
70
+ # to 0 (or leave unset) to disable. On hit, the script emits the cached
71
+ # `additionalContext` and skips the directive + recall API round-trips
72
+ # entirely.
73
+ #
74
+ # Hits fire when (session_id, prompt, bank_id, extra_banks) match a
75
+ # prior entry within the TTL. Cache entries are scoped to a single
76
+ # session_id — a new session (e.g. agent restart, /reset, /new) starts
77
+ # a fresh cache window even if the env-configured TTL hasn't elapsed.
78
+ #
79
+ # The expected hit rate in production is modest (real users don't
80
+ # typically resubmit identical prompts), but this trims redundant
81
+ # recall traffic on session-resume re-processing and any retry paths.
82
+ CACHE_ENV = "HINDSIGHT_RECALL_CACHE_TTL_SECS"
83
+
84
+ # Maximum number of cache entries kept per session before LRU eviction.
85
+ # 100 is comfortably above the typical session size (~30 inbounds) and
86
+ # well below any concern about state-file size growth.
87
+ CACHE_MAX_ENTRIES = 100
88
+
89
+ # Switchroom #432 phase 4.4 — demote-from-recall tag.
90
+ #
91
+ # A memory tagged with any of these strings stays in the bank (it can
92
+ # still surface via reflect, manual mcp__hindsight__recall, etc.) but is
93
+ # excluded from the auto-recall block injected on every UserPromptSubmit.
94
+ # Useful when an over-broad "world fact" memory keeps drowning out more
95
+ # relevant recent memories.
96
+ DEMOTE_TAG_VARIANTS = (
97
+ "[demote-from-recall]",
98
+ "demote-from-recall",
99
+ "no-recall",
100
+ )
101
+
102
+ # Switchroom #432 phase 4.3 — recall telemetry log.
103
+ #
104
+ # Every recall (cache hit or miss) appends a JSONL record to
105
+ # state/recall_log.jsonl: timestamp, session_id, bank, count, capped flag,
106
+ # memory IDs. The file is bounded by RECALL_LOG_MAX_LINES so it stays
107
+ # under a few MB even on chatty 24/7 agents. View via
108
+ # `switchroom memory recall-log <agent>`.
109
+ RECALL_LOG_FILE = "recall_log.jsonl"
110
+ RECALL_LOG_MAX_LINES = 5000
111
+
112
+
113
def _cache_ttl_secs() -> int:
    """Return the recall-cache TTL (seconds) configured in the environment.

    The value is read from the variable named by ``CACHE_ENV``. A missing
    or blank value, a value that does not parse as an integer, and any
    non-positive integer all yield 0, which the caller treats as
    "caching disabled".
    """
    configured = os.environ.get(CACHE_ENV, "").strip()
    if configured == "":
        return 0
    try:
        ttl = int(configured)
    except ValueError:
        return 0
    # Clamp negatives (and zero) to the "disabled" sentinel.
    return max(ttl, 0)
124
+
125
+
126
+ def _cache_key(session_id: str, prompt: str, bank_id: str, extra_banks: list) -> str:
127
+ """Stable hash for cache keying. Session_id is included so a new
128
+ session always misses, regardless of the TTL setting. Extra banks
129
+ are sorted so list-order doesn't change the key."""
130
+ parts = [
131
+ session_id or "",
132
+ prompt or "",
133
+ bank_id or "",
134
+ ",".join(sorted(extra_banks or [])),
135
+ ]
136
+ payload = "\x1f".join(parts)
137
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
138
+
139
+
140
+ def _cache_lookup(key: str, ttl_secs: int) -> str | None:
141
+ """Return the cached `additionalContext` for `key` if present and
142
+ within TTL, else None. Failure-tolerant — any read error returns
143
+ None and the caller falls through to a fresh recall."""
144
+ if ttl_secs <= 0:
145
+ return None
146
+ state = read_state(RECALL_CACHE_STATE, {}) or {}
147
+ entries = state.get("entries") or {}
148
+ entry = entries.get(key)
149
+ if not isinstance(entry, dict):
150
+ return None
151
+ saved_at = entry.get("saved_at")
152
+ context = entry.get("context")
153
+ if not isinstance(saved_at, (int, float)) or not isinstance(context, str):
154
+ return None
155
+ if time.time() - saved_at > ttl_secs:
156
+ return None
157
+ return context
158
+
159
+
160
def _cache_store(key: str, context: str) -> None:
    """Persist ``context`` under ``key`` in the recall-cache state file.

    When the number of entries exceeds ``CACHE_MAX_ENTRIES`` the entries
    with the smallest ``saved_at`` are evicted first, so the file stays
    bounded. Note that ``saved_at`` is only set on write, so eviction is
    oldest-written-first rather than a true LRU.

    Malformed on-disk data is tolerated: a non-dict state or ``entries``
    value is replaced with an empty dict, and an entry that is not a dict
    or lacks a numeric ``saved_at`` sorts as timestamp 0 so it is evicted
    before any valid entry. (Previously such an entry made the sort key
    return None, ``sorted`` raised TypeError, and every later cache write
    failed.)
    """
    state = read_state(RECALL_CACHE_STATE, {}) or {}
    if not isinstance(state, dict):
        state = {}
    entries = state.get("entries") or {}
    if not isinstance(entries, dict):
        entries = {}

    entries[key] = {
        "context": context,
        "saved_at": time.time(),
    }

    def _saved_at(entry_key):
        """Sort key: the entry's numeric timestamp, or 0 when unusable."""
        entry = entries[entry_key]
        stamp = entry.get("saved_at") if isinstance(entry, dict) else None
        if isinstance(stamp, bool) or not isinstance(stamp, (int, float)):
            return 0
        return stamp

    overflow = len(entries) - CACHE_MAX_ENTRIES
    if overflow > 0:
        # Evict by saved_at ascending (oldest / malformed first).
        for stale_key in sorted(entries, key=_saved_at)[:overflow]:
            entries.pop(stale_key, None)

    state["entries"] = entries
    state["updated_at"] = time.time()
    write_state(RECALL_CACHE_STATE, state)
182
+
183
+
184
+ def _emit_cached_context(context: str) -> None:
185
+ """Emit the same hookSpecificOutput shape that the fresh-recall
186
+ path emits, so the cached path is byte-equivalent from claude
187
+ code's perspective."""
188
+ json.dump(
189
+ {
190
+ "hookSpecificOutput": {
191
+ "hookEventName": "UserPromptSubmit",
192
+ "additionalContext": context,
193
+ }
194
+ },
195
+ sys.stdout,
196
+ )
197
+
198
+
199
+ def _is_demoted_memory(memory) -> bool:
200
+ """Return True if the memory has any demote-from-recall tag.
201
+
202
+ Switchroom #432 phase 4.4. Tags are case-sensitive and can be
203
+ written with or without surrounding brackets (`[demote-from-recall]`
204
+ or `demote-from-recall` or `no-recall`). Anything that's not a list
205
+ of strings is treated as untagged.
206
+ """
207
+ tags = memory.get("tags") if isinstance(memory, dict) else None
208
+ if not isinstance(tags, list):
209
+ return False
210
+ for tag in tags:
211
+ if isinstance(tag, str) and tag.strip() in DEMOTE_TAG_VARIANTS:
212
+ return True
213
+ return False
214
+
215
+
216
+ # Switchroom #475 — lexical-overlap relevance gate.
217
+ #
218
+ # Hindsight's HTTP API does not return similarity scores. Without a
219
+ # score the existing `recallMaxMemories` cap acts as a *floor* on
220
+ # low-relevance prompts: weak matches still fill the slot up to N,
221
+ # mis-steering the model. This gate computes Jaccard overlap between
222
+ # the user's query terms and each memory's text terms, and drops
223
+ # memories below a configurable threshold.
224
+ #
225
+ # Threshold default is 0.0 (disabled) so the gate is opt-in initially.
226
+ # Operators tune via `memory.recall.min_overlap` in switchroom.yaml or
227
+ # `HINDSIGHT_RECALL_MIN_OVERLAP=0.15` env. Telemetry surfaces the dropped
228
+ # count via the existing recall_log.jsonl (#432 4.3) under
229
+ # `overlap_dropped`, so the gate's effect is observable per turn from
230
+ # `switchroom memory recall-log <agent>`.
231
+ #
232
+ # A small English stop-word set is removed from both sides before the
233
+ # overlap is computed — common-word coincidence is not a real signal.
234
+ # Token comparison is case-insensitive and strips punctuation. The set
235
+ # is intentionally tight; we'd rather miss a borderline drop than
236
+ # silently throw out a real match.
237
+ _OVERLAP_STOPWORDS = frozenset({
238
+ "a", "an", "and", "any", "are", "as", "at",
239
+ "be", "been", "being", "but", "by",
240
+ "can", "could", "did", "do", "does", "doing",
241
+ "for", "from",
242
+ "had", "has", "have", "having", "how",
243
+ "i", "if", "in", "into", "is", "it", "its",
244
+ "me", "my",
245
+ "of", "on", "one", "or",
246
+ "should", "so",
247
+ "that", "the", "their", "them", "then", "there", "these", "they",
248
+ "this", "to",
249
+ "was", "we", "were", "what", "when", "where", "which", "who",
250
+ "why", "will", "with", "would", "you", "your",
251
+ })
252
+
253
+
254
+ def _overlap_tokens(text) -> set:
255
+ """Tokenize text into a stop-word-stripped, lowercased set of terms.
256
+
257
+ Punctuation, digits, and short fragments (<= 1 char) are dropped.
258
+ Returns an empty set on non-string / empty input.
259
+ """
260
+ if not isinstance(text, str) or not text:
261
+ return set()
262
+ out = set()
263
+ cur = []
264
+ for ch in text:
265
+ if ch.isalpha():
266
+ cur.append(ch.lower())
267
+ else:
268
+ if cur:
269
+ tok = "".join(cur)
270
+ if len(tok) > 1 and tok not in _OVERLAP_STOPWORDS:
271
+ out.add(tok)
272
+ cur = []
273
+ if cur:
274
+ tok = "".join(cur)
275
+ if len(tok) > 1 and tok not in _OVERLAP_STOPWORDS:
276
+ out.add(tok)
277
+ return out
278
+
279
+
280
def jaccard_overlap(query: str, memory_text: str) -> float:
    """Compute the Jaccard similarity of two texts over their term sets.

    Both texts are tokenized with ``_overlap_tokens``. The score is
    ``|A ∩ B| / |A ∪ B|`` and lies in [0.0, 1.0]. If either side has no
    terms the score is 0.0, so an uncomputable overlap leads to a drop
    rather than a retain.
    """
    query_terms = _overlap_tokens(query)
    memory_terms = _overlap_tokens(memory_text)
    if not (query_terms and memory_terms):
        return 0.0
    shared = query_terms.intersection(memory_terms)
    combined = query_terms.union(memory_terms)
    if not combined:
        return 0.0
    return len(shared) / len(combined)
292
+
293
+
294
+ def _filter_by_overlap(results, query: str, threshold: float):
295
+ """Drop memories whose Jaccard overlap with the query is below the
296
+ threshold. Threshold <= 0 short-circuits to passthrough (no
297
+ iteration cost).
298
+
299
+ Returns (kept_results, dropped_count).
300
+ """
301
+ if threshold <= 0:
302
+ return results, 0
303
+ kept = []
304
+ dropped = 0
305
+ for m in results:
306
+ text = m.get("text", "") if isinstance(m, dict) else ""
307
+ if jaccard_overlap(query, text) >= threshold:
308
+ kept.append(m)
309
+ else:
310
+ dropped += 1
311
+ return kept, dropped
312
+
313
+
314
+ def _write_recall_log(entry: dict) -> None:
315
+ """Append a JSONL line to recall_log.jsonl. Bounded by line count.
316
+
317
+ Switchroom #432 phase 4.3. Failure-tolerant — telemetry must never
318
+ block recall, so any write error is swallowed silently. Unbounded
319
+ growth is prevented by truncating to the last RECALL_LOG_MAX_LINES
320
+ when the file is rolled over (cheap because we read once per
321
+ append; the alternative — keeping a separate index — is more code
322
+ for a feature that runs at most once per turn).
323
+ """
324
+ try:
325
+ plugin_data = os.environ.get("CLAUDE_PLUGIN_DATA", "")
326
+ if not plugin_data:
327
+ return
328
+ log_dir = os.path.join(plugin_data, "state")
329
+ os.makedirs(log_dir, exist_ok=True)
330
+ log_path = os.path.join(log_dir, RECALL_LOG_FILE)
331
+ line = json.dumps(entry, separators=(",", ":")) + "\n"
332
+ # Append-then-trim. For typical operation the file is well
333
+ # under the cap and the trim path is a no-op.
334
+ with open(log_path, "a", encoding="utf-8") as f:
335
+ f.write(line)
336
+ # Cheap rolling trim every ~50 writes (estimated by file size
337
+ # vs. 200 bytes/line average) to amortize the read cost.
338
+ try:
339
+ size = os.path.getsize(log_path)
340
+ except OSError:
341
+ return
342
+ if size > RECALL_LOG_MAX_LINES * 250:
343
+ try:
344
+ with open(log_path, "r", encoding="utf-8") as f:
345
+ lines = f.readlines()
346
+ if len(lines) > RECALL_LOG_MAX_LINES:
347
+ keep = lines[-RECALL_LOG_MAX_LINES:]
348
+ with open(log_path, "w", encoding="utf-8") as f:
349
+ f.writelines(keep)
350
+ except OSError:
351
+ pass
352
+ except Exception:
353
+ # Silently swallow — telemetry is never load-bearing.
354
+ pass
355
+
356
+
357
def read_transcript_messages(transcript_path: str) -> list:
    """Read messages from a JSONL transcript file for multi-turn context.

    Claude Code transcript format nests messages:
      {type: "user", message: {role: "user", content: "..."}, uuid: "...", ...}
    Also supports flat format for testing:
      {role: "user", content: "..."}

    Args:
        transcript_path: Path to the JSONL transcript. An empty path or a
            path that is not an existing file yields an empty list.

    Returns:
        The message dicts in file order. For the nested format this is the
        inner ``message`` object (kept only when it is a dict with a truthy
        ``role``); for the flat format it is the line's object itself.

    Blank lines, lines that are not valid JSON, and lines whose JSON value
    is not an object (for example ``[1, 2]``, ``42`` or ``null``) are
    skipped. A read or decode error stops the scan and whatever was
    collected so far is returned — a damaged transcript must not crash
    the hook.
    """
    if not transcript_path or not os.path.isfile(transcript_path):
        return []

    messages = []
    try:
        with open(transcript_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON that is not an object has no .get(); skip it
                # instead of raising AttributeError.
                if not isinstance(entry, dict):
                    continue
                # Claude Code nested format: {type: "user", message: {role, content}}
                if entry.get("type") in ("user", "assistant"):
                    msg = entry.get("message", {})
                    if isinstance(msg, dict) and msg.get("role"):
                        messages.append(msg)
                # Flat format (testing / future compatibility)
                elif "role" in entry and "content" in entry:
                    messages.append(entry)
    except (OSError, UnicodeDecodeError):
        # Unreadable file or invalid UTF-8: keep what was parsed so far.
        pass
    return messages
389
+
390
+
391
def main():
    """Run the UserPromptSubmit auto-recall hook once.

    Reads the hook payload (a JSON object) from stdin and, unless recall
    is skipped, writes a ``hookSpecificOutput`` JSON object to stdout
    whose ``additionalContext`` holds the active-directives block and/or
    the ``<hindsight_memories>`` block.

    Recall is skipped (nothing is written to stdout) when ``autoRecall``
    is off, the payload is unreadable or not a JSON object, the prompt is
    shorter than 5 characters or is a bare acknowledgement, the API URL
    cannot be resolved, or neither directives nor memories were found.

    Order of operations: placeholder push -> API URL / client -> bank id
    -> optional per-session cache lookup -> bank mission -> query
    composition -> directives -> recall (primary + additional banks) ->
    demote filter -> overlap gate -> count cap -> state / cache /
    telemetry writes -> stdout.

    Returns None on every path; failures inside the recall calls are
    reported on stderr or via ``debug_log`` rather than raised.
    """
    config = load_config()

    if not config.get("autoRecall"):
        debug_log(config, "Auto-recall disabled, exiting")
        return

    # Read hook input from stdin. ValueError covers both JSONDecodeError
    # and a UnicodeDecodeError from undecodable bytes.
    try:
        hook_input = json.load(sys.stdin)
    except (ValueError, EOFError):
        print("[Hindsight] Failed to read hook input", file=sys.stderr)
        return
    # Valid JSON that is not an object (list, string, number, null) has no
    # keys to read; treat it like an unreadable payload instead of crashing.
    if not isinstance(hook_input, dict):
        print("[Hindsight] Failed to read hook input", file=sys.stderr)
        return

    debug_log(config, f"Hook input keys: {list(hook_input.keys())}")

    # Extract user query — hooks-reference.md documents "prompt", but some
    # Claude Code sources reference "user_prompt". Accept both defensively.
    # A non-string prompt is treated as empty rather than raising on strip().
    raw_prompt = hook_input.get("prompt") or hook_input.get("user_prompt") or ""
    prompt = raw_prompt.strip() if isinstance(raw_prompt, str) else ""
    if len(prompt) < 5:
        debug_log(config, "Prompt too short for recall, skipping")
        return

    # Switchroom-local: skip recall on conversational acks.
    #
    # The 5-char short-circuit catches `ok`/`yes`/`no`/`ty` but passes
    # longer acks like `thanks!`, `got it`, `see you tomorrow` that
    # don't benefit from recall. Recall costs ~1-2s (low budget) to
    # ~5s (mid budget) per turn — wasted on "I acknowledge" replies
    # where the model is going to produce a one-liner regardless of
    # what came back.
    #
    # Strip the optional `<channel ...>` wrapper that telegram-plugin
    # prepends on inbound, then trim common trailing punctuation/emoji.
    # Conservative match — we'd rather pay the recall cost on a
    # borderline case than miss memory on a real query.
    _stripped = prompt
    _channel_close = _stripped.find(">")
    if _stripped.startswith("<channel") and _channel_close != -1:
        _stripped = _stripped[_channel_close + 1:]
        _stripped = _stripped.replace("</channel>", "").strip()
    _ack_form = _stripped.lower().strip(" \t\n\r.,!?…👍👌✅🆗🙏")
    ACK_PHRASES = frozenset({
        "ok", "okay", "k", "kk", "yes", "yep", "yup", "yeah", "y",
        "no", "nope", "nah", "n",
        "ty", "thanks", "thank you", "thx", "cheers",
        "got it", "gotcha", "understood", "noted", "roger",
        "sure", "sure thing", "alright", "all right",
        "see you", "see ya", "later", "bye", "good night", "goodnight",
        "great", "nice", "cool", "perfect",
        "👍", "👌", "✅", "🆗", "🙏",
    })
    if _ack_form in ACK_PHRASES:
        debug_log(config, f"Prompt is ack-only ({_ack_form!r}), skipping recall")
        return

    session_id = hook_input.get("session_id") or ""

    # Switchroom #303 — push a "📚 recalling memories" status to the
    # user's pre-allocated Telegram draft so the gap between inbound and
    # the model's first content token isn't 25 s of dead air. No
    # trailing ellipsis: sendMessageDraft already animates a "typing"
    # indicator on the user's client, so a `…` is redundant noise.
    # Best-effort and silent on every failure path; the gateway no-ops
    # the IPC message when there's no draft for this chat (forum topic,
    # fresh session before pre-alloc lands, etc.).
    placeholder_chat_id = extract_chat_id_from_prompt(prompt)
    if placeholder_chat_id:
        update_placeholder(placeholder_chat_id, "📚 recalling memories")

    def _mark_thinking():
        """Advance the draft placeholder once this hook is done recalling.

        Called on every exit after the "recalling" push above, so an early
        return (API URL failure, cache hit) does not leave the user
        looking at `📚 recalling memories` while the model is thinking.
        No trailing ellipsis — sendMessageDraft already animates the
        "typing" indicator.
        """
        if placeholder_chat_id:
            update_placeholder(placeholder_chat_id, "💭 thinking")

    # Resolve API URL (handles all three connection modes)
    def _dbg(*a):
        debug_log(config, *a)

    try:
        api_url = get_api_url(config, debug_fn=_dbg, allow_daemon_start=False)
    except RuntimeError as e:
        print(f"[Hindsight] {e}", file=sys.stderr)
        _mark_thinking()
        return

    api_token = config.get("hindsightApiToken")
    try:
        client = HindsightClient(api_url, api_token)
    except ValueError as e:
        print(f"[Hindsight] Invalid API URL: {e}", file=sys.stderr)
        _mark_thinking()
        return

    # Derive bank ID (static or dynamic from project context)
    bank_id = derive_bank_id(hook_input, config)
    additional_banks = config.get("recallAdditionalBanks", []) or []

    # Switchroom #424 phase 4.1 — cache check before the directive and
    # recall round-trips. Whole-session-scoped, opt-in via
    # HINDSIGHT_RECALL_CACHE_TTL_SECS.
    cache_ttl = _cache_ttl_secs()
    cache_key = (
        _cache_key(session_id, prompt, bank_id, additional_banks)
        if cache_ttl > 0
        else ""
    )
    if cache_ttl > 0:
        try:
            cached_context = _cache_lookup(cache_key, cache_ttl)
        except Exception as e:
            debug_log(config, f"Recall cache read failed (non-fatal): {e}")
            cached_context = None
        if cached_context is not None:
            debug_log(config, f"Recall cache HIT (key={cache_key[:12]}…) — skipping API call")
            _mark_thinking()
            _emit_cached_context(cached_context)
            _write_recall_log({
                "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                "session_id": (session_id or "")[:32],
                "bank_id": bank_id,
                "additional_banks": additional_banks,
                "query_chars": len(prompt),
                "result_count": None,  # not known on cache hit
                "directive_count": None,
                "demoted_count": 0,
                "capped": False,
                "cache_hit": True,
            })
            return
        debug_log(config, f"Recall cache MISS (key={cache_key[:12]}…)")

    # Set bank mission on first use
    ensure_bank_mission(client, bank_id, config, debug_fn=_dbg)

    # Multi-turn query composition
    recall_context_turns = config.get("recallContextTurns", 1)
    recall_max_query_chars = config.get("recallMaxQueryChars", 800)
    recall_roles = config.get("recallRoles", ["user", "assistant"])

    if recall_context_turns > 1:
        transcript_path = hook_input.get("transcript_path", "")
        messages = read_transcript_messages(transcript_path)
        debug_log(config, f"Multi-turn context: {recall_context_turns} turns, {len(messages)} messages from transcript")
        query = compose_recall_query(prompt, messages, recall_context_turns, recall_roles)
    else:
        query = prompt

    query = truncate_recall_query(query, prompt, recall_max_query_chars)

    # Final defensive cap (mirrors Openclaw)
    if len(query) > recall_max_query_chars:
        query = query[:recall_max_query_chars]

    debug_log(config, f"Recalling from bank '{bank_id}', query length: {len(query)}")

    # Fetch active directives FIRST (independent of recall — even if recall
    # finds no memories, an agent with active directives still needs them
    # surfaced every turn). Workaround for upstream bug
    # vectorize-io/hindsight#1269 (tagged directives silently dropped from
    # `reflect`); `list_directives` itself works correctly upstream, so this
    # is a pure client-side surface. fetch_active_directives is failure-safe
    # and returns [] on any error.
    directives = fetch_active_directives(client, bank_id)
    directives_block = format_active_directives_block(directives) if directives else None
    if directives_block:
        debug_log(config, f"Injecting {len(directives)} active directives")

    # Call Hindsight recall API
    results = []
    try:
        response = client.recall(
            bank_id=bank_id,
            query=query,
            max_tokens=config.get("recallMaxTokens", 1024),
            budget=config.get("recallBudget", "mid"),
            types=config.get("recallTypes"),
            timeout=10,
        )
        # `or []` also covers an explicit `"results": null`, which would
        # otherwise break len()/iteration in the filters below.
        results = response.get("results") or []
    except Exception as e:
        print(f"[Hindsight] Recall failed: {e}", file=sys.stderr)
        # Fall through — we still want to emit the directives block if we
        # have one, so a recall API failure doesn't blind the agent to
        # its own active directives.

    # Also recall from any additional banks (e.g. shared user profile bank).
    # `additional_banks` was already extracted above the cache check so the
    # cache key reflects every bank queried; reuse that local instead of
    # re-reading config.
    for extra_bank_id in additional_banks:
        try:
            extra_response = client.recall(
                bank_id=extra_bank_id,
                query=query,
                max_tokens=config.get("recallMaxTokens", 1024),
                budget=config.get("recallBudget", "mid"),
                types=config.get("recallTypes"),
                timeout=10,
            )
            extra_results = extra_response.get("results") or []
            if extra_results:
                debug_log(config, f"Got {len(extra_results)} memories from additional bank '{extra_bank_id}'")
                results = results + extra_results
        except Exception as e:
            debug_log(config, f"Recall from additional bank '{extra_bank_id}' failed: {e}")

    # Switchroom #432 phase 4.4 — drop demote-tagged memories before
    # the cap. Filtering early means the cap kicks in over the
    # non-demoted set (i.e. the user gets up to N "real" hits,
    # not N including ones they explicitly demoted).
    pre_filter_count = len(results)
    results = [m for m in results if not _is_demoted_memory(m)]
    demoted_count = pre_filter_count - len(results)
    if demoted_count > 0:
        debug_log(config, f"Filtered {demoted_count} demote-from-recall memories")

    # Switchroom #475 — lexical-overlap relevance gate. Drops memories
    # whose Jaccard overlap with the query is below
    # `recallMinOverlap` (default 0.0 = disabled). Runs after the
    # demote filter so the threshold sees the operator-curated set.
    overlap_threshold = config.get("recallMinOverlap", 0.0)
    if isinstance(overlap_threshold, (int, float)) and overlap_threshold > 0:
        pre_overlap_count = len(results)
        results, overlap_dropped = _filter_by_overlap(
            results, query, float(overlap_threshold)
        )
        if overlap_dropped > 0:
            debug_log(
                config,
                f"Overlap gate dropped {overlap_dropped}/{pre_overlap_count} "
                f"memories below threshold {overlap_threshold}",
            )
    else:
        overlap_dropped = 0

    # Switchroom-local: client-side count cap. Plugin v0.4.0 has no
    # `recallTopK` in the Claude Code integration (Openclaw-only), and a
    # token budget alone doesn't bound count — a single long memory can
    # blow past intended caps, while many short ones can flood the prompt.
    # Slice the combined results from primary + additional banks before
    # formatting. <= 0 disables the cap.
    recall_max_memories = config.get("recallMaxMemories", 0)
    pre_cap_count = len(results)
    capped = False
    if (
        isinstance(recall_max_memories, int)
        and recall_max_memories > 0
        and len(results) > recall_max_memories
    ):
        debug_log(
            config,
            f"Capping {len(results)} memories to {recall_max_memories} "
            f"(set HINDSIGHT_RECALL_MAX_MEMORIES=0 to disable)",
        )
        results = results[:recall_max_memories]
        capped = True

    memories_block = None
    if results:
        debug_log(config, f"Injecting {len(results)} memories")
        # Format context message — exact match of Openclaw's format
        memories_formatted = format_memories(results)
        preamble = config.get("recallPromptPreamble", "")
        current_time = format_current_time()
        memories_block = (
            f"<hindsight_memories>\n"
            f"{preamble}\n"
            f"Current time - {current_time}\n\n"
            f"{memories_formatted}\n"
            f"</hindsight_memories>"
        )
    else:
        debug_log(config, "No memories found")

    # Switchroom #303 — recall is done, model is about to start the long
    # TTFT. Update the placeholder so the user doesn't keep staring at
    # `📚 recalling memories` for the next 15–20 s of opus thinking.
    _mark_thinking()

    # If neither block has content, there's nothing to inject — exit
    # silently to avoid emitting an empty hookSpecificOutput.
    if not directives_block and not memories_block:
        return

    # Compose final context. Directives block goes ABOVE memories so the
    # agent reads HARD RULES before low-signal recall traces.
    parts = []
    if directives_block:
        parts.append(directives_block)
    if memories_block:
        parts.append(memories_block)
    context_message = "\n\n".join(parts)

    # Save last recall to state for diagnostics
    write_state(
        LAST_RECALL_STATE,
        {
            "context": context_message,
            "saved_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "bank_id": bank_id,
            "result_count": len(results),
            "directive_count": len(directives),
        },
    )

    # Switchroom #424 phase 4.1 — populate the cache for the next hit.
    # Failure-tolerant: a write error here doesn't mask the recall result.
    if cache_ttl > 0 and cache_key:
        try:
            _cache_store(cache_key, context_message)
        except Exception as e:
            debug_log(config, f"Recall cache write failed (non-fatal): {e}")

    # Switchroom #432 phase 4.3 — telemetry log. memory IDs (when
    # available) let an operator confirm what was injected on a given
    # turn. Failure-tolerant.
    _write_recall_log({
        "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "session_id": (session_id or "")[:32],
        "bank_id": bank_id,
        "additional_banks": additional_banks,
        "query_chars": len(query),
        "result_count": len(results),
        "directive_count": len(directives),
        "demoted_count": demoted_count,
        "overlap_dropped": overlap_dropped,
        "capped": capped,
        "pre_cap_count": pre_cap_count,
        "memory_ids": [
            m.get("id") for m in results
            if isinstance(m, dict) and m.get("id")
        ],
        "cache_hit": False,
    })

    # Output JSON for Claude Code hook system
    output = {
        "hookSpecificOutput": {
            "hookEventName": "UserPromptSubmit",
            "additionalContext": context_message,
        }
    }
    json.dump(output, sys.stdout)
729
+
730
+
731
+ def _redact_secrets(text: str) -> str:
732
+ """Best-effort inline scrub for the obvious leak shapes that show
733
+ up in HTTP error messages (`lib/client.py:73` formats the URL into
734
+ the RuntimeError, and the URL may include query-string credentials).
735
+
736
+ We don't have a python-callable bridge to the TS `secret-detect`
737
+ module, so this is a small regex pass covering:
738
+ * Authorization: Bearer <token>
739
+ * ?key=val and &key=val for keys matching token|key|secret|auth
740
+ * x-api-key: <value> header shape
741
+
742
+ Bounded by `re` (anchored, no catastrophic alternation) so this is
743
+ safe to run on a 400-char input. Returns `text` unchanged if no
744
+ matches; on regex-engine error, falls back to returning the raw
745
+ text — redaction is best-effort, not a security boundary, and the
746
+ server-side detail handler (#1069) re-scans before persistence.
747
+ """
748
+ import re
749
+
750
+ try:
751
+ # Bearer tokens — case-insensitive
752
+ text = re.sub(
753
+ r"(?i)(bearer\s+)[A-Za-z0-9._\-]{8,}",
754
+ r"\1<redacted>",
755
+ text,
756
+ )
757
+ # x-api-key / api-key header values
758
+ text = re.sub(
759
+ r"(?i)(x?-?api[-_]?key\s*[:=]\s*)([A-Za-z0-9._\-]{8,})",
760
+ r"\1<redacted>",
761
+ text,
762
+ )
763
+ # Query-string credentials: ?token=…, &api_key=…, ?secret=…
764
+ text = re.sub(
765
+ r"(?i)([?&](?:[a-z0-9_\-]*?(?:token|key|secret|auth|password|pass)"
766
+ r"[a-z0-9_\-]*?)=)([^&\s]{4,})",
767
+ r"\1<redacted>",
768
+ text,
769
+ )
770
+ return text
771
+ except Exception:
772
+ return text
773
+
774
+
775
+ def _record_issue_safely(detail: str, class_name: str) -> None:
776
+ """Fire-and-forget call into `switchroom issues record`. Bounded by
777
+ timeout; never raises. The agent's responsiveness on a hindsight
778
+ outage depends on this NOT propagating any failure.
779
+ """
780
+ import subprocess
781
+
782
+ try:
783
+ subprocess.run(
784
+ [
785
+ "switchroom",
786
+ "issues",
787
+ "record",
788
+ "--severity",
789
+ "warn",
790
+ "--source",
791
+ "hindsight.recall",
792
+ "--code",
793
+ "recall_failed",
794
+ "--summary",
795
+ f"Hindsight recall failed: {class_name}",
796
+ "--detail-stdin",
797
+ "--quiet",
798
+ ],
799
+ input=detail,
800
+ text=True,
801
+ timeout=5,
802
+ check=False,
803
+ stdout=subprocess.DEVNULL,
804
+ stderr=subprocess.DEVNULL,
805
+ )
806
+ except Exception:
807
+ # Hard swallow. The agent stays responsive even if the issue
808
+ # sink is wedged, missing, or the CLI isn't on PATH. The stderr
809
+ # line above is the operator's only signal in that case.
810
+ pass
811
+
812
+
813
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Switchroom #1070 (redo per #1085 review).
        #
        # recall.py is a DIRECT Claude Code plugin hook (see
        # vendor/hindsight-memory/hooks/hooks.json). It is NOT wrapped
        # by bin/run-hook.sh, so the `non-zero exit → record_failure`
        # pipeline does NOT apply here. Per Claude Code's hook
        # contract, exit 2 on UserPromptSubmit BLOCKS the user's
        # prompt and surfaces stderr to them — which would turn a
        # hindsight outage into "every turn blocked".
        #
        # Correct posture: exit 0 with the same safe-empty stdout
        # shape as the no-memories success path (recall.py line ~660
        # — `return` with no JSON dumped), so the agent's prompt
        # assembly proceeds with no memories. Then shell out directly
        # to `switchroom issues record` so the operator still sees
        # the failure on their issues card. The subprocess call is
        # fault-tolerant; if it fails for any reason the agent still
        # stays responsive.
        #
        # Debug mode (HINDSIGHT_DEBUG=1) keeps the legacy posture —
        # traceback + exit 2 — because live-debugging operators want
        # maximum signal and have opted in.

        # Redact BEFORE truncating for display. Cutting first can slice
        # a credential so that the surviving fragment is shorter than
        # the redaction patterns' minimum lengths, and that fragment
        # would then be printed and recorded verbatim. The 4096-char
        # window only bounds how much text the regex pass has to scan.
        _msg = _redact_secrets(str(e)[:4096])
        if len(_msg) > 400:
            _msg = _msg[:400] + "…"
        _class = type(e).__name__
        _detail = f"{_class}: {_msg}"
        print(
            f"[Hindsight] Unexpected error in recall: {_detail}",
            file=sys.stderr,
        )

        # Decide on debug-branch behaviour. load_config may itself be
        # what failed in main() (it's called early), so guard.
        _is_debug = False
        try:
            from lib.config import load_config

            _is_debug = bool(load_config().get("debug"))
        except Exception:
            # Config is unreadable: fall through to the non-debug path.
            pass

        if _is_debug:
            import traceback

            traceback.print_exc(file=sys.stderr)
            # Debug-mode exit 2 is intentional and unchanged —
            # operators with HINDSIGHT_DEBUG=1 are chasing a broken
            # recall and want the hook to surface its failure.
            sys.exit(2)

        # Non-debug: route the failure to the issue-sink, then exit
        # 0 with no stdout (agent's prompt assembly treats absent
        # additionalContext as "no recall this turn").
        _record_issue_safely(_detail, _class)
        sys.exit(0)