switchroom 0.12.27 → 0.12.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/cli/switchroom.js +4 -2
  2. package/package.json +2 -1
  3. package/telegram-plugin/dist/gateway/gateway.js +49 -5
  4. package/telegram-plugin/gateway/gateway.ts +5 -0
  5. package/telegram-plugin/stderr-timestamps.ts +106 -0
  6. package/telegram-plugin/tests/stderr-timestamps.test.ts +113 -0
  7. package/vendor/hindsight-memory/.claude-plugin/plugin.json +8 -0
  8. package/vendor/hindsight-memory/CHANGELOG.md +32 -0
  9. package/vendor/hindsight-memory/LICENSE +21 -0
  10. package/vendor/hindsight-memory/README.md +329 -0
  11. package/vendor/hindsight-memory/hooks/hooks.json +49 -0
  12. package/vendor/hindsight-memory/scripts/drain_pending.py +190 -0
  13. package/vendor/hindsight-memory/scripts/lib/__init__.py +0 -0
  14. package/vendor/hindsight-memory/scripts/lib/bank.py +122 -0
  15. package/vendor/hindsight-memory/scripts/lib/client.py +204 -0
  16. package/vendor/hindsight-memory/scripts/lib/config.py +180 -0
  17. package/vendor/hindsight-memory/scripts/lib/content.py +493 -0
  18. package/vendor/hindsight-memory/scripts/lib/daemon.py +334 -0
  19. package/vendor/hindsight-memory/scripts/lib/directives.py +119 -0
  20. package/vendor/hindsight-memory/scripts/lib/gateway_ipc.py +126 -0
  21. package/vendor/hindsight-memory/scripts/lib/llm.py +146 -0
  22. package/vendor/hindsight-memory/scripts/lib/pending.py +218 -0
  23. package/vendor/hindsight-memory/scripts/lib/state.py +196 -0
  24. package/vendor/hindsight-memory/scripts/recall.py +873 -0
  25. package/vendor/hindsight-memory/scripts/retain.py +286 -0
  26. package/vendor/hindsight-memory/scripts/session_end.py +122 -0
  27. package/vendor/hindsight-memory/scripts/session_start.py +76 -0
  28. package/vendor/hindsight-memory/scripts/setup_hooks.py +115 -0
  29. package/vendor/hindsight-memory/scripts/tests/__init__.py +0 -0
  30. package/vendor/hindsight-memory/scripts/tests/test_directives.py +211 -0
  31. package/vendor/hindsight-memory/scripts/tests/test_gateway_ipc.py +205 -0
  32. package/vendor/hindsight-memory/scripts/tests/test_recall_integration.py +621 -0
  33. package/vendor/hindsight-memory/settings.json +37 -0
  34. package/vendor/hindsight-memory/skills/setup.md +24 -0
  35. package/vendor/hindsight-memory/tests/conftest.py +94 -0
  36. package/vendor/hindsight-memory/tests/test_bank.py +142 -0
  37. package/vendor/hindsight-memory/tests/test_client.py +232 -0
  38. package/vendor/hindsight-memory/tests/test_config.py +128 -0
  39. package/vendor/hindsight-memory/tests/test_content.py +471 -0
  40. package/vendor/hindsight-memory/tests/test_drain_pending.py +192 -0
  41. package/vendor/hindsight-memory/tests/test_hooks.py +808 -0
  42. package/vendor/hindsight-memory/tests/test_manifest.py +14 -0
  43. package/vendor/hindsight-memory/tests/test_pending.py +152 -0
  44. package/vendor/hindsight-memory/tests/test_recall_exit_codes.py +325 -0
  45. package/vendor/hindsight-memory/tests/test_session_end_pending.py +205 -0
  46. package/vendor/hindsight-memory/tests/test_state.py +125 -0
@@ -0,0 +1,621 @@
1
+ """Integration tests for recall.py — block composition + ordering.
2
+
3
+ Exercises the actual main() flow with stubbed dependencies so we can
4
+ verify:
5
+ - The <active_directives> block is emitted ABOVE <hindsight_memories>
6
+ - Empty bank (no directives, no memories) → no output at all
7
+ - Active directives present but no memories → directives block alone
8
+ - Active memories present but no directives → unchanged legacy behavior
9
+ - Recall API failure with directives present → directives still emitted
10
+ (so a recall outage doesn't blind the agent to its own HARD RULES)
11
+
12
+ Stdlib-only (unittest + mock).
13
+ """
14
+
15
+ import io
16
+ import json
17
+ import os
18
+ import sys
19
+ import unittest
20
+ from unittest.mock import patch
21
+
22
+ SCRIPTS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
23
+ if SCRIPTS_DIR not in sys.path:
24
+ sys.path.insert(0, SCRIPTS_DIR)
25
+
26
+ import recall # noqa: E402
27
+
28
+
29
+ def _directive(name, content, priority=5):
30
+ return {
31
+ "id": f"id-{name}",
32
+ "bank_id": "test-bank",
33
+ "name": name,
34
+ "content": content,
35
+ "priority": priority,
36
+ "is_active": True,
37
+ "tags": [],
38
+ }
39
+
40
+
41
+ def _memory(text, mem_type="fact", mentioned_at="2026-01-01", mem_id=None, tags=None):
42
+ out = {"text": text, "type": mem_type, "mentioned_at": mentioned_at}
43
+ if mem_id is not None:
44
+ out["id"] = mem_id
45
+ if tags is not None:
46
+ out["tags"] = tags
47
+ return out
48
+
49
+
50
+ class _FakeClient:
51
+ """Stand-in for HindsightClient with configurable responses."""
52
+
53
+ def __init__(self, directives=None, memories=None, recall_exc=None, list_exc=None):
54
+ self._directives = directives if directives is not None else []
55
+ self._memories = memories if memories is not None else []
56
+ self._recall_exc = recall_exc
57
+ self._list_exc = list_exc
58
+
59
+ def list_directives(self, bank_id, active_only=True, timeout=2):
60
+ if self._list_exc is not None:
61
+ raise self._list_exc
62
+ return {"items": list(self._directives)}
63
+
64
+ def recall(self, bank_id, query, max_tokens=1024, budget="mid", types=None, timeout=10):
65
+ if self._recall_exc is not None:
66
+ raise self._recall_exc
67
+ return {"results": list(self._memories)}
68
+
69
+
70
def _run_main_with(client, prompt="What is the meaning of life?", config_extra=None):
    """Run recall.main() against a fake client and report what it printed.

    The hook payload is fed through a patched stdin; stdout and stderr are
    captured. `load_config`, `get_api_url`, `HindsightClient`,
    `ensure_bank_mission` and `write_state` on the `recall` module are all
    patched out for the duration of the call.

    `config_extra`, when given, is layered over the baseline config so a
    test can set fields such as `recallMaxMemories` or
    `recallAdditionalBanks` without this helper growing a parameter per
    field.

    Returns a `(additional_context, raw_stdout)` pair; `additional_context`
    is None when nothing (or only whitespace) was written to stdout.
    """
    baseline = {
        "autoRecall": True,
        "bankId": "test-bank",
        "recallMaxTokens": 1024,
        "recallBudget": "mid",
        "recallContextTurns": 1,
        "recallMaxQueryChars": 800,
        "recallPromptPreamble": "",
    }
    config = {**baseline, **(config_extra or {})}

    payload = json.dumps(
        {
            "prompt": prompt,
            "session_id": "test-session",
            "transcript_path": "",
            "cwd": "/tmp",
        }
    )

    captured_out = io.StringIO()
    captured_err = io.StringIO()
    with patch("sys.stdin", new=io.StringIO(payload)), \
            patch("sys.stdout", new=captured_out), \
            patch("sys.stderr", new=captured_err), \
            patch.object(recall, "load_config", return_value=config), \
            patch.object(recall, "get_api_url", return_value="http://localhost:18888"), \
            patch.object(recall, "HindsightClient", return_value=client), \
            patch.object(recall, "ensure_bank_mission", return_value=None), \
            patch.object(recall, "write_state", return_value=None):
        recall.main()

    raw = captured_out.getvalue()
    if raw.strip():
        envelope = json.loads(raw)
        return envelope["hookSpecificOutput"]["additionalContext"], raw
    return None, raw
114
+
115
+
116
class RecallIntegrationTests(unittest.TestCase):
    """Checks which context blocks main() emits, and in what order."""

    def test_directives_block_appears_above_memories_block(self):
        trailer = _directive("trailer", "End every response with: [VERIFIED]", priority=10)
        fake = _FakeClient(
            directives=[trailer],
            memories=[_memory("user prefers concise answers")],
        )
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        directives_at = context.find("<active_directives>")
        memories_at = context.find("<hindsight_memories>")
        self.assertGreaterEqual(directives_at, 0, "active_directives block missing")
        self.assertGreaterEqual(memories_at, 0, "hindsight_memories block missing")
        self.assertLess(directives_at, memories_at, "directives must come before memories")

    def test_empty_bank_emits_no_output(self):
        context, raw = _run_main_with(_FakeClient(directives=[], memories=[]))
        self.assertIsNone(context)
        self.assertEqual(raw.strip(), "")

    def test_directives_only_emits_directives_block_alone(self):
        trailer = _directive("trailer", "End every response with: [VERIFIED]", priority=10)
        context, _raw = _run_main_with(_FakeClient(directives=[trailer], memories=[]))
        self.assertIsNotNone(context)
        self.assertIn("<active_directives>", context)
        self.assertNotIn("<hindsight_memories>", context)
        self.assertIn("End every response with: [VERIFIED]", context)

    def test_memories_only_unchanged_legacy_behavior(self):
        # With no directives the directives block is left out altogether
        # rather than emitted as an empty wrapper.
        fake = _FakeClient(directives=[], memories=[_memory("an old preference")])
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertNotIn("<active_directives>", context)
        self.assertIn("<hindsight_memories>", context)
        self.assertIn("an old preference", context)

    def test_recall_failure_with_directives_still_emits_directives(self):
        # The agent must keep seeing its HARD RULES even while the recall
        # API is failing.
        trailer = _directive("trailer", "End every response with: [VERIFIED]", priority=10)
        fake = _FakeClient(
            directives=[trailer],
            memories=[],
            recall_exc=RuntimeError("HTTP 503"),
        )
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertIn("<active_directives>", context)
        self.assertNotIn("<hindsight_memories>", context)

    def test_directives_failure_does_not_kill_recall(self):
        # Mirror image of the test above: when list_directives fails, the
        # memories block must still be emitted.
        fake = _FakeClient(
            directives=[],
            memories=[_memory("legacy memory still useful")],
            list_exc=RuntimeError("HTTP 500"),
        )
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertIn("<hindsight_memories>", context)
        self.assertNotIn("<active_directives>", context)

    def test_blocks_separated_by_blank_line(self):
        fake = _FakeClient(
            directives=[_directive("rule", "do the thing", priority=5)],
            memories=[_memory("a memory")],
        )
        context, _raw = _run_main_with(fake)
        self.assertIn("</active_directives>\n\n<hindsight_memories>", context)
188
+
189
+
190
class RecallMaxMemoriesCapTests(unittest.TestCase):
    """Tests for the switchroom-local recallMaxMemories count cap.

    The cap is applied client-side after the (primary + additional banks)
    results are concatenated and BEFORE formatting, so it bounds the
    final injected memory count regardless of token budget. <= 0
    disables the cap.
    """

    @staticmethod
    def _label(index):
        """Return the memory text for `index`.

        Labels are zero-padded so no label is a substring of another;
        with unpadded labels "memory 1" would also match "memory 10",
        which makes substring assertions unable to spot a dropped entry.
        """
        return f"memory {index:02d}"

    def _numbered_memories(self, count):
        """Return `count` memories labelled via `_label`, in index order."""
        return [_memory(self._label(i)) for i in range(count)]

    def test_cap_truncates_over_limit(self):
        client = _FakeClient(directives=[], memories=self._numbered_memories(8))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 3})
        self.assertIsNotNone(ctx)
        # The first three survive...
        for i in range(3):
            self.assertIn(self._label(i), ctx)
        # ...and everything from index 3 onwards must be trimmed.
        for i in range(3, 8):
            self.assertNotIn(self._label(i), ctx)

    def test_cap_zero_disables_truncation(self):
        client = _FakeClient(directives=[], memories=self._numbered_memories(20))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 0})
        self.assertIsNotNone(ctx)
        # All 20 should make it through.
        for i in range(20):
            self.assertIn(self._label(i), ctx)

    def test_cap_below_count_no_op(self):
        # Cap=12 but only 5 memories returned → no slicing.
        client = _FakeClient(directives=[], memories=self._numbered_memories(5))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 12})
        self.assertIsNotNone(ctx)
        for i in range(5):
            self.assertIn(self._label(i), ctx)

    def test_cap_applies_after_additional_banks_concat(self):
        # Primary bank returns 4 memories; additional bank returns 4
        # more. Cap of 5 must apply to the total (slicing keeps primary
        # 0..3 + first 1 from additional). This locks in the rule:
        # "cap is total, not per-bank."
        primary = [_memory(f"primary-{i}") for i in range(4)]

        # Build a client whose `recall` returns different sets per bank.
        class _MultiBankClient(_FakeClient):
            # Accept the same positional/keyword arguments as
            # _FakeClient.recall so the stub works however main() calls it.
            def recall(self, bank_id, *args, **kwargs):
                if bank_id == "test-bank":
                    return {"results": list(primary)}
                if bank_id == "shared-bank":
                    return {"results": [_memory(f"shared-{i}") for i in range(4)]}
                return {"results": []}

        client = _MultiBankClient(directives=[], memories=[])
        ctx, _ = _run_main_with(
            client,
            config_extra={
                "recallMaxMemories": 5,
                "recallAdditionalBanks": ["shared-bank"],
            },
        )
        self.assertIsNotNone(ctx)
        # Primary 0..3 + the first shared (shared-0) survive the cap.
        for i in range(4):
            self.assertIn(f"primary-{i}", ctx)
        self.assertIn("shared-0", ctx)
        # shared-1..3 are sliced off.
        for i in range(1, 4):
            self.assertNotIn(f"shared-{i}", ctx)

    def test_cap_negative_disables(self):
        # Defensive: negative values are treated the same as 0 (uncapped).
        client = _FakeClient(directives=[], memories=self._numbered_memories(15))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": -1})
        self.assertIsNotNone(ctx)
        for i in range(15):
            self.assertIn(self._label(i), ctx)
269
+
270
+
271
class DemoteFromRecallTagTests(unittest.TestCase):
    """Switchroom #432 phase 4.4 — memories tagged demote-from-recall
    are filtered out of the auto-recall block but otherwise stay in the
    bank.
    """

    def test_bracketed_tag_is_filtered(self):
        memories = [
            _memory("keep this", tags=[]),
            _memory("drop this", tags=["[demote-from-recall]"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIsNotNone(ctx)
        self.assertIn("keep this", ctx)
        self.assertNotIn("drop this", ctx)

    def test_unbracketed_tag_is_filtered(self):
        memories = [
            _memory("keep this"),
            _memory("drop this", tags=["demote-from-recall"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIn("keep this", ctx)
        self.assertNotIn("drop this", ctx)

    def test_no_recall_alias_is_filtered(self):
        # `no-recall` is the third accepted variant — shorter to type when
        # tagging via `mcp__hindsight__update_memory`.
        memories = [
            _memory("keep this"),
            _memory("drop this", tags=["no-recall"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIn("keep this", ctx)
        self.assertNotIn("drop this", ctx)

    def test_unrelated_tag_is_kept(self):
        memories = [_memory("keep this", tags=["topic:fitness", "user:ken"])]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIn("keep this", ctx)

    def test_filter_applies_before_cap(self):
        # 8 memories total: 3 demoted FIRST, then 5 keepers, cap=4.
        #
        # The demoted entries must lead the list for this test to tell the
        # two orderings apart:
        #   filter → cap: keepers 0..4 remain, the cap keeps "keep 0".."keep 3"
        #   cap → filter: the cap keeps drop 0..2 + "keep 0", the filter then
        #                 leaves only "keep 0"
        # With the keepers listed first, both orderings would produce the
        # same output and the test would prove nothing.
        memories = [
            _memory(f"drop {i}", tags=["[demote-from-recall]"]) for i in range(3)
        ] + [_memory(f"keep {i}") for i in range(5)]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 4})
        self.assertIsNotNone(ctx)
        # All 4 cap survivors come from the "keep" pool.
        for i in range(4):
            self.assertIn(f"keep {i}", ctx)
        # The cap still fires on the filtered list: the fifth keeper is cut.
        self.assertNotIn("keep 4", ctx)
        for i in range(3):
            self.assertNotIn(f"drop {i}", ctx)
331
+
332
+
333
class RecallTelemetryLogTests(unittest.TestCase):
    """Switchroom #432 phase 4.3 — every recall (hit or miss) appends
    a JSONL record to state/recall_log.jsonl when CLAUDE_PLUGIN_DATA is
    set.
    """

    def setUp(self):
        import shutil
        import tempfile

        self._tmpdir = tempfile.mkdtemp(prefix="recall-log-test-")
        self.addCleanup(shutil.rmtree, self._tmpdir, ignore_errors=True)
        # The log writer reads CLAUDE_PLUGIN_DATA at write time. patch.dict
        # snapshots os.environ and puts it back on stop(), so whatever a
        # test does to the environment (including deleting the variable) is
        # undone even when the test fails part-way through.
        env_patch = patch.dict(os.environ, {"CLAUDE_PLUGIN_DATA": self._tmpdir})
        env_patch.start()
        self.addCleanup(env_patch.stop)

    def _read_log(self):
        """Return the parsed JSONL log entries, or [] if no log file exists."""
        path = os.path.join(self._tmpdir, "state", "recall_log.jsonl")
        if not os.path.isfile(path):
            return []
        with open(path, encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]

    def test_logs_one_line_per_recall_with_memory_ids(self):
        memories = [
            _memory("first", mem_id="mem-1"),
            _memory("second", mem_id="mem-2"),
        ]
        client = _FakeClient(directives=[], memories=memories)
        _run_main_with(client)
        entries = self._read_log()
        self.assertEqual(len(entries), 1)
        e = entries[0]
        self.assertEqual(e["result_count"], 2)
        self.assertEqual(e["memory_ids"], ["mem-1", "mem-2"])
        self.assertFalse(e["cache_hit"])
        self.assertFalse(e["capped"])
        self.assertEqual(e["bank_id"], "test-bank")

    def test_logs_capped_flag_when_cap_fires(self):
        memories = [_memory(f"m {i}", mem_id=f"id-{i}") for i in range(8)]
        client = _FakeClient(directives=[], memories=memories)
        _run_main_with(client, config_extra={"recallMaxMemories": 3})
        entries = self._read_log()
        self.assertEqual(len(entries), 1)
        e = entries[0]
        self.assertTrue(e["capped"])
        self.assertEqual(e["pre_cap_count"], 8)
        self.assertEqual(e["result_count"], 3)
        # Only the kept IDs are logged.
        self.assertEqual(e["memory_ids"], ["id-0", "id-1", "id-2"])

    def test_logs_demoted_count(self):
        memories = [
            _memory("keep", mem_id="k1"),
            _memory("drop", mem_id="d1", tags=["[demote-from-recall]"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        _run_main_with(client)
        entries = self._read_log()
        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0]["demoted_count"], 1)
        self.assertEqual(entries[0]["memory_ids"], ["k1"])

    def test_no_log_when_plugin_data_unset(self):
        # If CLAUDE_PLUGIN_DATA isn't set, the writer no-ops silently —
        # we don't want a stray log file in the working directory.
        # No manual restore is needed afterwards: the patch.dict cleanup
        # registered in setUp reinstates the original environment.
        os.environ.pop("CLAUDE_PLUGIN_DATA", None)
        client = _FakeClient(directives=[], memories=[_memory("x", mem_id="x1")])
        _run_main_with(client)
        # No file ever created.
        self.assertEqual(self._read_log(), [])
413
+
414
+
415
class AckShortCircuitTests(unittest.TestCase):
    """Switchroom: conversational acks ("thanks", "ok", "got it", etc.)
    skip recall entirely, saving the ~1-5s recall on turns where the
    model will answer with a one-liner anyway.
    """

    def _assert_no_recall(self, prompt):
        # The hook is expected to return before it ever touches the
        # client on an ack-only turn, so every client method here raises:
        # reaching one of them means the short-circuit did not happen.
        class _ExplodingClient:
            def list_directives(self, *args, **kwargs):
                raise AssertionError("list_directives called on ack-only turn")

            def recall(self, *args, **kwargs):
                raise AssertionError("recall called on ack-only turn")

        context, raw = _run_main_with(_ExplodingClient(), prompt=prompt)
        # Nothing at all is written: empty stdout, no hookSpecificOutput.
        self.assertIsNone(context)
        self.assertEqual(raw.strip(), "")

    def test_simple_thanks(self):
        self._assert_no_recall("thanks")

    def test_thanks_with_punctuation(self):
        self._assert_no_recall("thanks!")
        self._assert_no_recall("Thank you.")

    def test_got_it(self):
        self._assert_no_recall("got it")

    def test_emoji_ack(self):
        self._assert_no_recall("👍")
        # A repeated emoji is expected to be treated as an ack as well.
        self._assert_no_recall("👍👍")

    def test_channel_wrapped_ack(self):
        # Inbound prompts arrive wrapped by the telegram plugin; the
        # ack check has to see through that wrapper.
        wrapped = '<channel source="switchroom-telegram" chat_id="123">thanks</channel>'
        self._assert_no_recall(wrapped)

    def test_real_question_does_not_skip(self):
        # Sanity check: a genuine question is not an ack, so recall must
        # run. A working fake client is used and its memory must show up
        # in the output.
        fake = _FakeClient(directives=[], memories=[_memory("relevant memory")])
        context, _raw = _run_main_with(fake, prompt="What did we decide about the auth flow?")
        self.assertIsNotNone(context)
        self.assertIn("relevant memory", context)

    def test_ack_with_extra_words_does_not_skip(self):
        # "thanks for the update ..." is more than a bare ack, so it
        # should go through recall as usual.
        fake = _FakeClient(directives=[], memories=[_memory("the relevant fact")])
        context, _raw = _run_main_with(
            fake,
            prompt="thanks for the update on the deployment",
        )
        self.assertIsNotNone(context)
477
+
478
+
479
class JaccardOverlapUnitTests(unittest.TestCase):
    """Switchroom #475: unit tests for recall.jaccard_overlap."""

    def test_identical_text_is_full_overlap(self):
        # Holds after stop-word stripping (`is`, `the`, `a`, `to` removed).
        sentence = "deploy the staging server"
        self.assertEqual(recall.jaccard_overlap(sentence, sentence), 1.0)

    def test_disjoint_text_is_zero(self):
        score = recall.jaccard_overlap("deploy staging server", "vegan dinner recipes")
        self.assertEqual(score, 0.0)

    def test_partial_overlap_is_between(self):
        # {deploy, staging, server} vs {deploy, production, server}
        # share 2 of 4 distinct tokens → 0.5
        score = recall.jaccard_overlap("deploy staging server", "deploy production server")
        self.assertAlmostEqual(score, 0.5, places=2)

    def test_stopwords_dont_inflate_overlap(self):
        # "the" / "is" / "a" occur on both sides but must not be counted.
        # Remaining tokens: {cat, pet} vs {dog, pet} → 1 shared of 3 → 0.333…
        score = recall.jaccard_overlap("the cat is a pet", "the dog is a pet")
        self.assertAlmostEqual(score, 1 / 3, places=2)

    def test_empty_text_yields_zero(self):
        for left, right in (("", "anything at all"), ("query", "")):
            self.assertEqual(recall.jaccard_overlap(left, right), 0.0)

    def test_non_string_inputs_yield_zero(self):
        for left, right in ((None, "x"), ("x", None)):
            self.assertEqual(recall.jaccard_overlap(left, right), 0.0)

    def test_case_insensitive(self):
        score = recall.jaccard_overlap("DEPLOY Server", "deploy server")
        self.assertEqual(score, 1.0)

    def test_punctuation_stripped(self):
        score = recall.jaccard_overlap("deploy, server!", "deploy server")
        self.assertEqual(score, 1.0)
530
+
531
+
532
class OverlapFilterUnitTests(unittest.TestCase):
    """Switchroom #475: unit tests for recall._filter_by_overlap."""

    def test_threshold_zero_passthrough(self):
        candidates = [_memory("totally unrelated text")]
        survivors, dropped_count = recall._filter_by_overlap(candidates, "deploy server", 0.0)
        self.assertEqual(survivors, candidates)
        self.assertEqual(dropped_count, 0)

    def test_high_threshold_drops_weak_matches(self):
        full_match = _memory("deploy server staging")  # same tokens as the query
        no_match = _memory("vegan dinner recipes")  # shares nothing with the query
        survivors, dropped_count = recall._filter_by_overlap(
            [full_match, no_match], "deploy server staging", 0.5
        )
        self.assertEqual(len(survivors), 1)
        self.assertEqual(dropped_count, 1)
        self.assertEqual(survivors[0]["text"], "deploy server staging")

    def test_threshold_keeps_partial_match_at_or_above(self):
        # Overlap is 2/4 = 0.5, which meets a 0.5 threshold → kept.
        candidates = [_memory("deploy production server")]
        survivors, dropped_count = recall._filter_by_overlap(candidates, "deploy staging server", 0.5)
        self.assertEqual(len(survivors), 1)
        self.assertEqual(dropped_count, 0)

    def test_threshold_drops_partial_match_below(self):
        candidates = [_memory("deploy production server")]
        survivors, dropped_count = recall._filter_by_overlap(candidates, "deploy staging server", 0.51)
        self.assertEqual(len(survivors), 0)
        self.assertEqual(dropped_count, 1)
563
+
564
+
565
class OverlapGateIntegrationTests(unittest.TestCase):
    """Switchroom #475: the overlap gate exercised through main()."""

    def test_default_off_passes_everything_through(self):
        # Config carries no recallMinOverlap, so nothing is filtered.
        fake = _FakeClient(
            directives=[],
            memories=[_memory("deploy staging server"), _memory("vegan dinner recipes")],
        )
        context, _raw = _run_main_with(fake, prompt="how do we deploy staging?")
        self.assertIsNotNone(context)
        for expected in ("deploy staging server", "vegan dinner recipes"):
            self.assertIn(expected, context)

    def test_high_threshold_drops_irrelevant_memories(self):
        fake = _FakeClient(
            directives=[],
            memories=[_memory("deploy staging server"), _memory("vegan dinner recipes")],
        )
        context, _raw = _run_main_with(
            fake,
            prompt="how do we deploy staging server",
            config_extra={"recallMinOverlap": 0.5},
        )
        # The on-topic memory stays; the off-topic one is gone.
        self.assertIsNotNone(context)
        self.assertIn("deploy staging server", context)
        self.assertNotIn("vegan", context)

    def test_threshold_emits_no_block_when_all_dropped(self):
        # Every memory falls under the threshold, and there are no
        # directives either, so no additionalContext is expected at all.
        fake = _FakeClient(
            directives=[],
            memories=[_memory("vegan dinner recipes"), _memory("totally unrelated chatter")],
        )
        context, _raw = _run_main_with(
            fake,
            prompt="how do we deploy staging server",
            config_extra={"recallMinOverlap": 0.5},
        )
        self.assertIsNone(context)
618
+
619
+
620
+ if __name__ == "__main__":
621
+ unittest.main()
@@ -0,0 +1,37 @@
1
+ {
2
+ "hindsightApiUrl": "",
3
+ "bankId": "claude_code",
4
+ "bankMission": "You are a Claude Code AI assistant. Focus on technical discussions, decisions, and context relevant to the user's projects.",
5
+ "retainMission": "Extract technical decisions, architectural choices, user preferences, project context, and people/tool relationships. Ignore routine greetings and transient operational details.",
6
+ "autoRecall": true,
7
+ "autoRetain": true,
8
+ "retainMode": "full-session",
9
+ "recallBudget": "low",
10
+ "recallMaxTokens": 1024,
11
+ "recallMaxMemories": 12,
12
+ "recallTypes": ["world", "experience"],
13
+ "recallContextTurns": 1,
14
+ "recallMaxQueryChars": 800,
15
+ "recallRoles": ["user", "assistant"],
16
+ "recallPromptPreamble": "Relevant memories from past conversations (prioritize recent when conflicting). Only use memories that are directly useful to continue this conversation; ignore the rest:",
17
+ "retainRoles": ["user", "assistant"],
18
+ "retainEveryNTurns": 10,
19
+ "retainOverlapTurns": 2,
20
+ "retainToolCalls": true,
21
+ "retainTags": ["{session_id}"],
22
+ "retainMetadata": {},
23
+ "retainContext": "claude-code",
24
+ "hindsightApiToken": null,
25
+ "apiPort": 9077,
26
+ "daemonIdleTimeout": 0,
27
+ "embedVersion": "latest",
28
+ "embedPackagePath": null,
29
+ "bankIdPrefix": "",
30
+ "dynamicBankId": false,
31
+ "dynamicBankGranularity": ["agent", "project"],
32
+ "agentName": "",
33
+ "llmProvider": null,
34
+ "llmModel": null,
35
+ "llmApiKeyEnv": null,
36
+ "debug": false
37
+ }
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: hindsight:setup
3
+ description: Register hindsight-memory hooks into Claude Code settings. Run this once after installing the plugin.
4
+ ---
5
+
6
+ Register the hindsight-memory hooks into `~/.claude/settings.json` by running the setup script:
7
+
8
+ ```bash
9
+ python3 "$CLAUDE_PLUGIN_ROOT/scripts/setup_hooks.py"
10
+ ```
11
+
12
+ If `CLAUDE_PLUGIN_ROOT` is not set, find the path manually:
13
+
14
+ ```bash
15
+ ls ~/.claude/plugins/cache/hindsight/hindsight-memory/
16
+ ```
17
+
18
+ Then run the script, replacing `<version>` with the directory name listed by the previous command:
19
+
20
+ ```bash
21
+ python3 ~/.claude/plugins/cache/hindsight/hindsight-memory/<version>/scripts/setup_hooks.py
22
+ ```
23
+
24
+ After the script completes, restart Claude Code for the hooks to take effect. You should see `[Hindsight]` log lines on the next session start.