switchroom 0.12.26 → 0.12.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +80 -80
- package/dist/auth-broker/index.js +80 -80
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +359 -357
- package/dist/host-control/main.js +99 -99
- package/dist/vault/approvals/kernel-server.js +82 -82
- package/dist/vault/broker/server.js +83 -83
- package/package.json +2 -1
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +368 -209
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/gateway/gateway.ts +55 -40
- package/telegram-plugin/gateway/inbound-delivery-machine-dispatch.ts +188 -0
- package/telegram-plugin/stderr-timestamps.ts +106 -0
- package/telegram-plugin/tests/inbound-delivery-machine-dispatch.test.ts +240 -0
- package/telegram-plugin/tests/stderr-timestamps.test.ts +113 -0
- package/vendor/hindsight-memory/.claude-plugin/plugin.json +8 -0
- package/vendor/hindsight-memory/CHANGELOG.md +32 -0
- package/vendor/hindsight-memory/LICENSE +21 -0
- package/vendor/hindsight-memory/README.md +329 -0
- package/vendor/hindsight-memory/hooks/hooks.json +49 -0
- package/vendor/hindsight-memory/scripts/drain_pending.py +190 -0
- package/vendor/hindsight-memory/scripts/lib/__init__.py +0 -0
- package/vendor/hindsight-memory/scripts/lib/bank.py +122 -0
- package/vendor/hindsight-memory/scripts/lib/client.py +204 -0
- package/vendor/hindsight-memory/scripts/lib/config.py +180 -0
- package/vendor/hindsight-memory/scripts/lib/content.py +493 -0
- package/vendor/hindsight-memory/scripts/lib/daemon.py +334 -0
- package/vendor/hindsight-memory/scripts/lib/directives.py +119 -0
- package/vendor/hindsight-memory/scripts/lib/gateway_ipc.py +126 -0
- package/vendor/hindsight-memory/scripts/lib/llm.py +146 -0
- package/vendor/hindsight-memory/scripts/lib/pending.py +218 -0
- package/vendor/hindsight-memory/scripts/lib/state.py +196 -0
- package/vendor/hindsight-memory/scripts/recall.py +873 -0
- package/vendor/hindsight-memory/scripts/retain.py +286 -0
- package/vendor/hindsight-memory/scripts/session_end.py +122 -0
- package/vendor/hindsight-memory/scripts/session_start.py +76 -0
- package/vendor/hindsight-memory/scripts/setup_hooks.py +115 -0
- package/vendor/hindsight-memory/scripts/tests/__init__.py +0 -0
- package/vendor/hindsight-memory/scripts/tests/test_directives.py +211 -0
- package/vendor/hindsight-memory/scripts/tests/test_gateway_ipc.py +205 -0
- package/vendor/hindsight-memory/scripts/tests/test_recall_integration.py +621 -0
- package/vendor/hindsight-memory/settings.json +37 -0
- package/vendor/hindsight-memory/skills/setup.md +24 -0
- package/vendor/hindsight-memory/tests/conftest.py +94 -0
- package/vendor/hindsight-memory/tests/test_bank.py +142 -0
- package/vendor/hindsight-memory/tests/test_client.py +232 -0
- package/vendor/hindsight-memory/tests/test_config.py +128 -0
- package/vendor/hindsight-memory/tests/test_content.py +471 -0
- package/vendor/hindsight-memory/tests/test_drain_pending.py +192 -0
- package/vendor/hindsight-memory/tests/test_hooks.py +808 -0
- package/vendor/hindsight-memory/tests/test_manifest.py +14 -0
- package/vendor/hindsight-memory/tests/test_pending.py +152 -0
- package/vendor/hindsight-memory/tests/test_recall_exit_codes.py +325 -0
- package/vendor/hindsight-memory/tests/test_session_end_pending.py +205 -0
- package/vendor/hindsight-memory/tests/test_state.py +125 -0
|
@@ -0,0 +1,621 @@
|
|
|
1
|
+
"""Integration tests for recall.py — block composition + ordering.
|
|
2
|
+
|
|
3
|
+
Exercises the actual main() flow with stubbed dependencies so we can
|
|
4
|
+
verify:
|
|
5
|
+
- The <active_directives> block is emitted ABOVE <hindsight_memories>
|
|
6
|
+
- Empty bank (no directives, no memories) → no output at all
|
|
7
|
+
- Active directives present but no memories → directives block alone
|
|
8
|
+
- Active memories present but no directives → unchanged legacy behavior
|
|
9
|
+
- Recall API failure with directives present → directives still emitted
|
|
10
|
+
(so a recall outage doesn't blind the agent to its own HARD RULES)
|
|
11
|
+
|
|
12
|
+
Stdlib-only (unittest + mock).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import io
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
import unittest
|
|
20
|
+
from unittest.mock import patch
|
|
21
|
+
|
|
22
|
+
# Resolve the directory one level above this test file and put it at the
# front of sys.path (only if absent) so the `recall` import that follows
# this block can be resolved from there.
SCRIPTS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, SCRIPTS_DIR)
|
|
25
|
+
|
|
26
|
+
import recall # noqa: E402
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _directive(name, content, priority=5):
|
|
30
|
+
return {
|
|
31
|
+
"id": f"id-{name}",
|
|
32
|
+
"bank_id": "test-bank",
|
|
33
|
+
"name": name,
|
|
34
|
+
"content": content,
|
|
35
|
+
"priority": priority,
|
|
36
|
+
"is_active": True,
|
|
37
|
+
"tags": [],
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _memory(text, mem_type="fact", mentioned_at="2026-01-01", mem_id=None, tags=None):
|
|
42
|
+
out = {"text": text, "type": mem_type, "mentioned_at": mentioned_at}
|
|
43
|
+
if mem_id is not None:
|
|
44
|
+
out["id"] = mem_id
|
|
45
|
+
if tags is not None:
|
|
46
|
+
out["tags"] = tags
|
|
47
|
+
return out
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class _FakeClient:
|
|
51
|
+
"""Stand-in for HindsightClient with configurable responses."""
|
|
52
|
+
|
|
53
|
+
def __init__(self, directives=None, memories=None, recall_exc=None, list_exc=None):
|
|
54
|
+
self._directives = directives if directives is not None else []
|
|
55
|
+
self._memories = memories if memories is not None else []
|
|
56
|
+
self._recall_exc = recall_exc
|
|
57
|
+
self._list_exc = list_exc
|
|
58
|
+
|
|
59
|
+
def list_directives(self, bank_id, active_only=True, timeout=2):
|
|
60
|
+
if self._list_exc is not None:
|
|
61
|
+
raise self._list_exc
|
|
62
|
+
return {"items": list(self._directives)}
|
|
63
|
+
|
|
64
|
+
def recall(self, bank_id, query, max_tokens=1024, budget="mid", types=None, timeout=10):
|
|
65
|
+
if self._recall_exc is not None:
|
|
66
|
+
raise self._recall_exc
|
|
67
|
+
return {"results": list(self._memories)}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _run_main_with(client, prompt="What is the meaning of life?", config_extra=None):
    """Run ``recall.main()`` against a fake client and capture its stdout.

    The hook input is fed through a patched ``sys.stdin``; ``load_config``,
    ``get_api_url``, ``HindsightClient``, ``ensure_bank_mission`` and
    ``write_state`` on the ``recall`` module are patched out, and
    stdout/stderr are redirected to in-memory buffers.

    ``config_extra``, when truthy, is layered over the baseline config so a
    test can override individual knobs (for example ``recallMaxMemories``
    or ``recallAdditionalBanks``) without widening this signature.

    Returns ``(additional_context, raw_stdout)``; ``additional_context`` is
    ``None`` when nothing but whitespace was written to stdout.
    """
    baseline = {
        "autoRecall": True,
        "bankId": "test-bank",
        "recallMaxTokens": 1024,
        "recallBudget": "mid",
        "recallContextTurns": 1,
        "recallMaxQueryChars": 800,
        "recallPromptPreamble": "",
    }
    config = {**baseline, **config_extra} if config_extra else baseline

    payload = json.dumps(
        {
            "prompt": prompt,
            "session_id": "test-session",
            "transcript_path": "",
            "cwd": "/tmp",
        }
    )

    captured_out = io.StringIO()
    captured_err = io.StringIO()
    with patch.object(recall, "load_config", return_value=config), \
            patch.object(recall, "get_api_url", return_value="http://localhost:18888"), \
            patch.object(recall, "HindsightClient", return_value=client), \
            patch.object(recall, "ensure_bank_mission", return_value=None), \
            patch.object(recall, "write_state", return_value=None), \
            patch("sys.stdin", new=io.StringIO(payload)), \
            patch("sys.stdout", new=captured_out), \
            patch("sys.stderr", new=captured_err):
        recall.main()

    raw = captured_out.getvalue()
    if raw.strip():
        envelope = json.loads(raw)
        return envelope["hookSpecificOutput"]["additionalContext"], raw
    return None, raw
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class RecallIntegrationTests(unittest.TestCase):
    """Checks which context blocks recall.main() emits, and in what order."""

    @staticmethod
    def _trailer_directive():
        """Return the priority-10 directive reused by several tests."""
        return _directive("trailer", "End every response with: [VERIFIED]", priority=10)

    def test_directives_block_appears_above_memories_block(self):
        fake = _FakeClient(
            directives=[self._trailer_directive()],
            memories=[_memory("user prefers concise answers")],
        )
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        directives_at = context.find("<active_directives>")
        memories_at = context.find("<hindsight_memories>")
        self.assertGreaterEqual(directives_at, 0, "active_directives block missing")
        self.assertGreaterEqual(memories_at, 0, "hindsight_memories block missing")
        self.assertLess(directives_at, memories_at, "directives must come before memories")

    def test_empty_bank_emits_no_output(self):
        context, raw = _run_main_with(_FakeClient(directives=[], memories=[]))
        self.assertIsNone(context)
        self.assertEqual(raw.strip(), "")

    def test_directives_only_emits_directives_block_alone(self):
        fake = _FakeClient(directives=[self._trailer_directive()], memories=[])
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertIn("<active_directives>", context)
        self.assertNotIn("<hindsight_memories>", context)
        self.assertIn("End every response with: [VERIFIED]", context)

    def test_memories_only_unchanged_legacy_behavior(self):
        # With no directives the directives block must be absent entirely,
        # not present as an empty wrapper.
        fake = _FakeClient(directives=[], memories=[_memory("an old preference")])
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertNotIn("<active_directives>", context)
        self.assertIn("<hindsight_memories>", context)
        self.assertIn("an old preference", context)

    def test_recall_failure_with_directives_still_emits_directives(self):
        # A failing recall call must not hide the directives from the agent.
        fake = _FakeClient(
            directives=[self._trailer_directive()],
            memories=[],
            recall_exc=RuntimeError("HTTP 503"),
        )
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertIn("<active_directives>", context)
        self.assertNotIn("<hindsight_memories>", context)

    def test_directives_failure_does_not_kill_recall(self):
        # Mirror case: a failing list_directives call must not suppress
        # the memories block.
        fake = _FakeClient(
            directives=[],
            memories=[_memory("legacy memory still useful")],
            list_exc=RuntimeError("HTTP 500"),
        )
        context, _raw = _run_main_with(fake)
        self.assertIsNotNone(context)
        self.assertIn("<hindsight_memories>", context)
        self.assertNotIn("<active_directives>", context)

    def test_blocks_separated_by_blank_line(self):
        fake = _FakeClient(
            directives=[_directive("rule", "do the thing", priority=5)],
            memories=[_memory("a memory")],
        )
        context, _raw = _run_main_with(fake)
        self.assertIn("</active_directives>\n\n<hindsight_memories>", context)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class RecallMaxMemoriesCapTests(unittest.TestCase):
    """Tests for the switchroom-local recallMaxMemories count cap.

    The cap is applied client-side after the (primary + additional banks)
    results are concatenated and BEFORE formatting, so it bounds the
    final injected memory count regardless of token budget. <= 0
    disables the cap.

    Memory labels are zero-padded ("memory 03") so that no label is a
    substring of another; with unpadded labels "memory 1" would also match
    "memory 10".."memory 19" and a dropped entry could go unnoticed.
    """

    @staticmethod
    def _numbered(count):
        """Return ``count`` memories labelled "memory 00", "memory 01", ..."""
        return [_memory(f"memory {i:02d}") for i in range(count)]

    def test_cap_truncates_over_limit(self):
        client = _FakeClient(directives=[], memories=self._numbered(8))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 3})
        self.assertIsNotNone(ctx)
        self.assertIn("memory 00", ctx)
        self.assertIn("memory 02", ctx)
        # The fourth memory and everything after it must be trimmed.
        self.assertNotIn("memory 03", ctx)
        self.assertNotIn("memory 07", ctx)

    def test_cap_zero_disables_truncation(self):
        client = _FakeClient(directives=[], memories=self._numbered(20))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 0})
        self.assertIsNotNone(ctx)
        # All 20 should make it through.
        for i in range(20):
            self.assertIn(f"memory {i:02d}", ctx)

    def test_cap_below_count_no_op(self):
        # Cap=12 but only 5 memories returned, i.e. the count is below the
        # cap, so nothing is sliced off.
        client = _FakeClient(directives=[], memories=self._numbered(5))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 12})
        self.assertIsNotNone(ctx)
        for i in range(5):
            self.assertIn(f"memory {i:02d}", ctx)

    def test_cap_applies_after_additional_banks_concat(self):
        # Primary bank returns 4 memories; additional bank returns 4
        # more. Cap of 5 must apply to the total (slicing keeps primary
        # 0..3 + first 1 from additional). This locks in the rule:
        # "cap is total, not per-bank."
        primary = [_memory(f"primary-{i}") for i in range(4)]

        # Build a client whose `recall` returns different sets per bank.
        class _MultiBankClient(_FakeClient):
            def recall(self, bank_id, **kwargs):
                if bank_id == "test-bank":
                    return {"results": list(primary)}
                if bank_id == "shared-bank":
                    return {"results": [_memory(f"shared-{i}") for i in range(4)]}
                return {"results": []}

        client = _MultiBankClient(directives=[], memories=[])
        ctx, _ = _run_main_with(
            client,
            config_extra={
                "recallMaxMemories": 5,
                "recallAdditionalBanks": ["shared-bank"],
            },
        )
        self.assertIsNotNone(ctx)
        # Primary 0..3 + the first shared (shared-0) survive the cap.
        for i in range(4):
            self.assertIn(f"primary-{i}", ctx)
        self.assertIn("shared-0", ctx)
        # shared-1..3 are sliced off.
        self.assertNotIn("shared-1", ctx)
        self.assertNotIn("shared-3", ctx)

    def test_cap_negative_disables(self):
        # Defensive: negative values are treated the same as 0 (uncapped).
        client = _FakeClient(directives=[], memories=self._numbered(15))
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": -1})
        self.assertIsNotNone(ctx)
        for i in range(15):
            self.assertIn(f"memory {i:02d}", ctx)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class DemoteFromRecallTagTests(unittest.TestCase):
    """Switchroom #432 phase 4.4 — memories tagged demote-from-recall
    are filtered out of the auto-recall block but otherwise stay in the
    bank.
    """

    def test_bracketed_tag_is_filtered(self):
        memories = [
            _memory("keep this", tags=[]),
            _memory("drop this", tags=["[demote-from-recall]"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIsNotNone(ctx)
        self.assertIn("keep this", ctx)
        self.assertNotIn("drop this", ctx)

    def test_unbracketed_tag_is_filtered(self):
        memories = [
            _memory("keep this"),
            _memory("drop this", tags=["demote-from-recall"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIn("keep this", ctx)
        self.assertNotIn("drop this", ctx)

    def test_no_recall_alias_is_filtered(self):
        # `no-recall` is the third accepted variant — shorter to type when
        # tagging via `mcp__hindsight__update_memory`.
        memories = [
            _memory("keep this"),
            _memory("drop this", tags=["no-recall"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIn("keep this", ctx)
        self.assertNotIn("drop this", ctx)

    def test_unrelated_tag_is_kept(self):
        memories = [_memory("keep this", tags=["topic:fitness", "user:ken"])]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client)
        self.assertIn("keep this", ctx)

    def test_filter_applies_before_cap(self):
        # 8 memories total, 3 demoted, cap=4. The demoted entries are placed
        # FIRST on purpose: if the cap ran before the filter it would keep
        # drop 0..2 + keep 0, and after filtering only "keep 0" would
        # remain. With the filter first, the 5 "keep" entries reach the cap
        # and keep 0..3 survive. (With the demoted entries last, both
        # orderings give the same output and the test proves nothing.)
        memories = [
            _memory(f"drop {i}", tags=["[demote-from-recall]"]) for i in range(3)
        ] + [_memory(f"keep {i}") for i in range(5)]
        client = _FakeClient(directives=[], memories=memories)
        ctx, _ = _run_main_with(client, config_extra={"recallMaxMemories": 4})
        self.assertIsNotNone(ctx)
        # All 4 cap survivors come from the "keep" pool.
        for i in range(4):
            self.assertIn(f"keep {i}", ctx)
        # The fifth non-demoted memory is the one the cap slices off.
        self.assertNotIn("keep 4", ctx)
        for i in range(3):
            self.assertNotIn(f"drop {i}", ctx)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
class RecallTelemetryLogTests(unittest.TestCase):
    """Switchroom #432 phase 4.3 — every recall (hit or miss) appends
    a JSONL record to state/recall_log.jsonl when CLAUDE_PLUGIN_DATA is
    set.
    """

    def setUp(self):
        import tempfile

        self._tmpdir = tempfile.mkdtemp(prefix="recall-log-test-")
        # The log writer reads CLAUDE_PLUGIN_DATA at write time. Set it
        # for the test and restore on tearDown.
        self._prev = os.environ.get("CLAUDE_PLUGIN_DATA")
        os.environ["CLAUDE_PLUGIN_DATA"] = self._tmpdir

    def tearDown(self):
        import shutil

        shutil.rmtree(self._tmpdir, ignore_errors=True)
        if self._prev is None:
            os.environ.pop("CLAUDE_PLUGIN_DATA", None)
        else:
            os.environ["CLAUDE_PLUGIN_DATA"] = self._prev

    def _read_log(self):
        """Return the parsed JSONL entries from the temp dir, or [] if no log exists."""
        path = os.path.join(self._tmpdir, "state", "recall_log.jsonl")
        if not os.path.isfile(path):
            return []
        with open(path, encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]

    def test_logs_one_line_per_recall_with_memory_ids(self):
        memories = [
            _memory("first", mem_id="mem-1"),
            _memory("second", mem_id="mem-2"),
        ]
        client = _FakeClient(directives=[], memories=memories)
        _run_main_with(client)
        entries = self._read_log()
        self.assertEqual(len(entries), 1)
        e = entries[0]
        self.assertEqual(e["result_count"], 2)
        self.assertEqual(e["memory_ids"], ["mem-1", "mem-2"])
        self.assertFalse(e["cache_hit"])
        self.assertFalse(e["capped"])
        self.assertEqual(e["bank_id"], "test-bank")

    def test_logs_capped_flag_when_cap_fires(self):
        memories = [_memory(f"m {i}", mem_id=f"id-{i}") for i in range(8)]
        client = _FakeClient(directives=[], memories=memories)
        _run_main_with(client, config_extra={"recallMaxMemories": 3})
        entries = self._read_log()
        self.assertEqual(len(entries), 1)
        e = entries[0]
        self.assertTrue(e["capped"])
        self.assertEqual(e["pre_cap_count"], 8)
        self.assertEqual(e["result_count"], 3)
        # Only the kept IDs are logged.
        self.assertEqual(e["memory_ids"], ["id-0", "id-1", "id-2"])

    def test_logs_demoted_count(self):
        memories = [
            _memory("keep", mem_id="k1"),
            _memory("drop", mem_id="d1", tags=["[demote-from-recall]"]),
        ]
        client = _FakeClient(directives=[], memories=memories)
        _run_main_with(client)
        entries = self._read_log()
        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0]["demoted_count"], 1)
        self.assertEqual(entries[0]["memory_ids"], ["k1"])

    def test_no_log_when_plugin_data_unset(self):
        # If CLAUDE_PLUGIN_DATA isn't set, the writer should no-op silently —
        # we don't want a stray log file in the working directory.
        #
        # Run with the temp dir as the working directory: once the env var
        # is gone the writer has no way to learn the temp dir's path, so
        # checking it from any other cwd could never fail. With cwd pointed
        # at it, a writer that fell back to a relative "state/..." path
        # would create exactly the file _read_log() looks for.
        # NOTE(review): assumes recall.main() does not depend on the process
        # cwd for anything else — confirm against recall.py.
        client = _FakeClient(directives=[], memories=[_memory("x", mem_id="x1")])
        original_cwd = os.getcwd()
        # patch.dict snapshots os.environ and restores it on exit, even if
        # the run below raises.
        with patch.dict(os.environ):
            os.environ.pop("CLAUDE_PLUGIN_DATA", None)
            os.chdir(self._tmpdir)
            try:
                _run_main_with(client)
            finally:
                os.chdir(original_cwd)
        # No file ever created.
        self.assertEqual(self._read_log(), [])
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
class AckShortCircuitTests(unittest.TestCase):
    """Switchroom: conversational acks ("thanks", "ok", "got it", etc.)
    skip recall entirely, avoiding the ~1-5s recall on turns where the
    model will answer with a one-liner anyway.
    """

    def _assert_no_recall(self, prompt):
        """Assert that ``prompt`` yields no output and never touches the client."""

        # Both client methods raise, so any call made on an ack-only turn
        # shows up as an AssertionError out of _run_main_with.
        class _BoomClient:
            def list_directives(self, *a, **kw):
                raise AssertionError("list_directives called on ack-only turn")

            def recall(self, *a, **kw):
                raise AssertionError("recall called on ack-only turn")

        context, raw = _run_main_with(_BoomClient(), prompt=prompt)
        # Skipped turn: stdout is empty, so no hookSpecificOutput exists.
        self.assertIsNone(context)
        self.assertEqual(raw.strip(), "")

    def test_simple_thanks(self):
        self._assert_no_recall("thanks")

    def test_thanks_with_punctuation(self):
        for ack in ("thanks!", "Thank you."):
            self._assert_no_recall(ack)

    def test_got_it(self):
        self._assert_no_recall("got it")

    def test_emoji_ack(self):
        # The doubled emoji is expected to be treated as an ack as well.
        for ack in ("👍", "👍👍"):
            self._assert_no_recall(ack)

    def test_channel_wrapped_ack(self):
        # Inbound prompts from the telegram plugin arrive wrapped in a
        # <channel> element; the ack check has to see through it.
        wrapped = '<channel source="switchroom-telegram" chat_id="123">thanks</channel>'
        self._assert_no_recall(wrapped)

    def test_real_question_does_not_skip(self):
        # Sanity check: a genuine question must reach recall, so a normal
        # fake client is used and its memory must appear in the output.
        fake = _FakeClient(directives=[], memories=[_memory("relevant memory")])
        context, _raw = _run_main_with(fake, prompt="What did we decide about the auth flow?")
        self.assertIsNotNone(context)
        self.assertIn("relevant memory", context)

    def test_ack_with_extra_words_does_not_skip(self):
        # An ack word followed by real content is not a pure ack and
        # should fall through to recall.
        fake = _FakeClient(directives=[], memories=[_memory("the relevant fact")])
        context, _raw = _run_main_with(fake, prompt="thanks for the update on the deployment")
        self.assertIsNotNone(context)
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
class JaccardOverlapUnitTests(unittest.TestCase):
    """Switchroom #475: direct tests of ``recall.jaccard_overlap``."""

    def test_identical_text_is_full_overlap(self):
        # Stop words are expected to be stripped from both sides first.
        phrase = "deploy the staging server"
        self.assertEqual(recall.jaccard_overlap(phrase, phrase), 1.0)

    def test_disjoint_text_is_zero(self):
        score = recall.jaccard_overlap("deploy staging server", "vegan dinner recipes")
        self.assertEqual(score, 0.0)

    def test_partial_overlap_is_between(self):
        # {deploy, staging, server} vs {deploy, production, server}:
        # 2 shared tokens out of 4 distinct ones gives 0.5.
        score = recall.jaccard_overlap("deploy staging server", "deploy production server")
        self.assertAlmostEqual(score, 0.5, places=2)

    def test_stopwords_dont_inflate_overlap(self):
        # Shared stop words ("the", "is", "a") must not count. The expected
        # remaining tokens are {cat, pet} vs {dog, pet}: 1 shared of 3.
        score = recall.jaccard_overlap("the cat is a pet", "the dog is a pet")
        self.assertAlmostEqual(score, 1 / 3, places=2)

    def test_empty_text_yields_zero(self):
        for left, right in (("", "anything at all"), ("query", "")):
            self.assertEqual(recall.jaccard_overlap(left, right), 0.0)

    def test_non_string_inputs_yield_zero(self):
        for left, right in ((None, "x"), ("x", None)):
            self.assertEqual(recall.jaccard_overlap(left, right), 0.0)

    def test_case_insensitive(self):
        score = recall.jaccard_overlap("DEPLOY Server", "deploy server")
        self.assertEqual(score, 1.0)

    def test_punctuation_stripped(self):
        score = recall.jaccard_overlap("deploy, server!", "deploy server")
        self.assertEqual(score, 1.0)
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
class OverlapFilterUnitTests(unittest.TestCase):
    """Switchroom #475: direct tests of ``recall._filter_by_overlap``."""

    def test_threshold_zero_passthrough(self):
        candidates = [_memory("totally unrelated text")]
        survivors, removed = recall._filter_by_overlap(candidates, "deploy server", 0.0)
        self.assertEqual(survivors, candidates)
        self.assertEqual(removed, 0)

    def test_high_threshold_drops_weak_matches(self):
        matching = _memory("deploy server staging")  # same tokens as the query
        unrelated = _memory("vegan dinner recipes")  # shares nothing with it
        survivors, removed = recall._filter_by_overlap(
            [matching, unrelated], "deploy server staging", 0.5
        )
        self.assertEqual(len(survivors), 1)
        self.assertEqual(removed, 1)
        self.assertEqual(survivors[0]["text"], "deploy server staging")

    def test_threshold_keeps_partial_match_at_or_above(self):
        # Expected overlap is 2/4 = 0.5, which meets the 0.5 threshold.
        candidates = [_memory("deploy production server")]
        survivors, removed = recall._filter_by_overlap(candidates, "deploy staging server", 0.5)
        self.assertEqual(len(survivors), 1)
        self.assertEqual(removed, 0)

    def test_threshold_drops_partial_match_below(self):
        candidates = [_memory("deploy production server")]
        survivors, removed = recall._filter_by_overlap(candidates, "deploy staging server", 0.51)
        self.assertEqual(len(survivors), 0)
        self.assertEqual(removed, 1)
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
class OverlapGateIntegrationTests(unittest.TestCase):
    """Switchroom #475: the overlap gate exercised through ``recall.main()``."""

    def test_default_off_passes_everything_through(self):
        # The config carries no recallMinOverlap, so nothing is filtered.
        fake = _FakeClient(
            directives=[],
            memories=[_memory("deploy staging server"), _memory("vegan dinner recipes")],
        )
        context, _raw = _run_main_with(fake, prompt="how do we deploy staging?")
        self.assertIsNotNone(context)
        for expected in ("deploy staging server", "vegan dinner recipes"):
            self.assertIn(expected, context)

    def test_high_threshold_drops_irrelevant_memories(self):
        fake = _FakeClient(
            directives=[],
            memories=[_memory("deploy staging server"), _memory("vegan dinner recipes")],
        )
        context, _raw = _run_main_with(
            fake,
            prompt="how do we deploy staging server",
            config_extra={"recallMinOverlap": 0.5},
        )
        # The on-topic memory stays; the off-topic one is gone.
        self.assertIsNotNone(context)
        self.assertIn("deploy staging server", context)
        self.assertNotIn("vegan", context)

    def test_threshold_emits_no_block_when_all_dropped(self):
        # Every memory falls below the threshold and there are no
        # directives, so no additionalContext is expected at all.
        # (This test does not inspect telemetry.)
        fake = _FakeClient(
            directives=[],
            memories=[_memory("vegan dinner recipes"), _memory("totally unrelated chatter")],
        )
        context, _raw = _run_main_with(
            fake,
            prompt="how do we deploy staging server",
            config_extra={"recallMinOverlap": 0.5},
        )
        self.assertIsNone(context)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
# Run the unittest runner when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"hindsightApiUrl": "",
|
|
3
|
+
"bankId": "claude_code",
|
|
4
|
+
"bankMission": "You are a Claude Code AI assistant. Focus on technical discussions, decisions, and context relevant to the user's projects.",
|
|
5
|
+
"retainMission": "Extract technical decisions, architectural choices, user preferences, project context, and people/tool relationships. Ignore routine greetings and transient operational details.",
|
|
6
|
+
"autoRecall": true,
|
|
7
|
+
"autoRetain": true,
|
|
8
|
+
"retainMode": "full-session",
|
|
9
|
+
"recallBudget": "low",
|
|
10
|
+
"recallMaxTokens": 1024,
|
|
11
|
+
"recallMaxMemories": 12,
|
|
12
|
+
"recallTypes": ["world", "experience"],
|
|
13
|
+
"recallContextTurns": 1,
|
|
14
|
+
"recallMaxQueryChars": 800,
|
|
15
|
+
"recallRoles": ["user", "assistant"],
|
|
16
|
+
"recallPromptPreamble": "Relevant memories from past conversations (prioritize recent when conflicting). Only use memories that are directly useful to continue this conversation; ignore the rest:",
|
|
17
|
+
"retainRoles": ["user", "assistant"],
|
|
18
|
+
"retainEveryNTurns": 10,
|
|
19
|
+
"retainOverlapTurns": 2,
|
|
20
|
+
"retainToolCalls": true,
|
|
21
|
+
"retainTags": ["{session_id}"],
|
|
22
|
+
"retainMetadata": {},
|
|
23
|
+
"retainContext": "claude-code",
|
|
24
|
+
"hindsightApiToken": null,
|
|
25
|
+
"apiPort": 9077,
|
|
26
|
+
"daemonIdleTimeout": 0,
|
|
27
|
+
"embedVersion": "latest",
|
|
28
|
+
"embedPackagePath": null,
|
|
29
|
+
"bankIdPrefix": "",
|
|
30
|
+
"dynamicBankId": false,
|
|
31
|
+
"dynamicBankGranularity": ["agent", "project"],
|
|
32
|
+
"agentName": "",
|
|
33
|
+
"llmProvider": null,
|
|
34
|
+
"llmModel": null,
|
|
35
|
+
"llmApiKeyEnv": null,
|
|
36
|
+
"debug": false
|
|
37
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: hindsight:setup
|
|
3
|
+
description: Register hindsight-memory hooks into Claude Code settings. Run this once after installing the plugin.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Register the hindsight-memory hooks into `~/.claude/settings.json` by running the setup script:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
python3 "$CLAUDE_PLUGIN_ROOT/scripts/setup_hooks.py"
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
If `CLAUDE_PLUGIN_ROOT` is not set, find the path manually:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
ls ~/.claude/plugins/cache/hindsight/hindsight-memory/
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Then run:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
python3 ~/.claude/plugins/cache/hindsight/hindsight-memory/<version>/scripts/setup_hooks.py
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
After the script completes, restart Claude Code for the hooks to take effect. You should see `[Hindsight]` log lines on the next session start.
|