switchroom 0.12.27 → 0.12.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +4 -2
- package/package.json +2 -1
- package/telegram-plugin/dist/gateway/gateway.js +49 -5
- package/telegram-plugin/gateway/gateway.ts +5 -0
- package/telegram-plugin/stderr-timestamps.ts +106 -0
- package/telegram-plugin/tests/stderr-timestamps.test.ts +113 -0
- package/vendor/hindsight-memory/.claude-plugin/plugin.json +8 -0
- package/vendor/hindsight-memory/CHANGELOG.md +32 -0
- package/vendor/hindsight-memory/LICENSE +21 -0
- package/vendor/hindsight-memory/README.md +329 -0
- package/vendor/hindsight-memory/hooks/hooks.json +49 -0
- package/vendor/hindsight-memory/scripts/drain_pending.py +190 -0
- package/vendor/hindsight-memory/scripts/lib/__init__.py +0 -0
- package/vendor/hindsight-memory/scripts/lib/bank.py +122 -0
- package/vendor/hindsight-memory/scripts/lib/client.py +204 -0
- package/vendor/hindsight-memory/scripts/lib/config.py +180 -0
- package/vendor/hindsight-memory/scripts/lib/content.py +493 -0
- package/vendor/hindsight-memory/scripts/lib/daemon.py +334 -0
- package/vendor/hindsight-memory/scripts/lib/directives.py +119 -0
- package/vendor/hindsight-memory/scripts/lib/gateway_ipc.py +126 -0
- package/vendor/hindsight-memory/scripts/lib/llm.py +146 -0
- package/vendor/hindsight-memory/scripts/lib/pending.py +218 -0
- package/vendor/hindsight-memory/scripts/lib/state.py +196 -0
- package/vendor/hindsight-memory/scripts/recall.py +873 -0
- package/vendor/hindsight-memory/scripts/retain.py +286 -0
- package/vendor/hindsight-memory/scripts/session_end.py +122 -0
- package/vendor/hindsight-memory/scripts/session_start.py +76 -0
- package/vendor/hindsight-memory/scripts/setup_hooks.py +115 -0
- package/vendor/hindsight-memory/scripts/tests/__init__.py +0 -0
- package/vendor/hindsight-memory/scripts/tests/test_directives.py +211 -0
- package/vendor/hindsight-memory/scripts/tests/test_gateway_ipc.py +205 -0
- package/vendor/hindsight-memory/scripts/tests/test_recall_integration.py +621 -0
- package/vendor/hindsight-memory/settings.json +37 -0
- package/vendor/hindsight-memory/skills/setup.md +24 -0
- package/vendor/hindsight-memory/tests/conftest.py +94 -0
- package/vendor/hindsight-memory/tests/test_bank.py +142 -0
- package/vendor/hindsight-memory/tests/test_client.py +232 -0
- package/vendor/hindsight-memory/tests/test_config.py +128 -0
- package/vendor/hindsight-memory/tests/test_content.py +471 -0
- package/vendor/hindsight-memory/tests/test_drain_pending.py +192 -0
- package/vendor/hindsight-memory/tests/test_hooks.py +808 -0
- package/vendor/hindsight-memory/tests/test_manifest.py +14 -0
- package/vendor/hindsight-memory/tests/test_pending.py +152 -0
- package/vendor/hindsight-memory/tests/test_recall_exit_codes.py +325 -0
- package/vendor/hindsight-memory/tests/test_session_end_pending.py +205 -0
- package/vendor/hindsight-memory/tests/test_state.py +125 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
"""Tests for lib/content.py — pure content-processing functions."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from lib.content import (
|
|
6
|
+
_extract_text_content,
|
|
7
|
+
_is_channel_message_tool,
|
|
8
|
+
compose_recall_query,
|
|
9
|
+
format_memories,
|
|
10
|
+
prepare_retention_transcript,
|
|
11
|
+
slice_last_turns_by_user_boundary,
|
|
12
|
+
strip_channel_envelope,
|
|
13
|
+
strip_memory_tags,
|
|
14
|
+
truncate_recall_query,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# strip_channel_envelope
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestStripChannelEnvelope:
    """Behavioural checks for ``strip_channel_envelope``."""

    def test_strips_channel_xml(self):
        """A one-line ``<channel ...>`` wrapper is reduced to its inner text."""
        wrapped = '<channel source="plugin:telegram:telegram" chat_id="123">Hello world</channel>'
        unwrapped = strip_channel_envelope(wrapped)
        assert unwrapped == "Hello world"

    def test_passthrough_plain_text(self):
        """Text without any markup comes back unchanged."""
        plain = strip_channel_envelope("just plain text")
        assert plain == "just plain text"

    def test_strips_multiline_channel(self):
        """Newlines hugging the tags are dropped; inner newlines are kept."""
        wrapped = "<channel source='s'>\nline1\nline2\n</channel>"
        unwrapped = strip_channel_envelope(wrapped)
        assert unwrapped == "line1\nline2"

    def test_passthrough_when_no_channel_tag(self):
        """Markup that is not a ``<channel>`` element is left untouched."""
        other_markup = "<other>stuff</other>"
        assert strip_channel_envelope(other_markup) == other_markup
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# strip_memory_tags
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class TestStripMemoryTags:
    """Behavioural checks for ``strip_memory_tags``."""

    def test_strips_hindsight_memories_block(self):
        """The ``<hindsight_memories>`` block goes; surrounding text stays."""
        text = "before\n<hindsight_memories>secret</hindsight_memories>\nafter"
        assert "hindsight_memories" not in strip_memory_tags(text)
        for kept in ("before", "after"):
            assert kept in strip_memory_tags(text)

    def test_strips_relevant_memories_block(self):
        """Both the ``<relevant_memories>`` tag and its payload are removed."""
        text = "text <relevant_memories>old stuff</relevant_memories> text"
        cleaned = strip_memory_tags(text)
        for removed in ("relevant_memories", "old stuff"):
            assert removed not in cleaned

    def test_passthrough_clean_text(self):
        """Input without memory tags is returned as-is."""
        text = "no memory tags here"
        cleaned = strip_memory_tags(text)
        assert cleaned == text

    def test_strips_multiline_block(self):
        """A block spanning several lines leaves nothing but whitespace."""
        text = "<hindsight_memories>\n- mem1\n- mem2\n</hindsight_memories>"
        leftover = strip_memory_tags(text).strip()
        assert leftover == ""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# slice_last_turns_by_user_boundary
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _msgs(*pairs):
|
|
73
|
+
"""Build a message list from (role, content) pairs."""
|
|
74
|
+
return [{"role": r, "content": c} for r, c in pairs]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class TestSliceLastTurnsByUserBoundary:
    """Behavioural checks for ``slice_last_turns_by_user_boundary``."""

    def test_returns_all_when_fewer_turns_than_requested(self):
        """Asking for more turns than exist returns the whole conversation."""
        conversation = _msgs(("user", "hi"), ("assistant", "hello"))
        sliced = slice_last_turns_by_user_boundary(conversation, 5)
        assert sliced == conversation

    def test_slices_to_last_one_turn(self):
        """One turn means the final user message plus what follows it."""
        conversation = _msgs(
            ("user", "first"),
            ("assistant", "a1"),
            ("user", "second"),
            ("assistant", "a2"),
        )
        tail = slice_last_turns_by_user_boundary(conversation, 1)
        first_kept = tail[0]
        assert first_kept["content"] == "second"
        assert len(tail) == 2

    def test_slices_to_last_two_turns(self):
        """Two turns start at the second-to-last user message."""
        conversation = _msgs(
            ("user", "u1"),
            ("assistant", "a1"),
            ("user", "u2"),
            ("assistant", "a2"),
            ("user", "u3"),
            ("assistant", "a3"),
        )
        tail = slice_last_turns_by_user_boundary(conversation, 2)
        first_kept = tail[0]
        assert first_kept["content"] == "u2"
        assert len(tail) == 4

    def test_empty_list_returns_empty(self):
        """No messages in, no messages out."""
        sliced = slice_last_turns_by_user_boundary([], 3)
        assert sliced == []

    def test_zero_turns_returns_empty(self):
        """Requesting zero turns yields an empty list."""
        conversation = _msgs(("user", "hi"))
        sliced = slice_last_turns_by_user_boundary(conversation, 0)
        assert sliced == []

    def test_non_list_returns_empty(self):
        """``None`` in place of a message list yields an empty list."""
        sliced = slice_last_turns_by_user_boundary(None, 1)
        assert sliced == []
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
# compose_recall_query
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class TestComposeRecallQuery:
    """Behavioural checks for ``compose_recall_query``."""

    def test_single_turn_returns_latest_only(self):
        """With ``recall_context_turns=1`` only the latest query is returned."""
        history = _msgs(("user", "previous"), ("assistant", "reply"))
        composed = compose_recall_query("new query", history, recall_context_turns=1)
        assert composed == "new query"

    def test_multi_turn_includes_prior_context(self):
        """With two context turns the prior exchange is included under a header."""
        history = _msgs(("user", "prior question"), ("assistant", "prior answer"))
        composed = compose_recall_query("current question", history, recall_context_turns=2)
        for fragment in ("Prior context:", "prior question", "current question"):
            assert fragment in composed

    def test_skips_duplicate_of_latest_query(self):
        """A history message equal to the latest query appears only once."""
        history = _msgs(("user", "same question"), ("assistant", "answer"))
        composed = compose_recall_query("same question", history, recall_context_turns=2)
        # The duplicate user message must be dropped from the context part.
        occurrences = composed.count("same question")
        assert occurrences == 1

    def test_empty_messages_returns_latest(self):
        """Without history the latest query is returned verbatim."""
        composed = compose_recall_query("query", [], recall_context_turns=3)
        assert composed == "query"

    def test_strips_memory_tags_from_context(self):
        """Injected memory blocks in the history do not reach the query."""
        history = _msgs(
            ("user", "<hindsight_memories>secret</hindsight_memories> actual question"),
        )
        composed = compose_recall_query("now", history, recall_context_turns=2)
        for leaked in ("hindsight_memories", "secret"):
            assert leaked not in composed

    def test_filters_by_recall_roles(self):
        """Only messages whose role is listed in ``recall_roles`` are used."""
        history = _msgs(("user", "user msg"), ("assistant", "assistant msg"))
        composed = compose_recall_query(
            "query",
            history,
            recall_context_turns=2,
            recall_roles=["user"],
        )
        assert "user msg" in composed
        assert "assistant msg" not in composed
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
# truncate_recall_query
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class TestTruncateRecallQuery:
    """Behavioural checks for ``truncate_recall_query(query, latest, max_chars=...)``."""

    def test_short_query_unchanged(self):
        """A query already within the limit is returned untouched."""
        q = "short"
        assert truncate_recall_query(q, q, max_chars=100) == q

    def test_plain_query_truncated_to_max(self):
        """A query with no context section is cut down to at most ``max_chars``."""
        q = "x" * 50
        result = truncate_recall_query(q, q, max_chars=20)
        assert len(result) <= 20

    def test_preserves_latest_when_context_dropped(self):
        """The latest question survives even when the context cannot fit."""
        latest = "final question"
        query = f"Prior context:\n\nuser: old stuff\nassistant: old reply\n\n{latest}"
        result = truncate_recall_query(query, latest, max_chars=30)
        assert latest in result

    def test_drops_oldest_context_lines_first(self):
        """When context is kept at all, the oldest line must be the one dropped."""
        latest = "latest"
        query = f"Prior context:\n\nuser: oldest\nassistant: old\nuser: newer\n\n{latest}"
        # Allow only the newest context line + latest. The budget is measured
        # on the real context line, including its "user: " role prefix, plus
        # 5 characters of slack. Measuring the bare word "newer" would leave
        # the budget one character too small for that line to fit, and the
        # guarded assertion below would then have nothing to check.
        newest_only = f"Prior context:\n\nuser: newer\n\n{latest}"
        result = truncate_recall_query(query, latest, max_chars=len(newest_only) + 5)
        # The budget is still too small for two context lines, so a result
        # that keeps any context must not contain the oldest one.
        if "Prior context:" in result:
            assert "oldest" not in result

    def test_zero_max_returns_query_unchanged(self):
        """``max_chars=0`` leaves the query as it was."""
        q = "anything"
        assert truncate_recall_query(q, q, max_chars=0) == q
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# ---------------------------------------------------------------------------
|
|
195
|
+
# format_memories
|
|
196
|
+
# ---------------------------------------------------------------------------
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class TestFormatMemories:
    """Behavioural checks for ``format_memories``."""

    def test_formats_single_memory(self):
        """Text, bracketed type and parenthesised date all appear in the output."""
        memory = {"text": "Paris is the capital", "type": "world", "mentioned_at": "2024-01-01"}
        rendered = format_memories([memory])
        for fragment in ("Paris is the capital", "[world]", "(2024-01-01)"):
            assert fragment in rendered

    def test_formats_multiple_memories_with_separator(self):
        """Every memory's text is present when several are formatted together."""
        memories = [
            {"text": "mem1", "type": "experience", "mentioned_at": "2024-01-01"},
            {"text": "mem2", "type": "world", "mentioned_at": "2024-02-01"},
        ]
        rendered = format_memories(memories)
        for text in ("mem1", "mem2"):
            assert text in rendered

    def test_empty_list_returns_empty_string(self):
        """No memories produce an empty string."""
        rendered = format_memories([])
        assert rendered == ""

    def test_missing_optional_fields_graceful(self):
        """A memory carrying only ``text`` is still rendered."""
        rendered = format_memories([{"text": "bare memory"}])
        assert "bare memory" in rendered
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
# _is_channel_message_tool
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class TestIsChannelMessageTool:
    """Behavioural checks for ``_is_channel_message_tool``."""

    @staticmethod
    def _tool_use(name, payload):
        """Build a ``tool_use`` content block for the given tool name and input."""
        return {"type": "tool_use", "name": name, "input": payload}

    def test_telegram_send_message(self):
        """A Telegram send tool carrying ``text`` counts as a channel message."""
        block = self._tool_use("mcp__telegram__sendMessage", {"text": "hello"})
        assert _is_channel_message_tool(block) is True

    def test_slack_reply_tool(self):
        """A Slack reply tool carrying ``body`` counts as a channel message."""
        block = self._tool_use("mcp__slack__reply", {"body": "hi there"})
        assert _is_channel_message_tool(block) is True

    def test_operational_recall_tool_excluded(self):
        """The Hindsight recall tool is not a channel message."""
        block = self._tool_use("mcp__hindsight__recall", {"query": "test"})
        assert _is_channel_message_tool(block) is False

    def test_builtin_bash_tool_excluded(self):
        """The built-in ``Bash`` tool is not a channel message."""
        block = self._tool_use("Bash", {"command": "ls"})
        assert _is_channel_message_tool(block) is False

    def test_mcp_tool_without_text_field_excluded(self):
        """An MCP tool whose input has no text-like field is rejected."""
        block = self._tool_use("mcp__something__action", {"id": 123})
        assert _is_channel_message_tool(block) is False

    def test_mcp_tool_with_empty_text_excluded(self):
        """Whitespace-only text does not qualify as a message."""
        block = self._tool_use("mcp__telegram__send", {"text": " "})
        assert _is_channel_message_tool(block) is False

    def test_mcp_create_action_excluded(self):
        """A ``create_page`` action is rejected even though it carries content."""
        block = self._tool_use("mcp__notion__create_page", {"content": "hello"})
        assert _is_channel_message_tool(block) is False
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ---------------------------------------------------------------------------
|
|
261
|
+
# _extract_text_content
|
|
262
|
+
# ---------------------------------------------------------------------------
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class TestExtractTextContent:
    """Behavioural checks for ``_extract_text_content``."""

    def test_plain_string_returned_as_is(self):
        """String content is passed through unchanged."""
        extracted = _extract_text_content("hello", role="user")
        assert extracted == "hello"

    def test_text_block_extracted(self):
        """A lone ``text`` block yields its text."""
        blocks = [{"type": "text", "text": "response text"}]
        extracted = _extract_text_content(blocks, role="assistant")
        assert extracted == "response text"

    def test_thinking_block_excluded(self):
        """``thinking`` blocks are skipped while ``text`` blocks are kept."""
        thinking = {"type": "thinking", "thinking": "private"}
        spoken = {"type": "text", "text": "public"}
        extracted = _extract_text_content([thinking, spoken], role="assistant")
        assert "private" not in extracted
        assert "public" in extracted

    def test_channel_tool_use_extracted_for_assistant(self):
        """For the assistant role, a channel send tool contributes its text."""
        blocks = [{"type": "tool_use", "name": "mcp__telegram__send", "input": {"text": "hello user"}}]
        extracted = _extract_text_content(blocks, role="assistant")
        assert "hello user" in extracted

    def test_tool_use_not_extracted_for_user(self):
        """For the user role, the same tool block contributes nothing."""
        blocks = [{"type": "tool_use", "name": "mcp__telegram__send", "input": {"text": "hello user"}}]
        extracted = _extract_text_content(blocks, role="user")
        assert "hello user" not in extracted

    def test_empty_list_returns_empty_string(self):
        """An empty block list yields an empty string."""
        extracted = _extract_text_content([], role="assistant")
        assert extracted == ""

    def test_non_string_non_list_returns_empty(self):
        """Content that is neither a string nor a list yields an empty string."""
        for unsupported in (None, 42):
            assert _extract_text_content(unsupported, role="user") == ""
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
# ---------------------------------------------------------------------------
|
|
298
|
+
# prepare_retention_transcript
|
|
299
|
+
# ---------------------------------------------------------------------------
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class TestPrepareRetentionTranscript:
    """Behavioural checks for ``prepare_retention_transcript``.

    The function returns a ``(transcript, count)`` pair. The first group of
    tests covers the text format; the ``test_json_format_*`` tests cover the
    JSON output produced when ``include_tool_calls=True``.
    """

    @staticmethod
    def _first_tool_result(messages):
        """Return the first ``tool_result`` block found in decoded JSON messages."""
        carrier = next(
            msg for msg in messages if any(blk.get("type") == "tool_result" for blk in msg["content"])
        )
        return next(blk for blk in carrier["content"] if blk["type"] == "tool_result")

    def test_formats_last_turn_by_default(self):
        """Without the full window only the last turn (two messages) is kept."""
        history = _msgs(
            ("user", "old"),
            ("assistant", "old reply"),
            ("user", "new"),
            ("assistant", "new reply"),
        )
        transcript, count = prepare_retention_transcript(history, retain_full_window=False)
        for fragment in ("new", "new reply"):
            assert fragment in transcript
        assert count == 2

    def test_full_window_retains_all(self):
        """With the full window every message is kept and counted."""
        history = _msgs(
            ("user", "msg1"),
            ("assistant", "reply1"),
            ("user", "msg2"),
            ("assistant", "reply2"),
        )
        transcript, count = prepare_retention_transcript(history, retain_full_window=True)
        for fragment in ("msg1", "msg2"):
            assert fragment in transcript
        assert count == 4

    def test_strips_memory_tags(self):
        """Injected memory blocks are removed; the real question remains."""
        history = _msgs(("user", "<hindsight_memories>leaked</hindsight_memories> actual question"))
        transcript, _count = prepare_retention_transcript(history, retain_full_window=True)
        assert "leaked" not in transcript
        assert "actual question" in transcript

    def test_filters_by_retain_roles(self):
        """Only roles listed in ``retain_roles`` reach the transcript."""
        history = _msgs(("user", "user msg"), ("assistant", "assistant msg"))
        transcript, _count = prepare_retention_transcript(
            history,
            retain_roles=["user"],
            retain_full_window=True,
        )
        assert "user msg" in transcript
        assert "assistant msg" not in transcript

    def test_empty_messages_returns_none(self):
        """No messages yield ``(None, 0)``."""
        transcript, count = prepare_retention_transcript([])
        assert transcript is None
        assert count == 0

    def test_role_markers_present(self):
        """The text format wraps messages in role start/end markers."""
        history = _msgs(("user", "hello"))
        transcript, _count = prepare_retention_transcript(history, retain_full_window=True)
        for marker in ("[role: user]", "[user:end]"):
            assert marker in transcript

    def test_no_user_message_returns_none(self):
        """A last-turn slice with no user message yields no transcript."""
        history = _msgs(("assistant", "only assistant"))
        transcript, _count = prepare_retention_transcript(history, retain_full_window=False)
        assert transcript is None

    def test_json_format_with_tool_calls(self):
        """When include_tool_calls=True, output should be JSON with tool_use blocks."""
        import json

        edit_call = {
            "type": "tool_use",
            "name": "Edit",
            "input": {"file_path": "/tmp/foo.py", "old_string": "old", "new_string": "new"},
        }
        history = [
            {"role": "user", "content": "edit the file"},
            {
                "role": "assistant",
                "content": [{"type": "text", "text": "I'll edit that file."}, edit_call],
            },
        ]
        transcript, _count = prepare_retention_transcript(
            history, retain_full_window=True, include_tool_calls=True
        )
        assert transcript is not None
        decoded = json.loads(transcript)
        assert len(decoded) == 2
        assert decoded[0]["role"] == "user"
        assert decoded[1]["role"] == "assistant"
        # The assistant message must carry both the text and the tool_use block.
        assistant_blocks = decoded[1]["content"]
        kinds = [blk["type"] for blk in assistant_blocks]
        assert "text" in kinds
        assert "tool_use" in kinds
        # The tool's name and input must round-trip intact.
        kept_call = next(blk for blk in assistant_blocks if blk["type"] == "tool_use")
        assert kept_call["name"] == "Edit"
        assert kept_call["input"]["file_path"] == "/tmp/foo.py"

    def test_json_format_excludes_hindsight_mcp_tools(self):
        """Hindsight MCP tools should be excluded even in JSON mode."""
        import json

        history = [
            {"role": "user", "content": "recall something"},
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": "Let me check."},
                    {"type": "tool_use", "name": "mcp__hindsight__recall", "input": {"query": "test"}},
                ],
            },
        ]
        transcript, _count = prepare_retention_transcript(
            history, retain_full_window=True, include_tool_calls=True
        )
        decoded = json.loads(transcript)
        surviving = decoded[1]["content"]
        assert len(surviving) == 1
        assert surviving[0]["type"] == "text"

    def test_json_format_includes_tool_results(self):
        """Tool results should be included in JSON mode."""
        import json

        history = [
            {"role": "user", "content": "run ls"},
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": "Running ls."},
                    {"type": "tool_use", "name": "Bash", "input": {"command": "ls"}},
                ],
            },
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_result", "tool_use_id": "123", "content": "file1.py\nfile2.py"},
                    {"type": "text", "text": "Here are the files."},
                ],
            },
        ]
        transcript, _count = prepare_retention_transcript(
            history, retain_full_window=True, include_tool_calls=True
        )
        decoded = json.loads(transcript)
        tool_result = self._first_tool_result(decoded)
        assert "file1.py" in tool_result["content"]

    def test_json_format_handles_list_content_tool_results(self):
        """Tool results with list content (e.g. Agent subagent responses) should be extracted."""
        import json

        subagent_reply = {
            "type": "tool_result",
            "tool_use_id": "toolu_abc",
            "content": [
                {"type": "text", "text": "Found 3 issues in the codebase."},
                {"type": "text", "text": "1. Missing error handling in auth module"},
            ],
        }
        history = [
            {"role": "user", "content": "analyze the code"},
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "name": "Agent", "input": {"prompt": "check code"}},
                ],
            },
            {"role": "user", "content": [subagent_reply]},
        ]
        transcript, _count = prepare_retention_transcript(
            history, retain_full_window=True, include_tool_calls=True
        )
        decoded = json.loads(transcript)
        tool_result = self._first_tool_result(decoded)
        assert "Found 3 issues" in tool_result["content"]
        assert "Missing error handling" in tool_result["content"]

    def test_without_tool_calls_uses_text_format(self):
        """Default (include_tool_calls=False) should use legacy text format."""
        history = _msgs(("user", "hello"), ("assistant", "world"))
        transcript, _count = prepare_retention_transcript(
            history,
            retain_full_window=True,
            include_tool_calls=False,
        )
        for marker in ("[role: user]", "[user:end]"):
            assert marker in transcript
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Tests for drain_pending.drain() (#1071)."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
import time
|
|
8
|
+
import unittest
|
|
9
|
+
import urllib.error
|
|
10
|
+
from unittest.mock import patch
|
|
11
|
+
|
|
12
|
+
# Absolute path of the sibling ``scripts`` directory (one level above this
# file's directory). It is put at the front of ``sys.path`` unless already
# listed, so the modules inside it can be imported by name.
SCRIPTS_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "scripts")
)
if SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, SCRIPTS_DIR)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FakeOk:
    """Stand-in for the context manager ``urlopen()`` returns on a 200 OK.

    Only the pieces used here are provided: a ``status`` attribute, ``read()``
    and the context-manager protocol.
    """

    # Status code reported by every instance.
    status = 200

    def __init__(self, body: bytes = b'{"ok": true}'):
        # Bytes handed back verbatim by read().
        self._payload = body

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Returning False lets exceptions raised inside the ``with`` body propagate.
        return False

    def read(self):
        """Return the canned response body."""
        return self._payload
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _seed_entry(pending_dir: str, document_id: str = "doc-x", attempt: int = 1) -> str:
|
|
36
|
+
os.makedirs(pending_dir, mode=0o700, exist_ok=True)
|
|
37
|
+
ts_ms = int(time.time() * 1000)
|
|
38
|
+
name = f"{ts_ms}-{document_id}.json"
|
|
39
|
+
path = os.path.join(pending_dir, name)
|
|
40
|
+
payload = {
|
|
41
|
+
"schema": 1,
|
|
42
|
+
"api_url": "http://fake:9077",
|
|
43
|
+
"api_token": None,
|
|
44
|
+
"bank_id": "bank-1",
|
|
45
|
+
"content": "user: hi\nassistant: hi back",
|
|
46
|
+
"document_id": document_id,
|
|
47
|
+
"context": "claude-code",
|
|
48
|
+
"metadata": {},
|
|
49
|
+
"tags": None,
|
|
50
|
+
"failed_at": "2026-05-12T00:00:00Z",
|
|
51
|
+
"error_class": "URLError",
|
|
52
|
+
"error_message": "Connection refused",
|
|
53
|
+
"attempt_count": attempt,
|
|
54
|
+
}
|
|
55
|
+
with open(path, "w") as f:
|
|
56
|
+
json.dump(payload, f)
|
|
57
|
+
return path
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DrainPendingTest(unittest.TestCase):
    """Exercises ``drain_pending.drain()`` against a throwaway pending queue.

    Every test gets its own temporary queue directory, exposed to the code
    under test through ``HINDSIGHT_PENDING_DIR``, and ``urllib.request.urlopen``
    is patched so no network traffic takes place.
    """

    # Modules evicted from ``sys.modules`` around each test so module-level
    # config caching can't bleed across tests.
    _RESET_MODULES = ("drain_pending", "lib.pending")

    @classmethod
    def _forget_modules(cls):
        """Drop the modules under test from the import cache, if present."""
        for module_name in cls._RESET_MODULES:
            sys.modules.pop(module_name, None)

    @staticmethod
    def _failing_urlopen(reason):
        """Return a ``urlopen`` replacement that always raises ``URLError(reason)``."""

        def _raise(*args, **kwargs):
            raise urllib.error.URLError(reason)

        return _raise

    def setUp(self):
        self._tmp = tempfile.mkdtemp(prefix="hindsight-drain-test-")
        self._pending = os.path.join(self._tmp, "pending-retains")
        self._forget_modules()
        overrides = {
            "HINDSIGHT_PENDING_DIR": self._pending,
            "HINDSIGHT_DRAIN_TIMEOUT": "2",
            "HINDSIGHT_DRAIN_BUDGET_S": "60",
        }
        self._env = patch.dict(os.environ, overrides, clear=False)
        self._env.start()

    def tearDown(self):
        import shutil

        self._env.stop()
        shutil.rmtree(self._tmp, ignore_errors=True)
        self._forget_modules()

    def test_drain_empty_queue_is_noop(self):
        """With nothing queued, every counter is zero and no flag is set."""
        import drain_pending

        summary = drain_pending.drain({})
        for counter in ("drained", "retried", "dead"):
            self.assertEqual(summary[counter], 0)
        for flag in ("stalled", "budget_exceeded"):
            self.assertFalse(summary[flag])

    def test_drain_success_deletes_entry(self):
        """A 200 response drains the entry and removes its file."""
        entry_path = _seed_entry(self._pending)
        import drain_pending

        with patch("urllib.request.urlopen", return_value=FakeOk()):
            summary = drain_pending.drain({})
        self.assertEqual(summary["drained"], 1)
        self.assertEqual(summary["retried"], 0)
        self.assertFalse(os.path.exists(entry_path))

    def test_drain_failure_bumps_attempt_count(self):
        """A failed send keeps the file and records one more attempt."""
        entry_path = _seed_entry(self._pending, attempt=1)
        import drain_pending

        always_down = self._failing_urlopen("still down")
        with patch("urllib.request.urlopen", side_effect=always_down):
            summary = drain_pending.drain({})
        # Single entry → consecutive_failures hits 1, threshold is 3,
        # so we don't stall — but we only had one entry to try.
        self.assertEqual(summary["drained"], 0)
        self.assertEqual(summary["retried"], 1)
        self.assertTrue(os.path.exists(entry_path))
        with open(entry_path) as handle:
            stored = json.load(handle)
        self.assertEqual(stored["attempt_count"], 2)
        self.assertIn("last_attempt_at", stored)

    def test_drain_max_attempts_marks_dead(self):
        """Failing an entry already at ``MAX_ATTEMPTS`` renames it to ``.dead``."""
        from lib.pending import MAX_ATTEMPTS

        entry_path = _seed_entry(self._pending, attempt=MAX_ATTEMPTS)
        import drain_pending

        always_down = self._failing_urlopen("still down")
        with patch("urllib.request.urlopen", side_effect=always_down):
            summary = drain_pending.drain({})
        self.assertEqual(summary["dead"], 1)
        self.assertFalse(os.path.exists(entry_path))
        self.assertTrue(os.path.exists(entry_path + ".dead"))

    def test_drain_stall_guard_stops_after_threshold(self):
        """A run of identical failures trips the stall guard at the threshold."""
        # Seed 10 entries; with a same-error-class stream, the stall
        # guard should trip at STALL_THRESHOLD (3) and leave the rest.
        import drain_pending

        threshold = drain_pending.STALL_THRESHOLD
        for index in range(10):
            _seed_entry(self._pending, document_id=f"doc-{index:02d}")

        always_down = self._failing_urlopen("permanent")
        with patch("urllib.request.urlopen", side_effect=always_down):
            summary = drain_pending.drain({})

        self.assertTrue(summary["stalled"])
        self.assertEqual(summary["retried"], threshold)
        self.assertEqual(summary["drained"], 0)
        # Every seeded entry should still be on disk as a ``.json`` file.
        # The 10 - STALL_THRESHOLD entries past the stall point are expected
        # to be untouched (still at attempt_count == 1); only the file count
        # is asserted here.
        still_queued = [name for name in os.listdir(self._pending) if name.endswith(".json")]
        self.assertEqual(len(still_queued), 10)

    def test_drain_mixed_success_failure(self):
        """Only the second ``urlopen`` call fails: two drained, one retried."""
        # Three entries: server returns alternating ok/fail/ok. The short
        # sleeps make consecutive entries get different timestamp prefixes.
        _seed_entry(self._pending, document_id="doc-a")
        time.sleep(0.005)
        _seed_entry(self._pending, document_id="doc-b")
        time.sleep(0.005)
        _seed_entry(self._pending, document_id="doc-c")

        calls_seen = 0

        def second_call_fails(*args, **kwargs):
            nonlocal calls_seen
            position = calls_seen
            calls_seen += 1
            if position == 1:
                raise urllib.error.URLError("middle fail")
            return FakeOk()

        import drain_pending

        with patch("urllib.request.urlopen", side_effect=second_call_fails):
            summary = drain_pending.drain({})

        self.assertEqual(summary["drained"], 2)
        self.assertEqual(summary["retried"], 1)
|
|
190
|
+
|
|
191
|
+
if __name__ == "__main__":
|
|
192
|
+
unittest.main()
|