@ricky-stevens/context-guardian 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +29 -0
- package/.claude-plugin/plugin.json +63 -0
- package/.github/workflows/ci.yml +66 -0
- package/CLAUDE.md +132 -0
- package/LICENSE +21 -0
- package/README.md +362 -0
- package/biome.json +34 -0
- package/bun.lock +31 -0
- package/hooks/precompact.mjs +73 -0
- package/hooks/session-start.mjs +133 -0
- package/hooks/stop.mjs +172 -0
- package/hooks/submit.mjs +133 -0
- package/lib/checkpoint.mjs +258 -0
- package/lib/compact-cli.mjs +124 -0
- package/lib/compact-output.mjs +350 -0
- package/lib/config.mjs +40 -0
- package/lib/content.mjs +33 -0
- package/lib/diagnostics.mjs +221 -0
- package/lib/estimate.mjs +254 -0
- package/lib/extract-helpers.mjs +869 -0
- package/lib/handoff.mjs +329 -0
- package/lib/logger.mjs +34 -0
- package/lib/mcp-tools.mjs +200 -0
- package/lib/paths.mjs +90 -0
- package/lib/stats.mjs +81 -0
- package/lib/statusline.mjs +123 -0
- package/lib/synthetic-session.mjs +273 -0
- package/lib/tokens.mjs +170 -0
- package/lib/tool-summary.mjs +399 -0
- package/lib/transcript.mjs +939 -0
- package/lib/trim.mjs +158 -0
- package/package.json +22 -0
- package/skills/compact/SKILL.md +20 -0
- package/skills/config/SKILL.md +70 -0
- package/skills/handoff/SKILL.md +26 -0
- package/skills/prune/SKILL.md +20 -0
- package/skills/stats/SKILL.md +100 -0
- package/sonar-project.properties +12 -0
- package/test/checkpoint.test.mjs +171 -0
- package/test/compact-cli.test.mjs +230 -0
- package/test/compact-output.test.mjs +284 -0
- package/test/compaction-e2e.test.mjs +809 -0
- package/test/content.test.mjs +86 -0
- package/test/diagnostics.test.mjs +188 -0
- package/test/edge-cases.test.mjs +543 -0
- package/test/estimate.test.mjs +262 -0
- package/test/extract-helpers-coverage.test.mjs +333 -0
- package/test/extract-helpers.test.mjs +234 -0
- package/test/handoff.test.mjs +738 -0
- package/test/integration.test.mjs +582 -0
- package/test/logger.test.mjs +70 -0
- package/test/manual-compaction-test.md +426 -0
- package/test/mcp-tools.test.mjs +443 -0
- package/test/paths.test.mjs +250 -0
- package/test/quick-compaction-test.md +191 -0
- package/test/stats.test.mjs +88 -0
- package/test/statusline.test.mjs +222 -0
- package/test/submit.test.mjs +232 -0
- package/test/synthetic-session.test.mjs +600 -0
- package/test/tokens.test.mjs +293 -0
- package/test/tool-summary.test.mjs +771 -0
- package/test/transcript-coverage.test.mjs +369 -0
- package/test/transcript.test.mjs +596 -0
- package/test/trim.test.mjs +356 -0
|
@@ -0,0 +1,809 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end compaction verification test.
|
|
3
|
+
*
|
|
4
|
+
* Creates a realistic synthetic transcript containing specific trackable facts,
|
|
5
|
+
* runs extractConversation and extractRecent on it, then verifies that every
|
|
6
|
+
* important fact survives the extraction pipeline.
|
|
7
|
+
*
|
|
8
|
+
* This is the ultimate regression test for context preservation: if a fact
|
|
9
|
+
* is listed in EXPECTED_FACTS and the extraction drops it, the test fails.
|
|
10
|
+
*
|
|
11
|
+
* Optionally, set CLAUDE_API_KEY env var to also verify that Claude can
|
|
12
|
+
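 * comprehend and answer questions from the checkpoint (LLM verification).
 * NOTE: nothing in this file reads CLAUDE_API_KEY; the LLM verification step is not implemented here.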
|
|
13
|
+
*
|
|
14
|
+
* @module compaction-e2e-test
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import assert from "node:assert/strict";
|
|
18
|
+
import fs from "node:fs";
|
|
19
|
+
import os from "node:os";
|
|
20
|
+
import path from "node:path";
|
|
21
|
+
import { afterEach, beforeEach, describe, it } from "node:test";
|
|
22
|
+
import { extractConversation, extractRecent } from "../lib/transcript.mjs";
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Test fixtures — a realistic multi-turn coding session with trackable facts
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
/**
 * Facts that must still be present after extraction.
 *
 * Each entry expands to `{ id, text, type }`:
 *   - `id`   labels the fact in assertion failure messages,
 *   - `text` is the literal substring the tests search the checkpoint for,
 *   - `type` names the category of content the fact exercises.
 *
 * Rows are written as `[id, text, type]` tuples and turned into objects by
 * the trailing `.map`, so the table stays one fact per line.
 */
const EXPECTED_FACTS = [
  // User messages and decisions
  ["user-goal", "PaymentService crashes on negative amounts", "user-message"],
  ["user-decision", "use the validation approach", "user-decision"],
  // NOTE(review): "no" also occurs inside other expected facts (for example
  // "ValidationError is not defined"), so a substring check on it cannot fail
  // by itself — confirm whether a stricter marker is wanted here.
  ["user-rejection", "no", "user-rejection"],
  ["user-context", "This is for the Acme Corp billing module", "user-context"],

  // Assistant reasoning
  [
    "assistant-analysis",
    "root cause is missing input validation in processPayment",
    "assistant-text",
  ],
  [
    "assistant-plan",
    "add a guard clause and update the test suite",
    "assistant-text",
  ],

  // Edit diffs — the actual work product
  ["edit-file-path", "src/payment-service.js", "edit-path"],
  ["edit-old-code", "processPayment(amount) {", "edit-old"],
  ["edit-new-code", "if (amount < 0) throw new ValidationError", "edit-new"],
  ["edit-test-path", "test/payment.test.js", "edit-path"],
  ["edit-test-code", "rejects negative amounts", "edit-new"],

  // Bash command output — test results
  ["bash-command", "bun test src/payment", "bash-cmd"],
  ["bash-output-pass", "14 passed", "bash-output"],
  ["bash-output-fail-first", "FAIL test/payment.test.js", "bash-output"],
  ["bash-error", "ValidationError is not defined", "bash-error"],

  // AskUserQuestion answer (user decision via tool)
  [
    "ask-answer",
    "use ValidationError from the shared errors module",
    "ask-answer",
  ],

  // File tracking
  ["file-modified-1", "src/payment-service.js", "files-modified"],
  ["file-modified-2", "test/payment.test.js", "files-modified"],

  // WebSearch results (ephemeral, must be kept)
  ["websearch-result", "ValidationError best practices", "websearch"],

  // Sequential thinking (reasoning chain)
  ["thinking-conclusion", "the guard clause pattern is correct", "thinking"],
].map(([id, text, type]) => ({ id, text, type }));
|
|
121
|
+
|
|
122
|
+
/**
 * Noise markers: substrings the tests require to be ABSENT from the
 * checkpoint. Each `[id, text, type]` row expands to `{ id, text, type }`,
 * where `text` is the literal substring that is searched for.
 */
const REMOVED_CONTENT = [
  [
    "file-read-content",
    "// Full contents of payment-service.js line by line",
    "read-result",
  ],
  ["grep-results", "src/billing.js:42: amount", "grep-result"],
  ["thinking-block", "internal_chain_of_thought_marker", "thinking-block"],
  ["confirmation-yes", "**User:** yes", "skipped-confirmation"],
  ["system-message", "system_prompt_injection_content", "system"],
].map(([id, text, type]) => ({ id, text, type }));
|
|
152
|
+
|
|
153
|
+
// ---------------------------------------------------------------------------
|
|
154
|
+
// Synthetic transcript builder
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
/**
 * Build the synthetic session transcript used by every test in this file.
 *
 * @returns {string} JSONL text: one JSON-serialised record per line, joined
 *   with "\n" and without a trailing newline.
 */
function buildTranscript() {
  // Record builders. Properties are listed in the order they must be
  // serialised, because JSON.stringify emits keys in insertion order.
  const user = (content) => ({
    type: "user",
    message: { role: "user", content },
  });
  const assistant = (...content) => ({
    type: "assistant",
    message: { role: "assistant", content },
  });

  // Content-part builders shared by user and assistant records.
  const text = (value) => ({ type: "text", text: value });
  const toolUse = (id, name, input) => ({ type: "tool_use", id, name, input });
  const toolResult = (id, content) => ({
    type: "tool_result",
    tool_use_id: id,
    content,
  });

  const records = [
    // Turn 1: User describes the bug
    user(
      "PaymentService crashes on negative amounts. This is for the Acme Corp billing module. Can you investigate and fix it?",
    ),

    // Turn 2: Assistant reads the file (the thinking part is a noise marker)
    assistant(
      {
        type: "thinking",
        thinking:
          "internal_chain_of_thought_marker — should not appear in checkpoint",
      },
      text("I'll investigate the PaymentService. Let me read the source file."),
      toolUse("t1", "Read", { file_path: "src/payment-service.js" }),
    ),

    // Turn 3: Read result (should be REMOVED — re-obtainable)
    user([
      toolResult(
        "t1",
        "// Full contents of payment-service.js line by line\nclass PaymentService {\n processPayment(amount) {\n const fee = amount * 0.03;\n return { total: amount + fee };\n }\n}",
      ),
    ]),

    // Turn 4: Assistant analyses and searches
    assistant(
      text(
        "I can see the root cause is missing input validation in processPayment. Negative amounts flow through without any check.",
      ),
      toolUse("t2", "Grep", { pattern: "amount", path: "src/" }),
    ),

    // Turn 5: Grep result (should be REMOVED)
    user([
      toolResult(
        "t2",
        "src/billing.js:42: amount\nsrc/payment-service.js:3: processPayment(amount)\nsrc/refund.js:10: refundAmount",
      ),
    ]),

    // Turn 6: Assistant proposes two approaches, asks user
    assistant(
      text(
        "I can fix this two ways: A) return an error object, or B) throw a ValidationError. Which do you prefer?",
      ),
      toolUse("t3", "AskUserQuestion", {
        question:
          "Should I use approach A (error object) or B (ValidationError)?",
      }),
    ),

    // Turn 7: User answers via tool_result (MUST be kept — decision)
    user([
      toolResult("t3", "use ValidationError from the shared errors module"),
    ]),

    // Turn 8: User also sends a bare confirmation
    user("yes"),

    // Turn 9: User provides more context (should NOT be skipped — not just "yes")
    user(
      "Let's use the validation approach, and make sure to add a guard clause and update the test suite",
    ),

    // Turn 10: Assistant does a web search for best practices
    assistant(
      text(
        "Good choice. Let me check current best practices for validation errors.",
      ),
      toolUse("t4", "WebSearch", {
        query: "ValidationError best practices javascript",
      }),
    ),

    // Turn 11: WebSearch result (ephemeral — MUST be kept)
    user([
      toolResult(
        "t4",
        "ValidationError best practices:\n1. Extend Error class\n2. Include error code\n3. Use descriptive messages",
      ),
    ]),

    // Turn 12: Sequential thinking
    assistant(
      toolUse("t5", "mcp__sequential-thinking__sequentialthinking", {
        thought:
          "Considering the options: the guard clause pattern is correct for this use case. We throw early, fail fast.",
        thoughtNumber: 1,
        totalThoughts: 1,
        nextThoughtNeeded: false,
      }),
    ),

    // Turn 13: Thinking result (REMOVED — redundant with input)
    user([toolResult("t5", '{"thoughtNumber":1}')]),

    // Turn 14: Assistant makes the edit
    assistant(
      text(
        "I'll add a guard clause and update the test suite. Let me plan to add a guard clause and update the test suite.",
      ),
      toolUse("t6", "Edit", {
        file_path: "src/payment-service.js",
        old_string: "processPayment(amount) {\n const fee = amount * 0.03;",
        new_string:
          "processPayment(amount) {\n if (amount < 0) throw new ValidationError('Amount cannot be negative');\n const fee = amount * 0.03;",
      }),
    ),

    // Turn 15: Edit result (REMOVED — just success)
    user([toolResult("t6", "File edited successfully")]),

    // Turn 16: Assistant adds test (pure insertion: empty old_string)
    assistant(
      toolUse("t7", "Edit", {
        file_path: "test/payment.test.js",
        old_string: "",
        new_string:
          "test('rejects negative amounts', () => {\n expect(() => service.processPayment(-100)).toThrow(ValidationError);\n});",
      }),
    ),

    // Turn 17: Edit result
    user([toolResult("t7", "File edited successfully")]),

    // Turn 18: Assistant runs tests — FIRST RUN FAILS
    assistant(
      text("Let me run the tests to verify."),
      toolUse("t8", "Bash", { command: "bun test src/payment" }),
    ),

    // Turn 19: Test failure output (MUST be kept — critical error info)
    user([
      toolResult(
        "t8",
        "FAIL test/payment.test.js\n ✗ rejects negative amounts\n ReferenceError: ValidationError is not defined\n\n13 passed, 1 failed",
      ),
    ]),

    // Turn 20: Assistant asks a question and the user answers "no"
    // (rejection — MUST be kept). This turn contributes two records.
    assistant(
      text(
        "The test failed because ValidationError isn't imported. Should I add it to the existing imports block or create a new import statement?",
      ),
    ),
    user("no"),

    // Turn 21: User provides direction
    user("Add it to the existing imports at the top of the file"),

    // Turn 22: Assistant fixes and reruns
    assistant(
      text("I'll add the import and rerun the tests."),
      toolUse("t9", "Edit", {
        file_path: "src/payment-service.js",
        old_string: "class PaymentService {",
        new_string:
          "import { ValidationError } from './errors.js';\n\nclass PaymentService {",
      }),
      toolUse("t10", "Bash", { command: "bun test src/payment" }),
    ),

    // Turn 23: Edit + test results
    user([
      toolResult("t9", "File edited successfully"),
      toolResult("t10", "14 passed, 0 failed"),
    ]),

    // Turn 24: System message (should be REMOVED); note it carries no role
    { type: "system", message: { content: "system_prompt_injection_content" } },

    // Turn 25: Final assistant summary
    assistant(
      text(
        "All 14 passed. The fix adds input validation to processPayment — negative amounts now throw a ValidationError with a descriptive message.",
      ),
    ),

    // Turn 26: User shares an image (placeholder should appear)
    user([
      {
        type: "image",
        source: { type: "base64", media_type: "image/png", data: "abc123" },
      },
      text("Here's a screenshot of the error in production"),
    ]),
  ];

  return records.map((record) => JSON.stringify(record)).join("\n");
}
|
|
600
|
+
|
|
601
|
+
// ---------------------------------------------------------------------------
|
|
602
|
+
// Tests
|
|
603
|
+
// ---------------------------------------------------------------------------
|
|
604
|
+
|
|
605
|
+
/**
 * End-to-end suite. Before each test the synthetic transcript from
 * buildTranscript() is written to a fresh temporary directory; after each
 * test that directory is removed. The tests then run extractConversation /
 * extractRecent on the file and check the returned checkpoint text for the
 * presence of EXPECTED_FACTS and the absence of REMOVED_CONTENT.
 */
describe("end-to-end compaction verification", () => {
  let tmpDir;
  let transcriptPath;

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "cg-e2e-"));
    transcriptPath = path.join(tmpDir, "transcript.jsonl");
    fs.writeFileSync(transcriptPath, buildTranscript());
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  describe("extractConversation — fact preservation", () => {
    it("preserves all expected facts in the checkpoint", () => {
      const checkpoint = extractConversation(transcriptPath);

      for (const fact of EXPECTED_FACTS) {
        assert.ok(
          checkpoint.includes(fact.text),
          `FACT LOST [${fact.id}] (${fact.type}): expected checkpoint to contain "${fact.text}"`,
        );
      }
    });

    it("removes noise content that should not survive extraction", () => {
      const checkpoint = extractConversation(transcriptPath);

      for (const noise of REMOVED_CONTENT) {
        assert.ok(
          !checkpoint.includes(noise.text),
          `NOISE KEPT [${noise.id}] (${noise.type}): checkpoint should NOT contain "${noise.text}"`,
        );
      }
    });

    it("has a state header at the top", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.startsWith("## Session State"));
      assert.ok(checkpoint.includes("Files modified:"));
      assert.ok(checkpoint.includes("src/payment-service.js"));
    });

    it("preserves user rejection (no) as a meaningful decision", () => {
      const checkpoint = extractConversation(transcriptPath);
      // Either prefix style is accepted; the bare "no" entry in EXPECTED_FACTS
      // is too short to prove anything, so this is the real rejection check.
      assert.ok(
        checkpoint.includes("User: no") || checkpoint.includes("**User:** no"),
        'expected the user rejection ("no") to be kept in the checkpoint',
      );
    });

    it("removes affirmative confirmation (yes) as zero-information", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(!checkpoint.includes("**User:** yes"));
    });

    it("preserves AskUserQuestion answer via tool_result", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("User answered:"));
      assert.ok(
        checkpoint.includes("ValidationError from the shared errors module"),
      );
    });

    it("preserves edit diffs in compact old/new format", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("→ Edit `src/payment-service.js`"));
      assert.ok(checkpoint.includes("old: |"));
      assert.ok(checkpoint.includes("new: |"));
      assert.ok(checkpoint.includes("processPayment(amount) {"));
      assert.ok(
        checkpoint.includes("if (amount < 0) throw new ValidationError"),
      );
    });

    it("preserves pure insertion edits (test file)", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("→ Edit `test/payment.test.js`"));
      assert.ok(checkpoint.includes("rejects negative amounts"));
    });

    it("preserves bash command and output", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("Ran `bun test src/payment`"));
      assert.ok(checkpoint.includes("14 passed"));
    });

    it("preserves first test failure output (error details)", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("FAIL test/payment.test.js"));
      assert.ok(checkpoint.includes("ValidationError is not defined"));
    });

    it("preserves web search results (ephemeral content)", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("ValidationError best practices"));
    });

    it("preserves sequential thinking content", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("guard clause pattern is correct"));
    });

    it("emits image placeholder", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("[User shared an image]"));
    });

    it("removes file read results (re-obtainable)", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(!checkpoint.includes("// Full contents of payment-service.js"));
    });

    it("removes grep results (re-obtainable)", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(!checkpoint.includes("src/billing.js:42: amount"));
    });

    it("removes thinking blocks", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(!checkpoint.includes("internal_chain_of_thought_marker"));
    });

    it("removes system messages", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(!checkpoint.includes("system_prompt_injection_content"));
    });

    it("removes edit success results", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(!checkpoint.includes("File edited successfully"));
    });
  });

  describe("extractRecent — fact preservation in windowed mode", () => {
    // NOTE(review): the second argument (30) is presumably the window size —
    // confirm against extractRecent in ../lib/transcript.mjs.
    it("preserves recent facts within the window", () => {
      const checkpoint = extractRecent(transcriptPath, 30);

      // Recent facts that should be in the window
      const recentFacts = EXPECTED_FACTS.filter(
        (f) => f.id !== "websearch-result", // may fall outside window depending on count
      );
      for (const fact of recentFacts) {
        assert.ok(
          checkpoint.includes(fact.text),
          `FACT LOST in extractRecent [${fact.id}]: "${fact.text}"`,
        );
      }
    });

    it("removes noise even in windowed mode", () => {
      const checkpoint = extractRecent(transcriptPath, 30);

      for (const noise of REMOVED_CONTENT) {
        assert.ok(
          !checkpoint.includes(noise.text),
          `NOISE KEPT in extractRecent [${noise.id}]: "${noise.text}"`,
        );
      }
    });

    it("has a state header", () => {
      const checkpoint = extractRecent(transcriptPath, 30);
      assert.ok(checkpoint.startsWith("## Session State"));
    });
  });

  describe("checkpoint structure", () => {
    it("maintains chronological message order", () => {
      const checkpoint = extractConversation(transcriptPath);

      // Skip the state header (everything up to and including the first
      // separator) so header content cannot satisfy the ordering checks.
      const separator = "\n\n---\n\n";
      const separatorPos = checkpoint.indexOf(separator);
      // Without this guard a missing separator (-1) would slice the body at an
      // arbitrary offset and surface as a confusing ordering failure.
      assert.ok(separatorPos > -1, "state header separator found");
      const body = checkpoint.slice(separatorPos + separator.length);

      const investigatePos = body.indexOf("investigate the PaymentService");
      const rootCausePos = body.indexOf(
        "root cause is missing input validation",
      );
      const editPos = body.indexOf("→ Edit `src/payment-service.js`");
      const finalSummaryPos = body.indexOf("All 14 passed");

      assert.ok(investigatePos > -1, "investigation text found");
      assert.ok(rootCausePos > -1, "root cause text found");
      assert.ok(editPos > -1, "edit text found");
      assert.ok(finalSummaryPos > -1, "final summary found");
      assert.ok(
        investigatePos < rootCausePos,
        "investigation before root cause",
      );
      assert.ok(rootCausePos < editPos, "root cause before edit");
      assert.ok(editPos < finalSummaryPos, "edit before final summary");
    });

    it("uses --- separators between messages", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("\n\n---\n\n"));
    });

    it("uses User: and Asst: prefixes", () => {
      const checkpoint = extractConversation(transcriptPath);
      assert.ok(checkpoint.includes("User:"));
      assert.ok(checkpoint.includes("Asst:"));
    });
  });
});
|