@gswangg/duncan-cc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -0
- package/SPEC.md +195 -0
- package/package.json +39 -0
- package/src/content-replacements.ts +185 -0
- package/src/discovery.ts +340 -0
- package/src/mcp-server.ts +356 -0
- package/src/normalize.ts +702 -0
- package/src/parser.ts +257 -0
- package/src/pipeline.ts +274 -0
- package/src/query.ts +626 -0
- package/src/system-prompt.ts +408 -0
- package/src/tree.ts +371 -0
- package/tests/_skip-if-no-corpus.ts +12 -0
- package/tests/compaction.test.ts +205 -0
- package/tests/content-replacements.test.ts +214 -0
- package/tests/discovery.test.ts +129 -0
- package/tests/normalize.test.ts +192 -0
- package/tests/parity.test.ts +226 -0
- package/tests/parser-tree.test.ts +268 -0
- package/tests/pipeline.test.ts +174 -0
- package/tests/self-exclusion.test.ts +272 -0
- package/tests/system-prompt.test.ts +238 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parity tests: OHY post-processing, attachment conversion, content replacements, subagents.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { readFileSync, readdirSync } from "node:fs";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { parseSession } from "../src/parser.js";
|
|
8
|
+
import { buildRawChain } from "../src/tree.js";
|
|
9
|
+
import { normalizeMessages } from "../src/normalize.js";
|
|
10
|
+
import { applyContentReplacements } from "../src/content-replacements.js";
|
|
11
|
+
import { processSessionFile } from "../src/pipeline.js";
|
|
12
|
+
import { listSubagentFiles } from "../src/discovery.js";
|
|
13
|
+
import { requireCorpus } from "./_skip-if-no-corpus.js";
|
|
14
|
+
|
|
15
|
+
// Root directory of the on-disk session corpus; the subagent test below joins a session path onto it.
// NOTE(review): requireCorpus() presumably skips or aborts the run when the corpus is absent — confirm in ./_skip-if-no-corpus.
const TESTDATA = requireCorpus();
|
|
16
|
+
|
|
17
|
+
// Running tallies: incremented by assert()/ok() and reported in the summary at the end of the script.
let passed = 0, failed = 0;
|
|
18
|
+
/**
 * Record the outcome of one check without throwing.
 *
 * @param c Condition under test.
 * @param m Message printed to stderr when the condition is false.
 */
function assert(c: boolean, m: string) {
  if (!c) {
    failed++;
    console.error(` ✗ ${m}`);
    return;
  }
  passed++;
}
|
|
19
|
+
/**
 * Count one pass and print a success line to stdout.
 *
 * @param m Text shown after the check mark.
 */
function ok(m: string) {
  passed += 1;
  console.log(` ✓ ${m}`);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Serialize session entries as JSONL: one JSON document per entry, joined
 * with "\n" and without a trailing newline.
 *
 * @param entries Entries to serialize; each one is passed to JSON.stringify as-is.
 * @returns The JSONL text; the empty string when `entries` is empty.
 */
function buildSession(entries: readonly unknown[]): string {
  return entries.map((entry) => JSON.stringify(entry)).join("\n");
}
|
|
24
|
+
let uid = 100;
|
|
25
|
+
/** Return the next synthetic entry id: advances the module-level `uid` counter and formats it as `parity-<n>`. */
function id() {
  uid += 1;
  return `parity-${uid}`;
}
|
|
26
|
+
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// OHY post-processing: two assistant entries sharing one message.id are
// expected to be merged into a single assistant message.
console.log("\n--- OHY: Split assistant messages (same message.id) ---");
{
  const u1 = id(), a1 = id(), a1b = id(), u2 = id();
  const content = buildSession([
    { type: "user", uuid: u1, parentUuid: null, timestamp: "2026-01-01T00:00:00Z",
      message: { role: "user", content: "hello" } },
    // Split assistant: two entries with same message.id
    { type: "assistant", uuid: a1, parentUuid: u1, timestamp: "2026-01-01T00:00:01Z",
      message: { role: "assistant", id: "msg_001", content: [{ type: "text", text: "part 1" }], model: "claude-sonnet-4" } },
    { type: "assistant", uuid: a1b, parentUuid: a1, timestamp: "2026-01-01T00:00:02Z",
      message: { role: "assistant", id: "msg_001", content: [{ type: "tool_use", id: "tu1", name: "Bash", input: {} }], model: "claude-sonnet-4" } },
    { type: "user", uuid: u2, parentUuid: a1b, timestamp: "2026-01-01T00:00:03Z",
      message: { role: "user", content: [{ type: "tool_result", tool_use_id: "tu1", content: "ok" }] } },
  ]);
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);
  const normalized = normalizeMessages(chain);

  // Should merge the two assistant messages into one
  const assistants = normalized.filter(m => m.type === "assistant");
  assert(assistants.length === 1, `1 merged assistant (got ${assistants.length})`);

  // Guarded access: if the merge did not happen, the checks below must be
  // recorded as failures instead of throwing a TypeError that aborts the script.
  const mergedContent = assistants[0]?.message.content;
  const contentArr: any[] = Array.isArray(mergedContent) ? mergedContent : [];
  assert(contentArr.length === 2, `merged has 2 blocks (got ${contentArr.length})`);
  assert(contentArr[0]?.type === "text", "block 0 is text");
  assert(contentArr[1]?.type === "tool_use", "block 1 is tool_use");
  ok("split assistant messages merged correctly");
}
|
|
57
|
+
|
|
58
|
+
// ============================================================================
|
|
59
|
+
// OHY post-processing: a tool_use with no following tool_result is expected
// to get a synthetic, error-flagged tool_result in a trailing user message.
console.log("\n--- OHY: Orphaned tool_use gets synthetic tool_result ---");
{
  const u1 = id(), a1 = id();
  const content = buildSession([
    { type: "user", uuid: u1, parentUuid: null, timestamp: "2026-01-01T00:00:00Z",
      message: { role: "user", content: "run something" } },
    { type: "assistant", uuid: a1, parentUuid: u1, timestamp: "2026-01-01T00:00:01Z",
      message: { role: "assistant", content: [
        { type: "text", text: "running" },
        { type: "tool_use", id: "orphan_tu", name: "Bash", input: { command: "ls" } },
      ], model: "claude-sonnet-4" } },
    // No tool_result follows — session was interrupted
  ]);
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);
  const normalized = normalizeMessages(chain);

  // Should have user, assistant, user (with synthetic tool_result)
  assert(normalized.length === 3, `3 messages (got ${normalized.length})`);

  // Guarded access: a missing synthetic message or tool_result must be
  // reported as a failed assertion, not crash the remaining tests.
  const synthetic = normalized[2];
  assert(synthetic?.type === "user", "synthetic user message added");
  const rawContent = synthetic?.message.content;
  const synthContent: any[] = Array.isArray(rawContent) ? rawContent : [];
  const toolResult = synthContent.find((c: any) => c.type === "tool_result" && c.tool_use_id === "orphan_tu");
  assert(!!toolResult, "synthetic tool_result for orphan");
  assert(toolResult?.is_error === true, "marked as error");
  ok("orphaned tool_use gets synthetic tool_result");
}
|
|
85
|
+
|
|
86
|
+
// ============================================================================
|
|
87
|
+
// A file attachment placed between a user turn and an assistant turn should
// come out of normalization as user content, folded into the preceding user message.
console.log("\n--- Attachment conversion: file ---");
{
  const u1 = id(), att = id(), a1 = id();
  const userEntry = {
    type: "user", uuid: u1, parentUuid: null, timestamp: "2026-01-01T00:00:00Z",
    message: { role: "user", content: "read this file" },
  };
  const attachmentEntry = {
    type: "attachment", uuid: att, parentUuid: u1, timestamp: "2026-01-01T00:00:01Z",
    message: { role: "user", content: "" },
    attachment: { type: "file", filename: "test.txt", content: { type: "text", text: "file contents here" } },
  };
  const assistantEntry = {
    type: "assistant", uuid: a1, parentUuid: att, timestamp: "2026-01-01T00:00:02Z",
    message: { role: "assistant", content: [{ type: "text", text: "I see the file" }], model: "claude-sonnet-4" },
  };
  const session = parseSession(buildSession([userEntry, attachmentEntry, assistantEntry]));
  const normalized = normalizeMessages(buildRawChain(session));

  // Only user/assistant entries may remain after conversion.
  const hasOtherType = normalized.some(m => m.type !== "user" && m.type !== "assistant");
  assert(!hasOtherType, "no attachment type in output");

  let userCount = 0;
  for (const m of normalized) {
    if (m.type === "user") userCount++;
  }
  assert(userCount === 1, `1 user message (attachment merged) (got ${userCount})`);
  ok("file attachment converted and merged");
}
|
|
109
|
+
|
|
110
|
+
// ============================================================================
|
|
111
|
+
// A directory attachment at the root of the chain should become a user
// message whose content carries the directory listing.
console.log("\n--- Attachment conversion: directory ---");
{
  const att = id(), a1 = id();
  const content = buildSession([
    { type: "attachment", uuid: att, parentUuid: null, timestamp: "2026-01-01T00:00:00Z",
      message: { role: "user", content: "" },
      attachment: { type: "directory", path: "/home/user/project", content: "file1.ts\nfile2.ts\nREADME.md" } },
    { type: "assistant", uuid: a1, parentUuid: att, timestamp: "2026-01-01T00:00:01Z",
      message: { role: "assistant", content: [{ type: "text", text: "I see the directory" }], model: "claude-sonnet-4" } },
  ]);
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);
  const normalized = normalizeMessages(chain);

  // Guarded access: an empty result must fail the assertions rather than throw.
  // JSON.stringify(undefined) returns undefined, hence the "" fallback.
  const first = normalized[0];
  assert(first?.type === "user", "directory converted to user");
  const text = JSON.stringify(first?.message.content) ?? "";
  assert(text.includes("file1.ts"), "directory listing included");
  ok("directory attachment converted");
}
|
|
130
|
+
|
|
131
|
+
// ============================================================================
|
|
132
|
+
// A plan_file_reference attachment should surface both the plan path and the
// plan text in the first normalized message.
console.log("\n--- Attachment conversion: plan_file_reference ---");
{
  const att = id(), a1 = id();
  const content = buildSession([
    { type: "attachment", uuid: att, parentUuid: null, timestamp: "2026-01-01T00:00:00Z",
      message: { role: "user", content: "" },
      attachment: { type: "plan_file_reference", planFilePath: "/tmp/plan.md", planContent: "## Step 1\nDo the thing" } },
    { type: "assistant", uuid: a1, parentUuid: att, timestamp: "2026-01-01T00:00:01Z",
      message: { role: "assistant", content: [{ type: "text", text: "following plan" }], model: "claude-sonnet-4" } },
  ]);
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);
  const normalized = normalizeMessages(chain);

  // Guarded access: with no normalized output the checks fail cleanly instead
  // of throwing (JSON.stringify(undefined) returns undefined).
  const text = JSON.stringify(normalized[0]?.message.content) ?? "";
  assert(text.includes("plan.md"), "plan path included");
  assert(text.includes("Do the thing"), "plan content included");
  ok("plan_file_reference attachment converted");
}
|
|
151
|
+
|
|
152
|
+
// ============================================================================
|
|
153
|
+
// A "content-replacement" metadata entry should cause the matching
// tool_result content to be swapped for the recorded replacement text.
console.log("\n--- Content replacements: metadata entries ---");
{
  const u1 = id(), a1 = id(), u2 = id();
  const content = buildSession([
    { type: "user", uuid: u1, parentUuid: null, timestamp: "2026-01-01T00:00:00Z",
      message: { role: "user", content: "hello" } },
    { type: "assistant", uuid: a1, parentUuid: u1, timestamp: "2026-01-01T00:00:01Z",
      message: { role: "assistant", content: [
        { type: "text", text: "running tool" },
        { type: "tool_use", id: "tu_replace", name: "Bash", input: {} },
      ], model: "claude-sonnet-4" } },
    { type: "user", uuid: u2, parentUuid: a1, timestamp: "2026-01-01T00:00:02Z",
      message: { role: "user", content: [
        { type: "tool_result", tool_use_id: "tu_replace", content: "very long original output that should be replaced" },
      ] } },
    // Content replacement metadata entry
    { type: "content-replacement", sessionId: "test-session",
      replacements: [{ kind: "tool-result", toolUseId: "tu_replace", replacement: "<persisted-output>replaced</persisted-output>" }] },
  ]);
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);
  const normalized = normalizeMessages(chain);
  const replaced = applyContentReplacements(normalized, parsed);

  // Find the tool_result and check it was replaced
  const userWithResult = replaced.find(m => {
    if (m.type !== "user" || !Array.isArray(m.message.content)) return false;
    return m.message.content.some((c: any) => c.type === "tool_result" && c.tool_use_id === "tu_replace");
  });
  assert(!!userWithResult, "found user with tool_result");

  // Guarded access so a missing message is a failed check, not a TypeError.
  const blocks = userWithResult?.message.content;
  const tr = Array.isArray(blocks) ? blocks.find((c: any) => c.tool_use_id === "tu_replace") : undefined;

  // Match the full replacement marker: the ORIGINAL output also contains the
  // word "replaced", so checking for that word alone passes even when no
  // replacement happened. Serializing covers string and block-array content.
  const serialized = JSON.stringify(tr?.content ?? null);
  assert(serialized.includes("<persisted-output>replaced</persisted-output>"), "content was replaced");
  ok("content replacement from metadata entry applied");
}
|
|
187
|
+
|
|
188
|
+
// ============================================================================
|
|
189
|
+
// Runs every subagent transcript of one corpus session through the full
// pipeline; at least one must yield messages, and the roles must alternate.
console.log("\n--- Subagent processing ---");
{
  const codexSession = join(TESTDATA,
    "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-codex",
    "630fd2b9-d94d-4287-8c24-e225fbedfc5c.jsonl");

  const subagents = listSubagentFiles(codexSession);
  assert(subagents.length > 0, `found ${subagents.length} subagent files`);

  // Single pass: each subagent is processed once, and the same result feeds
  // both the "processable" tally and the alternation check.
  let processable = 0;
  for (const sa of subagents) {
    try {
      const result = processSessionFile(sa.path);
      if (result.messages.length > 0) processable++;
      if (result.messages.length < 2) continue;

      // Verify alternation on processable subagents
      for (let i = 1; i < result.messages.length; i++) {
        assert(
          result.messages[i].role !== result.messages[i-1].role,
          `subagent ${sa.sessionId.slice(0,15)}: alternation at ${i}`,
        );
      }
    } catch (err: unknown) {
      // Best-effort: an unprocessable subagent is tolerated (only
      // `processable > 0` is required), but the reason is logged rather
      // than silently discarded.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(` (skipped ${sa.path}: ${reason})`);
    }
  }
  assert(processable > 0, `${processable}/${subagents.length} subagents processable`);
  ok("subagent sessions process through full pipeline");
}
|
|
222
|
+
|
|
223
|
+
// ============================================================================
|
|
224
|
+
// Final tally; a non-zero failure count terminates the process with exit code 1.
console.log(`\n${passed} passed, ${failed} failed`);
if (failed === 0) {
  console.log("✅ All tests passed");
} else {
  console.log("❌ Some tests failed");
  process.exit(1);
}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for CC session parsing and tree operations.
|
|
3
|
+
* Uses real CC session files from testdata/.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
7
|
+
import { join, basename } from "node:path";
|
|
8
|
+
import { parseSession, parseJsonl, isTranscriptMessage, isCompactBoundary, isLocalCommand } from "../src/parser.js";
|
|
9
|
+
import { findLeaves, findBestLeaf, walkChain, stripInternalFields, sliceFromBoundary, getCompactionWindows, buildRawChain } from "../src/tree.js";
|
|
10
|
+
import { requireCorpus } from "./_skip-if-no-corpus.js";
|
|
11
|
+
|
|
12
|
+
// Root directory of the session corpus that findSessionFiles() walks.
// NOTE(review): requireCorpus() presumably skips or aborts the run when the corpus is absent — confirm in ./_skip-if-no-corpus.
const TESTDATA = requireCorpus();
|
|
13
|
+
|
|
14
|
+
/**
 * Collect every main-session `.jsonl` file under TESTDATA.
 *
 * Directories named exactly "subagents" are not descended into, and files are
 * ignored when the path of their containing directory includes "subagents".
 *
 * @returns Full paths in directory-walk order.
 */
function findSessionFiles(): string[] {
  const found: string[] = [];
  const visit = (dir: string): void => {
    const underSubagents = dir.includes("subagents");
    for (const dirent of readdirSync(dir, { withFileTypes: true })) {
      const path = join(dir, dirent.name);
      if (dirent.isDirectory() && dirent.name !== "subagents") {
        visit(path);
        continue;
      }
      if (dirent.isFile() && dirent.name.endsWith(".jsonl") && !underSubagents) {
        found.push(path);
      }
    }
  };
  visit(TESTDATA);
  return found;
}
|
|
30
|
+
|
|
31
|
+
// Number of checks recorded as passing; incremented by assert() and ok(), printed in the summary.
let passed = 0;
|
|
32
|
+
let failed = 0;
|
|
33
|
+
|
|
34
|
+
/**
 * Record the outcome of one check without throwing.
 *
 * @param condition Result of the check.
 * @param msg Message written to stderr when the check fails.
 */
function assert(condition: boolean, msg: string) {
  if (!condition) {
    failed++;
    console.error(` ✗ ${msg}`);
    return;
  }
  passed++;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Count one pass and print a success line to stdout.
 *
 * @param msg Text shown after the check mark.
 */
function ok(msg: string) {
  passed += 1;
  console.log(` ✓ ${msg}`);
}
|
|
47
|
+
|
|
48
|
+
// ============================================================================
|
|
49
|
+
// Tests
|
|
50
|
+
// ============================================================================
|
|
51
|
+
|
|
52
|
+
// Every corpus session file must parse into at least one JSONL entry.
console.log("\n--- JSONL Parsing ---");
{
  const sessionPaths = findSessionFiles();
  assert(sessionPaths.length > 0, "found session files");
  ok(`found ${sessionPaths.length} session files`);

  sessionPaths.forEach((path) => {
    const entries = parseJsonl(readFileSync(path, "utf-8"));
    // A 12-character prefix of the file name keeps failure messages short.
    const name = basename(path, ".jsonl").slice(0, 12);
    assert(entries.length > 0, `${name}: parsed ${entries.length} entries`);
  });
  ok("all files parsed without error");
}
|
|
66
|
+
|
|
67
|
+
// Everything parseSession() places in `messages` must satisfy
// isTranscriptMessage; a per-type histogram is printed for each session.
console.log("\n--- Transcript/Metadata Separation ---");
{
  for (const path of findSessionFiles()) {
    const session = parseSession(readFileSync(path, "utf-8"));
    const name = basename(path, ".jsonl").slice(0, 12);

    // Every message in the map should pass isTranscriptMessage
    session.messages.forEach((msg, uuid) => {
      assert(isTranscriptMessage(msg), `${name}: message ${uuid.slice(0, 8)} is transcript`);
    });

    // Count types (Map iteration order = first occurrence of each type)
    const histogram = new Map<string, number>();
    session.messages.forEach((msg) => {
      const seen = histogram.get(msg.type) ?? 0;
      histogram.set(msg.type, seen + 1);
    });

    const summary = [...histogram].map(([type, count]) => `${type}:${count}`).join(", ");
    ok(`${name}: ${session.messages.size} messages (${summary})`);
  }
}
|
|
91
|
+
|
|
92
|
+
// For each corpus session: leaves exist, a best leaf is chosen whenever the
// session holds user/assistant messages, and walking from that leaf yields a
// chain that starts at a root and ends at the leaf.
console.log("\n--- Tree Structure ---");
{
  const files = findSessionFiles();
  for (const file of files) {
    const content = readFileSync(file, "utf-8");
    const parsed = parseSession(content);
    const name = basename(file, ".jsonl").slice(0, 12);

    if (parsed.messages.size === 0) {
      ok(`${name}: empty session, skip`);
      continue;
    }

    // parentUuid references are deliberately not validated here: some broken
    // refs are expected (progress messages may reference non-transcript entries).

    // Find leaves
    const leaves = findLeaves(parsed.messages);
    assert(leaves.length > 0, `${name}: has ${leaves.length} leaves`);

    // Find best leaf
    const bestLeaf = findBestLeaf(parsed.messages);
    const hasUserOrAssistant = [...parsed.messages.values()].some(
      (m) => m.type === "user" || m.type === "assistant"
    );
    if (!hasUserOrAssistant) {
      assert(bestLeaf === undefined, `${name}: no user/assistant → no leaf`);
      ok(`${name}: empty conversation (only progress/system)`);
      continue;
    }
    assert(bestLeaf !== undefined, `${name}: found best leaf`);

    if (bestLeaf) {
      // Walk chain
      const chain = walkChain(parsed.messages, bestLeaf);
      assert(chain.length > 0, `${name}: chain has ${chain.length} messages`);

      // First message should have no parent (or parent not in messages)
      const first = chain[0];
      assert(
        first.parentUuid === null || !parsed.messages.has(first.parentUuid),
        `${name}: chain starts at root`,
      );

      // Last message should be the leaf or reachable from it
      const last = chain[chain.length - 1];
      assert(last.uuid === bestLeaf.uuid, `${name}: chain ends at leaf`);

      // Chain should have user and assistant messages
      const hasUser = chain.some((m) => m.type === "user");
      const hasAssistant = chain.some((m) => m.type === "assistant");
      if (hasUser) ok(`${name}: chain has user messages`);
      if (hasAssistant) ok(`${name}: chain has assistant messages`);

      // Check alternation: should roughly alternate user/assistant
      // (with system/progress interspersed)
      const roles = chain
        .filter((m) => m.type === "user" || m.type === "assistant")
        .map((m) => m.type);

      // Informational only (loose check — tool results break strict
      // alternation): the count is reported, not asserted.
      let consecutiveSame = 0;
      for (let i = 1; i < roles.length; i++) {
        if (roles[i] === roles[i - 1]) consecutiveSame++;
      }
      ok(`${name}: chain ${chain.length} msgs, ${roles.length} user/assistant, ${consecutiveSame} consecutive-same`);
    }
  }
}
|
|
167
|
+
|
|
168
|
+
// stripInternalFields() must remove `isSidechain` and `parentUuid` from every
// message of a walked chain.
console.log("\n--- Field Stripping ---");
{
  const candidates = findSessionFiles();
  // Prefer the first session file larger than 10000 bytes; otherwise take the first file.
  const sample = candidates.find((path) => statSync(path).size > 10000) ?? candidates[0];
  const session = parseSession(readFileSync(sample, "utf-8"));
  const tip = findBestLeaf(session.messages);

  if (tip) {
    const cleaned = stripInternalFields(walkChain(session.messages, tip));
    cleaned.forEach((entry) => {
      assert(!("isSidechain" in entry), "no isSidechain field");
      assert(!("parentUuid" in entry), "no parentUuid field");
    });
    ok(`stripped ${cleaned.length} messages`);
  }
}
|
|
187
|
+
|
|
188
|
+
// The corpus contains no compact boundaries, so sliceFromBoundary() is
// expected to hand back a chain of unchanged length.
console.log("\n--- Boundary Slicing ---");
{
  const candidates = findSessionFiles();
  // Prefer the first session file larger than 10000 bytes; otherwise take the first file.
  const sample = candidates.find((path) => statSync(path).size > 10000) ?? candidates[0];
  const session = parseSession(readFileSync(sample, "utf-8"));
  const tip = findBestLeaf(session.messages);

  if (tip) {
    const fullChain = walkChain(session.messages, tip);
    const afterSlice = sliceFromBoundary(fullChain);
    assert(afterSlice.length === fullChain.length, "no boundary: sliced === chain");
    ok(`no boundary: ${afterSlice.length} messages preserved`);
  }
}
|
|
204
|
+
|
|
205
|
+
// Without compact boundaries, getCompactionWindows() should return exactly
// one window holding the whole chain.
console.log("\n--- Compaction Windows ---");
{
  const files = findSessionFiles();
  const file = files.find((f) => statSync(f).size > 10000) ?? files[0];
  const content = readFileSync(file, "utf-8");
  const parsed = parseSession(content);
  const leaf = findBestLeaf(parsed.messages);

  if (leaf) {
    const chain = walkChain(parsed.messages, leaf);
    const windows = getCompactionWindows(chain);
    assert(windows.length === 1, "no boundaries: single window");

    // Guarded access: when no window is returned, the check below fails
    // cleanly instead of throwing on `windows[0].messages`.
    const firstWindow = windows[0];
    assert(firstWindow?.messages.length === chain.length, "window contains all messages");

    if (firstWindow?.modelInfo) {
      ok(`model: ${firstWindow.modelInfo.provider}/${firstWindow.modelInfo.modelId}`);
    }
  }
}
|
|
224
|
+
|
|
225
|
+
// buildRawChain() must return a non-empty chain for every session that
// contains at least one user or assistant message.
console.log("\n--- buildRawChain ---");
{
  const files = findSessionFiles();
  for (const file of files) {
    const content = readFileSync(file, "utf-8");
    const parsed = parseSession(content);
    const name = basename(file, ".jsonl").slice(0, 12);

    const chain = buildRawChain(parsed);
    const hasConversation = [...parsed.messages.values()].some(
      (m) => m.type === "user" || m.type === "assistant"
    );
    if (chain.length === 0 && !hasConversation) {
      ok(`${name}: no conversation`);
    } else {
      assert(chain.length > 0, `${name}: buildRawChain produced ${chain.length} messages`);

      // Parent linkage is intentionally NOT enforced: a parentUuid may point
      // outside the chain (CC sessions have progress messages etc. that break
      // strict chaining). The earlier per-message scan computed this and then
      // discarded the result, so it was removed as an O(n²) no-op.
      ok(`${name}: valid chain of ${chain.length}`);
    }
  }
}
|
|
257
|
+
|
|
258
|
+
// ============================================================================
|
|
259
|
+
// Summary
|
|
260
|
+
// ============================================================================
|
|
261
|
+
|
|
262
|
+
// Final tally; a non-zero failure count terminates the process with exit code 1.
console.log(`\n${passed} passed, ${failed} failed`);
if (failed === 0) {
  console.log("✅ All tests passed");
} else {
  console.log("❌ Some tests failed");
  process.exit(1);
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end pipeline tests.
|
|
3
|
+
* Verifies the full path: session file → API-ready messages.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync, readdirSync } from "node:fs";
|
|
7
|
+
import { join, basename } from "node:path";
|
|
8
|
+
import { processSessionFile, processSessionWindows, toApiMessages } from "../src/pipeline.js";
|
|
9
|
+
import { requireCorpus } from "./_skip-if-no-corpus.js";
|
|
10
|
+
|
|
11
|
+
// Root directory of the session corpus that findSessionFiles() walks.
// NOTE(review): requireCorpus() presumably skips or aborts the run when the corpus is absent — confirm in ./_skip-if-no-corpus.
const TESTDATA = requireCorpus();
|
|
12
|
+
|
|
13
|
+
/**
 * Collect every main-session `.jsonl` file under TESTDATA.
 *
 * Directories named exactly "subagents" are not descended into, and files are
 * ignored when the path of their containing directory includes "subagents".
 *
 * @returns Full paths in directory-walk order.
 */
function findSessionFiles(): string[] {
  const collected: string[] = [];
  const descend = (dir: string): void => {
    const underSubagents = dir.includes("subagents");
    for (const dirent of readdirSync(dir, { withFileTypes: true })) {
      const target = join(dir, dirent.name);
      if (dirent.isDirectory() && dirent.name !== "subagents") {
        descend(target);
        continue;
      }
      const isSessionFile = dirent.isFile() && dirent.name.endsWith(".jsonl");
      if (isSessionFile && !underSubagents) collected.push(target);
    }
  };
  descend(TESTDATA);
  return collected;
}
|
|
25
|
+
|
|
26
|
+
// Number of checks recorded as passing; incremented by assert() and ok(), printed in the summary.
let passed = 0;
|
|
27
|
+
let failed = 0;
|
|
28
|
+
|
|
29
|
+
/**
 * Record the outcome of one check without throwing.
 *
 * @param condition Result of the check.
 * @param msg Message written to stderr when the check fails.
 */
function assert(condition: boolean, msg: string) {
  if (condition) {
    passed += 1;
    return;
  }
  failed += 1;
  console.error(` ✗ ${msg}`);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Count one pass and print a success line to stdout.
 *
 * @param msg Text shown after the check mark.
 */
function ok(msg: string) {
  passed += 1;
  console.log(` ✓ ${msg}`);
}
|
|
35
|
+
|
|
36
|
+
// ============================================================================
|
|
37
|
+
|
|
38
|
+
// Runs every corpus session through processSessionFile() and checks the shape
// of the result: valid roles, content present, user-first, strict role
// alternation, a non-empty system prompt, and an extracted cwd.
console.log("\n--- Full Pipeline: all sessions ---");
{
  const files = findSessionFiles();
  for (const file of files) {
    const name = basename(file, ".jsonl").slice(0, 12);
    try {
      const result = processSessionFile(file);

      if (result.messages.length === 0) {
        ok(`${name}: empty session`);
        continue;
      }

      // All messages should have role + content
      for (const msg of result.messages) {
        assert(
          msg.role === "user" || msg.role === "assistant",
          `${name}: valid role (${msg.role})`,
        );
        assert(
          msg.content !== undefined && msg.content !== null,
          `${name}: has content`,
        );
      }

      // First message should be user (system-reminder from context injection)
      assert(
        result.messages[0].role === "user",
        `${name}: starts with user`,
      );

      // Role alternation (user/assistant must alternate)
      for (let i = 1; i < result.messages.length; i++) {
        assert(
          result.messages[i].role !== result.messages[i - 1].role,
          `${name}: alternation at ${i} (${result.messages[i - 1].role} → ${result.messages[i].role})`,
        );
      }

      // System prompt should be non-empty
      assert(result.systemPrompt.length > 0, `${name}: has system prompt`);

      // Should have extracted cwd
      assert(result.sessionCwd.length > 0, `${name}: has cwd`);

      ok(`${name}: ${result.rawMessageCount} raw → ${result.messages.length} API msgs, model: ${result.modelInfo?.modelId ?? "none"}, cwd: ${result.sessionCwd.slice(-30)}`);
    } catch (err: unknown) {
      // A throw counts as one failure for this session. The thrown value is
      // narrowed first, so non-Error throws print something meaningful
      // instead of "undefined".
      failed++;
      const reason = err instanceof Error ? err.message : String(err);
      console.error(` ✗ ${name}: pipeline error: ${reason}`);
    }
  }
}
|
|
90
|
+
|
|
91
|
+
// processSessionWindows() on a corpus session: the test data has no
// compaction boundaries, so exactly one window (index 0) is expected.
console.log("\n--- Full Pipeline: window processing ---");
{
  const sessions = findSessionFiles();
  // Pick a session with content: the first whose text exceeds 100000 characters, else the first file.
  const isLarge = (path: string) => readFileSync(path, "utf-8").length > 100000;
  const file = sessions.find(isLarge) ?? sessions[0];
  const name = basename(file, ".jsonl").slice(0, 12);

  const windows = processSessionWindows(file);

  // No compaction boundaries in test data → single window
  assert(windows.length === 1, `${name}: 1 window (no boundaries)`);

  if (windows.length > 0) {
    const { windowIndex, messages, systemPrompt } = windows[0];
    assert(windowIndex === 0, "window index 0");
    assert(messages.length > 0, "window has messages");
    assert(systemPrompt.length > 0, "window has system prompt");
    ok(`${name}: window 0: ${messages.length} msgs`);
  }
}
|
|
111
|
+
|
|
112
|
+
// Compares processSessionFile() with every optional stage disabled against
// the default run on the same session.
console.log("\n--- Full Pipeline: options ---");
{
  // Walk the corpus once and reuse the list for both the size search and the fallback.
  const files = findSessionFiles();
  const file = files.find((f) => readFileSync(f, "utf-8").length > 10000) ?? files[0];
  const name = basename(file, ".jsonl").slice(0, 12);

  // With all options disabled
  const minimal = processSessionFile(file, {
    applyReplacements: false,
    applyMicrocompact: false,
    injectContext: false,
    skipSystemPrompt: true,
  });

  // With all options enabled
  const full = processSessionFile(file);

  // Minimal must not have MORE messages than full (no context injection);
  // equal counts are accepted.
  assert(
    full.messages.length >= minimal.messages.length,
    `${name}: full (${full.messages.length}) >= minimal (${minimal.messages.length})`,
  );

  // Minimal should have empty system prompt
  assert(minimal.systemPrompt === "", `${name}: no system prompt when skipped`);

  // Full should have system prompt
  assert(full.systemPrompt.length > 0, `${name}: has system prompt when enabled`);

  ok(`${name}: options work correctly`);
}
|
|
142
|
+
|
|
143
|
+
// Every message produced by processSessionFile() must expose exactly the keys
// `role` and `content`, with `content` being a string or an array.
console.log("\n--- Full Pipeline: API message format ---");
{
  // Walk the corpus once and reuse the list for both the size search and the fallback.
  const files = findSessionFiles();
  const file = files.find((f) => readFileSync(f, "utf-8").length > 50000) ?? files[0];
  const result = processSessionFile(file);
  const name = basename(file, ".jsonl").slice(0, 12);

  for (const msg of result.messages) {
    // Should only have role and content
    const keys = Object.keys(msg);
    assert(
      keys.length === 2 && keys.includes("role") && keys.includes("content"),
      `${name}: API message has only role+content (got: ${keys.join(", ")})`,
    );

    // Content should be string or array
    assert(
      typeof msg.content === "string" || Array.isArray(msg.content),
      `${name}: content is string or array`,
    );
  }
  ok(`${name}: all API messages have correct format`);
}
|
|
165
|
+
|
|
166
|
+
// ============================================================================
|
|
167
|
+
|
|
168
|
+
// Final tally; a non-zero failure count terminates the process with exit code 1.
console.log(`\n${passed} passed, ${failed} failed`);
if (failed === 0) {
  console.log("✅ All tests passed");
} else {
  console.log("❌ Some tests failed");
  process.exit(1);
}
|