@gswangg/duncan-cc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -0
- package/SPEC.md +195 -0
- package/package.json +39 -0
- package/src/content-replacements.ts +185 -0
- package/src/discovery.ts +340 -0
- package/src/mcp-server.ts +356 -0
- package/src/normalize.ts +702 -0
- package/src/parser.ts +257 -0
- package/src/pipeline.ts +274 -0
- package/src/query.ts +626 -0
- package/src/system-prompt.ts +408 -0
- package/src/tree.ts +371 -0
- package/tests/_skip-if-no-corpus.ts +12 -0
- package/tests/compaction.test.ts +205 -0
- package/tests/content-replacements.test.ts +214 -0
- package/tests/discovery.test.ts +129 -0
- package/tests/normalize.test.ts +192 -0
- package/tests/parity.test.ts +226 -0
- package/tests/parser-tree.test.ts +268 -0
- package/tests/pipeline.test.ts +174 -0
- package/tests/self-exclusion.test.ts +272 -0
- package/tests/system-prompt.test.ts +238 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for content replacements and microcompact.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { readFileSync, readdirSync, existsSync } from "node:fs";
|
|
6
|
+
import { join, basename, dirname } from "node:path";
|
|
7
|
+
import { parseSession } from "../src/parser.js";
|
|
8
|
+
import { buildRawChain } from "../src/tree.js";
|
|
9
|
+
import { normalizeMessages } from "../src/normalize.js";
|
|
10
|
+
import { applyContentReplacements, microcompact } from "../src/content-replacements.js";
|
|
11
|
+
import { requireCorpus } from "./_skip-if-no-corpus.js";
|
|
12
|
+
|
|
13
|
+
const TESTDATA = requireCorpus();
|
|
14
|
+
|
|
15
|
+
/**
 * Recursively collects every session transcript (`*.jsonl`) found under TESTDATA.
 *
 * Directories named exactly "subagents" are never descended into, so any
 * transcripts stored beneath them are left out of the result.
 *
 * @returns Paths of the matching files, in the order the directory walk visits them.
 */
function findSessionFiles(): string[] {
  const files: string[] = [];
  function walk(dir: string) {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const full = join(dir, entry.name);
      if (entry.isDirectory()) {
        // Pruning by directory name is the only exclusion needed. The previous
        // additional `dir.includes("subagents")` test matched the substring
        // anywhere in the absolute path, so a corpus located under e.g.
        // ".../subagents-work/" (or a project folder whose name merely contains
        // the word) silently produced zero files and the corpus-driven checks
        // passed vacuously.
        if (entry.name !== "subagents") walk(full);
      } else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
        files.push(full);
      }
    }
  }
  walk(TESTDATA);
  return files;
}
|
|
27
|
+
|
|
28
|
+
let passed = 0;
|
|
29
|
+
let failed = 0;
|
|
30
|
+
|
|
31
|
+
/**
 * Records the outcome of a single check in the module-level tallies.
 * A passing check is counted silently; a failing one is counted and its
 * message is written to stderr. Execution always continues afterwards.
 */
function assert(condition: boolean, msg: string) {
  if (!condition) {
    failed += 1;
    console.error(` ✗ ${msg}`);
    return;
  }
  passed += 1;
}
|
|
35
|
+
|
|
36
|
+
/** Counts one pass unconditionally and echoes `msg` to stdout behind a check mark. */
function ok(msg: string) {
  passed += 1;
  console.log(` ✓ ${msg}`);
}
|
|
37
|
+
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
console.log("\n--- Content Replacements: persisted output resolution ---");
{
  // The codex session 630fd2b9 has persisted-output markers and tool-results/ files
  const sessionFile = join(
    TESTDATA,
    "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-codex",
    "630fd2b9-d94d-4287-8c24-e225fbedfc5c.jsonl"
  );
  const content = readFileSync(sessionFile, "utf-8");
  const parsed = parseSession(content);
  const chain = buildRawChain(parsed);
  const normalized = normalizeMessages(chain);

  // Count persisted-output markers before replacement
  let markersBefore = 0;
  for (const msg of normalized) {
    if (msg.type !== "user") continue;
    const c = msg.message.content;
    if (Array.isArray(c)) {
      for (const block of c) {
        if (block.type === "tool_result" && typeof block.content === "string" && block.content.includes("<persisted-output>")) {
          markersBefore++;
        }
      }
    }
  }

  const replaced = applyContentReplacements(normalized, parsed, sessionFile);

  // Count persisted-output markers after replacement
  let markersAfter = 0;
  let resolved = 0;
  for (const msg of replaced) {
    if (msg.type !== "user") continue;
    const c = msg.message.content;
    if (Array.isArray(c)) {
      for (const block of c) {
        if (block.type === "tool_result" && typeof block.content === "string") {
          if (block.content.includes("<persisted-output>")) markersAfter++;
          // Heuristic for "resolved from disk": a marker-free string result
          // longer than 200 characters.
          if (block.content.length > 200 && !block.content.includes("<persisted-output>")) resolved++;
        }
      }
    }
  }

  // Informational line only: the three counts are reported, not asserted.
  ok(`630fd2b9: ${markersBefore} persisted-output markers before, ${markersAfter} after, ${resolved} resolved from disk`);

  // Check that tool-results/ directory exists for this session
  const toolResultsDir = join(
    dirname(sessionFile),
    "630fd2b9-d94d-4287-8c24-e225fbedfc5c",
    "tool-results"
  );
  const toolResultsDirExists = existsSync(toolResultsDir);
  assert(toolResultsDirExists, "tool-results/ dir exists");

  // Only list the directory when it is present: assert() merely records a
  // failure, so an unguarded readdirSync would throw ENOENT here and abort the
  // script before the remaining sections and the final tally run.
  if (toolResultsDirExists) {
    const toolResultFiles = readdirSync(toolResultsDir);
    ok(`tool-results/ has ${toolResultFiles.length} files: ${toolResultFiles.join(", ")}`);
  }
}
|
|
100
|
+
|
|
101
|
+
console.log("\n--- Content Replacements: no-op for sessions without replacements ---");
{
  // Snapshot the failure tally so the closing summary line is reported (and
  // counted as a pass) only when every per-session check in this section held.
  // Previously the ✓ line was emitted even after failed assertions.
  const failedBefore = failed;

  const files = findSessionFiles();
  for (const file of files) {
    const content = readFileSync(file, "utf-8");
    const parsed = parseSession(content);
    const chain = buildRawChain(parsed);
    const name = basename(file, ".jsonl").slice(0, 12);

    // Sessions that produce an empty chain are skipped.
    if (chain.length === 0) continue;

    const normalized = normalizeMessages(chain);
    const replaced = applyContentReplacements(normalized, parsed, file);

    // Should be same length (no messages added/removed)
    assert(replaced.length === normalized.length, `${name}: same length after replacement`);
  }

  if (failed === failedBefore) ok("all sessions: replacement preserves message count");
}
|
|
120
|
+
|
|
121
|
+
console.log("\n--- Microcompact: synthetic test ---");
{
  // Snapshot the failure tally so the closing ✓ line is only reported when the
  // three assertions below all held; it used to be printed unconditionally.
  const failedBefore = failed;

  // Create synthetic messages with a time gap
  const now = new Date();
  const oldTime = new Date(now.getTime() - 60 * 60 * 1000); // 1 hour ago

  const messages: any[] = [
    {
      type: "user", uuid: "u1", parentUuid: null, timestamp: oldTime.toISOString(),
      message: { role: "user", content: [
        { type: "tool_result", tool_use_id: "old_tool_1", content: "old result 1" },
        { type: "tool_result", tool_use_id: "old_tool_2", content: "old result 2" },
      ]},
    },
    {
      type: "assistant", uuid: "a1", parentUuid: "u1", timestamp: oldTime.toISOString(),
      message: { role: "assistant", content: [
        { type: "text", text: "Here's what I found..." },
        { type: "tool_use", id: "recent_tool_1", name: "Read", input: {} },
      ]},
    },
    {
      type: "user", uuid: "u2", parentUuid: "a1", timestamp: oldTime.toISOString(),
      message: { role: "user", content: [
        { type: "tool_result", tool_use_id: "recent_tool_1", content: "recent result" },
      ]},
    },
    {
      type: "assistant", uuid: "a2", parentUuid: "u2", timestamp: oldTime.toISOString(),
      message: { role: "assistant", content: [
        { type: "text", text: "Done." },
      ]},
    },
  ];

  // keepRecentTurns=2: keeps tool_use IDs from the last 2 assistant messages (a1 + a2)
  const compacted = microcompact(messages, 30, 2);

  // Old tool results (not from recent assistants) should be truncated
  const u1 = compacted[0];
  const u1Content = u1.message.content as any[];
  assert(
    u1Content[0].content.includes("truncated"),
    "old_tool_1 truncated",
  );
  assert(
    u1Content[1].content.includes("truncated"),
    "old_tool_2 truncated",
  );

  // Recent tool result (from a1, which is in the last 2 assistants) should be preserved
  const u2 = compacted[2];
  const u2Content = u2.message.content as any[];
  assert(
    u2Content[0].content === "recent result",
    "recent_tool_1 preserved",
  );

  if (failed === failedBefore) ok("microcompact correctly truncates old, preserves recent");
}
|
|
181
|
+
|
|
182
|
+
console.log("\n--- Microcompact: no-op for recent sessions ---");
{
  // Snapshot the failure tally so the closing ✓ line is only reported when the
  // assertion below held; it used to be printed unconditionally.
  const failedBefore = failed;

  // Create messages that are recent (no gap)
  const now = new Date();

  const messages: any[] = [
    {
      type: "user", uuid: "u1", parentUuid: null, timestamp: now.toISOString(),
      message: { role: "user", content: [
        { type: "tool_result", tool_use_id: "t1", content: "result" },
      ]},
    },
    {
      type: "assistant", uuid: "a1", parentUuid: "u1", timestamp: now.toISOString(),
      message: { role: "assistant", content: [{ type: "text", text: "ok" }] },
    },
  ];

  const compacted = microcompact(messages, 30, 1);
  const u1Content = compacted[0].message.content as any[];
  // The single tool result must come back byte-for-byte unchanged.
  assert(u1Content[0].content === "result", "no truncation for recent session");

  if (failed === failedBefore) ok("microcompact no-op for recent sessions");
}
|
|
205
|
+
|
|
206
|
+
// ============================================================================
|
|
207
|
+
|
|
208
|
+
// Final tally: print the counts, then the verdict; exit with status 1 on any failure.
console.log(`\n${passed} passed, ${failed} failed`);
const anyFailed = failed > 0;
console.log(anyFailed ? "❌ Some tests failed" : "✅ All tests passed");
if (anyFailed) process.exit(1);
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for session discovery.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { join, basename } from "node:path";
|
|
6
|
+
import { listSessionFiles, listSubagentFiles, listAllSessionFiles, resolveSessionFiles } from "../src/discovery.js";
|
|
7
|
+
import { requireCorpus } from "./_skip-if-no-corpus.js";
|
|
8
|
+
|
|
9
|
+
const TESTDATA = requireCorpus();
|
|
10
|
+
|
|
11
|
+
let passed = 0;
|
|
12
|
+
let failed = 0;
|
|
13
|
+
|
|
14
|
+
/**
 * Records the outcome of a single check in the module-level tallies.
 * A passing check is counted silently; a failing one is counted and its
 * message is written to stderr. Execution always continues afterwards.
 */
function assert(condition: boolean, msg: string) {
  if (!condition) {
    failed += 1;
    console.error(` ✗ ${msg}`);
    return;
  }
  passed += 1;
}
|
|
18
|
+
|
|
19
|
+
/** Counts one pass unconditionally and echoes `msg` to stdout behind a check mark. */
function ok(msg: string) {
  passed += 1;
  console.log(` ✓ ${msg}`);
}
|
|
20
|
+
|
|
21
|
+
// ============================================================================
|
|
22
|
+
|
|
23
|
+
console.log("\n--- Session Discovery: list project sessions ---");
{
  const codexDir = join(TESTDATA, "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-codex");
  const sessions = listSessionFiles(codexDir);
  assert(sessions.length > 0, `found ${sessions.length} codex sessions`);

  // Newest-first ordering: every entry's mtime must be >= that of the entry after it.
  for (let i = 0; i + 1 < sessions.length; i++) {
    const earlierInList = sessions[i];
    const laterInList = sessions[i + 1];
    assert(
      earlierInList.mtime >= laterInList.mtime,
      `sorted by mtime: ${earlierInList.sessionId.slice(0, 8)} >= ${laterInList.sessionId.slice(0, 8)}`,
    );
  }

  // One informational line per session: short id, size, and modification date.
  for (const session of sessions) {
    const shortId = session.sessionId.slice(0, 12);
    const day = session.mtime.toISOString().slice(0, 10);
    ok(`${shortId}: ${session.size} bytes, ${day}`);
  }
}
|
|
41
|
+
|
|
42
|
+
console.log("\n--- Session Discovery: list subagents ---");
{
  const projectDir = join(TESTDATA, "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-codex");
  const codexSession = join(projectDir, "630fd2b9-d94d-4287-8c24-e225fbedfc5c.jsonl");

  const subagents = listSubagentFiles(codexSession);
  assert(subagents.length > 0, `found ${subagents.length} subagent files`);

  // Informational listing of the (shortened) ids that were found.
  const shortIds = subagents.map((s) => s.sessionId.slice(0, 15));
  ok(`subagents for 630fd2b9: ${shortIds.join(", ")}`);
}
|
|
53
|
+
|
|
54
|
+
console.log("\n--- Session Discovery: list all sessions ---");
{
  // Use testdata as projects dir by overriding — but listAllSessionFiles uses homedir.
  // We'll test listSessionFiles across multiple dirs instead.
  const projectNames = [
    "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-codex",
    "-Users-wednesdayniemeyer--claude-skills-inspect-claude-source",
    "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-sprites",
  ];

  // Sum the per-project session counts, visiting the projects in listed order.
  const total = projectNames
    .map((projectName) => listSessionFiles(join(TESTDATA, projectName)).length)
    .reduce((sum, count) => sum + count, 0);

  assert(total > 5, `found ${total} sessions across 3 projects`);
  ok(`${total} total sessions`);
}
|
|
72
|
+
|
|
73
|
+
console.log("\n--- Session Discovery: routing ---");
{
  // assert() only records failures, so each "... works" line below is gated on
  // the failure tally being unchanged since the start of its own group of
  // checks. Previously those lines were printed (and counted as passes) even
  // when the assertions right above them had failed.
  let failedBefore = failed;

  // Project routing with explicit projectDir
  const codexDir = join(TESTDATA, "-Users-wednesdayniemeyer-Documents-gniemeyer-Projects-codex");
  const projectResult = resolveSessionFiles({
    mode: "project",
    projectDir: codexDir,
  });
  assert(projectResult.sessions.length > 0, `project routing: ${projectResult.sessions.length} sessions`);
  assert(projectResult.totalCount === projectResult.sessions.length, "totalCount matches");
  if (failed === failedBefore) ok("project routing works");

  // Project routing with subagents
  failedBefore = failed;
  const withSubagents = resolveSessionFiles({
    mode: "project",
    projectDir: codexDir,
    includeSubagents: true,
  });
  assert(
    withSubagents.sessions.length > projectResult.sessions.length,
    `with subagents: ${withSubagents.sessions.length} > ${projectResult.sessions.length}`,
  );
  if (failed === failedBefore) ok("subagent inclusion works");

  // Session routing by path
  failedBefore = failed;
  const sessionPath = join(codexDir, "630fd2b9-d94d-4287-8c24-e225fbedfc5c.jsonl");
  const sessionResult = resolveSessionFiles({
    mode: "session",
    sessionId: sessionPath,
  });
  assert(sessionResult.sessions.length === 1, "session routing: found 1");
  assert(sessionResult.sessions[0].sessionId === "630fd2b9-d94d-4287-8c24-e225fbedfc5c", "correct session");
  if (failed === failedBefore) ok("session routing works");

  // Pagination
  failedBefore = failed;
  const paginated = resolveSessionFiles({
    mode: "project",
    projectDir: codexDir,
    limit: 1,
    offset: 0,
  });
  assert(paginated.sessions.length === 1, "pagination: 1 session");
  // hasMore can only be expected when the project holds more than one session.
  if (projectResult.totalCount > 1) {
    assert(paginated.hasMore, "pagination: has more");
    if (failed === failedBefore) ok("pagination works");
  }
}
|
|
120
|
+
|
|
121
|
+
// ============================================================================
|
|
122
|
+
|
|
123
|
+
// Final tally: print the counts, then the verdict; exit with status 1 on any failure.
console.log(`\n${passed} passed, ${failed} failed`);
const anyFailed = failed > 0;
console.log(anyFailed ? "❌ Some tests failed" : "✅ All tests passed");
if (anyFailed) process.exit(1);
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for CC message normalization.
|
|
3
|
+
* Uses real CC session files from testdata/.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync, readdirSync } from "node:fs";
|
|
7
|
+
import { join, basename } from "node:path";
|
|
8
|
+
import { parseSession } from "../src/parser.js";
|
|
9
|
+
import { buildRawChain } from "../src/tree.js";
|
|
10
|
+
import { normalizeMessages } from "../src/normalize.js";
|
|
11
|
+
import { requireCorpus } from "./_skip-if-no-corpus.js";
|
|
12
|
+
|
|
13
|
+
const TESTDATA = requireCorpus();
|
|
14
|
+
|
|
15
|
+
/**
 * Recursively collects every session transcript (`*.jsonl`) found under TESTDATA.
 *
 * Directories named exactly "subagents" are never descended into, so any
 * transcripts stored beneath them are left out of the result.
 *
 * @returns Paths of the matching files, in the order the directory walk visits them.
 */
function findSessionFiles(): string[] {
  const files: string[] = [];
  function walk(dir: string) {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const full = join(dir, entry.name);
      if (entry.isDirectory()) {
        // Pruning by directory name is the only exclusion needed. The previous
        // additional `dir.includes("subagents")` test matched the substring
        // anywhere in the absolute path, so a corpus located under e.g.
        // ".../subagents-work/" (or a project folder whose name merely contains
        // the word) silently produced zero files and the corpus-driven checks
        // passed vacuously.
        if (entry.name !== "subagents") walk(full);
      } else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
        files.push(full);
      }
    }
  }
  walk(TESTDATA);
  return files;
}
|
|
27
|
+
|
|
28
|
+
let passed = 0;
|
|
29
|
+
let failed = 0;
|
|
30
|
+
|
|
31
|
+
/**
 * Records the outcome of a single check in the module-level tallies.
 * A passing check is counted silently; a failing one is counted and its
 * message is written to stderr. Execution always continues afterwards.
 */
function assert(condition: boolean, msg: string) {
  if (!condition) {
    failed += 1;
    console.error(` ✗ ${msg}`);
    return;
  }
  passed += 1;
}
|
|
35
|
+
|
|
36
|
+
/** Counts one pass unconditionally and echoes `msg` to stdout behind a check mark. */
function ok(msg: string) {
  passed += 1;
  console.log(` ✓ ${msg}`);
}
|
|
37
|
+
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
console.log("\n--- Normalization: filter progress/system ---");
{
  for (const file of findSessionFiles()) {
    const name = basename(file, ".jsonl").slice(0, 12);
    const chain = buildRawChain(parseSession(readFileSync(file, "utf-8")));

    // An empty chain is reported as a pass and not checked further.
    if (chain.length === 0) {
      ok(`${name}: empty, skip`);
      continue;
    }

    const normalized = normalizeMessages(chain);

    // Every surviving message must be something other than a progress entry.
    const progressFree = normalized.every((m) => m.type !== "progress");
    assert(progressFree, `${name}: no progress after normalization`);

    // Any surviving system message must carry the "local_command" subtype.
    const systemClean = normalized.every(
      (m) => m.type !== "system" || m.subtype === "local_command"
    );
    assert(systemClean, `${name}: no non-local system messages`);

    // All messages should be user or assistant (system converted to user)
    const onlyConversationRoles = normalized.every(
      (m) => m.type === "user" || m.type === "assistant"
    );
    assert(onlyConversationRoles, `${name}: all messages are user/assistant`);

    ok(`${name}: ${chain.length} → ${normalized.length} messages`);
  }
}
|
|
72
|
+
|
|
73
|
+
console.log("\n--- Normalization: role alternation ---");
{
  for (const file of findSessionFiles()) {
    const chain = buildRawChain(parseSession(readFileSync(file, "utf-8")));
    const name = basename(file, ".jsonl").slice(0, 12);

    if (chain.length === 0) continue;

    const normalized = normalizeMessages(chain);
    // Fewer than two messages means there is no adjacent pair to compare.
    if (normalized.length < 2) continue;

    // Describe every adjacent pair that shares a role; the number of
    // descriptions is the number of consecutive same-role occurrences.
    const pairs: string[] = [];
    for (let i = 1; i < normalized.length; i++) {
      const previous = normalized[i - 1];
      const current = normalized[i];
      if (current.type === previous.type) {
        pairs.push(`[${i - 1}:${previous.type}, ${i}:${current.type}]`);
      }
    }

    if (pairs.length === 0) {
      ok(`${name}: perfect role alternation (${normalized.length} msgs)`);
      continue;
    }

    // After normalization, consecutive same-role should be rare (merging handles most cases).
    // Remaining occurrences are logged for investigation and are not counted as failures.
    const shown = pairs.slice(0, 3).join(", ");
    const ellipsis = pairs.length > 3 ? "..." : "";
    console.log(` ⚠ ${name}: ${pairs.length} consecutive same-role: ${shown}${ellipsis}`);
  }
}
|
|
111
|
+
|
|
112
|
+
console.log("\n--- Normalization: no empty assistant content ---");
{
  // Snapshot the failure tally so the closing summary line is reported (and
  // counted as a pass) only when every check in this section held. Previously
  // the ✓ line was emitted even after failed assertions.
  const failedBefore = failed;

  const files = findSessionFiles();
  for (const file of files) {
    const content = readFileSync(file, "utf-8");
    const parsed = parseSession(content);
    const chain = buildRawChain(parsed);
    const name = basename(file, ".jsonl").slice(0, 12);

    if (chain.length === 0) continue;

    const normalized = normalizeMessages(chain);

    // Check no assistant has empty content (except possibly the last):
    // the loop bound deliberately stops one short of the final message.
    for (let i = 0; i < normalized.length - 1; i++) {
      const msg = normalized[i];
      if (msg.type !== "assistant") continue;
      const blocks = msg.message.content;
      // Only array-valued content is checked for emptiness.
      if (Array.isArray(blocks)) {
        assert(blocks.length > 0, `${name}: assistant[${i}] has content`);
      }
    }
  }

  if (failed === failedBefore) ok("no empty non-terminal assistant content");
}
|
|
137
|
+
|
|
138
|
+
console.log("\n--- Normalization: user messages have content ---");
{
  // Snapshot the failure tally so the closing summary line is reported (and
  // counted as a pass) only when every check in this section held. Previously
  // the ✓ line was emitted even after failed assertions.
  const failedBefore = failed;

  const files = findSessionFiles();
  for (const file of files) {
    const content = readFileSync(file, "utf-8");
    const parsed = parseSession(content);
    const chain = buildRawChain(parsed);
    const name = basename(file, ".jsonl").slice(0, 12);

    if (chain.length === 0) continue;

    const normalized = normalizeMessages(chain);

    for (const msg of normalized) {
      if (msg.type !== "user") continue;
      const c = msg.message.content;
      // "Has content" means a non-empty string or a non-empty array.
      const hasContent =
        (typeof c === "string" && c.length > 0) ||
        (Array.isArray(c) && c.length > 0);
      assert(hasContent, `${name}: user message has content`);
    }
  }

  if (failed === failedBefore) ok("all user messages have content");
}
|
|
162
|
+
|
|
163
|
+
console.log("\n--- Normalization: first message is user ---");
{
  for (const file of findSessionFiles()) {
    const chain = buildRawChain(parseSession(readFileSync(file, "utf-8")));
    if (chain.length === 0) continue;

    const normalized = normalizeMessages(chain);
    // Nothing to check when normalization leaves no messages.
    if (normalized.length === 0) continue;

    const name = basename(file, ".jsonl").slice(0, 12);
    const first = normalized[0];
    assert(
      first.type === "user",
      `${name}: first message is user (got ${first.type})`
    );
  }
}
|
|
183
|
+
|
|
184
|
+
// ============================================================================
|
|
185
|
+
|
|
186
|
+
// Final tally: print the counts, then the verdict; exit with status 1 on any failure.
console.log(`\n${passed} passed, ${failed} failed`);
const anyFailed = failed > 0;
console.log(anyFailed ? "❌ Some tests failed" : "✅ All tests passed");
if (anyFailed) process.exit(1);
|