@ricky-stevens/context-guardian 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +29 -0
- package/.claude-plugin/plugin.json +63 -0
- package/.github/workflows/ci.yml +66 -0
- package/CLAUDE.md +132 -0
- package/LICENSE +21 -0
- package/README.md +362 -0
- package/biome.json +34 -0
- package/bun.lock +31 -0
- package/hooks/precompact.mjs +73 -0
- package/hooks/session-start.mjs +133 -0
- package/hooks/stop.mjs +172 -0
- package/hooks/submit.mjs +133 -0
- package/lib/checkpoint.mjs +258 -0
- package/lib/compact-cli.mjs +124 -0
- package/lib/compact-output.mjs +350 -0
- package/lib/config.mjs +40 -0
- package/lib/content.mjs +33 -0
- package/lib/diagnostics.mjs +221 -0
- package/lib/estimate.mjs +254 -0
- package/lib/extract-helpers.mjs +869 -0
- package/lib/handoff.mjs +329 -0
- package/lib/logger.mjs +34 -0
- package/lib/mcp-tools.mjs +200 -0
- package/lib/paths.mjs +90 -0
- package/lib/stats.mjs +81 -0
- package/lib/statusline.mjs +123 -0
- package/lib/synthetic-session.mjs +273 -0
- package/lib/tokens.mjs +170 -0
- package/lib/tool-summary.mjs +399 -0
- package/lib/transcript.mjs +939 -0
- package/lib/trim.mjs +158 -0
- package/package.json +22 -0
- package/skills/compact/SKILL.md +20 -0
- package/skills/config/SKILL.md +70 -0
- package/skills/handoff/SKILL.md +26 -0
- package/skills/prune/SKILL.md +20 -0
- package/skills/stats/SKILL.md +100 -0
- package/sonar-project.properties +12 -0
- package/test/checkpoint.test.mjs +171 -0
- package/test/compact-cli.test.mjs +230 -0
- package/test/compact-output.test.mjs +284 -0
- package/test/compaction-e2e.test.mjs +809 -0
- package/test/content.test.mjs +86 -0
- package/test/diagnostics.test.mjs +188 -0
- package/test/edge-cases.test.mjs +543 -0
- package/test/estimate.test.mjs +262 -0
- package/test/extract-helpers-coverage.test.mjs +333 -0
- package/test/extract-helpers.test.mjs +234 -0
- package/test/handoff.test.mjs +738 -0
- package/test/integration.test.mjs +582 -0
- package/test/logger.test.mjs +70 -0
- package/test/manual-compaction-test.md +426 -0
- package/test/mcp-tools.test.mjs +443 -0
- package/test/paths.test.mjs +250 -0
- package/test/quick-compaction-test.md +191 -0
- package/test/stats.test.mjs +88 -0
- package/test/statusline.test.mjs +222 -0
- package/test/submit.test.mjs +232 -0
- package/test/synthetic-session.test.mjs +600 -0
- package/test/tokens.test.mjs +293 -0
- package/test/tool-summary.test.mjs +771 -0
- package/test/transcript-coverage.test.mjs +369 -0
- package/test/transcript.test.mjs +596 -0
- package/test/trim.test.mjs +356 -0
|
@@ -0,0 +1,939 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcript extraction for Context Guardian smart compaction.
|
|
3
|
+
*
|
|
4
|
+
* Reads Claude Code's JSONL transcript and produces a filtered, human-readable
|
|
5
|
+
* checkpoint that preserves all decision-relevant content while removing
|
|
6
|
+
* re-obtainable noise (file reads, thinking blocks, system messages).
|
|
7
|
+
*
|
|
8
|
+
* Two extraction modes:
|
|
9
|
+
* - extractConversation (Smart Compact) — full history with preamble
|
|
10
|
+
* - extractRecent (Keep Recent) — sliding window of last N messages
|
|
11
|
+
*
|
|
12
|
+
* @module transcript
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import fs from "node:fs";
|
|
16
|
+
import { compactMessages } from "./compact-output.mjs";
|
|
17
|
+
import { flattenContent } from "./content.mjs";
|
|
18
|
+
import {
|
|
19
|
+
generateConversationIndex,
|
|
20
|
+
generateStateHeader,
|
|
21
|
+
isCGMenuMessage,
|
|
22
|
+
isSyntheticAck,
|
|
23
|
+
processAssistantContent,
|
|
24
|
+
processUserContent,
|
|
25
|
+
shouldSkipUserMessage,
|
|
26
|
+
} from "./extract-helpers.mjs";
|
|
27
|
+
import { isErrorResponse, startEndTrim } from "./trim.mjs";
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Matches any compact/restore marker that signals a compaction boundary. */
const COMPACT_MARKER_RE = /^\[(SMART COMPACT|KEEP RECENT|RESTORED CONTEXT)/;

/** Maximum bytes to read from a transcript to prevent OOM on large sessions.
 * 50MB supports ~800K tokens (80% of 1M context). At this scale the extraction
 * pipeline processes ~4000 JSONL lines in ~2 seconds — acceptable for a
 * compaction operation that runs at most a few times per session. */
const MAX_READ_BYTES = 50 * 1024 * 1024; // 50 MB

/** Regex to extract file paths from Edit/Write tool summaries.
 * NOTE(review): declared with /g, so repeated .test()/.exec() calls advance
 * lastIndex statefully — callers should use matchAll() or reset lastIndex
 * between uses. Confirm call sites rely on /g intentionally. */
const EDIT_WRITE_RE = /→ (?:Edit|Write) `([^`]+)`/g;

/** Regex to detect tool operation lines: "→ " (tool invocation, possibly
 * indented) or "← " (tool result, never indented). No /g flag, so .test()
 * is stateless. */
const TOOL_OP_RE = /^(?:\s*→ |← )/;
|
|
47
|
+
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// Transcript I/O
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
/**
 * Read transcript lines with a memory cap.
 * For files > MAX_READ_BYTES, reads only the tail and drops the first
 * partial line (the read almost certainly starts mid-line, so that line's
 * JSON would be unparseable anyway).
 *
 * @param {string} transcriptPath - Path to the JSONL transcript file
 * @returns {string[]} Array of non-empty JSON lines
 * @throws If the file cannot be stat'ed, opened, or read
 */
export function readTranscriptLines(transcriptPath) {
  const stat = fs.statSync(transcriptPath);
  if (stat.size <= MAX_READ_BYTES) {
    return fs
      .readFileSync(transcriptPath, "utf8")
      .split("\n")
      .filter((l) => l.trim());
  }
  const buf = Buffer.alloc(MAX_READ_BYTES);
  const fd = fs.openSync(transcriptPath, "r");
  let bytesRead = 0;
  try {
    // Capture the actual byte count: readSync may legally return fewer
    // bytes than requested, and decoding the full buffer would otherwise
    // append NUL padding to the last line.
    bytesRead = fs.readSync(fd, buf, 0, MAX_READ_BYTES, stat.size - MAX_READ_BYTES);
  } finally {
    fs.closeSync(fd);
  }
  let text = buf.toString("utf8", 0, bytesRead);
  // Drop the leading partial line; if the buffer happens to start exactly
  // at a newline (index 0) the empty first line is filtered out below.
  const firstNewline = text.indexOf("\n");
  if (firstNewline > 0) text = text.slice(firstNewline + 1);
  return text.split("\n").filter((l) => l.trim());
}
|
|
80
|
+
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Shared extraction loop — helpers
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
/**
 * Extract the file path from a tool_use content block.
 * Falls back through the known input-field aliases; empty-string fields
 * are skipped (|| semantics) and a missing input yields "".
 *
 * @param {object} block - A tool_use content block
 * @returns {string} The file path or ""
 */
function getToolFilePath(block) {
  const input = block.input ?? {};
  return input.file_path || input.path || input.relative_path || "";
}
|
|
98
|
+
|
|
99
|
+
/**
 * Track tool usage stats from an assistant content block.
 * Adds Edit/Write targets to filesModified and collects every absolute
 * path with at least 3 segments into allToolPaths (for project-root
 * detection). Mutates both collections.
 *
 * @param {object} block - A tool_use content block
 * @param {Set<string>} filesModified - Accumulator of edited/written files
 * @param {string[]} allToolPaths - Accumulator of deep absolute paths
 */
function trackToolBlock(block, filesModified, allToolPaths) {
  // Same alias fallback as getToolFilePath, inlined here.
  const fp =
    block.input?.file_path || block.input?.path || block.input?.relative_path || "";

  const isMutation = block.name === "Edit" || block.name === "Write";
  if (fp && isMutation) filesModified.add(fp);

  const segmentCount = fp.split("/").filter(Boolean).length;
  if (fp.startsWith("/") && segmentCount >= 3) allToolPaths.push(fp);
}
|
|
112
|
+
|
|
113
|
+
/**
 * Process a single assistant-type transcript line.
 * Renders the content via processAssistantContent, tracks every tool_use
 * block, and formats a "**Assistant:**" message unless the rendered text
 * is empty or a synthetic acknowledgement.
 *
 * @returns {{ message: string|null, isCGMenu: boolean, toolOps: number }}
 */
function processAssistantLine(obj, toolUseMap, filesModified, allToolPaths) {
  const content = obj.message.content;
  const rendered = processAssistantContent(content, toolUseMap);

  const toolBlocks = Array.isArray(content)
    ? content.filter((b) => b.type === "tool_use")
    : [];
  for (const b of toolBlocks) {
    trackToolBlock(b, filesModified, allToolPaths);
  }

  let message = null;
  if (rendered && !isSyntheticAck(rendered)) {
    message = `**Assistant:** ${rendered}`;
  }

  return {
    message,
    isCGMenu: isCGMenuMessage(content),
    toolOps: toolBlocks.length,
  };
}
|
|
139
|
+
|
|
140
|
+
/**
 * Process a single user-type transcript line.
 * Returns the tool result messages, the formatted user message (or null
 * when the skip heuristic fires), and the menu-state flags.
 *
 * @returns {{ toolResults: string[], userMessage: string|null, skip: boolean, clearMenu: boolean }}
 */
function processUserLine(obj, toolUseMap, lastAssistantIsCGMenu) {
  const parsed = processUserContent(obj.message.content, toolUseMap);
  const verdict = shouldSkipUserMessage(parsed.userText, lastAssistantIsCGMenu);

  let userMessage = null;
  if (!verdict.skip) {
    userMessage = `**User:** ${parsed.userText}`;
  }

  return {
    toolResults: parsed.toolResults,
    userMessage,
    skip: verdict.skip,
    clearMenu: verdict.clearMenu,
  };
}
|
|
159
|
+
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// Shared extraction loop
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
/**
 * Fold a single assistant-type transcript entry into the extraction state.
 *
 * Delegates parsing to processAssistantLine, then updates the running
 * tool-operation count, records whether this message is a CG menu, and
 * appends the formatted message (if any) to state.messages.
 *
 * (The previous JSDoc here described extractMessages — it was misplaced.)
 *
 * @param {object} obj - Parsed transcript entry (type "assistant")
 * @param {object} state - Mutable extraction state owned by extractMessages
 */
function handleAssistantEntry(obj, state) {
  const result = processAssistantLine(
    obj,
    state.toolUseMap,
    state.filesModified,
    state.allToolPaths,
  );
  state.toolOpCount += result.toolOps;
  state.lastAssistantIsCGMenu = result.isCGMenu;
  if (result.message) state.messages.push(result.message);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Fold a single user-type transcript entry into the extraction state.
 *
 * Pushes any tool-result messages, clears the CG-menu flag (every user
 * turn ends a menu interaction), and pushes the formatted user message
 * unless the skip heuristic marked it as noise.
 *
 * @param {object} obj - Parsed transcript entry (type "user")
 * @param {object} state - Mutable extraction state owned by extractMessages
 */
function handleUserEntry(obj, state) {
  const result = processUserLine(obj, state.toolUseMap, state.lastAssistantIsCGMenu);

  for (const tr of result.toolResults) {
    state.messages.push(tr);
  }

  // The original branching set this flag to false on all three paths
  // (clearMenu, skip, and the normal case), so a single unconditional
  // assignment is equivalent: any user turn closes a pending CG menu.
  state.lastAssistantIsCGMenu = false;

  if (result.skip) return;
  if (result.userMessage) state.messages.push(result.userMessage);
}
|
|
204
|
+
|
|
205
|
+
/**
 * Core extraction loop shared by both Smart Compact and Keep Recent.
 * Processes transcript lines from startIdx onward and returns structured
 * message data plus the inferred project root. (This JSDoc previously sat
 * on handleAssistantEntry by mistake.)
 *
 * @param {string[]} lines - JSONL transcript lines to process
 * @param {number} startIdx - Index to start processing from
 * @returns {{ messages: string[], filesModified: Set<string>, toolOpCount: number, parseErrors: number, projectRoot: string }}
 */
function extractMessages(lines, startIdx) {
  // Single mutable state bag threaded through the per-entry handlers.
  const state = {
    toolUseMap: new Map(),
    messages: [],
    filesModified: new Set(),
    allToolPaths: [],
    toolOpCount: 0,
    parseErrors: 0,
    lastAssistantIsCGMenu: false,
  };

  for (let i = startIdx; i < lines.length; i++) {
    let obj;
    try {
      obj = JSON.parse(lines[i]);
    } catch {
      // Malformed JSONL lines are counted but never abort extraction.
      state.parseErrors++;
      continue;
    }

    if (obj.type === "assistant" && obj.message?.role === "assistant") {
      handleAssistantEntry(obj, state);
    } else if (obj.type === "user" && obj.message?.role === "user") {
      handleUserEntry(obj, state);
    }
    // System and progress messages — skip (noise)
  }

  const projectRoot = computeProjectRoot(state.allToolPaths);

  return {
    messages: state.messages,
    filesModified: state.filesModified,
    toolOpCount: state.toolOpCount,
    parseErrors: state.parseErrors,
    projectRoot,
  };
}
|
|
243
|
+
|
|
244
|
+
/**
 * Compute the most common directory prefix from collected tool file paths.
 * Tallies every prefix of depth >= 3 path components, then picks the
 * longest prefix whose count reaches 70% of the maximum, requiring at
 * least 3 supporting paths overall.
 *
 * @param {string[]} paths - Absolute file paths collected during extraction
 * @returns {string} The inferred project root (trailing "/") or ""
 */
function computeProjectRoot(paths) {
  if (paths.length < 3) return "";

  const tally = new Map();
  for (const fullPath of paths) {
    const segments = fullPath.split("/");
    for (let depth = 3; depth < segments.length; depth++) {
      const prefix = `${segments.slice(0, depth).join("/")}/`;
      tally.set(prefix, (tally.get(prefix) ?? 0) + 1);
    }
  }

  let top = 0;
  for (const n of tally.values()) {
    if (n > top) top = n;
  }
  const cutoff = Math.floor(top * 0.7);

  let winner = "";
  for (const [prefix, n] of tally.entries()) {
    if (n >= cutoff && prefix.length > winner.length) winner = prefix;
  }
  return winner !== "" && tally.get(winner) >= 3 ? winner : "";
}
|
|
267
|
+
|
|
268
|
+
// ---------------------------------------------------------------------------
|
|
269
|
+
// Checkpoint footer — content pointers at the END (high-attention zone)
|
|
270
|
+
// ---------------------------------------------------------------------------
|
|
271
|
+
// The U-shaped attention curve means the END of context gets high attention.
|
|
272
|
+
// This footer "bookends" the checkpoint: the index at the START gives the
|
|
273
|
+
// model a map; the footer at the END reminds it what content exists in the
|
|
274
|
+
// body and where. Research (Liu et al. 2023 "Lost in the Middle") shows
|
|
275
|
+
// placing key information at BOTH ends improves recall significantly.
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
|
|
278
|
+
/**
 * Classify a message and record its exchange number in the appropriate bucket.
 * Each bucket holds deduplicated exchange numbers in first-seen order.
 *
 * @param {string} msg - A message string
 * @param {number} exchangeNum - Current exchange number
 * @param {object} buckets - { edit, write, bash, error } arrays
 */
function classifyFooterMessage(msg, exchangeNum, buckets) {
  const markers = [
    ["edit", "→ Edit "],
    ["write", "→ Write "],
    ["bash", "→ Ran "],
  ];
  for (const [bucket, needle] of markers) {
    if (msg.includes(needle) && !buckets[bucket].includes(exchangeNum)) {
      buckets[bucket].push(exchangeNum);
    }
  }

  const isErrorResult =
    msg.startsWith("←") && /\b(?:error|fail|FAIL)\b/i.test(msg);
  if (isErrorResult && !buckets.error.includes(exchangeNum)) {
    buckets.error.push(exchangeNum);
  }
}
|
|
302
|
+
|
|
303
|
+
/**
 * Format footer parts from classified exchange buckets.
 * Each non-empty bucket yields "N <label> [e1,e2,...]".
 *
 * @param {object} buckets - { edit, write, bash, error } arrays
 * @returns {string[]} Formatted part strings
 */
function formatFooterParts(buckets) {
  const describe = (nums, label) => `${nums.length} ${label} [${nums.join(",")}]`;
  const out = [];
  if (buckets.edit.length > 0) out.push(describe(buckets.edit, "edit diffs"));
  if (buckets.write.length > 0) out.push(describe(buckets.write, "file creations"));
  if (buckets.bash.length > 0) out.push(describe(buckets.bash, "command outputs"));
  if (buckets.error.length > 0) out.push(describe(buckets.error, "error results"));
  return out;
}
|
|
326
|
+
|
|
327
|
+
/**
 * Generate a compact pointer footer for the end of the checkpoint.
 * Lists content types present in the body with exchange numbers.
 * NOT a summary — purely navigational pointers.
 *
 * @param {string[]} messages - Extracted message strings
 * @returns {string} Footer section or "" if not worth adding
 */
function generateCheckpointFooter(messages) {
  // Short checkpoints don't need a footer; the top index alone suffices.
  if (messages.length < 15) return "";

  const buckets = { edit: [], write: [], bash: [], error: [] };
  // Single counter: the current exchange number IS the running total of
  // user exchanges. (The original kept two counters incremented together,
  // which were therefore always equal.)
  let exchangeNum = 0;

  for (const msg of messages) {
    if (msg.startsWith("**User:**")) exchangeNum++;
    classifyFooterMessage(msg, exchangeNum, buckets);
  }

  const parts = formatFooterParts(buckets);
  if (parts.length === 0) return "";

  return [
    "### Checkpoint Contents",
    `This record contains ${parts.join(", ")}, across ${exchangeNum} user exchanges.`,
    "The **Conversation Index** at the top has the full reference. Check it before answering questions about this session.",
  ].join("\n");
}
|
|
359
|
+
|
|
360
|
+
// ---------------------------------------------------------------------------
|
|
361
|
+
// Tiered compaction — post-processing for cold-tier messages
|
|
362
|
+
// ---------------------------------------------------------------------------
|
|
363
|
+
|
|
364
|
+
/** Max chars for assistant reasoning text in cold tier (tripled when the
 * text contains decision keywords — see compressColdAssistant). */
const COLD_ASSISTANT_LIMIT = 500;
/** Max chars for tool results in cold tier; longer results are stubbed. */
const COLD_RESULT_LIMIT = 200;
|
368
|
+
|
|
369
|
+
/**
 * Tier boundaries — number of user exchanges from the end of the conversation.
 *
 * HOT (≤5 from end): Full fidelity — no compression at all.
 * WARM (6–20 from end): Full fidelity — preserved for medium-term recall.
 * COLD (>20 from end): Aggressive trimming — assistant text capped at
 *   COLD_ASSISTANT_LIMIT, tool results at COLD_RESULT_LIMIT.
 *
 * Rationale: the most recent exchanges are actively referenced; older exchanges
 * mostly matter for their decisions and edit diffs (which are never trimmed).
 * The warm tier exists as a buffer so context doesn't cliff-edge from full to
 * aggressively trimmed. 20 exchanges ≈ a substantial coding sub-session.
 *
 * User messages are NEVER compressed regardless of tier.
 */
const HOT_TIER_BOUNDARY = 5;
const WARM_TIER_BOUNDARY = 20;

/**
 * Compute the tier for a given exchange based on distance from the end.
 *
 * @param {number} fromEnd - 1-based distance from the last exchange
 * @returns {"hot"|"warm"|"cold"}
 */
function computeTier(fromEnd) {
  return fromEnd <= HOT_TIER_BOUNDARY
    ? "hot"
    : fromEnd <= WARM_TIER_BOUNDARY
      ? "warm"
      : "cold";
}
|
|
398
|
+
|
|
399
|
+
/** Keywords that indicate a decision or architectural reasoning worth preserving.
 * Intentionally narrow — "because" and "approach" are too common in general text.
 * Split into two regexes to keep each pattern simple. */
const DECISION_RE_1 =
  /\b(?:decided to|chose .+ over|went with|trade-?off|rationale)\b/i;
const DECISION_RE_2 =
  /\b(?:design decision|instead of .+ because|pros? and cons?|reject(?:ed|ing) .+ in favou?r)\b/i;

/**
 * Test whether text contains decision keywords.
 * @param {string} text
 * @returns {boolean}
 */
function isDecisionText(text) {
  return [DECISION_RE_1, DECISION_RE_2].some((re) => re.test(text));
}
|
|
410
|
+
|
|
411
|
+
/**
 * Compress an assistant message for cold tier.
 * Preserves tool invocation lines (→) and edit diffs verbatim; trims
 * reasoning text to COLD_ASSISTANT_LIMIT (tripled when the body contains
 * decision keywords, to preserve architectural reasoning).
 *
 * @param {string} msg - The assistant message string
 * @returns {string} The compressed message, always prefixed "**Assistant:** "
 */
function compressColdAssistant(msg) {
  const prefix = "**Assistant:** ";
  const body = msg.startsWith(prefix) ? msg.slice(prefix.length) : msg;

  // Split the body into alternating text / tool segments so tool lines
  // survive untouched while prose runs get trimmed as a unit.
  const segments = [];
  let pendingText = [];
  const flushText = () => {
    if (pendingText.length > 0) {
      segments.push({ type: "text", content: pendingText.join("\n") });
      pendingText = [];
    }
  };

  for (const line of body.split("\n")) {
    if (line.startsWith("→ ")) {
      flushText();
      segments.push({ type: "tool", content: line });
    } else {
      pendingText.push(line);
    }
  }
  flushText();

  // Decision-bearing prose gets 3x the budget.
  const limit = isDecisionText(body)
    ? COLD_ASSISTANT_LIMIT * 3
    : COLD_ASSISTANT_LIMIT;

  const out = segments.map((seg) => {
    if (seg.type === "tool") return seg.content;
    // Never trim text containing edit diffs (old:/new: sections).
    if (/^\s*(old|new): \|/m.test(seg.content)) return seg.content;
    return startEndTrim(seg.content, limit);
  });

  return `${prefix}${out.join("\n")}`;
}
|
|
454
|
+
|
|
455
|
+
/**
 * Compress a tool result line (← ...) for cold tier.
 * Error results and short results are kept verbatim; long results are
 * stubbed via startEndTrim on the text after the "← " prefix.
 *
 * @param {string} msg - The tool result message
 * @returns {string} The (possibly stubbed) result line
 */
function compressColdResult(msg) {
  const keepVerbatim = isErrorResponse(msg) || msg.length <= COLD_RESULT_LIMIT;
  if (keepVerbatim) return msg;
  const stub = startEndTrim(msg.slice(2), COLD_RESULT_LIMIT);
  return `← ${stub}`;
}
|
|
467
|
+
|
|
468
|
+
/**
 * Apply tiered compression to extracted messages.
 * Only cold-tier messages are modified; hot and warm tiers pass through
 * untouched, and user messages are never compressed in any tier.
 *
 * @param {string[]} messages - Extracted message strings from extractMessages
 * @returns {string[]} Messages with cold-tier compression applied
 */
export function applyTiers(messages) {
  // Indices of messages that begin a user exchange.
  const starts = [];
  messages.forEach((m, i) => {
    if (m.startsWith("**User:**")) starts.push(i);
  });

  const total = starts.length;
  // No exchanges past the warm boundary → nothing is cold; return as-is.
  if (total <= WARM_TIER_BOUNDARY) return messages;

  // Per-message tier; anything before the first user exchange stays cold.
  const tiers = new Array(messages.length).fill("cold");
  starts.forEach((start, e) => {
    const tier = computeTier(total - e);
    const end = e + 1 < total ? starts[e + 1] : messages.length;
    for (let i = start; i < end; i++) tiers[i] = tier;
  });

  return messages.map((msg, i) => {
    if (tiers[i] !== "cold") return msg;
    if (msg.startsWith("**User:**")) return msg; // never compress user text
    if (msg.startsWith("**Assistant:**")) return compressColdAssistant(msg);
    if (msg.startsWith("←")) return compressColdResult(msg);
    return msg;
  });
}
|
|
518
|
+
|
|
519
|
+
// ---------------------------------------------------------------------------
|
|
520
|
+
// Edit coalescing — merge repeated edits to the same file region
|
|
521
|
+
// ---------------------------------------------------------------------------
|
|
522
|
+
|
|
523
|
+
/** Regex to match an Edit summary header line: → Edit `filepath`:
 * Captures the file path. No /g flag, so .test() is stateless. */
const EDIT_HEADER_RE = /^→ Edit `([^`]+)`:/;
|
|
525
|
+
|
|
526
|
+
/**
 * Accumulate a content line into the appropriate section buffer.
 * Lines join with "\n"; a null section leaves both buffers unchanged.
 *
 * @param {string} section - "old" | "new" | null
 * @param {string} line - The raw line
 * @param {string} oldStr - Accumulated old string
 * @param {string} newStr - Accumulated new string
 * @returns {{ oldStr: string, newStr: string }}
 */
function accumulateEditLine(section, line, oldStr, newStr) {
  const appended = (acc) => (acc ? `${acc}\n${line}` : line);
  return {
    oldStr: section === "old" ? appended(oldStr) : oldStr,
    newStr: section === "new" ? appended(newStr) : newStr,
  };
}
|
|
543
|
+
|
|
544
|
+
/**
 * Parse an edit block from an assistant message line set.
 * The first line must match EDIT_HEADER_RE; subsequent "old: |" and
 * "new: |" section headers switch the accumulation target for the
 * content lines that follow.
 *
 * @param {string} editText - The edit block text (header + sections)
 * @returns {{ filePath: string, oldStr: string, newStr: string }|null}
 */
function parseEditBlock(editText) {
  const headerMatch = editText.match(EDIT_HEADER_RE);
  if (!headerMatch) return null;

  const lines = editText.split("\n");
  let section = null; // 'old' | 'new' | null until a header is seen
  let oldStr = "";
  let newStr = "";

  // Start at 1 — line 0 is the header.
  for (let i = 1; i < lines.length; i++) {
    const head = lines[i].trimStart();
    if (head.startsWith("old: |")) {
      section = "old";
    } else if (head.startsWith("new: |")) {
      section = "new";
    } else {
      // Content lines are indented under the section header.
      ({ oldStr, newStr } = accumulateEditLine(section, lines[i], oldStr, newStr));
    }
  }

  return { filePath: headerMatch[1], oldStr: oldStr.trim(), newStr: newStr.trim() };
}
|
|
576
|
+
|
|
577
|
+
/**
 * Record a parsed edit into the editMap under its file path key.
 * Creates the per-file entry array on first use.
 *
 * @param {Map} editMap - filePath → edit entry array
 * @param {object} parsed - Parsed edit { filePath, oldStr, newStr }
 * @param {number} msgIdx - Message index
 * @param {number} editStart - Line index where edit starts
 * @param {number} editEnd - Line index where edit ends
 */
function recordEdit(editMap, parsed, msgIdx, editStart, editEnd) {
  const entry = { msgIdx, editStart, editEnd, parsed };
  const existing = editMap.get(parsed.filePath);
  if (existing) {
    existing.push(entry);
  } else {
    editMap.set(parsed.filePath, [entry]);
  }
}
|
|
590
|
+
|
|
591
|
+
/**
 * Collect all edit blocks from a single assistant message.
 * Consecutive edit-header lines delimit blocks; the last block runs to
 * the end of the message. Parsed edits are recorded into editMap.
 *
 * @param {string} msg - The assistant message string
 * @param {number} msgIdx - Index of this message in the messages array
 * @param {Map} editMap - Accumulator: filePath → edit entries
 */
function collectEditsFromMessage(msg, msgIdx, editMap) {
  const lines = msg.split("\n");

  // Pass 1: locate every edit header line.
  const headerIdxs = [];
  for (let i = 0; i < lines.length; i++) {
    if (EDIT_HEADER_RE.test(lines[i])) headerIdxs.push(i);
  }

  // Pass 2: each block spans from its header to the next header (or EOF).
  for (let h = 0; h < headerIdxs.length; h++) {
    const start = headerIdxs[h];
    const end = h + 1 < headerIdxs.length ? headerIdxs[h + 1] : lines.length;
    const parsed = parseEditBlock(lines.slice(start, end).join("\n"));
    if (parsed) recordEdit(editMap, parsed, msgIdx, start, end);
  }
}
|
|
619
|
+
|
|
620
|
+
/**
|
|
621
|
+
* Build chains of overlapping edits for a single file path.
|
|
622
|
+
* Two edits overlap when the later edit's old_string is contained in the
|
|
623
|
+
* previous edit's new_string (same region being modified repeatedly).
|
|
624
|
+
*
|
|
625
|
+
* @param {object[]} edits - Array of edit entries for one file
|
|
626
|
+
* @returns {object[][]} Array of chains (each chain is an array of edit entries)
|
|
627
|
+
*/
|
|
628
|
+
function buildCoalesceChains(edits) {
|
|
629
|
+
const chains = [[edits[0]]];
|
|
630
|
+
for (let i = 1; i < edits.length; i++) {
|
|
631
|
+
const currentChain = chains.at(-1);
|
|
632
|
+
const lastInChain = currentChain.at(-1);
|
|
633
|
+
const prevNew = lastInChain.parsed.newStr;
|
|
634
|
+
const currOld = edits[i].parsed.oldStr;
|
|
635
|
+
if (prevNew && currOld && prevNew.includes(currOld)) {
|
|
636
|
+
currentChain.push(edits[i]);
|
|
637
|
+
} else {
|
|
638
|
+
chains.push([edits[i]]);
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
return chains;
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
/**
 * Build the replacement text for a coalesced edit chain.
 * The result keeps the FIRST edit's old_string and the LAST edit's
 * new_string, with a "[N edits coalesced]" marker in the header.
 *
 * @param {object[]} edits - Chain of edit entries to coalesce (non-empty)
 * @returns {string} The coalesced edit block text
 */
function buildCoalescedEditText(edits) {
  const first = edits[0];
  const last = edits.at(-1);
  const filePath = first.parsed.filePath;
  const count = edits.length;
  const header = `→ Edit \`${filePath}\` [${count} edits coalesced]:`;
  const parts = [header];

  if (first.parsed.oldStr) {
    parts.push(
      " old: |",
      first.parsed.oldStr
        .split("\n")
        .map((l) => ` ${l}`)
        .join("\n"),
    );
  }
  if (last.parsed.newStr) {
    parts.push(
      " new: |",
      last.parsed.newStr
        .split("\n")
        .map((l) => ` ${l}`)
        .join("\n"),
    );
  } else if (first.parsed.newStr && edits.length === 2) {
    // If last has no newStr (deletion), still show it
    // NOTE(review): the `edits.length === 2` guard means deletions at the
    // end of chains of 3+ edits silently emit NO "new:" section — confirm
    // this asymmetry is intentional.
    parts.push(" new: | [deleted]");
  }

  return parts.join("\n");
}
|
|
681
|
+
|
|
682
|
+
/**
 * Rebuild a single assistant message applying the edit-coalescing plan.
 *
 * Scans the message line by line. An edit block runs from a line matching
 * EDIT_HEADER_RE up to (but not including) the next edit header or the end
 * of the message. Each finished block is dropped, substituted, or kept
 * verbatim according to the remove/replace lookups.
 *
 * @param {string} msg - Original message
 * @param {number} msgIdx - Message index
 * @param {Set} removeEdits - Set of "msgIdx:editStart" keys to remove
 * @param {Map} replaceEdits - Map of "msgIdx:editStart" → replacement text
 * @returns {string} Rebuilt message
 */
function rebuildMessageWithCoalescing(msg, msgIdx, removeEdits, replaceEdits) {
  const lines = msg.split("\n");
  const out = [];
  let blockStart = -1;

  // Iterate one slot past the end so a trailing edit block is flushed.
  for (let pos = 0; pos <= lines.length; pos++) {
    const atEnd = pos === lines.length;
    const startsEdit = !atEnd && EDIT_HEADER_RE.test(lines[pos]);

    // Close out a pending edit block at a new header or end-of-message.
    if (blockStart >= 0 && (startsEdit || atEnd)) {
      const key = `${msgIdx}:${blockStart}`;
      if (!removeEdits.has(key)) {
        // Removed keys (first/intermediate edits in a chain) emit nothing.
        if (replaceEdits.has(key)) {
          out.push(replaceEdits.get(key));
        } else {
          out.push(...lines.slice(blockStart, pos));
        }
      }
      blockStart = -1;
    }

    if (startsEdit) {
      blockStart = pos;
    } else if (blockStart < 0 && !atEnd) {
      // Ordinary line outside any edit block — keep as-is.
      out.push(lines[pos]);
    }
  }

  return out.join("\n");
}
|
|
720
|
+
|
|
721
|
+
/**
 * Coalesce repeated edits to the same file within assistant messages.
 *
 * When a file is edited N times in sequence (each edit building on the
 * previous one's output), keeps the first edit's old_string and the last
 * edit's new_string, replacing the intermediate edits with a single
 * coalesced block. Only coalesces edits to the same file — edits to
 * different files are always kept independently.
 *
 * @param {string[]} messages - Extracted message strings
 * @returns {string[]} Messages with coalesced edits
 */
export function coalesceEdits(messages) {
  const editMap = new Map();

  for (let i = 0; i < messages.length; i++) {
    if (!messages[i].startsWith("**Assistant:**")) continue;
    collectEditsFromMessage(messages[i], i, editMap);
  }

  // Find coalescing chains: only merge edits where a later edit's old_string
  // contains content from the previous edit's new_string (same region).
  const toCoalesce = new Map();
  for (const [filePath, edits] of editMap) {
    if (edits.length < 2) continue;
    const chains = buildCoalesceChains(edits);
    for (const chain of chains) {
      if (chain.length >= 2) {
        // Bug fix: the key must include msgIdx. editStart is a line offset
        // *within* one message, so two chains for the same file starting at
        // the same offset in different messages previously collided and one
        // chain was silently dropped (left un-coalesced).
        toCoalesce.set(
          `${filePath}:${chain[0].msgIdx}:${chain[0].editStart}`,
          chain,
        );
      }
    }
  }

  if (toCoalesce.size === 0) return messages;

  // Build set of edits to remove (first + intermediates) and a map from the
  // last edit in each chain to its coalesced replacement text.
  const removeEdits = new Set();
  const replaceEdits = new Map();

  for (const [, edits] of toCoalesce) {
    // Remove all intermediate edits and the first (replaced by coalesced at last position)
    for (let i = 0; i < edits.length - 1; i++) {
      removeEdits.add(`${edits[i].msgIdx}:${edits[i].editStart}`);
    }

    const last = edits.at(-1);
    replaceEdits.set(
      `${last.msgIdx}:${last.editStart}`,
      buildCoalescedEditText(edits),
    );
  }

  // Rebuild messages with coalesced edits
  return messages.map((msg, i) => {
    if (!msg.startsWith("**Assistant:**")) return msg;
    return rebuildMessageWithCoalescing(msg, i, removeEdits, replaceEdits);
  });
}
|
|
778
|
+
|
|
779
|
+
// ---------------------------------------------------------------------------
|
|
780
|
+
// Smart Compact — full conversation extraction
|
|
781
|
+
// ---------------------------------------------------------------------------
|
|
782
|
+
|
|
783
|
+
/**
 * Extract the full conversation history from a transcript, preserving
 * tool summaries and edit diffs while removing re-obtainable noise.
 *
 * @param {string} transcriptPath - Path to the JSONL transcript
 * @returns {string} Formatted checkpoint content
 */
export function extractConversation(transcriptPath) {
  if (!transcriptPath || !fs.existsSync(transcriptPath))
    return "(no transcript available)";

  const lines = readTranscriptLines(transcriptPath);

  // Locate the most recent compaction boundary by scanning backwards for a
  // compact marker or a previously injected checkpoint header.
  let compactPreamble = "";
  let boundary = -1;
  for (let i = lines.length - 1; i >= 0; i--) {
    let text;
    try {
      const entry = JSON.parse(lines[i]);
      text = flattenContent(entry.message?.content).trim();
    } catch {
      continue; // Skip unparseable lines
    }
    if (
      COMPACT_MARKER_RE.test(text) ||
      text.startsWith("# Context Checkpoint")
    ) {
      compactPreamble = text;
      boundary = i;
      break;
    }
  }

  // Only messages after the boundary are re-extracted.
  const { messages, filesModified, toolOpCount, parseErrors, projectRoot } =
    extractMessages(lines, boundary + 1);

  // Tiered compression first (cold-tier messages trimmed aggressively),
  // then net-diff coalescing of repeated edits to the same file.
  const processed = coalesceEdits(applyTiers(messages));

  // State header and fact index come from the ORIGINAL messages so they
  // keep full fidelity.
  const stateHeader = generateStateHeader(messages, filesModified, toolOpCount);
  const factIndex = generateConversationIndex(messages);

  // Prior checkpoint content is already compacted — preserve it verbatim.
  // Only capCheckpointContent() in checkpoint.mjs applies a safety cap.

  // Assemble: header + fact index + optional preamble + body.
  let out = `${stateHeader}\n\n`;
  if (factIndex) out += `${factIndex}\n\n`;
  out += `---\n\n`;
  if (compactPreamble) out += `${compactPreamble}\n\n---\n\n`;
  out += compactMessages(processed).join("\n\n---\n\n");

  if (parseErrors > 0) {
    out += `\n\n> Warning: ${parseErrors} transcript line(s) could not be parsed and may be missing from this record.`;
  }

  // Footer with content pointers lands in the high-attention end zone.
  const footer = generateCheckpointFooter(messages);
  if (footer) out += `\n\n---\n\n${footer}`;

  // R1: Strip project root from paths to reduce noise
  if (projectRoot) out = out.replaceAll(projectRoot, "");

  // R5: Shorten speaker prefixes — markdown bold serves no purpose in
  // additionalContext injection, and the short forms save ~300 tokens
  // across a typical checkpoint.
  return out
    .replaceAll("**User:**", "User:")
    .replaceAll("**Assistant:**", "Asst:");
}
|
|
860
|
+
|
|
861
|
+
// ---------------------------------------------------------------------------
|
|
862
|
+
// Keep Recent — exchange-based sliding window extraction
|
|
863
|
+
// ---------------------------------------------------------------------------
|
|
864
|
+
|
|
865
|
+
/**
 * Compute files modified and tool op count from a windowed message set.
 * @param {string[]} recent - Windowed messages
 * @returns {{ windowFiles: Set<string>, windowToolOps: number }}
 */
function computeWindowStats(recent) {
  const windowFiles = new Set();
  let windowToolOps = 0;

  for (const msg of recent) {
    // Every Edit/Write header contributes its captured file path.
    for (const m of msg.matchAll(EDIT_WRITE_RE)) {
      windowFiles.add(m[1]);
    }

    // Count tool-operation lines, one test per line.
    windowToolOps += msg
      .split("\n")
      .filter((line) => TOOL_OP_RE.test(line)).length;
  }

  return { windowFiles, windowToolOps };
}
|
|
883
|
+
|
|
884
|
+
/**
 * Extract the last N user exchanges from the transcript.
 *
 * An "exchange" is a user message plus everything that follows it (assistant
 * responses, tool summaries, tool results) up to the next user message.
 * Grouping this way keeps tool results and multi-step assistant work from
 * consuming slots in the window.
 *
 * @param {string} transcriptPath - Path to the JSONL transcript
 * @param {number} n - Number of user exchanges to keep (default 10)
 * @returns {string} Formatted checkpoint content
 */
export function extractRecent(transcriptPath, n = 10) {
  if (!transcriptPath || !fs.existsSync(transcriptPath))
    return "(no transcript available)";

  const lines = readTranscriptLines(transcriptPath);

  // Extract ALL messages first, then apply the exchange window.
  const { messages, parseErrors, projectRoot } = extractMessages(lines, 0);

  // Positions of every **User:** message mark exchange boundaries.
  const userIndices = messages
    .map((m, i) => (m.startsWith("**User:**") ? i : -1))
    .filter((i) => i >= 0);

  // Window: from the Nth-from-last user message through the end; if there
  // are no more than n exchanges, keep everything.
  const recent =
    userIndices.length <= n
      ? messages
      : messages.slice(userIndices[userIndices.length - n]);

  // Stats are computed from WINDOWED content only so the header reflects
  // exactly what the checkpoint contains.
  const { windowFiles, windowToolOps } = computeWindowStats(recent);
  const stateHeader = generateStateHeader(recent, windowFiles, windowToolOps);

  let out = `${stateHeader}\n\n---\n\n${compactMessages(recent).join("\n\n---\n\n")}`;

  if (parseErrors > 0) {
    out += `\n\n> Warning: ${parseErrors} transcript line(s) could not be parsed and may be missing from this record.`;
  }

  if (projectRoot) out = out.replaceAll(projectRoot, "");

  // Shorten prefixes for token efficiency (same scheme as extractConversation)
  return out
    .replaceAll("**User:**", "User:")
    .replaceAll("**Assistant:**", "Asst:");
}
|