@ricky-stevens/context-guardian 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/.claude-plugin/marketplace.json +29 -0
  2. package/.claude-plugin/plugin.json +63 -0
  3. package/.github/workflows/ci.yml +66 -0
  4. package/CLAUDE.md +132 -0
  5. package/LICENSE +21 -0
  6. package/README.md +362 -0
  7. package/biome.json +34 -0
  8. package/bun.lock +31 -0
  9. package/hooks/precompact.mjs +73 -0
  10. package/hooks/session-start.mjs +133 -0
  11. package/hooks/stop.mjs +172 -0
  12. package/hooks/submit.mjs +133 -0
  13. package/lib/checkpoint.mjs +258 -0
  14. package/lib/compact-cli.mjs +124 -0
  15. package/lib/compact-output.mjs +350 -0
  16. package/lib/config.mjs +40 -0
  17. package/lib/content.mjs +33 -0
  18. package/lib/diagnostics.mjs +221 -0
  19. package/lib/estimate.mjs +254 -0
  20. package/lib/extract-helpers.mjs +869 -0
  21. package/lib/handoff.mjs +329 -0
  22. package/lib/logger.mjs +34 -0
  23. package/lib/mcp-tools.mjs +200 -0
  24. package/lib/paths.mjs +90 -0
  25. package/lib/stats.mjs +81 -0
  26. package/lib/statusline.mjs +123 -0
  27. package/lib/synthetic-session.mjs +273 -0
  28. package/lib/tokens.mjs +170 -0
  29. package/lib/tool-summary.mjs +399 -0
  30. package/lib/transcript.mjs +939 -0
  31. package/lib/trim.mjs +158 -0
  32. package/package.json +22 -0
  33. package/skills/compact/SKILL.md +20 -0
  34. package/skills/config/SKILL.md +70 -0
  35. package/skills/handoff/SKILL.md +26 -0
  36. package/skills/prune/SKILL.md +20 -0
  37. package/skills/stats/SKILL.md +100 -0
  38. package/sonar-project.properties +12 -0
  39. package/test/checkpoint.test.mjs +171 -0
  40. package/test/compact-cli.test.mjs +230 -0
  41. package/test/compact-output.test.mjs +284 -0
  42. package/test/compaction-e2e.test.mjs +809 -0
  43. package/test/content.test.mjs +86 -0
  44. package/test/diagnostics.test.mjs +188 -0
  45. package/test/edge-cases.test.mjs +543 -0
  46. package/test/estimate.test.mjs +262 -0
  47. package/test/extract-helpers-coverage.test.mjs +333 -0
  48. package/test/extract-helpers.test.mjs +234 -0
  49. package/test/handoff.test.mjs +738 -0
  50. package/test/integration.test.mjs +582 -0
  51. package/test/logger.test.mjs +70 -0
  52. package/test/manual-compaction-test.md +426 -0
  53. package/test/mcp-tools.test.mjs +443 -0
  54. package/test/paths.test.mjs +250 -0
  55. package/test/quick-compaction-test.md +191 -0
  56. package/test/stats.test.mjs +88 -0
  57. package/test/statusline.test.mjs +222 -0
  58. package/test/submit.test.mjs +232 -0
  59. package/test/synthetic-session.test.mjs +600 -0
  60. package/test/tokens.test.mjs +293 -0
  61. package/test/tool-summary.test.mjs +771 -0
  62. package/test/transcript-coverage.test.mjs +369 -0
  63. package/test/transcript.test.mjs +596 -0
  64. package/test/trim.test.mjs +356 -0
@@ -0,0 +1,869 @@
1
+ /**
2
+ * Content processing helpers for transcript extraction.
3
+ *
4
+ * Handles the per-message processing logic: interleaved content block
5
+ * handling for assistant messages, user message classification,
6
+ * skip-rule evaluation, and state header generation.
7
+ *
8
+ * @module extract-helpers
9
+ */
10
+
11
+ import {
12
+ contentBlockPlaceholder,
13
+ summarizeToolResult,
14
+ summarizeToolUse,
15
+ } from "./tool-summary.mjs";
16
+ import { isAffirmativeConfirmation, isSystemInjection } from "./trim.mjs";
17
+
18
/**
 * Recognizes text that opens with one of the compaction/restore markers:
 * "[SMART COMPACT", "[KEEP RECENT" or "[RESTORED CONTEXT".
 */
const COMPACT_MARKER_RE = /^\[(SMART COMPACT|KEEP RECENT|RESTORED CONTEXT)/;
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Assistant message processing
23
+ // ---------------------------------------------------------------------------
24
+
25
/**
 * Convert one assistant content block into its transcript text.
 *
 * - Non-empty `text` blocks yield their trimmed text.
 * - `tool_use` blocks are recorded in `toolUseMap` (when they carry an id)
 *   and rendered through summarizeToolUse.
 * - `thinking` / `redacted_thinking` blocks contribute nothing.
 * - Everything else (including a `text` block whose text is empty) is
 *   rendered through contentBlockPlaceholder.
 *
 * @param {object} block - A single entry of message.content
 * @param {Map} toolUseMap - Map<tool_use_id, {name, input}> to populate
 * @returns {string} Text to append, or "" when the block adds nothing
 */
function processAssistantBlock(block, toolUseMap) {
  switch (block.type) {
    case "text":
      if (block.text) return block.text.trim();
      break; // empty text is handled by the placeholder below
    case "tool_use":
      if (block.id) {
        toolUseMap.set(block.id, { name: block.name, input: block.input });
      }
      return summarizeToolUse(block) || "";
    case "thinking":
    case "redacted_thinking":
      return "";
    default:
      break;
  }
  return contentBlockPlaceholder(block) || "";
}
44
+
45
/**
 * Render an assistant message's content as a single string.
 *
 * Array content is walked in order; each block is rendered with
 * processAssistantBlock (which also fills `toolUseMap`) and the non-empty
 * results are joined with newlines. String content is returned trimmed;
 * any other value yields "".
 *
 * @param {Array} contentArray - The message.content array
 * @param {Map} toolUseMap - Map<tool_use_id, {name, input}> to populate
 * @returns {string} Formatted assistant message content
 */
export function processAssistantContent(contentArray, toolUseMap) {
  if (Array.isArray(contentArray)) {
    return Array.from(contentArray, (block) =>
      processAssistantBlock(block, toolUseMap),
    )
      .filter(Boolean)
      .join("\n");
  }
  if (typeof contentArray === "string") {
    return contentArray.trim();
  }
  return "";
}
65
+
66
+ // ---------------------------------------------------------------------------
67
+ // User message processing
68
+ // ---------------------------------------------------------------------------
69
+
70
/**
 * Convert one user content block into either human text or a tool result.
 *
 * At most one of the returned fields is non-empty: `text` for text blocks
 * and placeholder-rendered blocks, `toolResult` for `tool_result` blocks
 * (summarized together with the matching tool_use entry, when known).
 *
 * @param {object} block - A single entry of message.content
 * @param {Map} toolUseMap - Map<tool_use_id, {name, input}> for result lookup
 * @returns {{ text: string, toolResult: string }}
 */
function processUserBlock(block, toolUseMap) {
  const hasText = block.type === "text" && block.text;
  if (hasText) {
    return { text: block.text.trim(), toolResult: "" };
  }
  if (block.type !== "tool_result") {
    return { text: contentBlockPlaceholder(block) || "", toolResult: "" };
  }
  // Unknown or missing ids are passed to the summarizer as null.
  const toolInfo =
    (block.tool_use_id && toolUseMap.get(block.tool_use_id)) || null;
  return { text: "", toolResult: summarizeToolResult(block, toolInfo) || "" };
}
86
+
87
/**
 * Split a user message's content into human-written text and tool results.
 *
 * String content is treated as plain human text. Array content is walked
 * in order with processUserBlock; text pieces are joined with newlines and
 * tool results are collected separately. Any other value yields empty
 * output.
 *
 * @param {Array|string} content - The message.content (string or array)
 * @param {Map} toolUseMap - Map<tool_use_id, {name, input}> for result lookup
 * @returns {{ userText: string, toolResults: string[] }}
 */
export function processUserContent(content, toolUseMap) {
  if (typeof content === "string") {
    return { userText: content.trim(), toolResults: [] };
  }

  const toolResults = [];
  if (!Array.isArray(content)) {
    return { userText: "", toolResults };
  }

  const humanText = [];
  for (let i = 0; i < content.length; i += 1) {
    const processed = processUserBlock(content[i], toolUseMap);
    if (processed.text) humanText.push(processed.text);
    if (processed.toolResult) toolResults.push(processed.toolResult);
  }

  return { userText: humanText.join("\n").trim(), toolResults };
}
118
+
119
+ // ---------------------------------------------------------------------------
120
+ // Skip rules
121
+ // ---------------------------------------------------------------------------
122
+
123
/**
 * Decide whether an extracted user text should be left out of the
 * transcript.
 *
 * Skipped: empty text, slash commands, replies to a CG menu ("0"-"4" or
 * "cancel", only when the previous assistant message was a menu), compact
 * markers / checkpoint headers, system injections, and short affirmative
 * confirmations. `clearMenu` is true only for the menu-reply case.
 *
 * @param {string} text - The extracted user text
 * @param {boolean} lastAssistantIsCGMenu - Whether the previous message was a CG menu
 * @returns {{ skip: boolean, clearMenu: boolean }}
 */
export function shouldSkipUserMessage(text, lastAssistantIsCGMenu) {
  // Each call returns a fresh object so callers can never share state.
  const verdict = (skip, clearMenu = false) => ({ skip, clearMenu });

  // Nothing to keep, or a slash command (meta-operation, not conversation).
  if (!text || text.startsWith("/")) return verdict(true);

  const isMenuReply =
    lastAssistantIsCGMenu &&
    (/^[0-4]$/.test(text) || text.toLowerCase() === "cancel");
  if (isMenuReply) return verdict(true, true);

  // Leftovers from earlier compactions.
  const isCompactionLeftover =
    COMPACT_MARKER_RE.test(text) || text.startsWith("# Context Checkpoint");

  if (
    isCompactionLeftover ||
    isSystemInjection(text) ||
    isAffirmativeConfirmation(text)
  ) {
    return verdict(true);
  }

  return verdict(false);
}
157
+
158
/**
 * Opening words of the synthetic assistant acknowledgement that Context
 * Guardian injects after restoring a checkpoint.
 */
const SYNTHETIC_ACK_RE =
  /^Context restored from checkpoint\.\s+I have the full session history/;

/**
 * Tell whether `text` is the synthetic restore acknowledgement.
 * Leading/trailing whitespace is ignored; null and undefined count as "".
 *
 * @param {string|null|undefined} text
 * @returns {boolean}
 */
export function isSyntheticAck(text) {
  const candidate = text ?? "";
  return SYNTHETIC_ACK_RE.test(candidate.trim());
}
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // State header generation
168
+ // ---------------------------------------------------------------------------
169
+
170
/**
 * Pick the text for the header's "Goal" field: the first user message
 * that is not header noise, not a greeting, not a meta/instruction
 * preamble, and longer than 20 characters once trimmed.
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {string} Up to 200 characters with newlines flattened, or ""
 */
function findFirstUserGoal(messages) {
  const greetingRe =
    /^(?:hi|hello|hey|good (?:morning|afternoon|evening)|what'?s up|sup)\b/i;
  const metaRe =
    /^(?:IMPORTANT:|Rules:|NOTE:|WARNING:|Context Guardian|We are about to run)/i;

  const isGoalCandidate = (text) => {
    if (isHeaderNoise(text)) return false;
    const trimmed = text.trim();
    return (
      !greetingRe.test(trimmed) && !metaRe.test(trimmed) && trimmed.length > 20
    );
  };

  for (const msg of messages) {
    if (!msg.startsWith("**User:**")) continue;
    const text = msg.replace("**User:** ", "");
    if (isGoalCandidate(text)) {
      return text.replaceAll("\n", " ").slice(0, 200);
    }
  }
  return "";
}
189
+
190
/**
 * Pick the text for the header's "Last action" field: scanning from the
 * end, the first assistant message that is not header noise and is longer
 * than 30 characters.
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {string} Up to 200 characters with newlines flattened, or ""
 */
function findLastAssistantAction(messages) {
  let idx = messages.length;
  while (idx-- > 0) {
    const msg = messages[idx];
    if (!msg.startsWith("**Assistant:**")) continue;
    const text = msg.replace("**Assistant:** ", "");
    const isSubstantive = !isHeaderNoise(text) && text.length > 30;
    if (isSubstantive) {
      return text.replaceAll("\n", " ").slice(0, 200);
    }
  }
  return "";
}
203
+
204
/**
 * Generate a brief orientation header for the checkpoint.
 * Gives the LLM an immediate anchor before the chronological detail.
 *
 * The header lists the session goal (first substantive user message), the
 * sorted list of modified files, the topics extracted from user messages,
 * the last substantive assistant message, and message/tool counters.
 *
 * @param {string[]} messages - The formatted message strings
 * @param {Set<string>} filesModified - Set of files with Edit/Write operations
 * @param {number} toolOpCount - Total number of tool operations
 * @returns {string} The header block
 */
export function generateStateHeader(messages, filesModified, toolOpCount) {
  const firstUser = findFirstUserGoal(messages);
  const lastAssistant = findLastAssistantAction(messages);

  const fileList =
    filesModified.size > 0
      ? Array.from(filesModified)
          .sort((a, b) => a.localeCompare(b))
          .join(", ")
      : "none";

  // Normalize and filter BEFORE choosing the fallback: if every candidate
  // is dropped by the length filter, the line must still read
  // "general discussion" rather than being left empty.
  const topics = extractTopics(messages)
    .map((t) => t.replaceAll(/[\r\n]+/g, " ").trim())
    .filter((t) => t.length > 1 && t.length < 60);
  const topicLine =
    topics.length > 0 ? topics.join(", ") : "general discussion";

  return [
    "## Session State",
    `Goal: ${firstUser || "[not available]"}`,
    `Files modified: ${fileList}`,
    `Topics covered: ${topicLine}`,
    `Last action: ${lastAssistant || "[not available]"}`,
    `Messages preserved: ${messages.length} | Tool operations: ${toolOpCount}`,
  ].join("\n");
}
243
+
244
/**
 * Tell whether a message body is infrastructure noise that must not be
 * used for the header's Goal / Last action fields: empty text, system
 * injections, anything containing a code fence or a command tag, and
 * tool-summary lines (those starting with "→ ").
 *
 * @param {string} text - Message body without its role prefix
 * @returns {boolean}
 */
function isHeaderNoise(text) {
  if (!text || isSystemInjection(text)) return true;
  const noiseMarkers = ["```", "<command-message>", "<command-name>"];
  if (noiseMarkers.some((marker) => text.includes(marker))) return true;
  return text.startsWith("→ "); // tool summary line
}
257
+
258
/**
 * Build the topic list for the state header from user messages only.
 *
 * Each user message (role prefix and carriage returns removed) is handed
 * to collectTopicsFromText; the resulting set is cleaned with
 * removeTopicNoise and capped at 15 entries in insertion order.
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {string[]} Deduplicated topic strings
 */
function extractTopics(messages) {
  const found = new Set();

  for (let i = 0; i < messages.length; i += 1) {
    const msg = messages[i];
    if (msg.startsWith("**User:**")) {
      const body = msg.replace("**User:** ", "").replaceAll("\r", "");
      collectTopicsFromText(body, found);
    }
  }

  removeTopicNoise(found);
  return [...found].slice(0, 15);
}
278
+
279
/**
 * Add the topic candidates found in one user message to `topics`.
 *
 * Candidates, in insertion order: ticket-style IDs (e.g. ZEP-4471),
 * double-quoted capitalized names (e.g. "OrderMesh"), the object of the
 * first decision verb, and capitalized multi-word sequences that are not
 * known false positives.
 *
 * @param {string} text - User message body
 * @param {Set<string>} topics - Set that receives the candidates
 */
function collectTopicsFromText(text, topics) {
  // Ticket/bug/incident IDs
  const ticketIds = text.matchAll(/\b[A-Z]{2,6}-\d{2,6}\b/g);
  for (const [id] of ticketIds) topics.add(id);

  // Quoted project/service names
  const quotedNames = text.matchAll(/"([A-Z][A-Za-z0-9_-]+)"/g);
  for (const [, quoted] of quotedNames) topics.add(quoted);

  // Object of a decision verb, only looked up when a decision keyword occurs
  const mentionsDecision =
    /\b(?:decided|chose|rejected|decision|approved)\b/i.test(text);
  if (mentionsDecision) {
    const decisionRe =
      /(?:decided|chose|rejected|approved)\s+(?:to\s+)?(?:go\s+with\s+)?(?:Option\s+)?([A-Z][a-z0-9 _-]{2,30})/i;
    const decided = decisionRe.exec(text);
    if (decided) topics.add(decided[1].trim());
  }

  // Capitalized multi-word sequences (likely proper nouns)
  const properNouns = text.matchAll(/\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/g);
  for (const [, candidate] of properNouns) {
    if (isNamedEntityFalsePositive(candidate)) continue;
    topics.add(candidate);
  }
}
310
+
311
/** Named-entity false positives: determiners, question words, emphasis words. */
const NAMED_ENTITY_FP_1 =
  /^(?:The|This|That|When|After|Before|During|Which|Where|What|How|NOT|WILL|REJECTED)\b/;

/** Named-entity false positives: status words, prepositions, weekdays. */
const NAMED_ENTITY_FP_2 =
  /^(?:Confirmed|Discovered|On|In|Saturday|Sunday|Monday|Tuesday|Wednesday|Thursday|Friday)\b/;

/** Named-entity false positives: month names. */
const NAMED_ENTITY_FP_3 =
  /^(?:January|February|March|April|May|June|July|August|September|October|November|December)\b/;

/**
 * Tell whether a capitalized word sequence starts with a word that makes
 * it unlikely to be a real named entity.
 *
 * @param {string} name - Candidate such as "Alice Smith"
 * @returns {boolean}
 */
function isNamedEntityFalsePositive(name) {
  const prefixFilters = [
    NAMED_ENTITY_FP_1,
    NAMED_ENTITY_FP_2,
    NAMED_ENTITY_FP_3,
  ];
  return prefixFilters.some((re) => re.test(name));
}
333
+
334
/**
 * Delete non-informative entries from `topics` in place: a fixed list of
 * tool / Context Guardian words, ALL_CAPS identifiers of four or more
 * characters, and anything containing a Context Guardian phrase.
 *
 * @param {Set<string>} topics - Topic set to clean (mutated)
 */
function removeTopicNoise(topics) {
  const exactNoise = new Set([
    "Confirmed",
    "Read",
    "Run",
    "Context Guardian",
    "Context Guardian Stats",
    "Smart Compact",
    "Keep Recent",
  ]);
  const noiseFragments = ["Context Guardian", "Smart Compact", "Keep Recent"];

  const isNoise = (topic) =>
    exactNoise.has(topic) ||
    /^[A-Z_]{4,}$/.test(topic) || // code identifiers (ALL_CAPS_WITH_UNDERSCORES)
    noiseFragments.some((fragment) => topic.includes(fragment));

  // Iterate over a snapshot so deletions never interact with iteration.
  for (const topic of [...topics]) {
    if (isNoise(topic)) topics.delete(topic);
  }
}
356
+
357
+ // ---------------------------------------------------------------------------
358
+ // Conversation index — structured preamble for LLM consumption
359
+ // ---------------------------------------------------------------------------
360
+ // Exploits the U-shaped attention curve: the model attends most to the START
361
+ // and END of context. This index goes at the START, giving the model a complete
362
+ // map of what's in the checkpoint. The body (middle) is navigable via numbers.
363
+ // ---------------------------------------------------------------------------
364
+
365
+ /** Lines that are tool requests — not fact-bearing user content (group 1). */
366
+ const TOOL_REQUEST_RE_1 =
367
+ /^(?:Read |Run[: ]|Grep |Search |Find |List |Tell me |Explain |Show me |Describe )/i;
368
+
369
+ /** Lines that are tool requests — not fact-bearing user content (group 2). */
370
+ const TOOL_REQUEST_RE_2 =
371
+ /^(?:How (?:many|does|do |is )|What (?:is|does|are )|What'?s the |Can you )/i;
372
+
373
+ /** Lines that are tool requests — not fact-bearing user content (group 3). */
374
+ const TOOL_REQUEST_RE_3 =
375
+ /^(?:Then |After all |Create a file |In [/~].*(?:change |replace )|Also change |Actually change |Also — in [/~])/i;
376
+
377
+ /** Lines that are instructions/meta, not facts (group 1). */
378
+ const INSTRUCTION_RE_1 =
379
+ /^(?:Confirm|Now |Do |Let'?s |Please |Go ahead|Then |After all|After that)/i;
380
+
381
+ /** Lines that are instructions/meta, not facts (group 2). */
382
+ const INSTRUCTION_RE_2 =
383
+ /^(?:Give me|Also —|IMPORTANT:|Rules:|\d+\.\s+(?:The |When |Do not |Keep ))/i;
384
+
385
+ /** Lines that are code, not prose facts (group 1). */
386
+ const CODE_LINE_RE_1 =
387
+ /^\s*(?:function |const |let |var |return |if \(|for \(|while \()/;
388
+
389
+ /** Lines that are code, not prose facts (group 2). */
390
+ const CODE_LINE_RE_2 =
391
+ /^\s*(?:class |import |export |}\s*$|{\s*$|[{}();]+\s*$)/;
392
+
393
+ /** Decision-bearing language in user messages. */
394
+ const DECISION_RE =
395
+ /\b(?:chose|choose|picked|go with|use the|reject(?:ed)?|decided|decision|approved|option [a-z]|NOT migrating|WILL adopt|REJECTED|voted)\b/i;
396
+
397
+ /**
398
+ * Check if a line matches any tool-request pattern.
399
+ */
400
+ function isToolRequestLine(trimmed) {
401
+ return (
402
+ TOOL_REQUEST_RE_1.test(trimmed) ||
403
+ TOOL_REQUEST_RE_2.test(trimmed) ||
404
+ TOOL_REQUEST_RE_3.test(trimmed)
405
+ );
406
+ }
407
+
408
+ /**
409
+ * Check if a line matches any instruction pattern.
410
+ */
411
+ function isInstructionLine(trimmed) {
412
+ return INSTRUCTION_RE_1.test(trimmed) || INSTRUCTION_RE_2.test(trimmed);
413
+ }
414
+
415
+ /**
416
+ * Check if a line matches any code pattern.
417
+ */
418
+ function isCodeLine(trimmed) {
419
+ return CODE_LINE_RE_1.test(trimmed) || CODE_LINE_RE_2.test(trimmed);
420
+ }
421
+
422
/**
 * Tell whether a trimmed line should be ignored by fact extraction.
 *
 * Ignored: empty or very short lines (< 15 chars), tool requests,
 * instructions, short lines (< 80 chars) that start with a backtick or a
 * slash, code fences, "[User shared ..." placeholders, code-looking lines,
 * and short lines (< 30 chars) ending in "};".
 *
 * @param {string} trimmed - A single, already trimmed line
 * @returns {boolean} true when the line carries no fact
 */
function isNonFactLine(trimmed) {
  if (!trimmed || trimmed.length < 15) return true;
  if (isToolRequestLine(trimmed) || isInstructionLine(trimmed)) return true;

  const startsLikeCodeOrPath =
    trimmed.startsWith("`") || trimmed.startsWith("/");
  if (startsLikeCodeOrPath && trimmed.length < 80) return true;

  if (trimmed.startsWith("```") || trimmed.startsWith("[User shared ")) {
    return true;
  }
  if (isCodeLine(trimmed)) return true;

  return trimmed.length < 30 && /};\s*$/.test(trimmed);
}
438
+
439
/**
 * Reduce a user message to its fact-bearing prose.
 *
 * Every line is trimmed and tested with isNonFactLine; the surviving lines
 * are joined with spaces and runs of whitespace are collapsed. Results
 * shorter than 20 characters are treated as not substantive.
 *
 * @param {string} userText - User message body
 * @returns {string} Condensed facts, or "" when nothing substantive remains
 */
function extractUserFacts(userText) {
  const combined = userText
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => !isNonFactLine(line))
    .join(" ")
    .replaceAll(/\s+/g, " ")
    .trim();
  return combined.length < 20 ? "" : combined;
}
454
+
455
/**
 * Classify one trimmed transcript line for the work summary.
 *
 * Recognized lines:
 * - "→ Edit `path`"  -> { type: "edit",  value: <file name> }
 * - "→ Write `path`" -> { type: "write", value: <file name> }
 * - "→ Ran `cmd`"    -> { type: "bash",  value: <first 40 chars of cmd> }
 * - "← ..." mentioning an error/failure -> { type: "error", value }
 * - other short "← ..." lines not starting with "[" -> { type: "bash_result", value }
 *
 * @param {string} trimmed - A single, already trimmed line
 * @returns {{ type: string, value: string } | null} null when not relevant
 */
function classifyWorkLine(trimmed) {
  const fileOps = [
    ["edit", /^→ Edit `([^`]+)`/],
    ["write", /^→ Write `([^`]+)`/],
  ];
  for (const [type, pattern] of fileOps) {
    const hit = pattern.exec(trimmed);
    // Keep only the last path segment (the file name).
    if (hit) return { type, value: hit[1].split("/").pop() };
  }

  const ran = /^→ Ran `([^`]+)`/.exec(trimmed);
  if (ran) return { type: "bash", value: ran[1].slice(0, 40) };

  // Everything below concerns result lines only.
  if (!trimmed.startsWith("←")) return null;

  if (/\b(?:error|fail|FAIL|denied|not found)\b/i.test(trimmed)) {
    return { type: "error", value: trimmed.slice(2, 80).trim() };
  }

  if (trimmed.length > 3 && trimmed.length < 200) {
    const result = trimmed.slice(2, 60).trim();
    if (result && !result.startsWith("[")) {
      return { type: "bash_result", value: result };
    }
  }
  return null;
}
486
+
487
/**
 * Record one classified work line in the summary buckets.
 *
 * Edits and writes are stored once per file name, bash runs and errors are
 * appended, and a bash result is attached (" → result") to the most recent
 * bash run when there is one. Unknown types are ignored.
 *
 * @param {{ type: string, value: string }} classified - Result of classifyWorkLine
 * @param {{ edits: string[], writes: string[], bashRuns: string[], errors: string[] }} buckets
 *   Accumulators, mutated in place
 */
function accumulateWorkItem(classified, buckets) {
  const { edits, writes, bashRuns, errors } = buckets;
  const { type, value } = classified;

  if (type === "edit") {
    if (!edits.includes(value)) edits.push(value);
  } else if (type === "write") {
    if (!writes.includes(value)) writes.push(value);
  } else if (type === "bash") {
    bashRuns.push(value);
  } else if (type === "error") {
    errors.push(value);
  } else if (type === "bash_result" && bashRuns.length > 0) {
    bashRuns[bashRuns.length - 1] += ` → ${value}`;
  }
}
514
+
515
/**
 * Extract a compact work summary from an exchange's messages.
 *
 * Every line of every message is classified with classifyWorkLine and
 * collected with accumulateWorkItem. The summary lists created files,
 * edited files, each bash run, and the first error, joined with "; ",
 * e.g. "Edited auth.js; Ran npm test → 14 passed".
 *
 * @param {string[]} exchangeMsgs - Messages belonging to one exchange
 * @returns {string} Summary, or "" when no tool work was found
 */
function extractWorkSummary(exchangeMsgs) {
  const buckets = { edits: [], writes: [], bashRuns: [], errors: [] };

  for (const msg of exchangeMsgs) {
    msg.split("\n").forEach((rawLine) => {
      const item = classifyWorkLine(rawLine.trim());
      if (item) accumulateWorkItem(item, buckets);
    });
  }

  const summary = [];
  if (buckets.writes.length > 0) {
    summary.push(`Created ${buckets.writes.join(", ")}`);
  }
  if (buckets.edits.length > 0) {
    summary.push(`Edited ${buckets.edits.join(", ")}`);
  }
  summary.push(...buckets.bashRuns.map((run) => `Ran ${run}`));
  if (buckets.errors.length > 0) {
    summary.push(`ERROR: ${buckets.errors[0]}`);
  }
  return summary.join("; ");
}
531
+
532
/**
 * Collect the key entities of a text so they survive truncation.
 *
 * Runs the ID, numeric, path and person-name collectors in that order,
 * drops every entity that is contained in a longer collected entity, and
 * returns at most 10 entries in collection order.
 *
 * @param {string} text - Text to scan
 * @returns {string[]} Compact entity strings
 */
function extractKeyEntities(text) {
  const found = new Set();
  const collectors = [
    collectIdEntities,
    collectNumericEntities,
    collectPathEntities,
    collectPersonNames,
  ];
  for (const collect of collectors) collect(text, found);

  const all = [...found];
  // An entity is redundant when a strictly longer entity already contains it.
  const isCoveredByLonger = (entity) =>
    all.some(
      (other) =>
        other !== entity &&
        other.length > entity.length &&
        other.includes(entity),
    );

  return all.filter((entity) => !isCoveredByLonger(entity)).slice(0, 10); // cap to avoid bloat
}
556
+
557
/**
 * Collect ID-type entities (tickets, PRs, money) into `entities`.
 *
 * Money amounts must start with a digit and may only contain commas
 * between digit groups, so sentence punctuation is never captured
 * ("$184,000, and" yields "$184,000") and a bare "$," is not an amount.
 *
 * @param {string} text - Text to scan
 * @param {Set<string>} entities - Set that receives the matches
 */
function collectIdEntities(text, entities) {
  const patterns = [
    // Bug/incident/ticket IDs: ZEP-4471, INC-2891, SEC-0042
    /\b[A-Z]{2,6}-\d{2,6}\b/g,
    // Pull requests: PR #1847
    /PR #\d+/g,
    // Money amounts: $184,000 or $12.50
    /\$\d+(?:,\d+)*(?:\.\d+)?/g,
  ];
  for (const pattern of patterns) {
    for (const m of text.matchAll(pattern)) entities.add(m[0]);
  }
}
568
+
569
/** Months pattern for date extraction (group 1). */
const MONTHS_1_RE = /(?:January|February|March|April|May|June)/i;

/** Months pattern for date extraction (group 2). */
const MONTHS_2_RE = /(?:July|August|September|October|November|December)/i;

/**
 * "Month Day[ordinal][ Year]" matcher, built once from the month patterns.
 *
 * Word boundaries keep the match honest: the month must start a word, the
 * day must not be the prefix of a longer number (so "March 2024" is not
 * read as "March 20"), and an optional year must be exactly four digits.
 */
const DATE_RE = new RegExp(
  `\\b(?:${MONTHS_1_RE.source}|${MONTHS_2_RE.source})` +
    String.raw`\s+\d{1,2}(?:st|nd|rd|th)?\b(?:\s+\d{4}\b)?`,
  "gi",
);

/**
 * Find "Month Day" style dates in `text`.
 *
 * @param {string} text - Text to scan
 * @returns {IterableIterator<RegExpMatchArray>} One match per date found
 */
function matchDates(text) {
  // matchAll works on a clone of the regex, so sharing DATE_RE is safe.
  return text.matchAll(DATE_RE);
}
586
+
587
/**
 * Counts with units — group 1.
 * The number must start with a digit; commas are only accepted between
 * digit groups, so ", items" is not mistaken for a count.
 */
const COUNTS_RE_1 =
  /\d+(?:,\d+)*\s+(?:transactions?|failed transactions?|items?|req\/s|errors?)/gi;

/** Counts with units — group 2 (same number rules as group 1). */
const COUNTS_RE_2 =
  /\d+(?:,\d+)*\s+(?:seconds?|minutes?|hours?|pods?|shards?|batches|vCPU)/gi;

/**
 * Collect numeric entities (dates, ports, config values, thresholds,
 * rates, counts) into `entities`, in that order.
 *
 * @param {string} text - Text to scan
 * @param {Set<string>} entities - Set that receives the matches
 */
function collectNumericEntities(text, entities) {
  // Dates
  for (const m of matchDates(text)) entities.add(m[0]);

  const patterns = [
    // Ports: port 5433, port 9147
    /port\s+\d+/gi,
    // Config values: key=value; a trailing punctuation comma is not part
    // of the value ("retries=3, ..." yields "retries=3")
    /\b\w+=\d+(?:,\d+)*/g,
    // Thresholds with comparisons: p99 > 340ms
    /p\d+\s*[><]=?\s*\d[\d,.]*\s*\w*/g,
    // Rates/thresholds: 8 errors/30s, 5 errors/10s
    /\d+\s+errors?\/\d+s/gi,
    // Counts with units
    COUNTS_RE_1,
    COUNTS_RE_2,
  ];
  for (const pattern of patterns) {
    for (const m of text.matchAll(pattern)) entities.add(m[0]);
  }
}
619
+
620
/**
 * Collect path/image entities into `entities`: first every s3:// URL and
 * "docker image <ref>" mention, then every "repo/name:v<version>-<suffix>"
 * image tag.
 *
 * @param {string} text - Text to scan
 * @param {Set<string>} entities - Set that receives the matches
 */
function collectPathEntities(text, entities) {
  const locationRe = /(?:s3:\/\/|docker image\s+)[\w./:@-]+/gi;
  const imageTagRe = /[\w-]+\/[\w-]+:v[\d.]+-\w+/g;
  for (const pattern of [locationRe, imageTagRe]) {
    for (const [found] of text.matchAll(pattern)) {
      entities.add(found);
    }
  }
}
629
+
630
/** Leading words that disqualify a capitalized sequence as a person name. */
const PERSON_NAME_FP_RE =
  /^(?:The|This|After|Before|Decision|Option|Root|Bug|Incident|Security|Capacity|Migration)\b/;

/**
 * Collect person-name candidates into `entities`: two or three
 * consecutive capitalized words that do not start with a known
 * false-positive word.
 *
 * @param {string} text - Text to scan
 * @param {Set<string>} entities - Set that receives the names
 */
function collectPersonNames(text, entities) {
  const nameRe = /\b([A-Z][a-z]+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)\b/g;
  for (const [, candidate] of text.matchAll(nameRe)) {
    if (PERSON_NAME_FP_RE.test(candidate)) continue;
    entities.add(candidate);
  }
}
647
+
648
/**
 * Extract decisions from user messages.
 *
 * User messages are numbered from 1 in order of appearance. A bare "no"
 * is recorded as "Rejected preceding option"; a message with decision
 * language contributes the decision verb and what follows it, up to the
 * next ".", ";" or "," (at most 60 characters).
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {{ text: string, num: number }[]} Decisions with their exchange number
 */
function extractDecisions(messages) {
  const prefixLength = "**User:** ".length;
  // The verb + its direct object
  const clauseRe =
    /\b((?:NOT |WILL )?(?:chose|rejected?|decided|approved|voted|migrating|adopt)\b[^.;,]{0,50})/i;

  const found = [];
  let userTurn = 0;
  for (const msg of messages) {
    if (!msg.startsWith("**User:**")) continue;
    userTurn += 1;

    const flat = msg.slice(prefixLength).replaceAll("\n", " ");
    if (/^\s*no\s*$/i.test(flat.trim())) {
      found.push({ text: "Rejected preceding option", num: userTurn });
      continue;
    }
    if (!DECISION_RE.test(flat)) continue;

    const hit = clauseRe.exec(flat);
    if (hit) {
      const clause = hit[1].trim().replaceAll(/\s+/g, " ").slice(0, 60);
      found.push({ text: clause, num: userTurn });
    }
  }
  return found;
}
673
+
674
/**
 * Extract error→resolution pairs from the message stream.
 *
 * A result message ("←" prefix) mentioning an error becomes the pending
 * error. The next non-error result closes it as its resolution, provided
 * it arrives within 3 user exchanges of the error.
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {{ error: string, resolution: string, from: number, to: number }[]}
 */
function extractErrorResolutions(messages) {
  const resolved = [];
  let pendingError = null;
  let pendingSince = 0;
  let userTurn = 0;

  for (const msg of messages) {
    if (msg.startsWith("**User:**")) userTurn += 1;
    if (!msg.startsWith("←")) continue;

    const reportsError =
      /\b(?:error|fail|FAIL|exception|not found|denied)\b/i.test(msg);
    if (reportsError) {
      pendingError = msg.slice(2, 100).trim();
      pendingSince = userTurn;
      continue;
    }

    if (!pendingError || /\b(?:error|fail|FAIL)\b/i.test(msg)) continue;

    const successText = msg.slice(2, 60).trim();
    const closeEnough = userTurn - pendingSince <= 3;
    if (successText && closeEnough) {
      resolved.push({
        error: pendingError.slice(0, 80),
        resolution: successText.slice(0, 60),
        from: pendingSince,
        to: userTurn,
      });
      pendingError = null;
    }
  }
  return resolved;
}
709
+
710
/**
 * Group the flat message array into exchanges.
 *
 * An exchange opens at each "**User:**" message and owns every message up
 * to the next one. Messages before the first user message are dropped.
 * Exchanges are numbered from 1 and carry the user's extracted facts.
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {{ num: number, userFacts: string, msgs: string[] }[]}
 */
function buildExchanges(messages) {
  const prefixLength = "**User:** ".length;
  const exchanges = [];
  let open = null;

  for (const msg of messages) {
    if (!msg.startsWith("**User:**")) {
      open?.msgs.push(msg);
      continue;
    }
    open = {
      num: exchanges.length + 1,
      userFacts: extractUserFacts(msg.slice(prefixLength)),
      msgs: [msg],
    };
    exchanges.push(open);
  }
  return exchanges;
}
735
+
736
/**
 * Format one exchange as a conversation-index entry.
 *
 * - With user facts of up to 300 characters: "[n] <facts>".
 * - With longer facts: the first 250 characters, "...", and the key
 *   entities of the full text in braces (when any were found).
 * - Without facts: "[n] <work summary>" for pure tool-work exchanges.
 *
 * @param {{ num: number, userFacts: string, msgs: string[] }} ex
 * @returns {string} The entry, or "" when the exchange is not substantive
 */
function formatExchangeEntry(ex) {
  const { num, userFacts, msgs } = ex;
  const work = extractWorkSummary(msgs);

  if (!userFacts) {
    // Pure tool-work exchange, or nothing worth indexing.
    return work ? `[${num}] ${work}` : "";
  }
  if (userFacts.length <= 300) {
    return `[${num}] ${userFacts}`;
  }

  const tags = extractKeyEntities(userFacts);
  const suffix = tags.length > 0 ? ` {${tags.join(", ")}}` : "";
  return `[${num}] ${userFacts.slice(0, 250)}...${suffix}`;
}
756
+
757
/**
 * Build the footer lines of the conversation index: a "**Decisions:**"
 * line and an "**Errors resolved:**" line, each preceded by a blank line
 * and emitted only when there is something to report.
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {string[]} Footer lines (possibly empty)
 */
function buildIndexFooter(messages) {
  const decisionParts = extractDecisions(messages).map(
    (d) => `${d.text} [${d.num}]`,
  );
  const errorParts = extractErrorResolutions(messages).map(
    (e) => `${e.error} → ${e.resolution} [${e.from}→${e.to}]`,
  );

  const footer = [];
  if (decisionParts.length > 0) {
    footer.push("", `**Decisions:** ${decisionParts.join(" | ")}`);
  }
  if (errorParts.length > 0) {
    footer.push("", `**Errors resolved:** ${errorParts.join(" | ")}`);
  }
  return footer;
}
779
+
780
/**
 * Generate the conversation index placed at the start of a checkpoint.
 *
 * The index has one line per substantive exchange (exchange number plus
 * condensed user facts, or the work performed when there are no facts),
 * followed by the decision and error→resolution summaries.
 *
 * @param {string[]} messages - Extracted message strings from extractMessages
 * @returns {string} Formatted markdown section, or "" when there are fewer
 *   than 10 messages or no exchange produced an entry
 */
export function generateConversationIndex(messages) {
  if (messages.length < 10) return "";

  const entries = buildExchanges(messages)
    .map((ex) => formatExchangeEntry(ex))
    .filter(Boolean);
  if (entries.length === 0) return "";

  const heading = [
    "## Conversation Index",
    "",
    "Compact reference — exchange numbers map to the full conversation below.",
    "",
  ];
  return heading.concat(entries, buildIndexFooter(messages)).join("\n");
}
813
+
814
/**
 * Insert "### Exchanges N-M" navigation headers into the message body.
 *
 * A header is placed before the first user exchange and then before every
 * `groupSize`-th exchange after the previous header. M is capped at the
 * total number of user exchanges. Bodies with fewer than 20 messages are
 * returned unchanged (same array).
 *
 * @param {string[]} messages - The formatted message strings
 * @param {number} [groupSize=10] - User exchanges per section
 * @returns {string[]} Messages with headers interleaved
 */
export function addSectionHeaders(messages, groupSize = 10) {
  if (messages.length < 20) return messages;

  const total = countUserExchanges(messages);
  const withHeaders = [];
  let current = 0;
  let headerAt = 0;

  for (const msg of messages) {
    const opensExchange = msg.startsWith("**User:**");
    if (opensExchange) current += 1;

    const needsHeader =
      opensExchange && (current === 1 || current - headerAt >= groupSize);
    if (needsHeader) {
      const last = Math.min(current + groupSize - 1, total);
      withHeaders.push(`### Exchanges ${current}-${last}`);
      headerAt = current;
    }
    withHeaders.push(msg);
  }
  return withHeaders;
}

/**
 * Count the messages that open a user exchange ("**User:**" prefix).
 *
 * @param {string[]} messages - The formatted message strings
 * @returns {number}
 */
function countUserExchanges(messages) {
  let total = 0;
  for (let i = 0; i < messages.length; i += 1) {
    if (messages[i].startsWith("**User:**")) total += 1;
  }
  return total;
}
845
+
846
/**
 * Detect whether an assistant message is a Context Guardian menu prompt,
 * so the user's numeric reply to it can be recognized and skipped.
 *
 * Only text is inspected: the text blocks of array content joined with
 * newlines, or the string itself. A menu mentions "Context Guardian"
 * closely followed by a digit and contains "Reply with 1,".
 *
 * @param {Array|string} content - The assistant message content
 * @returns {boolean}
 */
export function isCGMenuMessage(content) {
  const toPlainText = () => {
    if (Array.isArray(content)) {
      return content
        .filter((b) => b.type === "text")
        .map((b) => b.text)
        .join("\n");
    }
    return typeof content === "string" ? content : "";
  };

  const textOnly = toPlainText();
  if (!textOnly.includes("Reply with 1,")) return false;
  return /Context Guardian\s.{0,5}\d/.test(textOnly);
}