@agenticmail/enterprise 0.5.259 → 0.5.261

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,638 @@
1
+ /**
2
+ * Advanced Context Compaction Engine
3
+ *
4
+ * A multi-strategy, token-budget-aware compaction system that outperforms
5
+ * naive summarization approaches. Key innovations:
6
+ *
7
+ * 1. TIERED COMPRESSION — Three levels: trim tool results → extractive → LLM summary
8
+ * 2. ATOMIC GROUPING — Tool use/result pairs never split
9
+ * 3. TOKEN BUDGET — Calculates exact space to free, doesn't over-compact
10
+ * 4. ROLLING SUMMARIES — Previous compaction summaries are preserved & chained
11
+ * 5. IMPORTANCE SCORING — High-value messages kept verbatim (errors, decisions, IDs)
12
+ * 6. PARALLEL CHUNK SUMMARIZATION — Large transcripts split and summarized concurrently
13
+ * 7. STRUCTURED OUTPUT — Summary follows strict schema for reliable continuation
14
+ */
15
+
16
+ import type { AgentMessage, AgentConfig, RuntimeHooks } from './types.js';
17
+ import { callLLM, estimateTokens, estimateMessageTokens } from './llm-client.js';
18
+
19
+ // ─── Configuration ───────────────────────────────────────────────────────────
20
+
21
/** Compact when estimated context usage exceeds this fraction of the window. */
export const COMPACTION_THRESHOLD = 0.80;

/** Target context usage after compaction (leaves headroom for the next turn). */
const TARGET_USAGE = 0.45;

/** Minimum number of trailing messages kept verbatim. */
const MIN_KEEP_RECENT = 10;

/** Maximum number of trailing messages kept verbatim. */
const MAX_KEEP_RECENT = 30;

/** Max output tokens requested from the LLM for a single summary call. */
const SUMMARY_MAX_TOKENS = 4096;

/** Max transcript characters sent to the LLM per summarization chunk. */
const CHUNK_MAX_CHARS = 80_000;

/** Max number of chunk summarizations that run concurrently. */
const MAX_PARALLEL_CHUNKS = 3;

/** Tool result content longer than this (in characters) gets trimmed in Tier 1. */
const TOOL_RESULT_TRIM_THRESHOLD = 2000;

/** Number of leading characters retained from a trimmed tool result. */
const TOOL_RESULT_TRIM_TO = 400;

/**
 * High-importance patterns — a message whose extracted text matches any of
 * these receives an importance bonus during scoring.
 *
 * None of the patterns use the `g`/`y` flags, so `.test()` is stateless and the
 * same RegExp objects can safely be reused across messages.
 */
const HIGH_IMPORTANCE_PATTERNS: readonly RegExp[] = [
  /error|fail|exception|crash|bug/i,
  /decision|decided|chose|choosing/i,
  /important|critical|must|required/i,
  /password|secret|key|token|credential/i,
  // UUIDs — case-insensitive so upper-case renderings are recognised too
  /\b[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\b/i,
  /https?:\/\/\S{20,}/, // Long URLs
  /\/[a-zA-Z][\w/.-]{10,}/, // File paths
];
58
+
59
+ // ─── Types ───────────────────────────────────────────────────────────────────
60
+
61
/** A compacted message list together with the statistics describing the run. */
interface CompactionResult {
  /** Messages after compaction. */
  messages: AgentMessage[];
  /** Metrics about the compaction that produced `messages`. */
  stats: CompactionStats;
}

/** Metrics recorded for a single compaction pass. */
interface CompactionStats {
  /** Which tier produced the final result. */
  strategy: 'none' | 'tier1_trim' | 'tier2_extractive' | 'tier3_llm';
  /** Message count before compaction. */
  messagesBefore: number;
  /** Message count after compaction. */
  messagesAfter: number;
  /** Estimated token count before compaction. */
  tokensBefore: number;
  /** Estimated token count after compaction. */
  tokensAfter: number;
  /** Wall-clock duration of the pass, in milliseconds. */
  msElapsed: number;
  /** Estimated tokens in the generated summary (LLM tier). */
  summaryTokens?: number;
  /** Input tokens reported by the LLM across summarization calls. */
  llmInputTokens?: number;
  /** Output tokens reported by the LLM across summarization calls. */
  llmOutputTokens?: number;
  /** Number of transcript chunks that were summarized. */
  chunksUsed?: number;
  /** Number of earlier compaction summaries folded into this one. */
  previousSummariesChained?: number;
}

/** One or two messages that are scored and summarized as a unit. */
interface MessageGroup {
  /** The member messages (two for a tool_use/tool_result pair, otherwise one). */
  messages: AgentMessage[];
  /** Estimated token count of `messages`. */
  tokens: number;
  /** Importance score; higher means more worth preserving. */
  importance: number;
  /** True when the group is an assistant tool_use plus its user tool_result. */
  isToolPair: boolean;
  /** True when the group is a summary emitted by an earlier compaction. */
  isPreviousSummary: boolean;
}
87
+
88
+ // ─── Main Entry Point ────────────────────────────────────────────────────────
89
+
90
/**
 * Compact the message history to fit within the context window.
 *
 * Uses a tiered approach and tries the cheapest strategy first:
 *   1. Tier 1 — trim oversized tool results; returned as-is if that alone brings
 *      usage back under `COMPACTION_THRESHOLD`.
 *   2. Tier 2 — extractive summary (no LLM call), used when no API key is
 *      supplied or the summary budget is below 1000 tokens.
 *   3. Tier 3 — LLM summary, falling back to the extractive summary on failure.
 *
 * System messages are always preserved and placed first in the result.
 *
 * @param messages Full message history, including system messages.
 * @param config   Agent configuration (context window size, model, agent id).
 * @param hooks    Runtime hooks; `onContextCompaction` receives the summary.
 * @param options  Optional API key (enables Tier 3) and session id.
 * @returns The original `messages` array when nothing was compacted, otherwise
 *          a new array: system messages, one summary message, recent tail.
 */
export async function compactContext(
  messages: AgentMessage[],
  config: AgentConfig,
  hooks: RuntimeHooks,
  options?: { apiKey?: string; sessionId?: string },
): Promise<AgentMessage[]> {
  const startMs = Date.now();
  const contextWindowSize = config.contextWindowSize ?? 200_000;
  const compactionLimit = contextWindowSize * COMPACTION_THRESHOLD;
  const tokensBefore = estimateMessageTokens(messages);
  const targetTokens = Math.floor(contextWindowSize * TARGET_USAGE);

  // Don't compact if already under threshold
  if (tokensBefore <= compactionLimit) {
    return messages;
  }

  const tokensToFree = tokensBefore - targetTokens;
  console.log(`[compaction] Need to free ~${tokensToFree} tokens (${tokensBefore} → target ${targetTokens})`);

  // Split system messages from conversation
  const systemMessages = messages.filter(m => m.role === 'system');
  const nonSystem = messages.filter(m => m.role !== 'system');

  if (nonSystem.length <= MIN_KEEP_RECENT) {
    return messages; // Nothing to compact
  }

  // ─── Tier 1: Trim bloated tool results ─────────────────────────────────
  const tier1Messages = trimToolResults(nonSystem);
  const tier1Result = [...systemMessages, ...tier1Messages];
  const tier1Tokens = estimateMessageTokens(tier1Result);

  if (tier1Tokens <= compactionLimit) {
    const stats: CompactionStats = {
      strategy: 'tier1_trim',
      messagesBefore: messages.length,
      messagesAfter: tier1Result.length,
      tokensBefore,
      tokensAfter: tier1Tokens,
      msElapsed: Date.now() - startMs,
    };
    console.log(`[compaction] Tier 1 (trim tool results) sufficient: ${tokensBefore} → ${tier1Tokens} tokens in ${stats.msElapsed}ms`);
    return tier1Result;
  }

  // ─── Determine keep-recent boundary ────────────────────────────────────
  const systemTokens = estimateMessageTokens(systemMessages);
  const keepCount = calculateKeepRecent(tier1Messages, targetTokens, systemTokens);
  const { toSummarize, keepRecent } = splitAtSafeBoundary(tier1Messages, keepCount);

  if (toSummarize.length === 0) {
    return messages; // Can't split safely
  }

  // ─── Group messages for importance scoring ─────────────────────────────
  // groupMessages() yields groups in chronological order. Keep that order
  // intact (Array.prototype.sort mutates in place, so sort a copy): the LLM
  // transcript must read chronologically, while the extractive summary wants
  // the most important groups first.
  const chronologicalGroups = groupMessages(toSummarize);

  // Separate previous compaction summaries (they chain)
  const previousSummaries = chronologicalGroups.filter(g => g.isPreviousSummary);
  const chronologicalRegular = chronologicalGroups.filter(g => !g.isPreviousSummary);
  const rankedRegular = [...chronologicalRegular].sort((a, b) => b.importance - a.importance);

  // ─── Tier 2: Extractive (no LLM call) ─────────────────────────────────
  const keepTokenBudget = systemTokens + estimateMessageTokens(keepRecent);
  const summaryBudget = targetTokens - keepTokenBudget;
  const apiKey = options?.apiKey;
  const sessionId = options?.sessionId;

  if (!apiKey || summaryBudget < 1000) {
    // No API key or very little budget — use extractive
    const summary = buildExtractiveSummary(previousSummaries, rankedRegular, summaryBudget);
    const result = assembleFinal(systemMessages, summary, keepRecent);
    const stats: CompactionStats = {
      strategy: 'tier2_extractive',
      messagesBefore: messages.length,
      messagesAfter: result.length,
      tokensBefore,
      tokensAfter: estimateMessageTokens(result),
      msElapsed: Date.now() - startMs,
      previousSummariesChained: previousSummaries.length,
    };
    console.log(`[compaction] Tier 2 (extractive): ${stats.tokensBefore} → ${stats.tokensAfter} tokens in ${stats.msElapsed}ms`);
    await persistSummary(hooks, sessionId, config.agentId, summary);
    return result;
  }

  // ─── Tier 3: LLM-powered summarization ─────────────────────────────────
  try {
    // Chronological order here — the prompt asks for "Completed Work (chronological)".
    const transcript = buildTranscript(previousSummaries, chronologicalRegular);
    const summary = await llmSummarize(transcript, config, apiKey, summaryBudget);
    const result = assembleFinal(systemMessages, summary.text, keepRecent);
    const tokensAfter = estimateMessageTokens(result);

    const stats: CompactionStats = {
      strategy: 'tier3_llm',
      messagesBefore: messages.length,
      messagesAfter: result.length,
      tokensBefore,
      tokensAfter,
      msElapsed: Date.now() - startMs,
      summaryTokens: estimateTokens(summary.text),
      llmInputTokens: summary.inputTokens,
      llmOutputTokens: summary.outputTokens,
      chunksUsed: summary.chunks,
      previousSummariesChained: previousSummaries.length,
    };
    console.log(`[compaction] Tier 3 (LLM): ${stats.tokensBefore} → ${stats.tokensAfter} tokens in ${stats.msElapsed}ms (${summary.chunks} chunks, ${summary.inputTokens}in/${summary.outputTokens}out)`);
    await persistSummary(hooks, sessionId, config.agentId, summary.text);
    return result;
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[compaction] LLM summarization failed: ${reason} — falling back to extractive`);
    const summary = buildExtractiveSummary(previousSummaries, rankedRegular, summaryBudget);
    const result = assembleFinal(systemMessages, summary, keepRecent);
    await persistSummary(hooks, sessionId, config.agentId, summary);
    console.log(`[compaction] Extractive fallback: ${tokensBefore} → ${estimateMessageTokens(result)} tokens in ${Date.now() - startMs}ms`);
    return result;
  }
}
208
+
209
+ // ─── Tier 1: Trim Tool Results ───────────────────────────────────────────────
210
+
211
/**
 * Tier 1: shorten oversized `tool_result` blocks.
 *
 * For every message whose content is an array, each `tool_result` block whose
 * content (stringified via JSON when not already a string) is longer than
 * `TOOL_RESULT_TRIM_THRESHOLD` characters is replaced by a copy holding the
 * first `TOOL_RESULT_TRIM_TO` characters plus a "[trimmed N chars]" marker.
 *
 * Inputs are never mutated: untouched messages are returned by reference and
 * modified ones are shallow copies.
 *
 * @param messages Messages to scan.
 * @returns A new array of the same length and order.
 */
function trimToolResults(messages: AgentMessage[]): AgentMessage[] {
  return messages.map(msg => {
    if (!Array.isArray(msg.content)) return msg;

    let modified = false;
    const newContent = (msg.content as any[]).map((block: any) => {
      if (block?.type !== 'tool_result') return block;

      const serialized: unknown =
        typeof block.content === 'string' ? block.content : JSON.stringify(block.content);

      // JSON.stringify returns undefined for a missing content field (and for
      // functions/symbols); such blocks have nothing to trim.
      if (typeof serialized !== 'string' || serialized.length <= TOOL_RESULT_TRIM_THRESHOLD) {
        return block;
      }

      modified = true;
      return {
        ...block,
        content: serialized.slice(0, TOOL_RESULT_TRIM_TO) + `\n... [trimmed ${serialized.length - TOOL_RESULT_TRIM_TO} chars]`,
      };
    });

    return modified ? { ...msg, content: newContent } : msg;
  });
}
233
+
234
+ // ─── Keep-Recent Calculation ─────────────────────────────────────────────────
235
+
236
/**
 * Decide how many trailing messages to keep verbatim.
 *
 * Scans from the newest message towards the oldest, adding up estimated tokens.
 * The verbatim tail may use 60% of the non-system target budget (the remaining
 * 40% is left for the summary). The scan stops once the budget would be
 * exceeded — but only after `MIN_KEEP_RECENT` messages have been taken — or as
 * soon as `MAX_KEEP_RECENT` messages have been taken.
 *
 * @param messages     Non-system conversation messages, oldest first.
 * @param targetTokens Total token target for the compacted context.
 * @param systemTokens Estimated tokens consumed by system messages.
 * @returns The number of trailing messages to keep; never below `MIN_KEEP_RECENT`.
 */
function calculateKeepRecent(messages: AgentMessage[], targetTokens: number, systemTokens: number): number {
  const keepBudget = Math.floor((targetTokens - systemTokens) * 0.6);

  let kept = 0;
  let spent = 0;

  for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
    const cost = estimateMessageTokens([messages[idx]]);
    const minimumMet = kept >= MIN_KEEP_RECENT;

    // Over budget only matters once the minimum tail has been secured.
    if (minimumMet && spent + cost > keepBudget) {
      break;
    }

    spent += cost;
    kept += 1;

    if (kept >= MAX_KEEP_RECENT) {
      break;
    }
  }

  return Math.max(MIN_KEEP_RECENT, kept);
}
253
+
254
+ // ─── Safe Boundary Split ─────────────────────────────────────────────────────
255
+
256
/**
 * Split `messages` into an older part to summarize and a recent tail to keep,
 * without separating a tool_use message from the tool_result that answers it.
 *
 * The requested cut is `messages.length - keepCount`, clamped into
 * `[0, messages.length]`. If the first kept message would be a user message
 * carrying a `tool_result` block, the cut moves towards the start until it no
 * longer lands on one, so the preceding assistant message stays in the tail.
 * When no such position exists the cut becomes 0: nothing is summarized and
 * everything is kept, rather than splitting a pair.
 *
 * @param messages  Conversation messages, oldest first.
 * @param keepCount Desired number of trailing messages to keep.
 * @returns `toSummarize` (possibly empty) followed by `keepRecent`; concatenated
 *          they equal `messages`.
 */
function splitAtSafeBoundary(messages: AgentMessage[], keepCount: number): {
  toSummarize: AgentMessage[];
  keepRecent: AgentMessage[];
} {
  const startsWithToolResult = (msg: AgentMessage | undefined): boolean =>
    msg !== undefined &&
    msg.role === 'user' &&
    Array.isArray(msg.content) &&
    (msg.content as any[]).some((b: any) => b?.type === 'tool_result');

  // Clamp so an out-of-range keepCount can neither produce a negative slice
  // index nor index past the end of the array.
  let cutIndex = Math.min(Math.max(messages.length - keepCount, 0), messages.length);

  // Walk backwards to avoid splitting tool_use/tool_result pairs. Reaching 0
  // means there is no safe place to cut, so nothing gets summarized.
  while (cutIndex > 0 && cutIndex < messages.length && startsWithToolResult(messages[cutIndex])) {
    cutIndex -= 1;
  }

  return {
    toSummarize: messages.slice(0, cutIndex),
    keepRecent: messages.slice(cutIndex),
  };
}
278
+
279
+ // ─── Message Grouping & Importance ───────────────────────────────────────────
280
+
281
/**
 * Partition messages into scoring units, preserving input order.
 *
 * Three kinds of group are produced:
 *  - a user string message containing "[CONTEXT COMPACTION" becomes a
 *    previous-summary group with a fixed importance of 10;
 *  - an assistant message with a `tool_use` block immediately followed by a
 *    user message with a `tool_result` block becomes a two-message tool pair;
 *  - anything else becomes a single-message group.
 *
 * @param messages Messages to group, oldest first.
 * @returns Groups in the same order as the messages they contain.
 */
function groupMessages(messages: AgentMessage[]): MessageGroup[] {
  const containsBlock = (candidate: AgentMessage, blockType: string): boolean =>
    Array.isArray(candidate.content) &&
    (candidate.content as any[]).some((b: any) => b.type === blockType);

  const result: MessageGroup[] = [];
  let cursor = 0;

  while (cursor < messages.length) {
    const current = messages[cursor];

    // Summary left behind by an earlier compaction pass
    const isEarlierSummary =
      current.role === 'user' &&
      typeof current.content === 'string' &&
      current.content.includes('[CONTEXT COMPACTION');

    if (isEarlierSummary) {
      result.push({
        messages: [current],
        tokens: estimateMessageTokens([current]),
        importance: 10, // Highest — contains all prior context
        isPreviousSummary: true,
        isToolPair: false,
      });
      cursor += 1;
      continue;
    }

    // Assistant tool_use directly followed by the user's tool_result
    if (current.role === 'assistant' && containsBlock(current, 'tool_use') && cursor + 1 < messages.length) {
      const following = messages[cursor + 1];
      if (following.role === 'user' && containsBlock(following, 'tool_result')) {
        const members = [current, following];
        result.push({
          messages: members,
          tokens: estimateMessageTokens(members),
          importance: scoreImportance(members),
          isToolPair: true,
          isPreviousSummary: false,
        });
        cursor += 2;
        continue;
      }
    }

    // Everything else stands alone
    result.push({
      messages: [current],
      tokens: estimateMessageTokens([current]),
      importance: scoreImportance([current]),
      isToolPair: false,
      isPreviousSummary: false,
    });
    cursor += 1;
  }

  return result;
}
334
+
335
/**
 * Score how valuable a group of messages is to keep.
 *
 * Starts from 1 and, per message, applies:
 *  - +1 for a user message;
 *  - +2 if the extracted text matches any `HIGH_IMPORTANCE_PATTERNS` entry
 *    (counted once per message);
 *  - +3 for each `tool_result` block flagged `is_error`;
 *  - -1 if the extracted text is shorter than 20 characters;
 *  - -1 if the extracted text is longer than 5000 characters.
 *
 * @param messages The messages forming one group.
 * @returns The accumulated score, floored at 0.
 */
function scoreImportance(messages: AgentMessage[]): number {
  const total = messages.reduce((running, msg) => {
    const text = extractText(msg);
    let delta = 0;

    // Instructions usually arrive in user messages
    if (msg.role === 'user') delta += 1;

    // A single pattern hit is enough for the bonus
    if (HIGH_IMPORTANCE_PATTERNS.some(pattern => pattern.test(text))) delta += 2;

    // Failed tool calls carry a lot of signal
    if (Array.isArray(msg.content)) {
      const errorResults = (msg.content as any[]).filter(
        (block: any) => block.type === 'tool_result' && block.is_error,
      );
      delta += 3 * errorResults.length;
    }

    // Acks such as "ok" add little
    if (text.length < 20) delta -= 1;

    // Bulk data is worth less per token
    if (text.length > 5000) delta -= 1;

    return running + delta;
  }, 1);

  return Math.max(0, total);
}
366
+
367
/**
 * Render the content of a `tool_result` block as plain text.
 *
 * Strings pass through; arrays are flattened by taking the `text` of text
 * parts and JSON-encoding anything else; other objects are JSON-encoded.
 * This avoids the "[object Object]" that `String()` produces for structured
 * content. Falsy content yields an empty string.
 */
function toolResultToText(content: unknown): string {
  if (!content) return '';
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      .map((part: any) => {
        if (typeof part === 'string') return part;
        if (part?.type === 'text') return String(part.text ?? '');
        return JSON.stringify(part) ?? '';
      })
      .join(' ');
  }
  if (typeof content === 'object') return JSON.stringify(content) ?? '';
  return String(content);
}

/**
 * Flatten a message into a single string for scoring and summarization.
 *
 * String content is returned unchanged. Array content is rendered block by
 * block and joined with spaces:
 *  - `text` blocks contribute their text;
 *  - `tool_use` blocks contribute `name(<input JSON, first 200 chars>)`;
 *  - `tool_result` blocks contribute the first 500 chars of their content;
 *  - any other block contributes an empty string.
 *
 * @param msg The message to flatten.
 * @returns The flattened text, or '' when content is neither string nor array.
 */
function extractText(msg: AgentMessage): string {
  if (typeof msg.content === 'string') return msg.content;
  if (!Array.isArray(msg.content)) return '';

  return (msg.content as any[])
    .map((b: any) => {
      switch (b?.type) {
        case 'text':
          return b.text || '';
        case 'tool_use':
          return `${b.name}(${JSON.stringify(b.input || {}).slice(0, 200)})`;
        case 'tool_result':
          return toolResultToText(b.content).slice(0, 500);
        default:
          return '';
      }
    })
    .join(' ');
}
379
+
380
+ // ─── Extractive Summary (Tier 2) ────────────────────────────────────────────
381
+
382
/**
 * Tier 2: build a summary without calling an LLM.
 *
 * Earlier compaction summaries are emitted first (header stripped), as long as
 * the running total stays below 40% of `tokenBudget`. A "Recent Activity
 * Summary" heading follows, then one `[role]: text` line per message, taken in
 * the order `groups` is given. Each line is truncated to 800, 400 or 200
 * characters depending on the group's importance (>= 5, >= 3, otherwise), and a
 * line is skipped together with the rest of its group when it would exceed the
 * budget.
 *
 * @param previousSummaries Groups holding summaries from earlier compactions.
 * @param groups            Regular groups, in the order they should be consumed.
 * @param tokenBudget       Maximum estimated tokens for the emitted lines.
 * @returns The summary text, parts joined by newlines.
 */
function buildExtractiveSummary(
  previousSummaries: MessageGroup[],
  groups: MessageGroup[],
  tokenBudget: number,
): string {
  const sections: string[] = [];
  let spent = 0;

  // Earlier summaries go first — they carry the oldest context
  for (const prior of previousSummaries) {
    const raw = extractText(prior.messages[0]);
    // Drop the "[CONTEXT COMPACTION...]" wrapper, keep the body
    const body = raw.replace(/^\[CONTEXT COMPACTION[^\]]*\]\s*/s, '');
    const cost = estimateTokens(body);
    const fitsPriorShare = spent + cost < tokenBudget * 0.4; // at most 40% for prior summaries
    if (fitsPriorShare) {
      sections.push('## Prior Context (from earlier compaction)\n' + body);
      spent += cost;
    }
  }

  sections.push('\n## Recent Activity Summary');

  // Consume groups in the supplied order until the budget is exhausted
  for (const group of groups) {
    if (spent >= tokenBudget) break;

    // More important groups are allowed longer excerpts
    let limit = 200;
    if (group.importance >= 5) {
      limit = 800;
    } else if (group.importance >= 3) {
      limit = 400;
    }

    for (const msg of group.messages) {
      const text = extractText(msg);
      if (!text) continue;

      const excerpt = text.length > limit ? text.slice(0, limit) + '...' : text;
      const entry = `[${msg.role}]: ${excerpt}`;
      const cost = estimateTokens(entry);

      if (spent + cost > tokenBudget) break;
      sections.push(entry);
      spent += cost;
    }
  }

  return sections.join('\n');
}
427
+
428
+ // ─── Transcript Builder ──────────────────────────────────────────────────────
429
+
430
/**
 * Build the plain-text transcript handed to the LLM summarizer.
 *
 * Earlier compaction summaries come first, each stripped of its
 * "[CONTEXT COMPACTION ...]" header, capped at 20,000 characters and wrapped
 * in PRIOR COMPACTION SUMMARY markers. They are followed by one
 * `[role]: text` entry per non-empty message, capped at 1,500 characters.
 *
 * Groups are emitted exactly in the order given; this function does not
 * reorder them. Callers that want a chronological transcript must pass
 * `groups` in chronological order.
 *
 * @param previousSummaries Groups holding summaries from earlier compactions.
 * @param groups            Regular message groups, in the desired output order.
 * @returns Transcript parts joined by blank lines.
 */
function buildTranscript(previousSummaries: MessageGroup[], groups: MessageGroup[]): string {
  const parts: string[] = [];

  // Include previous compaction summaries as context
  for (const sg of previousSummaries) {
    const text = extractText(sg.messages[0]);
    const content = text.replace(/^\[CONTEXT COMPACTION[^\]]*\]\s*/s, '');
    parts.push('=== PRIOR COMPACTION SUMMARY ===\n' + content.slice(0, 20_000) + '\n=== END PRIOR SUMMARY ===');
  }

  for (const group of groups) {
    for (const msg of group.messages) {
      const text = extractText(msg);
      if (text.length > 0) {
        parts.push(`[${msg.role}]: ${text.slice(0, 1500)}`);
      }
    }
  }

  return parts.join('\n\n');
}
460
+
461
+ // ─── LLM Summarization (Tier 3) ─────────────────────────────────────────────
462
+
463
/**
 * System prompt for the summarization call: states the preservation and
 * compression rules and the section headings the summary must contain.
 * Assembled line by line; the joined value is a single newline-separated string.
 */
const SUMMARY_SYSTEM_PROMPT = [
  `You are a context compaction engine for an AI agent mid-task. Create a dense, lossless summary that the agent MUST be able to continue working from without any other context.`,
  ``,
  `RULES:`,
  `- PRESERVE ALL: IDs, paths, URLs, emails, phone numbers, credentials (names only), version numbers, dates, amounts — use EXACT values`,
  `- PRESERVE: Task goals, constraints, decisions made, errors encountered, workarounds found`,
  `- PRESERVE: Current state — what was just done, what's next, any pending operations`,
  `- COMPRESS: Routine tool calls (just note what tool was called and the outcome)`,
  `- COMPRESS: Repeated similar operations (batch into counts: "read 12 files from /src/...")`,
  `- OMIT: Pleasantries, acknowledgments, thinking-out-loud that didn't lead to decisions`,
  `- FORMAT: Use ## headers for sections. Use bullet lists. Be dense but readable.`,
  `- LENGTH: Use ALL available tokens. More detail = better continuation.`,
  ``,
  `Required sections:`,
  `## Task & Goal`,
  `## Completed Work (chronological)`,
  `## Key Data (IDs, paths, URLs, names — EXACT values)`,
  `## Decisions & Rationale`,
  `## Current State`,
  `## Next Steps`,
  `## Errors & Lessons (if any)`,
].join('\n');
483
+
484
/** Outcome of one LLM summarization (single call or merged chunks). */
interface LLMSummaryResult {
  /** The summary text returned by the model. */
  text: string;
  /** Input tokens reported for the call(s) that produced `text`. */
  inputTokens: number;
  /** Output tokens reported for the call(s) that produced `text`. */
  outputTokens: number;
  /** Number of transcript chunks involved. */
  chunks: number;
}
490
+
491
/**
 * Tier 3: summarize a transcript with the LLM.
 *
 * A transcript of at most `CHUNK_MAX_CHARS` characters is summarized in one
 * call. Longer transcripts are split into chunks and every chunk is
 * summarized — at most `MAX_PARALLEL_CHUNKS` at a time — so that no part of
 * the transcript (in particular the most recent part) is silently dropped.
 * A failed chunk is logged and skipped. If several chunk summaries succeed
 * they are merged by one further LLM call.
 *
 * @param transcript  Text to summarize.
 * @param config      Agent configuration (provides the model to call).
 * @param apiKey      API key for the LLM provider.
 * @param tokenBudget Token budget available for the summary. Currently not
 *                    applied here; output length is capped by
 *                    `SUMMARY_MAX_TOKENS` in the per-chunk call.
 * @returns The summary with aggregated token usage and the chunk count.
 * @throws Error('All chunks failed') when no chunk could be summarized, and
 *         whatever the single/merge call throws.
 */
async function llmSummarize(
  transcript: string,
  config: AgentConfig,
  apiKey: string,
  tokenBudget: number,
): Promise<LLMSummaryResult> {
  // If transcript fits in one chunk, do single call
  if (transcript.length <= CHUNK_MAX_CHARS) {
    return singleChunkSummarize(transcript, config, apiKey);
  }

  const chunks = splitIntoChunks(transcript, CHUNK_MAX_CHARS);
  const batchSize = Math.max(1, MAX_PARALLEL_CHUNKS);

  console.log(`[compaction] Splitting transcript into ${chunks.length} chunks for parallel summarization`);

  // Summarize every chunk, bounding concurrency by running batch after batch.
  const chunkResults: Array<LLMSummaryResult | null> = [];
  for (let offset = 0; offset < chunks.length; offset += batchSize) {
    const batch = chunks.slice(offset, offset + batchSize);
    const batchResults = await Promise.all(
      batch.map(async (chunk, position): Promise<LLMSummaryResult | null> => {
        const chunkNumber = offset + position + 1;
        try {
          return await singleChunkSummarize(
            `[Chunk ${chunkNumber}/${chunks.length}]\n${chunk}`,
            config,
            apiKey,
          );
        } catch (err: unknown) {
          // Best effort: one failed chunk must not abort the others.
          const reason = err instanceof Error ? err.message : String(err);
          console.warn(`[compaction] Chunk ${chunkNumber} failed: ${reason}`);
          return null;
        }
      }),
    );
    chunkResults.push(...batchResults);
  }

  const validResults = chunkResults.filter((r): r is LLMSummaryResult => r !== null);

  if (validResults.length === 0) {
    throw new Error('All chunks failed');
  }

  if (validResults.length === 1) {
    return { ...validResults[0], chunks: chunks.length };
  }

  // Merge multiple chunk summaries into one
  const mergedTranscript = validResults.map((r, i) => `=== Part ${i + 1} ===\n${r.text}`).join('\n\n');
  const merged = await singleChunkSummarize(
    `Merge these partial summaries into one cohesive summary:\n\n${mergedTranscript}`,
    config,
    apiKey,
  );

  return {
    text: merged.text,
    inputTokens: validResults.reduce((s, r) => s + r.inputTokens, 0) + merged.inputTokens,
    outputTokens: validResults.reduce((s, r) => s + r.outputTokens, 0) + merged.outputTokens,
    chunks: chunks.length,
  };
}
547
+
548
/**
 * Ask the configured model for one summary of `transcript`.
 *
 * Sends `SUMMARY_SYSTEM_PROMPT` as the system message and the transcript as
 * the user message, with no tools, `SUMMARY_MAX_TOKENS` as the output cap and
 * a temperature of 0.2.
 *
 * @param transcript Text to summarize.
 * @param config     Agent configuration; `config.model` selects provider/model.
 * @param apiKey     API key for the LLM provider.
 * @returns The summary text and reported token usage, with `chunks` set to 1.
 * @throws Error('Summary too short') when fewer than 50 characters come back.
 */
async function singleChunkSummarize(
  transcript: string,
  config: AgentConfig,
  apiKey: string,
): Promise<LLMSummaryResult> {
  const target = {
    provider: config.model.provider,
    modelId: config.model.modelId,
    apiKey,
  };
  const conversation = [
    { role: 'system' as const, content: SUMMARY_SYSTEM_PROMPT },
    { role: 'user' as const, content: `Summarize this conversation:\n\n${transcript}` },
  ];
  const settings = { maxTokens: SUMMARY_MAX_TOKENS, temperature: 0.2 };

  const response = await callLLM(target, conversation, [], settings);

  const text = response.textContent || '';
  if (text.length < 50) {
    throw new Error('Summary too short');
  }

  const usage = response.usage;
  return {
    text,
    inputTokens: usage?.inputTokens || 0,
    outputTokens: usage?.outputTokens || 0,
    chunks: 1,
  };
}
577
+
578
/**
 * Split `text` into consecutive chunks of at most `maxChars` characters.
 *
 * When a chunk would end mid-text, the split is moved back to just after the
 * last blank line ("\n\n") that lies entirely inside the chunk, provided that
 * blank line sits in the second half of the chunk. Concatenating the returned
 * chunks reproduces `text` exactly.
 *
 * @param text     The text to split.
 * @param maxChars Maximum chunk length in characters; must be at least 1.
 * @returns The chunks in order; an empty array for empty input.
 * @throws RangeError when `maxChars` is not a number >= 1 (it would otherwise
 *         never advance).
 */
function splitIntoChunks(text: string, maxChars: number): string[] {
  if (!(maxChars >= 1)) {
    throw new RangeError(`maxChars must be >= 1, got ${maxChars}`);
  }
  const limit = Math.floor(maxChars);

  const chunks: string[] = [];
  let start = 0;

  while (start < text.length) {
    let end = Math.min(start + limit, text.length);

    // Try to split at a paragraph boundary
    if (end < text.length) {
      // Search from end - 2 so the two-character separator fits inside the
      // chunk; searching from `end` could push the chunk past the limit.
      const lastParagraph = text.lastIndexOf('\n\n', end - 2);
      if (lastParagraph > start + limit * 0.5) {
        end = lastParagraph + 2;
      }
    }

    chunks.push(text.slice(start, end));
    start = end;
  }

  return chunks;
}
599
+
600
+ // ─── Assembly ────────────────────────────────────────────────────────────────
601
+
602
/**
 * Put the compacted context together.
 *
 * The summary is wrapped in a user message that begins with the
 * "[CONTEXT COMPACTION ...]" notice, and the result is ordered as
 * system messages, that summary message, then the verbatim recent tail.
 *
 * @param systemMessages System messages to place first.
 * @param summaryText    Summary body to embed after the notice.
 * @param keepRecent     Recent messages kept verbatim, placed last.
 * @returns A new array; the inputs are not modified.
 */
function assembleFinal(
  systemMessages: AgentMessage[],
  summaryText: string,
  keepRecent: AgentMessage[],
): AgentMessage[] {
  const notice =
    '[CONTEXT COMPACTION — Your earlier conversation was compressed to fit the context window. The summary below is authoritative — treat it as ground truth. Continue from where you left off.]';

  const summaryMessage: AgentMessage = {
    role: 'user' as const,
    content: notice + '\n\n' + summaryText,
  };

  const assembled: AgentMessage[] = [...systemMessages];
  assembled.push(summaryMessage);
  assembled.push(...keepRecent);
  return assembled;
}
614
+
615
+ // ─── Persistence ─────────────────────────────────────────────────────────────
616
+
617
/**
 * Hand the summary to the `onContextCompaction` hook, best effort.
 *
 * A missing session id is passed as an empty string. Any error thrown by the
 * hook is logged as a warning and swallowed, so this function never rejects.
 *
 * @param hooks       Runtime hooks providing `onContextCompaction`.
 * @param sessionId   Session id, if known.
 * @param agentId     Id of the agent whose context was compacted.
 * @param summaryText The summary to persist.
 */
async function persistSummary(
  hooks: RuntimeHooks,
  sessionId: string | undefined,
  agentId: string,
  summaryText: string,
): Promise<void> {
  const session = sessionId || '';

  try {
    await hooks.onContextCompaction(session, agentId, summaryText);
  } catch (err: any) {
    // Persistence is optional — report and carry on.
    console.warn(`[compaction] Memory save failed: ${err?.message}`);
    return;
  }

  console.log(`[compaction] Summary persisted to agent memory`);
}
630
+
631
+ // ─── Utilities ───────────────────────────────────────────────────────────────
632
+
633
/**
 * Report whether the estimated token usage of `messages` is strictly above
 * `COMPACTION_THRESHOLD` times the context window size.
 *
 * @param messages          Messages currently in the context.
 * @param contextWindowSize Size of the model's context window, in tokens.
 * @returns `true` when compaction should run.
 */
export function needsCompaction(messages: AgentMessage[], contextWindowSize: number): boolean {
  const limit = contextWindowSize * COMPACTION_THRESHOLD;
  const used = estimateMessageTokens(messages);
  return used > limit;
}
@@ -529,9 +529,7 @@ export function createRuntimeHooks(deps: HookDependencies): RuntimeHooks {
529
529
  var tq = new TaskQueueManager();
530
530
  (tq as any).db = deps.engineDb;
531
531
  await tq.init();
532
- // Find active task for this session
533
- var tasks = await tq.listTasks({ orgId: deps.orgId, status: 'in_progress' });
534
- var sessionTask = tasks.find(function(t: any) { return t.sessionId === sessionId; });
532
+ var sessionTask = await tq.getTaskBySessionId(sessionId);
535
533
  if (sessionTask) {
536
534
  await tq.updateTask(sessionTask.id, {
537
535
  activityLog: [...(sessionTask.activityLog || []), {