@agenticmail/enterprise 0.5.259 → 0.5.261
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-heartbeat-N6CDSNLK.js +510 -0
- package/dist/chunk-2BQMGELC.js +495 -0
- package/dist/chunk-BHXKIZGM.js +4735 -0
- package/dist/chunk-CO2KR3YH.js +1224 -0
- package/dist/chunk-GW65LEIJ.js +3778 -0
- package/dist/chunk-MTD6I7DM.js +1224 -0
- package/dist/chunk-PHQXZJVW.js +4732 -0
- package/dist/chunk-S4KFD3GO.js +3778 -0
- package/dist/cli-agent-6E3REIDS.js +1768 -0
- package/dist/cli-agent-VN5N2EHT.js +1768 -0
- package/dist/cli-serve-7EXF3774.js +114 -0
- package/dist/cli-serve-X7BZXTWY.js +114 -0
- package/dist/cli.js +3 -3
- package/dist/index.js +3 -3
- package/dist/routes-FHJXYOPE.js +13531 -0
- package/dist/runtime-3JY7IPTA.js +45 -0
- package/dist/runtime-NZYF7L25.js +45 -0
- package/dist/server-7T73HM6C.js +15 -0
- package/dist/server-JJSGLZCF.js +15 -0
- package/dist/setup-75DYAICT.js +20 -0
- package/dist/setup-W3M3TZN3.js +20 -0
- package/dist/task-queue-IGGE5ZTO.js +7 -0
- package/package.json +1 -1
- package/src/engine/task-queue.ts +16 -0
- package/src/runtime/agent-loop.ts +4 -201
- package/src/runtime/compaction.ts +638 -0
- package/src/runtime/hooks.ts +1 -3
|
@@ -0,0 +1,638 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advanced Context Compaction Engine
|
|
3
|
+
*
|
|
4
|
+
* A multi-strategy, token-budget-aware compaction system that outperforms
|
|
5
|
+
* naive summarization approaches. Key innovations:
|
|
6
|
+
*
|
|
7
|
+
* 1. TIERED COMPRESSION — Three levels: trim tool results → extractive → LLM summary
|
|
8
|
+
* 2. ATOMIC GROUPING — Tool use/result pairs never split
|
|
9
|
+
* 3. TOKEN BUDGET — Calculates exact space to free, doesn't over-compact
|
|
10
|
+
* 4. ROLLING SUMMARIES — Previous compaction summaries are preserved & chained
|
|
11
|
+
* 5. IMPORTANCE SCORING — High-value messages kept verbatim (errors, decisions, IDs)
|
|
12
|
+
* 6. PARALLEL CHUNK SUMMARIZATION — Large transcripts split and summarized concurrently
|
|
13
|
+
* 7. STRUCTURED OUTPUT — Summary follows strict schema for reliable continuation
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import type { AgentMessage, AgentConfig, RuntimeHooks } from './types.js';
|
|
17
|
+
import { callLLM, estimateTokens, estimateMessageTokens } from './llm-client.js';
|
|
18
|
+
|
|
19
|
+
// ─── Configuration ───────────────────────────────────────────────────────────

/** Compact when context exceeds this fraction of the window */
export const COMPACTION_THRESHOLD = 0.80;

/** Target context usage after compaction (leave headroom for next turn) */
const TARGET_USAGE = 0.45;

/** Minimum messages to keep verbatim (recent conversation tail) */
const MIN_KEEP_RECENT = 10;

/** Maximum messages to keep verbatim */
const MAX_KEEP_RECENT = 30;

/** Max tokens for the LLM summary itself (applies per summarization call) */
const SUMMARY_MAX_TOKENS = 4096;

/** Max transcript chars to send to LLM for summarization per chunk */
const CHUNK_MAX_CHARS = 80_000;

/** Max parallel summarization chunks (chunks beyond this are dropped) */
const MAX_PARALLEL_CHUNKS = 3;

/** Tool result content longer than this gets trimmed in Tier 1 */
const TOOL_RESULT_TRIM_THRESHOLD = 2000;

/** Trimmed tool result max length */
const TOOL_RESULT_TRIM_TO = 400;

/**
 * High-importance patterns — matching messages receive an importance boost
 * in scoreImportance(), making them more likely to be kept at length.
 * NOTE(review): the /key|token/ alternatives match as substrings ("monkey",
 * "tokens used"), so expect occasional false-positive importance boosts.
 */
const HIGH_IMPORTANCE_PATTERNS = [
  /error|fail|exception|crash|bug/i,
  /decision|decided|chose|choosing/i,
  /important|critical|must|required/i,
  /password|secret|key|token|credential/i,
  /\b[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\b/, // UUIDs
  /https?:\/\/\S{20,}/, // Long URLs
  /\/[a-zA-Z][\w/.-]{10,}/, // File paths
];
|
|
58
|
+
|
|
59
|
+
// ─── Types ───────────────────────────────────────────────────────────────────

// NOTE(review): CompactionResult is not referenced anywhere in this file's
// visible code — confirm it is still needed before removing.
interface CompactionResult {
  /** Final, compacted message list. */
  messages: AgentMessage[];
  /** Metrics describing what the compaction pass did. */
  stats: CompactionStats;
}

/** Metrics recorded for one compaction pass (currently only logged). */
interface CompactionStats {
  /** Which tier actually ran ('none' = no compaction was needed). */
  strategy: 'none' | 'tier1_trim' | 'tier2_extractive' | 'tier3_llm';
  messagesBefore: number;
  messagesAfter: number;
  tokensBefore: number;
  tokensAfter: number;
  /** Wall-clock duration of the compaction pass. */
  msElapsed: number;
  /** Estimated token size of the generated summary (tier 3 only). */
  summaryTokens?: number;
  llmInputTokens?: number;
  llmOutputTokens?: number;
  /** How many transcript chunks were summarized (tier 3 only). */
  chunksUsed?: number;
  /** Number of earlier compaction summaries folded into this one. */
  previousSummariesChained?: number;
}

/** A scoring unit: one message, or an atomic tool_use/tool_result pair. */
interface MessageGroup {
  messages: AgentMessage[];
  /** Estimated token cost of `messages`. */
  tokens: number;
  /** Heuristic keep-priority; see scoreImportance(). */
  importance: number;
  /** True when this group is an assistant tool_use + user tool_result pair. */
  isToolPair: boolean;
  /** True when the group is a summary emitted by an earlier compaction. */
  isPreviousSummary: boolean;
}
|
|
87
|
+
|
|
88
|
+
// ─── Main Entry Point ────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Compact the message history to fit within the context window.
|
|
92
|
+
* Uses a tiered approach — tries cheapest strategies first.
|
|
93
|
+
*/
|
|
94
|
+
export async function compactContext(
|
|
95
|
+
messages: AgentMessage[],
|
|
96
|
+
config: AgentConfig,
|
|
97
|
+
hooks: RuntimeHooks,
|
|
98
|
+
options?: { apiKey?: string; sessionId?: string },
|
|
99
|
+
): Promise<AgentMessage[]> {
|
|
100
|
+
const startMs = Date.now();
|
|
101
|
+
const contextWindowSize = config.contextWindowSize ?? 200_000;
|
|
102
|
+
const tokensBefore = estimateMessageTokens(messages);
|
|
103
|
+
const targetTokens = Math.floor(contextWindowSize * TARGET_USAGE);
|
|
104
|
+
|
|
105
|
+
// Don't compact if already under threshold
|
|
106
|
+
if (tokensBefore <= contextWindowSize * COMPACTION_THRESHOLD) {
|
|
107
|
+
return messages;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const tokensToFree = tokensBefore - targetTokens;
|
|
111
|
+
console.log(`[compaction] Need to free ~${tokensToFree} tokens (${tokensBefore} → target ${targetTokens})`);
|
|
112
|
+
|
|
113
|
+
// Split system messages from conversation
|
|
114
|
+
const systemMessages = messages.filter(m => m.role === 'system');
|
|
115
|
+
const nonSystem = messages.filter(m => m.role !== 'system');
|
|
116
|
+
|
|
117
|
+
if (nonSystem.length <= MIN_KEEP_RECENT) {
|
|
118
|
+
return messages; // Nothing to compact
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// ─── Tier 1: Trim bloated tool results ─────────────────────────────────
|
|
122
|
+
const tier1Messages = trimToolResults(nonSystem);
|
|
123
|
+
const tier1Tokens = estimateMessageTokens([...systemMessages, ...tier1Messages]);
|
|
124
|
+
|
|
125
|
+
if (tier1Tokens <= contextWindowSize * COMPACTION_THRESHOLD) {
|
|
126
|
+
const stats: CompactionStats = {
|
|
127
|
+
strategy: 'tier1_trim',
|
|
128
|
+
messagesBefore: messages.length,
|
|
129
|
+
messagesAfter: systemMessages.length + tier1Messages.length,
|
|
130
|
+
tokensBefore,
|
|
131
|
+
tokensAfter: tier1Tokens,
|
|
132
|
+
msElapsed: Date.now() - startMs,
|
|
133
|
+
};
|
|
134
|
+
console.log(`[compaction] Tier 1 (trim tool results) sufficient: ${tokensBefore} → ${tier1Tokens} tokens in ${stats.msElapsed}ms`);
|
|
135
|
+
return [...systemMessages, ...tier1Messages];
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// ─── Determine keep-recent boundary ────────────────────────────────────
|
|
139
|
+
const keepCount = calculateKeepRecent(tier1Messages, targetTokens, estimateMessageTokens(systemMessages));
|
|
140
|
+
const { toSummarize, keepRecent } = splitAtSafeBoundary(tier1Messages, keepCount);
|
|
141
|
+
|
|
142
|
+
if (toSummarize.length === 0) {
|
|
143
|
+
return messages; // Can't split safely
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// ─── Group messages for importance scoring ─────────────────────────────
|
|
147
|
+
const groups = groupMessages(toSummarize);
|
|
148
|
+
const sortedGroups = groups.sort((a, b) => b.importance - a.importance);
|
|
149
|
+
|
|
150
|
+
// Separate previous compaction summaries (they chain)
|
|
151
|
+
const previousSummaries = sortedGroups.filter(g => g.isPreviousSummary);
|
|
152
|
+
const regularGroups = sortedGroups.filter(g => !g.isPreviousSummary);
|
|
153
|
+
|
|
154
|
+
// ─── Tier 2: Extractive (no LLM call) ─────────────────────────────────
|
|
155
|
+
const keepTokenBudget = estimateMessageTokens(systemMessages) + estimateMessageTokens(keepRecent);
|
|
156
|
+
const summaryBudget = targetTokens - keepTokenBudget;
|
|
157
|
+
|
|
158
|
+
if (!options?.apiKey || summaryBudget < 1000) {
|
|
159
|
+
// No API key or very little budget — use extractive
|
|
160
|
+
const summary = buildExtractiveSummary(previousSummaries, regularGroups, summaryBudget);
|
|
161
|
+
const result = assembleFinal(systemMessages, summary, keepRecent);
|
|
162
|
+
const stats: CompactionStats = {
|
|
163
|
+
strategy: 'tier2_extractive',
|
|
164
|
+
messagesBefore: messages.length,
|
|
165
|
+
messagesAfter: result.length,
|
|
166
|
+
tokensBefore,
|
|
167
|
+
tokensAfter: estimateMessageTokens(result),
|
|
168
|
+
msElapsed: Date.now() - startMs,
|
|
169
|
+
previousSummariesChained: previousSummaries.length,
|
|
170
|
+
};
|
|
171
|
+
console.log(`[compaction] Tier 2 (extractive): ${stats.tokensBefore} → ${stats.tokensAfter} tokens in ${stats.msElapsed}ms`);
|
|
172
|
+
await persistSummary(hooks, options?.sessionId, config.agentId, summary);
|
|
173
|
+
return result;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// ─── Tier 3: LLM-powered summarization ─────────────────────────────────
|
|
177
|
+
try {
|
|
178
|
+
const transcript = buildTranscript(previousSummaries, regularGroups);
|
|
179
|
+
const summary = await llmSummarize(transcript, config, options.apiKey, summaryBudget);
|
|
180
|
+
const result = assembleFinal(systemMessages, summary.text, keepRecent);
|
|
181
|
+
const tokensAfter = estimateMessageTokens(result);
|
|
182
|
+
|
|
183
|
+
const stats: CompactionStats = {
|
|
184
|
+
strategy: 'tier3_llm',
|
|
185
|
+
messagesBefore: messages.length,
|
|
186
|
+
messagesAfter: result.length,
|
|
187
|
+
tokensBefore,
|
|
188
|
+
tokensAfter,
|
|
189
|
+
msElapsed: Date.now() - startMs,
|
|
190
|
+
summaryTokens: estimateTokens(summary.text),
|
|
191
|
+
llmInputTokens: summary.inputTokens,
|
|
192
|
+
llmOutputTokens: summary.outputTokens,
|
|
193
|
+
chunksUsed: summary.chunks,
|
|
194
|
+
previousSummariesChained: previousSummaries.length,
|
|
195
|
+
};
|
|
196
|
+
console.log(`[compaction] Tier 3 (LLM): ${stats.tokensBefore} → ${stats.tokensAfter} tokens in ${stats.msElapsed}ms (${summary.chunks} chunks, ${summary.inputTokens}in/${summary.outputTokens}out)`);
|
|
197
|
+
await persistSummary(hooks, options?.sessionId, config.agentId, summary.text);
|
|
198
|
+
return result;
|
|
199
|
+
} catch (err: any) {
|
|
200
|
+
console.warn(`[compaction] LLM summarization failed: ${err.message} — falling back to extractive`);
|
|
201
|
+
const summary = buildExtractiveSummary(previousSummaries, regularGroups, summaryBudget);
|
|
202
|
+
const result = assembleFinal(systemMessages, summary, keepRecent);
|
|
203
|
+
await persistSummary(hooks, options?.sessionId, config.agentId, summary);
|
|
204
|
+
console.log(`[compaction] Extractive fallback: ${tokensBefore} → ${estimateMessageTokens(result)} tokens in ${Date.now() - startMs}ms`);
|
|
205
|
+
return result;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// ─── Tier 1: Trim Tool Results ───────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
function trimToolResults(messages: AgentMessage[]): AgentMessage[] {
|
|
212
|
+
return messages.map(msg => {
|
|
213
|
+
if (!Array.isArray(msg.content)) return msg;
|
|
214
|
+
|
|
215
|
+
let modified = false;
|
|
216
|
+
const newContent = (msg.content as any[]).map((block: any) => {
|
|
217
|
+
if (block.type === 'tool_result') {
|
|
218
|
+
const content = typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
219
|
+
if (content.length > TOOL_RESULT_TRIM_THRESHOLD) {
|
|
220
|
+
modified = true;
|
|
221
|
+
return {
|
|
222
|
+
...block,
|
|
223
|
+
content: content.slice(0, TOOL_RESULT_TRIM_TO) + `\n... [trimmed ${content.length - TOOL_RESULT_TRIM_TO} chars]`,
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
return block;
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
return modified ? { ...msg, content: newContent } : msg;
|
|
231
|
+
});
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// ─── Keep-Recent Calculation ─────────────────────────────────────────────────
|
|
235
|
+
|
|
236
|
+
function calculateKeepRecent(messages: AgentMessage[], targetTokens: number, systemTokens: number): number {
|
|
237
|
+
// Walk backwards from the end, accumulating tokens, until we hit the budget
|
|
238
|
+
// Reserve ~30% of target for the summary itself
|
|
239
|
+
const keepBudget = Math.floor((targetTokens - systemTokens) * 0.6);
|
|
240
|
+
let tokens = 0;
|
|
241
|
+
let count = 0;
|
|
242
|
+
|
|
243
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
244
|
+
const msgTokens = estimateMessageTokens([messages[i]]);
|
|
245
|
+
if (tokens + msgTokens > keepBudget && count >= MIN_KEEP_RECENT) break;
|
|
246
|
+
tokens += msgTokens;
|
|
247
|
+
count++;
|
|
248
|
+
if (count >= MAX_KEEP_RECENT) break;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
return Math.max(MIN_KEEP_RECENT, count);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// ─── Safe Boundary Split ─────────────────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
function splitAtSafeBoundary(messages: AgentMessage[], keepCount: number): {
|
|
257
|
+
toSummarize: AgentMessage[];
|
|
258
|
+
keepRecent: AgentMessage[];
|
|
259
|
+
} {
|
|
260
|
+
let cutIndex = messages.length - keepCount;
|
|
261
|
+
|
|
262
|
+
// Walk backwards to avoid splitting tool_use/tool_result pairs
|
|
263
|
+
for (let i = cutIndex; i > 0; i--) {
|
|
264
|
+
const msg = messages[i];
|
|
265
|
+
if (msg.role === 'user' && Array.isArray(msg.content)) {
|
|
266
|
+
const hasToolResult = (msg.content as any[]).some((b: any) => b.type === 'tool_result');
|
|
267
|
+
if (hasToolResult) continue; // Don't cut here
|
|
268
|
+
}
|
|
269
|
+
cutIndex = i;
|
|
270
|
+
break;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
return {
|
|
274
|
+
toSummarize: messages.slice(0, cutIndex),
|
|
275
|
+
keepRecent: messages.slice(cutIndex),
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
// ─── Message Grouping & Importance ───────────────────────────────────────────
|
|
280
|
+
|
|
281
|
+
function groupMessages(messages: AgentMessage[]): MessageGroup[] {
|
|
282
|
+
const groups: MessageGroup[] = [];
|
|
283
|
+
let i = 0;
|
|
284
|
+
|
|
285
|
+
while (i < messages.length) {
|
|
286
|
+
const msg = messages[i];
|
|
287
|
+
|
|
288
|
+
// Check if this is a previous compaction summary
|
|
289
|
+
if (msg.role === 'user' && typeof msg.content === 'string' && msg.content.includes('[CONTEXT COMPACTION')) {
|
|
290
|
+
groups.push({
|
|
291
|
+
messages: [msg],
|
|
292
|
+
tokens: estimateMessageTokens([msg]),
|
|
293
|
+
importance: 10, // Highest — contains all prior context
|
|
294
|
+
isPreviousSummary: true,
|
|
295
|
+
isToolPair: false,
|
|
296
|
+
});
|
|
297
|
+
i++;
|
|
298
|
+
continue;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
// Group tool_use (assistant) with following tool_result (user)
|
|
302
|
+
if (msg.role === 'assistant' && Array.isArray(msg.content) &&
|
|
303
|
+
(msg.content as any[]).some((b: any) => b.type === 'tool_use') &&
|
|
304
|
+
i + 1 < messages.length) {
|
|
305
|
+
const next = messages[i + 1];
|
|
306
|
+
if (next.role === 'user' && Array.isArray(next.content) &&
|
|
307
|
+
(next.content as any[]).some((b: any) => b.type === 'tool_result')) {
|
|
308
|
+
const pair = [msg, next];
|
|
309
|
+
groups.push({
|
|
310
|
+
messages: pair,
|
|
311
|
+
tokens: estimateMessageTokens(pair),
|
|
312
|
+
importance: scoreImportance(pair),
|
|
313
|
+
isToolPair: true,
|
|
314
|
+
isPreviousSummary: false,
|
|
315
|
+
});
|
|
316
|
+
i += 2;
|
|
317
|
+
continue;
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
// Single message
|
|
322
|
+
groups.push({
|
|
323
|
+
messages: [msg],
|
|
324
|
+
tokens: estimateMessageTokens([msg]),
|
|
325
|
+
importance: scoreImportance([msg]),
|
|
326
|
+
isToolPair: false,
|
|
327
|
+
isPreviousSummary: false,
|
|
328
|
+
});
|
|
329
|
+
i++;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
return groups;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
function scoreImportance(messages: AgentMessage[]): number {
|
|
336
|
+
let score = 1;
|
|
337
|
+
for (const msg of messages) {
|
|
338
|
+
const text = extractText(msg);
|
|
339
|
+
|
|
340
|
+
// User messages slightly more important (contain instructions)
|
|
341
|
+
if (msg.role === 'user') score += 1;
|
|
342
|
+
|
|
343
|
+
// Check high-importance patterns
|
|
344
|
+
for (const pattern of HIGH_IMPORTANCE_PATTERNS) {
|
|
345
|
+
if (pattern.test(text)) {
|
|
346
|
+
score += 2;
|
|
347
|
+
break; // One match is enough
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// Error tool results are very important
|
|
352
|
+
if (Array.isArray(msg.content)) {
|
|
353
|
+
for (const block of msg.content as any[]) {
|
|
354
|
+
if (block.type === 'tool_result' && block.is_error) score += 3;
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// Very short messages are less important (acks, "ok", etc.)
|
|
359
|
+
if (text.length < 20) score -= 1;
|
|
360
|
+
|
|
361
|
+
// Very long tool results are less important per-token (bulk data)
|
|
362
|
+
if (text.length > 5000) score -= 1;
|
|
363
|
+
}
|
|
364
|
+
return Math.max(0, score);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
function extractText(msg: AgentMessage): string {
|
|
368
|
+
if (typeof msg.content === 'string') return msg.content;
|
|
369
|
+
if (Array.isArray(msg.content)) {
|
|
370
|
+
return (msg.content as any[]).map((b: any) => {
|
|
371
|
+
if (b.type === 'text') return b.text || '';
|
|
372
|
+
if (b.type === 'tool_use') return `${b.name}(${JSON.stringify(b.input || {}).slice(0, 200)})`;
|
|
373
|
+
if (b.type === 'tool_result') return String(b.content || '').slice(0, 500);
|
|
374
|
+
return '';
|
|
375
|
+
}).join(' ');
|
|
376
|
+
}
|
|
377
|
+
return '';
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
// ─── Extractive Summary (Tier 2) ────────────────────────────────────────────
|
|
381
|
+
|
|
382
|
+
function buildExtractiveSummary(
|
|
383
|
+
previousSummaries: MessageGroup[],
|
|
384
|
+
groups: MessageGroup[],
|
|
385
|
+
tokenBudget: number,
|
|
386
|
+
): string {
|
|
387
|
+
const parts: string[] = [];
|
|
388
|
+
let usedTokens = 0;
|
|
389
|
+
|
|
390
|
+
// Chain previous summaries first (they contain earlier context)
|
|
391
|
+
for (const sg of previousSummaries) {
|
|
392
|
+
const text = extractText(sg.messages[0]);
|
|
393
|
+
// Strip the "[CONTEXT COMPACTION...]" wrapper, keep the content
|
|
394
|
+
const content = text.replace(/^\[CONTEXT COMPACTION[^\]]*\]\s*/s, '');
|
|
395
|
+
const tokens = estimateTokens(content);
|
|
396
|
+
if (usedTokens + tokens < tokenBudget * 0.4) { // Use up to 40% for prior summaries
|
|
397
|
+
parts.push('## Prior Context (from earlier compaction)\n' + content);
|
|
398
|
+
usedTokens += tokens;
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
parts.push('\n## Recent Activity Summary');
|
|
403
|
+
|
|
404
|
+
// Add high-importance groups first, then fill with lower importance
|
|
405
|
+
// Already sorted by importance (descending)
|
|
406
|
+
for (const group of groups) {
|
|
407
|
+
if (usedTokens >= tokenBudget) break;
|
|
408
|
+
|
|
409
|
+
for (const msg of group.messages) {
|
|
410
|
+
const text = extractText(msg);
|
|
411
|
+
if (!text) continue;
|
|
412
|
+
|
|
413
|
+
// Truncate based on importance
|
|
414
|
+
const maxLen = group.importance >= 5 ? 800 : group.importance >= 3 ? 400 : 200;
|
|
415
|
+
const truncated = text.length > maxLen ? text.slice(0, maxLen) + '...' : text;
|
|
416
|
+
const line = `[${msg.role}]: ${truncated}`;
|
|
417
|
+
const lineTokens = estimateTokens(line);
|
|
418
|
+
|
|
419
|
+
if (usedTokens + lineTokens > tokenBudget) break;
|
|
420
|
+
parts.push(line);
|
|
421
|
+
usedTokens += lineTokens;
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
return parts.join('\n');
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// ─── Transcript Builder ──────────────────────────────────────────────────────
|
|
429
|
+
|
|
430
|
+
function buildTranscript(previousSummaries: MessageGroup[], groups: MessageGroup[]): string {
|
|
431
|
+
const parts: string[] = [];
|
|
432
|
+
|
|
433
|
+
// Include previous compaction summaries as context
|
|
434
|
+
for (const sg of previousSummaries) {
|
|
435
|
+
const text = extractText(sg.messages[0]);
|
|
436
|
+
const content = text.replace(/^\[CONTEXT COMPACTION[^\]]*\]\s*/s, '');
|
|
437
|
+
parts.push('=== PRIOR COMPACTION SUMMARY ===\n' + content.slice(0, 20_000) + '\n=== END PRIOR SUMMARY ===');
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// Build transcript from groups (in original order — re-sort by position)
|
|
441
|
+
// Groups are sorted by importance, but transcript needs chronological order
|
|
442
|
+
const chronoGroups = [...groups];
|
|
443
|
+
// We don't have explicit position, but original array order is chronological
|
|
444
|
+
// Since we only sorted a copy, use the original `groups` order...
|
|
445
|
+
// Actually, groups come from groupMessages which is already chronological.
|
|
446
|
+
// They were sorted by importance for extractive, but for transcript we need chrono.
|
|
447
|
+
// We need to pass original order. Let's use regularGroups before sorting.
|
|
448
|
+
|
|
449
|
+
for (const group of groups) {
|
|
450
|
+
for (const msg of group.messages) {
|
|
451
|
+
const text = extractText(msg);
|
|
452
|
+
if (text.length > 0) {
|
|
453
|
+
parts.push(`[${msg.role}]: ${text.slice(0, 1500)}`);
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
return parts.join('\n\n');
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
// ─── LLM Summarization (Tier 3) ─────────────────────────────────────────────
|
|
462
|
+
|
|
463
|
+
// System prompt for the Tier 3 LLM summarizer. This is a runtime string sent
// verbatim to the model — the "Required sections" list is the schema the
// generated summary is expected to follow.
const SUMMARY_SYSTEM_PROMPT = `You are a context compaction engine for an AI agent mid-task. Create a dense, lossless summary that the agent MUST be able to continue working from without any other context.

RULES:
- PRESERVE ALL: IDs, paths, URLs, emails, phone numbers, credentials (names only), version numbers, dates, amounts — use EXACT values
- PRESERVE: Task goals, constraints, decisions made, errors encountered, workarounds found
- PRESERVE: Current state — what was just done, what's next, any pending operations
- COMPRESS: Routine tool calls (just note what tool was called and the outcome)
- COMPRESS: Repeated similar operations (batch into counts: "read 12 files from /src/...")
- OMIT: Pleasantries, acknowledgments, thinking-out-loud that didn't lead to decisions
- FORMAT: Use ## headers for sections. Use bullet lists. Be dense but readable.
- LENGTH: Use ALL available tokens. More detail = better continuation.

Required sections:
## Task & Goal
## Completed Work (chronological)
## Key Data (IDs, paths, URLs, names — EXACT values)
## Decisions & Rationale
## Current State
## Next Steps
## Errors & Lessons (if any)`;

/** Aggregated outcome of one or more LLM summarization calls. */
interface LLMSummaryResult {
  /** The final summary text. */
  text: string;
  /** Total prompt tokens consumed across all calls (0 if unreported). */
  inputTokens: number;
  /** Total completion tokens produced across all calls (0 if unreported). */
  outputTokens: number;
  /** Number of transcript chunks the input was split into (1 = single call). */
  chunks: number;
}
|
|
490
|
+
|
|
491
|
+
async function llmSummarize(
|
|
492
|
+
transcript: string,
|
|
493
|
+
config: AgentConfig,
|
|
494
|
+
apiKey: string,
|
|
495
|
+
tokenBudget: number,
|
|
496
|
+
): Promise<LLMSummaryResult> {
|
|
497
|
+
// If transcript fits in one chunk, do single call
|
|
498
|
+
if (transcript.length <= CHUNK_MAX_CHARS) {
|
|
499
|
+
return singleChunkSummarize(transcript, config, apiKey);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// Split into chunks and summarize in parallel
|
|
503
|
+
const chunks = splitIntoChunks(transcript, CHUNK_MAX_CHARS);
|
|
504
|
+
const limitedChunks = chunks.slice(0, MAX_PARALLEL_CHUNKS);
|
|
505
|
+
|
|
506
|
+
console.log(`[compaction] Splitting transcript into ${limitedChunks.length} chunks for parallel summarization`);
|
|
507
|
+
|
|
508
|
+
const chunkResults = await Promise.all(
|
|
509
|
+
limitedChunks.map((chunk, idx) =>
|
|
510
|
+
singleChunkSummarize(
|
|
511
|
+
`[Chunk ${idx + 1}/${limitedChunks.length}]\n${chunk}`,
|
|
512
|
+
config,
|
|
513
|
+
apiKey,
|
|
514
|
+
).catch(err => {
|
|
515
|
+
console.warn(`[compaction] Chunk ${idx + 1} failed: ${err.message}`);
|
|
516
|
+
return null;
|
|
517
|
+
})
|
|
518
|
+
)
|
|
519
|
+
);
|
|
520
|
+
|
|
521
|
+
// Merge chunk summaries
|
|
522
|
+
const validResults = chunkResults.filter((r): r is LLMSummaryResult => r !== null);
|
|
523
|
+
|
|
524
|
+
if (validResults.length === 0) {
|
|
525
|
+
throw new Error('All chunks failed');
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
if (validResults.length === 1) {
|
|
529
|
+
return { ...validResults[0], chunks: limitedChunks.length };
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// Merge multiple chunk summaries into one
|
|
533
|
+
const mergedTranscript = validResults.map((r, i) => `=== Part ${i + 1} ===\n${r.text}`).join('\n\n');
|
|
534
|
+
const merged = await singleChunkSummarize(
|
|
535
|
+
`Merge these partial summaries into one cohesive summary:\n\n${mergedTranscript}`,
|
|
536
|
+
config,
|
|
537
|
+
apiKey,
|
|
538
|
+
);
|
|
539
|
+
|
|
540
|
+
return {
|
|
541
|
+
text: merged.text,
|
|
542
|
+
inputTokens: validResults.reduce((s, r) => s + r.inputTokens, 0) + merged.inputTokens,
|
|
543
|
+
outputTokens: validResults.reduce((s, r) => s + r.outputTokens, 0) + merged.outputTokens,
|
|
544
|
+
chunks: limitedChunks.length,
|
|
545
|
+
};
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
async function singleChunkSummarize(
|
|
549
|
+
transcript: string,
|
|
550
|
+
config: AgentConfig,
|
|
551
|
+
apiKey: string,
|
|
552
|
+
): Promise<LLMSummaryResult> {
|
|
553
|
+
const response = await callLLM(
|
|
554
|
+
{
|
|
555
|
+
provider: config.model.provider,
|
|
556
|
+
modelId: config.model.modelId,
|
|
557
|
+
apiKey,
|
|
558
|
+
},
|
|
559
|
+
[
|
|
560
|
+
{ role: 'system' as const, content: SUMMARY_SYSTEM_PROMPT },
|
|
561
|
+
{ role: 'user' as const, content: `Summarize this conversation:\n\n${transcript}` },
|
|
562
|
+
],
|
|
563
|
+
[],
|
|
564
|
+
{ maxTokens: SUMMARY_MAX_TOKENS, temperature: 0.2 },
|
|
565
|
+
);
|
|
566
|
+
|
|
567
|
+
const text = response.textContent || '';
|
|
568
|
+
if (text.length < 50) throw new Error('Summary too short');
|
|
569
|
+
|
|
570
|
+
return {
|
|
571
|
+
text,
|
|
572
|
+
inputTokens: response.usage?.inputTokens || 0,
|
|
573
|
+
outputTokens: response.usage?.outputTokens || 0,
|
|
574
|
+
chunks: 1,
|
|
575
|
+
};
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
function splitIntoChunks(text: string, maxChars: number): string[] {
|
|
579
|
+
const chunks: string[] = [];
|
|
580
|
+
let start = 0;
|
|
581
|
+
|
|
582
|
+
while (start < text.length) {
|
|
583
|
+
let end = Math.min(start + maxChars, text.length);
|
|
584
|
+
|
|
585
|
+
// Try to split at a paragraph boundary
|
|
586
|
+
if (end < text.length) {
|
|
587
|
+
const lastParagraph = text.lastIndexOf('\n\n', end);
|
|
588
|
+
if (lastParagraph > start + maxChars * 0.5) {
|
|
589
|
+
end = lastParagraph + 2;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
chunks.push(text.slice(start, end));
|
|
594
|
+
start = end;
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
return chunks;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
// ─── Assembly ────────────────────────────────────────────────────────────────
|
|
601
|
+
|
|
602
|
+
function assembleFinal(
|
|
603
|
+
systemMessages: AgentMessage[],
|
|
604
|
+
summaryText: string,
|
|
605
|
+
keepRecent: AgentMessage[],
|
|
606
|
+
): AgentMessage[] {
|
|
607
|
+
const summaryMessage: AgentMessage = {
|
|
608
|
+
role: 'user' as const,
|
|
609
|
+
content: `[CONTEXT COMPACTION — Your earlier conversation was compressed to fit the context window. The summary below is authoritative — treat it as ground truth. Continue from where you left off.]\n\n${summaryText}`,
|
|
610
|
+
};
|
|
611
|
+
|
|
612
|
+
return [...systemMessages, summaryMessage, ...keepRecent];
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// ─── Persistence ─────────────────────────────────────────────────────────────
|
|
616
|
+
|
|
617
|
+
async function persistSummary(
|
|
618
|
+
hooks: RuntimeHooks,
|
|
619
|
+
sessionId: string | undefined,
|
|
620
|
+
agentId: string,
|
|
621
|
+
summaryText: string,
|
|
622
|
+
): Promise<void> {
|
|
623
|
+
try {
|
|
624
|
+
await hooks.onContextCompaction(sessionId || '', agentId, summaryText);
|
|
625
|
+
console.log(`[compaction] Summary persisted to agent memory`);
|
|
626
|
+
} catch (err: any) {
|
|
627
|
+
console.warn(`[compaction] Memory save failed: ${err?.message}`);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// ─── Utilities ───────────────────────────────────────────────────────────────
|
|
632
|
+
|
|
633
|
+
/**
|
|
634
|
+
* Check if compaction is needed for the given messages + context window.
|
|
635
|
+
*/
|
|
636
|
+
export function needsCompaction(messages: AgentMessage[], contextWindowSize: number): boolean {
|
|
637
|
+
return estimateMessageTokens(messages) > contextWindowSize * COMPACTION_THRESHOLD;
|
|
638
|
+
}
|
package/src/runtime/hooks.ts
CHANGED
|
@@ -529,9 +529,7 @@ export function createRuntimeHooks(deps: HookDependencies): RuntimeHooks {
|
|
|
529
529
|
var tq = new TaskQueueManager();
|
|
530
530
|
(tq as any).db = deps.engineDb;
|
|
531
531
|
await tq.init();
|
|
532
|
-
|
|
533
|
-
var tasks = await tq.listTasks({ orgId: deps.orgId, status: 'in_progress' });
|
|
534
|
-
var sessionTask = tasks.find(function(t: any) { return t.sessionId === sessionId; });
|
|
532
|
+
var sessionTask = await tq.getTaskBySessionId(sessionId);
|
|
535
533
|
if (sessionTask) {
|
|
536
534
|
await tq.updateTask(sessionTask.id, {
|
|
537
535
|
activityLog: [...(sessionTask.activityLog || []), {
|