@agi-cli/server 0.1.112 → 0.1.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/runtime/compaction.ts +396 -114
- package/src/runtime/history-builder.ts +7 -7
- package/src/runtime/message-service.ts +52 -9
- package/src/runtime/prompt.ts +14 -0
- package/src/runtime/runner.ts +110 -3
- package/src/runtime/session-queue.ts +2 -0
- package/src/runtime/stream-handlers.ts +174 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agi-cli/server",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.113",
|
|
4
4
|
"description": "HTTP API server for AGI CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -29,8 +29,8 @@
|
|
|
29
29
|
"typecheck": "tsc --noEmit"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"@agi-cli/sdk": "0.1.
|
|
33
|
-
"@agi-cli/database": "0.1.
|
|
32
|
+
"@agi-cli/sdk": "0.1.113",
|
|
33
|
+
"@agi-cli/database": "0.1.113",
|
|
34
34
|
"drizzle-orm": "^0.44.5",
|
|
35
35
|
"hono": "^4.9.9",
|
|
36
36
|
"zod": "^4.1.8"
|
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Context compaction module for managing token usage.
|
|
3
3
|
*
|
|
4
|
-
* This module implements
|
|
5
|
-
* 1. Detects
|
|
6
|
-
* 2.
|
|
7
|
-
* 3. History builder
|
|
4
|
+
* This module implements intelligent context management:
|
|
5
|
+
* 1. Detects /compact command and builds summarization context
|
|
6
|
+
* 2. After LLM responds with summary, marks old parts as compacted
|
|
7
|
+
* 3. History builder skips compacted parts entirely
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
* -
|
|
11
|
-
* -
|
|
12
|
-
* -
|
|
13
|
-
* -
|
|
9
|
+
* Flow:
|
|
10
|
+
* - User sends "/compact" → stored as regular user message
|
|
11
|
+
* - Runner detects command, builds context for LLM to summarize
|
|
12
|
+
* - LLM streams summary response naturally
|
|
13
|
+
* - On completion, markSessionCompacted() marks old tool_call/tool_result parts
|
|
14
|
+
* - Future history builds skip compacted parts
|
|
14
15
|
*/
|
|
15
16
|
|
|
16
17
|
import type { getDb } from '@agi-cli/database';
|
|
17
18
|
import { messages, messageParts } from '@agi-cli/database/schema';
|
|
18
|
-
import { eq, desc } from 'drizzle-orm';
|
|
19
|
+
import { eq, desc, asc, and, lt } from 'drizzle-orm';
|
|
19
20
|
import { debugLog } from './debug.ts';
|
|
21
|
+
import { streamText } from 'ai';
|
|
22
|
+
import { resolveModel } from './provider.ts';
|
|
23
|
+
import { loadConfig } from '@agi-cli/sdk';
|
|
20
24
|
|
|
21
|
-
// Token thresholds
|
|
22
|
-
export const PRUNE_MINIMUM = 20_000; // Only prune if we'd save at least this many tokens
|
|
25
|
+
// Token thresholds
|
|
23
26
|
export const PRUNE_PROTECT = 40_000; // Protect last N tokens worth of tool calls
|
|
24
27
|
|
|
25
|
-
// Tools that should never be
|
|
26
|
-
const
|
|
28
|
+
// Tools that should never be compacted
|
|
29
|
+
const PROTECTED_TOOLS = ['skill'];
|
|
27
30
|
|
|
28
31
|
// Simple token estimation: ~4 chars per token
|
|
29
32
|
export function estimateTokens(text: string): number {
|
|
@@ -44,51 +47,151 @@ export interface ModelLimits {
|
|
|
44
47
|
}
|
|
45
48
|
|
|
46
49
|
/**
 * Tell whether a user message is exactly the `/compact` slash command.
 *
 * Leading/trailing whitespace and letter case are ignored. Any extra text
 * after the command makes the message an ordinary message.
 *
 * @param content - Raw text of the user message.
 * @returns `true` only when the normalised text equals `/compact`.
 */
export function isCompactCommand(content: string): boolean {
	return content.trim().toLowerCase() === '/compact';
}
|
|
52
56
|
|
|
53
|
-
|
|
54
|
-
|
|
57
|
+
/**
 * Render one stored message part as a single transcript line.
 *
 * Returns `null` when the part carries nothing to summarise: unparsable JSON,
 * non-object content, an unknown part type, or an empty text / unnamed tool call.
 */
function formatPartForCompaction(
	role: string,
	partType: string,
	rawContent: string | null | undefined,
): string | null {
	let content: {
		text?: unknown;
		name?: unknown;
		args?: unknown;
		result?: unknown;
	} | null;
	try {
		content = JSON.parse(rawContent ?? '{}');
	} catch {
		// Malformed content is skipped rather than aborting the whole build.
		return null;
	}
	if (content === null || typeof content !== 'object') return null;

	if (partType === 'text' && content.text) {
		// Cap each text part so a single huge message cannot eat the budget.
		return `[${role.toUpperCase()}]: ${content.text}`.slice(0, 3000);
	}

	if (partType === 'tool_call' && content.name) {
		// Keep the tool name plus a short view of its arguments (paths, commands, ...).
		const argsStr =
			typeof content.args === 'object'
				? JSON.stringify(content.args).slice(0, 500)
				: '';
		return `[TOOL ${content.name}]: ${argsStr}`;
	}

	if (partType === 'tool_result' && content.result !== null) {
		// Enough of the result for the model to understand what happened.
		const resultStr =
			typeof content.result === 'string'
				? content.result.slice(0, 1500)
				: JSON.stringify(content.result ?? '').slice(0, 1500);
		return `[RESULT]: ${resultStr}`;
	}

	return null;
}

/**
 * Build the plain-text transcript that the LLM is asked to summarise.
 *
 * Text, tool_call and tool_result parts of the session are rendered one per
 * line (each individually truncated); parts already marked `compactedAt` are
 * skipped. When the transcript exceeds the character budget, the OLDEST
 * messages are dropped and a "[...earlier content truncated...]" marker is
 * placed at the top, so the most recent context is always retained.
 *
 * The budget is checked between messages, so the result may overshoot it by
 * up to one message's worth of lines.
 *
 * @param db - Database handle.
 * @param sessionId - Session whose messages are rendered.
 * @param contextTokenLimit - Max tokens for context (uses ~4 chars per token
 *   estimate). Defaults to 60k chars (~15k tokens) when omitted or 0.
 * @returns The transcript lines joined with `\n`, in chronological order.
 */
export async function buildCompactionContext(
	db: Awaited<ReturnType<typeof getDb>>,
	sessionId: string,
	contextTokenLimit?: number,
): Promise<string> {
	const allMessages = await db
		.select()
		.from(messages)
		.where(eq(messages.sessionId, sessionId))
		.orderBy(asc(messages.createdAt));

	const maxChars = contextTokenLimit ? contextTokenLimit * 4 : 60000;

	// Walk newest -> oldest so that, when the budget runs out, it is the
	// earliest messages that get dropped (which is what the marker announces).
	const newestFirst: string[][] = [];
	let totalChars = 0;
	let truncated = false;

	for (let i = allMessages.length - 1; i >= 0; i--) {
		if (totalChars > maxChars) {
			truncated = true;
			break;
		}

		const msg = allMessages[i];
		const parts = await db
			.select()
			.from(messageParts)
			.where(eq(messageParts.messageId, msg.id))
			.orderBy(asc(messageParts.index));

		const messageLines: string[] = [];
		for (const part of parts) {
			if (part.compactedAt) continue; // Skip already compacted

			const line = formatPartForCompaction(msg.role, part.type, part.content);
			if (line === null) continue;

			messageLines.push(line);
			// Charge what is actually emitted, not the untruncated source text.
			totalChars += line.length;
		}

		if (messageLines.length > 0) newestFirst.push(messageLines);
	}

	// Restore chronological order; parts inside a message are already ordered.
	const lines = newestFirst.reverse().flat();
	if (truncated) lines.unshift('[...earlier content truncated...]');

	return lines.join('\n');
}
|
|
65
127
|
|
|
66
128
|
/**
 * Return the fixed instruction text that is added to the system prompt when
 * the model is asked to produce a compaction summary.
 *
 * The text asks for a structured summary (goals, actions, decisions, current
 * state, critical context) that starts with a "Context Compacted" header.
 */
export function getCompactionSystemPrompt(): string {
	const instructions = `
The user has requested to compact the conversation. Generate a comprehensive summary that captures:

1. **Main Goals**: What was the user trying to accomplish?
2. **Key Actions**: What files were created, modified, or deleted?
3. **Important Decisions**: What approaches or solutions were chosen and why?
4. **Current State**: What is done and what might be pending?
5. **Critical Context**: Any gotchas, errors encountered, or important details for continuing.

Format your response as a clear, structured summary. Start with "📦 **Context Compacted**" header.
Keep under 2000 characters but be thorough. This summary will replace detailed tool history.
`;
	return instructions;
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Mark old tool_call and tool_result parts as compacted.
|
|
148
|
+
* Called after the compaction summary response is complete.
|
|
68
149
|
*
|
|
69
|
-
*
|
|
70
|
-
*
|
|
150
|
+
* Protects:
|
|
151
|
+
* - Last N tokens of tool results (PRUNE_PROTECT)
|
|
152
|
+
* - Last 2 user turns
|
|
153
|
+
* - Protected tool names (skill, etc.)
|
|
71
154
|
*/
|
|
72
|
-
export async function
|
|
155
|
+
export async function markSessionCompacted(
|
|
73
156
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
74
157
|
sessionId: string,
|
|
75
|
-
|
|
76
|
-
|
|
158
|
+
compactMessageId: string,
|
|
159
|
+
): Promise<{ compacted: number; saved: number }> {
|
|
160
|
+
debugLog(`[compaction] Marking session ${sessionId} as compacted`);
|
|
77
161
|
|
|
78
|
-
// Get
|
|
79
|
-
const
|
|
162
|
+
// Get the compact message to find the cutoff point
|
|
163
|
+
const compactMsg = await db
|
|
80
164
|
.select()
|
|
81
165
|
.from(messages)
|
|
82
|
-
.where(eq(messages.
|
|
166
|
+
.where(eq(messages.id, compactMessageId))
|
|
167
|
+
.limit(1);
|
|
168
|
+
|
|
169
|
+
if (!compactMsg.length) {
|
|
170
|
+
debugLog('[compaction] Compact message not found');
|
|
171
|
+
return { compacted: 0, saved: 0 };
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
const cutoffTime = compactMsg[0].createdAt;
|
|
175
|
+
|
|
176
|
+
// Get all messages before the compact command
|
|
177
|
+
const oldMessages = await db
|
|
178
|
+
.select()
|
|
179
|
+
.from(messages)
|
|
180
|
+
.where(
|
|
181
|
+
and(
|
|
182
|
+
eq(messages.sessionId, sessionId),
|
|
183
|
+
lt(messages.createdAt, cutoffTime),
|
|
184
|
+
),
|
|
185
|
+
)
|
|
83
186
|
.orderBy(desc(messages.createdAt));
|
|
84
187
|
|
|
85
188
|
let totalTokens = 0;
|
|
86
|
-
let
|
|
87
|
-
const
|
|
189
|
+
let compactedTokens = 0;
|
|
190
|
+
const toCompact: Array<{ id: string; content: string }> = [];
|
|
88
191
|
let turns = 0;
|
|
89
192
|
|
|
90
193
|
// Go backwards through messages
|
|
91
|
-
for (const msg of
|
|
194
|
+
for (const msg of oldMessages) {
|
|
92
195
|
// Count user messages as turns
|
|
93
196
|
if (msg.role === 'user') {
|
|
94
197
|
turns++;
|
|
@@ -105,31 +208,113 @@ export async function pruneSession(
|
|
|
105
208
|
.orderBy(desc(messageParts.index));
|
|
106
209
|
|
|
107
210
|
for (const part of parts) {
|
|
108
|
-
// Only
|
|
109
|
-
if (part.type !== 'tool_result') continue;
|
|
211
|
+
// Only compact tool_call and tool_result
|
|
212
|
+
if (part.type !== 'tool_call' && part.type !== 'tool_result') continue;
|
|
110
213
|
|
|
111
214
|
// Skip protected tools
|
|
112
|
-
if (part.toolName &&
|
|
215
|
+
if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) {
|
|
113
216
|
continue;
|
|
114
217
|
}
|
|
115
218
|
|
|
116
|
-
//
|
|
117
|
-
|
|
219
|
+
// Skip already compacted
|
|
220
|
+
if (part.compactedAt) continue;
|
|
221
|
+
|
|
222
|
+
// Parse content
|
|
223
|
+
let content: { result?: unknown; args?: unknown };
|
|
118
224
|
try {
|
|
119
225
|
content = JSON.parse(part.content ?? '{}');
|
|
120
226
|
} catch {
|
|
121
227
|
continue;
|
|
122
228
|
}
|
|
123
229
|
|
|
124
|
-
//
|
|
125
|
-
|
|
230
|
+
// Estimate tokens
|
|
231
|
+
const contentStr =
|
|
232
|
+
part.type === 'tool_result'
|
|
233
|
+
? typeof content.result === 'string'
|
|
234
|
+
? content.result
|
|
235
|
+
: JSON.stringify(content.result ?? '')
|
|
236
|
+
: JSON.stringify(content.args ?? '');
|
|
237
|
+
|
|
238
|
+
const estimate = estimateTokens(contentStr);
|
|
239
|
+
totalTokens += estimate;
|
|
240
|
+
|
|
241
|
+
// If we've exceeded the protection threshold, mark for compaction
|
|
242
|
+
if (totalTokens > PRUNE_PROTECT) {
|
|
243
|
+
compactedTokens += estimate;
|
|
244
|
+
toCompact.push({ id: part.id, content: part.content ?? '{}' });
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
debugLog(
|
|
250
|
+
`[compaction] Found ${toCompact.length} parts to compact, saving ~${compactedTokens} tokens`,
|
|
251
|
+
);
|
|
252
|
+
|
|
253
|
+
if (toCompact.length > 0) {
|
|
254
|
+
const compactedAt = Date.now();
|
|
255
|
+
|
|
256
|
+
for (const part of toCompact) {
|
|
257
|
+
try {
|
|
258
|
+
await db
|
|
259
|
+
.update(messageParts)
|
|
260
|
+
.set({ compactedAt })
|
|
261
|
+
.where(eq(messageParts.id, part.id));
|
|
262
|
+
} catch (err) {
|
|
126
263
|
debugLog(
|
|
127
|
-
`[compaction]
|
|
264
|
+
`[compaction] Failed to mark part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
|
|
128
265
|
);
|
|
129
|
-
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
debugLog(`[compaction] Marked ${toCompact.length} parts as compacted`);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
return { compacted: toCompact.length, saved: compactedTokens };
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Legacy prune function - marks tool results as compacted.
|
|
277
|
+
* Used for automatic overflow-triggered compaction.
|
|
278
|
+
*/
|
|
279
|
+
export async function pruneSession(
|
|
280
|
+
db: Awaited<ReturnType<typeof getDb>>,
|
|
281
|
+
sessionId: string,
|
|
282
|
+
): Promise<{ pruned: number; saved: number }> {
|
|
283
|
+
debugLog(`[compaction] Auto-pruning session ${sessionId}`);
|
|
284
|
+
|
|
285
|
+
const allMessages = await db
|
|
286
|
+
.select()
|
|
287
|
+
.from(messages)
|
|
288
|
+
.where(eq(messages.sessionId, sessionId))
|
|
289
|
+
.orderBy(desc(messages.createdAt));
|
|
290
|
+
|
|
291
|
+
let totalTokens = 0;
|
|
292
|
+
let prunedTokens = 0;
|
|
293
|
+
const toPrune: Array<{ id: string }> = [];
|
|
294
|
+
let turns = 0;
|
|
295
|
+
|
|
296
|
+
for (const msg of allMessages) {
|
|
297
|
+
if (msg.role === 'user') turns++;
|
|
298
|
+
if (turns < 2) continue;
|
|
299
|
+
|
|
300
|
+
const parts = await db
|
|
301
|
+
.select()
|
|
302
|
+
.from(messageParts)
|
|
303
|
+
.where(eq(messageParts.messageId, msg.id))
|
|
304
|
+
.orderBy(desc(messageParts.index));
|
|
305
|
+
|
|
306
|
+
for (const part of parts) {
|
|
307
|
+
if (part.type !== 'tool_result') continue;
|
|
308
|
+
if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) continue;
|
|
309
|
+
if (part.compactedAt) continue;
|
|
310
|
+
|
|
311
|
+
let content: { result?: unknown };
|
|
312
|
+
try {
|
|
313
|
+
content = JSON.parse(part.content ?? '{}');
|
|
314
|
+
} catch {
|
|
315
|
+
continue;
|
|
130
316
|
}
|
|
131
317
|
|
|
132
|
-
// Estimate tokens for this result
|
|
133
318
|
const estimate = estimateTokens(
|
|
134
319
|
typeof content.result === 'string'
|
|
135
320
|
? content.result
|
|
@@ -137,118 +322,215 @@ export async function pruneSession(
|
|
|
137
322
|
);
|
|
138
323
|
totalTokens += estimate;
|
|
139
324
|
|
|
140
|
-
// If we've exceeded the protection threshold, mark for pruning
|
|
141
325
|
if (totalTokens > PRUNE_PROTECT) {
|
|
142
326
|
prunedTokens += estimate;
|
|
143
|
-
toPrune.push({ id: part.id
|
|
327
|
+
toPrune.push({ id: part.id });
|
|
144
328
|
}
|
|
145
329
|
}
|
|
146
330
|
}
|
|
147
331
|
|
|
148
|
-
|
|
149
|
-
`[compaction] Found ${toPrune.length} tool results to prune, saving ~${prunedTokens} tokens`,
|
|
150
|
-
);
|
|
151
|
-
|
|
152
|
-
// Only prune if we'd save enough tokens to be worthwhile
|
|
153
|
-
if (prunedTokens > PRUNE_MINIMUM) {
|
|
332
|
+
if (toPrune.length > 0) {
|
|
154
333
|
const compactedAt = Date.now();
|
|
155
|
-
|
|
156
334
|
for (const part of toPrune) {
|
|
157
335
|
try {
|
|
158
|
-
const content = JSON.parse(part.content);
|
|
159
|
-
// Keep the structure but mark as compacted
|
|
160
|
-
content.compactedAt = compactedAt;
|
|
161
|
-
// Keep a small summary if it was a string result
|
|
162
|
-
if (typeof content.result === 'string' && content.result.length > 100) {
|
|
163
|
-
content.resultSummary = `${content.result.slice(0, 100)}...`;
|
|
164
|
-
}
|
|
165
|
-
// Clear the actual result to save space
|
|
166
|
-
content.result = null;
|
|
167
|
-
|
|
168
336
|
await db
|
|
169
337
|
.update(messageParts)
|
|
170
|
-
.set({
|
|
338
|
+
.set({ compactedAt })
|
|
171
339
|
.where(eq(messageParts.id, part.id));
|
|
172
|
-
} catch
|
|
173
|
-
debugLog(
|
|
174
|
-
`[compaction] Failed to prune part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
|
|
175
|
-
);
|
|
176
|
-
}
|
|
340
|
+
} catch {}
|
|
177
341
|
}
|
|
178
|
-
|
|
179
|
-
debugLog(
|
|
180
|
-
`[compaction] Pruned ${toPrune.length} tool results, saved ~${prunedTokens} tokens`,
|
|
181
|
-
);
|
|
182
|
-
} else {
|
|
183
|
-
debugLog(
|
|
184
|
-
`[compaction] Skipping prune, would only save ${prunedTokens} tokens (min: ${PRUNE_MINIMUM})`,
|
|
185
|
-
);
|
|
186
342
|
}
|
|
187
343
|
|
|
188
344
|
return { pruned: toPrune.length, saved: prunedTokens };
|
|
189
345
|
}
|
|
190
346
|
|
|
347
|
+
/**
 * Decide whether the reported token usage no longer fits the model's context.
 *
 * Usage is `input + cacheRead + output`; the room available is the context
 * window minus the tokens reserved for output. A context limit of 0 disables
 * the check.
 *
 * @param tokens - Token counts reported for the request.
 * @param limits - Context and output limits of the model.
 * @returns `true` when usage is strictly greater than the usable context.
 */
export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
	const { context, output: reservedForOutput } = limits;
	if (context === 0) return false;

	const used = tokens.input + (tokens.cacheRead ?? 0) + tokens.output;
	return used > context - reservedForOutput;
}
|
|
358
|
+
|
|
191
359
|
/**
 * Look up context/output token limits for a model in a built-in table.
 *
 * Resolution order:
 *   1. exact match on the model id;
 *   2. first table entry (in declaration order) whose key contains the model
 *      id or is contained in it, e.g. "claude-3-5-sonnet" resolves to
 *      "claude-3-5-sonnet-20241022".
 *
 * @param _provider - Unused; kept so existing call sites stay valid.
 * @param model - Model identifier to look up.
 * @returns The limits, or `null` when the model is blank or unknown.
 */
export function getModelLimits(
	_provider: string,
	model: string,
): ModelLimits | null {
	const defaults: Record<string, ModelLimits> = {
		'claude-sonnet-4-20250514': { context: 200000, output: 16000 },
		'claude-3-5-sonnet-20241022': { context: 200000, output: 8192 },
		'claude-3-5-haiku-20241022': { context: 200000, output: 8192 },
		'gpt-4o': { context: 128000, output: 16384 },
		'gpt-4o-mini': { context: 128000, output: 16384 },
		o1: { context: 200000, output: 100000 },
		'o3-mini': { context: 200000, output: 100000 },
		'gemini-2.0-flash': { context: 1000000, output: 8192 },
		'gemini-1.5-pro': { context: 2000000, output: 8192 },
	};

	// A blank id is a substring of every key and would otherwise resolve to
	// the first table entry instead of "unknown".
	if (model.trim() === '') return null;

	// Go through own entries only, so ids such as "constructor" cannot pick up
	// inherited Object.prototype members via plain property access.
	const entries = Object.entries(defaults);

	const exact = entries.find(([key]) => key === model);
	if (exact) return exact[1];

	const partial = entries.find(
		([key]) => model.includes(key) || key.includes(model),
	);
	return partial ? partial[1] : null;
}
|
|
238
386
|
|
|
239
387
|
/**
 * Report whether a message part has been marked as compacted.
 *
 * @param part - Any object exposing the part's `compactedAt` value.
 * @returns `true` when `compactedAt` is set to a truthy value; a missing,
 *   `null` or `0` value counts as not compacted.
 */
export function isCompacted(part: { compactedAt?: number | null }): boolean {
	return Boolean(part.compactedAt);
}
|
|
250
393
|
|
|
394
|
+
export const COMPACTED_PLACEHOLDER = '[Compacted]';
|
|
395
|
+
|
|
251
396
|
/**
 * Perform auto-compaction when context overflows.
 *
 * Steps:
 *   1. Build the transcript to summarise, sized to ~50% of the model's context
 *      window (minimum 15k tokens, also used when the model is unknown).
 *   2. Insert an empty text part on the assistant message and stream the
 *      summary into it, publishing a `message.part.delta` event per chunk.
 *   3. Persist the final summary and mark older tool parts as compacted,
 *      using the assistant message as the cutoff.
 *
 * Errors never propagate; every failure is reported through the return value.
 * If a failure happens after the text part was inserted, the part is closed
 * with whatever text was streamed so it is not left open in the database.
 *
 * @param db - Database handle.
 * @param sessionId - Session being compacted.
 * @param assistantMessageId - Message that receives the summary part.
 * @param publishFn - Sink for streaming delta events.
 * @param provider - Provider of the session's model.
 * @param modelId - The session's model; reused for the summary request.
 * @returns `success: true` with the summary and cutoff message id, or
 *   `success: false` with an error description.
 */
export async function performAutoCompaction(
	db: Awaited<ReturnType<typeof getDb>>,
	sessionId: string,
	assistantMessageId: string,
	publishFn: (event: {
		type: string;
		sessionId: string;
		payload: Record<string, unknown>;
	}) => void,
	provider: string,
	modelId: string,
): Promise<{
	success: boolean;
	summary?: string;
	error?: string;
	compactMessageId?: string;
}> {
	debugLog(`[compaction] Starting auto-compaction for session ${sessionId}`);

	// Declared outside the try block so the error path can see how far we got.
	let openPartId: string | undefined;
	let summary = '';

	try {
		// 1. Get model limits and build compaction context
		const limits = getModelLimits(provider, modelId);
		// Use 50% of context window for compaction, minimum 15k tokens
		const contextTokenLimit = limits
			? Math.max(Math.floor(limits.context * 0.5), 15000)
			: 15000;
		debugLog(
			`[compaction] Model ${modelId} context limit: ${limits?.context ?? 'unknown'}, using ${contextTokenLimit} tokens for compaction`,
		);

		const context = await buildCompactionContext(
			db,
			sessionId,
			contextTokenLimit,
		);
		if (!context || context.length < 100) {
			debugLog('[compaction] Not enough context to compact');
			return { success: false, error: 'Not enough context to compact' };
		}

		// 2. Stream the compaction summary, using the session's model for consistency
		const cfg = await loadConfig();
		debugLog(
			`[compaction] Using session model ${provider}/${modelId} for auto-compaction`,
		);
		const model = await resolveModel(
			provider as Parameters<typeof resolveModel>[0],
			modelId,
			cfg,
		);

		// Create the summary part only after the model resolved successfully,
		// so a bad provider/model never leaves a part behind.
		const compactPartId = crypto.randomUUID();
		const now = Date.now();

		await db.insert(messageParts).values({
			id: compactPartId,
			messageId: assistantMessageId,
			index: 0,
			stepIndex: 0,
			type: 'text',
			content: JSON.stringify({ text: '' }),
			agent: 'system',
			provider: provider,
			model: modelId,
			startedAt: now,
		});
		openPartId = compactPartId;

		const prompt = getCompactionSystemPrompt();
		const result = streamText({
			model,
			system: `${prompt}\n\nIMPORTANT: Generate a comprehensive summary. This will replace the detailed conversation history.`,
			messages: [
				{
					role: 'user',
					content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${context}\n</conversation-to-summarize>`,
				},
			],
			maxTokens: 2000,
		});

		for await (const chunk of result.textStream) {
			summary += chunk;

			// Publish delta event so UI updates in real-time
			publishFn({
				type: 'message.part.delta',
				sessionId,
				payload: {
					messageId: assistantMessageId,
					partId: compactPartId,
					stepIndex: 0,
					type: 'text',
					delta: chunk,
				},
			});
		}

		// Update the part with final content
		await db
			.update(messageParts)
			.set({
				content: JSON.stringify({ text: summary }),
				completedAt: Date.now(),
			})
			.where(eq(messageParts.id, compactPartId));
		openPartId = undefined;

		if (!summary || summary.length < 50) {
			debugLog('[compaction] Failed to generate summary');
			return { success: false, error: 'Failed to generate summary' };
		}

		debugLog(`[compaction] Generated summary: ${summary.slice(0, 100)}...`);

		// 3. Mark old parts as compacted (using the assistant message as the cutoff)
		const compactResult = await markSessionCompacted(
			db,
			sessionId,
			assistantMessageId,
		);
		debugLog(
			`[compaction] Marked ${compactResult.compacted} parts as compacted, saved ~${compactResult.saved} tokens`,
		);

		return { success: true, summary, compactMessageId: assistantMessageId };
	} catch (err) {
		const errorMsg = err instanceof Error ? err.message : String(err);
		debugLog(`[compaction] Auto-compaction failed: ${errorMsg}`);

		// Best effort: close the part that was inserted before the failure,
		// keeping the text whose deltas were already published.
		if (openPartId) {
			try {
				await db
					.update(messageParts)
					.set({
						content: JSON.stringify({ text: summary }),
						completedAt: Date.now(),
					})
					.where(eq(messageParts.id, openPartId));
			} catch (cleanupErr) {
				debugLog(
					`[compaction] Failed to finalize summary part ${openPartId}: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`,
				);
			}
		}

		return { success: false, error: errorMsg };
	}
}
|
|
@@ -4,7 +4,6 @@ import { messages, messageParts } from '@agi-cli/database/schema';
|
|
|
4
4
|
import { eq, asc } from 'drizzle-orm';
|
|
5
5
|
import { debugLog } from './debug.ts';
|
|
6
6
|
import { ToolHistoryTracker } from './history/tool-history-tracker.ts';
|
|
7
|
-
import { COMPACTED_PLACEHOLDER } from './compaction.ts';
|
|
8
7
|
|
|
9
8
|
/**
|
|
10
9
|
* Builds the conversation history for a session from the database,
|
|
@@ -89,6 +88,9 @@ export async function buildHistoryMessages(
|
|
|
89
88
|
if (t) assistantParts.push({ type: 'text', text: t });
|
|
90
89
|
} catch {}
|
|
91
90
|
} else if (p.type === 'tool_call') {
|
|
91
|
+
// Skip compacted tool calls entirely
|
|
92
|
+
if (p.compactedAt) continue;
|
|
93
|
+
|
|
92
94
|
try {
|
|
93
95
|
const obj = JSON.parse(p.content ?? '{}') as {
|
|
94
96
|
name?: string;
|
|
@@ -104,22 +106,20 @@ export async function buildHistoryMessages(
|
|
|
104
106
|
}
|
|
105
107
|
} catch {}
|
|
106
108
|
} else if (p.type === 'tool_result') {
|
|
109
|
+
// Skip compacted tool results entirely
|
|
110
|
+
if (p.compactedAt) continue;
|
|
111
|
+
|
|
107
112
|
try {
|
|
108
113
|
const obj = JSON.parse(p.content ?? '{}') as {
|
|
109
114
|
name?: string;
|
|
110
115
|
callId?: string;
|
|
111
116
|
result?: unknown;
|
|
112
|
-
compactedAt?: number;
|
|
113
117
|
};
|
|
114
118
|
if (obj.callId) {
|
|
115
|
-
// If this tool result was compacted, return placeholder instead
|
|
116
|
-
const result = obj.compactedAt
|
|
117
|
-
? COMPACTED_PLACEHOLDER
|
|
118
|
-
: obj.result;
|
|
119
119
|
toolResults.push({
|
|
120
120
|
name: obj.name ?? 'tool',
|
|
121
121
|
callId: obj.callId,
|
|
122
|
-
result,
|
|
122
|
+
result: obj.result,
|
|
123
123
|
});
|
|
124
124
|
}
|
|
125
125
|
} catch {}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { generateText } from 'ai';
|
|
1
|
+
import { generateText, streamText } from 'ai';
|
|
2
2
|
import { eq } from 'drizzle-orm';
|
|
3
3
|
import type { AGIConfig } from '@agi-cli/sdk';
|
|
4
4
|
import type { DB } from '@agi-cli/database';
|
|
@@ -9,6 +9,7 @@ import { runSessionLoop } from './runner.ts';
|
|
|
9
9
|
import { resolveModel } from './provider.ts';
|
|
10
10
|
import { getFastModel, type ProviderId } from '@agi-cli/sdk';
|
|
11
11
|
import { debugLog } from './debug.ts';
|
|
12
|
+
import { isCompactCommand, buildCompactionContext } from './compaction.ts';
|
|
12
13
|
|
|
13
14
|
type SessionRow = typeof sessions.$inferSelect;
|
|
14
15
|
|
|
@@ -119,6 +120,28 @@ export async function dispatchAssistantMessage(
|
|
|
119
120
|
`[MESSAGE_SERVICE] Enqueuing assistant run with userContext: ${userContext ? `${userContext.substring(0, 50)}...` : 'NONE'}`,
|
|
120
121
|
);
|
|
121
122
|
|
|
123
|
+
// Detect /compact command and build context with model-aware limits
|
|
124
|
+
const isCompact = isCompactCommand(content);
|
|
125
|
+
let compactionContext: string | undefined;
|
|
126
|
+
|
|
127
|
+
if (isCompact) {
|
|
128
|
+
debugLog('[MESSAGE_SERVICE] Detected /compact command, building context');
|
|
129
|
+
const { getModelLimits } = await import('./compaction.ts');
|
|
130
|
+
const limits = getModelLimits(provider, model);
|
|
131
|
+
// Use 50% of context window for compaction, minimum 15k tokens
|
|
132
|
+
const contextTokenLimit = limits
|
|
133
|
+
? Math.max(Math.floor(limits.context * 0.5), 15000)
|
|
134
|
+
: 15000;
|
|
135
|
+
compactionContext = await buildCompactionContext(
|
|
136
|
+
db,
|
|
137
|
+
sessionId,
|
|
138
|
+
contextTokenLimit,
|
|
139
|
+
);
|
|
140
|
+
debugLog(
|
|
141
|
+
`[message-service] /compact context length: ${compactionContext.length}, limit: ${contextTokenLimit} tokens`,
|
|
142
|
+
);
|
|
143
|
+
}
|
|
144
|
+
|
|
122
145
|
enqueueAssistantRun(
|
|
123
146
|
{
|
|
124
147
|
sessionId,
|
|
@@ -130,6 +153,8 @@ export async function dispatchAssistantMessage(
|
|
|
130
153
|
oneShot: Boolean(oneShot),
|
|
131
154
|
userContext,
|
|
132
155
|
reasoning,
|
|
156
|
+
isCompactCommand: isCompact,
|
|
157
|
+
compactionContext,
|
|
133
158
|
},
|
|
134
159
|
runSessionLoop,
|
|
135
160
|
);
|
|
@@ -240,7 +265,11 @@ async function generateSessionTitle(args: {
|
|
|
240
265
|
|
|
241
266
|
// Use a smaller, faster model for title generation
|
|
242
267
|
// Look up the cheapest/fastest model from the catalog for this provider
|
|
243
|
-
|
|
268
|
+
// For OpenAI OAuth, use codex-mini as it works with ChatGPT backend
|
|
269
|
+
const titleModel =
|
|
270
|
+
needsSpoof && provider === 'openai'
|
|
271
|
+
? 'gpt-5.1-codex-mini'
|
|
272
|
+
: (getFastModel(provider) ?? modelName);
|
|
244
273
|
debugLog(`[TITLE_GEN] Using title model: ${titleModel}`);
|
|
245
274
|
const model = await resolveModel(provider, titleModel, cfg);
|
|
246
275
|
|
|
@@ -291,15 +320,29 @@ async function generateSessionTitle(args: {
|
|
|
291
320
|
);
|
|
292
321
|
}
|
|
293
322
|
|
|
294
|
-
debugLog('[TITLE_GEN] Calling generateText...');
|
|
295
323
|
let modelTitle = '';
|
|
296
324
|
try {
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
325
|
+
// ChatGPT backend requires streaming - use streamText for OAuth
|
|
326
|
+
if (needsSpoof) {
|
|
327
|
+
debugLog('[TITLE_GEN] Using streamText for OAuth...');
|
|
328
|
+
const result = streamText({
|
|
329
|
+
model,
|
|
330
|
+
system,
|
|
331
|
+
messages: messagesArray,
|
|
332
|
+
});
|
|
333
|
+
for await (const chunk of result.textStream) {
|
|
334
|
+
modelTitle += chunk;
|
|
335
|
+
}
|
|
336
|
+
modelTitle = modelTitle.trim();
|
|
337
|
+
} else {
|
|
338
|
+
debugLog('[TITLE_GEN] Using generateText...');
|
|
339
|
+
const out = await generateText({
|
|
340
|
+
model,
|
|
341
|
+
system,
|
|
342
|
+
messages: messagesArray,
|
|
343
|
+
});
|
|
344
|
+
modelTitle = (out?.text || '').trim();
|
|
345
|
+
}
|
|
303
346
|
|
|
304
347
|
debugLog('[TITLE_GEN] Raw response from model:');
|
|
305
348
|
debugLog(`[TITLE_GEN] "${modelTitle}"`);
|
package/src/runtime/prompt.ts
CHANGED
|
@@ -28,6 +28,7 @@ export async function composeSystemPrompt(options: {
|
|
|
28
28
|
includeEnvironment?: boolean;
|
|
29
29
|
includeProjectTree?: boolean;
|
|
30
30
|
userContext?: string;
|
|
31
|
+
contextSummary?: string;
|
|
31
32
|
}): Promise<ComposedSystemPrompt> {
|
|
32
33
|
const components: string[] = [];
|
|
33
34
|
if (options.spoofPrompt) {
|
|
@@ -105,6 +106,19 @@ export async function composeSystemPrompt(options: {
|
|
|
105
106
|
components.push('user-context');
|
|
106
107
|
}
|
|
107
108
|
|
|
109
|
+
// Add compacted conversation summary if present
|
|
110
|
+
if (options.contextSummary?.trim()) {
|
|
111
|
+
const summaryBlock = [
|
|
112
|
+
'<compacted-conversation-summary>',
|
|
113
|
+
'The conversation was compacted to save context. Here is a summary of the previous context:',
|
|
114
|
+
'',
|
|
115
|
+
options.contextSummary.trim(),
|
|
116
|
+
'</compacted-conversation-summary>',
|
|
117
|
+
].join('\n');
|
|
118
|
+
parts.push(summaryBlock);
|
|
119
|
+
components.push('context-summary');
|
|
120
|
+
}
|
|
121
|
+
|
|
108
122
|
// Add terminal context if available
|
|
109
123
|
const terminalManager = getTerminalManager();
|
|
110
124
|
if (terminalManager) {
|
package/src/runtime/runner.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { hasToolCall, streamText } from 'ai';
|
|
2
2
|
import { loadConfig } from '@agi-cli/sdk';
|
|
3
3
|
import { getDb } from '@agi-cli/database';
|
|
4
|
-
import { messageParts } from '@agi-cli/database/schema';
|
|
4
|
+
import { messageParts, sessions } from '@agi-cli/database/schema';
|
|
5
5
|
import { eq } from 'drizzle-orm';
|
|
6
6
|
import { resolveModel } from './provider.ts';
|
|
7
7
|
import { resolveAgentConfig } from './agent-registry.ts';
|
|
@@ -32,6 +32,7 @@ import {
|
|
|
32
32
|
createAbortHandler,
|
|
33
33
|
createFinishHandler,
|
|
34
34
|
} from './stream-handlers.ts';
|
|
35
|
+
import { getCompactionSystemPrompt, pruneSession } from './compaction.ts';
|
|
35
36
|
|
|
36
37
|
export { enqueueAssistantRun, abortSession } from './session-queue.ts';
|
|
37
38
|
export { getRunnerState } from './session-queue.ts';
|
|
@@ -78,10 +79,30 @@ async function runAssistant(opts: RunOpts) {
|
|
|
78
79
|
|
|
79
80
|
const agentPrompt = agentCfg.prompt || '';
|
|
80
81
|
|
|
82
|
+
// For /compact command, use minimal history - the compaction context has everything needed
|
|
81
83
|
const historyTimer = time('runner:buildHistory');
|
|
82
|
-
|
|
84
|
+
let history: Awaited<ReturnType<typeof buildHistoryMessages>>;
|
|
85
|
+
if (opts.isCompactCommand && opts.compactionContext) {
|
|
86
|
+
debugLog('[RUNNER] Using minimal history for /compact command');
|
|
87
|
+
history = [];
|
|
88
|
+
} else {
|
|
89
|
+
history = await buildHistoryMessages(db, opts.sessionId);
|
|
90
|
+
}
|
|
83
91
|
historyTimer.end({ messages: history.length });
|
|
84
92
|
|
|
93
|
+
// Fetch session to get context summary for compaction
|
|
94
|
+
const sessionRows = await db
|
|
95
|
+
.select()
|
|
96
|
+
.from(sessions)
|
|
97
|
+
.where(eq(sessions.id, opts.sessionId))
|
|
98
|
+
.limit(1);
|
|
99
|
+
const contextSummary = sessionRows[0]?.contextSummary ?? undefined;
|
|
100
|
+
if (contextSummary) {
|
|
101
|
+
debugLog(
|
|
102
|
+
`[RUNNER] Using context summary from compaction (${contextSummary.length} chars)`,
|
|
103
|
+
);
|
|
104
|
+
}
|
|
105
|
+
|
|
85
106
|
// FIX: For OAuth, we need to check if this is the first ASSISTANT message
|
|
86
107
|
// The user message is already in history by this point, so history.length will be > 0
|
|
87
108
|
// We need to add additionalSystemMessages on the first assistant turn
|
|
@@ -127,6 +148,7 @@ async function runAssistant(opts: RunOpts) {
|
|
|
127
148
|
spoofPrompt: undefined,
|
|
128
149
|
includeProjectTree: isFirstMessage,
|
|
129
150
|
userContext: opts.userContext,
|
|
151
|
+
contextSummary,
|
|
130
152
|
});
|
|
131
153
|
oauthFullPromptComponents = fullPrompt.components;
|
|
132
154
|
|
|
@@ -157,6 +179,7 @@ async function runAssistant(opts: RunOpts) {
|
|
|
157
179
|
spoofPrompt: undefined,
|
|
158
180
|
includeProjectTree: isFirstMessage,
|
|
159
181
|
userContext: opts.userContext,
|
|
182
|
+
contextSummary,
|
|
160
183
|
});
|
|
161
184
|
system = composed.prompt;
|
|
162
185
|
systemComponents = composed.components;
|
|
@@ -169,6 +192,23 @@ async function runAssistant(opts: RunOpts) {
|
|
|
169
192
|
})}`,
|
|
170
193
|
);
|
|
171
194
|
|
|
195
|
+
// Inject compaction prompt if this is a /compact command
|
|
196
|
+
if (opts.isCompactCommand && opts.compactionContext) {
|
|
197
|
+
debugLog('[RUNNER] Injecting compaction context for /compact command');
|
|
198
|
+
const compactPrompt = getCompactionSystemPrompt();
|
|
199
|
+
// Add compaction instructions as system message
|
|
200
|
+
// Don't modify `system` directly as it may contain OAuth spoof prompt
|
|
201
|
+
additionalSystemMessages.push({
|
|
202
|
+
role: 'system',
|
|
203
|
+
content: compactPrompt,
|
|
204
|
+
});
|
|
205
|
+
// Add the conversation context as a USER message (Anthropic requires at least one user message)
|
|
206
|
+
additionalSystemMessages.push({
|
|
207
|
+
role: 'user',
|
|
208
|
+
content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${opts.compactionContext}\n</conversation-to-summarize>`,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
|
|
172
212
|
const toolsTimer = time('runner:discoverTools');
|
|
173
213
|
const allTools = await discoverProjectTools(cfg.projectRoot);
|
|
174
214
|
toolsTimer.end({ count: allTools.length });
|
|
@@ -286,7 +326,13 @@ async function runAssistant(opts: RunOpts) {
|
|
|
286
326
|
updateMessageTokensIncremental,
|
|
287
327
|
);
|
|
288
328
|
|
|
289
|
-
const onError = createErrorHandler(
|
|
329
|
+
const onError = createErrorHandler(
|
|
330
|
+
opts,
|
|
331
|
+
db,
|
|
332
|
+
getStepIndex,
|
|
333
|
+
sharedCtx,
|
|
334
|
+
runSessionLoop,
|
|
335
|
+
);
|
|
290
336
|
|
|
291
337
|
const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
|
|
292
338
|
|
|
@@ -491,6 +537,67 @@ async function runAssistant(opts: RunOpts) {
|
|
|
491
537
|
} catch (err) {
|
|
492
538
|
unsubscribeFinish();
|
|
493
539
|
const payload = toErrorPayload(err);
|
|
540
|
+
|
|
541
|
+
// Check if this is a "prompt too long" error and auto-compact
|
|
542
|
+
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
543
|
+
const errorCode = (err as { code?: string })?.code ?? '';
|
|
544
|
+
const responseBody = (err as { responseBody?: string })?.responseBody ?? '';
|
|
545
|
+
const apiErrorType = (err as { apiErrorType?: string })?.apiErrorType ?? '';
|
|
546
|
+
const combinedError = `${errorMessage} ${responseBody}`.toLowerCase();
|
|
547
|
+
debugLog(`[RUNNER] Error caught - message: ${errorMessage.slice(0, 100)}`);
|
|
548
|
+
debugLog(
|
|
549
|
+
`[RUNNER] Error caught - code: ${errorCode}, apiErrorType: ${apiErrorType}`,
|
|
550
|
+
);
|
|
551
|
+
debugLog(
|
|
552
|
+
`[RUNNER] Error caught - responseBody: ${responseBody.slice(0, 200)}`,
|
|
553
|
+
);
|
|
554
|
+
const isPromptTooLong =
|
|
555
|
+
combinedError.includes('prompt is too long') ||
|
|
556
|
+
combinedError.includes('maximum context length') ||
|
|
557
|
+
combinedError.includes('too many tokens') ||
|
|
558
|
+
combinedError.includes('context_length_exceeded') ||
|
|
559
|
+
combinedError.includes('request too large') ||
|
|
560
|
+
combinedError.includes('exceeds the model') ||
|
|
561
|
+
combinedError.includes('input is too long') ||
|
|
562
|
+
errorCode === 'context_length_exceeded' ||
|
|
563
|
+
apiErrorType === 'invalid_request_error';
|
|
564
|
+
debugLog(
|
|
565
|
+
`[RUNNER] isPromptTooLong: ${isPromptTooLong}, isCompactCommand: ${opts.isCompactCommand}`,
|
|
566
|
+
);
|
|
567
|
+
|
|
568
|
+
if (isPromptTooLong && !opts.isCompactCommand) {
|
|
569
|
+
debugLog(
|
|
570
|
+
'[RUNNER] Prompt too long - auto-compacting and will retry on next user message',
|
|
571
|
+
);
|
|
572
|
+
try {
|
|
573
|
+
const pruneResult = await pruneSession(db, opts.sessionId);
|
|
574
|
+
debugLog(
|
|
575
|
+
`[RUNNER] Auto-pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
|
|
576
|
+
);
|
|
577
|
+
|
|
578
|
+
// Publish a system message to inform the user
|
|
579
|
+
publish({
|
|
580
|
+
type: 'error',
|
|
581
|
+
sessionId: opts.sessionId,
|
|
582
|
+
payload: {
|
|
583
|
+
...payload,
|
|
584
|
+
message: `Context too large (${errorMessage.match(/\d+/)?.[0] || 'many'} tokens). Auto-compacted old tool results. Please retry your message.`,
|
|
585
|
+
name: 'ContextOverflow',
|
|
586
|
+
},
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
// Complete the message as failed
|
|
590
|
+
try {
|
|
591
|
+
await completeAssistantMessage({}, opts, db);
|
|
592
|
+
} catch {}
|
|
593
|
+
return;
|
|
594
|
+
} catch (pruneErr) {
|
|
595
|
+
debugLog(
|
|
596
|
+
`[RUNNER] Auto-prune failed: ${pruneErr instanceof Error ? pruneErr.message : String(pruneErr)}`,
|
|
597
|
+
);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
494
601
|
debugLog(`[RUNNER] Error during stream: ${payload.message}`);
|
|
495
602
|
debugLog(
|
|
496
603
|
`[RUNNER] Error stack: ${err instanceof Error ? err.stack : 'no stack'}`,
|
|
@@ -13,8 +13,11 @@ import {
|
|
|
13
13
|
isOverflow,
|
|
14
14
|
getModelLimits,
|
|
15
15
|
type TokenUsage,
|
|
16
|
+
markSessionCompacted,
|
|
17
|
+
performAutoCompaction,
|
|
16
18
|
} from './compaction.ts';
|
|
17
19
|
import { debugLog } from './debug.ts';
|
|
20
|
+
import { enqueueAssistantRun } from './session-queue.ts';
|
|
18
21
|
|
|
19
22
|
type StepFinishEvent = {
|
|
20
23
|
usage?: UsageData;
|
|
@@ -131,14 +134,143 @@ export function createErrorHandler(
|
|
|
131
134
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
132
135
|
getStepIndex: () => number,
|
|
133
136
|
sharedCtx: ToolAdapterContext,
|
|
137
|
+
retryCallback?: (sessionId: string) => Promise<void>,
|
|
134
138
|
) {
|
|
135
139
|
return async (err: unknown) => {
|
|
136
140
|
const errorPayload = toErrorPayload(err);
|
|
137
141
|
const isApiError = APICallError.isInstance(err);
|
|
138
142
|
const stepIndex = getStepIndex();
|
|
139
143
|
|
|
144
|
+
// Check if this is a prompt-too-long error and auto-compact
|
|
145
|
+
// Handle nested error structures from AI SDK
|
|
146
|
+
const errObj = err as Record<string, unknown>;
|
|
147
|
+
const nestedError = (errObj?.error as Record<string, unknown>)?.error as
|
|
148
|
+
| Record<string, unknown>
|
|
149
|
+
| undefined;
|
|
150
|
+
const errorCode =
|
|
151
|
+
(errObj?.code as string) ?? (nestedError?.code as string) ?? '';
|
|
152
|
+
const errorType =
|
|
153
|
+
(errObj?.apiErrorType as string) ?? (nestedError?.type as string) ?? '';
|
|
154
|
+
const fullErrorStr = JSON.stringify(err).toLowerCase();
|
|
155
|
+
|
|
156
|
+
const isPromptTooLong =
|
|
157
|
+
fullErrorStr.includes('prompt is too long') ||
|
|
158
|
+
fullErrorStr.includes('maximum context length') ||
|
|
159
|
+
fullErrorStr.includes('too many tokens') ||
|
|
160
|
+
fullErrorStr.includes('context_length_exceeded') ||
|
|
161
|
+
fullErrorStr.includes('request too large') ||
|
|
162
|
+
fullErrorStr.includes('exceeds the model') ||
|
|
163
|
+
fullErrorStr.includes('context window') ||
|
|
164
|
+
fullErrorStr.includes('input is too long') ||
|
|
165
|
+
errorCode === 'context_length_exceeded' ||
|
|
166
|
+
errorType === 'invalid_request_error';
|
|
167
|
+
|
|
168
|
+
debugLog(
|
|
169
|
+
`[stream-handlers] isPromptTooLong: ${isPromptTooLong}, errorCode: ${errorCode}, errorType: ${errorType}`,
|
|
170
|
+
);
|
|
171
|
+
|
|
172
|
+
if (isPromptTooLong && !opts.isCompactCommand) {
|
|
173
|
+
debugLog(
|
|
174
|
+
'[stream-handlers] Prompt too long detected, auto-compacting...',
|
|
175
|
+
);
|
|
176
|
+
let compactionSucceeded = false;
|
|
177
|
+
try {
|
|
178
|
+
// Stream the compaction summary with proper publish function
|
|
179
|
+
const compactResult = await performAutoCompaction(
|
|
180
|
+
db,
|
|
181
|
+
opts.sessionId,
|
|
182
|
+
opts.assistantMessageId,
|
|
183
|
+
publish,
|
|
184
|
+
opts.provider,
|
|
185
|
+
opts.model,
|
|
186
|
+
);
|
|
187
|
+
if (compactResult.success) {
|
|
188
|
+
debugLog(
|
|
189
|
+
`[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
|
|
190
|
+
);
|
|
191
|
+
compactionSucceeded = true;
|
|
192
|
+
} else {
|
|
193
|
+
debugLog(
|
|
194
|
+
`[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
|
|
195
|
+
);
|
|
196
|
+
// Fall back to simple prune
|
|
197
|
+
const pruneResult = await pruneSession(db, opts.sessionId);
|
|
198
|
+
debugLog(
|
|
199
|
+
`[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
|
|
200
|
+
);
|
|
201
|
+
compactionSucceeded = pruneResult.pruned > 0;
|
|
202
|
+
}
|
|
203
|
+
} catch (compactErr) {
|
|
204
|
+
debugLog(
|
|
205
|
+
`[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
|
|
206
|
+
);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// If compaction succeeded, complete this message and trigger retry
|
|
210
|
+
if (compactionSucceeded) {
|
|
211
|
+
// Mark this compaction message as completed
|
|
212
|
+
await db
|
|
213
|
+
.update(messages)
|
|
214
|
+
.set({
|
|
215
|
+
status: 'completed',
|
|
216
|
+
})
|
|
217
|
+
.where(eq(messages.id, opts.assistantMessageId));
|
|
218
|
+
|
|
219
|
+
// Publish completion event for the compaction message
|
|
220
|
+
publish({
|
|
221
|
+
type: 'message.completed',
|
|
222
|
+
sessionId: opts.sessionId,
|
|
223
|
+
payload: {
|
|
224
|
+
id: opts.assistantMessageId,
|
|
225
|
+
autoCompacted: true,
|
|
226
|
+
},
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
// Trigger retry - create a new assistant message and enqueue the run
|
|
230
|
+
if (retryCallback) {
|
|
231
|
+
debugLog('[stream-handlers] Triggering retry after compaction...');
|
|
232
|
+
const newAssistantMessageId = crypto.randomUUID();
|
|
233
|
+
await db.insert(messages).values({
|
|
234
|
+
id: newAssistantMessageId,
|
|
235
|
+
sessionId: opts.sessionId,
|
|
236
|
+
role: 'assistant',
|
|
237
|
+
status: 'pending',
|
|
238
|
+
agent: opts.agent,
|
|
239
|
+
provider: opts.provider,
|
|
240
|
+
model: opts.model,
|
|
241
|
+
createdAt: Date.now(),
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
publish({
|
|
245
|
+
type: 'message.created',
|
|
246
|
+
sessionId: opts.sessionId,
|
|
247
|
+
payload: { id: newAssistantMessageId, role: 'assistant' },
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
// Enqueue the retry with the new assistant message
|
|
251
|
+
enqueueAssistantRun(
|
|
252
|
+
{
|
|
253
|
+
...opts,
|
|
254
|
+
assistantMessageId: newAssistantMessageId,
|
|
255
|
+
},
|
|
256
|
+
retryCallback,
|
|
257
|
+
);
|
|
258
|
+
} else {
|
|
259
|
+
debugLog(
|
|
260
|
+
'[stream-handlers] No retryCallback provided, cannot auto-retry',
|
|
261
|
+
);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
return; // Don't show error, compaction and retry handled it
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
140
268
|
// Create error part for UI display
|
|
141
269
|
const errorPartId = crypto.randomUUID();
|
|
270
|
+
const displayMessage =
|
|
271
|
+
isPromptTooLong && !opts.isCompactCommand
|
|
272
|
+
? `${errorPayload.message}. Context auto-compacted - please retry your message.`
|
|
273
|
+
: errorPayload.message;
|
|
142
274
|
await db.insert(messageParts).values({
|
|
143
275
|
id: errorPartId,
|
|
144
276
|
messageId: opts.assistantMessageId,
|
|
@@ -146,7 +278,7 @@ export function createErrorHandler(
|
|
|
146
278
|
stepIndex,
|
|
147
279
|
type: 'error',
|
|
148
280
|
content: JSON.stringify({
|
|
149
|
-
message:
|
|
281
|
+
message: displayMessage,
|
|
150
282
|
type: errorPayload.type,
|
|
151
283
|
details: errorPayload.details,
|
|
152
284
|
isAborted: false,
|
|
@@ -163,11 +295,12 @@ export function createErrorHandler(
|
|
|
163
295
|
.update(messages)
|
|
164
296
|
.set({
|
|
165
297
|
status: 'error',
|
|
166
|
-
error:
|
|
298
|
+
error: displayMessage,
|
|
167
299
|
errorType: errorPayload.type,
|
|
168
300
|
errorDetails: JSON.stringify({
|
|
169
301
|
...errorPayload.details,
|
|
170
302
|
isApiError,
|
|
303
|
+
autoCompacted: isPromptTooLong && !opts.isCompactCommand,
|
|
171
304
|
}),
|
|
172
305
|
isAborted: false,
|
|
173
306
|
})
|
|
@@ -180,10 +313,11 @@ export function createErrorHandler(
|
|
|
180
313
|
payload: {
|
|
181
314
|
messageId: opts.assistantMessageId,
|
|
182
315
|
partId: errorPartId,
|
|
183
|
-
error:
|
|
316
|
+
error: displayMessage,
|
|
184
317
|
errorType: errorPayload.type,
|
|
185
318
|
details: errorPayload.details,
|
|
186
319
|
isAborted: false,
|
|
320
|
+
autoCompacted: isPromptTooLong && !opts.isCompactCommand,
|
|
187
321
|
},
|
|
188
322
|
});
|
|
189
323
|
};
|
|
@@ -273,6 +407,43 @@ export function createFinishHandler(
|
|
|
273
407
|
await completeAssistantMessageFn(fin, opts, db);
|
|
274
408
|
} catch {}
|
|
275
409
|
|
|
410
|
+
// If this was a /compact command, mark old parts as compacted
|
|
411
|
+
// Only mark as compacted if the response was successful and has content
|
|
412
|
+
if (opts.isCompactCommand && fin.finishReason !== 'error') {
|
|
413
|
+
// Verify the assistant actually generated text content (the summary)
|
|
414
|
+
const assistantParts = await db
|
|
415
|
+
.select()
|
|
416
|
+
.from(messageParts)
|
|
417
|
+
.where(eq(messageParts.messageId, opts.assistantMessageId));
|
|
418
|
+
const hasTextContent = assistantParts.some(
|
|
419
|
+
(p) => p.type === 'text' && p.content && p.content !== '{"text":""}',
|
|
420
|
+
);
|
|
421
|
+
|
|
422
|
+
if (!hasTextContent) {
|
|
423
|
+
debugLog(
|
|
424
|
+
'[stream-handlers] /compact finished but no summary generated, skipping compaction marking',
|
|
425
|
+
);
|
|
426
|
+
} else {
|
|
427
|
+
try {
|
|
428
|
+
debugLog(
|
|
429
|
+
`[stream-handlers] /compact complete, marking session compacted`,
|
|
430
|
+
);
|
|
431
|
+
const result = await markSessionCompacted(
|
|
432
|
+
db,
|
|
433
|
+
opts.sessionId,
|
|
434
|
+
opts.assistantMessageId,
|
|
435
|
+
);
|
|
436
|
+
debugLog(
|
|
437
|
+
`[stream-handlers] Compacted ${result.compacted} parts, saved ~${result.saved} tokens`,
|
|
438
|
+
);
|
|
439
|
+
} catch (err) {
|
|
440
|
+
debugLog(
|
|
441
|
+
`[stream-handlers] Compaction failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
442
|
+
);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
|
|
276
447
|
// Use session totals from DB for accurate cost calculation
|
|
277
448
|
const sessRows = await db
|
|
278
449
|
.select()
|