@agi-cli/server 0.1.160 → 0.1.162
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/runtime/agent/runner-setup.ts +1 -0
- package/src/runtime/agent/runner.ts +8 -2
- package/src/runtime/message/compaction-context.ts +60 -23
- package/src/runtime/message/compaction-detect.ts +13 -6
- package/src/runtime/message/compaction-limits.ts +14 -42
- package/src/runtime/message/compaction-mark.ts +23 -27
- package/src/runtime/message/compaction.ts +0 -2
- package/src/runtime/session/branch.ts +1 -0
- package/src/runtime/session/db-operations.ts +50 -110
- package/src/runtime/session/manager.ts +1 -0
- package/src/runtime/session/queue.ts +1 -0
- package/src/runtime/stream/error-handler.ts +87 -47
- package/src/runtime/stream/finish-handler.ts +1 -38
- package/src/runtime/context/cache-optimizer.ts +0 -134
- package/src/runtime/context/optimizer.ts +0 -206
- package/src/runtime/message/history-truncator.ts +0 -26
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@agi-cli/server",
-  "version": "0.1.160",
+  "version": "0.1.162",
   "description": "HTTP API server for AGI CLI",
   "type": "module",
   "main": "./src/index.ts",
@@ -29,8 +29,8 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "@agi-cli/sdk": "0.1.160",
-    "@agi-cli/database": "0.1.160",
+    "@agi-cli/sdk": "0.1.162",
+    "@agi-cli/database": "0.1.162",
    "drizzle-orm": "^0.44.5",
    "hono": "^4.9.9",
    "zod": "^4.1.8"
package/src/runtime/agent/runner-setup.ts

@@ -237,6 +237,7 @@ export async function setupRunner(opts: RunOpts): Promise<SetupResult> {
     }
   } else if (underlyingProvider === 'openai') {
     providerOptions.openai = {
+      reasoningEffort: 'high',
       reasoningSummary: 'auto',
     };
   } else if (underlyingProvider === 'google') {
package/src/runtime/agent/runner.ts

@@ -187,6 +187,13 @@ async function runAssistant(opts: RunOpts) {
       if (part.type === 'text-delta') {
         const delta = part.text;
         if (!delta) continue;
+
+        accumulated += delta;
+
+        if (!currentPartId && !accumulated.trim()) {
+          continue;
+        }
+
         if (!firstDeltaSeen) {
           firstDeltaSeen = true;
           streamStartTimer.end();
@@ -208,7 +215,7 @@ async function runAssistant(opts: RunOpts) {
           index: await sharedCtx.nextIndex(),
           stepIndex: null,
           type: 'text',
-          content: JSON.stringify({ text:
+          content: JSON.stringify({ text: accumulated }),
           agent: opts.agent,
           provider: opts.provider,
           model: opts.model,
@@ -216,7 +223,6 @@ async function runAssistant(opts: RunOpts) {
         });
       }

-      accumulated += delta;
       publish({
         type: 'message.part.delta',
         sessionId: opts.sessionId,
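Note: a minimal standalone sketch of the reworked delta handling above; the surrounding wiring is elided and the helper name onTextDelta is illustrative. Accumulating before the whitespace check is what lets the runner skip creating an empty text part for leading whitespace-only deltas.

let accumulated = '';
let currentPartId: string | null = null;

function onTextDelta(delta: string) {
  if (!delta) return;
  accumulated += delta;
  // No part exists yet and we've only seen whitespace: don't create one.
  if (!currentPartId && !accumulated.trim()) return;
  if (!currentPartId) {
    currentPartId = crypto.randomUUID(); // first real text creates the part
    // ...insert a 'text' part row with content { text: accumulated }...
  }
  // ...publish the message.part.delta event with the new delta...
}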
package/src/runtime/message/compaction-context.ts

@@ -1,6 +1,6 @@
 import type { getDb } from '@agi-cli/database';
 import { messages, messageParts } from '@agi-cli/database/schema';
-import { eq, asc } from 'drizzle-orm';
+import { eq, asc, desc } from 'drizzle-orm';

 export async function buildCompactionContext(
   db: Awaited<ReturnType<typeof getDb>>,
@@ -11,17 +11,22 @@ export async function buildCompactionContext(
     .select()
     .from(messages)
     .where(eq(messages.sessionId, sessionId))
-    .orderBy(
+    .orderBy(desc(messages.createdAt));

-  const lines: string[] = [];
-  let totalChars = 0;
   const maxChars = contextTokenLimit ? contextTokenLimit * 4 : 60000;
+  const recentBudget = Math.floor(maxChars * 0.65);
+  const olderBudget = maxChars - recentBudget;
+
+  const recentLines: string[] = [];
+  const olderLines: string[] = [];
+  let recentChars = 0;
+  let olderChars = 0;
+  let userTurns = 0;
+  let inRecent = true;

   for (const msg of allMessages) {
-    if (
-
-      break;
-    }
+    if (msg.role === 'user') userTurns++;
+    if (userTurns > 3 && inRecent) inRecent = false;

     const parts = await db
       .select()
@@ -37,28 +42,60 @@

         if (part.type === 'text' && content.text) {
           const text = `[${msg.role.toUpperCase()}]: ${content.text}`;
-
-
+          const limit = inRecent ? 3000 : 1000;
+          const line = text.slice(0, limit);
+
+          if (inRecent && recentChars < recentBudget) {
+            recentLines.unshift(line);
+            recentChars += line.length;
+          } else if (olderChars < olderBudget) {
+            olderLines.unshift(line);
+            olderChars += line.length;
+          }
         } else if (part.type === 'tool_call' && content.name) {
-
-
-
-
-
-
-
+          if (inRecent && recentChars < recentBudget) {
+            const argsStr =
+              typeof content.args === 'object'
+                ? JSON.stringify(content.args).slice(0, 1000)
+                : '';
+            const line = `[TOOL ${content.name}]: ${argsStr}`;
+            recentLines.unshift(line);
+            recentChars += line.length;
+          } else if (olderChars < olderBudget) {
+            const line = `[TOOL ${content.name}]`;
+            olderLines.unshift(line);
+            olderChars += line.length;
+          }
         } else if (part.type === 'tool_result' && content.result !== null) {
           const resultStr =
             typeof content.result === 'string'
-              ? content.result
-              : JSON.stringify(content.result ?? '')
-
-
-
+              ? content.result
+              : JSON.stringify(content.result ?? '');
+
+          if (inRecent && recentChars < recentBudget) {
+            const line = `[RESULT]: ${resultStr.slice(0, 2000)}`;
+            recentLines.unshift(line);
+            recentChars += line.length;
+          } else if (olderChars < olderBudget) {
+            const line = `[RESULT]: ${resultStr.slice(0, 150)}...`;
+            olderLines.unshift(line);
+            olderChars += line.length;
+          }
         }
       } catch {}
     }
+
+    if (olderChars >= olderBudget) break;
+  }
+
+  const result: string[] = [];
+  if (olderLines.length > 0) {
+    result.push('[...older conversation (tool data truncated)...]');
+    result.push(...olderLines);
+    result.push('');
+    result.push('[--- Recent conversation (full detail) ---]');
   }
+  result.push(...recentLines);

-  return
+  return result.join('\n');
 }
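Note: the two-tier budgeting that buildCompactionContext now applies, as a standalone sketch. It assumes pre-formatted lines ordered newest-first (as the desc(createdAt) query returns them) and omits the user-turn cutoff and per-type truncation limits for brevity.

function splitByBudget(linesNewestFirst: string[], maxChars = 60000): string {
  const recentBudget = Math.floor(maxChars * 0.65); // ~65% for recent turns
  const olderBudget = maxChars - recentBudget; // remainder for older turns
  const recent: string[] = [];
  const older: string[] = [];
  let recentChars = 0;
  let olderChars = 0;
  for (const line of linesNewestFirst) {
    if (recentChars < recentBudget) {
      recent.unshift(line); // unshift restores chronological order
      recentChars += line.length;
    } else if (olderChars < olderBudget) {
      older.unshift(line);
      olderChars += line.length;
    } else {
      break; // both budgets exhausted
    }
  }
  return [...older, ...recent].join('\n');
}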
package/src/runtime/message/compaction-detect.ts

@@ -5,13 +5,20 @@ export function isCompactCommand(content: string): boolean {

 export function getCompactionSystemPrompt(): string {
   return `
-The
+The conversation context is being compacted. The provided context is structured with
+RECENT conversation in full detail at the end, and OLDER conversation (with truncated tool data) at the start.

-
-
-
-
-
+Generate a comprehensive summary that captures:
+
+1. **Current State**: What was the most recent task? What is the current state of the work RIGHT NOW?
+2. **Key Changes Made**: What files were created, modified, or deleted? Summarize recent code changes.
+3. **Main Goals**: What is the user trying to accomplish overall?
+4. **Important Decisions**: What approaches or solutions were chosen and why?
+5. **Pending Work**: What remains to be done? Any known issues or blockers?
+6. **Critical Context**: Any gotchas, errors encountered, or important details for continuing.
+
+IMPORTANT: Prioritize the RECENT conversation. The summary must allow seamless continuation
+of work. Focus on what was just done and what comes next — not the early parts of the conversation.

 Format your response as a clear, structured summary. Start with "📦 **Context Compacted**" header.
 Keep under 2000 characters but be thorough. This summary will replace detailed tool history.
package/src/runtime/message/compaction-limits.ts

@@ -1,60 +1,32 @@
+import { catalog, getModelInfo } from '@agi-cli/sdk';
+import type { ProviderId } from '@agi-cli/sdk';
+
 export const PRUNE_PROTECT = 40_000;

 export function estimateTokens(text: string): number {
   return Math.max(0, Math.round((text || '').length / 4));
 }

-export interface TokenUsage {
-  input: number;
-  output: number;
-  cacheRead?: number;
-  cacheWrite?: number;
-  reasoningText?: number;
-}
-
 export interface ModelLimits {
   context: number;
   output: number;
 }

-export function isOverflow(
-  tokens: LanguageModelUsage,
-  limits: ModelLimits,
-): boolean {
-  if (limits.context === 0) return false;
-
-  const count =
-    tokens.input +
-    (tokens.cacheRead ?? 0) +
-    (tokens.cacheWrite ?? 0) +
-    tokens.output;
-  const usableContext = limits.context - limits.output;
-
-  return count > usableContext;
-}
-
 export function getModelLimits(
-
+  provider: string,
   model: string,
 ): ModelLimits | null {
-  const
-
-
-
-
-
-
-
-
-
-  };
-
-  if (defaults[model]) return defaults[model];
-
-  for (const [key, limits] of Object.entries(defaults)) {
-    if (model.includes(key) || key.includes(model)) return limits;
+  const info = getModelInfo(provider as ProviderId, model);
+  if (info?.limit?.context && info?.limit?.output) {
+    return { context: info.limit.context, output: info.limit.output };
+  }
+  for (const key of Object.keys(catalog) as ProviderId[]) {
+    const entry = catalog[key];
+    const m = entry?.models?.find((x) => x.id === model);
+    if (m?.limit?.context && m?.limit?.output) {
+      return { context: m.limit.context, output: m.limit.output };
+    }
   }
-
   return null;
 }

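Note: hypothetical usage of the reworked getModelLimits; the provider/model ids and the numbers in the comment are illustrative, not taken from the catalog.

const limits = getModelLimits('anthropic', 'claude-sonnet-4');
if (limits) {
  // e.g. { context: 200000, output: 64000 } when the catalog knows the model
  const usableInput = limits.context - limits.output; // input budget before overflow
} else {
  // unknown model: callers must handle the null case
}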
package/src/runtime/message/compaction-mark.ts

@@ -1,6 +1,6 @@
 import type { getDb } from '@agi-cli/database';
 import { messages, messageParts } from '@agi-cli/database/schema';
-import { eq,
+import { eq, asc, and, lt } from 'drizzle-orm';
 import { debugLog } from '../debug/index.ts';
 import { estimateTokens, PRUNE_PROTECT } from './compaction-limits.ts';

@@ -35,33 +35,22 @@ export async function markSessionCompacted(
         lt(messages.createdAt, cutoffTime),
       ),
     )
-    .orderBy(
+    .orderBy(asc(messages.createdAt));

-
-
-
-  let turns = 0;
+  type PartInfo = { id: string; tokens: number };
+  const allToolParts: PartInfo[] = [];
+  let totalToolTokens = 0;

   for (const msg of oldMessages) {
-    if (msg.role === 'user') {
-      turns++;
-    }
-
-    if (turns < 2) continue;
-
     const parts = await db
       .select()
       .from(messageParts)
       .where(eq(messageParts.messageId, msg.id))
-      .orderBy(
+      .orderBy(asc(messageParts.index));

     for (const part of parts) {
       if (part.type !== 'tool_call' && part.type !== 'tool_result') continue;
-
-      if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) {
-        continue;
-      }
-
+      if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) continue;
       if (part.compactedAt) continue;

       let content: { result?: unknown; args?: unknown };
@@ -78,18 +67,25 @@ export async function markSessionCompacted(
           : JSON.stringify(content.result ?? '')
         : JSON.stringify(content.args ?? '');

-      const
-
-
-      if (totalTokens > PRUNE_PROTECT) {
-        compactedTokens += estimate;
-        toCompact.push({ id: part.id, content: part.content ?? '{}' });
-      }
+      const tokens = estimateTokens(contentStr);
+      totalToolTokens += tokens;
+      allToolParts.push({ id: part.id, tokens });
     }
   }

+  const tokensToFree = Math.max(0, totalToolTokens - PRUNE_PROTECT);
+
+  const toCompact: PartInfo[] = [];
+  let freedTokens = 0;
+
+  for (const part of allToolParts) {
+    if (freedTokens >= tokensToFree) break;
+    freedTokens += part.tokens;
+    toCompact.push(part);
+  }
+
   debugLog(
-    `[compaction] Found ${toCompact.length} parts to compact, saving ~${
+    `[compaction] Found ${toCompact.length} parts to compact (oldest first), saving ~${freedTokens} tokens`,
   );

   if (toCompact.length > 0) {
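Note: the new selection logic in isolation: free just enough of the oldest tool data to bring the total back under PRUNE_PROTECT. A sketch using the same shapes as the diff above.

type PartInfo = { id: string; tokens: number };

function selectPartsToCompact(oldestFirst: PartInfo[], totalToolTokens: number) {
  const tokensToFree = Math.max(0, totalToolTokens - PRUNE_PROTECT); // keep ~40k tokens
  const toCompact: PartInfo[] = [];
  let freed = 0;
  for (const part of oldestFirst) {
    if (freed >= tokensToFree) break; // enough freed, keep the rest intact
    freed += part.tokens;
    toCompact.push(part);
  }
  return { toCompact, freed };
}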
@@ -111,5 +107,5 @@ export async function markSessionCompacted(
     debugLog(`[compaction] Marked ${toCompact.length} parts as compacted`);
   }

-  return { compacted: toCompact.length, saved:
+  return { compacted: toCompact.length, saved: freedTokens };
 }
package/src/runtime/session/db-operations.ts

@@ -51,13 +51,10 @@ export function normalizeUsage(
       : undefined;

   const cachedValue = cachedInputTokens ?? 0;
-  const cacheCreationValue = cacheCreationInputTokens ?? 0;

   let inputTokens = rawInputTokens;
   if (provider === 'openai') {
     inputTokens = Math.max(0, rawInputTokens - cachedValue);
-  } else if (provider === 'anthropic') {
-    inputTokens = Math.max(0, rawInputTokens - cacheCreationValue);
   }

   return {
@@ -94,8 +91,10 @@ export function resolveUsageProvider(
 }

 /**
- * Updates session token counts
- *
+ * Updates session token counts after each step.
+ * AI SDK v6: onStepFinish.usage is PER-STEP (each step = one API call).
+ * We ADD each step's tokens directly to session totals.
+ * We also track currentContextTokens = the latest step's full input context.
  */
 export async function updateSessionTokensIncremental(
   usage: UsageData,
@@ -105,10 +104,19 @@ export async function updateSessionTokensIncremental(
 ) {
   if (!usage || !db) return;

+  const currentContextTokens = Number(usage.inputTokens ?? 0);
+
   const usageProvider = resolveUsageProvider(opts.provider, opts.model);
   const normalizedUsage = normalizeUsage(usage, providerOptions, usageProvider);

-
+  const stepInput = Number(normalizedUsage.inputTokens ?? 0);
+  const stepOutput = Number(normalizedUsage.outputTokens ?? 0);
+  const stepCached = Number(normalizedUsage.cachedInputTokens ?? 0);
+  const stepCacheCreation = Number(
+    normalizedUsage.cacheCreationInputTokens ?? 0,
+  );
+  const stepReasoning = Number(normalizedUsage.reasoningTokens ?? 0);
+
   const sessRows = await db
     .select()
     .from(sessions)
@@ -117,73 +125,18 @@ export async function updateSessionTokensIncremental(
   if (sessRows.length === 0 || !sessRows[0]) return;

   const sess = sessRows[0];
-  const priorInputSess = Number(sess.totalInputTokens ?? 0);
-  const priorOutputSess = Number(sess.totalOutputTokens ?? 0);
-  const priorCachedSess = Number(sess.totalCachedTokens ?? 0);
-  const priorCacheCreationSess = Number(sess.totalCacheCreationTokens ?? 0);
-  const priorReasoningSess = Number(sess.totalReasoningTokens ?? 0);
-
-  // Read current message totals to compute delta
-  const msgRows = await db
-    .select()
-    .from(messages)
-    .where(eq(messages.id, opts.assistantMessageId));
-
-  const msg = msgRows[0];
-  const priorPromptMsg = Number(msg?.inputTokens ?? 0);
-  const priorCompletionMsg = Number(msg?.outputTokens ?? 0);
-  const priorCachedMsg = Number(msg?.cachedInputTokens ?? 0);
-  const priorCacheCreationMsg = Number(msg?.cacheCreationInputTokens ?? 0);
-  const priorReasoningMsg = Number(msg?.reasoningTokens ?? 0);
-
-  // Treat usage as cumulative per-message for this step
-  const cumPrompt =
-    normalizedUsage.inputTokens != null
-      ? Number(normalizedUsage.inputTokens)
-      : priorPromptMsg;
-  const cumCompletion =
-    normalizedUsage.outputTokens != null
-      ? Number(normalizedUsage.outputTokens)
-      : priorCompletionMsg;
-  const cumReasoning =
-    normalizedUsage.reasoningTokens != null
-      ? Number(normalizedUsage.reasoningTokens)
-      : priorReasoningMsg;
-
-  const cumCached =
-    normalizedUsage.cachedInputTokens != null
-      ? Number(normalizedUsage.cachedInputTokens)
-      : priorCachedMsg;
-
-  const cumCacheCreation =
-    normalizedUsage.cacheCreationInputTokens != null
-      ? Number(normalizedUsage.cacheCreationInputTokens)
-      : priorCacheCreationMsg;
-
-  // Compute deltas for this step; clamp to 0 in case provider reports smaller values
-  const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
-  const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
-  const deltaCached = Math.max(0, cumCached - priorCachedMsg);
-  const deltaCacheCreation = Math.max(
-    0,
-    cumCacheCreation - priorCacheCreationMsg,
-  );
-  const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
-
-  const nextInputSess = priorInputSess + deltaInput;
-  const nextOutputSess = priorOutputSess + deltaOutput;
-  const nextCachedSess = priorCachedSess + deltaCached;
-  const nextCacheCreationSess = priorCacheCreationSess + deltaCacheCreation;
-  const nextReasoningSess = priorReasoningSess + deltaReasoning;

   await db
     .update(sessions)
     .set({
-      totalInputTokens:
-      totalOutputTokens:
-      totalCachedTokens:
-      totalCacheCreationTokens:
-
+      totalInputTokens: Number(sess.totalInputTokens ?? 0) + stepInput,
+      totalOutputTokens: Number(sess.totalOutputTokens ?? 0) + stepOutput,
+      totalCachedTokens: Number(sess.totalCachedTokens ?? 0) + stepCached,
+      totalCacheCreationTokens:
+        Number(sess.totalCacheCreationTokens ?? 0) + stepCacheCreation,
+      totalReasoningTokens:
+        Number(sess.totalReasoningTokens ?? 0) + stepReasoning,
+      currentContextTokens,
     })
     .where(eq(sessions.id, opts.sessionId));
 }
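Note: a worked example of the accounting change. The old code treated usage as cumulative per message and added clamped deltas; with AI SDK v6 reporting usage per step, each step's tokens are simply added. The step values here are hypothetical.

// Three steps (API calls) within one assistant message:
const stepInputTokens = [1200, 1500, 1800]; // each step resends the growing prompt
let totalInputTokens = 0;
for (const step of stepInputTokens) {
  totalInputTokens += step; // session total: 4500
}
// currentContextTokens is not a sum: it tracks the latest step's raw
// usage.inputTokens (1800 here), i.e. the size of the live context window.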
@@ -222,8 +175,8 @@ export async function updateSessionTokens(
 }

 /**
- * Updates message token counts
- *
+ * Updates message token counts after each step.
+ * AI SDK v6: onStepFinish.usage is PER-STEP. We ADD each step's tokens to message totals.
  */
 export async function updateMessageTokensIncremental(
   usage: UsageData,
@@ -236,6 +189,14 @@ export async function updateMessageTokensIncremental(
   const usageProvider = resolveUsageProvider(opts.provider, opts.model);
   const normalizedUsage = normalizeUsage(usage, providerOptions, usageProvider);

+  const stepInput = Number(normalizedUsage.inputTokens ?? 0);
+  const stepOutput = Number(normalizedUsage.outputTokens ?? 0);
+  const stepCached = Number(normalizedUsage.cachedInputTokens ?? 0);
+  const stepCacheCreation = Number(
+    normalizedUsage.cacheCreationInputTokens ?? 0,
+  );
+  const stepReasoning = Number(normalizedUsage.reasoningTokens ?? 0);
+
   const msgRows = await db
     .select()
     .from(messages)
@@ -243,48 +204,27 @@ export async function updateMessageTokensIncremental(

   if (msgRows.length > 0 && msgRows[0]) {
     const msg = msgRows[0];
-    const
-    const
-    const
-    const
-
-
-    // Treat usage as cumulative per-message - REPLACE not ADD
-    const cumPrompt =
-      normalizedUsage.inputTokens != null
-        ? Number(normalizedUsage.inputTokens)
-        : priorPrompt;
-    const cumCompletion =
-      normalizedUsage.outputTokens != null
-        ? Number(normalizedUsage.outputTokens)
-        : priorCompletion;
-    const cumReasoning =
-      normalizedUsage.reasoningTokens != null
-        ? Number(normalizedUsage.reasoningTokens)
-        : priorReasoning;
-
-    const cumCached =
-      normalizedUsage.cachedInputTokens != null
-        ? Number(normalizedUsage.cachedInputTokens)
-        : priorCached;
-
-    const cumCacheCreation =
-      normalizedUsage.cacheCreationInputTokens != null
-        ? Number(normalizedUsage.cacheCreationInputTokens)
-        : priorCacheCreation;
-
-    const cumTotal =
-      cumPrompt + cumCompletion + cumCached + cumCacheCreation + cumReasoning;
+    const nextInput = Number(msg.inputTokens ?? 0) + stepInput;
+    const nextOutput = Number(msg.outputTokens ?? 0) + stepOutput;
+    const nextCached = Number(msg.cachedInputTokens ?? 0) + stepCached;
+    const nextCacheCreation =
+      Number(msg.cacheCreationInputTokens ?? 0) + stepCacheCreation;
+    const nextReasoning = Number(msg.reasoningTokens ?? 0) + stepReasoning;

     await db
       .update(messages)
       .set({
-        inputTokens:
-        outputTokens:
-        totalTokens:
-
-
-
+        inputTokens: nextInput,
+        outputTokens: nextOutput,
+        totalTokens:
+          nextInput +
+          nextOutput +
+          nextCached +
+          nextCacheCreation +
+          nextReasoning,
+        cachedInputTokens: nextCached,
+        cacheCreationInputTokens: nextCacheCreation,
+        reasoningTokens: nextReasoning,
       })
       .where(eq(messages.id, opts.assistantMessageId));
   }
@@ -337,7 +277,7 @@ export async function cleanupEmptyTextParts(
     try {
       t = JSON.parse(p.content || '{}')?.text || '';
     } catch {}
-    if (!t || t.
+    if (!t || !t.trim()) {
       await db.delete(messageParts).where(eq(messageParts.id, p.id));
     }
   }
@@ -55,6 +55,7 @@ export async function createSession({
     totalReasoningTokens: null,
     totalToolTimeMs: null,
     toolCountsJson: null,
+    currentContextTokens: null,
   };
   await db.insert(sessions).values(row);
   publish({ type: 'session.created', sessionId: id, payload: row });
package/src/runtime/stream/error-handler.ts

@@ -187,66 +187,103 @@ export function createErrorHandler(
       debugLog(
         '[stream-handlers] Prompt too long detected, auto-compacting...',
       );
-
-
-
-
-
-        payload: Record<string, unknown>;
-      }) => {
-        publish(event as Parameters<typeof publish>[0]);
-      };
-      const compactResult = await performAutoCompaction(
-        db,
-        opts.sessionId,
-        opts.assistantMessageId,
-        publishWrapper,
-        opts.provider,
-        opts.model,
+
+      const retries = opts.compactionRetries ?? 0;
+      if (retries >= 2) {
+        debugLog(
+          '[stream-handlers] Compaction retry limit reached, surfacing error',
       );
-
-
-
-      )
-
-
-
-
+      } else {
+        await db
+          .update(messages)
+          .set({ status: 'completed', completedAt: Date.now() })
+          .where(eq(messages.id, opts.assistantMessageId));
+
+        publish({
+          type: 'message.completed',
+          sessionId: opts.sessionId,
+          payload: {
+            id: opts.assistantMessageId,
+            autoCompacted: true,
+          },
+        });
+
+        const compactMessageId = crypto.randomUUID();
+        const compactMessageTime = Date.now();
+        await db.insert(messages).values({
+          id: compactMessageId,
+          sessionId: opts.sessionId,
+          role: 'assistant',
+          status: 'pending',
+          agent: opts.agent,
+          provider: opts.provider,
+          model: opts.model,
+          createdAt: compactMessageTime,
+        });
+
+        publish({
+          type: 'message.created',
+          sessionId: opts.sessionId,
+          payload: { id: compactMessageId, role: 'assistant' },
+        });
+
+        let compactionSucceeded = false;
+        try {
+          const publishWrapper = (event: {
+            type: string;
+            sessionId: string;
+            payload: Record<string, unknown>;
+          }) => {
+            publish(event as Parameters<typeof publish>[0]);
+          };
+          const compactResult = await performAutoCompaction(
+            db,
+            opts.sessionId,
+            compactMessageId,
+            publishWrapper,
+            opts.provider,
+            opts.model,
           );
-
+          if (compactResult.success) {
+            debugLog(
+              `[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
+            );
+            compactionSucceeded = true;
+          } else {
+            debugLog(
+              `[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
+            );
+            const pruneResult = await pruneSession(db, opts.sessionId);
+            debugLog(
+              `[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
+            );
+            compactionSucceeded = pruneResult.pruned > 0;
+          }
+        } catch (compactErr) {
           debugLog(
-            `[stream-handlers]
+            `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
           );
-          compactionSucceeded = pruneResult.pruned > 0;
         }
-      } catch (compactErr) {
-        debugLog(
-          `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
-        );
-      }

-      if (compactionSucceeded) {
         await db
           .update(messages)
           .set({
-            status: 'completed',
+            status: compactionSucceeded ? 'completed' : 'error',
+            completedAt: Date.now(),
           })
-          .where(eq(messages.id,
+          .where(eq(messages.id, compactMessageId));

         publish({
           type: 'message.completed',
           sessionId: opts.sessionId,
-          payload: {
-            id: opts.assistantMessageId,
-            autoCompacted: true,
-          },
+          payload: { id: compactMessageId, autoCompacted: true },
         });

-        if (retryCallback) {
+        if (compactionSucceeded && retryCallback) {
           debugLog('[stream-handlers] Triggering retry after compaction...');
-          const
+          const retryMessageId = crypto.randomUUID();
           await db.insert(messages).values({
-            id:
+            id: retryMessageId,
             sessionId: opts.sessionId,
             role: 'assistant',
             status: 'pending',
@@ -259,23 +296,26 @@ export function createErrorHandler(
           publish({
             type: 'message.created',
             sessionId: opts.sessionId,
-            payload: { id:
+            payload: { id: retryMessageId, role: 'assistant' },
           });

           enqueueAssistantRun(
             {
               ...opts,
-              assistantMessageId:
+              assistantMessageId: retryMessageId,
+              compactionRetries: retries + 1,
             },
             retryCallback,
           );
-
+          return;
+        }
+
+        if (compactionSucceeded) {
           debugLog(
             '[stream-handlers] No retryCallback provided, cannot auto-retry',
           );
+          return;
         }
-
-        return;
       }
     }

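Note: the control flow of the reworked error handler, condensed to a sketch; names follow the diff, and the database and event wiring is reduced to comments.

type RetryableRunOpts = { compactionRetries?: number }; // subset of the real RunOpts

async function onPromptTooLong(opts: RetryableRunOpts) {
  const retries = opts.compactionRetries ?? 0;
  if (retries >= 2) {
    return; // cap reached: surface the error instead of compacting forever
  }
  // 1. mark the failed assistant message completed
  // 2. insert a fresh message (compactMessageId) to host the compaction summary
  // 3. run performAutoCompaction(...); on failure fall back to pruneSession(...)
  // 4. mark compactMessageId 'completed' or 'error' based on the outcome
  // 5. if anything was freed and a retryCallback exists, enqueue a new run with
  //    compactionRetries: retries + 1 so the cap above eventually stops the loop
}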
package/src/runtime/stream/finish-handler.ts

@@ -4,12 +4,7 @@ import { eq } from 'drizzle-orm';
 import { publish } from '../../events/bus.ts';
 import { estimateModelCostUsd } from '@agi-cli/sdk';
 import type { RunOpts } from '../session/queue.ts';
-import {
-  pruneSession,
-  isOverflow,
-  getModelLimits,
-  markSessionCompacted,
-} from '../message/compaction.ts';
+import { markSessionCompacted } from '../message/compaction.ts';
 import { debugLog } from '../debug/index.ts';
 import type { FinishEvent } from './types.ts';
 import {
@@ -92,38 +87,6 @@ export function createFinishHandler(
       ? estimateModelCostUsd(opts.provider, opts.model, usage)
       : undefined;

-    if (usage) {
-      try {
-        const limits = getModelLimits(opts.provider, opts.model);
-        if (limits) {
-          const tokenUsage: LanguageModelUsage = {
-            input: usage.inputTokens ?? 0,
-            output: usage.outputTokens ?? 0,
-            cacheRead:
-              (usage as { cachedInputTokens?: number }).cachedInputTokens ?? 0,
-            cacheWrite:
-              (usage as { cacheCreationInputTokens?: number })
-                .cacheCreationInputTokens ?? 0,
-          };
-
-          if (isOverflow(tokenUsage, limits)) {
-            debugLog(
-              `[stream-handlers] Context overflow detected, triggering prune for session ${opts.sessionId}`,
-            );
-            pruneSession(db, opts.sessionId).catch((err) => {
-              debugLog(
-                `[stream-handlers] Prune failed: ${err instanceof Error ? err.message : String(err)}`,
-              );
-            });
-          }
-        }
-      } catch (err) {
-        debugLog(
-          `[stream-handlers] Overflow check failed: ${err instanceof Error ? err.message : String(err)}`,
-        );
-      }
-    }
-
     publish({
       type: 'message.completed',
       sessionId: opts.sessionId,
package/src/runtime/context/cache-optimizer.ts

@@ -1,134 +0,0 @@
-import type { ModelMessage } from 'ai';
-
-/**
- * Adds cache control to messages for prompt caching optimization.
- * Anthropic supports caching for system messages, tools, and long context.
- */
-type CachedSystemValue =
-  | string
-  | undefined
-  | Array<{
-      type: 'text';
-      text: string;
-      cache_control?: { type: 'ephemeral' };
-    }>;
-
-type TextContentPartWithProviderOptions = {
-  providerOptions?: {
-    anthropic?: { cacheControl?: { type: 'ephemeral' } };
-    [key: string]: unknown;
-  };
-  [key: string]: unknown;
-};
-
-export function addCacheControl(
-  provider: string,
-  system: string | undefined,
-  messages: ModelMessage[],
-): {
-  system?: CachedSystemValue;
-  messages: ModelMessage[];
-} {
-  // Only Anthropic supports prompt caching currently
-  if (provider !== 'anthropic') {
-    return { system, messages };
-  }
-
-  // Convert system to cacheable format if it's long enough
-  let cachedSystem: CachedSystemValue = system;
-  if (system && system.length > 1024) {
-    // Anthropic requires 1024+ tokens for Claude Sonnet/Opus
-    cachedSystem = [
-      {
-        type: 'text',
-        text: system,
-        cache_control: { type: 'ephemeral' as const },
-      },
-    ];
-  }
-
-  // Anthropic cache_control limits:
-  // - Max 4 cache blocks total
-  // - System message: 1 block
-  // - Tools: 2 blocks (read, write)
-  // - Last user message: 1 block
-  // Total: 4 blocks
-
-  // Add cache control to the last user message if conversation is long
-  // This caches the conversation history up to that point
-  if (messages.length >= 3) {
-    const cachedMessages = [...messages];
-
-    // Find second-to-last user message (not the current one)
-    const userIndices = cachedMessages
-      .map((m, i) => (m.role === 'user' ? i : -1))
-      .filter((i) => i >= 0);
-
-    if (userIndices.length >= 2) {
-      const targetIndex = userIndices[userIndices.length - 2];
-      const targetMsg = cachedMessages[targetIndex];
-
-      if (Array.isArray(targetMsg.content)) {
-        // Add cache control to the last content part of that message
-        const lastPart = targetMsg.content[targetMsg.content.length - 1];
-        if (
-          lastPart &&
-          typeof lastPart === 'object' &&
-          'type' in lastPart &&
-          lastPart.type === 'text'
-        ) {
-          const textPart =
-            lastPart as unknown as TextContentPartWithProviderOptions;
-          textPart.providerOptions = {
-            ...textPart.providerOptions,
-            anthropic: { cacheControl: { type: 'ephemeral' } },
-          };
-        }
-      }
-    }
-
-    return { system: cachedSystem, messages: cachedMessages };
-  }
-
-  return { system: cachedSystem, messages };
-}
-
-/**
- * Truncates old messages to reduce context size while keeping recent context.
- * Strategy: Keep system message + last N messages
- */
-export function truncateHistory(
-  messages: ModelMessage[],
-  maxMessages = 20,
-): ModelMessage[] {
-  if (messages.length <= maxMessages) {
-    return messages;
-  }
-
-  // Keep the most recent messages
-  return messages.slice(-maxMessages);
-}
-
-/**
- * Estimates token count (rough approximation: ~4 chars per token)
- */
-export function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
-}
-
-/**
- * Summarizes tool results if they're too long
- */
-export function summarizeToolResult(result: unknown, maxLength = 5000): string {
-  const str = typeof result === 'string' ? result : JSON.stringify(result);
-
-  if (str.length <= maxLength) {
-    return str;
-  }
-
-  // Truncate and add indicator
-  return (
-    str.slice(0, maxLength) +
-    `\n\n[... truncated ${str.length - maxLength} characters]`
-  );
-}
package/src/runtime/context/optimizer.ts

@@ -1,206 +0,0 @@
-import type { ModelMessage } from 'ai';
-
-/**
- * Optimizes message context by deduplicating file reads and pruning old tool results.
- */
-
-interface FileRead {
-  messageIndex: number;
-  partIndex: number;
-  path: string;
-}
-
-interface ToolPart {
-  type: string;
-  input?: {
-    path?: string;
-    filePattern?: string;
-    pattern?: string;
-  };
-  output?: unknown;
-  [key: string]: unknown;
-}
-
-/**
- * Deduplicates file read results, keeping only the latest version of each file.
- *
- * Strategy:
- * - Track all file reads (read, grep, glob tools)
- * - For files read multiple times, remove older results
- * - Keep only the most recent read of each file
- */
-export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
-  const fileReads = new Map<string, FileRead[]>();
-
-  // First pass: identify all file reads and their locations
-  messages.forEach((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return;
-
-    msg.content.forEach((part, partIdx) => {
-      if (!part || typeof part !== 'object') return;
-      if (!('type' in part)) return;
-
-      const toolType = part.type as string;
-
-      // Check if this is a file read tool (read, grep, glob)
-      if (!toolType.startsWith('tool-')) return;
-
-      const toolName = toolType.replace('tool-', '');
-      if (!['read', 'grep', 'glob'].includes(toolName)) return;
-
-      // Extract file path from input
-      const toolPart = part as ToolPart;
-      const input = toolPart.input;
-      if (!input) return;
-
-      const path = input.path || input.filePattern || input.pattern;
-      if (!path) return;
-
-      // Track this file read
-      if (!fileReads.has(path)) {
-        fileReads.set(path, []);
-      }
-      fileReads
-        .get(path)
-        ?.push({ messageIndex: msgIdx, partIndex: partIdx, path });
-    });
-  });
-
-  // Second pass: identify reads to remove (all but the latest for each file)
-  const readsToRemove = new Set<string>();
-
-  for (const [_path, reads] of fileReads) {
-    if (reads.length <= 1) continue;
-
-    // Sort by message index descending (latest first)
-    reads.sort((a, b) => b.messageIndex - a.messageIndex);
-
-    // Remove all but the first (latest)
-    for (let i = 1; i < reads.length; i++) {
-      const read = reads[i];
-      readsToRemove.add(`${read.messageIndex}-${read.partIndex}`);
-    }
-  }
-
-  // Third pass: rebuild messages without removed reads
-  return messages.map((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg;
-
-    const filteredContent = msg.content.filter((_part, partIdx) => {
-      const key = `${msgIdx}-${partIdx}`;
-      return !readsToRemove.has(key);
-    });
-
-    return {
-      ...msg,
-      content: filteredContent,
-    };
-  });
-}
-
-/**
- * Prunes old tool results to reduce context size.
- *
- * Strategy:
- * - Keep only the last N tool results
- * - Preserve tool calls but remove their output
- * - Keep text parts intact
- */
-export function pruneToolResults(
-  messages: ModelMessage[],
-  maxToolResults = 30,
-): ModelMessage[] {
-  // Collect all tool result locations
-  const toolResults: Array<{ messageIndex: number; partIndex: number }> = [];
-
-  messages.forEach((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return;
-
-    msg.content.forEach((part, partIdx) => {
-      if (!part || typeof part !== 'object') return;
-      if (!('type' in part)) return;
-
-      const toolType = part.type as string;
-      if (!toolType.startsWith('tool-')) return;
-
-      // Check if this has output
-      const toolPart = part as ToolPart;
-      const hasOutput = toolPart.output !== undefined;
-      if (!hasOutput) return;
-
-      toolResults.push({ messageIndex: msgIdx, partIndex: partIdx });
-    });
-  });
-
-  // If under limit, no pruning needed
-  if (toolResults.length <= maxToolResults) {
-    return messages;
-  }
-
-  // Keep only the last N tool results
-  const toKeep = new Set<string>();
-  const keepCount = Math.min(maxToolResults, toolResults.length);
-  const keepStart = toolResults.length - keepCount;
-
-  for (let i = keepStart; i < toolResults.length; i++) {
-    const result = toolResults[i];
-    toKeep.add(`${result.messageIndex}-${result.partIndex}`);
-  }
-
-  // Rebuild messages, removing old tool outputs
-  return messages.map((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg;
-
-    const processedContent = msg.content.map((part, partIdx) => {
-      if (!part || typeof part !== 'object') return part;
-      if (!('type' in part)) return part;
-
-      const toolPart = part as ToolPart;
-      const toolType = toolPart.type;
-      if (!toolType.startsWith('tool-')) return part;
-
-      const key = `${msgIdx}-${partIdx}`;
-      const hasOutput = toolPart.output !== undefined;
-
-      // If this tool result should be pruned, remove its output
-      if (hasOutput && !toKeep.has(key)) {
-        return {
-          ...part,
-          output: '[pruned to save context]',
-        };
-      }
-
-      return part;
-    });
-
-    return {
-      ...msg,
-      content: processedContent,
-    };
-  });
-}
-
-/**
- * Applies all context optimizations:
- * 1. Deduplicate file reads
- * 2. Prune old tool results
- */
-export function optimizeContext(
-  messages: ModelMessage[],
-  options: {
-    deduplicateFiles?: boolean;
-    maxToolResults?: number;
-  } = {},
-): ModelMessage[] {
-  let optimized = messages;
-
-  if (options.deduplicateFiles !== false) {
-    optimized = deduplicateFileReads(optimized);
-  }
-
-  if (options.maxToolResults !== undefined) {
-    optimized = pruneToolResults(optimized, options.maxToolResults);
-  }
-
-  return optimized;
-}
package/src/runtime/message/history-truncator.ts

@@ -1,26 +0,0 @@
-import type { ModelMessage } from 'ai';
-
-/**
- * Truncates conversation history to keep only the most recent messages.
- * This helps manage context window size and improves performance.
- *
- * Strategy:
- * - Keep only the last N messages
- * - Preserve message pairs (assistant + user responses) when possible
- * - Always keep at least the system message if present
- */
-export function truncateHistory(
-  messages: ModelMessage[],
-  maxMessages: number,
-): ModelMessage[] {
-  if (messages.length <= maxMessages) {
-    return messages;
-  }
-
-  // Calculate how many messages to keep
-  const keepCount = Math.min(maxMessages, messages.length);
-  const startIndex = messages.length - keepCount;
-
-  // Return the most recent messages
-  return messages.slice(startIndex);
-}