@agi-cli/server 0.1.59 → 0.1.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/openapi/spec.ts +641 -0
- package/src/runtime/cache-optimizer.ts +29 -51
- package/src/runtime/context-optimizer.ts +20 -6
- package/src/runtime/db-operations.ts +43 -48
- package/src/runtime/history-truncator.ts +26 -0
- package/src/runtime/runner.ts +99 -248
- package/src/runtime/stream-handlers.ts +175 -209
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
import type { ModelMessage } from 'ai';
|
|
2
2
|
|
|
3
|
+
type SystemMessage =
|
|
4
|
+
| string
|
|
5
|
+
| Array<{
|
|
6
|
+
type: 'text';
|
|
7
|
+
text: string;
|
|
8
|
+
cache_control?: { type: 'ephemeral' };
|
|
9
|
+
}>;
|
|
10
|
+
|
|
11
|
+
interface ContentPart {
|
|
12
|
+
type: string;
|
|
13
|
+
[key: string]: unknown;
|
|
14
|
+
providerOptions?: {
|
|
15
|
+
anthropic?: {
|
|
16
|
+
cacheControl?: { type: 'ephemeral' };
|
|
17
|
+
};
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
|
|
3
21
|
/**
|
|
4
22
|
* Adds cache control to messages for prompt caching optimization.
|
|
5
23
|
* Anthropic supports caching for system messages, tools, and long context.
|
|
@@ -9,13 +27,7 @@ export function addCacheControl(
|
|
|
9
27
|
system: string | undefined,
|
|
10
28
|
messages: ModelMessage[],
|
|
11
29
|
): {
|
|
12
|
-
system?:
|
|
13
|
-
| string
|
|
14
|
-
| Array<{
|
|
15
|
-
type: 'text';
|
|
16
|
-
text: string;
|
|
17
|
-
cache_control?: { type: 'ephemeral' };
|
|
18
|
-
}>;
|
|
30
|
+
system?: SystemMessage;
|
|
19
31
|
messages: ModelMessage[];
|
|
20
32
|
} {
|
|
21
33
|
// Only Anthropic supports prompt caching currently
|
|
@@ -24,7 +36,7 @@ export function addCacheControl(
|
|
|
24
36
|
}
|
|
25
37
|
|
|
26
38
|
// Convert system to cacheable format if it's long enough
|
|
27
|
-
let cachedSystem:
|
|
39
|
+
let cachedSystem: SystemMessage | undefined = system;
|
|
28
40
|
if (system && system.length > 1024) {
|
|
29
41
|
// Anthropic requires 1024+ tokens for Claude Sonnet/Opus
|
|
30
42
|
cachedSystem = [
|
|
@@ -61,55 +73,21 @@ export function addCacheControl(
|
|
|
61
73
|
// Add cache control to the last content part of that message
|
|
62
74
|
const lastPart = targetMsg.content[targetMsg.content.length - 1];
|
|
63
75
|
if (lastPart && typeof lastPart === 'object' && 'type' in lastPart) {
|
|
64
|
-
(lastPart as
|
|
76
|
+
(lastPart as ContentPart).providerOptions = {
|
|
65
77
|
anthropic: { cacheControl: { type: 'ephemeral' } },
|
|
66
78
|
};
|
|
67
79
|
}
|
|
68
80
|
}
|
|
69
81
|
}
|
|
70
82
|
|
|
71
|
-
return {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
* Truncates old messages to reduce context size while keeping recent context.
|
|
79
|
-
* Strategy: Keep system message + last N messages
|
|
80
|
-
*/
|
|
81
|
-
export function truncateHistory(
|
|
82
|
-
messages: ModelMessage[],
|
|
83
|
-
maxMessages = 20,
|
|
84
|
-
): ModelMessage[] {
|
|
85
|
-
if (messages.length <= maxMessages) {
|
|
86
|
-
return messages;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
// Keep the most recent messages
|
|
90
|
-
return messages.slice(-maxMessages);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
/**
|
|
94
|
-
* Estimates token count (rough approximation: ~4 chars per token)
|
|
95
|
-
*/
|
|
96
|
-
export function estimateTokens(text: string): number {
|
|
97
|
-
return Math.ceil(text.length / 4);
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/**
|
|
101
|
-
* Summarizes tool results if they're too long
|
|
102
|
-
*/
|
|
103
|
-
export function summarizeToolResult(result: unknown, maxLength = 5000): string {
|
|
104
|
-
const str = typeof result === 'string' ? result : JSON.stringify(result);
|
|
105
|
-
|
|
106
|
-
if (str.length <= maxLength) {
|
|
107
|
-
return str;
|
|
83
|
+
return {
|
|
84
|
+
system: cachedSystem,
|
|
85
|
+
messages: cachedMessages,
|
|
86
|
+
};
|
|
108
87
|
}
|
|
109
88
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
);
|
|
89
|
+
return {
|
|
90
|
+
system: cachedSystem,
|
|
91
|
+
messages,
|
|
92
|
+
};
|
|
115
93
|
}
|
|
@@ -10,6 +10,17 @@ interface FileRead {
|
|
|
10
10
|
path: string;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
interface ToolPart {
|
|
14
|
+
type: string;
|
|
15
|
+
input?: {
|
|
16
|
+
path?: string;
|
|
17
|
+
filePattern?: string;
|
|
18
|
+
pattern?: string;
|
|
19
|
+
};
|
|
20
|
+
output?: unknown;
|
|
21
|
+
[key: string]: unknown;
|
|
22
|
+
}
|
|
23
|
+
|
|
13
24
|
/**
|
|
14
25
|
* Deduplicates file read results, keeping only the latest version of each file.
|
|
15
26
|
*
|
|
@@ -38,7 +49,8 @@ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
|
|
|
38
49
|
if (!['read', 'grep', 'glob'].includes(toolName)) return;
|
|
39
50
|
|
|
40
51
|
// Extract file path from input
|
|
41
|
-
const
|
|
52
|
+
const toolPart = part as ToolPart;
|
|
53
|
+
const input = toolPart.input;
|
|
42
54
|
if (!input) return;
|
|
43
55
|
|
|
44
56
|
const path = input.path || input.filePattern || input.pattern;
|
|
@@ -49,8 +61,8 @@ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
|
|
|
49
61
|
fileReads.set(path, []);
|
|
50
62
|
}
|
|
51
63
|
fileReads
|
|
52
|
-
.get(path)
|
|
53
|
-
|
|
64
|
+
.get(path)
|
|
65
|
+
?.push({ messageIndex: msgIdx, partIndex: partIdx, path });
|
|
54
66
|
});
|
|
55
67
|
});
|
|
56
68
|
|
|
@@ -112,7 +124,8 @@ export function pruneToolResults(
|
|
|
112
124
|
if (!toolType.startsWith('tool-')) return;
|
|
113
125
|
|
|
114
126
|
// Check if this has output
|
|
115
|
-
const
|
|
127
|
+
const toolPart = part as ToolPart;
|
|
128
|
+
const hasOutput = toolPart.output !== undefined;
|
|
116
129
|
if (!hasOutput) return;
|
|
117
130
|
|
|
118
131
|
toolResults.push({ messageIndex: msgIdx, partIndex: partIdx });
|
|
@@ -142,11 +155,12 @@ export function pruneToolResults(
|
|
|
142
155
|
if (!part || typeof part !== 'object') return part;
|
|
143
156
|
if (!('type' in part)) return part;
|
|
144
157
|
|
|
145
|
-
const
|
|
158
|
+
const toolPart = part as ToolPart;
|
|
159
|
+
const toolType = toolPart.type;
|
|
146
160
|
if (!toolType.startsWith('tool-')) return part;
|
|
147
161
|
|
|
148
162
|
const key = `${msgIdx}-${partIdx}`;
|
|
149
|
-
const hasOutput =
|
|
163
|
+
const hasOutput = toolPart.output !== undefined;
|
|
150
164
|
|
|
151
165
|
// If this tool result should be pruned, remove its output
|
|
152
166
|
if (hasOutput && !toKeep.has(key)) {
|
|
@@ -11,13 +11,20 @@ type UsageData = {
|
|
|
11
11
|
reasoningTokens?: number;
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
+
interface ProviderMetadata {
|
|
15
|
+
openai?: {
|
|
16
|
+
cachedPromptTokens?: number;
|
|
17
|
+
};
|
|
18
|
+
[key: string]: unknown;
|
|
19
|
+
}
|
|
20
|
+
|
|
14
21
|
/**
|
|
15
22
|
* Updates session token counts incrementally after each step.
|
|
16
23
|
* Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
|
|
17
24
|
*/
|
|
18
25
|
export async function updateSessionTokensIncremental(
|
|
19
26
|
usage: UsageData,
|
|
20
|
-
providerMetadata:
|
|
27
|
+
providerMetadata: ProviderMetadata | undefined,
|
|
21
28
|
opts: RunOpts,
|
|
22
29
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
23
30
|
) {
|
|
@@ -129,7 +136,7 @@ export async function updateSessionTokens(
|
|
|
129
136
|
*/
|
|
130
137
|
export async function updateMessageTokensIncremental(
|
|
131
138
|
usage: UsageData,
|
|
132
|
-
providerMetadata:
|
|
139
|
+
providerMetadata: ProviderMetadata | undefined,
|
|
133
140
|
opts: RunOpts,
|
|
134
141
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
135
142
|
) {
|
|
@@ -148,86 +155,74 @@ export async function updateMessageTokensIncremental(
|
|
|
148
155
|
const priorReasoning = Number(msg.reasoningTokens ?? 0);
|
|
149
156
|
|
|
150
157
|
// Treat usage as cumulative per-message - REPLACE not ADD
|
|
151
|
-
const
|
|
158
|
+
const nextPrompt =
|
|
152
159
|
usage.inputTokens != null ? Number(usage.inputTokens) : priorPrompt;
|
|
153
|
-
const
|
|
160
|
+
const nextCompletion =
|
|
154
161
|
usage.outputTokens != null ? Number(usage.outputTokens) : priorCompletion;
|
|
155
|
-
const
|
|
162
|
+
const nextReasoning =
|
|
156
163
|
usage.reasoningTokens != null
|
|
157
164
|
? Number(usage.reasoningTokens)
|
|
158
165
|
: priorReasoning;
|
|
159
166
|
|
|
160
|
-
const
|
|
167
|
+
const nextCached =
|
|
161
168
|
usage.cachedInputTokens != null
|
|
162
169
|
? Number(usage.cachedInputTokens)
|
|
163
170
|
: providerMetadata?.openai?.cachedPromptTokens != null
|
|
164
171
|
? Number(providerMetadata.openai.cachedPromptTokens)
|
|
165
172
|
: priorCached;
|
|
166
173
|
|
|
167
|
-
const cumTotal =
|
|
168
|
-
usage.totalTokens != null
|
|
169
|
-
? Number(usage.totalTokens)
|
|
170
|
-
: cumPrompt + cumCompletion + cumReasoning;
|
|
171
|
-
|
|
172
174
|
await db
|
|
173
175
|
.update(messages)
|
|
174
176
|
.set({
|
|
175
|
-
promptTokens:
|
|
176
|
-
completionTokens:
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
reasoningTokens: cumReasoning,
|
|
177
|
+
promptTokens: nextPrompt,
|
|
178
|
+
completionTokens: nextCompletion,
|
|
179
|
+
cachedInputTokens: nextCached,
|
|
180
|
+
reasoningTokens: nextReasoning,
|
|
180
181
|
})
|
|
181
182
|
.where(eq(messages.id, opts.assistantMessageId));
|
|
182
183
|
}
|
|
183
184
|
}
|
|
184
185
|
|
|
185
186
|
/**
|
|
186
|
-
*
|
|
187
|
-
*
|
|
187
|
+
* Completes the assistant message after the run finishes.
|
|
188
|
+
* Used to finalize timing but NOT tokens, which are already incremental.
|
|
188
189
|
*/
|
|
189
190
|
export async function completeAssistantMessage(
|
|
190
|
-
|
|
191
|
+
_fin: {
|
|
191
192
|
usage?: {
|
|
192
193
|
inputTokens?: number;
|
|
193
194
|
outputTokens?: number;
|
|
194
|
-
totalTokens?: number;
|
|
195
195
|
};
|
|
196
196
|
},
|
|
197
197
|
opts: RunOpts,
|
|
198
198
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
199
199
|
) {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
.
|
|
203
|
-
.set({
|
|
204
|
-
status: 'complete',
|
|
205
|
-
completedAt: Date.now(),
|
|
206
|
-
})
|
|
200
|
+
const msgRow = await db
|
|
201
|
+
.select()
|
|
202
|
+
.from(messages)
|
|
207
203
|
.where(eq(messages.id, opts.assistantMessageId));
|
|
204
|
+
|
|
205
|
+
if (msgRow.length > 0) {
|
|
206
|
+
await db
|
|
207
|
+
.update(messages)
|
|
208
|
+
.set({
|
|
209
|
+
finishedAt: new Date(),
|
|
210
|
+
})
|
|
211
|
+
.where(eq(messages.id, opts.assistantMessageId));
|
|
212
|
+
}
|
|
208
213
|
}
|
|
209
214
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
+
export async function createMessagePart(
|
|
216
|
+
partData: {
|
|
217
|
+
messageId: number;
|
|
218
|
+
contentType: 'text' | 'tool' | 'other';
|
|
219
|
+
toolName?: string | null;
|
|
220
|
+
toolArgs?: unknown;
|
|
221
|
+
toolResult?: unknown;
|
|
222
|
+
textContent?: string | null;
|
|
223
|
+
stepIndex?: number | null;
|
|
224
|
+
},
|
|
215
225
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
216
226
|
) {
|
|
217
|
-
|
|
218
|
-
.select()
|
|
219
|
-
.from(messageParts)
|
|
220
|
-
.where(eq(messageParts.messageId, opts.assistantMessageId));
|
|
221
|
-
|
|
222
|
-
for (const p of parts) {
|
|
223
|
-
if (p.type === 'text') {
|
|
224
|
-
let t = '';
|
|
225
|
-
try {
|
|
226
|
-
t = JSON.parse(p.content || '{}')?.text || '';
|
|
227
|
-
} catch {}
|
|
228
|
-
if (!t || t.length === 0) {
|
|
229
|
-
await db.delete(messageParts).where(eq(messageParts.id, p.id));
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
}
|
|
227
|
+
await db.insert(messageParts).values(partData);
|
|
233
228
|
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { ModelMessage } from 'ai';
|
|
2
|
+
|
|
3
|
+
/**
 * Truncates conversation history to keep only the most recent messages.
 * This helps manage context window size and improves performance.
 *
 * Strategy:
 * - If the history already fits within the limit, return it unchanged
 *   (the very same array instance, not a copy).
 * - Otherwise return a new array holding only the last `maxMessages` entries.
 *
 * This is a plain tail slice: message roles are never inspected, so
 * assistant/user (or tool-call/tool-result) pairs are NOT kept together and a
 * system message sitting at the start of `messages` is NOT pinned. Callers
 * that need either guarantee must handle it themselves.
 *
 * @param messages - Conversation history; assumed to be ordered oldest to
 *   newest, since the tail of the array is what gets kept. Never mutated.
 * @param maxMessages - Upper bound on the number of messages returned.
 *   Fractional values are rounded down so the bound is never exceeded, zero
 *   or negative values yield an empty history, and `NaN` disables truncation.
 * @returns The original array when no truncation is needed, otherwise a new
 *   array containing the trailing `maxMessages` messages.
 */
export function truncateHistory(
	messages: ModelMessage[],
	maxMessages: number,
): ModelMessage[] {
	// NaN compares false against everything; treat it explicitly as "no limit"
	// instead of relying on how Array.prototype.slice coerces it.
	if (Number.isNaN(maxMessages)) {
		return messages;
	}

	// Normalise the limit to a non-negative integer so that slice() can never
	// hand back more entries than the caller asked for.
	const limit = Math.max(0, Math.floor(maxMessages));

	if (messages.length <= limit) {
		return messages;
	}

	// Here messages.length > limit, so the start index is always positive.
	return messages.slice(messages.length - limit);
}
|