@agi-cli/server 0.1.58 → 0.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
+ import type { ModelMessage } from 'ai';
+
+ type SystemMessage =
+   | string
+   | Array<{
+       type: 'text';
+       text: string;
+       cache_control?: { type: 'ephemeral' };
+     }>;
+
+ interface ContentPart {
+   type: string;
+   [key: string]: unknown;
+   providerOptions?: {
+     anthropic?: {
+       cacheControl?: { type: 'ephemeral' };
+     };
+   };
+ }
+
+ /**
+  * Adds cache control to messages for prompt caching optimization.
+  * Anthropic supports caching for system messages, tools, and long context.
+  */
+ export function addCacheControl(
+   provider: string,
+   system: string | undefined,
+   messages: ModelMessage[],
+ ): {
+   system?: SystemMessage;
+   messages: ModelMessage[];
+ } {
+   // Only Anthropic supports prompt caching currently
+   if (provider !== 'anthropic') {
+     return { system, messages };
+   }
+
+   // Convert system to cacheable format if it's long enough
+   let cachedSystem: SystemMessage | undefined = system;
+   if (system && system.length > 1024) {
+     // Anthropic requires 1024+ tokens for Claude Sonnet/Opus; character length is used here as a rough proxy
+     cachedSystem = [
+       {
+         type: 'text',
+         text: system,
+         cache_control: { type: 'ephemeral' as const },
+       },
+     ];
+   }
+
+   // Anthropic cache_control limits:
+   // - Max 4 cache blocks total
+   // - System message: 1 block
+   // - Tools: 2 blocks (read, write)
+   // - Last user message: 1 block
+   // Total: 4 blocks
+
+   // Add cache control to the last user message if conversation is long
+   // This caches the conversation history up to that point
+   if (messages.length >= 3) {
+     const cachedMessages = [...messages];
+
+     // Find second-to-last user message (not the current one)
+     const userIndices = cachedMessages
+       .map((m, i) => (m.role === 'user' ? i : -1))
+       .filter((i) => i >= 0);
+
+     if (userIndices.length >= 2) {
+       const targetIndex = userIndices[userIndices.length - 2];
+       const targetMsg = cachedMessages[targetIndex];
+
+       if (Array.isArray(targetMsg.content)) {
+         // Add cache control to the last content part of that message
+         const lastPart = targetMsg.content[targetMsg.content.length - 1];
+         if (lastPart && typeof lastPart === 'object' && 'type' in lastPart) {
+           (lastPart as ContentPart).providerOptions = {
+             anthropic: { cacheControl: { type: 'ephemeral' } },
+           };
+         }
+       }
+     }
+
+     return {
+       system: cachedSystem,
+       messages: cachedMessages,
+     };
+   }
+
+   return {
+     system: cachedSystem,
+     messages,
+   };
+ }
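
A minimal usage sketch of the addCacheControl function added above. The module path, the repeated system string, and the hand-built history are assumptions for illustration, not part of this diff:

import { addCacheControl } from './cache-control.ts'; // path is a guess; the file name is not shown in this diff
import type { ModelMessage } from 'ai';

const history: ModelMessage[] = [
  { role: 'user', content: 'Summarize the repo structure.' },
  { role: 'assistant', content: 'It is a monorepo with packages/...' },
  { role: 'user', content: 'Now refactor the server package.' },
];

const { system, messages } = addCacheControl(
  'anthropic',
  'You are a coding agent. '.repeat(100), // well over the 1024-character threshold
  history,
);
// system becomes [{ type: 'text', text: ..., cache_control: { type: 'ephemeral' } }].
// With >= 3 messages and >= 2 user turns, the second-to-last user message's last
// content part also gets anthropic cacheControl providerOptions, but only when that
// message's content is an array of parts (string content, as here, is left untouched).
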
@@ -0,0 +1,206 @@
+ import type { ModelMessage } from 'ai';
+
+ /**
+  * Optimizes message context by deduplicating file reads and pruning old tool results.
+  */
+
+ interface FileRead {
+   messageIndex: number;
+   partIndex: number;
+   path: string;
+ }
+
+ interface ToolPart {
+   type: string;
+   input?: {
+     path?: string;
+     filePattern?: string;
+     pattern?: string;
+   };
+   output?: unknown;
+   [key: string]: unknown;
+ }
+
+ /**
+  * Deduplicates file read results, keeping only the latest version of each file.
+  *
+  * Strategy:
+  * - Track all file reads (read, grep, glob tools)
+  * - For files read multiple times, remove older results
+  * - Keep only the most recent read of each file
+  */
+ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
+   const fileReads = new Map<string, FileRead[]>();
+
+   // First pass: identify all file reads and their locations
+   messages.forEach((msg, msgIdx) => {
+     if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return;
+
+     msg.content.forEach((part, partIdx) => {
+       if (!part || typeof part !== 'object') return;
+       if (!('type' in part)) return;
+
+       const toolType = part.type as string;
+
+       // Check if this is a file read tool (read, grep, glob)
+       if (!toolType.startsWith('tool-')) return;
+
+       const toolName = toolType.replace('tool-', '');
+       if (!['read', 'grep', 'glob'].includes(toolName)) return;
+
+       // Extract file path from input
+       const toolPart = part as ToolPart;
+       const input = toolPart.input;
+       if (!input) return;
+
+       const path = input.path || input.filePattern || input.pattern;
+       if (!path) return;
+
+       // Track this file read
+       if (!fileReads.has(path)) {
+         fileReads.set(path, []);
+       }
+       fileReads
+         .get(path)
+         ?.push({ messageIndex: msgIdx, partIndex: partIdx, path });
+     });
+   });
+
+   // Second pass: identify reads to remove (all but the latest for each file)
+   const readsToRemove = new Set<string>();
+
+   for (const [_path, reads] of fileReads) {
+     if (reads.length <= 1) continue;
+
+     // Sort by message index descending (latest first)
+     reads.sort((a, b) => b.messageIndex - a.messageIndex);
+
+     // Remove all but the first (latest)
+     for (let i = 1; i < reads.length; i++) {
+       const read = reads[i];
+       readsToRemove.add(`${read.messageIndex}-${read.partIndex}`);
+     }
+   }
+
+   // Third pass: rebuild messages without removed reads
+   return messages.map((msg, msgIdx) => {
+     if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg;
+
+     const filteredContent = msg.content.filter((_part, partIdx) => {
+       const key = `${msgIdx}-${partIdx}`;
+       return !readsToRemove.has(key);
+     });
+
+     return {
+       ...msg,
+       content: filteredContent,
+     };
+   });
+ }
+
+ /**
+  * Prunes old tool results to reduce context size.
+  *
+  * Strategy:
+  * - Keep only the last N tool results
+  * - Preserve tool calls but remove their output
+  * - Keep text parts intact
+  */
+ export function pruneToolResults(
+   messages: ModelMessage[],
+   maxToolResults = 30,
+ ): ModelMessage[] {
+   // Collect all tool result locations
+   const toolResults: Array<{ messageIndex: number; partIndex: number }> = [];
+
+   messages.forEach((msg, msgIdx) => {
+     if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return;
+
+     msg.content.forEach((part, partIdx) => {
+       if (!part || typeof part !== 'object') return;
+       if (!('type' in part)) return;
+
+       const toolType = part.type as string;
+       if (!toolType.startsWith('tool-')) return;
+
+       // Check if this has output
+       const toolPart = part as ToolPart;
+       const hasOutput = toolPart.output !== undefined;
+       if (!hasOutput) return;
+
+       toolResults.push({ messageIndex: msgIdx, partIndex: partIdx });
+     });
+   });
+
+   // If under limit, no pruning needed
+   if (toolResults.length <= maxToolResults) {
+     return messages;
+   }
+
+   // Keep only the last N tool results
+   const toKeep = new Set<string>();
+   const keepCount = Math.min(maxToolResults, toolResults.length);
+   const keepStart = toolResults.length - keepCount;
+
+   for (let i = keepStart; i < toolResults.length; i++) {
+     const result = toolResults[i];
+     toKeep.add(`${result.messageIndex}-${result.partIndex}`);
+   }
+
+   // Rebuild messages, removing old tool outputs
+   return messages.map((msg, msgIdx) => {
+     if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg;
+
+     const processedContent = msg.content.map((part, partIdx) => {
+       if (!part || typeof part !== 'object') return part;
+       if (!('type' in part)) return part;
+
+       const toolPart = part as ToolPart;
+       const toolType = toolPart.type;
+       if (!toolType.startsWith('tool-')) return part;
+
+       const key = `${msgIdx}-${partIdx}`;
+       const hasOutput = toolPart.output !== undefined;
+
+       // If this tool result should be pruned, remove its output
+       if (hasOutput && !toKeep.has(key)) {
+         return {
+           ...part,
+           output: '[pruned to save context]',
+         };
+       }
+
+       return part;
+     });
+
+     return {
+       ...msg,
+       content: processedContent,
+     };
+   });
+ }
+
+ /**
+  * Applies all context optimizations:
+  * 1. Deduplicate file reads
+  * 2. Prune old tool results
+  */
+ export function optimizeContext(
+   messages: ModelMessage[],
+   options: {
+     deduplicateFiles?: boolean;
+     maxToolResults?: number;
+   } = {},
+ ): ModelMessage[] {
+   let optimized = messages;
+
+   if (options.deduplicateFiles !== false) {
+     optimized = deduplicateFileReads(optimized);
+   }
+
+   if (options.maxToolResults !== undefined) {
+     optimized = pruneToolResults(optimized, options.maxToolResults);
+   }
+
+   return optimized;
+ }
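
A minimal usage sketch of the new optimizer. The module path and the hand-built history are assumptions; the 'tool-read' part shape below only mirrors what deduplicateFileReads inspects (type, input.path, output), not the AI SDK's own part types, hence the cast:

import { optimizeContext } from './context-optimizer.ts'; // path is a guess; not shown in this diff
import type { ModelMessage } from 'ai';

const history = [
  {
    role: 'assistant',
    content: [{ type: 'tool-read', input: { path: 'src/index.ts' }, output: 'old contents' }],
  },
  {
    role: 'assistant',
    content: [{ type: 'tool-read', input: { path: 'src/index.ts' }, output: 'new contents' }],
  },
] as unknown as ModelMessage[];

// Deduplication keeps only the later read of src/index.ts; tool-result pruning
// only runs when maxToolResults is passed, and only replaces outputs once more
// results than that limit exist.
const optimized = optimizeContext(history, { maxToolResults: 30 });
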
@@ -3,8 +3,103 @@ import { messages, messageParts, sessions } from '@agi-cli/database/schema';
  import { eq } from 'drizzle-orm';
  import type { RunOpts } from './session-queue.ts';

+ type UsageData = {
+   inputTokens?: number;
+   outputTokens?: number;
+   totalTokens?: number;
+   cachedInputTokens?: number;
+   reasoningTokens?: number;
+ };
+
+ interface ProviderMetadata {
+   openai?: {
+     cachedPromptTokens?: number;
+   };
+   [key: string]: unknown;
+ }
+
+ /**
+  * Updates session token counts incrementally after each step.
+  * Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
+  */
+ export async function updateSessionTokensIncremental(
+   usage: UsageData,
+   providerMetadata: ProviderMetadata | undefined,
+   opts: RunOpts,
+   db: Awaited<ReturnType<typeof getDb>>,
+ ) {
+   if (!usage) return;
+
+   // Read session totals
+   const sessRows = await db
+     .select()
+     .from(sessions)
+     .where(eq(sessions.id, opts.sessionId));
+
+   if (sessRows.length === 0 || !sessRows[0]) return;
+
+   const sess = sessRows[0];
+   const priorInputSess = Number(sess.totalInputTokens ?? 0);
+   const priorOutputSess = Number(sess.totalOutputTokens ?? 0);
+   const priorCachedSess = Number(sess.totalCachedTokens ?? 0);
+   const priorReasoningSess = Number(sess.totalReasoningTokens ?? 0);
+
+   // Read current message totals to compute delta
+   const msgRows = await db
+     .select()
+     .from(messages)
+     .where(eq(messages.id, opts.assistantMessageId));
+
+   const msg = msgRows[0];
+   const priorPromptMsg = Number(msg?.promptTokens ?? 0);
+   const priorCompletionMsg = Number(msg?.completionTokens ?? 0);
+   const priorCachedMsg = Number(msg?.cachedInputTokens ?? 0);
+   const priorReasoningMsg = Number(msg?.reasoningTokens ?? 0);
+
+   // Treat usage as cumulative per-message for this step
+   const cumPrompt =
+     usage.inputTokens != null ? Number(usage.inputTokens) : priorPromptMsg;
+   const cumCompletion =
+     usage.outputTokens != null
+       ? Number(usage.outputTokens)
+       : priorCompletionMsg;
+   const cumReasoning =
+     usage.reasoningTokens != null
+       ? Number(usage.reasoningTokens)
+       : priorReasoningMsg;
+
+   const cumCached =
+     usage.cachedInputTokens != null
+       ? Number(usage.cachedInputTokens)
+       : providerMetadata?.openai?.cachedPromptTokens != null
+         ? Number(providerMetadata.openai.cachedPromptTokens)
+         : priorCachedMsg;
+
+   // Compute deltas for this step; clamp to 0 in case provider reports smaller values
+   const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
+   const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
+   const deltaCached = Math.max(0, cumCached - priorCachedMsg);
+   const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
+
+   const nextInputSess = priorInputSess + deltaInput;
+   const nextOutputSess = priorOutputSess + deltaOutput;
+   const nextCachedSess = priorCachedSess + deltaCached;
+   const nextReasoningSess = priorReasoningSess + deltaReasoning;
+
+   await db
+     .update(sessions)
+     .set({
+       totalInputTokens: nextInputSess,
+       totalOutputTokens: nextOutputSess,
+       totalCachedTokens: nextCachedSess,
+       totalReasoningTokens: nextReasoningSess,
+     })
+     .where(eq(sessions.id, opts.sessionId));
+ }
+
  /**
   * Updates session token counts after a run completes.
+  * @deprecated Use updateSessionTokensIncremental for per-step tracking
   */
  export async function updateSessionTokens(
    fin: { usage?: { inputTokens?: number; outputTokens?: number } },
@@ -36,59 +131,98 @@ export async function updateSessionTokens(
  }

  /**
-  * Marks an assistant message as complete with token usage information.
+  * Updates message token counts incrementally after each step.
+  * Note: onStepFinish.usage is CUMULATIVE per message, so we REPLACE values, not add.
+  */
+ export async function updateMessageTokensIncremental(
+   usage: UsageData,
+   providerMetadata: ProviderMetadata | undefined,
+   opts: RunOpts,
+   db: Awaited<ReturnType<typeof getDb>>,
+ ) {
+   if (!usage) return;
+
+   const msgRows = await db
+     .select()
+     .from(messages)
+     .where(eq(messages.id, opts.assistantMessageId));
+
+   if (msgRows.length > 0 && msgRows[0]) {
+     const msg = msgRows[0];
+     const priorPrompt = Number(msg.promptTokens ?? 0);
+     const priorCompletion = Number(msg.completionTokens ?? 0);
+     const priorCached = Number(msg.cachedInputTokens ?? 0);
+     const priorReasoning = Number(msg.reasoningTokens ?? 0);
+
+     // Treat usage as cumulative per-message - REPLACE not ADD
+     const nextPrompt =
+       usage.inputTokens != null ? Number(usage.inputTokens) : priorPrompt;
+     const nextCompletion =
+       usage.outputTokens != null ? Number(usage.outputTokens) : priorCompletion;
+     const nextReasoning =
+       usage.reasoningTokens != null
+         ? Number(usage.reasoningTokens)
+         : priorReasoning;
+
+     const nextCached =
+       usage.cachedInputTokens != null
+         ? Number(usage.cachedInputTokens)
+         : providerMetadata?.openai?.cachedPromptTokens != null
+           ? Number(providerMetadata.openai.cachedPromptTokens)
+           : priorCached;
+
+     await db
+       .update(messages)
+       .set({
+         promptTokens: nextPrompt,
+         completionTokens: nextCompletion,
+         cachedInputTokens: nextCached,
+         reasoningTokens: nextReasoning,
+       })
+       .where(eq(messages.id, opts.assistantMessageId));
+   }
+ }
+
+ /**
+  * Completes the assistant message after the run finishes.
+  * Used to finalize timing but NOT tokens, which are already incremental.
   */
  export async function completeAssistantMessage(
-   fin: {
+   _fin: {
      usage?: {
        inputTokens?: number;
        outputTokens?: number;
-       totalTokens?: number;
      };
    },
    opts: RunOpts,
    db: Awaited<ReturnType<typeof getDb>>,
  ) {
-   const vals: Record<string, unknown> = {
-     status: 'complete',
-     completedAt: Date.now(),
-   };
+   const msgRow = await db
+     .select()
+     .from(messages)
+     .where(eq(messages.id, opts.assistantMessageId));

-   if (fin.usage) {
-     vals.promptTokens = fin.usage.inputTokens;
-     vals.completionTokens = fin.usage.outputTokens;
-     vals.totalTokens =
-       fin.usage.totalTokens ??
-       (vals.promptTokens as number) + (vals.completionTokens as number);
+   if (msgRow.length > 0) {
+     await db
+       .update(messages)
+       .set({
+         finishedAt: new Date(),
+       })
+       .where(eq(messages.id, opts.assistantMessageId));
    }
-
-   await db
-     .update(messages)
-     .set(vals)
-     .where(eq(messages.id, opts.assistantMessageId));
  }

- /**
-  * Removes empty text parts from an assistant message.
-  */
- export async function cleanupEmptyTextParts(
-   opts: RunOpts,
+ export async function createMessagePart(
+   partData: {
+     messageId: number;
+     contentType: 'text' | 'tool' | 'other';
+     toolName?: string | null;
+     toolArgs?: unknown;
+     toolResult?: unknown;
+     textContent?: string | null;
+     stepIndex?: number | null;
+   },
    db: Awaited<ReturnType<typeof getDb>>,
  ) {
-   const parts = await db
-     .select()
-     .from(messageParts)
-     .where(eq(messageParts.messageId, opts.assistantMessageId));
-
-   for (const p of parts) {
-     if (p.type === 'text') {
-       let t = '';
-       try {
-         t = JSON.parse(p.content || '{}')?.text || '';
-       } catch {}
-       if (!t || t.length === 0) {
-         await db.delete(messageParts).where(eq(messageParts.id, p.id));
-       }
-     }
-   }
+   await db.insert(messageParts).values(partData);
  }
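
A brief sketch of how the cumulative-to-delta accounting above plays out. The onStepFinish call site and the token numbers are assumptions for illustration (the diff only states that onStepFinish.usage is cumulative per message); the ordering note follows directly from the code, since the session update reads the message row's previous totals to compute the delta:

// Hypothetical call site: run the session update before the message update,
// otherwise the message row already holds the cumulative values and the delta is 0.
// onStepFinish: async ({ usage, providerMetadata }) => {
//   await updateSessionTokensIncremental(usage, providerMetadata, opts, db); // ADD per-step deltas
//   await updateMessageTokensIncremental(usage, providerMetadata, opts, db); // REPLACE per-message totals
// },

// Worked numbers: step 1 reports { inputTokens: 1200, outputTokens: 80 } (cumulative);
// the message row is 0/0, so the session gains 1200/80. Step 2 reports 1900/250;
// the message row now holds 1200/80, so the session gains only the 700/170 delta.
// The session ends up with 1900/250 for this message, with no double counting.
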
@@ -0,0 +1,26 @@
+ import type { ModelMessage } from 'ai';
+
+ /**
+  * Truncates conversation history to keep only the most recent messages.
+  * This helps manage context window size and improves performance.
+  *
+  * Strategy:
+  * - Keep only the last N messages
+  * - Preserve message pairs (assistant + user responses) when possible
+  * - Always keep at least the system message if present
+  */
+ export function truncateHistory(
+   messages: ModelMessage[],
+   maxMessages: number,
+ ): ModelMessage[] {
+   if (messages.length <= maxMessages) {
+     return messages;
+   }
+
+   // Calculate how many messages to keep
+   const keepCount = Math.min(maxMessages, messages.length);
+   const startIndex = messages.length - keepCount;
+
+   // Return the most recent messages
+   return messages.slice(startIndex);
+ }
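
A minimal usage sketch of truncateHistory; the module path, the 120-turn history, and the 50-message cap are assumptions for illustration:

import { truncateHistory } from './truncate-history.ts'; // path is a guess; not shown in this diff
import type { ModelMessage } from 'ai';

const history: ModelMessage[] = Array.from({ length: 120 }, (_, i) => ({
  role: i % 2 === 0 ? ('user' as const) : ('assistant' as const),
  content: `turn ${i}`,
}));

const trimmed = truncateHistory(history, 50);
// trimmed.length === 50, containing turns 70..119; an input of 30 messages would be
// returned unchanged. As written, the function is a plain slice, so a system prompt
// is better passed separately (as addCacheControl does) than kept inside messages.
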