@agi-cli/server 0.1.59 → 0.1.61

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,5 +1,23 @@
 import type { ModelMessage } from 'ai';
 
+type SystemMessage =
+  | string
+  | Array<{
+      type: 'text';
+      text: string;
+      cache_control?: { type: 'ephemeral' };
+    }>;
+
+interface ContentPart {
+  type: string;
+  [key: string]: unknown;
+  providerOptions?: {
+    anthropic?: {
+      cacheControl?: { type: 'ephemeral' };
+    };
+  };
+}
+
 /**
  * Adds cache control to messages for prompt caching optimization.
  * Anthropic supports caching for system messages, tools, and long context.
@@ -9,13 +27,7 @@ export function addCacheControl(
   system: string | undefined,
   messages: ModelMessage[],
 ): {
-  system?:
-    | string
-    | Array<{
-        type: 'text';
-        text: string;
-        cache_control?: { type: 'ephemeral' };
-      }>;
+  system?: SystemMessage;
   messages: ModelMessage[];
 } {
   // Only Anthropic supports prompt caching currently
@@ -24,7 +36,7 @@ export function addCacheControl(
   }
 
   // Convert system to cacheable format if it's long enough
-  let cachedSystem: any = system;
+  let cachedSystem: SystemMessage | undefined = system;
   if (system && system.length > 1024) {
     // Anthropic requires 1024+ tokens for Claude Sonnet/Opus
     cachedSystem = [
@@ -61,55 +73,21 @@ export function addCacheControl(
         // Add cache control to the last content part of that message
         const lastPart = targetMsg.content[targetMsg.content.length - 1];
         if (lastPart && typeof lastPart === 'object' && 'type' in lastPart) {
-          (lastPart as any).providerOptions = {
+          (lastPart as ContentPart).providerOptions = {
             anthropic: { cacheControl: { type: 'ephemeral' } },
           };
         }
       }
     }
 
-    return { system: cachedSystem, messages: cachedMessages };
-  }
-
-  return { system: cachedSystem, messages };
-}
-
-/**
- * Truncates old messages to reduce context size while keeping recent context.
- * Strategy: Keep system message + last N messages
- */
-export function truncateHistory(
-  messages: ModelMessage[],
-  maxMessages = 20,
-): ModelMessage[] {
-  if (messages.length <= maxMessages) {
-    return messages;
-  }
-
-  // Keep the most recent messages
-  return messages.slice(-maxMessages);
-}
-
-/**
- * Estimates token count (rough approximation: ~4 chars per token)
- */
-export function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
-}
-
-/**
- * Summarizes tool results if they're too long
- */
-export function summarizeToolResult(result: unknown, maxLength = 5000): string {
-  const str = typeof result === 'string' ? result : JSON.stringify(result);
-
-  if (str.length <= maxLength) {
-    return str;
+    return {
+      system: cachedSystem,
+      messages: cachedMessages,
+    };
   }
 
-  // Truncate and add indicator
-  return (
-    str.slice(0, maxLength) +
-    `\n\n[... truncated ${str.length - maxLength} characters]`
-  );
+  return {
+    system: cachedSystem,
+    messages,
+  };
 }
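
Note: populated values for the two shapes introduced above, SystemMessage and ContentPart, look like the following. This is a minimal illustrative sketch derived only from the type declarations in this diff; it is not code shipped in the package, and the literal strings are made up.

    // Cacheable system prompt in the array form used for Anthropic prompt caching.
    const cachedSystem: SystemMessage = [
      {
        type: 'text',
        text: 'Long system prompt (only converted when it passes the length check above)...',
        cache_control: { type: 'ephemeral' },
      },
    ];

    // A message content part tagged for caching through AI SDK provider options.
    const taggedPart: ContentPart = {
      type: 'text',
      text: 'Earlier conversation turn...',
      providerOptions: {
        anthropic: { cacheControl: { type: 'ephemeral' } },
      },
    };
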
@@ -10,6 +10,17 @@ interface FileRead {
   path: string;
 }
 
+interface ToolPart {
+  type: string;
+  input?: {
+    path?: string;
+    filePattern?: string;
+    pattern?: string;
+  };
+  output?: unknown;
+  [key: string]: unknown;
+}
+
 /**
  * Deduplicates file read results, keeping only the latest version of each file.
  *
@@ -38,7 +49,8 @@ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
       if (!['read', 'grep', 'glob'].includes(toolName)) return;
 
       // Extract file path from input
-      const input = (part as any).input;
+      const toolPart = part as ToolPart;
+      const input = toolPart.input;
       if (!input) return;
 
       const path = input.path || input.filePattern || input.pattern;
@@ -49,8 +61,8 @@ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
         fileReads.set(path, []);
       }
       fileReads
-        .get(path)!
-        .push({ messageIndex: msgIdx, partIndex: partIdx, path });
+        .get(path)
+        ?.push({ messageIndex: msgIdx, partIndex: partIdx, path });
     });
   });
 
@@ -112,7 +124,8 @@ export function pruneToolResults(
       if (!toolType.startsWith('tool-')) return;
 
       // Check if this has output
-      const hasOutput = (part as any).output !== undefined;
+      const toolPart = part as ToolPart;
+      const hasOutput = toolPart.output !== undefined;
       if (!hasOutput) return;
 
       toolResults.push({ messageIndex: msgIdx, partIndex: partIdx });
@@ -142,11 +155,12 @@ export function pruneToolResults(
       if (!part || typeof part !== 'object') return part;
       if (!('type' in part)) return part;
 
-      const toolType = (part as any).type as string;
+      const toolPart = part as ToolPart;
+      const toolType = toolPart.type;
       if (!toolType.startsWith('tool-')) return part;
 
       const key = `${msgIdx}-${partIdx}`;
-      const hasOutput = (part as any).output !== undefined;
+      const hasOutput = toolPart.output !== undefined;
 
       // If this tool result should be pruned, remove its output
       if (hasOutput && !toKeep.has(key)) {
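
The .get(path)! to .get(path)?.push(...) change earlier in this file replaces a non-null assertion with optional chaining. Behavior is identical here because the key is seeded just before the push, but the optional call avoids a runtime TypeError if that invariant ever breaks. A standalone illustration of the pattern, not package code:

    const fileReads = new Map<string, { messageIndex: number; partIndex: number; path: string }[]>();
    const path = 'src/index.ts';

    if (!fileReads.has(path)) {
      fileReads.set(path, []);
    }
    // With `!`, a missing key would surface as "Cannot read properties of undefined";
    // with `?.`, the push is simply skipped.
    fileReads.get(path)?.push({ messageIndex: 0, partIndex: 0, path });
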
@@ -11,13 +11,20 @@ type UsageData = {
   reasoningTokens?: number;
 };
 
+interface ProviderMetadata {
+  openai?: {
+    cachedPromptTokens?: number;
+  };
+  [key: string]: unknown;
+}
+
 /**
  * Updates session token counts incrementally after each step.
  * Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
  */
 export async function updateSessionTokensIncremental(
   usage: UsageData,
-  providerMetadata: Record<string, any> | undefined,
+  providerMetadata: ProviderMetadata | undefined,
   opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
@@ -129,7 +136,7 @@ export async function updateSessionTokens(
  */
 export async function updateMessageTokensIncremental(
   usage: UsageData,
-  providerMetadata: Record<string, any> | undefined,
+  providerMetadata: ProviderMetadata | undefined,
   opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
@@ -148,86 +155,74 @@ export async function updateMessageTokensIncremental(
     const priorReasoning = Number(msg.reasoningTokens ?? 0);
 
     // Treat usage as cumulative per-message - REPLACE not ADD
-    const cumPrompt =
+    const nextPrompt =
       usage.inputTokens != null ? Number(usage.inputTokens) : priorPrompt;
-    const cumCompletion =
+    const nextCompletion =
       usage.outputTokens != null ? Number(usage.outputTokens) : priorCompletion;
-    const cumReasoning =
+    const nextReasoning =
       usage.reasoningTokens != null
         ? Number(usage.reasoningTokens)
         : priorReasoning;
 
-    const cumCached =
+    const nextCached =
       usage.cachedInputTokens != null
         ? Number(usage.cachedInputTokens)
         : providerMetadata?.openai?.cachedPromptTokens != null
           ? Number(providerMetadata.openai.cachedPromptTokens)
           : priorCached;
 
-    const cumTotal =
-      usage.totalTokens != null
-        ? Number(usage.totalTokens)
-        : cumPrompt + cumCompletion + cumReasoning;
-
     await db
       .update(messages)
       .set({
-        promptTokens: cumPrompt,
-        completionTokens: cumCompletion,
-        totalTokens: cumTotal,
-        cachedInputTokens: cumCached,
-        reasoningTokens: cumReasoning,
+        promptTokens: nextPrompt,
+        completionTokens: nextCompletion,
+        cachedInputTokens: nextCached,
+        reasoningTokens: nextReasoning,
       })
       .where(eq(messages.id, opts.assistantMessageId));
   }
 }
 
 /**
- * Marks an assistant message as complete.
- * Token usage is tracked incrementally via updateMessageTokensIncremental().
+ * Completes the assistant message after the run finishes.
+ * Used to finalize timing but NOT tokens, which are already incremental.
  */
 export async function completeAssistantMessage(
-  fin: {
+  _fin: {
     usage?: {
       inputTokens?: number;
       outputTokens?: number;
-      totalTokens?: number;
     };
   },
   opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
-  // Only mark as complete - tokens are already tracked incrementally
-  await db
-    .update(messages)
-    .set({
-      status: 'complete',
-      completedAt: Date.now(),
-    })
+  const msgRow = await db
+    .select()
+    .from(messages)
     .where(eq(messages.id, opts.assistantMessageId));
+
+  if (msgRow.length > 0) {
+    await db
+      .update(messages)
+      .set({
+        finishedAt: new Date(),
+      })
+      .where(eq(messages.id, opts.assistantMessageId));
+  }
 }
 
-/**
- * Removes empty text parts from an assistant message.
- */
-export async function cleanupEmptyTextParts(
-  opts: RunOpts,
+export async function createMessagePart(
+  partData: {
+    messageId: number;
+    contentType: 'text' | 'tool' | 'other';
+    toolName?: string | null;
+    toolArgs?: unknown;
+    toolResult?: unknown;
+    textContent?: string | null;
+    stepIndex?: number | null;
+  },
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
-  const parts = await db
-    .select()
-    .from(messageParts)
-    .where(eq(messageParts.messageId, opts.assistantMessageId));
-
-  for (const p of parts) {
-    if (p.type === 'text') {
-      let t = '';
-      try {
-        t = JSON.parse(p.content || '{}')?.text || '';
-      } catch {}
-      if (!t || t.length === 0) {
-        await db.delete(messageParts).where(eq(messageParts.id, p.id));
-      }
-    }
-  }
+  await db.insert(messageParts).values(partData);
 }
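
As the doc comments in this file note, onStepFinish.usage is cumulative per assistant message: the per-message columns are overwritten with the latest totals, while the session counters should only grow by the step-over-step delta. The sketch below illustrates that arithmetic for the prompt-token case only; it is an assumption-laden simplification, not the package code, and the real functions also handle cached and reasoning tokens and persist the values via the db handle shown above.

    type StepUsage = { inputTokens?: number; outputTokens?: number };

    // Per-message: replace the stored value with the cumulative count from this step.
    function nextMessageTokens(prior: number, usage: StepUsage): number {
      return usage.inputTokens != null ? Number(usage.inputTokens) : prior;
    }

    // Per-session: add only the increase since the count already attributed to this message.
    function sessionTokenDelta(priorForMessage: number, usage: StepUsage): number {
      const cumulative =
        usage.inputTokens != null ? Number(usage.inputTokens) : priorForMessage;
      return Math.max(0, cumulative - priorForMessage);
    }
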
@@ -0,0 +1,26 @@
+import type { ModelMessage } from 'ai';
+
+/**
+ * Truncates conversation history to keep only the most recent messages.
+ * This helps manage context window size and improves performance.
+ *
+ * Strategy:
+ * - Keep only the last N messages
+ * - Preserve message pairs (assistant + user responses) when possible
+ * - Always keep at least the system message if present
+ */
+export function truncateHistory(
+  messages: ModelMessage[],
+  maxMessages: number,
+): ModelMessage[] {
+  if (messages.length <= maxMessages) {
+    return messages;
+  }
+
+  // Calculate how many messages to keep
+  const keepCount = Math.min(maxMessages, messages.length);
+  const startIndex = messages.length - keepCount;
+
+  // Return the most recent messages
+  return messages.slice(startIndex);
+}
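
A quick usage sketch of the new helper added in this file; the call site below is illustrative and not part of the package. Note that maxMessages no longer has a default (the version removed from the other module defaulted to 20), so callers must pass it explicitly.

    import type { ModelMessage } from 'ai';

    const history: ModelMessage[] = [
      { role: 'user', content: 'first question' },
      { role: 'assistant', content: 'first answer' },
      { role: 'user', content: 'follow-up question' },
    ];

    // Keep only the two most recent messages; older turns are dropped.
    const trimmed = truncateHistory(history, 2);
    // trimmed => the assistant reply and the follow-up question
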