@agi-cli/server 0.1.161 → 0.1.163

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@agi-cli/server",
-  "version": "0.1.161",
+  "version": "0.1.163",
   "description": "HTTP API server for AGI CLI",
   "type": "module",
   "main": "./src/index.ts",
@@ -29,8 +29,8 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "@agi-cli/sdk": "0.1.161",
-    "@agi-cli/database": "0.1.161",
+    "@agi-cli/sdk": "0.1.163",
+    "@agi-cli/database": "0.1.163",
     "drizzle-orm": "^0.44.5",
     "hono": "^4.9.9",
     "zod": "^4.1.8"
@@ -200,4 +200,28 @@ export function registerTerminalsRoutes(
       return c.json({ error: message }, 500);
     }
   });
+
+  app.post('/v1/terminals/:id/resize', async (c) => {
+    const id = c.req.param('id');
+    const terminal = terminalManager.get(id);
+
+    if (!terminal) {
+      return c.json({ error: 'Terminal not found' }, 404);
+    }
+
+    try {
+      const body = await c.req.json();
+      const { cols, rows } = body;
+
+      if (!cols || !rows || cols < 1 || rows < 1) {
+        return c.json({ error: 'valid cols and rows are required' }, 400);
+      }
+
+      terminal.resize(cols, rows);
+      return c.json({ success: true });
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return c.json({ error: message }, 500);
+    }
+  });
 }
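For reference, the new resize route can be exercised with any HTTP client. Below is a minimal client-side sketch; the base URL, port, and terminal id are placeholders rather than values defined by this package:

// Hypothetical client helper for POST /v1/terminals/:id/resize (illustrative only).
async function resizeTerminal(
  baseUrl: string,
  terminalId: string,
  cols: number,
  rows: number,
): Promise<{ success: boolean }> {
  const res = await fetch(`${baseUrl}/v1/terminals/${terminalId}/resize`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ cols, rows }),
  });
  if (!res.ok) {
    // Per the route above: 400 for missing/invalid cols or rows,
    // 404 for an unknown terminal id, 500 if terminal.resize() throws.
    throw new Error(`resize failed with status ${res.status}`);
  }
  return res.json();
}

// Example (placeholder values): await resizeTerminal('http://localhost:3000', 'abc123', 120, 40);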
@@ -237,6 +237,7 @@ export async function setupRunner(opts: RunOpts): Promise<SetupResult> {
     }
   } else if (underlyingProvider === 'openai') {
     providerOptions.openai = {
+      reasoningEffort: 'high',
       reasoningSummary: 'auto',
     };
   } else if (underlyingProvider === 'google') {
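In effect, OpenAI runs now request high reasoning effort in addition to automatic reasoning summaries. The resulting provider options take roughly this shape (a sketch of the object only, not the full setupRunner logic):

// Approximate shape of the OpenAI provider options after this change.
const providerOptions: Record<string, Record<string, unknown>> = {
  openai: {
    reasoningEffort: 'high',
    reasoningSummary: 'auto',
  },
};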
@@ -1,60 +1,32 @@
+import { catalog, getModelInfo } from '@agi-cli/sdk';
+import type { ProviderId } from '@agi-cli/sdk';
+
 export const PRUNE_PROTECT = 40_000;
 
 export function estimateTokens(text: string): number {
   return Math.max(0, Math.round((text || '').length / 4));
 }
 
-export interface TokenUsage {
-  input: number;
-  output: number;
-  cacheRead?: number;
-  cacheWrite?: number;
-  reasoningText?: number;
-}
-
 export interface ModelLimits {
   context: number;
   output: number;
 }
 
-export function isOverflow(
-  tokens: LanguageModelUsage,
-  limits: ModelLimits,
-): boolean {
-  if (limits.context === 0) return false;
-
-  const count =
-    tokens.input +
-    (tokens.cacheRead ?? 0) +
-    (tokens.cacheWrite ?? 0) +
-    tokens.output;
-  const usableContext = limits.context - limits.output;
-
-  return count > usableContext;
-}
-
 export function getModelLimits(
-  _provider: string,
+  provider: string,
   model: string,
 ): ModelLimits | null {
-  const defaults: Record<string, ModelLimits> = {
-    'claude-sonnet-4-20250514': { context: 200000, output: 16000 },
-    'claude-3-5-sonnet-20241022': { context: 200000, output: 8192 },
-    'claude-3-5-haiku-20241022': { context: 200000, output: 8192 },
-    'gpt-4o': { context: 128000, output: 16384 },
-    'gpt-4o-mini': { context: 128000, output: 16384 },
-    o1: { context: 200000, output: 100000 },
-    'o3-mini': { context: 200000, output: 100000 },
-    'gemini-2.0-flash': { context: 1000000, output: 8192 },
-    'gemini-1.5-pro': { context: 2000000, output: 8192 },
-  };
-
-  if (defaults[model]) return defaults[model];
-
-  for (const [key, limits] of Object.entries(defaults)) {
-    if (model.includes(key) || key.includes(model)) return limits;
+  const info = getModelInfo(provider as ProviderId, model);
+  if (info?.limit?.context && info?.limit?.output) {
+    return { context: info.limit.context, output: info.limit.output };
+  }
+  for (const key of Object.keys(catalog) as ProviderId[]) {
+    const entry = catalog[key];
+    const m = entry?.models?.find((x) => x.id === model);
+    if (m?.limit?.context && m?.limit?.output) {
+      return { context: m.limit.context, output: m.limit.output };
+    }
   }
-
   return null;
 }
 
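Model limits are now resolved from the SDK's model catalog rather than a hardcoded table, with a fallback scan across every provider in the catalog. A hedged usage sketch (provider/model strings and the example numbers are illustrative; actual values depend on the catalog shipped with @agi-cli/sdk):

// Assuming getModelLimits is in scope (it remains exported, per the export diff below).
const limits = getModelLimits('anthropic', 'claude-3-5-sonnet-20241022');
if (limits) {
  // e.g. { context: 200000, output: 8192 }, if the catalog carries those values
  console.log(limits.context, limits.output);
} else {
  // Models missing from the catalog yield null; callers must handle that case.
  console.log('no known limits for this model');
}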
@@ -1,9 +1,7 @@
 export {
   PRUNE_PROTECT,
   estimateTokens,
-  type TokenUsage,
   type ModelLimits,
-  isOverflow,
   getModelLimits,
   isCompacted,
   COMPACTED_PLACEHOLDER,
@@ -168,6 +168,7 @@ export async function createBranch({
     totalReasoningTokens: null,
     totalToolTimeMs: null,
     toolCountsJson: null,
+    currentContextTokens: null,
     contextSummary: null,
     lastCompactedAt: null,
   };
@@ -51,13 +51,10 @@ export function normalizeUsage(
       : undefined;
 
   const cachedValue = cachedInputTokens ?? 0;
-  const cacheCreationValue = cacheCreationInputTokens ?? 0;
 
   let inputTokens = rawInputTokens;
   if (provider === 'openai') {
     inputTokens = Math.max(0, rawInputTokens - cachedValue);
-  } else if (provider === 'anthropic') {
-    inputTokens = Math.max(0, rawInputTokens - cacheCreationValue);
   }
 
   return {
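The practical effect: only OpenAI usage still has cached tokens subtracted from the raw input count, while Anthropic input tokens now pass through unchanged instead of having cache-creation tokens removed. A small illustration with made-up numbers, mirroring the arithmetic above (not part of the package):

// Illustrative numbers only.
const rawInputTokens = 12_000;
const cachedInputTokens = 8_000;

// OpenAI: cached tokens are assumed to be included in the raw count,
// so they are subtracted once to isolate the non-cached input portion.
const openaiInput = Math.max(0, rawInputTokens - cachedInputTokens); // 4000

// Anthropic: after this change, no cache-creation subtraction is applied.
const anthropicInput = rawInputTokens; // 12000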
@@ -94,8 +91,10 @@ export function resolveUsageProvider(
 }
 
 /**
- * Updates session token counts incrementally after each step.
- * Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
+ * Updates session token counts after each step.
+ * AI SDK v6: onStepFinish.usage is PER-STEP (each step = one API call).
+ * We ADD each step's tokens directly to session totals.
+ * We also track currentContextTokens = the latest step's full input context.
  */
 export async function updateSessionTokensIncremental(
   usage: UsageData,
@@ -105,10 +104,19 @@ export async function updateSessionTokensIncremental(
 ) {
   if (!usage || !db) return;
 
+  const currentContextTokens = Number(usage.inputTokens ?? 0);
+
   const usageProvider = resolveUsageProvider(opts.provider, opts.model);
   const normalizedUsage = normalizeUsage(usage, providerOptions, usageProvider);
 
-  // Read session totals
+  const stepInput = Number(normalizedUsage.inputTokens ?? 0);
+  const stepOutput = Number(normalizedUsage.outputTokens ?? 0);
+  const stepCached = Number(normalizedUsage.cachedInputTokens ?? 0);
+  const stepCacheCreation = Number(
+    normalizedUsage.cacheCreationInputTokens ?? 0,
+  );
+  const stepReasoning = Number(normalizedUsage.reasoningTokens ?? 0);
+
   const sessRows = await db
     .select()
     .from(sessions)
@@ -117,73 +125,18 @@ export async function updateSessionTokensIncremental(
   if (sessRows.length === 0 || !sessRows[0]) return;
 
   const sess = sessRows[0];
-  const priorInputSess = Number(sess.totalInputTokens ?? 0);
-  const priorOutputSess = Number(sess.totalOutputTokens ?? 0);
-  const priorCachedSess = Number(sess.totalCachedTokens ?? 0);
-  const priorCacheCreationSess = Number(sess.totalCacheCreationTokens ?? 0);
-  const priorReasoningSess = Number(sess.totalReasoningTokens ?? 0);
-
-  // Read current message totals to compute delta
-  const msgRows = await db
-    .select()
-    .from(messages)
-    .where(eq(messages.id, opts.assistantMessageId));
-
-  const msg = msgRows[0];
-  const priorPromptMsg = Number(msg?.inputTokens ?? 0);
-  const priorCompletionMsg = Number(msg?.outputTokens ?? 0);
-  const priorCachedMsg = Number(msg?.cachedInputTokens ?? 0);
-  const priorCacheCreationMsg = Number(msg?.cacheCreationInputTokens ?? 0);
-  const priorReasoningMsg = Number(msg?.reasoningTokens ?? 0);
-
-  // Treat usage as cumulative per-message for this step
-  const cumPrompt =
-    normalizedUsage.inputTokens != null
-      ? Number(normalizedUsage.inputTokens)
-      : priorPromptMsg;
-  const cumCompletion =
-    normalizedUsage.outputTokens != null
-      ? Number(normalizedUsage.outputTokens)
-      : priorCompletionMsg;
-  const cumReasoning =
-    normalizedUsage.reasoningTokens != null
-      ? Number(normalizedUsage.reasoningTokens)
-      : priorReasoningMsg;
-
-  const cumCached =
-    normalizedUsage.cachedInputTokens != null
-      ? Number(normalizedUsage.cachedInputTokens)
-      : priorCachedMsg;
-
-  const cumCacheCreation =
-    normalizedUsage.cacheCreationInputTokens != null
-      ? Number(normalizedUsage.cacheCreationInputTokens)
-      : priorCacheCreationMsg;
-
-  // Compute deltas for this step; clamp to 0 in case provider reports smaller values
-  const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
-  const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
-  const deltaCached = Math.max(0, cumCached - priorCachedMsg);
-  const deltaCacheCreation = Math.max(
-    0,
-    cumCacheCreation - priorCacheCreationMsg,
-  );
-  const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
-
-  const nextInputSess = priorInputSess + deltaInput;
-  const nextOutputSess = priorOutputSess + deltaOutput;
-  const nextCachedSess = priorCachedSess + deltaCached;
-  const nextCacheCreationSess = priorCacheCreationSess + deltaCacheCreation;
-  const nextReasoningSess = priorReasoningSess + deltaReasoning;
 
   await db
     .update(sessions)
     .set({
-      totalInputTokens: nextInputSess,
-      totalOutputTokens: nextOutputSess,
-      totalCachedTokens: nextCachedSess,
-      totalCacheCreationTokens: nextCacheCreationSess,
-      totalReasoningTokens: nextReasoningSess,
+      totalInputTokens: Number(sess.totalInputTokens ?? 0) + stepInput,
+      totalOutputTokens: Number(sess.totalOutputTokens ?? 0) + stepOutput,
+      totalCachedTokens: Number(sess.totalCachedTokens ?? 0) + stepCached,
+      totalCacheCreationTokens:
+        Number(sess.totalCacheCreationTokens ?? 0) + stepCacheCreation,
+      totalReasoningTokens:
+        Number(sess.totalReasoningTokens ?? 0) + stepReasoning,
+      currentContextTokens,
     })
     .where(eq(sessions.id, opts.sessionId));
 }
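For intuition about the accounting change: the AI SDK v6 reports usage per step, so session totals become simple running sums, and currentContextTokens records only the most recent step's input size. A self-contained sketch with made-up numbers (not the package's API):

// Illustrative per-step usage values as they might arrive in onStepFinish.
const stepUsages = [
  { inputTokens: 1_200, outputTokens: 300 },
  { inputTokens: 1_600, outputTokens: 250 },
];

// New behavior: every step is added directly to the running totals.
const totals = stepUsages.reduce(
  (acc, u) => ({
    input: acc.input + u.inputTokens,
    output: acc.output + u.outputTokens,
  }),
  { input: 0, output: 0 },
); // { input: 2800, output: 550 }

// currentContextTokens mirrors only the latest step's full input context.
const currentContextTokens =
  stepUsages[stepUsages.length - 1]?.inputTokens ?? 0; // 1600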
@@ -222,8 +175,8 @@ export async function updateSessionTokens(
 }
 
 /**
- * Updates message token counts incrementally after each step.
- * Note: onStepFinish.usage is CUMULATIVE per message, so we REPLACE values, not add.
+ * Updates message token counts after each step.
+ * AI SDK v6: onStepFinish.usage is PER-STEP. We ADD each step's tokens to message totals.
  */
 export async function updateMessageTokensIncremental(
   usage: UsageData,
@@ -236,6 +189,14 @@ export async function updateMessageTokensIncremental(
   const usageProvider = resolveUsageProvider(opts.provider, opts.model);
   const normalizedUsage = normalizeUsage(usage, providerOptions, usageProvider);
 
+  const stepInput = Number(normalizedUsage.inputTokens ?? 0);
+  const stepOutput = Number(normalizedUsage.outputTokens ?? 0);
+  const stepCached = Number(normalizedUsage.cachedInputTokens ?? 0);
+  const stepCacheCreation = Number(
+    normalizedUsage.cacheCreationInputTokens ?? 0,
+  );
+  const stepReasoning = Number(normalizedUsage.reasoningTokens ?? 0);
+
   const msgRows = await db
     .select()
     .from(messages)
@@ -243,48 +204,27 @@ export async function updateMessageTokensIncremental(
 
   if (msgRows.length > 0 && msgRows[0]) {
     const msg = msgRows[0];
-    const priorPrompt = Number(msg.inputTokens ?? 0);
-    const priorCompletion = Number(msg.outputTokens ?? 0);
-    const priorCached = Number(msg.cachedInputTokens ?? 0);
-    const priorCacheCreation = Number(msg.cacheCreationInputTokens ?? 0);
-    const priorReasoning = Number(msg.reasoningTokens ?? 0);
-
-    // Treat usage as cumulative per-message - REPLACE not ADD
-    const cumPrompt =
-      normalizedUsage.inputTokens != null
-        ? Number(normalizedUsage.inputTokens)
-        : priorPrompt;
-    const cumCompletion =
-      normalizedUsage.outputTokens != null
-        ? Number(normalizedUsage.outputTokens)
-        : priorCompletion;
-    const cumReasoning =
-      normalizedUsage.reasoningTokens != null
-        ? Number(normalizedUsage.reasoningTokens)
-        : priorReasoning;
-
-    const cumCached =
-      normalizedUsage.cachedInputTokens != null
-        ? Number(normalizedUsage.cachedInputTokens)
-        : priorCached;
-
-    const cumCacheCreation =
-      normalizedUsage.cacheCreationInputTokens != null
-        ? Number(normalizedUsage.cacheCreationInputTokens)
-        : priorCacheCreation;
-
-    const cumTotal =
-      cumPrompt + cumCompletion + cumCached + cumCacheCreation + cumReasoning;
+    const nextInput = Number(msg.inputTokens ?? 0) + stepInput;
+    const nextOutput = Number(msg.outputTokens ?? 0) + stepOutput;
+    const nextCached = Number(msg.cachedInputTokens ?? 0) + stepCached;
+    const nextCacheCreation =
+      Number(msg.cacheCreationInputTokens ?? 0) + stepCacheCreation;
+    const nextReasoning = Number(msg.reasoningTokens ?? 0) + stepReasoning;
 
     await db
       .update(messages)
       .set({
-        inputTokens: cumPrompt,
-        outputTokens: cumCompletion,
-        totalTokens: cumTotal,
-        cachedInputTokens: cumCached,
-        cacheCreationInputTokens: cumCacheCreation,
-        reasoningTokens: cumReasoning,
+        inputTokens: nextInput,
+        outputTokens: nextOutput,
+        totalTokens:
+          nextInput +
+          nextOutput +
+          nextCached +
+          nextCacheCreation +
+          nextReasoning,
+        cachedInputTokens: nextCached,
+        cacheCreationInputTokens: nextCacheCreation,
+        reasoningTokens: nextReasoning,
       })
       .where(eq(messages.id, opts.assistantMessageId));
   }
@@ -55,6 +55,7 @@ export async function createSession({
     totalReasoningTokens: null,
     totalToolTimeMs: null,
     toolCountsJson: null,
+    currentContextTokens: null,
   };
   await db.insert(sessions).values(row);
   publish({ type: 'session.created', sessionId: id, payload: row });
@@ -4,12 +4,7 @@ import { eq } from 'drizzle-orm';
 import { publish } from '../../events/bus.ts';
 import { estimateModelCostUsd } from '@agi-cli/sdk';
 import type { RunOpts } from '../session/queue.ts';
-import {
-  pruneSession,
-  isOverflow,
-  getModelLimits,
-  markSessionCompacted,
-} from '../message/compaction.ts';
+import { markSessionCompacted } from '../message/compaction.ts';
 import { debugLog } from '../debug/index.ts';
 import type { FinishEvent } from './types.ts';
 import {
@@ -92,38 +87,6 @@ export function createFinishHandler(
       ? estimateModelCostUsd(opts.provider, opts.model, usage)
       : undefined;
 
-    if (usage) {
-      try {
-        const limits = getModelLimits(opts.provider, opts.model);
-        if (limits) {
-          const tokenUsage: LanguageModelUsage = {
-            input: usage.inputTokens ?? 0,
-            output: usage.outputTokens ?? 0,
-            cacheRead:
-              (usage as { cachedInputTokens?: number }).cachedInputTokens ?? 0,
-            cacheWrite:
-              (usage as { cacheCreationInputTokens?: number })
-                .cacheCreationInputTokens ?? 0,
-          };
-
-          if (isOverflow(tokenUsage, limits)) {
-            debugLog(
-              `[stream-handlers] Context overflow detected, triggering prune for session ${opts.sessionId}`,
-            );
-            pruneSession(db, opts.sessionId).catch((err) => {
-              debugLog(
-                `[stream-handlers] Prune failed: ${err instanceof Error ? err.message : String(err)}`,
-              );
-            });
-          }
-        }
-      } catch (err) {
-        debugLog(
-          `[stream-handlers] Overflow check failed: ${err instanceof Error ? err.message : String(err)}`,
-        );
-      }
-    }
-
     publish({
       type: 'message.completed',
       sessionId: opts.sessionId,
@@ -1,134 +0,0 @@
-import type { ModelMessage } from 'ai';
-
-/**
- * Adds cache control to messages for prompt caching optimization.
- * Anthropic supports caching for system messages, tools, and long context.
- */
-type CachedSystemValue =
-  | string
-  | undefined
-  | Array<{
-      type: 'text';
-      text: string;
-      cache_control?: { type: 'ephemeral' };
-    }>;
-
-type TextContentPartWithProviderOptions = {
-  providerOptions?: {
-    anthropic?: { cacheControl?: { type: 'ephemeral' } };
-    [key: string]: unknown;
-  };
-  [key: string]: unknown;
-};
-
-export function addCacheControl(
-  provider: string,
-  system: string | undefined,
-  messages: ModelMessage[],
-): {
-  system?: CachedSystemValue;
-  messages: ModelMessage[];
-} {
-  // Only Anthropic supports prompt caching currently
-  if (provider !== 'anthropic') {
-    return { system, messages };
-  }
-
-  // Convert system to cacheable format if it's long enough
-  let cachedSystem: CachedSystemValue = system;
-  if (system && system.length > 1024) {
-    // Anthropic requires 1024+ tokens for Claude Sonnet/Opus
-    cachedSystem = [
-      {
-        type: 'text',
-        text: system,
-        cache_control: { type: 'ephemeral' as const },
-      },
-    ];
-  }
-
-  // Anthropic cache_control limits:
-  // - Max 4 cache blocks total
-  // - System message: 1 block
-  // - Tools: 2 blocks (read, write)
-  // - Last user message: 1 block
-  // Total: 4 blocks
-
-  // Add cache control to the last user message if conversation is long
-  // This caches the conversation history up to that point
-  if (messages.length >= 3) {
-    const cachedMessages = [...messages];
-
-    // Find second-to-last user message (not the current one)
-    const userIndices = cachedMessages
-      .map((m, i) => (m.role === 'user' ? i : -1))
-      .filter((i) => i >= 0);
-
-    if (userIndices.length >= 2) {
-      const targetIndex = userIndices[userIndices.length - 2];
-      const targetMsg = cachedMessages[targetIndex];
-
-      if (Array.isArray(targetMsg.content)) {
-        // Add cache control to the last content part of that message
-        const lastPart = targetMsg.content[targetMsg.content.length - 1];
-        if (
-          lastPart &&
-          typeof lastPart === 'object' &&
-          'type' in lastPart &&
-          lastPart.type === 'text'
-        ) {
-          const textPart =
-            lastPart as unknown as TextContentPartWithProviderOptions;
-          textPart.providerOptions = {
-            ...textPart.providerOptions,
-            anthropic: { cacheControl: { type: 'ephemeral' } },
-          };
-        }
-      }
-    }
-
-    return { system: cachedSystem, messages: cachedMessages };
-  }
-
-  return { system: cachedSystem, messages };
-}
-
-/**
- * Truncates old messages to reduce context size while keeping recent context.
- * Strategy: Keep system message + last N messages
- */
-export function truncateHistory(
-  messages: ModelMessage[],
-  maxMessages = 20,
-): ModelMessage[] {
-  if (messages.length <= maxMessages) {
-    return messages;
-  }
-
-  // Keep the most recent messages
-  return messages.slice(-maxMessages);
-}
-
-/**
- * Estimates token count (rough approximation: ~4 chars per token)
- */
-export function estimateTokens(text: string): number {
-  return Math.ceil(text.length / 4);
-}
-
-/**
- * Summarizes tool results if they're too long
- */
-export function summarizeToolResult(result: unknown, maxLength = 5000): string {
-  const str = typeof result === 'string' ? result : JSON.stringify(result);
-
-  if (str.length <= maxLength) {
-    return str;
-  }
-
-  // Truncate and add indicator
-  return (
-    str.slice(0, maxLength) +
-    `\n\n[... truncated ${str.length - maxLength} characters]`
-  );
-}
@@ -1,206 +0,0 @@
-import type { ModelMessage } from 'ai';
-
-/**
- * Optimizes message context by deduplicating file reads and pruning old tool results.
- */
-
-interface FileRead {
-  messageIndex: number;
-  partIndex: number;
-  path: string;
-}
-
-interface ToolPart {
-  type: string;
-  input?: {
-    path?: string;
-    filePattern?: string;
-    pattern?: string;
-  };
-  output?: unknown;
-  [key: string]: unknown;
-}
-
-/**
- * Deduplicates file read results, keeping only the latest version of each file.
- *
- * Strategy:
- * - Track all file reads (read, grep, glob tools)
- * - For files read multiple times, remove older results
- * - Keep only the most recent read of each file
- */
-export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
-  const fileReads = new Map<string, FileRead[]>();
-
-  // First pass: identify all file reads and their locations
-  messages.forEach((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return;
-
-    msg.content.forEach((part, partIdx) => {
-      if (!part || typeof part !== 'object') return;
-      if (!('type' in part)) return;
-
-      const toolType = part.type as string;
-
-      // Check if this is a file read tool (read, grep, glob)
-      if (!toolType.startsWith('tool-')) return;
-
-      const toolName = toolType.replace('tool-', '');
-      if (!['read', 'grep', 'glob'].includes(toolName)) return;
-
-      // Extract file path from input
-      const toolPart = part as ToolPart;
-      const input = toolPart.input;
-      if (!input) return;
-
-      const path = input.path || input.filePattern || input.pattern;
-      if (!path) return;
-
-      // Track this file read
-      if (!fileReads.has(path)) {
-        fileReads.set(path, []);
-      }
-      fileReads
-        .get(path)
-        ?.push({ messageIndex: msgIdx, partIndex: partIdx, path });
-    });
-  });
-
-  // Second pass: identify reads to remove (all but the latest for each file)
-  const readsToRemove = new Set<string>();
-
-  for (const [_path, reads] of fileReads) {
-    if (reads.length <= 1) continue;
-
-    // Sort by message index descending (latest first)
-    reads.sort((a, b) => b.messageIndex - a.messageIndex);
-
-    // Remove all but the first (latest)
-    for (let i = 1; i < reads.length; i++) {
-      const read = reads[i];
-      readsToRemove.add(`${read.messageIndex}-${read.partIndex}`);
-    }
-  }
-
-  // Third pass: rebuild messages without removed reads
-  return messages.map((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg;
-
-    const filteredContent = msg.content.filter((_part, partIdx) => {
-      const key = `${msgIdx}-${partIdx}`;
-      return !readsToRemove.has(key);
-    });
-
-    return {
-      ...msg,
-      content: filteredContent,
-    };
-  });
-}
-
-/**
- * Prunes old tool results to reduce context size.
- *
- * Strategy:
- * - Keep only the last N tool results
- * - Preserve tool calls but remove their output
- * - Keep text parts intact
- */
-export function pruneToolResults(
-  messages: ModelMessage[],
-  maxToolResults = 30,
-): ModelMessage[] {
-  // Collect all tool result locations
-  const toolResults: Array<{ messageIndex: number; partIndex: number }> = [];
-
-  messages.forEach((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return;
-
-    msg.content.forEach((part, partIdx) => {
-      if (!part || typeof part !== 'object') return;
-      if (!('type' in part)) return;
-
-      const toolType = part.type as string;
-      if (!toolType.startsWith('tool-')) return;
-
-      // Check if this has output
-      const toolPart = part as ToolPart;
-      const hasOutput = toolPart.output !== undefined;
-      if (!hasOutput) return;
-
-      toolResults.push({ messageIndex: msgIdx, partIndex: partIdx });
-    });
-  });
-
-  // If under limit, no pruning needed
-  if (toolResults.length <= maxToolResults) {
-    return messages;
-  }
-
-  // Keep only the last N tool results
-  const toKeep = new Set<string>();
-  const keepCount = Math.min(maxToolResults, toolResults.length);
-  const keepStart = toolResults.length - keepCount;
-
-  for (let i = keepStart; i < toolResults.length; i++) {
-    const result = toolResults[i];
-    toKeep.add(`${result.messageIndex}-${result.partIndex}`);
-  }
-
-  // Rebuild messages, removing old tool outputs
-  return messages.map((msg, msgIdx) => {
-    if (msg.role !== 'assistant' || !Array.isArray(msg.content)) return msg;
-
-    const processedContent = msg.content.map((part, partIdx) => {
-      if (!part || typeof part !== 'object') return part;
-      if (!('type' in part)) return part;
-
-      const toolPart = part as ToolPart;
-      const toolType = toolPart.type;
-      if (!toolType.startsWith('tool-')) return part;
-
-      const key = `${msgIdx}-${partIdx}`;
-      const hasOutput = toolPart.output !== undefined;
-
-      // If this tool result should be pruned, remove its output
-      if (hasOutput && !toKeep.has(key)) {
-        return {
-          ...part,
-          output: '[pruned to save context]',
-        };
-      }
-
-      return part;
-    });
-
-    return {
-      ...msg,
-      content: processedContent,
-    };
-  });
-}
-
-/**
- * Applies all context optimizations:
- * 1. Deduplicate file reads
- * 2. Prune old tool results
- */
-export function optimizeContext(
-  messages: ModelMessage[],
-  options: {
-    deduplicateFiles?: boolean;
-    maxToolResults?: number;
-  } = {},
-): ModelMessage[] {
-  let optimized = messages;
-
-  if (options.deduplicateFiles !== false) {
-    optimized = deduplicateFileReads(optimized);
-  }
-
-  if (options.maxToolResults !== undefined) {
-    optimized = pruneToolResults(optimized, options.maxToolResults);
-  }
-
-  return optimized;
-}
@@ -1,26 +0,0 @@
-import type { ModelMessage } from 'ai';
-
-/**
- * Truncates conversation history to keep only the most recent messages.
- * This helps manage context window size and improves performance.
- *
- * Strategy:
- * - Keep only the last N messages
- * - Preserve message pairs (assistant + user responses) when possible
- * - Always keep at least the system message if present
- */
-export function truncateHistory(
-  messages: ModelMessage[],
-  maxMessages: number,
-): ModelMessage[] {
-  if (messages.length <= maxMessages) {
-    return messages;
-  }
-
-  // Calculate how many messages to keep
-  const keepCount = Math.min(maxMessages, messages.length);
-  const startIndex = messages.length - keepCount;
-
-  // Return the most recent messages
-  return messages.slice(startIndex);
-}