@agi-cli/server 0.1.105 → 0.1.107

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agi-cli/server",
3
- "version": "0.1.105",
3
+ "version": "0.1.107",
4
4
  "description": "HTTP API server for AGI CLI",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -29,8 +29,8 @@
29
29
  "typecheck": "tsc --noEmit"
30
30
  },
31
31
  "dependencies": {
32
- "@agi-cli/sdk": "0.1.105",
33
- "@agi-cli/database": "0.1.105",
32
+ "@agi-cli/sdk": "0.1.107",
33
+ "@agi-cli/database": "0.1.107",
34
34
  "drizzle-orm": "^0.44.5",
35
35
  "hono": "^4.9.9",
36
36
  "zod": "^4.1.8"
@@ -0,0 +1,254 @@
1
+ /**
2
+ * Context compaction module for managing token usage.
3
+ *
4
+ * This module implements OpenCode-style context management:
5
+ * 1. Detects when context is overflowing (tokens > context_limit - output_limit)
6
+ * 2. Prunes old tool outputs by marking them as "compacted"
7
+ * 3. History builder returns "[Old tool result content cleared]" for compacted parts
8
+ *
9
+ * Pruning strategy:
10
+ * - Protect the last PRUNE_PROTECT tokens worth of tool calls (40,000)
11
+ * - Only prune if we'd save at least PRUNE_MINIMUM tokens (20,000)
12
+ * - Skip the last 2 turns to preserve recent context
13
+ * - Never prune "skill" or other protected tools
14
+ */
15
+
16
+ import type { getDb } from '@agi-cli/database';
17
+ import { messages, messageParts } from '@agi-cli/database/schema';
18
+ import { eq, desc } from 'drizzle-orm';
19
+ import { debugLog } from './debug.ts';
20
+
21
+ // Token thresholds (matching OpenCode)
22
+ export const PRUNE_MINIMUM = 20_000; // Only prune if we'd save at least this many tokens
23
+ export const PRUNE_PROTECT = 40_000; // Protect last N tokens worth of tool calls
24
+
25
+ // Tools that should never be pruned
26
+ const PRUNE_PROTECTED_TOOLS = ['skill'];
27
+
28
/**
 * Rough token count for a piece of text, using the ~4 characters per token
 * heuristic. Falsy input (such as the empty string) counts as 0 tokens.
 */
export function estimateTokens(text: string): number {
	const source = text || '';
	const approx = Math.round(source.length / 4);
	return Math.max(0, approx);
}
32
+
33
/**
 * Token counters consumed by the overflow check.
 * Only `input` and `output` are required; the remaining counters may be absent.
 */
export interface TokenUsage {
	/** Input-side token count. */
	input: number;
	/** Output-side token count. */
	output: number;
	/** Tokens read from cache, when reported (counted by `isOverflow`). */
	cacheRead?: number;
	/** Tokens written to cache, when reported. */
	cacheWrite?: number;
	/** Reasoning tokens, when reported. */
	reasoning?: number;
}
40
+
41
/**
 * Token limits for a model. The overflow check treats `context - output`
 * as the usable context budget.
 */
export interface ModelLimits {
	/** Context limit, in tokens. A value of 0 disables the overflow check. */
	context: number;
	/** Output limit, in tokens, reserved out of the context limit. */
	output: number;
}
45
+
46
/**
 * Report whether token usage has outgrown the model's usable context.
 *
 * Usage is `input + cacheRead + output` (a missing `cacheRead` counts as 0);
 * `cacheWrite` and `reasoning` are not included. The usable budget is
 * `limits.context - limits.output`. A context limit of 0 is treated as
 * "unknown" and never overflows.
 *
 * @returns true only when usage is strictly greater than the usable budget.
 */
export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
	if (limits.context === 0) {
		return false;
	}

	const used = tokens.input + (tokens.cacheRead ?? 0) + tokens.output;
	const budget = limits.context - limits.output;

	if (used > budget) {
		debugLog(
			`[compaction] Context overflow detected: ${used} tokens used, ${budget} usable (${limits.context} context - ${limits.output} output)`,
		);
		return true;
	}

	return false;
}
65
+
66
/**
 * Prune old tool outputs from a session to reduce context size.
 *
 * Walks the session's messages newest-first. Messages are skipped until the
 * second user message has been seen, so the most recent turns are left alone.
 * From there on, `tool_result` parts are tallied; once the running total
 * exceeds PRUNE_PROTECT tokens, every further result becomes a pruning
 * candidate. Tools listed in PRUNE_PROTECTED_TOOLS are never candidates.
 *
 * The scan stops entirely at the first part that already carries
 * `compactedAt`: everything older was handled by a previous prune.
 *
 * Candidates are only rewritten when the estimated saving exceeds
 * PRUNE_MINIMUM. A rewritten part keeps its JSON structure, gains
 * `compactedAt` (and a 100-character `resultSummary` for long string
 * results), and has `result` set to null.
 *
 * @param db Database handle as returned by `getDb`.
 * @param sessionId Session whose tool results should be pruned.
 * @returns Number of parts actually rewritten and the estimated tokens they
 *   freed. Both are 0 when the prune was skipped; parts whose update failed
 *   are not counted.
 */
export async function pruneSession(
	db: Awaited<ReturnType<typeof getDb>>,
	sessionId: string,
): Promise<{ pruned: number; saved: number }> {
	debugLog(`[compaction] Starting prune for session ${sessionId}`);

	// Newest first, so iterating forwards walks backwards in time.
	const allMessages = await db
		.select()
		.from(messages)
		.where(eq(messages.sessionId, sessionId))
		.orderBy(desc(messages.createdAt));

	let totalTokens = 0;
	let prunedTokens = 0;
	const toPrune: Array<{ id: string; content: string; estimate: number }> = [];
	let turns = 0;

	scan: for (const msg of allMessages) {
		// Each user message marks the start of a turn.
		if (msg.role === 'user') {
			turns++;
		}

		// Skip the last 2 turns to preserve recent context.
		if (turns < 2) continue;

		const parts = await db
			.select()
			.from(messageParts)
			.where(eq(messageParts.messageId, msg.id))
			.orderBy(desc(messageParts.index));

		for (const part of parts) {
			// Only tool results are eligible.
			if (part.type !== 'tool_result') continue;

			// Protected tools keep their output forever.
			if (part.toolName && PRUNE_PROTECTED_TOOLS.includes(part.toolName)) {
				continue;
			}

			let content: { result?: unknown; compactedAt?: number } | null;
			try {
				content = JSON.parse(part.content ?? '{}');
			} catch {
				continue;
			}

			// JSON such as `null` or a bare number has no result to prune; reading
			// properties off it would throw and abort the whole prune.
			if (content === null || typeof content !== 'object') continue;

			// Everything older than a compacted part was pruned before, so stop
			// the whole scan, not just this message's parts.
			if (content.compactedAt) {
				debugLog(
					`[compaction] Hit previously compacted content, stopping prune`,
				);
				break scan;
			}

			const estimate = estimateTokens(
				typeof content.result === 'string'
					? content.result
					: JSON.stringify(content.result ?? ''),
			);
			totalTokens += estimate;

			// Past the protection threshold: mark for pruning.
			if (totalTokens > PRUNE_PROTECT) {
				prunedTokens += estimate;
				toPrune.push({ id: part.id, content: part.content ?? '{}', estimate });
			}
		}
	}

	debugLog(
		`[compaction] Found ${toPrune.length} tool results to prune, saving ~${prunedTokens} tokens`,
	);

	// Only prune if we'd save enough tokens to be worthwhile.
	if (prunedTokens <= PRUNE_MINIMUM) {
		debugLog(
			`[compaction] Skipping prune, would only save ${prunedTokens} tokens (min: ${PRUNE_MINIMUM})`,
		);
		return { pruned: 0, saved: 0 };
	}

	const compactedAt = Date.now();
	let pruned = 0;
	let saved = 0;

	for (const part of toPrune) {
		try {
			const content = JSON.parse(part.content);
			// Keep the structure but mark as compacted.
			content.compactedAt = compactedAt;
			// Keep a small summary if it was a long string result.
			if (typeof content.result === 'string' && content.result.length > 100) {
				content.resultSummary = `${content.result.slice(0, 100)}...`;
			}
			// Clear the actual result to save space.
			content.result = null;

			await db
				.update(messageParts)
				.set({ content: JSON.stringify(content) })
				.where(eq(messageParts.id, part.id));

			pruned++;
			saved += part.estimate;
		} catch (err) {
			// Best effort: one failed update must not stop the remaining ones.
			debugLog(
				`[compaction] Failed to prune part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
			);
		}
	}

	debugLog(
		`[compaction] Pruned ${pruned} tool results, saved ~${saved} tokens`,
	);

	return { pruned, saved };
}
190
+
191
/**
 * Look up context/output token limits for a model from a built-in table.
 *
 * Resolution order:
 * 1. Exact match on the model id.
 * 2. The longest table key contained in the model id, so a dated or suffixed
 *    id such as "o1-mini-2024-09-12" resolves to "o1-mini" rather than to the
 *    shorter "o1".
 * 3. The first table key that contains the model id (e.g. "claude-3-5-sonnet"
 *    resolves to "claude-3-5-sonnet-20241022").
 *
 * @param provider Provider id; only used in the debug message.
 * @param model Model id to resolve.
 * @returns The limits, or null when nothing matches (including an empty model
 *   id). Callers are expected to skip the overflow check on null.
 */
export function getModelLimits(
	provider: string,
	model: string,
): ModelLimits | null {
	// Default limits for common models.
	// These should ideally come from the provider catalog.
	const defaults: Record<string, ModelLimits> = {
		// Anthropic
		'claude-sonnet-4-20250514': { context: 200000, output: 16000 },
		'claude-3-5-sonnet-20241022': { context: 200000, output: 8192 },
		'claude-3-5-haiku-20241022': { context: 200000, output: 8192 },
		'claude-3-opus-20240229': { context: 200000, output: 4096 },
		// OpenAI
		'gpt-4o': { context: 128000, output: 16384 },
		'gpt-4o-mini': { context: 128000, output: 16384 },
		'gpt-4-turbo': { context: 128000, output: 4096 },
		o1: { context: 200000, output: 100000 },
		'o1-mini': { context: 128000, output: 65536 },
		'o1-pro': { context: 200000, output: 100000 },
		'o3-mini': { context: 200000, output: 100000 },
		// Google
		'gemini-2.0-flash': { context: 1000000, output: 8192 },
		'gemini-1.5-pro': { context: 2000000, output: 8192 },
		'gemini-1.5-flash': { context: 1000000, output: 8192 },
	};

	// An empty id would "match" every key via String.includes(''), so skip it.
	if (model) {
		const entries = Object.entries(defaults);

		// Exact match; Object.entries only yields own keys, so inherited names
		// such as "constructor" can never match.
		const exact = entries.find(([key]) => key === model);
		if (exact) {
			return exact[1];
		}

		// Most specific table key that appears inside the model id.
		let bestKeyLength = 0;
		let bestLimits: ModelLimits | null = null;
		for (const [key, limits] of entries) {
			if (key.length > bestKeyLength && model.includes(key)) {
				bestKeyLength = key.length;
				bestLimits = limits;
			}
		}
		if (bestLimits) {
			return bestLimits;
		}

		// Shortened id: first table key that contains it.
		const containing = entries.find(([key]) => key.includes(model));
		if (containing) {
			return containing[1];
		}
	}

	// Return null if no match - caller should handle.
	debugLog(
		`[compaction] No model limits found for ${provider}/${model}, skipping overflow check`,
	);
	return null;
}
238
+
239
/**
 * Tell whether a stored tool-result payload has been compacted.
 *
 * The payload is parsed as JSON and counts as compacted when it carries a
 * truthy `compactedAt`. Anything that fails to parse, or that cannot be read
 * as an object (for example the JSON literal `null`), is reported as not
 * compacted.
 */
export function isCompacted(content: string): boolean {
	try {
		const { compactedAt } = JSON.parse(content);
		return Boolean(compactedAt);
	} catch {
		return false;
	}
}
250
+
251
+ /**
252
+ * Placeholder text substituted for the result of a compacted tool call.
253
+ */
254
+ export const COMPACTED_PLACEHOLDER = '[Old tool result content cleared]';
@@ -76,15 +76,14 @@ export async function updateSessionTokensIncremental(
76
76
  : priorCachedMsg;
77
77
 
78
78
  // Compute deltas for this step; clamp to 0 in case provider reports smaller values
79
- // Cached tokens reduce the billable input, so we subtract them from the delta
80
79
  const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
81
80
  const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
82
81
  const deltaCached = Math.max(0, cumCached - priorCachedMsg);
83
82
  const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
84
83
 
85
- // Session input should only count non-cached tokens
86
- // Total cached tokens are tracked separately for reference
87
- const nextInputSess = priorInputSess + deltaInput - deltaCached;
84
+ // Note: AI SDK's inputTokens already excludes cached tokens for Anthropic,
85
+ // so we don't need to subtract deltaCached here. Just accumulate directly.
86
+ const nextInputSess = priorInputSess + deltaInput;
88
87
  const nextOutputSess = priorOutputSess + deltaOutput;
89
88
  const nextCachedSess = priorCachedSess + deltaCached;
90
89
  const nextReasoningSess = priorReasoningSess + deltaReasoning;
@@ -4,6 +4,7 @@ import { messages, messageParts } from '@agi-cli/database/schema';
4
4
  import { eq, asc } from 'drizzle-orm';
5
5
  import { debugLog } from './debug.ts';
6
6
  import { ToolHistoryTracker } from './history/tool-history-tracker.ts';
7
+ import { COMPACTED_PLACEHOLDER } from './compaction.ts';
7
8
 
8
9
  /**
9
10
  * Builds the conversation history for a session from the database,
@@ -93,12 +94,17 @@ export async function buildHistoryMessages(
93
94
  name?: string;
94
95
  callId?: string;
95
96
  result?: unknown;
97
+ compactedAt?: number;
96
98
  };
97
99
  if (obj.callId) {
100
+ // If this tool result was compacted, return placeholder instead
101
+ const result = obj.compactedAt
102
+ ? COMPACTED_PLACEHOLDER
103
+ : obj.result;
98
104
  toolResults.push({
99
105
  name: obj.name ?? 'tool',
100
106
  callId: obj.callId,
101
- result: obj.result,
107
+ result,
102
108
  });
103
109
  }
104
110
  } catch {}
@@ -7,9 +7,13 @@ import {
7
7
  setAuth,
8
8
  } from '@agi-cli/sdk';
9
9
  import { openai, createOpenAI } from '@ai-sdk/openai';
10
- import { anthropic, createAnthropic } from '@ai-sdk/anthropic';
10
+ import { createAnthropic } from '@ai-sdk/anthropic';
11
11
  import { google, createGoogleGenerativeAI } from '@ai-sdk/google';
12
12
  import { createOpenRouter } from '@openrouter/ai-sdk-provider';
13
+ import { toClaudeCodeName } from './tool-mapping.ts';
14
+
15
+ // Version to report in user-agent for Claude Code compatibility
16
+ const CLAUDE_CLI_VERSION = '1.0.61';
13
17
  import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
14
18
 
15
19
  export type ProviderName = ProviderId;
@@ -132,12 +136,181 @@ async function getAnthropicInstance(cfg: AGIConfig) {
132
136
  }
133
137
  }
134
138
 
139
+ // Required Claude Code headers
135
140
  headers.authorization = `Bearer ${currentAuth.access}`;
136
141
  headers['anthropic-beta'] =
137
- 'oauth-2025-04-20,claude-code-20250219,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14';
142
+ 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14';
143
+ headers['anthropic-dangerous-direct-browser-access'] = 'true';
144
+ headers['anthropic-version'] = '2023-06-01';
145
+ headers['user-agent'] =
146
+ `claude-cli/${CLAUDE_CLI_VERSION} (external, cli)`;
147
+ headers['x-app'] = 'cli';
148
+ headers['content-type'] = 'application/json';
149
+ headers.accept = 'application/json';
150
+
151
+ // Stainless headers (fingerprinting)
152
+ headers['x-stainless-arch'] = process.arch === 'arm64' ? 'arm64' : 'x64';
153
+ headers['x-stainless-helper-method'] = 'stream';
154
+ headers['x-stainless-lang'] = 'js';
155
+ headers['x-stainless-os'] =
156
+ process.platform === 'darwin'
157
+ ? 'MacOS'
158
+ : process.platform === 'win32'
159
+ ? 'Windows'
160
+ : 'Linux';
161
+ headers['x-stainless-package-version'] = '0.70.0';
162
+ headers['x-stainless-retry-count'] = '0';
163
+ headers['x-stainless-runtime'] = 'node';
164
+ headers['x-stainless-runtime-version'] = process.version;
165
+ headers['x-stainless-timeout'] = '600';
166
+
167
+ // Add ?beta=true to URL
168
+ let url = typeof input === 'string' ? input : input.toString();
169
+ if (url.includes('/v1/messages') && !url.includes('beta=true')) {
170
+ url += url.includes('?') ? '&beta=true' : '?beta=true';
171
+ }
172
+
173
+ // Transform request body: tool names to PascalCase + apply caching
174
+ let body = init?.body;
175
+ if (body && typeof body === 'string') {
176
+ try {
177
+ const parsed = JSON.parse(body);
178
+
179
+ // Transform tool names
180
+ if (parsed.tools && Array.isArray(parsed.tools)) {
181
+ parsed.tools = parsed.tools.map(
182
+ (tool: { name: string; [key: string]: unknown }) => ({
183
+ ...tool,
184
+ name: toClaudeCodeName(tool.name),
185
+ }),
186
+ );
187
+ }
188
+
189
+ // Apply ephemeral caching (max 4 cache breakpoints total)
190
+ // Adapter adds 2 tool cache blocks, so we can add 2 more:
191
+ // - 1 system block (the first one with tools description)
192
+ // - 1 message block (the last user message)
193
+ const MAX_SYSTEM_CACHE = 1;
194
+ const MAX_MESSAGE_CACHE = 1;
195
+ let systemCacheUsed = 0;
196
+ let messageCacheUsed = 0;
197
+
198
+ // Cache first system message only (contains agent instructions)
199
+ if (parsed.system && Array.isArray(parsed.system)) {
200
+ parsed.system = parsed.system.map(
201
+ (
202
+ block: { type: string; cache_control?: unknown },
203
+ index: number,
204
+ ) => {
205
+ if (block.cache_control) return block;
206
+ if (
207
+ systemCacheUsed < MAX_SYSTEM_CACHE &&
208
+ index === 0 &&
209
+ block.type === 'text'
210
+ ) {
211
+ systemCacheUsed++;
212
+ return { ...block, cache_control: { type: 'ephemeral' } };
213
+ }
214
+ return block;
215
+ },
216
+ );
217
+ }
218
+
219
+ // Transform tool names in messages and apply caching to last message only
220
+ if (parsed.messages && Array.isArray(parsed.messages)) {
221
+ const messageCount = parsed.messages.length;
222
+
223
+ parsed.messages = parsed.messages.map(
224
+ (
225
+ msg: {
226
+ role: string;
227
+ content: unknown;
228
+ [key: string]: unknown;
229
+ },
230
+ msgIndex: number,
231
+ ) => {
232
+ // Only cache the very last message
233
+ const isLast = msgIndex === messageCount - 1;
234
+
235
+ if (Array.isArray(msg.content)) {
236
+ const content = msg.content.map(
237
+ (
238
+ block: {
239
+ type: string;
240
+ name?: string;
241
+ cache_control?: unknown;
242
+ },
243
+ blockIndex: number,
244
+ ) => {
245
+ let transformedBlock = block;
246
+
247
+ // Transform tool names
248
+ if (block.type === 'tool_use' && block.name) {
249
+ transformedBlock = {
250
+ ...block,
251
+ name: toClaudeCodeName(block.name),
252
+ };
253
+ }
254
+ if (block.type === 'tool_result' && block.name) {
255
+ transformedBlock = {
256
+ ...block,
257
+ name: toClaudeCodeName(block.name),
258
+ };
259
+ }
260
+
261
+ // Add cache_control to last block of last message
262
+ if (
263
+ isLast &&
264
+ !transformedBlock.cache_control &&
265
+ messageCacheUsed < MAX_MESSAGE_CACHE &&
266
+ blockIndex === (msg.content as unknown[]).length - 1
267
+ ) {
268
+ messageCacheUsed++;
269
+ return {
270
+ ...transformedBlock,
271
+ cache_control: { type: 'ephemeral' },
272
+ };
273
+ }
274
+
275
+ return transformedBlock;
276
+ },
277
+ );
278
+ return { ...msg, content };
279
+ }
280
+
281
+ // For string content, wrap in array with cache_control if last message
282
+ if (
283
+ isLast &&
284
+ messageCacheUsed < MAX_MESSAGE_CACHE &&
285
+ typeof msg.content === 'string'
286
+ ) {
287
+ messageCacheUsed++;
288
+ return {
289
+ ...msg,
290
+ content: [
291
+ {
292
+ type: 'text',
293
+ text: msg.content,
294
+ cache_control: { type: 'ephemeral' },
295
+ },
296
+ ],
297
+ };
298
+ }
299
+
300
+ return msg;
301
+ },
302
+ );
303
+ }
138
304
 
139
- return fetch(input, {
305
+ body = JSON.stringify(parsed);
306
+ } catch {
307
+ // If parsing fails, send as-is
308
+ }
309
+ }
310
+
311
+ return fetch(url, {
140
312
  ...init,
313
+ body,
141
314
  headers,
142
315
  });
143
316
  };
@@ -147,7 +320,116 @@ async function getAnthropicInstance(cfg: AGIConfig) {
147
320
  });
148
321
  }
149
322
 
150
- return anthropic;
323
// For API key auth, also apply caching via customFetch.
// This optimizes token usage even without OAuth.
//
// When the request body is a JSON string, up to two ephemeral cache
// breakpoints are added (the surrounding code reports that the adapter already
// adds 2 tool cache blocks, against a maximum of 4 in total):
//   - the first system block, when it is a text block
//   - the last content block of the last message (a string-content last
//     message is wrapped into a single text block to carry the marker)
// Blocks that already carry cache_control are left untouched. A body that is
// not a string, or fails to parse or transform, is sent unchanged.
//
// `input` is handed to fetch as received. Converting it with toString() would
// turn a Request object into the literal "[object Request]".
const customFetch = async (
	input: string | URL | Request,
	init?: RequestInit,
) => {
	let body = init?.body;

	if (body && typeof body === 'string') {
		try {
			const parsed = JSON.parse(body);

			// Cache first system message.
			if (Array.isArray(parsed.system) && parsed.system.length > 0) {
				const [first, ...rest] = parsed.system;
				if (!first.cache_control && first.type === 'text') {
					parsed.system = [
						{ ...first, cache_control: { type: 'ephemeral' } },
						...rest,
					];
				}
			}

			// Cache last message only.
			if (Array.isArray(parsed.messages) && parsed.messages.length > 0) {
				const lastIndex = parsed.messages.length - 1;
				const last = parsed.messages[lastIndex];

				if (Array.isArray(last.content)) {
					if (last.content.length > 0) {
						const head = last.content.slice(0, -1);
						const tail = last.content[last.content.length - 1];
						if (!tail.cache_control) {
							parsed.messages[lastIndex] = {
								...last,
								content: [
									...head,
									{ ...tail, cache_control: { type: 'ephemeral' } },
								],
							};
						}
					}
				} else if (typeof last.content === 'string') {
					parsed.messages[lastIndex] = {
						...last,
						content: [
							{
								type: 'text',
								text: last.content,
								cache_control: { type: 'ephemeral' },
							},
						],
					};
				}
			}

			body = JSON.stringify(parsed);
		} catch {
			// If parsing or transforming fails, send as-is.
		}
	}

	return fetch(input, { ...init, body });
};
429
+
430
+ return createAnthropic({
431
+ fetch: customFetch as typeof fetch,
432
+ });
151
433
  }
152
434
 
153
435
  export async function resolveModel(
@@ -1,5 +1,5 @@
1
1
  import { hasToolCall, streamText } from 'ai';
2
- import { loadConfig } from '@agi-cli/sdk';
2
+ import { loadConfig, getAuth } from '@agi-cli/sdk';
3
3
  import { getDb } from '@agi-cli/database';
4
4
  import { messageParts } from '@agi-cli/database/schema';
5
5
  import { eq } from 'drizzle-orm';
@@ -223,7 +223,11 @@ async function runAssistant(opts: RunOpts) {
223
223
  opts,
224
224
  db,
225
225
  );
226
- const toolset = adaptTools(gated, sharedCtx, opts.provider);
226
+
227
+ // Get auth type for Claude Code OAuth detection
228
+ const providerAuth = await getAuth(opts.provider, opts.projectRoot);
229
+ const authType = providerAuth?.type;
230
+ const toolset = adaptTools(gated, sharedCtx, opts.provider, authType);
227
231
 
228
232
  let _finishObserved = false;
229
233
  const unsubscribeFinish = subscribe(opts.sessionId, (evt) => {
@@ -8,6 +8,13 @@ import { toErrorPayload } from './error-handling.ts';
8
8
  import type { RunOpts } from './session-queue.ts';
9
9
  import type { ToolAdapterContext } from '../tools/adapter.ts';
10
10
  import type { ProviderMetadata, UsageData } from './db-operations.ts';
11
+ import {
12
+ pruneSession,
13
+ isOverflow,
14
+ getModelLimits,
15
+ type TokenUsage,
16
+ } from './compaction.ts';
17
+ import { debugLog } from './debug.ts';
11
18
 
12
19
  type StepFinishEvent = {
13
20
  usage?: UsageData;
@@ -277,6 +284,7 @@ export function createFinishHandler(
277
284
  inputTokens: Number(sessRows[0].promptTokens ?? 0),
278
285
  outputTokens: Number(sessRows[0].completionTokens ?? 0),
279
286
  totalTokens: Number(sessRows[0].totalTokens ?? 0),
287
+ cachedInputTokens: Number(sessRows[0].cachedInputTokens ?? 0),
280
288
  }
281
289
  : fin.usage;
282
290
 
@@ -284,6 +292,37 @@ export function createFinishHandler(
284
292
  ? estimateModelCostUsd(opts.provider, opts.model, usage)
285
293
  : undefined;
286
294
 
295
+ // Check for context overflow and prune if needed
296
+ if (usage) {
297
+ try {
298
+ const limits = getModelLimits(opts.provider, opts.model);
299
+ if (limits) {
300
+ const tokenUsage: TokenUsage = {
301
+ input: usage.inputTokens ?? 0,
302
+ output: usage.outputTokens ?? 0,
303
+ cacheRead:
304
+ (usage as { cachedInputTokens?: number }).cachedInputTokens ?? 0,
305
+ };
306
+
307
+ if (isOverflow(tokenUsage, limits)) {
308
+ debugLog(
309
+ `[stream-handlers] Context overflow detected, triggering prune for session ${opts.sessionId}`,
310
+ );
311
+ // Prune asynchronously - don't block the finish handler
312
+ pruneSession(db, opts.sessionId).catch((err) => {
313
+ debugLog(
314
+ `[stream-handlers] Prune failed: ${err instanceof Error ? err.message : String(err)}`,
315
+ );
316
+ });
317
+ }
318
+ }
319
+ } catch (err) {
320
+ debugLog(
321
+ `[stream-handlers] Overflow check failed: ${err instanceof Error ? err.message : String(err)}`,
322
+ );
323
+ }
324
+ }
325
+
287
326
  publish({
288
327
  type: 'message.completed',
289
328
  sessionId: opts.sessionId,
@@ -0,0 +1,156 @@
1
/**
 * Tool name mapping for Claude Code OAuth compatibility.
 *
 * Claude Code OAuth requires PascalCase tool names but does NOT whitelist
 * specific tools. Any tool with a PascalCase name is accepted.
 *
 * This module provides bidirectional mapping between AGI's canonical
 * snake_case names and the PascalCase format required for OAuth.
 */

export type ToolNamingConvention = 'canonical' | 'claude-code';

/**
 * Mapping from AGI canonical names to PascalCase names.
 * Names missing from this table fall back to the generic snake_case →
 * PascalCase conversion in `toClaudeCodeName`.
 */
export const CANONICAL_TO_PASCAL: Record<string, string> = {
	// File system operations
	read: 'Read',
	write: 'Write',
	edit: 'Edit',
	ls: 'Ls',
	tree: 'Tree',
	cd: 'Cd',
	pwd: 'Pwd',

	// Search operations
	glob: 'Glob',
	ripgrep: 'Grep', // Maps to Grep for Claude Code compatibility
	grep: 'Grep',

	// Execution
	bash: 'Bash',
	terminal: 'Terminal',

	// Git operations
	git_status: 'GitStatus',
	git_diff: 'GitDiff',
	git_commit: 'GitCommit',

	// Patch/edit
	apply_patch: 'ApplyPatch',

	// Task management
	update_plan: 'UpdatePlan',
	progress_update: 'ProgressUpdate',
	finish: 'Finish',

	// Web operations
	websearch: 'WebSearch',
};

/**
 * Reverse mapping from PascalCase names to canonical.
 * Written out by hand because ripgrep/grep → Grep is many-to-one: Grep maps
 * back to ripgrep only, so a grep → Grep → canonical round trip yields ripgrep.
 */
export const PASCAL_TO_CANONICAL: Record<string, string> = {
	// File system operations
	Read: 'read',
	Write: 'write',
	Edit: 'edit',
	Ls: 'ls',
	Tree: 'tree',
	Cd: 'cd',
	Pwd: 'pwd',

	// Search operations
	Glob: 'glob',
	Grep: 'ripgrep', // Maps back to ripgrep (primary search tool)

	// Execution
	Bash: 'bash',
	Terminal: 'terminal',

	// Git operations
	GitStatus: 'git_status',
	GitDiff: 'git_diff',
	GitCommit: 'git_commit',

	// Patch/edit
	ApplyPatch: 'apply_patch',

	// Task management
	UpdatePlan: 'update_plan',
	ProgressUpdate: 'progress_update',
	Finish: 'finish',

	// Web operations
	WebSearch: 'websearch',
};

/**
 * Read a table entry, ignoring keys inherited from Object.prototype.
 * A plain `table[name]` lookup returns a function for names such as
 * "constructor" or "toString", which would then be handed back as a tool name.
 */
function lookupOwn(
	table: Record<string, string>,
	name: string,
): string | undefined {
	return Object.prototype.hasOwnProperty.call(table, name)
		? table[name]
		: undefined;
}

/**
 * Convert a canonical tool name to PascalCase format.
 *
 * Uses CANONICAL_TO_PASCAL when the name is listed; otherwise splits on "_"
 * and upper-cases the first character of every part.
 */
export function toClaudeCodeName(canonical: string): string {
	const mapped = lookupOwn(CANONICAL_TO_PASCAL, canonical);
	if (mapped) {
		return mapped;
	}
	// Default: convert snake_case to PascalCase
	return canonical
		.split('_')
		.map((part) => part.charAt(0).toUpperCase() + part.slice(1))
		.join('');
}

/**
 * Convert a PascalCase tool name to canonical format.
 *
 * Uses PASCAL_TO_CANONICAL when the name is listed; otherwise inserts "_"
 * before every upper-case letter, lower-cases the result and drops a leading "_".
 */
export function toCanonicalName(pascalCase: string): string {
	const mapped = lookupOwn(PASCAL_TO_CANONICAL, pascalCase);
	if (mapped) {
		return mapped;
	}
	// Default: convert PascalCase to snake_case
	return pascalCase
		.replace(/([A-Z])/g, '_$1')
		.toLowerCase()
		.replace(/^_/, '');
}

/**
 * Check if the current provider/auth combo requires PascalCase naming.
 * True only for provider "anthropic" together with auth type "oauth".
 */
export function requiresClaudeCodeNaming(
	provider: string,
	authType?: string,
): boolean {
	return provider === 'anthropic' && authType === 'oauth';
}

/**
 * Transform a tool definition for Claude Code OAuth.
 * Returns a new object with the transformed name; the input is not mutated.
 */
export function transformToolForClaudeCode<T extends { name: string }>(
	tool: T,
): T {
	return {
		...tool,
		name: toClaudeCodeName(tool.name),
	};
}

/**
 * Convert the name of a tool call back to its canonical form.
 * Used when receiving tool calls from Claude Code OAuth.
 *
 * @param call Tool call carrying a `name`.
 * @param fromClaudeCode When false the call is returned as-is (same object);
 *   when true a copy with the canonical name is returned.
 */
export function normalizeToolCall<T extends { name: string }>(
	call: T,
	fromClaudeCode: boolean,
): T {
	if (!fromClaudeCode) return call;
	return {
		...call,
		name: toCanonicalName(call.name),
	};
}
@@ -9,6 +9,10 @@ import type {
9
9
  StepExecutionState,
10
10
  } from '../runtime/tool-context.ts';
11
11
  import { isToolError } from '@agi-cli/sdk/tools/error';
12
+ import {
13
+ toClaudeCodeName,
14
+ requiresClaudeCodeNaming,
15
+ } from '../runtime/tool-mapping.ts';
12
16
 
13
17
  export type { ToolAdapterContext } from '../runtime/tool-context.ts';
14
18
 
@@ -47,6 +51,7 @@ export function adaptTools(
47
51
  tools: DiscoveredTool[],
48
52
  ctx: ToolAdapterContext,
49
53
  provider?: string,
54
+ authType?: string,
50
55
  ) {
51
56
  const out: Record<string, Tool> = {};
52
57
  const pendingCalls = new Map<string, PendingCallMeta[]>();
@@ -56,6 +61,12 @@ export function adaptTools(
56
61
  };
57
62
  let firstToolCallReported = false;
58
63
 
64
+ // Determine if we need Claude Code naming (PascalCase)
65
+ const useClaudeCodeNaming = requiresClaudeCodeNaming(
66
+ provider ?? '',
67
+ authType,
68
+ );
69
+
59
70
  if (!ctx.stepExecution) {
60
71
  ctx.stepExecution = { states: new Map<number, StepExecutionState>() };
61
72
  }
@@ -66,8 +77,14 @@ export function adaptTools(
66
77
  const cacheableTools = new Set(['read', 'write', 'bash', 'edit']);
67
78
  let cachedToolCount = 0;
68
79
 
69
- for (const { name, tool } of tools) {
80
+ for (const { name: canonicalName, tool } of tools) {
70
81
  const base = tool;
82
+ // Use PascalCase for Claude Code OAuth, otherwise canonical (snake_case)
83
+ const registrationName = useClaudeCodeNaming
84
+ ? toClaudeCodeName(canonicalName)
85
+ : canonicalName;
86
+ // Always use canonical name for DB storage and events
87
+ const name = canonicalName;
71
88
 
72
89
  const processedToolErrors = new WeakSet<object>();
73
90
 
@@ -145,7 +162,7 @@ export function adaptTools(
145
162
  ? { anthropic: { cacheControl: { type: 'ephemeral' as const } } }
146
163
  : undefined;
147
164
 
148
- out[name] = {
165
+ out[registrationName] = {
149
166
  ...base,
150
167
  ...(providerOptions ? { providerOptions } : {}),
151
168
  async onInputStart(options: unknown) {