@agi-cli/server 0.1.105 → 0.1.107
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/runtime/compaction.ts +254 -0
- package/src/runtime/db-operations.ts +3 -4
- package/src/runtime/history-builder.ts +7 -1
- package/src/runtime/provider.ts +286 -4
- package/src/runtime/runner.ts +6 -2
- package/src/runtime/stream-handlers.ts +39 -0
- package/src/runtime/tool-mapping.ts +156 -0
- package/src/tools/adapter.ts +19 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agi-cli/server",
|
|
3
|
-
"version": "0.1.105",
|
|
3
|
+
"version": "0.1.107",
|
|
4
4
|
"description": "HTTP API server for AGI CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -29,8 +29,8 @@
|
|
|
29
29
|
"typecheck": "tsc --noEmit"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"@agi-cli/sdk": "0.1.
|
|
33
|
-
"@agi-cli/database": "0.1.
|
|
32
|
+
"@agi-cli/sdk": "0.1.107",
|
|
33
|
+
"@agi-cli/database": "0.1.107",
|
|
34
34
|
"drizzle-orm": "^0.44.5",
|
|
35
35
|
"hono": "^4.9.9",
|
|
36
36
|
"zod": "^4.1.8"
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context compaction module for managing token usage.
|
|
3
|
+
*
|
|
4
|
+
* This module implements OpenCode-style context management:
|
|
5
|
+
* 1. Detects when context is overflowing (tokens > context_limit - output_limit)
|
|
6
|
+
* 2. Prunes old tool outputs by marking them as "compacted"
|
|
7
|
+
* 3. History builder returns "[Old tool result content cleared]" for compacted parts
|
|
8
|
+
*
|
|
9
|
+
* Pruning strategy:
|
|
10
|
+
* - Protect the last PRUNE_PROTECT tokens worth of tool calls (40,000)
|
|
11
|
+
* - Only prune if we'd save at least PRUNE_MINIMUM tokens (20,000)
|
|
12
|
+
* - Skip the last 2 turns to preserve recent context
|
|
13
|
+
* - Never prune "skill" or other protected tools
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import type { getDb } from '@agi-cli/database';
|
|
17
|
+
import { messages, messageParts } from '@agi-cli/database/schema';
|
|
18
|
+
import { eq, desc } from 'drizzle-orm';
|
|
19
|
+
import { debugLog } from './debug.ts';
|
|
20
|
+
|
|
21
|
+
// Token thresholds (matching OpenCode)

/** A prune is only carried out when it would free more than this many estimated tokens. */
export const PRUNE_MINIMUM = 20_000;

/** Estimated-token budget of the most recent tool output that is shielded from pruning. */
export const PRUNE_PROTECT = 40_000;

// Tool names whose results are exempt from pruning
const PRUNE_PROTECTED_TOOLS = ['skill'];

/**
 * Rough token estimate for a piece of text, assuming ~4 characters per token.
 *
 * @param text - Text to measure; a falsy value is treated as the empty string.
 * @returns The character count divided by four, rounded to the nearest integer
 *   and never below zero.
 */
export function estimateTokens(text: string): number {
	const charCount = (text || '').length;
	const approximate = Math.round(charCount / 4);
	return Math.max(0, approximate);
}
|
|
32
|
+
|
|
33
|
+
/** Token counts for a model call. Optional fields may be absent. */
export interface TokenUsage {
	input: number;
	output: number;
	cacheRead?: number;
	cacheWrite?: number;
	reasoning?: number;
}

/** Token limits of a model: total context size and the output allowance. */
export interface ModelLimits {
	context: number;
	output: number;
}

/**
 * Decide whether the reported usage no longer fits in the usable context.
 *
 * Usage is `input + cacheRead + output`; the usable context is
 * `limits.context - limits.output`. Overflow means usage is strictly greater
 * than the usable context. A context limit of `0` never overflows.
 *
 * @param tokens - Usage figures to evaluate (`cacheRead` defaults to 0).
 * @param limits - Limits of the model in use.
 * @returns `true` when the usage exceeds the usable context, otherwise `false`.
 */
export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
	if (limits.context === 0) {
		return false;
	}

	const cachedRead = tokens.cacheRead ?? 0;
	const used = tokens.input + cachedRead + tokens.output;
	const budget = limits.context - limits.output;

	if (used > budget) {
		debugLog(
			`[compaction] Context overflow detected: ${used} tokens used, ${budget} usable (${limits.context} context - ${limits.output} output)`,
		);
		return true;
	}

	return false;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Prune old tool outputs from a session to reduce context size.
 *
 * Messages are read newest-first. Everything before the second user message
 * encountered in that order is skipped. For the remaining messages, each
 * `tool_result` part (excluding tools listed in PRUNE_PROTECTED_TOOLS) has
 * its result size estimated; once the running total exceeds PRUNE_PROTECT,
 * further parts are queued for pruning. The scan stops entirely at the first
 * part that already carries a `compactedAt` marker.
 *
 * Queued parts are only rewritten when the estimated savings exceed
 * PRUNE_MINIMUM. A rewritten part keeps its JSON structure, gains
 * `compactedAt` (and `resultSummary` for string results longer than 100
 * characters) and has `result` set to `null`.
 *
 * @param db - Database handle used for the message/part queries and updates.
 * @param sessionId - Session whose tool results should be pruned.
 * @returns `pruned`: number of parts actually rewritten; `saved`: estimated
 *   tokens freed by those parts. Both are 0 when the prune was skipped.
 */
export async function pruneSession(
	db: Awaited<ReturnType<typeof getDb>>,
	sessionId: string,
): Promise<{ pruned: number; saved: number }> {
	debugLog(`[compaction] Starting prune for session ${sessionId}`);

	// Newest first, so iterating forward walks backwards through the session.
	const allMessages = await db
		.select()
		.from(messages)
		.where(eq(messages.sessionId, sessionId))
		.orderBy(desc(messages.createdAt));

	let totalTokens = 0;
	let prunedTokens = 0;
	const toPrune: Array<{
		id: string;
		content: Record<string, unknown>;
		estimate: number;
	}> = [];
	let turns = 0;

	scan: for (const msg of allMessages) {
		// Count user messages as turns
		if (msg.role === 'user') {
			turns++;
		}

		// Skip the most recent turns to preserve recent context
		if (turns < 2) continue;

		const parts = await db
			.select()
			.from(messageParts)
			.where(eq(messageParts.messageId, msg.id))
			.orderBy(desc(messageParts.index));

		for (const part of parts) {
			// Only process tool results
			if (part.type !== 'tool_result') continue;

			// Skip protected tools
			if (part.toolName && PRUNE_PROTECTED_TOOLS.includes(part.toolName)) {
				continue;
			}

			// Only plain JSON objects can carry the compaction marker; anything
			// else (invalid JSON, null, primitives, arrays) is left untouched.
			let content: Record<string, unknown>;
			try {
				const parsed: unknown = JSON.parse(part.content ?? '{}');
				if (
					typeof parsed !== 'object' ||
					parsed === null ||
					Array.isArray(parsed)
				) {
					continue;
				}
				content = parsed as Record<string, unknown>;
			} catch {
				continue;
			}

			// Everything older than an already compacted part was handled by a
			// previous prune, so stop scanning the whole session, not just
			// this message.
			if (content.compactedAt) {
				debugLog(
					`[compaction] Hit previously compacted content, stopping prune`,
				);
				break scan;
			}

			// Estimate tokens for this result
			const result = content.result;
			const estimate = estimateTokens(
				typeof result === 'string' ? result : JSON.stringify(result ?? ''),
			);
			totalTokens += estimate;

			// If we've exceeded the protection threshold, mark for pruning
			if (totalTokens > PRUNE_PROTECT) {
				prunedTokens += estimate;
				toPrune.push({ id: part.id, content, estimate });
			}
		}
	}

	debugLog(
		`[compaction] Found ${toPrune.length} tool results to prune, saving ~${prunedTokens} tokens`,
	);

	// Only prune if we'd save enough tokens to be worthwhile
	if (prunedTokens <= PRUNE_MINIMUM) {
		debugLog(
			`[compaction] Skipping prune, would only save ${prunedTokens} tokens (min: ${PRUNE_MINIMUM})`,
		);
		return { pruned: 0, saved: 0 };
	}

	const compactedAt = Date.now();
	let pruned = 0;
	let saved = 0;

	for (const part of toPrune) {
		try {
			// Keep the structure but mark as compacted
			const compacted: Record<string, unknown> = {
				...part.content,
				compactedAt,
			};
			// Keep a small summary if it was a string result
			const original = part.content.result;
			if (typeof original === 'string' && original.length > 100) {
				compacted.resultSummary = `${original.slice(0, 100)}...`;
			}
			// Clear the actual result to save space
			compacted.result = null;

			await db
				.update(messageParts)
				.set({ content: JSON.stringify(compacted) })
				.where(eq(messageParts.id, part.id));

			// Count only parts whose update went through.
			pruned++;
			saved += part.estimate;
		} catch (err) {
			debugLog(
				`[compaction] Failed to prune part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
			);
		}
	}

	debugLog(
		`[compaction] Pruned ${pruned} tool results, saved ~${saved} tokens`,
	);

	return { pruned, saved };
}
|
|
190
|
+
|
|
191
|
+
/**
 * Look up context/output token limits for a model from a built-in table.
 *
 * Resolution order:
 * 1. Exact match on the model id.
 * 2. The longest table key contained in the model id, so a dated or prefixed
 *    id such as "o1-mini-2024-09-12" resolves to "o1-mini" rather than "o1".
 * 3. The first table key that contains the model id (e.g. "claude-3-5-sonnet"
 *    matches "claude-3-5-sonnet-20241022").
 *
 * @param provider - Provider name; only used in the debug log message.
 * @param model - Model id to look up. An empty id never matches.
 * @returns The limits for the matched entry, or `null` when nothing matches
 *   (the caller is expected to skip the overflow check in that case).
 */
export function getModelLimits(
	provider: string,
	model: string,
): ModelLimits | null {
	// Default limits for common models
	// These should ideally come from the provider catalog
	const defaults: Record<string, ModelLimits> = {
		// Anthropic
		'claude-sonnet-4-20250514': { context: 200000, output: 16000 },
		'claude-3-5-sonnet-20241022': { context: 200000, output: 8192 },
		'claude-3-5-haiku-20241022': { context: 200000, output: 8192 },
		'claude-3-opus-20240229': { context: 200000, output: 4096 },
		// OpenAI
		'gpt-4o': { context: 128000, output: 16384 },
		'gpt-4o-mini': { context: 128000, output: 16384 },
		'gpt-4-turbo': { context: 128000, output: 4096 },
		o1: { context: 200000, output: 100000 },
		'o1-mini': { context: 128000, output: 65536 },
		'o1-pro': { context: 200000, output: 100000 },
		'o3-mini': { context: 200000, output: 100000 },
		// Google
		'gemini-2.0-flash': { context: 1000000, output: 8192 },
		'gemini-1.5-pro': { context: 2000000, output: 8192 },
		'gemini-1.5-flash': { context: 1000000, output: 8192 },
	};

	// An empty id would be a substring of every key; treat it as unknown.
	if (model) {
		// Exact match first. Own-property check so ids such as "constructor"
		// do not resolve to inherited Object.prototype members.
		if (Object.prototype.hasOwnProperty.call(defaults, model)) {
			return defaults[model];
		}

		const entries = Object.entries(defaults);

		// Most specific table key contained in the model id.
		let bestKey = '';
		let bestLimits: ModelLimits | null = null;
		for (const [key, limits] of entries) {
			if (model.includes(key) && key.length > bestKey.length) {
				bestKey = key;
				bestLimits = limits;
			}
		}
		if (bestLimits) {
			return bestLimits;
		}

		// Abbreviated id: first table key that contains it.
		for (const [key, limits] of entries) {
			if (key.includes(model)) {
				return limits;
			}
		}
	}

	// Return null if no match - caller should handle
	debugLog(
		`[compaction] No model limits found for ${provider}/${model}, skipping overflow check`,
	);
	return null;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Report whether a stored tool-result payload carries a compaction marker.
 *
 * @param content - JSON text of a tool-result part.
 * @returns `true` when the parsed payload has a truthy `compactedAt`;
 *   `false` otherwise, including when the text cannot be parsed or read.
 */
export function isCompacted(content: string): boolean {
	let marker: unknown;
	try {
		marker = JSON.parse(content).compactedAt;
	} catch {
		return false;
	}
	return Boolean(marker);
}

/**
 * Placeholder text substituted for the result of a compacted tool call.
 */
export const COMPACTED_PLACEHOLDER = '[Old tool result content cleared]';
|
|
@@ -76,15 +76,14 @@ export async function updateSessionTokensIncremental(
|
|
|
76
76
|
: priorCachedMsg;
|
|
77
77
|
|
|
78
78
|
// Compute deltas for this step; clamp to 0 in case provider reports smaller values
|
|
79
|
-
// Cached tokens reduce the billable input, so we subtract them from the delta
|
|
80
79
|
const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
|
|
81
80
|
const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
|
|
82
81
|
const deltaCached = Math.max(0, cumCached - priorCachedMsg);
|
|
83
82
|
const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
|
|
84
83
|
|
|
85
|
-
//
|
|
86
|
-
//
|
|
87
|
-
const nextInputSess = priorInputSess + deltaInput
|
|
84
|
+
// Note: AI SDK's inputTokens already excludes cached tokens for Anthropic,
|
|
85
|
+
// so we don't need to subtract deltaCached here. Just accumulate directly.
|
|
86
|
+
const nextInputSess = priorInputSess + deltaInput;
|
|
88
87
|
const nextOutputSess = priorOutputSess + deltaOutput;
|
|
89
88
|
const nextCachedSess = priorCachedSess + deltaCached;
|
|
90
89
|
const nextReasoningSess = priorReasoningSess + deltaReasoning;
|
|
@@ -4,6 +4,7 @@ import { messages, messageParts } from '@agi-cli/database/schema';
|
|
|
4
4
|
import { eq, asc } from 'drizzle-orm';
|
|
5
5
|
import { debugLog } from './debug.ts';
|
|
6
6
|
import { ToolHistoryTracker } from './history/tool-history-tracker.ts';
|
|
7
|
+
import { COMPACTED_PLACEHOLDER } from './compaction.ts';
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
10
|
* Builds the conversation history for a session from the database,
|
|
@@ -93,12 +94,17 @@ export async function buildHistoryMessages(
|
|
|
93
94
|
name?: string;
|
|
94
95
|
callId?: string;
|
|
95
96
|
result?: unknown;
|
|
97
|
+
compactedAt?: number;
|
|
96
98
|
};
|
|
97
99
|
if (obj.callId) {
|
|
100
|
+
// If this tool result was compacted, return placeholder instead
|
|
101
|
+
const result = obj.compactedAt
|
|
102
|
+
? COMPACTED_PLACEHOLDER
|
|
103
|
+
: obj.result;
|
|
98
104
|
toolResults.push({
|
|
99
105
|
name: obj.name ?? 'tool',
|
|
100
106
|
callId: obj.callId,
|
|
101
|
-
result
|
|
107
|
+
result,
|
|
102
108
|
});
|
|
103
109
|
}
|
|
104
110
|
} catch {}
|
package/src/runtime/provider.ts
CHANGED
|
@@ -7,9 +7,13 @@ import {
|
|
|
7
7
|
setAuth,
|
|
8
8
|
} from '@agi-cli/sdk';
|
|
9
9
|
import { openai, createOpenAI } from '@ai-sdk/openai';
|
|
10
|
-
import {
|
|
10
|
+
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
11
11
|
import { google, createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
12
12
|
import { createOpenRouter } from '@openrouter/ai-sdk-provider';
|
|
13
|
+
import { toClaudeCodeName } from './tool-mapping.ts';
|
|
14
|
+
|
|
15
|
+
// Version to report in user-agent for Claude Code compatibility
|
|
16
|
+
const CLAUDE_CLI_VERSION = '1.0.61';
|
|
13
17
|
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
14
18
|
|
|
15
19
|
export type ProviderName = ProviderId;
|
|
@@ -132,12 +136,181 @@ async function getAnthropicInstance(cfg: AGIConfig) {
|
|
|
132
136
|
}
|
|
133
137
|
}
|
|
134
138
|
|
|
139
|
+
// Required Claude Code headers
|
|
135
140
|
headers.authorization = `Bearer ${currentAuth.access}`;
|
|
136
141
|
headers['anthropic-beta'] =
|
|
137
|
-
'
|
|
142
|
+
'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14';
|
|
143
|
+
headers['anthropic-dangerous-direct-browser-access'] = 'true';
|
|
144
|
+
headers['anthropic-version'] = '2023-06-01';
|
|
145
|
+
headers['user-agent'] =
|
|
146
|
+
`claude-cli/${CLAUDE_CLI_VERSION} (external, cli)`;
|
|
147
|
+
headers['x-app'] = 'cli';
|
|
148
|
+
headers['content-type'] = 'application/json';
|
|
149
|
+
headers.accept = 'application/json';
|
|
150
|
+
|
|
151
|
+
// Stainless headers (fingerprinting)
|
|
152
|
+
headers['x-stainless-arch'] = process.arch === 'arm64' ? 'arm64' : 'x64';
|
|
153
|
+
headers['x-stainless-helper-method'] = 'stream';
|
|
154
|
+
headers['x-stainless-lang'] = 'js';
|
|
155
|
+
headers['x-stainless-os'] =
|
|
156
|
+
process.platform === 'darwin'
|
|
157
|
+
? 'MacOS'
|
|
158
|
+
: process.platform === 'win32'
|
|
159
|
+
? 'Windows'
|
|
160
|
+
: 'Linux';
|
|
161
|
+
headers['x-stainless-package-version'] = '0.70.0';
|
|
162
|
+
headers['x-stainless-retry-count'] = '0';
|
|
163
|
+
headers['x-stainless-runtime'] = 'node';
|
|
164
|
+
headers['x-stainless-runtime-version'] = process.version;
|
|
165
|
+
headers['x-stainless-timeout'] = '600';
|
|
166
|
+
|
|
167
|
+
// Add ?beta=true to URL
|
|
168
|
+
let url = typeof input === 'string' ? input : input.toString();
|
|
169
|
+
if (url.includes('/v1/messages') && !url.includes('beta=true')) {
|
|
170
|
+
url += url.includes('?') ? '&beta=true' : '?beta=true';
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Transform request body: tool names to PascalCase + apply caching
|
|
174
|
+
let body = init?.body;
|
|
175
|
+
if (body && typeof body === 'string') {
|
|
176
|
+
try {
|
|
177
|
+
const parsed = JSON.parse(body);
|
|
178
|
+
|
|
179
|
+
// Transform tool names
|
|
180
|
+
if (parsed.tools && Array.isArray(parsed.tools)) {
|
|
181
|
+
parsed.tools = parsed.tools.map(
|
|
182
|
+
(tool: { name: string; [key: string]: unknown }) => ({
|
|
183
|
+
...tool,
|
|
184
|
+
name: toClaudeCodeName(tool.name),
|
|
185
|
+
}),
|
|
186
|
+
);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Apply ephemeral caching (max 4 cache breakpoints total)
|
|
190
|
+
// Adapter adds 2 tool cache blocks, so we can add 2 more:
|
|
191
|
+
// - 1 system block (the first one with tools description)
|
|
192
|
+
// - 1 message block (the last user message)
|
|
193
|
+
const MAX_SYSTEM_CACHE = 1;
|
|
194
|
+
const MAX_MESSAGE_CACHE = 1;
|
|
195
|
+
let systemCacheUsed = 0;
|
|
196
|
+
let messageCacheUsed = 0;
|
|
197
|
+
|
|
198
|
+
// Cache first system message only (contains agent instructions)
|
|
199
|
+
if (parsed.system && Array.isArray(parsed.system)) {
|
|
200
|
+
parsed.system = parsed.system.map(
|
|
201
|
+
(
|
|
202
|
+
block: { type: string; cache_control?: unknown },
|
|
203
|
+
index: number,
|
|
204
|
+
) => {
|
|
205
|
+
if (block.cache_control) return block;
|
|
206
|
+
if (
|
|
207
|
+
systemCacheUsed < MAX_SYSTEM_CACHE &&
|
|
208
|
+
index === 0 &&
|
|
209
|
+
block.type === 'text'
|
|
210
|
+
) {
|
|
211
|
+
systemCacheUsed++;
|
|
212
|
+
return { ...block, cache_control: { type: 'ephemeral' } };
|
|
213
|
+
}
|
|
214
|
+
return block;
|
|
215
|
+
},
|
|
216
|
+
);
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Transform tool names in messages and apply caching to last message only
|
|
220
|
+
if (parsed.messages && Array.isArray(parsed.messages)) {
|
|
221
|
+
const messageCount = parsed.messages.length;
|
|
222
|
+
|
|
223
|
+
parsed.messages = parsed.messages.map(
|
|
224
|
+
(
|
|
225
|
+
msg: {
|
|
226
|
+
role: string;
|
|
227
|
+
content: unknown;
|
|
228
|
+
[key: string]: unknown;
|
|
229
|
+
},
|
|
230
|
+
msgIndex: number,
|
|
231
|
+
) => {
|
|
232
|
+
// Only cache the very last message
|
|
233
|
+
const isLast = msgIndex === messageCount - 1;
|
|
234
|
+
|
|
235
|
+
if (Array.isArray(msg.content)) {
|
|
236
|
+
const content = msg.content.map(
|
|
237
|
+
(
|
|
238
|
+
block: {
|
|
239
|
+
type: string;
|
|
240
|
+
name?: string;
|
|
241
|
+
cache_control?: unknown;
|
|
242
|
+
},
|
|
243
|
+
blockIndex: number,
|
|
244
|
+
) => {
|
|
245
|
+
let transformedBlock = block;
|
|
246
|
+
|
|
247
|
+
// Transform tool names
|
|
248
|
+
if (block.type === 'tool_use' && block.name) {
|
|
249
|
+
transformedBlock = {
|
|
250
|
+
...block,
|
|
251
|
+
name: toClaudeCodeName(block.name),
|
|
252
|
+
};
|
|
253
|
+
}
|
|
254
|
+
if (block.type === 'tool_result' && block.name) {
|
|
255
|
+
transformedBlock = {
|
|
256
|
+
...block,
|
|
257
|
+
name: toClaudeCodeName(block.name),
|
|
258
|
+
};
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// Add cache_control to last block of last message
|
|
262
|
+
if (
|
|
263
|
+
isLast &&
|
|
264
|
+
!transformedBlock.cache_control &&
|
|
265
|
+
messageCacheUsed < MAX_MESSAGE_CACHE &&
|
|
266
|
+
blockIndex === (msg.content as unknown[]).length - 1
|
|
267
|
+
) {
|
|
268
|
+
messageCacheUsed++;
|
|
269
|
+
return {
|
|
270
|
+
...transformedBlock,
|
|
271
|
+
cache_control: { type: 'ephemeral' },
|
|
272
|
+
};
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
return transformedBlock;
|
|
276
|
+
},
|
|
277
|
+
);
|
|
278
|
+
return { ...msg, content };
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// For string content, wrap in array with cache_control if last message
|
|
282
|
+
if (
|
|
283
|
+
isLast &&
|
|
284
|
+
messageCacheUsed < MAX_MESSAGE_CACHE &&
|
|
285
|
+
typeof msg.content === 'string'
|
|
286
|
+
) {
|
|
287
|
+
messageCacheUsed++;
|
|
288
|
+
return {
|
|
289
|
+
...msg,
|
|
290
|
+
content: [
|
|
291
|
+
{
|
|
292
|
+
type: 'text',
|
|
293
|
+
text: msg.content,
|
|
294
|
+
cache_control: { type: 'ephemeral' },
|
|
295
|
+
},
|
|
296
|
+
],
|
|
297
|
+
};
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
return msg;
|
|
301
|
+
},
|
|
302
|
+
);
|
|
303
|
+
}
|
|
138
304
|
|
|
139
|
-
|
|
305
|
+
body = JSON.stringify(parsed);
|
|
306
|
+
} catch {
|
|
307
|
+
// If parsing fails, send as-is
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
return fetch(url, {
|
|
140
312
|
...init,
|
|
313
|
+
body,
|
|
141
314
|
headers,
|
|
142
315
|
});
|
|
143
316
|
};
|
|
@@ -147,7 +320,116 @@ async function getAnthropicInstance(cfg: AGIConfig) {
|
|
|
147
320
|
});
|
|
148
321
|
}
|
|
149
322
|
|
|
150
|
-
|
|
323
|
+
// For API key auth, also apply caching via customFetch
|
|
324
|
+
// This optimizes token usage even without OAuth
|
|
325
|
+
const customFetch = async (
|
|
326
|
+
input: string | URL | Request,
|
|
327
|
+
init?: RequestInit,
|
|
328
|
+
) => {
|
|
329
|
+
let body = init?.body;
|
|
330
|
+
if (body && typeof body === 'string') {
|
|
331
|
+
try {
|
|
332
|
+
const parsed = JSON.parse(body);
|
|
333
|
+
|
|
334
|
+
// Apply ephemeral caching (max 4 cache breakpoints total)
|
|
335
|
+
// Adapter adds 2 tool cache blocks, so we can add 2 more:
|
|
336
|
+
// - 1 system block + 1 message block = 2
|
|
337
|
+
const MAX_SYSTEM_CACHE = 1;
|
|
338
|
+
const MAX_MESSAGE_CACHE = 1;
|
|
339
|
+
let systemCacheUsed = 0;
|
|
340
|
+
let messageCacheUsed = 0;
|
|
341
|
+
|
|
342
|
+
// Cache first system message
|
|
343
|
+
if (parsed.system && Array.isArray(parsed.system)) {
|
|
344
|
+
parsed.system = parsed.system.map(
|
|
345
|
+
(
|
|
346
|
+
block: { type: string; cache_control?: unknown },
|
|
347
|
+
index: number,
|
|
348
|
+
) => {
|
|
349
|
+
if (block.cache_control) return block;
|
|
350
|
+
if (
|
|
351
|
+
systemCacheUsed < MAX_SYSTEM_CACHE &&
|
|
352
|
+
index === 0 &&
|
|
353
|
+
block.type === 'text'
|
|
354
|
+
) {
|
|
355
|
+
systemCacheUsed++;
|
|
356
|
+
return { ...block, cache_control: { type: 'ephemeral' } };
|
|
357
|
+
}
|
|
358
|
+
return block;
|
|
359
|
+
},
|
|
360
|
+
);
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// Cache last message only
|
|
364
|
+
if (parsed.messages && Array.isArray(parsed.messages)) {
|
|
365
|
+
const messageCount = parsed.messages.length;
|
|
366
|
+
parsed.messages = parsed.messages.map(
|
|
367
|
+
(
|
|
368
|
+
msg: {
|
|
369
|
+
role: string;
|
|
370
|
+
content: unknown;
|
|
371
|
+
[key: string]: unknown;
|
|
372
|
+
},
|
|
373
|
+
msgIndex: number,
|
|
374
|
+
) => {
|
|
375
|
+
const isLast = msgIndex === messageCount - 1;
|
|
376
|
+
|
|
377
|
+
if (Array.isArray(msg.content)) {
|
|
378
|
+
const blocks = msg.content as {
|
|
379
|
+
type: string;
|
|
380
|
+
cache_control?: unknown;
|
|
381
|
+
}[];
|
|
382
|
+
const content = blocks.map((block, blockIndex) => {
|
|
383
|
+
if (block.cache_control) return block;
|
|
384
|
+
if (
|
|
385
|
+
isLast &&
|
|
386
|
+
messageCacheUsed < MAX_MESSAGE_CACHE &&
|
|
387
|
+
blockIndex === blocks.length - 1
|
|
388
|
+
) {
|
|
389
|
+
messageCacheUsed++;
|
|
390
|
+
return { ...block, cache_control: { type: 'ephemeral' } };
|
|
391
|
+
}
|
|
392
|
+
return block;
|
|
393
|
+
});
|
|
394
|
+
return { ...msg, content };
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
if (
|
|
398
|
+
isLast &&
|
|
399
|
+
messageCacheUsed < MAX_MESSAGE_CACHE &&
|
|
400
|
+
typeof msg.content === 'string'
|
|
401
|
+
) {
|
|
402
|
+
messageCacheUsed++;
|
|
403
|
+
return {
|
|
404
|
+
...msg,
|
|
405
|
+
content: [
|
|
406
|
+
{
|
|
407
|
+
type: 'text',
|
|
408
|
+
text: msg.content,
|
|
409
|
+
cache_control: { type: 'ephemeral' },
|
|
410
|
+
},
|
|
411
|
+
],
|
|
412
|
+
};
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
return msg;
|
|
416
|
+
},
|
|
417
|
+
);
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
body = JSON.stringify(parsed);
|
|
421
|
+
} catch {
|
|
422
|
+
// If parsing fails, send as-is
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
const url = typeof input === 'string' ? input : input.toString();
|
|
427
|
+
return fetch(url, { ...init, body });
|
|
428
|
+
};
|
|
429
|
+
|
|
430
|
+
return createAnthropic({
|
|
431
|
+
fetch: customFetch as typeof fetch,
|
|
432
|
+
});
|
|
151
433
|
}
|
|
152
434
|
|
|
153
435
|
export async function resolveModel(
|
package/src/runtime/runner.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { hasToolCall, streamText } from 'ai';
|
|
2
|
-
import { loadConfig } from '@agi-cli/sdk';
|
|
2
|
+
import { loadConfig, getAuth } from '@agi-cli/sdk';
|
|
3
3
|
import { getDb } from '@agi-cli/database';
|
|
4
4
|
import { messageParts } from '@agi-cli/database/schema';
|
|
5
5
|
import { eq } from 'drizzle-orm';
|
|
@@ -223,7 +223,11 @@ async function runAssistant(opts: RunOpts) {
|
|
|
223
223
|
opts,
|
|
224
224
|
db,
|
|
225
225
|
);
|
|
226
|
-
|
|
226
|
+
|
|
227
|
+
// Get auth type for Claude Code OAuth detection
|
|
228
|
+
const providerAuth = await getAuth(opts.provider, opts.projectRoot);
|
|
229
|
+
const authType = providerAuth?.type;
|
|
230
|
+
const toolset = adaptTools(gated, sharedCtx, opts.provider, authType);
|
|
227
231
|
|
|
228
232
|
let _finishObserved = false;
|
|
229
233
|
const unsubscribeFinish = subscribe(opts.sessionId, (evt) => {
|
|
@@ -8,6 +8,13 @@ import { toErrorPayload } from './error-handling.ts';
|
|
|
8
8
|
import type { RunOpts } from './session-queue.ts';
|
|
9
9
|
import type { ToolAdapterContext } from '../tools/adapter.ts';
|
|
10
10
|
import type { ProviderMetadata, UsageData } from './db-operations.ts';
|
|
11
|
+
import {
|
|
12
|
+
pruneSession,
|
|
13
|
+
isOverflow,
|
|
14
|
+
getModelLimits,
|
|
15
|
+
type TokenUsage,
|
|
16
|
+
} from './compaction.ts';
|
|
17
|
+
import { debugLog } from './debug.ts';
|
|
11
18
|
|
|
12
19
|
type StepFinishEvent = {
|
|
13
20
|
usage?: UsageData;
|
|
@@ -277,6 +284,7 @@ export function createFinishHandler(
|
|
|
277
284
|
inputTokens: Number(sessRows[0].promptTokens ?? 0),
|
|
278
285
|
outputTokens: Number(sessRows[0].completionTokens ?? 0),
|
|
279
286
|
totalTokens: Number(sessRows[0].totalTokens ?? 0),
|
|
287
|
+
cachedInputTokens: Number(sessRows[0].cachedInputTokens ?? 0),
|
|
280
288
|
}
|
|
281
289
|
: fin.usage;
|
|
282
290
|
|
|
@@ -284,6 +292,37 @@ export function createFinishHandler(
|
|
|
284
292
|
? estimateModelCostUsd(opts.provider, opts.model, usage)
|
|
285
293
|
: undefined;
|
|
286
294
|
|
|
295
|
+
// Check for context overflow and prune if needed
|
|
296
|
+
if (usage) {
|
|
297
|
+
try {
|
|
298
|
+
const limits = getModelLimits(opts.provider, opts.model);
|
|
299
|
+
if (limits) {
|
|
300
|
+
const tokenUsage: TokenUsage = {
|
|
301
|
+
input: usage.inputTokens ?? 0,
|
|
302
|
+
output: usage.outputTokens ?? 0,
|
|
303
|
+
cacheRead:
|
|
304
|
+
(usage as { cachedInputTokens?: number }).cachedInputTokens ?? 0,
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
if (isOverflow(tokenUsage, limits)) {
|
|
308
|
+
debugLog(
|
|
309
|
+
`[stream-handlers] Context overflow detected, triggering prune for session ${opts.sessionId}`,
|
|
310
|
+
);
|
|
311
|
+
// Prune asynchronously - don't block the finish handler
|
|
312
|
+
pruneSession(db, opts.sessionId).catch((err) => {
|
|
313
|
+
debugLog(
|
|
314
|
+
`[stream-handlers] Prune failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
315
|
+
);
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
} catch (err) {
|
|
320
|
+
debugLog(
|
|
321
|
+
`[stream-handlers] Overflow check failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
322
|
+
);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
287
326
|
publish({
|
|
288
327
|
type: 'message.completed',
|
|
289
328
|
sessionId: opts.sessionId,
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool name mapping for Claude Code OAuth compatibility.
|
|
3
|
+
*
|
|
4
|
+
* Claude Code OAuth requires PascalCase tool names but does NOT whitelist
|
|
5
|
+
* specific tools. Any tool with a PascalCase name is accepted.
|
|
6
|
+
*
|
|
7
|
+
* This module provides bidirectional mapping between AGI's canonical
|
|
8
|
+
* snake_case names and the PascalCase format required for OAuth.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
export type ToolNamingConvention = 'canonical' | 'claude-code';
|
|
12
|
+
|
|
13
|
+
/**
 * Mapping from AGI canonical names to PascalCase names.
 * Includes ALL AGI tools for complete OAuth compatibility.
 */
export const CANONICAL_TO_PASCAL: Record<string, string> = {
	// File system operations
	read: 'Read',
	write: 'Write',
	edit: 'Edit',
	ls: 'Ls',
	tree: 'Tree',
	cd: 'Cd',
	pwd: 'Pwd',

	// Search operations
	glob: 'Glob',
	ripgrep: 'Grep', // Maps to Grep for Claude Code compatibility
	grep: 'Grep',

	// Execution
	bash: 'Bash',
	terminal: 'Terminal',

	// Git operations
	git_status: 'GitStatus',
	git_diff: 'GitDiff',
	git_commit: 'GitCommit',

	// Patch/edit
	apply_patch: 'ApplyPatch',

	// Task management
	update_plan: 'UpdatePlan',
	progress_update: 'ProgressUpdate',
	finish: 'Finish',

	// Web operations
	websearch: 'WebSearch',
};

/**
 * Reverse mapping from PascalCase names to canonical.
 * Written out by hand because ripgrep/grep → Grep is many-to-one:
 * `Grep` maps back to `ripgrep` only.
 */
export const PASCAL_TO_CANONICAL: Record<string, string> = {
	// File system operations
	Read: 'read',
	Write: 'write',
	Edit: 'edit',
	Ls: 'ls',
	Tree: 'tree',
	Cd: 'cd',
	Pwd: 'pwd',

	// Search operations
	Glob: 'glob',
	Grep: 'ripgrep', // Maps back to ripgrep (primary search tool)

	// Execution
	Bash: 'bash',
	Terminal: 'terminal',

	// Git operations
	GitStatus: 'git_status',
	GitDiff: 'git_diff',
	GitCommit: 'git_commit',

	// Patch/edit
	ApplyPatch: 'apply_patch',

	// Task management
	UpdatePlan: 'update_plan',
	ProgressUpdate: 'progress_update',
	Finish: 'finish',

	// Web operations
	WebSearch: 'websearch',
};

/**
 * Read a table entry, ignoring members inherited from Object.prototype
 * (e.g. "constructor", "toString") and empty values.
 */
function ownMapping(
	table: Record<string, string>,
	key: string,
): string | undefined {
	if (!Object.prototype.hasOwnProperty.call(table, key)) return undefined;
	return table[key] || undefined;
}

/**
 * Convert a canonical tool name to PascalCase format.
 *
 * @param canonical - Canonical (snake_case) tool name.
 * @returns The entry from CANONICAL_TO_PASCAL when one exists; otherwise the
 *   name split on `_` with each segment's first character upper-cased.
 */
export function toClaudeCodeName(canonical: string): string {
	const mapped = ownMapping(CANONICAL_TO_PASCAL, canonical);
	if (mapped !== undefined) {
		return mapped;
	}
	// Default: convert snake_case to PascalCase
	return canonical
		.split('_')
		.map((part) => part.charAt(0).toUpperCase() + part.slice(1))
		.join('');
}

/**
 * Convert a PascalCase tool name to canonical format.
 *
 * @param pascalCase - PascalCase tool name.
 * @returns The entry from PASCAL_TO_CANONICAL when one exists; otherwise the
 *   name with `_` inserted before every upper-case letter, lower-cased, and
 *   with a leading `_` removed.
 */
export function toCanonicalName(pascalCase: string): string {
	const mapped = ownMapping(PASCAL_TO_CANONICAL, pascalCase);
	if (mapped !== undefined) {
		return mapped;
	}
	// Default: convert PascalCase to snake_case
	return pascalCase
		.replace(/([A-Z])/g, '_$1')
		.toLowerCase()
		.replace(/^_/, '');
}
|
|
119
|
+
|
|
120
|
+
/**
 * Tell whether tool names must be sent in PascalCase.
 *
 * @param provider - Provider identifier.
 * @param authType - Auth type in use, if known.
 * @returns `true` only for provider `'anthropic'` combined with auth type
 *   `'oauth'`.
 */
export function requiresClaudeCodeNaming(
	provider: string,
	authType?: string,
): boolean {
	if (provider !== 'anthropic') {
		return false;
	}
	return authType === 'oauth';
}
|
|
129
|
+
|
|
130
|
+
/**
 * Produce a copy of a tool definition whose name is in Claude Code format.
 *
 * @param tool - Tool definition; it is not modified.
 * @returns A shallow copy of `tool` with `name` replaced by
 *   `toClaudeCodeName(tool.name)`.
 */
export function transformToolForClaudeCode<T extends { name: string }>(
	tool: T,
): T {
	const renamed = toClaudeCodeName(tool.name);
	return { ...tool, name: renamed };
}
|
|
142
|
+
|
|
143
|
+
/**
 * Map the name of an incoming tool call back to its canonical form.
 *
 * @param call - Tool call carrying a `name`; it is not modified.
 * @param fromClaudeCode - Whether the call uses Claude Code naming.
 * @returns `call` itself when `fromClaudeCode` is false; otherwise a shallow
 *   copy with `name` replaced by `toCanonicalName(call.name)`.
 */
export function normalizeToolCall<T extends { name: string }>(
	call: T,
	fromClaudeCode: boolean,
): T {
	return fromClaudeCode
		? { ...call, name: toCanonicalName(call.name) }
		: call;
}
|
package/src/tools/adapter.ts
CHANGED
|
@@ -9,6 +9,10 @@ import type {
|
|
|
9
9
|
StepExecutionState,
|
|
10
10
|
} from '../runtime/tool-context.ts';
|
|
11
11
|
import { isToolError } from '@agi-cli/sdk/tools/error';
|
|
12
|
+
import {
|
|
13
|
+
toClaudeCodeName,
|
|
14
|
+
requiresClaudeCodeNaming,
|
|
15
|
+
} from '../runtime/tool-mapping.ts';
|
|
12
16
|
|
|
13
17
|
export type { ToolAdapterContext } from '../runtime/tool-context.ts';
|
|
14
18
|
|
|
@@ -47,6 +51,7 @@ export function adaptTools(
|
|
|
47
51
|
tools: DiscoveredTool[],
|
|
48
52
|
ctx: ToolAdapterContext,
|
|
49
53
|
provider?: string,
|
|
54
|
+
authType?: string,
|
|
50
55
|
) {
|
|
51
56
|
const out: Record<string, Tool> = {};
|
|
52
57
|
const pendingCalls = new Map<string, PendingCallMeta[]>();
|
|
@@ -56,6 +61,12 @@ export function adaptTools(
|
|
|
56
61
|
};
|
|
57
62
|
let firstToolCallReported = false;
|
|
58
63
|
|
|
64
|
+
// Determine if we need Claude Code naming (PascalCase)
|
|
65
|
+
const useClaudeCodeNaming = requiresClaudeCodeNaming(
|
|
66
|
+
provider ?? '',
|
|
67
|
+
authType,
|
|
68
|
+
);
|
|
69
|
+
|
|
59
70
|
if (!ctx.stepExecution) {
|
|
60
71
|
ctx.stepExecution = { states: new Map<number, StepExecutionState>() };
|
|
61
72
|
}
|
|
@@ -66,8 +77,14 @@ export function adaptTools(
|
|
|
66
77
|
const cacheableTools = new Set(['read', 'write', 'bash', 'edit']);
|
|
67
78
|
let cachedToolCount = 0;
|
|
68
79
|
|
|
69
|
-
for (const { name, tool } of tools) {
|
|
80
|
+
for (const { name: canonicalName, tool } of tools) {
|
|
70
81
|
const base = tool;
|
|
82
|
+
// Use PascalCase for Claude Code OAuth, otherwise canonical (snake_case)
|
|
83
|
+
const registrationName = useClaudeCodeNaming
|
|
84
|
+
? toClaudeCodeName(canonicalName)
|
|
85
|
+
: canonicalName;
|
|
86
|
+
// Always use canonical name for DB storage and events
|
|
87
|
+
const name = canonicalName;
|
|
71
88
|
|
|
72
89
|
const processedToolErrors = new WeakSet<object>();
|
|
73
90
|
|
|
@@ -145,7 +162,7 @@ export function adaptTools(
|
|
|
145
162
|
? { anthropic: { cacheControl: { type: 'ephemeral' as const } } }
|
|
146
163
|
: undefined;
|
|
147
164
|
|
|
148
|
-
out[
|
|
165
|
+
out[registrationName] = {
|
|
149
166
|
...base,
|
|
150
167
|
...(providerOptions ? { providerOptions } : {}),
|
|
151
168
|
async onInputStart(options: unknown) {
|