@agi-cli/server 0.1.106 → 0.1.107
- package/package.json +3 -3
- package/src/runtime/db-operations.ts +3 -4
- package/src/runtime/provider.ts +215 -25
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@agi-cli/server",
-  "version": "0.1.106",
+  "version": "0.1.107",
   "description": "HTTP API server for AGI CLI",
   "type": "module",
   "main": "./src/index.ts",

@@ -29,8 +29,8 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "@agi-cli/sdk": "0.1.106",
-    "@agi-cli/database": "0.1.106",
+    "@agi-cli/sdk": "0.1.107",
+    "@agi-cli/database": "0.1.107",
     "drizzle-orm": "^0.44.5",
     "hono": "^4.9.9",
     "zod": "^4.1.8"
package/src/runtime/db-operations.ts
CHANGED

@@ -76,15 +76,14 @@ export async function updateSessionTokensIncremental(
 			: priorCachedMsg;
 
 	// Compute deltas for this step; clamp to 0 in case provider reports smaller values
-	// Cached tokens reduce the billable input, so we subtract them from the delta
 	const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
 	const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
 	const deltaCached = Math.max(0, cumCached - priorCachedMsg);
 	const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
 
-	//
-	//
-	const nextInputSess = priorInputSess + deltaInput - deltaCached;
+	// Note: AI SDK's inputTokens already excludes cached tokens for Anthropic,
+	// so we don't need to subtract deltaCached here. Just accumulate directly.
+	const nextInputSess = priorInputSess + deltaInput;
 	const nextOutputSess = priorOutputSess + deltaOutput;
 	const nextCachedSess = priorCachedSess + deltaCached;
 	const nextReasoningSess = priorReasoningSess + deltaReasoning;
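The hunk above changes how updateSessionTokensIncremental folds per-step usage into session totals: each delta is still clamped at zero, but the cached-token count is no longer subtracted from the input delta, because the AI SDK's inputTokens already excludes cached tokens for Anthropic. A condensed sketch of the resulting accounting (the Usage record and the accumulate name are illustrative; the package works with individual variables and database rows):

// Sketch of the incremental accounting after this change. Providers report
// cumulative usage per message; the session keeps running totals.
type Usage = { input: number; output: number; cached: number; reasoning: number };

function accumulate(cumulative: Usage, priorMsg: Usage, priorSess: Usage): Usage {
	// Clamp each delta to 0 in case the provider reports a smaller cumulative value.
	const delta = (cum: number, prior: number) => Math.max(0, cum - prior);
	return {
		// inputTokens already excludes cached tokens (Anthropic), so no subtraction here.
		input: priorSess.input + delta(cumulative.input, priorMsg.input),
		output: priorSess.output + delta(cumulative.output, priorMsg.output),
		cached: priorSess.cached + delta(cumulative.cached, priorMsg.cached),
		reasoning: priorSess.reasoning + delta(cumulative.reasoning, priorMsg.reasoning),
	};
}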
package/src/runtime/provider.ts
CHANGED

@@ -7,7 +7,7 @@ import {
 	setAuth,
 } from '@agi-cli/sdk';
 import { openai, createOpenAI } from '@ai-sdk/openai';
-import {
+import { createAnthropic } from '@ai-sdk/anthropic';
 import { google, createGoogleGenerativeAI } from '@ai-sdk/google';
 import { createOpenRouter } from '@openrouter/ai-sdk-provider';
 import { toClaudeCodeName } from './tool-mapping.ts';
@@ -170,7 +170,7 @@ async function getAnthropicInstance(cfg: AGIConfig) {
 		url += url.includes('?') ? '&beta=true' : '?beta=true';
 	}
 
-	// Transform request body: tool names to PascalCase
+	// Transform request body: tool names to PascalCase + apply caching
 	let body = init?.body;
 	if (body && typeof body === 'string') {
 		try {
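The two hunks below apply the same cache-budgeting idea in two places: the OAuth fetch wrapper (which also renames tools to Claude Code names) and a new customFetch for API-key auth. An Anthropic request may carry at most four cache_control breakpoints; the adapter already spends two on tools, so the code marks the first system block and the tail of the last message. A condensed sketch of that shared logic (applyEphemeralCache is an illustrative helper name; the package inlines this logic inside each fetch wrapper):

import { createAnthropic } from '@ai-sdk/anthropic';

type Block = { type: string; cache_control?: unknown; [k: string]: unknown };
type Message = { role: string; content: string | Block[] };

// Mark the first system block and the last block of the last message as
// ephemeral cache breakpoints (2 of Anthropic's 4; the other 2 go to tools).
function applyEphemeralCache(parsed: { system?: Block[]; messages?: Message[] }) {
	const sys = parsed.system?.[0];
	if (sys && sys.type === 'text' && !sys.cache_control) {
		sys.cache_control = { type: 'ephemeral' };
	}
	const last = parsed.messages?.at(-1);
	if (!last) return;
	if (typeof last.content === 'string') {
		// String content: wrap in a text block so it can carry cache_control.
		last.content = [
			{ type: 'text', text: last.content, cache_control: { type: 'ephemeral' } },
		];
	} else {
		const block = last.content.at(-1);
		if (block && !block.cache_control) {
			block.cache_control = { type: 'ephemeral' };
		}
	}
}

// Wiring, as in the second hunk below: rewrite the JSON body inside a custom
// fetch and hand that fetch to createAnthropic.
const anthropic = createAnthropic({
	fetch: (async (input: string | URL | Request, init?: RequestInit) => {
		let body = init?.body;
		if (typeof body === 'string') {
			try {
				const parsed = JSON.parse(body);
				applyEphemeralCache(parsed);
				body = JSON.stringify(parsed);
			} catch {
				// Unparseable body: send unchanged.
			}
		}
		const url = typeof input === 'string' ? input : input.toString();
		return fetch(url, { ...init, body });
	}) as typeof fetch,
});

The real hunks thread MAX_SYSTEM_CACHE / MAX_MESSAGE_CACHE counters through .map() callbacks instead of mutating in place, and the OAuth path interleaves toClaudeCodeName renaming with the cache pass.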
@@ -186,36 +186,117 @@
 				);
 			}
 
-			//
+			// Apply ephemeral caching (max 4 cache breakpoints total)
+			// Adapter adds 2 tool cache blocks, so we can add 2 more:
+			// - 1 system block (the first one with tools description)
+			// - 1 message block (the last user message)
+			const MAX_SYSTEM_CACHE = 1;
+			const MAX_MESSAGE_CACHE = 1;
+			let systemCacheUsed = 0;
+			let messageCacheUsed = 0;
+
+			// Cache first system message only (contains agent instructions)
+			if (parsed.system && Array.isArray(parsed.system)) {
+				parsed.system = parsed.system.map(
+					(
+						block: { type: string; cache_control?: unknown },
+						index: number,
+					) => {
+						if (block.cache_control) return block;
+						if (
+							systemCacheUsed < MAX_SYSTEM_CACHE &&
+							index === 0 &&
+							block.type === 'text'
+						) {
+							systemCacheUsed++;
+							return { ...block, cache_control: { type: 'ephemeral' } };
+						}
+						return block;
+					},
+				);
+			}
+
+			// Transform tool names in messages and apply caching to last message only
 			if (parsed.messages && Array.isArray(parsed.messages)) {
+				const messageCount = parsed.messages.length;
+
 				parsed.messages = parsed.messages.map(
-					(
-
-
-
-
+					(
+						msg: {
+							role: string;
+							content: unknown;
+							[key: string]: unknown;
+						},
+						msgIndex: number,
+					) => {
+						// Only cache the very last message
+						const isLast = msgIndex === messageCount - 1;
+
 						if (Array.isArray(msg.content)) {
+							const content = msg.content.map(
+								(
+									block: {
+										type: string;
+										name?: string;
+										cache_control?: unknown;
+									},
+									blockIndex: number,
+								) => {
+									let transformedBlock = block;
+
+									// Transform tool names
+									if (block.type === 'tool_use' && block.name) {
+										transformedBlock = {
+											...block,
+											name: toClaudeCodeName(block.name),
+										};
+									}
+									if (block.type === 'tool_result' && block.name) {
+										transformedBlock = {
+											...block,
+											name: toClaudeCodeName(block.name),
+										};
+									}
+
+									// Add cache_control to last block of last message
+									if (
+										isLast &&
+										!transformedBlock.cache_control &&
+										messageCacheUsed < MAX_MESSAGE_CACHE &&
+										blockIndex === (msg.content as unknown[]).length - 1
+									) {
+										messageCacheUsed++;
+										return {
+											...transformedBlock,
+											cache_control: { type: 'ephemeral' },
+										};
+									}
+
+									return transformedBlock;
+								},
+							);
+							return { ...msg, content };
+						}
+
+						// For string content, wrap in array with cache_control if last message
+						if (
+							isLast &&
+							messageCacheUsed < MAX_MESSAGE_CACHE &&
+							typeof msg.content === 'string'
+						) {
+							messageCacheUsed++;
 							return {
 								...msg,
-								content: msg.content.map(
-									(block: { type: string; name?: string }) => {
-										if (block.type === 'tool_use' && block.name) {
-											return {
-												...block,
-												name: toClaudeCodeName(block.name),
-											};
-										}
-										if (block.type === 'tool_result' && block.name) {
-											return {
-												...block,
-												name: toClaudeCodeName(block.name),
-											};
-										}
-										return block;
+								content: [
+									{
+										type: 'text',
+										text: msg.content,
+										cache_control: { type: 'ephemeral' },
 									},
-								),
+								],
 							};
 						}
+
 						return msg;
 					},
 				);

@@ -239,7 +320,116 @@ async function getAnthropicInstance(cfg: AGIConfig) {
 		});
 	}
 
-
+	// For API key auth, also apply caching via customFetch
+	// This optimizes token usage even without OAuth
+	const customFetch = async (
+		input: string | URL | Request,
+		init?: RequestInit,
+	) => {
+		let body = init?.body;
+		if (body && typeof body === 'string') {
+			try {
+				const parsed = JSON.parse(body);
+
+				// Apply ephemeral caching (max 4 cache breakpoints total)
+				// Adapter adds 2 tool cache blocks, so we can add 2 more:
+				// - 1 system block + 1 message block = 2
+				const MAX_SYSTEM_CACHE = 1;
+				const MAX_MESSAGE_CACHE = 1;
+				let systemCacheUsed = 0;
+				let messageCacheUsed = 0;
+
+				// Cache first system message
+				if (parsed.system && Array.isArray(parsed.system)) {
+					parsed.system = parsed.system.map(
+						(
+							block: { type: string; cache_control?: unknown },
+							index: number,
+						) => {
+							if (block.cache_control) return block;
+							if (
+								systemCacheUsed < MAX_SYSTEM_CACHE &&
+								index === 0 &&
+								block.type === 'text'
+							) {
+								systemCacheUsed++;
+								return { ...block, cache_control: { type: 'ephemeral' } };
+							}
+							return block;
+						},
+					);
+				}
+
+				// Cache last message only
+				if (parsed.messages && Array.isArray(parsed.messages)) {
+					const messageCount = parsed.messages.length;
+					parsed.messages = parsed.messages.map(
+						(
+							msg: {
+								role: string;
+								content: unknown;
+								[key: string]: unknown;
+							},
+							msgIndex: number,
+						) => {
+							const isLast = msgIndex === messageCount - 1;
+
+							if (Array.isArray(msg.content)) {
+								const blocks = msg.content as {
+									type: string;
+									cache_control?: unknown;
+								}[];
+								const content = blocks.map((block, blockIndex) => {
+									if (block.cache_control) return block;
+									if (
+										isLast &&
+										messageCacheUsed < MAX_MESSAGE_CACHE &&
+										blockIndex === blocks.length - 1
+									) {
+										messageCacheUsed++;
+										return { ...block, cache_control: { type: 'ephemeral' } };
+									}
+									return block;
+								});
+								return { ...msg, content };
+							}
+
+							if (
+								isLast &&
+								messageCacheUsed < MAX_MESSAGE_CACHE &&
+								typeof msg.content === 'string'
+							) {
+								messageCacheUsed++;
+								return {
+									...msg,
+									content: [
+										{
+											type: 'text',
+											text: msg.content,
+											cache_control: { type: 'ephemeral' },
+										},
+									],
+								};
+							}
+
+							return msg;
+						},
+					);
+				}
+
+				body = JSON.stringify(parsed);
+			} catch {
+				// If parsing fails, send as-is
+			}
+		}
+
+		const url = typeof input === 'string' ? input : input.toString();
+		return fetch(url, { ...init, body });
+	};
+
+	return createAnthropic({
+		fetch: customFetch as typeof fetch,
+	});
 }
 
 export async function resolveModel(