@agi-cli/server 0.1.136 → 0.1.137

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@agi-cli/server",
-  "version": "0.1.136",
+  "version": "0.1.137",
   "description": "HTTP API server for AGI CLI",
   "type": "module",
   "main": "./src/index.ts",
@@ -29,8 +29,8 @@
     "typecheck": "tsc --noEmit"
   },
   "dependencies": {
-    "@agi-cli/sdk": "0.1.136",
-    "@agi-cli/database": "0.1.136",
+    "@agi-cli/sdk": "0.1.137",
+    "@agi-cli/database": "0.1.137",
     "drizzle-orm": "^0.44.5",
     "hono": "^4.9.9",
     "zod": "^4.1.8"
@@ -64,6 +64,8 @@ export const schemas = {
     lastActiveAt: { type: 'integer', format: 'int64', nullable: true },
     totalInputTokens: { type: 'integer', nullable: true },
     totalOutputTokens: { type: 'integer', nullable: true },
+    totalCachedTokens: { type: 'integer', nullable: true },
+    totalCacheCreationTokens: { type: 'integer', nullable: true },
     totalToolTimeMs: { type: 'integer', nullable: true },
     toolCounts: {
       type: 'object',
@@ -34,6 +34,10 @@ export function registerResearchRoutes(app: Hono) {
       lastActiveAt: sessions.lastActiveAt,
       provider: sessions.provider,
       model: sessions.model,
+      totalInputTokens: sessions.totalInputTokens,
+      totalOutputTokens: sessions.totalOutputTokens,
+      totalCachedTokens: sessions.totalCachedTokens,
+      totalCacheCreationTokens: sessions.totalCacheCreationTokens,
     })
     .from(sessions)
     .where(
@@ -112,6 +116,7 @@ export function registerResearchRoutes(app: Hono) {
       totalInputTokens: null,
       totalOutputTokens: null,
       totalCachedTokens: null,
+      totalCacheCreationTokens: null,
       totalReasoningTokens: null,
       totalToolTimeMs: null,
       toolCountsJson: null,
@@ -332,6 +337,7 @@ export function registerResearchRoutes(app: Hono) {
       totalInputTokens: null,
       totalOutputTokens: null,
       totalCachedTokens: null,
+      totalCacheCreationTokens: null,
       totalReasoningTokens: null,
       totalToolTimeMs: null,
       toolCountsJson: null,
@@ -20,7 +20,11 @@ export interface ModelLimits {
 export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
   if (limits.context === 0) return false;
 
-  const count = tokens.input + (tokens.cacheRead ?? 0) + tokens.output;
+  const count =
+    tokens.input +
+    (tokens.cacheRead ?? 0) +
+    (tokens.cacheWrite ?? 0) +
+    tokens.output;
   const usableContext = limits.context - limits.output;
 
   return count > usableContext;
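In practice this hunk means cache-write (cache-creation) tokens now count against the usable context window, so sessions that lean heavily on prompt caching trigger overflow handling earlier. A standalone sketch with invented numbers (the `TokenUsage`/`ModelLimits` shapes are assumed from the hunk above):

```ts
// Standalone sketch of the 0.1.137 overflow check (assumed field shapes).
type TokenUsage = {
  input: number;
  output: number;
  cacheRead?: number;
  cacheWrite?: number; // cache-creation tokens, newly counted
};

type ModelLimits = {
  context: number; // total context window; 0 means "unknown, never overflow"
  output: number; // tokens reserved for the model's reply
};

function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
  if (limits.context === 0) return false;
  const count =
    tokens.input +
    (tokens.cacheRead ?? 0) +
    (tokens.cacheWrite ?? 0) +
    tokens.output;
  return count > limits.context - limits.output;
}

// 150k input + 40k cache writes + 8k output = 198k > 192k usable -> overflow.
// Under 0.1.136 the same usage counted only 158k and did not overflow.
const limits: ModelLimits = { context: 200_000, output: 8_000 };
console.log(
  isOverflow({ input: 150_000, output: 8_000, cacheWrite: 40_000 }, limits),
); // true
```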
@@ -119,6 +119,7 @@ export async function createBranch({
       completionTokens: msg.completionTokens,
       totalTokens: msg.totalTokens,
       cachedInputTokens: msg.cachedInputTokens,
+      cacheCreationInputTokens: msg.cacheCreationInputTokens,
       reasoningTokens: msg.reasoningTokens,
       error: msg.error,
       errorType: msg.errorType,
@@ -163,6 +164,7 @@ export async function createBranch({
       totalInputTokens: null,
       totalOutputTokens: null,
       totalCachedTokens: null,
+      totalCacheCreationTokens: null,
       totalReasoningTokens: null,
       totalToolTimeMs: null,
       toolCountsJson: null,
@@ -1,6 +1,7 @@
 import type { getDb } from '@agi-cli/database';
 import { messages, messageParts, sessions } from '@agi-cli/database/schema';
 import { eq } from 'drizzle-orm';
+import { catalog, type ProviderId } from '@agi-cli/sdk';
 import type { RunOpts } from './queue.ts';
 
 export type UsageData = {
@@ -8,6 +9,7 @@ export type UsageData = {
   outputTokens?: number;
   totalTokens?: number;
   cachedInputTokens?: number;
+  cacheCreationInputTokens?: number;
   reasoningTokens?: number;
 };
 
@@ -16,8 +18,81 @@ export type ProviderMetadata = Record<string, unknown> & {
     cachedPromptTokens?: number;
     [key: string]: unknown;
   };
+  anthropic?: {
+    cacheCreationInputTokens?: number;
+    cacheReadInputTokens?: number;
+    [key: string]: unknown;
+  };
 };
 
+export function normalizeUsage(
+  usage: UsageData,
+  providerMetadata: ProviderMetadata | undefined,
+  provider: ProviderId,
+): UsageData {
+  const rawInputTokens = Number(usage.inputTokens ?? 0);
+  const outputTokens = Number(usage.outputTokens ?? 0);
+  const reasoningTokens = Number(usage.reasoningTokens ?? 0);
+
+  const cachedInputTokens =
+    usage.cachedInputTokens != null
+      ? Number(usage.cachedInputTokens)
+      : providerMetadata?.openai?.cachedPromptTokens != null
+        ? Number(providerMetadata.openai.cachedPromptTokens)
+        : providerMetadata?.anthropic?.cacheReadInputTokens != null
+          ? Number(providerMetadata.anthropic.cacheReadInputTokens)
+          : undefined;
+
+  const cacheCreationInputTokens =
+    usage.cacheCreationInputTokens != null
+      ? Number(usage.cacheCreationInputTokens)
+      : providerMetadata?.anthropic?.cacheCreationInputTokens != null
+        ? Number(providerMetadata.anthropic.cacheCreationInputTokens)
+        : undefined;
+
+  const cachedValue = cachedInputTokens ?? 0;
+  const cacheCreationValue = cacheCreationInputTokens ?? 0;
+
+  let inputTokens = rawInputTokens;
+  if (provider === 'openai') {
+    inputTokens = Math.max(0, rawInputTokens - cachedValue);
+  } else if (provider === 'anthropic') {
+    inputTokens = Math.max(0, rawInputTokens - cacheCreationValue);
+  }
+
+  return {
+    inputTokens,
+    outputTokens,
+    cachedInputTokens,
+    cacheCreationInputTokens,
+    reasoningTokens,
+  };
+}
+
+export function resolveUsageProvider(
+  provider: ProviderId,
+  model: string,
+): ProviderId {
+  if (
+    provider !== 'solforge' &&
+    provider !== 'openrouter' &&
+    provider !== 'opencode'
+  ) {
+    return provider;
+  }
+  const entry = catalog[provider];
+  const normalizedModel = model.includes('/') ? model.split('/').at(-1) : model;
+  const modelEntry = entry?.models.find(
+    (m) => m.id?.toLowerCase() === normalizedModel?.toLowerCase(),
+  );
+  const npm = modelEntry?.provider?.npm ?? '';
+  if (npm.includes('openai')) return 'openai';
+  if (npm.includes('anthropic')) return 'anthropic';
+  if (npm.includes('google')) return 'google';
+  if (npm.includes('zai')) return 'zai';
+  return provider;
+}
+
 /**
  * Updates session token counts incrementally after each step.
  * Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
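To make the two normalization branches concrete, here is a hedged, standalone illustration of `normalizeUsage` (token counts are invented; the import path is assumed from the `../session/db-operations.ts` module this diff extends):

```ts
import { normalizeUsage } from './db-operations.ts'; // path assumed from this diff

// Anthropic-shaped step (invented numbers): usage itself carries no cache
// fields, so both are recovered from providerMetadata.anthropic, and
// cache-creation tokens are subtracted from the raw input count.
const anthropic = normalizeUsage(
  { inputTokens: 12_000, outputTokens: 500 },
  { anthropic: { cacheCreationInputTokens: 9_000, cacheReadInputTokens: 2_000 } },
  'anthropic',
);
// -> { inputTokens: 3_000, outputTokens: 500, cachedInputTokens: 2_000,
//      cacheCreationInputTokens: 9_000, reasoningTokens: 0 }

// OpenAI-shaped step: cached prompt tokens come from providerMetadata.openai
// and are subtracted instead; cache creation stays undefined.
const openai = normalizeUsage(
  { inputTokens: 10_000, outputTokens: 300 },
  { openai: { cachedPromptTokens: 8_000 } },
  'openai',
);
// -> { inputTokens: 2_000, outputTokens: 300, cachedInputTokens: 8_000,
//      cacheCreationInputTokens: undefined, reasoningTokens: 0 }
```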
@@ -30,6 +105,13 @@ export async function updateSessionTokensIncremental(
 ) {
   if (!usage || !db) return;
 
+  const usageProvider = resolveUsageProvider(opts.provider, opts.model);
+  const normalizedUsage = normalizeUsage(
+    usage,
+    providerMetadata,
+    usageProvider,
+  );
+
   // Read session totals
   const sessRows = await db
     .select()
@@ -42,6 +124,7 @@ export async function updateSessionTokensIncremental(
   const priorInputSess = Number(sess.totalInputTokens ?? 0);
   const priorOutputSess = Number(sess.totalOutputTokens ?? 0);
   const priorCachedSess = Number(sess.totalCachedTokens ?? 0);
+  const priorCacheCreationSess = Number(sess.totalCacheCreationTokens ?? 0);
   const priorReasoningSess = Number(sess.totalReasoningTokens ?? 0);
 
   // Read current message totals to compute delta
@@ -54,38 +137,47 @@ export async function updateSessionTokensIncremental(
   const priorPromptMsg = Number(msg?.promptTokens ?? 0);
   const priorCompletionMsg = Number(msg?.completionTokens ?? 0);
   const priorCachedMsg = Number(msg?.cachedInputTokens ?? 0);
+  const priorCacheCreationMsg = Number(msg?.cacheCreationInputTokens ?? 0);
   const priorReasoningMsg = Number(msg?.reasoningTokens ?? 0);
 
   // Treat usage as cumulative per-message for this step
   const cumPrompt =
-    usage.inputTokens != null ? Number(usage.inputTokens) : priorPromptMsg;
+    normalizedUsage.inputTokens != null
+      ? Number(normalizedUsage.inputTokens)
+      : priorPromptMsg;
   const cumCompletion =
-    usage.outputTokens != null
-      ? Number(usage.outputTokens)
+    normalizedUsage.outputTokens != null
+      ? Number(normalizedUsage.outputTokens)
       : priorCompletionMsg;
   const cumReasoning =
-    usage.reasoningTokens != null
-      ? Number(usage.reasoningTokens)
+    normalizedUsage.reasoningTokens != null
+      ? Number(normalizedUsage.reasoningTokens)
       : priorReasoningMsg;
 
   const cumCached =
-    usage.cachedInputTokens != null
-      ? Number(usage.cachedInputTokens)
-      : providerMetadata?.openai?.cachedPromptTokens != null
-        ? Number(providerMetadata.openai.cachedPromptTokens)
-        : priorCachedMsg;
+    normalizedUsage.cachedInputTokens != null
+      ? Number(normalizedUsage.cachedInputTokens)
+      : priorCachedMsg;
+
+  const cumCacheCreation =
+    normalizedUsage.cacheCreationInputTokens != null
+      ? Number(normalizedUsage.cacheCreationInputTokens)
+      : priorCacheCreationMsg;
 
   // Compute deltas for this step; clamp to 0 in case provider reports smaller values
   const deltaInput = Math.max(0, cumPrompt - priorPromptMsg);
   const deltaOutput = Math.max(0, cumCompletion - priorCompletionMsg);
   const deltaCached = Math.max(0, cumCached - priorCachedMsg);
+  const deltaCacheCreation = Math.max(
+    0,
+    cumCacheCreation - priorCacheCreationMsg,
+  );
   const deltaReasoning = Math.max(0, cumReasoning - priorReasoningMsg);
 
-  // Note: AI SDK's inputTokens already excludes cached tokens for Anthropic,
-  // so we don't need to subtract deltaCached here. Just accumulate directly.
   const nextInputSess = priorInputSess + deltaInput;
   const nextOutputSess = priorOutputSess + deltaOutput;
   const nextCachedSess = priorCachedSess + deltaCached;
+  const nextCacheCreationSess = priorCacheCreationSess + deltaCacheCreation;
   const nextReasoningSess = priorReasoningSess + deltaReasoning;
 
   await db
@@ -94,6 +186,7 @@ export async function updateSessionTokensIncremental(
       totalInputTokens: nextInputSess,
       totalOutputTokens: nextOutputSess,
       totalCachedTokens: nextCachedSess,
+      totalCacheCreationTokens: nextCacheCreationSess,
       totalReasoningTokens: nextReasoningSess,
     })
     .where(eq(sessions.id, opts.sessionId));
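The doc comment above the function states the key invariant: `onStepFinish` reports cumulative per-message usage, so session counters advance by clamped deltas rather than by raw sums. A small standalone walk-through with invented numbers:

```ts
// Delta bookkeeping sketch (invented numbers). Each step reports CUMULATIVE
// usage for the current message; the session adds only the increase.
function addStep(
  sessionTotal: number,
  priorMsgCumulative: number,
  reportedCumulative: number,
): { sessionTotal: number; msgCumulative: number } {
  // Clamped to 0 in case a provider reports a smaller cumulative value.
  const delta = Math.max(0, reportedCumulative - priorMsgCumulative);
  return {
    sessionTotal: sessionTotal + delta,
    msgCumulative: reportedCumulative,
  };
}

let session = 100_000; // e.g. totalCacheCreationTokens before this message
let msg = 0; // cacheCreationInputTokens stored on the message row

// Step 1 reports 9_000 cumulative; step 2 reports 9_400 cumulative.
({ sessionTotal: session, msgCumulative: msg } = addStep(session, msg, 9_000));
({ sessionTotal: session, msgCumulative: msg } = addStep(session, msg, 9_400));

console.log(session, msg); // 109_400 9_400 — step 2 contributed only +400
```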
@@ -144,6 +237,13 @@ export async function updateMessageTokensIncremental(
 ) {
   if (!usage || !db) return;
 
+  const usageProvider = resolveUsageProvider(opts.provider, opts.model);
+  const normalizedUsage = normalizeUsage(
+    usage,
+    providerMetadata,
+    usageProvider,
+  );
+
   const msgRows = await db
     .select()
     .from(messages)
@@ -154,28 +254,35 @@ export async function updateMessageTokensIncremental(
   const priorPrompt = Number(msg.promptTokens ?? 0);
   const priorCompletion = Number(msg.completionTokens ?? 0);
   const priorCached = Number(msg.cachedInputTokens ?? 0);
+  const priorCacheCreation = Number(msg.cacheCreationInputTokens ?? 0);
   const priorReasoning = Number(msg.reasoningTokens ?? 0);
 
   // Treat usage as cumulative per-message - REPLACE not ADD
   const cumPrompt =
-    usage.inputTokens != null ? Number(usage.inputTokens) : priorPrompt;
+    normalizedUsage.inputTokens != null
+      ? Number(normalizedUsage.inputTokens)
+      : priorPrompt;
   const cumCompletion =
-    usage.outputTokens != null ? Number(usage.outputTokens) : priorCompletion;
+    normalizedUsage.outputTokens != null
+      ? Number(normalizedUsage.outputTokens)
+      : priorCompletion;
   const cumReasoning =
-    usage.reasoningTokens != null
-      ? Number(usage.reasoningTokens)
+    normalizedUsage.reasoningTokens != null
+      ? Number(normalizedUsage.reasoningTokens)
       : priorReasoning;
 
   const cumCached =
-    usage.cachedInputTokens != null
-      ? Number(usage.cachedInputTokens)
-      : providerMetadata?.openai?.cachedPromptTokens != null
-        ? Number(providerMetadata.openai.cachedPromptTokens)
-        : priorCached;
+    normalizedUsage.cachedInputTokens != null
+      ? Number(normalizedUsage.cachedInputTokens)
+      : priorCached;
+
+  const cumCacheCreation =
+    normalizedUsage.cacheCreationInputTokens != null
+      ? Number(normalizedUsage.cacheCreationInputTokens)
+      : priorCacheCreation;
 
-  // Note: AI SDK's totalTokens excludes cachedInputTokens for Anthropic,
-  // so we always compute total ourselves to include all token types.
-  const cumTotal = cumPrompt + cumCompletion + cumCached + cumReasoning;
+  const cumTotal =
+    cumPrompt + cumCompletion + cumCached + cumCacheCreation + cumReasoning;
 
   await db
     .update(messages)
@@ -184,6 +291,7 @@ export async function updateMessageTokensIncremental(
       completionTokens: cumCompletion,
       totalTokens: cumTotal,
      cachedInputTokens: cumCached,
+      cacheCreationInputTokens: cumCacheCreation,
       reasoningTokens: cumReasoning,
     })
     .where(eq(messages.id, opts.assistantMessageId));
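Note the REPLACE-not-ADD semantics called out in the hunk: message rows store the latest cumulative values directly, and `totalTokens` is recomputed from all five components rather than trusting a provider-reported total. With invented counts:

```ts
// totalTokens is rebuilt from its components (invented numbers):
const cumPrompt = 3_000; // normalized input, cache portions removed
const cumCompletion = 500;
const cumCached = 2_000; // cache reads
const cumCacheCreation = 9_000; // cache writes, newly included in 0.1.137
const cumReasoning = 250;

const cumTotal =
  cumPrompt + cumCompletion + cumCached + cumCacheCreation + cumReasoning;
console.log(cumTotal); // 14_750 (0.1.136 would have stored 5_750)
```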
@@ -51,6 +51,7 @@ export async function createSession({
     totalInputTokens: null,
     totalOutputTokens: null,
     totalCachedTokens: null,
+    totalCacheCreationTokens: null,
     totalReasoningTokens: null,
     totalToolTimeMs: null,
     toolCountsJson: null,
@@ -13,6 +13,10 @@ import {
 } from '../message/compaction.ts';
 import { debugLog } from '../debug/index.ts';
 import type { FinishEvent } from './types.ts';
+import {
+  normalizeUsage,
+  resolveUsageProvider,
+} from '../session/db-operations.ts';
 
 export function createFinishHandler(
   opts: RunOpts,
@@ -73,8 +77,17 @@ export function createFinishHandler(
           outputTokens: Number(sessRows[0].completionTokens ?? 0),
           totalTokens: Number(sessRows[0].totalTokens ?? 0),
           cachedInputTokens: Number(sessRows[0].cachedInputTokens ?? 0),
+          cacheCreationInputTokens: Number(
+            sessRows[0].cacheCreationInputTokens ?? 0,
+          ),
         }
-      : fin.usage;
+      : fin.usage
+        ? normalizeUsage(
+            fin.usage,
+            undefined,
+            resolveUsageProvider(opts.provider, opts.model),
+          )
+        : undefined;
 
     const costUsd = usage
       ? estimateModelCostUsd(opts.provider, opts.model, usage)
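The selection logic reads more clearly flattened out: prefer DB-backed cumulative totals (already normalized step by step), fall back to normalizing the raw finish-event usage, otherwise report nothing. A condensed sketch under an assumed row shape:

```ts
import {
  normalizeUsage,
  resolveUsageProvider,
  type UsageData,
} from '../session/db-operations.ts';
import type { ProviderId } from '@agi-cli/sdk';

// Row shape is assumed for illustration; field names follow the hunk above.
type SessRow = {
  promptTokens?: number;
  completionTokens?: number;
  totalTokens?: number;
  cachedInputTokens?: number;
  cacheCreationInputTokens?: number;
};

function selectUsage(
  row: SessRow | undefined,
  finUsage: UsageData | undefined,
  provider: ProviderId,
  model: string,
): UsageData | undefined {
  if (row) {
    // DB totals were normalized incrementally, so use them as-is.
    return {
      inputTokens: Number(row.promptTokens ?? 0),
      outputTokens: Number(row.completionTokens ?? 0),
      totalTokens: Number(row.totalTokens ?? 0),
      cachedInputTokens: Number(row.cachedInputTokens ?? 0),
      cacheCreationInputTokens: Number(row.cacheCreationInputTokens ?? 0),
    };
  }
  // No DB row: normalize the raw finish-event usage (no providerMetadata
  // is available at this point, hence the `undefined`).
  return finUsage
    ? normalizeUsage(finUsage, undefined, resolveUsageProvider(provider, model))
    : undefined;
}
```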
@@ -89,6 +102,9 @@ export function createFinishHandler(
       output: usage.outputTokens ?? 0,
       cacheRead:
         (usage as { cachedInputTokens?: number }).cachedInputTokens ?? 0,
+      cacheWrite:
+        (usage as { cacheCreationInputTokens?: number })
+          .cacheCreationInputTokens ?? 0,
     };
 
     if (isOverflow(tokenUsage, limits)) {
@@ -8,7 +8,15 @@ export type StepFinishEvent = {
 };
 
 export type FinishEvent = {
-  usage?: Pick<UsageData, 'inputTokens' | 'outputTokens' | 'totalTokens'>;
+  usage?: Pick<
+    UsageData,
+    | 'inputTokens'
+    | 'outputTokens'
+    | 'totalTokens'
+    | 'cachedInputTokens'
+    | 'cacheCreationInputTokens'
+    | 'reasoningTokens'
+  >;
   finishReason?: string;
 };
 
@@ -78,7 +78,10 @@ export function buildGetParentSessionTool(
     }
 
     const totalTokens =
-      (session.totalInputTokens ?? 0) + (session.totalOutputTokens ?? 0);
+      (session.totalInputTokens ?? 0) +
+      (session.totalOutputTokens ?? 0) +
+      (session.totalCachedTokens ?? 0) +
+      (session.totalCacheCreationTokens ?? 0);
 
     const stats = {
       totalMessages: msgCountResult[0]?.count ?? 0,
@@ -87,6 +90,8 @@ export function buildGetParentSessionTool(
       totalTokens,
       totalInputTokens: session.totalInputTokens ?? 0,
       totalOutputTokens: session.totalOutputTokens ?? 0,
+      totalCachedTokens: session.totalCachedTokens ?? 0,
+      totalCacheCreationTokens: session.totalCacheCreationTokens ?? 0,
     };
 
     let messagesData:
@@ -75,7 +75,10 @@ export function buildGetSessionContextTool(projectRoot: string) {
     }
 
     const totalTokens =
-      (session.totalInputTokens ?? 0) + (session.totalOutputTokens ?? 0);
+      (session.totalInputTokens ?? 0) +
+      (session.totalOutputTokens ?? 0) +
+      (session.totalCachedTokens ?? 0) +
+      (session.totalCacheCreationTokens ?? 0);
 
     const stats = {
       totalMessages: msgCountResult[0]?.count ?? 0,
@@ -84,6 +87,8 @@ export function buildGetSessionContextTool(projectRoot: string) {
       totalTokens,
       totalInputTokens: session.totalInputTokens ?? 0,
       totalOutputTokens: session.totalOutputTokens ?? 0,
+      totalCachedTokens: session.totalCachedTokens ?? 0,
+      totalCacheCreationTokens: session.totalCacheCreationTokens ?? 0,
     };
 
     let messagesData:
@@ -82,6 +82,8 @@ export function buildQuerySessionsTool(projectRoot: string) {
       lastActiveAt: sessions.lastActiveAt,
       totalInputTokens: sessions.totalInputTokens,
       totalOutputTokens: sessions.totalOutputTokens,
+      totalCachedTokens: sessions.totalCachedTokens,
+      totalCacheCreationTokens: sessions.totalCacheCreationTokens,
       sessionType: sessions.sessionType,
       parentSessionId: sessions.parentSessionId,
     })