@clinebot/llms 0.0.10 → 0.0.12

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -82,7 +82,7 @@ describe("models-dev-catalog", () => {
82
82
  id: "claude-defaults",
83
83
  name: "claude-defaults",
84
84
  contextWindow: 4096,
85
- maxTokens: 4096,
85
+ maxTokens: 204,
86
86
  capabilities: ["tools"],
87
87
  pricing: {
88
88
  input: 0,
@@ -97,7 +97,7 @@ describe("models-dev-catalog", () => {
97
97
  id: "claude-older",
98
98
  name: "claude-older",
99
99
  contextWindow: 4096,
100
- maxTokens: 4096,
100
+ maxTokens: 204,
101
101
  capabilities: ["tools"],
102
102
  pricing: {
103
103
  input: 0,
@@ -93,11 +93,18 @@ function toStatus(status: string | undefined): ModelInfo["status"] {
93
93
  }
94
94
 
95
95
  function toModelInfo(modelId: string, model: ModelsDevModel): ModelInfo {
96
+ // If context or output limits are missing, default to DEFAULT_CONTEXT_WINDOW and DEFAULT_MAX_TOKENS respectively.
97
+ // If context and max are the same value, assume max tokens should be 5% of that value to avoid overallocation.
98
+ const contextWindow = model.limit?.context ?? DEFAULT_CONTEXT_WINDOW;
99
+ const outputToken = model.limit?.output ?? DEFAULT_MAX_TOKENS;
100
+ const discounted =
101
+ contextWindow === outputToken ? outputToken * 0.05 : outputToken;
102
+
96
103
  return {
97
104
  id: modelId,
98
105
  name: model.name || modelId,
99
- contextWindow: model.limit?.context ?? DEFAULT_CONTEXT_WINDOW,
100
- maxTokens: model.limit?.output ?? DEFAULT_MAX_TOKENS,
106
+ contextWindow,
107
+ maxTokens: Math.floor(discounted),
101
108
  capabilities: toCapabilities(model),
102
109
  pricing: {
103
110
  input: model.cost?.input ?? 0,
@@ -41,6 +41,7 @@ export type EmitAiSdkStreamOptions = {
41
41
  inputTokens: number,
42
42
  outputTokens: number,
43
43
  cacheReadTokens: number,
44
+ cacheWriteTokens?: number,
44
45
  ) => number | undefined;
45
46
  reasoningTypes?: string[];
46
47
  enableToolCalls?: boolean;
@@ -180,6 +181,7 @@ export async function* emitAiSdkStream(
180
181
  usageMetrics.inputTokens,
181
182
  usageMetrics.outputTokens,
182
183
  usageMetrics.cacheReadTokens,
184
+ usageMetrics.cacheWriteTokens,
183
185
  ),
184
186
  id: responseId,
185
187
  };
@@ -217,6 +219,7 @@ export async function* emitAiSdkStream(
217
219
  usageMetrics.inputTokens,
218
220
  usageMetrics.outputTokens,
219
221
  usageMetrics.cacheReadTokens,
222
+ usageMetrics.cacheWriteTokens,
220
223
  ),
221
224
  id: responseId,
222
225
  };
@@ -185,8 +185,18 @@ export abstract class AiSdkProviderHandler extends BaseHandler {
185
185
  yield* emitAiSdkStream(stream, {
186
186
  responseId,
187
187
  errorMessage: this.getStreamErrorMessage(),
188
- calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
189
- this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
188
+ calculateCost: (
189
+ inputTokens,
190
+ outputTokens,
191
+ cacheReadTokens,
192
+ cacheWriteTokens,
193
+ ) =>
194
+ this.calculateCost(
195
+ inputTokens,
196
+ outputTokens,
197
+ cacheReadTokens,
198
+ cacheWriteTokens,
199
+ ),
190
200
  ...this.getEmitStreamOptions(),
191
201
  });
192
202
  }
@@ -0,0 +1,30 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { AnthropicHandler } from "./anthropic-base";
3
+
4
+ describe("AnthropicHandler prompt cache detection", () => {
5
+ it("enables prompt caching when model pricing includes cache pricing", () => {
6
+ const handler = new AnthropicHandler({
7
+ providerId: "anthropic",
8
+ modelId: "claude-sonnet-4-6",
9
+ apiKey: "test-key",
10
+ modelInfo: {
11
+ id: "claude-sonnet-4-6",
12
+ pricing: {
13
+ input: 3,
14
+ output: 15,
15
+ cacheRead: 0.3,
16
+ cacheWrite: 3.75,
17
+ },
18
+ },
19
+ });
20
+
21
+ const messages = handler.getMessages("system", [
22
+ { role: "user", content: "Tell me about this repo" },
23
+ ]);
24
+ const userTextBlock = messages[0]?.content?.[0] as
25
+ | { cache_control?: { type: string } }
26
+ | undefined;
27
+
28
+ expect(userTextBlock?.cache_control).toEqual({ type: "ephemeral" });
29
+ });
30
+ });
@@ -17,7 +17,6 @@ import {
17
17
  import {
18
18
  type ApiStream,
19
19
  type HandlerModelInfo,
20
- hasModelCapability,
21
20
  type ProviderConfig,
22
21
  supportsModelThinking,
23
22
  } from "../types";
@@ -76,10 +75,7 @@ export class AnthropicHandler extends BaseHandler {
76
75
  _systemPrompt: string,
77
76
  messages: Message[],
78
77
  ): Anthropic.MessageParam[] {
79
- const supportsPromptCache = hasModelCapability(
80
- this.getModel().info,
81
- "prompt-cache",
82
- );
78
+ const supportsPromptCache = this.supportsPromptCache(this.getModel().info);
83
79
  return convertToAnthropicMessages(
84
80
  messages,
85
81
  supportsPromptCache,
@@ -113,7 +109,7 @@ export class AnthropicHandler extends BaseHandler {
113
109
  const budgetTokens =
114
110
  thinkingSupported && requestedBudget > 0 ? requestedBudget : 0;
115
111
  const nativeToolsOn = tools && tools.length > 0;
116
- const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
112
+ const supportsPromptCache = this.supportsPromptCache(model.info);
117
113
  const reasoningOn = thinkingSupported && budgetTokens > 0;
118
114
  const debugThinking = isThinkingDebugEnabled();
119
115
  const debugChunkCounts: Record<string, number> = {};
@@ -139,30 +135,34 @@ export class AnthropicHandler extends BaseHandler {
139
135
  const requestOptions = { signal: abortSignal };
140
136
 
141
137
  // Create the request
138
+ // Use top-level automatic caching so the entire prefix (system +
139
+ // messages) is cached and the breakpoint advances each turn.
140
+ const createParams: Record<string, unknown> &
141
+ Anthropic.MessageCreateParamsStreaming = {
142
+ model: model.id,
143
+ thinking: reasoningOn
144
+ ? { type: "enabled", budget_tokens: budgetTokens }
145
+ : undefined,
146
+ max_tokens:
147
+ model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
148
+ temperature: reasoningOn ? undefined : 0,
149
+ system: [
150
+ supportsPromptCache
151
+ ? {
152
+ text: systemPrompt,
153
+ type: "text",
154
+ cache_control: { type: "ephemeral" },
155
+ }
156
+ : { text: systemPrompt, type: "text" },
157
+ ],
158
+ messages: anthropicMessages as Anthropic.MessageParam[],
159
+ stream: true,
160
+ tools: anthropicTools,
161
+ tool_choice: nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
162
+ };
163
+
142
164
  const stream = await client.messages.create(
143
- {
144
- model: model.id,
145
- thinking: reasoningOn
146
- ? { type: "enabled", budget_tokens: budgetTokens }
147
- : undefined,
148
- max_tokens:
149
- model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
150
- temperature: reasoningOn ? undefined : 0,
151
- system: supportsPromptCache
152
- ? [
153
- {
154
- text: systemPrompt,
155
- type: "text",
156
- cache_control: { type: "ephemeral" },
157
- },
158
- ]
159
- : [{ text: systemPrompt, type: "text" }],
160
- messages: anthropicMessages as Anthropic.MessageParam[],
161
- stream: true,
162
- tools: anthropicTools,
163
- tool_choice:
164
- nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
165
- },
165
+ createParams as Anthropic.MessageCreateParamsStreaming,
166
166
  requestOptions,
167
167
  );
168
168
 
@@ -244,6 +244,7 @@ export class AnthropicHandler extends BaseHandler {
244
244
  usageSnapshot.inputTokens,
245
245
  usageSnapshot.outputTokens,
246
246
  usageSnapshot.cacheReadTokens,
247
+ usageSnapshot.cacheWriteTokens,
247
248
  ),
248
249
  id: responseId,
249
250
  };
@@ -263,6 +264,7 @@ export class AnthropicHandler extends BaseHandler {
263
264
  usageSnapshot.inputTokens,
264
265
  usageSnapshot.outputTokens,
265
266
  usageSnapshot.cacheReadTokens,
267
+ usageSnapshot.cacheWriteTokens,
266
268
  ),
267
269
  id: responseId,
268
270
  };
@@ -15,13 +15,37 @@ class TestHandler extends BaseHandler {
15
15
  inputTokens: number,
16
16
  outputTokens: number,
17
17
  cacheReadTokens = 0,
18
+ cacheWriteTokens = 0,
18
19
  ): number | undefined {
19
- return this.calculateCost(inputTokens, outputTokens, cacheReadTokens);
20
+ return this.calculateCost(
21
+ inputTokens,
22
+ outputTokens,
23
+ cacheReadTokens,
24
+ cacheWriteTokens,
25
+ );
26
+ }
27
+
28
+ public computeCostFromInclusiveInput(
29
+ inputTokens: number,
30
+ outputTokens: number,
31
+ cacheReadTokens = 0,
32
+ cacheWriteTokens = 0,
33
+ ): number | undefined {
34
+ return this.calculateCostFromInclusiveInput(
35
+ inputTokens,
36
+ outputTokens,
37
+ cacheReadTokens,
38
+ cacheWriteTokens,
39
+ );
20
40
  }
21
41
 
22
42
  public exposeAbortSignal(): AbortSignal {
23
43
  return this.getAbortSignal();
24
44
  }
45
+
46
+ public normalizeBadRequest(error: unknown): Error | undefined {
47
+ return this.normalizeOpenAICompatibleBadRequest(error);
48
+ }
25
49
  }
26
50
 
27
51
  describe("BaseHandler.calculateCost", () => {
@@ -45,7 +69,54 @@ describe("BaseHandler.calculateCost", () => {
45
69
 
46
70
  const cost = handler.computeCost(1_000_000, 1_000_000, 100_000);
47
71
 
48
- expect(cost).toBeCloseTo(17.73, 6);
72
+ expect(cost).toBeCloseTo(18.03, 6);
73
+ });
74
+
75
+ it("does not charge cache reads twice when input already includes them", () => {
76
+ const config: ProviderConfig = {
77
+ providerId: "openai-native",
78
+ modelId: "gpt-test",
79
+ apiKey: "test-key",
80
+ knownModels: {
81
+ "gpt-test": {
82
+ id: "gpt-test",
83
+ pricing: {
84
+ input: 1,
85
+ output: 2,
86
+ cacheRead: 0.5,
87
+ },
88
+ },
89
+ },
90
+ };
91
+ const handler = new TestHandler(config);
92
+
93
+ const cost = handler.computeCostFromInclusiveInput(100, 40, 25);
94
+
95
+ expect(cost).toBeCloseTo(0.0001675, 10);
96
+ });
97
+
98
+ it("does not charge cache writes twice when input already includes them", () => {
99
+ const config: ProviderConfig = {
100
+ providerId: "openai-native",
101
+ modelId: "gpt-test",
102
+ apiKey: "test-key",
103
+ knownModels: {
104
+ "gpt-test": {
105
+ id: "gpt-test",
106
+ pricing: {
107
+ input: 1,
108
+ output: 2,
109
+ cacheRead: 0.5,
110
+ cacheWrite: 1.25,
111
+ },
112
+ },
113
+ },
114
+ };
115
+ const handler = new TestHandler(config);
116
+
117
+ const cost = handler.computeCostFromInclusiveInput(100, 40, 25, 10);
118
+
119
+ expect(cost).toBeCloseTo(0.00017, 10);
49
120
  });
50
121
  });
51
122
 
@@ -103,3 +174,57 @@ describe("BaseHandler abort signal wiring", () => {
103
174
  expect(signal2.aborted).toBe(false);
104
175
  });
105
176
  });
177
+
178
+ describe("BaseHandler.normalizeOpenAICompatibleBadRequest", () => {
179
+ it("rewrites provider metadata prompt-limit errors into a helpful message", () => {
180
+ const handler = new TestHandler({
181
+ providerId: "openrouter",
182
+ modelId: "anthropic/claude-sonnet-4.6",
183
+ apiKey: "test-key",
184
+ baseUrl: "https://openrouter.ai/api/v1",
185
+ });
186
+
187
+ const error = Object.assign(new Error("400 Provider returned error"), {
188
+ status: 400,
189
+ error: {
190
+ message: "Provider returned error",
191
+ code: 400,
192
+ metadata: {
193
+ provider_name: "Anthropic",
194
+ raw: JSON.stringify({
195
+ type: "error",
196
+ error: {
197
+ type: "invalid_request_error",
198
+ message: "prompt is too long: 1102640 tokens > 1000000 maximum",
199
+ },
200
+ request_id: "req_123",
201
+ }),
202
+ },
203
+ },
204
+ });
205
+
206
+ const normalized = handler.normalizeBadRequest(error);
207
+
208
+ expect(normalized?.message).toBe(
209
+ "Anthropic request was rejected (HTTP 400). Prompt is too long: 1102640 tokens exceeds the 1000000 token limit. Request ID: req_123.",
210
+ );
211
+ expect(normalized?.cause).toBe(error);
212
+ });
213
+
214
+ it("returns undefined for non-400 errors", () => {
215
+ const handler = new TestHandler({
216
+ providerId: "openrouter",
217
+ modelId: "anthropic/claude-sonnet-4.6",
218
+ apiKey: "test-key",
219
+ baseUrl: "https://openrouter.ai/api/v1",
220
+ });
221
+
222
+ const normalized = handler.normalizeBadRequest(
223
+ Object.assign(new Error("500 Provider returned error"), {
224
+ status: 500,
225
+ }),
226
+ );
227
+
228
+ expect(normalized).toBeUndefined();
229
+ });
230
+ });
@@ -10,6 +10,7 @@ import type {
10
10
  ApiStream,
11
11
  ApiStreamUsageChunk,
12
12
  HandlerModelInfo,
13
+ ModelInfo,
13
14
  ProviderConfig,
14
15
  } from "../types";
15
16
  import type { Message, ToolDefinition } from "../types/messages";
@@ -22,6 +23,22 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
22
23
  "X-CLIENT-TYPE": "cline-sdk",
23
24
  };
24
25
 
26
+ interface OpenAICompatibleProviderErrorShape {
27
+ status?: number;
28
+ message?: string;
29
+ error?: {
30
+ message?: string;
31
+ code?: number;
32
+ metadata?: {
33
+ raw?: string;
34
+ provider_name?: string;
35
+ };
36
+ };
37
+ response?: {
38
+ status?: number;
39
+ };
40
+ }
41
+
25
42
  const controllerIds = new WeakMap<AbortController, string>();
26
43
  let controllerIdCounter = 0;
27
44
 
@@ -146,10 +163,26 @@ export abstract class BaseHandler implements ApiHandler {
146
163
  });
147
164
  }
148
165
 
166
+ protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
167
+ const resolvedModelInfo =
168
+ modelInfo ??
169
+ this.config.modelInfo ??
170
+ this.config.knownModels?.[this.config.modelId];
171
+ const pricing = resolvedModelInfo?.pricing;
172
+
173
+ return (
174
+ resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
175
+ this.config.capabilities?.includes("prompt-cache") === true ||
176
+ typeof pricing?.cacheRead === "number" ||
177
+ typeof pricing?.cacheWrite === "number"
178
+ );
179
+ }
180
+
149
181
  protected calculateCost(
150
182
  inputTokens: number,
151
183
  outputTokens: number,
152
184
  cacheReadTokens = 0,
185
+ cacheWriteTokens = 0,
153
186
  ): number | undefined {
154
187
  const pricing = (
155
188
  this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
@@ -159,14 +192,32 @@ export abstract class BaseHandler implements ApiHandler {
159
192
  }
160
193
 
161
194
  return (
162
- ((inputTokens - cacheReadTokens) / 1_000_000) * pricing.input +
195
+ (inputTokens / 1_000_000) * pricing.input +
163
196
  (outputTokens / 1_000_000) * pricing.output +
164
197
  (cacheReadTokens > 0
165
198
  ? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
199
+ : 0) +
200
+ (cacheWriteTokens > 0
201
+ ? (cacheWriteTokens / 1_000_000) *
202
+ (pricing.cacheWrite ?? pricing.input * 1.25)
166
203
  : 0)
167
204
  );
168
205
  }
169
206
 
207
+ protected calculateCostFromInclusiveInput(
208
+ inputTokens: number,
209
+ outputTokens: number,
210
+ cacheReadTokens = 0,
211
+ cacheWriteTokens = 0,
212
+ ): number | undefined {
213
+ return this.calculateCost(
214
+ Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens),
215
+ outputTokens,
216
+ cacheReadTokens,
217
+ cacheWriteTokens,
218
+ );
219
+ }
220
+
170
221
  protected createResponseId(): string {
171
222
  return nanoid();
172
223
  }
@@ -193,4 +244,67 @@ export abstract class BaseHandler implements ApiHandler {
193
244
  ...(this.config.headers ?? {}),
194
245
  };
195
246
  }
247
+
248
+ protected normalizeOpenAICompatibleBadRequest(
249
+ error: unknown,
250
+ ): Error | undefined {
251
+ const rawError = error as OpenAICompatibleProviderErrorShape | undefined;
252
+ const status =
253
+ rawError?.status ??
254
+ rawError?.response?.status ??
255
+ rawError?.error?.code ??
256
+ (typeof rawError?.message === "string" && rawError.message.includes("400")
257
+ ? 400
258
+ : undefined);
259
+ if (status !== 400) {
260
+ return undefined;
261
+ }
262
+
263
+ const rawMetadata = rawError?.error?.metadata?.raw;
264
+ const parsedRaw = this.parseRawProviderError(rawMetadata);
265
+ const detail =
266
+ parsedRaw?.error?.message?.trim() ||
267
+ rawError?.error?.message?.trim() ||
268
+ rawError?.message?.trim() ||
269
+ "Provider returned error";
270
+ const providerName =
271
+ rawError?.error?.metadata?.provider_name?.trim() || "Provider";
272
+ const requestId = parsedRaw?.request_id?.trim();
273
+ const normalizedMessage = this.rewriteProviderBadRequestDetail(detail);
274
+ const suffix = requestId ? ` Request ID: ${requestId}.` : "";
275
+ return new Error(
276
+ `${providerName} request was rejected (HTTP 400). ${normalizedMessage}${suffix}`,
277
+ {
278
+ cause: error instanceof Error ? error : undefined,
279
+ },
280
+ );
281
+ }
282
+
283
+ private parseRawProviderError(
284
+ raw: string | undefined,
285
+ ): { error?: { message?: string }; request_id?: string } | undefined {
286
+ if (!raw) {
287
+ return undefined;
288
+ }
289
+ try {
290
+ return JSON.parse(raw) as {
291
+ error?: { message?: string };
292
+ request_id?: string;
293
+ };
294
+ } catch {
295
+ return undefined;
296
+ }
297
+ }
298
+
299
+ private rewriteProviderBadRequestDetail(detail: string): string {
300
+ const promptTooLongMatch = detail.match(
301
+ /prompt is too long:\s*([\d,]+)\s*tokens?\s*>\s*([\d,]+)\s*maximum/i,
302
+ );
303
+ if (promptTooLongMatch) {
304
+ const actual = promptTooLongMatch[1];
305
+ const maximum = promptTooLongMatch[2];
306
+ return `Prompt is too long: ${actual} tokens exceeds the ${maximum} token limit.`;
307
+ }
308
+ return detail.endsWith(".") ? detail : `${detail}.`;
309
+ }
196
310
  }
@@ -216,11 +216,11 @@ export class BedrockHandler extends BaseHandler {
216
216
 
217
217
  yield {
218
218
  type: "usage",
219
- inputTokens: inputTokens - cacheReadTokens,
219
+ inputTokens: Math.max(0, inputTokens - cacheReadTokens),
220
220
  outputTokens,
221
221
  thoughtsTokenCount,
222
222
  cacheReadTokens,
223
- totalCost: this.calculateCost(
223
+ totalCost: this.calculateCostFromInclusiveInput(
224
224
  inputTokens,
225
225
  outputTokens,
226
226
  cacheReadTokens,
@@ -245,11 +245,11 @@ export class BedrockHandler extends BaseHandler {
245
245
 
246
246
  yield {
247
247
  type: "usage",
248
- inputTokens: inputTokens - cacheReadTokens,
248
+ inputTokens: Math.max(0, inputTokens - cacheReadTokens),
249
249
  outputTokens,
250
250
  thoughtsTokenCount,
251
251
  cacheReadTokens,
252
- totalCost: this.calculateCost(
252
+ totalCost: this.calculateCostFromInclusiveInput(
253
253
  inputTokens,
254
254
  outputTokens,
255
255
  cacheReadTokens,
@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
115
115
  expect(usageChunk?.outputTokens).toBe(3);
116
116
  });
117
117
 
118
+ it("keeps cached input tokens separate from total input tokens", async () => {
119
+ streamTextSpy.mockReturnValue({
120
+ fullStream: makeStreamParts([
121
+ {
122
+ type: "finish",
123
+ usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
124
+ },
125
+ ]),
126
+ });
127
+
128
+ const handler = new ClaudeCodeHandler({
129
+ providerId: "claude-code",
130
+ modelId: "sonnet",
131
+ });
132
+
133
+ const chunks: ApiStreamChunk[] = [];
134
+ for await (const chunk of handler.createMessage("System", [
135
+ { role: "user", content: "Hi" },
136
+ ])) {
137
+ chunks.push(chunk);
138
+ }
139
+
140
+ const usageChunk = chunks.find(
141
+ (chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
142
+ chunk.type === "usage",
143
+ );
144
+ expect(usageChunk).toMatchObject({
145
+ inputTokens: 6,
146
+ outputTokens: 3,
147
+ cacheReadTokens: 4,
148
+ });
149
+ });
150
+
118
151
  it("uses a fallback model id when model is missing", () => {
119
152
  const handler = new ClaudeCodeHandler({
120
153
  providerId: "claude-code",
@@ -18,7 +18,6 @@ import {
18
18
  import {
19
19
  type ApiStream,
20
20
  type HandlerModelInfo,
21
- type ModelInfo,
22
21
  type ProviderConfig,
23
22
  supportsModelThinking,
24
23
  } from "../types";
@@ -258,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
258
257
 
259
258
  // Yield final usage
260
259
  const totalCost = this.calculateGeminiCost(
261
- info,
262
260
  promptTokens,
263
261
  outputTokens,
264
262
  thoughtsTokenCount,
@@ -267,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
267
265
 
268
266
  yield {
269
267
  type: "usage",
270
- inputTokens: promptTokens - cacheReadTokens,
268
+ inputTokens: promptTokens,
271
269
  outputTokens,
272
270
  thoughtsTokenCount,
273
271
  cacheReadTokens,
@@ -288,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
288
286
  }
289
287
 
290
288
  private calculateGeminiCost(
291
- info: ModelInfo,
292
289
  inputTokens: number,
293
290
  outputTokens: number,
294
291
  thoughtsTokenCount: number,
295
292
  cacheReadTokens: number,
296
293
  ): number | undefined {
297
- const pricing = info.pricing;
298
- if (!pricing?.input || !pricing?.output) {
299
- return undefined;
300
- }
301
-
302
- const uncachedInputTokens = inputTokens - cacheReadTokens;
303
- const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
304
- const outputCost =
305
- pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
306
- const cacheReadCost =
307
- cacheReadTokens > 0
308
- ? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
309
- : 0;
310
-
311
- return inputCost + outputCost + cacheReadCost;
294
+ return this.calculateCost(
295
+ inputTokens,
296
+ outputTokens + thoughtsTokenCount,
297
+ cacheReadTokens,
298
+ );
312
299
  }
313
300
  }
314
301