npm - @clinebot/llms - Versions diffs - 0.0.10 → 0.0.11 - Mend

@clinebot/llms 0.0.10 → 0.0.11

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (22)

package/dist/index.js +10 -10
package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
package/dist/providers/handlers/base.d.ts +3 -2
package/dist/providers/types/stream.d.ts +1 -1
package/package.json +2 -2
package/src/providers/handlers/ai-sdk-community.ts +5 -8
package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
package/src/providers/handlers/anthropic-base.test.ts +30 -0
package/src/providers/handlers/anthropic-base.ts +31 -29
package/src/providers/handlers/base.test.ts +8 -2
package/src/providers/handlers/base.ts +22 -1
package/src/providers/handlers/bedrock-base.ts +2 -2
package/src/providers/handlers/community-sdk.test.ts +33 -0
package/src/providers/handlers/gemini-base.ts +6 -19
package/src/providers/handlers/openai-base.ts +19 -8
package/src/providers/handlers/openai-responses.test.ts +46 -0
package/src/providers/handlers/openai-responses.ts +3 -7
package/src/providers/handlers/r1-base.ts +7 -8
package/src/providers/handlers/vertex.ts +14 -4
package/src/providers/transform/anthropic-format.ts +14 -2
package/src/providers/transform/format-conversion.test.ts +23 -0
package/src/providers/types/stream.ts +1 -1

package/dist/providers/handlers/ai-sdk-community.d.ts CHANGED Viewed

@@ -28,7 +28,7 @@ type AiSdkUsageMetrics = {
 export type EmitAiSdkStreamOptions = {
     responseId: string;
     errorMessage: string;
-    calculateCost: (inputTokens: number, outputTokens: number, cacheReadTokens: number) => number | undefined;
+    calculateCost: (inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens?: number) => number | undefined;
     reasoningTypes?: string[];
     enableToolCalls?: boolean;
     toolCallArgsOrder?: Array<"args" | "input">;

package/dist/providers/handlers/base.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  *
  * Abstract base class that provides common functionality for all handlers.
  */
-import type { ApiHandler, ApiStream, ApiStreamUsageChunk, HandlerModelInfo, ProviderConfig } from "../types";
+import type { ApiHandler, ApiStream, ApiStreamUsageChunk, HandlerModelInfo, ModelInfo, ProviderConfig } from "../types";
 import type { Message, ToolDefinition } from "../types/messages";
 import type { ApiStreamChunk } from "../types/stream";
 export declare const DEFAULT_REQUEST_HEADERS: Record<string, string>;
@@ -23,7 +23,8 @@ export declare abstract class BaseHandler implements ApiHandler {
     abort(): void;
     setAbortSignal(signal: AbortSignal | undefined): void;
     private logAbort;
-    protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number): number | undefined;
+    protected supportsPromptCache(modelInfo?: ModelInfo): boolean;
+    protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): number | undefined;
     protected createResponseId(): string;
     protected withResponseId<T extends ApiStreamChunk>(chunk: T, responseId: string): T;
     protected withResponseIdForAll(chunks: Iterable<ApiStreamChunk>, responseId: string): Generator<ApiStreamChunk>;

package/dist/providers/types/stream.d.ts CHANGED Viewed

@@ -47,7 +47,7 @@ export interface ApiStreamReasoningChunk {
  */
 export interface ApiStreamUsageChunk {
     type: "usage";
-    /** Number of input tokens (excluding cached) */
+    /** Total number of input tokens reported by the provider */
     inputTokens: number;
     /** Number of output tokens */
     outputTokens: number;

package/package.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
 	"name": "@clinebot/llms",
-	"version": "0.0.10",
+	"version": "0.0.11",
 	"description": "Config-driven SDK for selecting, extending, and instantiating LLM providers and models",
 	"main": "./dist/index.js",
 	"module": "./dist/index.js",
 	"dependencies": {
-		"@clinebot/shared": "0.0.10",
+		"@clinebot/shared": "0.0.11",
 		"@ai-sdk/amazon-bedrock": "^4.0.67",
 		"@ai-sdk/google-vertex": "^4.0.74",
 		"@ai-sdk/mistral": "^3.0.24",

package/src/providers/handlers/ai-sdk-community.ts CHANGED Viewed

@@ -41,6 +41,7 @@ export type EmitAiSdkStreamOptions = {
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens: number,
+		cacheWriteTokens?: number,
 	) => number | undefined;
 	reasoningTypes?: string[];
 	enableToolCalls?: boolean;
@@ -168,10 +169,7 @@ export async function* emitAiSdkStream(
 				yield {
 					type: "usage",
-					inputTokens: Math.max(
-						0,
-						usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
-					),
+					inputTokens: usageMetrics.inputTokens,
 					outputTokens: usageMetrics.outputTokens,
 					thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
 					cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -180,6 +178,7 @@ export async function* emitAiSdkStream(
 						usageMetrics.inputTokens,
 						usageMetrics.outputTokens,
 						usageMetrics.cacheReadTokens,
+						usageMetrics.cacheWriteTokens,
 					),
 					id: responseId,
 				};
@@ -205,10 +204,7 @@ export async function* emitAiSdkStream(
 		const usageMetrics = resolveUsageMetrics(usage);
 		yield {
 			type: "usage",
-			inputTokens: Math.max(
-				0,
-				usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
-			),
+			inputTokens: usageMetrics.inputTokens,
 			outputTokens: usageMetrics.outputTokens,
 			thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
 			cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -217,6 +213,7 @@ export async function* emitAiSdkStream(
 				usageMetrics.inputTokens,
 				usageMetrics.outputTokens,
 				usageMetrics.cacheReadTokens,
+				usageMetrics.cacheWriteTokens,
 			),
 			id: responseId,
 		};

package/src/providers/handlers/ai-sdk-provider-base.ts CHANGED Viewed

@@ -185,8 +185,18 @@ export abstract class AiSdkProviderHandler extends BaseHandler {
 		yield* emitAiSdkStream(stream, {
 			responseId,
 			errorMessage: this.getStreamErrorMessage(),
-			calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
-				this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+			calculateCost: (
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+				cacheWriteTokens,
+			) =>
+				this.calculateCost(
+					inputTokens,
+					outputTokens,
+					cacheReadTokens,
+					cacheWriteTokens,
+				),
 			...this.getEmitStreamOptions(),
 		});
 	}

package/src/providers/handlers/anthropic-base.test.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import { describe, expect, it } from "vitest";
+import { AnthropicHandler } from "./anthropic-base";
+describe("AnthropicHandler prompt cache detection", () => {
+	it("enables prompt caching when model pricing includes cache pricing", () => {
+		const handler = new AnthropicHandler({
+			providerId: "anthropic",
+			modelId: "claude-sonnet-4-6",
+			apiKey: "test-key",
+			modelInfo: {
+				id: "claude-sonnet-4-6",
+				pricing: {
+					input: 3,
+					output: 15,
+					cacheRead: 0.3,
+					cacheWrite: 3.75,
+				},
+			},
+		});
+		const messages = handler.getMessages("system", [
+			{ role: "user", content: "Tell me about this repo" },
+		]);
+		const userTextBlock = messages[0]?.content?.[0] as
+			| { cache_control?: { type: string } }
+			| undefined;
+		expect(userTextBlock?.cache_control).toEqual({ type: "ephemeral" });
+	});
+});

package/src/providers/handlers/anthropic-base.ts CHANGED Viewed

@@ -17,7 +17,6 @@ import {
 import {
 	type ApiStream,
 	type HandlerModelInfo,
-	hasModelCapability,
 	type ProviderConfig,
 	supportsModelThinking,
 } from "../types";
@@ -76,10 +75,7 @@ export class AnthropicHandler extends BaseHandler {
 		_systemPrompt: string,
 		messages: Message[],
 	): Anthropic.MessageParam[] {
-		const supportsPromptCache = hasModelCapability(
-			this.getModel().info,
-			"prompt-cache",
-		);
+		const supportsPromptCache = this.supportsPromptCache(this.getModel().info);
 		return convertToAnthropicMessages(
 			messages,
 			supportsPromptCache,
@@ -113,7 +109,7 @@ export class AnthropicHandler extends BaseHandler {
 		const budgetTokens =
 			thinkingSupported && requestedBudget > 0 ? requestedBudget : 0;
 		const nativeToolsOn = tools && tools.length > 0;
-		const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
+		const supportsPromptCache = this.supportsPromptCache(model.info);
 		const reasoningOn = thinkingSupported && budgetTokens > 0;
 		const debugThinking = isThinkingDebugEnabled();
 		const debugChunkCounts: Record<string, number> = {};
@@ -139,30 +135,34 @@ export class AnthropicHandler extends BaseHandler {
 		const requestOptions = { signal: abortSignal };
 		// Create the request
+		// Use top-level automatic caching so the entire prefix (system +
+		// messages) is cached and the breakpoint advances each turn.
+		const createParams: Record<string, unknown> &
+			Anthropic.MessageCreateParamsStreaming = {
+			model: model.id,
+			thinking: reasoningOn
+				? { type: "enabled", budget_tokens: budgetTokens }
+				: undefined,
+			max_tokens:
+				model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
+			temperature: reasoningOn ? undefined : 0,
+			system: [
+				supportsPromptCache
+					? {
+							text: systemPrompt,
+							type: "text",
+							cache_control: { type: "ephemeral" },
+						}
+					: { text: systemPrompt, type: "text" },
+			],
+			messages: anthropicMessages as Anthropic.MessageParam[],
+			stream: true,
+			tools: anthropicTools,
+			tool_choice: nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
+		};
 		const stream = await client.messages.create(
-			{
-				model: model.id,
-				thinking: reasoningOn
-					? { type: "enabled", budget_tokens: budgetTokens }
-					: undefined,
-				max_tokens:
-					model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
-				temperature: reasoningOn ? undefined : 0,
-				system: supportsPromptCache
-					? [
-							{
-								text: systemPrompt,
-								type: "text",
-								cache_control: { type: "ephemeral" },
-							},
-						]
-					: [{ text: systemPrompt, type: "text" }],
-				messages: anthropicMessages as Anthropic.MessageParam[],
-				stream: true,
-				tools: anthropicTools,
-				tool_choice:
-					nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
-			},
+			createParams as Anthropic.MessageCreateParamsStreaming,
 			requestOptions,
 		);
@@ -244,6 +244,7 @@ export class AnthropicHandler extends BaseHandler {
 						usageSnapshot.inputTokens,
 						usageSnapshot.outputTokens,
 						usageSnapshot.cacheReadTokens,
+						usageSnapshot.cacheWriteTokens,
 					),
 					id: responseId,
 				};
@@ -263,6 +264,7 @@ export class AnthropicHandler extends BaseHandler {
 						usageSnapshot.inputTokens,
 						usageSnapshot.outputTokens,
 						usageSnapshot.cacheReadTokens,
+						usageSnapshot.cacheWriteTokens,
 					),
 					id: responseId,
 				};

package/src/providers/handlers/base.test.ts CHANGED Viewed

@@ -15,8 +15,14 @@ class TestHandler extends BaseHandler {
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
 	): number | undefined {
-		return this.calculateCost(inputTokens, outputTokens, cacheReadTokens);
+		return this.calculateCost(
+			inputTokens,
+			outputTokens,
+			cacheReadTokens,
+			cacheWriteTokens,
+		);
 	}
 	public exposeAbortSignal(): AbortSignal {
@@ -45,7 +51,7 @@ describe("BaseHandler.calculateCost", () => {
 		const cost = handler.computeCost(1_000_000, 1_000_000, 100_000);
-		expect(cost).toBeCloseTo(17.73, 6);
+		expect(cost).toBeCloseTo(18.03, 6);
 	});
 });

package/src/providers/handlers/base.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import type {
 	ApiStream,
 	ApiStreamUsageChunk,
 	HandlerModelInfo,
+	ModelInfo,
 	ProviderConfig,
 } from "../types";
 import type { Message, ToolDefinition } from "../types/messages";
@@ -146,10 +147,26 @@ export abstract class BaseHandler implements ApiHandler {
 		});
 	}
+	protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
+		const resolvedModelInfo =
+			modelInfo ??
+			this.config.modelInfo ??
+			this.config.knownModels?.[this.config.modelId];
+		const pricing = resolvedModelInfo?.pricing;
+		return (
+			resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
+			this.config.capabilities?.includes("prompt-cache") === true ||
+			typeof pricing?.cacheRead === "number" ||
+			typeof pricing?.cacheWrite === "number"
+		);
+	}
 	protected calculateCost(
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
 	): number | undefined {
 		const pricing = (
 			this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
@@ -159,10 +176,14 @@ export abstract class BaseHandler implements ApiHandler {
 		}
 		return (
-			((inputTokens - cacheReadTokens) / 1_000_000) * pricing.input +
+			(inputTokens / 1_000_000) * pricing.input +
 			(outputTokens / 1_000_000) * pricing.output +
 			(cacheReadTokens > 0
 				? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
+				: 0) +
+			(cacheWriteTokens > 0
+				? (cacheWriteTokens / 1_000_000) *
+					(pricing.cacheWrite ?? pricing.input * 1.25)
 				: 0)
 		);
 	}

package/src/providers/handlers/bedrock-base.ts CHANGED Viewed

@@ -216,7 +216,7 @@ export class BedrockHandler extends BaseHandler {
 					yield {
 						type: "usage",
-						inputTokens: inputTokens - cacheReadTokens,
+						inputTokens,
 						outputTokens,
 						thoughtsTokenCount,
 						cacheReadTokens,
@@ -245,7 +245,7 @@ export class BedrockHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens: inputTokens - cacheReadTokens,
+				inputTokens,
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,

package/src/providers/handlers/community-sdk.test.ts CHANGED Viewed

@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
 			expect(usageChunk?.outputTokens).toBe(3);
 		});
+		it("keeps cached input tokens separate from total input tokens", async () => {
+			streamTextSpy.mockReturnValue({
+				fullStream: makeStreamParts([
+					{
+						type: "finish",
+						usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
+					},
+				]),
+			});
+			const handler = new ClaudeCodeHandler({
+				providerId: "claude-code",
+				modelId: "sonnet",
+			});
+			const chunks: ApiStreamChunk[] = [];
+			for await (const chunk of handler.createMessage("System", [
+				{ role: "user", content: "Hi" },
+			])) {
+				chunks.push(chunk);
+			}
+			const usageChunk = chunks.find(
+				(chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
+					chunk.type === "usage",
+			);
+			expect(usageChunk).toMatchObject({
+				inputTokens: 10,
+				outputTokens: 3,
+				cacheReadTokens: 4,
+			});
+		});
 		it("uses a fallback model id when model is missing", () => {
 			const handler = new ClaudeCodeHandler({
 				providerId: "claude-code",

package/src/providers/handlers/gemini-base.ts CHANGED Viewed

@@ -18,7 +18,6 @@ import {
 import {
 	type ApiStream,
 	type HandlerModelInfo,
-	type ModelInfo,
 	type ProviderConfig,
 	supportsModelThinking,
 } from "../types";
@@ -258,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
 			// Yield final usage
 			const totalCost = this.calculateGeminiCost(
-				info,
 				promptTokens,
 				outputTokens,
 				thoughtsTokenCount,
@@ -267,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens: promptTokens - cacheReadTokens,
+				inputTokens: promptTokens,
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,
@@ -288,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
 	}
 	private calculateGeminiCost(
-		info: ModelInfo,
 		inputTokens: number,
 		outputTokens: number,
 		thoughtsTokenCount: number,
 		cacheReadTokens: number,
 	): number | undefined {
-		const pricing = info.pricing;
-		if (!pricing?.input || !pricing?.output) {
-			return undefined;
-		}
-		const uncachedInputTokens = inputTokens - cacheReadTokens;
-		const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
-		const outputCost =
-			pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
-		const cacheReadCost =
-			cacheReadTokens > 0
-				? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
-				: 0;
-		return inputCost + outputCost + cacheReadCost;
+		return this.calculateCost(
+			inputTokens,
+			outputTokens + thoughtsTokenCount,
+			cacheReadTokens,
+		);
 	}
 }

package/src/providers/handlers/openai-base.ts CHANGED Viewed

@@ -22,7 +22,6 @@ import type {
 	ModelInfo,
 	ProviderConfig,
 } from "../types";
-import { hasModelCapability } from "../types";
 import type { Message, ToolDefinition } from "../types/messages";
 import { retryStream } from "../utils/retry";
 import { ToolCallProcessor } from "../utils/tool-processor";
@@ -108,9 +107,7 @@ export class OpenAIBaseHandler extends BaseHandler {
 		messages: Message[],
 	): OpenAI.Chat.ChatCompletionMessageParam[] {
 		const model = this.getModel();
-		const supportsPromptCache =
-			hasModelCapability(model.info, "prompt-cache") ||
-			this.config.capabilities?.includes("prompt-cache") === true;
+		const supportsPromptCache = this.supportsPromptCache(model.info);
 		const systemMessage = supportsPromptCache
 			? ({
 					role: "system",
@@ -156,7 +153,8 @@ export class OpenAIBaseHandler extends BaseHandler {
 		const openAiMessages = this.getMessages(systemPrompt, messages);
 		// Build request options
-		const requestOptions: OpenAI.ChatCompletionCreateParamsStreaming = {
+		const requestOptions: Record<string, unknown> &
+			OpenAI.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
 			messages: openAiMessages,
 			stream: true,
@@ -167,6 +165,17 @@ export class OpenAIBaseHandler extends BaseHandler {
 			}),
 		};
+		// Add top-level cache_control for OpenRouter with Anthropic models.
+		// This enables automatic caching where the cache breakpoint advances
+		// as the conversation grows, rather than relying on explicit per-block
+		// breakpoints which are limited to 4.
+		if (
+			this.config.providerId === "openrouter" &&
+			modelId.startsWith("anthropic/")
+		) {
+			requestOptions.cache_control = { type: "ephemeral" };
+		}
 		// Add max tokens if configured
 		const maxTokens = modelInfo.maxTokens ?? this.config.maxOutputTokens;
 		if (maxTokens) {
@@ -286,15 +295,16 @@ export class OpenAIBaseHandler extends BaseHandler {
 					cached_tokens?: number;
 					cache_write_tokens?: number;
 				};
-				prompt_cache_miss_tokens?: number;
 				cache_creation_input_tokens?: number;
 				cache_read_input_tokens?: number;
 			};
 			const cacheReadTokens =
-				usageWithCache.prompt_tokens_details?.cached_tokens ?? 0;
+				usageWithCache.prompt_tokens_details?.cached_tokens ??
+				usageWithCache.cache_read_input_tokens ??
+				0;
 			const cacheWriteTokens =
 				usageWithCache.prompt_tokens_details?.cache_write_tokens ??
-				usageWithCache.prompt_cache_miss_tokens ??
+				usageWithCache.cache_creation_input_tokens ??
 				0;
 			yield {
@@ -307,6 +317,7 @@ export class OpenAIBaseHandler extends BaseHandler {
 					inputTokens,
 					outputTokens,
 					cacheReadTokens,
+					cacheWriteTokens,
 				),
 				id: responseId,
 			};

package/src/providers/handlers/openai-responses.test.ts CHANGED Viewed

@@ -210,4 +210,50 @@ describe("OpenAIResponsesHandler", () => {
 			},
 		});
 	});
+	it("keeps cached input tokens separate in usage chunks", () => {
+		const handler = new TestOpenAIResponsesHandler({
+			providerId: "openai-native",
+			modelId: "gpt-5.4",
+			apiKey: "test-key",
+			baseUrl: "https://example.com",
+			modelInfo: {
+				id: "gpt-5.4",
+				pricing: {
+					input: 1,
+					output: 2,
+					cacheRead: 0.5,
+				},
+			},
+		});
+		const chunks = handler.processChunkForTest({
+			type: "response.completed",
+			response: {
+				id: "resp_usage",
+				usage: {
+					input_tokens: 100,
+					output_tokens: 40,
+					input_tokens_details: {
+						cached_tokens: 25,
+					},
+					output_tokens_details: {
+						reasoning_tokens: 10,
+					},
+				},
+			},
+		});
+		expect(chunks[0]).toMatchObject({
+			type: "usage",
+			inputTokens: 100,
+			outputTokens: 40,
+			cacheReadTokens: 25,
+			cacheWriteTokens: 0,
+		});
+		expect(chunks[0]?.type).toBe("usage");
+		if (chunks[0]?.type === "usage") {
+			expect(chunks[0].totalCost).toBeCloseTo(0.0001925, 10);
+		}
+	});
 });

package/src/providers/handlers/openai-responses.ts CHANGED Viewed

@@ -565,23 +565,19 @@ export class OpenAIResponsesHandler extends BaseHandler {
 					const inputTokens = usage.input_tokens || 0;
 					const outputTokens = usage.output_tokens || 0;
 					const cacheReadTokens =
-						usage.output_tokens_details?.reasoning_tokens || 0;
-					const cacheWriteTokens =
 						usage.input_tokens_details?.cached_tokens || 0;
+					const cacheWriteTokens = 0;
 					const totalCost = this.calculateCost(
 						inputTokens,
 						outputTokens,
 						cacheReadTokens,
-					);
-					const nonCachedInputTokens = Math.max(
-						0,
-						inputTokens - cacheReadTokens - cacheWriteTokens,
+						cacheWriteTokens,
 					);
 					yield {
 						type: "usage",
-						inputTokens: nonCachedInputTokens,
+						inputTokens,
 						outputTokens,
 						cacheWriteTokens,
 						cacheReadTokens,

package/src/providers/handlers/r1-base.ts CHANGED Viewed

@@ -255,19 +255,18 @@ export class R1BaseHandler extends BaseHandler {
 		const cacheReadTokens = r1Usage.prompt_cache_hit_tokens ?? 0;
 		const cacheWriteTokens = r1Usage.prompt_cache_miss_tokens ?? 0;
-		// Calculate non-cached input tokens (will always be 0 for DeepSeek since input = read + write)
-		const nonCachedInputTokens = Math.max(
-			0,
-			inputTokens - cacheReadTokens - cacheWriteTokens,
-		);
 		yield {
 			type: "usage",
-			inputTokens: nonCachedInputTokens,
+			inputTokens,
 			outputTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
-			totalCost: this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+			totalCost: this.calculateCost(
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+				cacheWriteTokens,
+			),
 			id: responseId,
 		};
 	}

package/src/providers/handlers/vertex.ts CHANGED Viewed

@@ -189,7 +189,7 @@ export class VertexHandler extends BaseHandler {
 		if (!isClaudeModel(model.id)) {
 			return this.ensureGeminiHandler().getMessages(systemPrompt, messages);
 		}
-		const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
+		const supportsPromptCache = this.supportsPromptCache(model.info);
 		return convertToAnthropicMessages(messages, supportsPromptCache);
 	}
@@ -226,7 +226,7 @@ export class VertexHandler extends BaseHandler {
 		const budgetTokens = this.config.thinkingBudgetTokens ?? 0;
 		const reasoningOn =
 			hasModelCapability(model.info, "reasoning") && budgetTokens > 0;
-		const promptCacheOn = hasModelCapability(model.info, "prompt-cache");
+		const promptCacheOn = this.supportsPromptCache(model.info);
 		const providerOptions: Record<string, unknown> = {};
 		if (reasoningOn) {
@@ -251,8 +251,18 @@ export class VertexHandler extends BaseHandler {
 		yield* emitAiSdkStream(stream, {
 			responseId,
 			errorMessage: "Vertex Anthropic stream failed",
-			calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
-				this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+			calculateCost: (
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+				cacheWriteTokens,
+			) =>
+				this.calculateCost(
+					inputTokens,
+					outputTokens,
+					cacheReadTokens,
+					cacheWriteTokens,
+				),
 			reasoningTypes: ["reasoning-delta"],
 			enableToolCalls: true,
 			toolCallArgsOrder: ["input", "args"],