npm - @clinebot/llms - Versions diffs - 0.0.7 → 0.0.11 - Mend

@clinebot/llms 0.0.7 → 0.0.11

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (33) hide show

package/dist/index.browser.d.ts +2 -2
package/dist/index.browser.js +40 -1
package/dist/index.d.ts +2 -2
package/dist/index.js +12 -12
package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
package/dist/providers/handlers/base.d.ts +5 -29
package/dist/providers/transform/openai-format.d.ts +1 -1
package/dist/providers/types/config.d.ts +6 -0
package/dist/providers/types/stream.d.ts +1 -1
package/package.json +2 -1
package/src/index.browser.ts +2 -2
package/src/index.ts +2 -2
package/src/models/providers/vercel-ai-gateway.ts +1 -1
package/src/providers/handlers/ai-sdk-community.ts +5 -8
package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
package/src/providers/handlers/anthropic-base.test.ts +30 -0
package/src/providers/handlers/anthropic-base.ts +43 -30
package/src/providers/handlers/base.test.ts +68 -3
package/src/providers/handlers/base.ts +104 -54
package/src/providers/handlers/bedrock-base.ts +3 -3
package/src/providers/handlers/community-sdk.test.ts +33 -0
package/src/providers/handlers/gemini-base.test.ts +40 -0
package/src/providers/handlers/gemini-base.ts +22 -20
package/src/providers/handlers/openai-base.ts +67 -12
package/src/providers/handlers/openai-responses.test.ts +46 -0
package/src/providers/handlers/openai-responses.ts +3 -7
package/src/providers/handlers/r1-base.ts +7 -8
package/src/providers/handlers/vertex.ts +15 -5
package/src/providers/transform/anthropic-format.ts +14 -2
package/src/providers/transform/format-conversion.test.ts +49 -0
package/src/providers/transform/openai-format.ts +50 -7
package/src/providers/types/config.ts +8 -0
package/src/providers/types/stream.ts +1 -1

package/src/providers/handlers/base.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import type {
 	ApiStream,
 	ApiStreamUsageChunk,
 	HandlerModelInfo,
+	ModelInfo,
 	ProviderConfig,
 } from "../types";
 import type { Message, ToolDefinition } from "../types/messages";
@@ -22,37 +23,44 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
 	"X-CLIENT-TYPE": "cline-sdk",
 };
+const controllerIds = new WeakMap<AbortController, string>();
+let controllerIdCounter = 0;
+function getControllerId(controller: AbortController): string {
+	let id = controllerIds.get(controller);
+	if (!id) {
+		id = `abort_${++controllerIdCounter}`;
+		controllerIds.set(controller, id);
+	}
+	return id;
+}
+function serializeAbortReason(reason: unknown): unknown {
+	return reason instanceof Error
+		? { name: reason.name, message: reason.message }
+		: reason;
+}
 /**
  * Base handler class with common functionality
  */
 export abstract class BaseHandler implements ApiHandler {
 	protected config: ProviderConfig;
 	protected abortController: AbortController | undefined;
+	private abortSignalSequence = 0;
 	constructor(config: ProviderConfig) {
 		this.config = config;
 	}
-	/**
-	 * Convert Cline messages to provider-specific format
-	 * Must be implemented by subclasses
-	 */
 	abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
-	/**
-	 * Create a streaming message completion
-	 * Must be implemented by subclasses
-	 */
 	abstract createMessage(
 		systemPrompt: string,
 		messages: Message[],
 		tools?: ToolDefinition[],
 	): ApiStream;
-	/**
-	 * Get the current model configuration
-	 * Can be overridden by subclasses for provider-specific logic
-	 */
 	getModel(): HandlerModelInfo {
 		const modelId = this.config.modelId;
 		return {
@@ -61,43 +69,55 @@ export abstract class BaseHandler implements ApiHandler {
 		};
 	}
-	/**
-	 * Get usage information (optional)
-	 * Override in subclasses that support this
-	 */
 	async getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined> {
 		return undefined;
 	}
-	/**
-	 * Get the abort signal for the current request
-	 * Creates a new AbortController if one doesn't exist or was already aborted
-	 * Combines with config.abortSignal if provided
-	 */
 	protected getAbortSignal(): AbortSignal {
-		// Create a new controller if needed
-		if (!this.abortController || this.abortController.signal.aborted) {
-			this.abortController = new AbortController();
-		}
-		// If a signal was provided in config, chain it
-		if (this.config.abortSignal) {
-			const configSignal = this.config.abortSignal;
+		const controller = new AbortController();
+		this.abortController = controller;
+		controller.signal.addEventListener(
+			"abort",
+			() => {
+				if (this.abortController === controller) {
+					this.abortController = undefined;
+				}
+			},
+			{ once: true },
+		);
+		const configSignal = this.config.abortSignal;
+		if (configSignal) {
 			if (configSignal.aborted) {
-				this.abortController.abort(configSignal.reason);
+				this.logAbort("debug", "Provider request inherited aborted signal", {
+					controllerId: getControllerId(controller),
+					reason: serializeAbortReason(configSignal.reason),
+				});
+				controller.abort(configSignal.reason);
 			} else {
-				configSignal.addEventListener("abort", () => {
-					this.abortController?.abort(configSignal.reason);
+				const signalId = ++this.abortSignalSequence;
+				configSignal.addEventListener(
+					"abort",
+					() => {
+						this.logAbort("warn", "Provider request abort signal fired", {
+							controllerId: getControllerId(controller),
+							signalId,
+							reason: serializeAbortReason(configSignal.reason),
+						});
+						controller.abort(configSignal.reason);
+					},
+					{ once: true },
+				);
+				this.logAbort("debug", "Provider request attached abort signal", {
+					controllerId: getControllerId(controller),
+					signalId,
 				});
 			}
 		}
-		return this.abortController.signal;
+		return controller.signal;
 	}
-	/**
-	 * Abort the current request
-	 */
 	abort(): void {
 		this.abortController?.abort();
 	}
@@ -105,37 +125,67 @@ export abstract class BaseHandler implements ApiHandler {
 	setAbortSignal(signal: AbortSignal | undefined): void {
 		this.config.abortSignal = signal;
 		if (signal?.aborted) {
+			this.logAbort("debug", "Provider handler received pre-aborted signal", {
+				controllerId: this.abortController
+					? getControllerId(this.abortController)
+					: undefined,
+				reason: serializeAbortReason(signal.reason),
+			});
 			this.abortController?.abort(signal.reason);
 		}
 	}
-	/**
-	 * Helper to calculate cost from usage
-	 */
+	private logAbort(
+		level: "debug" | "warn",
+		message: string,
+		metadata?: Record<string, unknown>,
+	): void {
+		this.config.logger?.[level]?.(message, {
+			providerId: this.config.providerId,
+			modelId: this.config.modelId,
+			...metadata,
+		});
+	}
+	protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
+		const resolvedModelInfo =
+			modelInfo ??
+			this.config.modelInfo ??
+			this.config.knownModels?.[this.config.modelId];
+		const pricing = resolvedModelInfo?.pricing;
+		return (
+			resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
+			this.config.capabilities?.includes("prompt-cache") === true ||
+			typeof pricing?.cacheRead === "number" ||
+			typeof pricing?.cacheWrite === "number"
+		);
+	}
 	protected calculateCost(
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
 	): number | undefined {
-		const modelPricingSource =
-			this.config.modelInfo ??
-			(this.config.modelId
-				? this.config.knownModels?.[this.config.modelId]
-				: undefined);
-		const pricing = modelPricingSource?.pricing;
+		const pricing = (
+			this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
+		)?.pricing;
 		if (!pricing?.input || !pricing?.output) {
 			return undefined;
 		}
-		const uncachedInputTokens = inputTokens - cacheReadTokens;
-		const inputCost = (uncachedInputTokens / 1_000_000) * pricing.input;
-		const outputCost = (outputTokens / 1_000_000) * pricing.output;
-		const cacheReadCost =
-			cacheReadTokens > 0
+		return (
+			(inputTokens / 1_000_000) * pricing.input +
+			(outputTokens / 1_000_000) * pricing.output +
+			(cacheReadTokens > 0
 				? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
-				: 0;
-		return inputCost + outputCost + cacheReadCost;
+				: 0) +
+			(cacheWriteTokens > 0
+				? (cacheWriteTokens / 1_000_000) *
+					(pricing.cacheWrite ?? pricing.input * 1.25)
+				: 0)
+		);
 	}
 	protected createResponseId(): string {
@@ -154,7 +204,7 @@ export abstract class BaseHandler implements ApiHandler {
 		responseId: string,
 	): Generator<ApiStreamChunk> {
 		for (const chunk of chunks) {
-			yield this.withResponseId(chunk, responseId);
+			yield { ...chunk, id: responseId };
 		}
 	}

package/src/providers/handlers/bedrock-base.ts CHANGED Viewed

@@ -143,7 +143,7 @@ export class BedrockHandler extends BaseHandler {
 			model: factory(modelId),
 			messages: this.getMessages(systemPrompt, messages),
 			tools: toAiSdkTools(tools),
-			maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
+			maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
 			temperature: reasoningEnabled ? undefined : (model.info.temperature ?? 0),
 			providerOptions:
 				Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
@@ -216,7 +216,7 @@ export class BedrockHandler extends BaseHandler {
 					yield {
 						type: "usage",
-						inputTokens: inputTokens - cacheReadTokens,
+						inputTokens,
 						outputTokens,
 						thoughtsTokenCount,
 						cacheReadTokens,
@@ -245,7 +245,7 @@ export class BedrockHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens: inputTokens - cacheReadTokens,
+				inputTokens,
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,

package/src/providers/handlers/community-sdk.test.ts CHANGED Viewed

@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
 			expect(usageChunk?.outputTokens).toBe(3);
 		});
+		it("keeps cached input tokens separate from total input tokens", async () => {
+			streamTextSpy.mockReturnValue({
+				fullStream: makeStreamParts([
+					{
+						type: "finish",
+						usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
+					},
+				]),
+			});
+			const handler = new ClaudeCodeHandler({
+				providerId: "claude-code",
+				modelId: "sonnet",
+			});
+			const chunks: ApiStreamChunk[] = [];
+			for await (const chunk of handler.createMessage("System", [
+				{ role: "user", content: "Hi" },
+			])) {
+				chunks.push(chunk);
+			}
+			const usageChunk = chunks.find(
+				(chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
+					chunk.type === "usage",
+			);
+			expect(usageChunk).toMatchObject({
+				inputTokens: 10,
+				outputTokens: 3,
+				cacheReadTokens: 4,
+			});
+		});
 		it("uses a fallback model id when model is missing", () => {
 			const handler = new ClaudeCodeHandler({
 				providerId: "claude-code",

package/src/providers/handlers/gemini-base.test.ts CHANGED Viewed

@@ -218,4 +218,44 @@ describe("GeminiHandler", () => {
 		expect(secondId).toBeTruthy();
 		expect(firstId).not.toBe(secondId);
 	});
+	it("defaults maxOutputTokens to 8192 for gemini-3-flash when no model or config limit is provided", async () => {
+		generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
+		const handler = new GeminiHandler({
+			providerId: "gemini",
+			modelId: "gemini-3-flash",
+			apiKey: "test-key",
+		});
+		await collectChunks(
+			handler.createMessage("System", [{ role: "user", content: "go" }]),
+		);
+		expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
+		const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
+			config?: { maxOutputTokens?: number };
+		};
+		expect(request.config?.maxOutputTokens).toBe(8192);
+	});
+	it("defaults maxOutputTokens to 128000 for non gemini-3-flash models when no model or config limit is provided", async () => {
+		generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
+		const handler = new GeminiHandler({
+			providerId: "gemini",
+			modelId: "gemini-2.5-flash",
+			apiKey: "test-key",
+		});
+		await collectChunks(
+			handler.createMessage("System", [{ role: "user", content: "go" }]),
+		);
+		expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
+		const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
+			config?: { maxOutputTokens?: number };
+		};
+		expect(request.config?.maxOutputTokens).toBe(128000);
+	});
 });

package/src/providers/handlers/gemini-base.ts CHANGED Viewed

@@ -18,7 +18,6 @@ import {
 import {
 	type ApiStream,
 	type HandlerModelInfo,
-	type ModelInfo,
 	type ProviderConfig,
 	supportsModelThinking,
 } from "../types";
@@ -27,6 +26,16 @@ import { RetriableError, retryStream } from "../utils/retry";
 import { BaseHandler } from "./base";
 const DEFAULT_THINKING_BUDGET_TOKENS = 1024;
+const DEFAULT_MAX_OUTPUT_TOKENS = 128_000;
+const GEMINI_3_FLASH_MAX_OUTPUT_TOKENS = 8192;
+function isGemini3FlashModel(modelId: string): boolean {
+	const normalized = modelId.toLowerCase();
+	return (
+		normalized.includes("gemini-3-flash") ||
+		normalized.includes("gemini-3.0-flash")
+	);
+}
 /**
  * Handler for Google's Gemini API
@@ -131,6 +140,11 @@ export class GeminiHandler extends BaseHandler {
 		}
 		// Build request config with abort signal
+		const fallbackMaxOutputTokens = isGemini3FlashModel(modelId)
+			? GEMINI_3_FLASH_MAX_OUTPUT_TOKENS
+			: DEFAULT_MAX_OUTPUT_TOKENS;
+		const maxOutputTokens =
+			info.maxTokens ?? this.config.maxOutputTokens ?? fallbackMaxOutputTokens;
 		const requestConfig: GenerateContentConfig = {
 			httpOptions: this.config.baseUrl
 				? { baseUrl: this.config.baseUrl, headers: this.getRequestHeaders() }
@@ -138,7 +152,7 @@ export class GeminiHandler extends BaseHandler {
 			abortSignal,
 			systemInstruction: systemPrompt,
 			temperature: info.temperature ?? 1,
-			maxOutputTokens: info.maxTokens ?? this.config.maxOutputTokens,
+			maxOutputTokens,
 		};
 		// Add thinking config only when explicitly requested and supported.
@@ -243,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
 			// Yield final usage
 			const totalCost = this.calculateGeminiCost(
-				info,
 				promptTokens,
 				outputTokens,
 				thoughtsTokenCount,
@@ -252,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens: promptTokens - cacheReadTokens,
+				inputTokens: promptTokens,
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,
@@ -273,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
 	}
 	private calculateGeminiCost(
-		info: ModelInfo,
 		inputTokens: number,
 		outputTokens: number,
 		thoughtsTokenCount: number,
 		cacheReadTokens: number,
 	): number | undefined {
-		const pricing = info.pricing;
-		if (!pricing?.input || !pricing?.output) {
-			return undefined;
-		}
-		const uncachedInputTokens = inputTokens - cacheReadTokens;
-		const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
-		const outputCost =
-			pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
-		const cacheReadCost =
-			cacheReadTokens > 0
-				? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
-				: 0;
-		return inputCost + outputCost + cacheReadCost;
+		return this.calculateCost(
+			inputTokens,
+			outputTokens + thoughtsTokenCount,
+			cacheReadTokens,
+		);
 	}
 }

package/src/providers/handlers/openai-base.ts CHANGED Viewed

@@ -106,9 +106,24 @@ export class OpenAIBaseHandler extends BaseHandler {
 		systemPrompt: string,
 		messages: Message[],
 	): OpenAI.Chat.ChatCompletionMessageParam[] {
+		const model = this.getModel();
+		const supportsPromptCache = this.supportsPromptCache(model.info);
+		const systemMessage = supportsPromptCache
+			? ({
+					role: "system",
+					content: [
+						{
+							type: "text",
+							text: systemPrompt,
+							cache_control: { type: "ephemeral" },
+						},
+					],
+				} as unknown as OpenAI.Chat.ChatCompletionMessageParam)
+			: { role: "system" as const, content: systemPrompt };
 		return [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAIMessages(messages),
+			systemMessage,
+			...convertToOpenAIMessages(messages, supportsPromptCache),
 		];
 	}
@@ -138,7 +153,8 @@ export class OpenAIBaseHandler extends BaseHandler {
 		const openAiMessages = this.getMessages(systemPrompt, messages);
 		// Build request options
-		const requestOptions: OpenAI.ChatCompletionCreateParamsStreaming = {
+		const requestOptions: Record<string, unknown> &
+			OpenAI.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
 			messages: openAiMessages,
 			stream: true,
@@ -149,6 +165,17 @@ export class OpenAIBaseHandler extends BaseHandler {
 			}),
 		};
+		// Add top-level cache_control for OpenRouter with Anthropic models.
+		// This enables automatic caching where the cache breakpoint advances
+		// as the conversation grows, rather than relying on explicit per-block
+		// breakpoints which are limited to 4.
+		if (
+			this.config.providerId === "openrouter" &&
+			modelId.startsWith("anthropic/")
+		) {
+			requestOptions.cache_control = { type: "ephemeral" };
+		}
 		// Add max tokens if configured
 		const maxTokens = modelInfo.maxTokens ?? this.config.maxOutputTokens;
 		if (maxTokens) {
@@ -171,7 +198,11 @@ export class OpenAIBaseHandler extends BaseHandler {
 			this.config.reasoningEffort ??
 			(this.config.thinking ? DEFAULT_REASONING_EFFORT : undefined);
 		if (supportsReasoningEffort && effectiveReasoningEffort) {
-			(requestOptions as any).reasoning_effort = effectiveReasoningEffort;
+			(
+				requestOptions as OpenAI.ChatCompletionCreateParamsStreaming & {
+					reasoning_effort?: string;
+				}
+			).reasoning_effort = effectiveReasoningEffort;
 		}
 		const requestHeaders = this.getRequestHeaders();
@@ -191,16 +222,25 @@ export class OpenAIBaseHandler extends BaseHandler {
 			headers: requestHeaders,
 		});
 		const toolCallProcessor = new ToolCallProcessor();
+		let finishReason: string | null = null;
 		for await (const chunk of stream) {
+			const choice = chunk.choices?.[0];
+			if (choice?.finish_reason) {
+				finishReason = choice.finish_reason;
+			}
 			yield* this.withResponseIdForAll(
 				this.processChunk(chunk, toolCallProcessor, modelInfo, responseId),
 				responseId,
 			);
 		}
-		// Yield done chunk to indicate streaming completed successfully
-		yield { type: "done", success: true, id: responseId };
+		yield {
+			type: "done",
+			success: true,
+			id: responseId,
+			incompleteReason: finishReason === "length" ? "max_tokens" : undefined,
+		};
 	}
 	/**
@@ -213,9 +253,11 @@ export class OpenAIBaseHandler extends BaseHandler {
 		_modelInfo: ModelInfo,
 		responseId: string,
 	): Generator<import("../types").ApiStreamChunk> {
-		const delta = chunk.choices?.[0]?.delta && {
-			...chunk.choices[0].delta,
-			reasoning_content: (chunk.choices[0].delta as any).reasoning_content,
+		const rawDelta = chunk.choices?.[0]?.delta;
+		const delta = rawDelta && {
+			...rawDelta,
+			reasoning_content: (rawDelta as { reasoning_content?: string })
+				.reasoning_content,
 		};
 		// Handle text content
@@ -227,7 +269,7 @@ export class OpenAIBaseHandler extends BaseHandler {
 		if (delta?.reasoning_content) {
 			yield {
 				type: "reasoning",
-				reasoning: (delta as any).reasoning_content,
+				reasoning: delta.reasoning_content,
 				id: responseId,
 			};
 		}
@@ -248,10 +290,22 @@ export class OpenAIBaseHandler extends BaseHandler {
 		if (chunk.usage) {
 			const inputTokens = chunk.usage.prompt_tokens ?? 0;
 			const outputTokens = chunk.usage.completion_tokens ?? 0;
+			const usageWithCache = chunk.usage as typeof chunk.usage & {
+				prompt_tokens_details?: {
+					cached_tokens?: number;
+					cache_write_tokens?: number;
+				};
+				cache_creation_input_tokens?: number;
+				cache_read_input_tokens?: number;
+			};
 			const cacheReadTokens =
-				(chunk.usage as any).prompt_tokens_details?.cached_tokens ?? 0;
+				usageWithCache.prompt_tokens_details?.cached_tokens ??
+				usageWithCache.cache_read_input_tokens ??
+				0;
 			const cacheWriteTokens =
-				(chunk.usage as any).prompt_cache_miss_tokens ?? 0;
+				usageWithCache.prompt_tokens_details?.cache_write_tokens ??
+				usageWithCache.cache_creation_input_tokens ??
+				0;
 			yield {
 				type: "usage",
@@ -263,6 +317,7 @@ export class OpenAIBaseHandler extends BaseHandler {
 					inputTokens,
 					outputTokens,
 					cacheReadTokens,
+					cacheWriteTokens,
 				),
 				id: responseId,
 			};

package/src/providers/handlers/openai-responses.test.ts CHANGED Viewed

@@ -210,4 +210,50 @@ describe("OpenAIResponsesHandler", () => {
 			},
 		});
 	});
+	it("keeps cached input tokens separate in usage chunks", () => {
+		const handler = new TestOpenAIResponsesHandler({
+			providerId: "openai-native",
+			modelId: "gpt-5.4",
+			apiKey: "test-key",
+			baseUrl: "https://example.com",
+			modelInfo: {
+				id: "gpt-5.4",
+				pricing: {
+					input: 1,
+					output: 2,
+					cacheRead: 0.5,
+				},
+			},
+		});
+		const chunks = handler.processChunkForTest({
+			type: "response.completed",
+			response: {
+				id: "resp_usage",
+				usage: {
+					input_tokens: 100,
+					output_tokens: 40,
+					input_tokens_details: {
+						cached_tokens: 25,
+					},
+					output_tokens_details: {
+						reasoning_tokens: 10,
+					},
+				},
+			},
+		});
+		expect(chunks[0]).toMatchObject({
+			type: "usage",
+			inputTokens: 100,
+			outputTokens: 40,
+			cacheReadTokens: 25,
+			cacheWriteTokens: 0,
+		});
+		expect(chunks[0]?.type).toBe("usage");
+		if (chunks[0]?.type === "usage") {
+			expect(chunks[0].totalCost).toBeCloseTo(0.0001925, 10);
+		}
+	});
 });

package/src/providers/handlers/openai-responses.ts CHANGED Viewed

@@ -565,23 +565,19 @@ export class OpenAIResponsesHandler extends BaseHandler {
 					const inputTokens = usage.input_tokens || 0;
 					const outputTokens = usage.output_tokens || 0;
 					const cacheReadTokens =
-						usage.output_tokens_details?.reasoning_tokens || 0;
-					const cacheWriteTokens =
 						usage.input_tokens_details?.cached_tokens || 0;
+					const cacheWriteTokens = 0;
 					const totalCost = this.calculateCost(
 						inputTokens,
 						outputTokens,
 						cacheReadTokens,
-					);
-					const nonCachedInputTokens = Math.max(
-						0,
-						inputTokens - cacheReadTokens - cacheWriteTokens,
+						cacheWriteTokens,
 					);
 					yield {
 						type: "usage",
-						inputTokens: nonCachedInputTokens,
+						inputTokens,
 						outputTokens,
 						cacheWriteTokens,
 						cacheReadTokens,