npm - @clinebot/llms - Versions diffs - 0.0.10 → 0.0.12 - Mend

@clinebot/llms 0.0.10 → 0.0.12

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (27)

package/dist/index.browser.js +14 -14
package/dist/index.js +13 -13
package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
package/dist/providers/handlers/base.d.ts +7 -2
package/dist/providers/types/stream.d.ts +1 -1
package/package.json +2 -2
package/src/models/generated.ts +223 -641
package/src/models/models-dev-catalog.test.ts +2 -2
package/src/models/models-dev-catalog.ts +9 -2
package/src/providers/handlers/ai-sdk-community.ts +3 -0
package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
package/src/providers/handlers/anthropic-base.test.ts +30 -0
package/src/providers/handlers/anthropic-base.ts +31 -29
package/src/providers/handlers/base.test.ts +127 -2
package/src/providers/handlers/base.ts +115 -1
package/src/providers/handlers/bedrock-base.ts +4 -4
package/src/providers/handlers/community-sdk.test.ts +33 -0
package/src/providers/handlers/gemini-base.ts +6 -19
package/src/providers/handlers/openai-base.ts +33 -14
package/src/providers/handlers/openai-responses.test.ts +46 -0
package/src/providers/handlers/openai-responses.ts +12 -8
package/src/providers/handlers/r1-base.ts +10 -8
package/src/providers/handlers/vertex.ts +14 -4
package/src/providers/transform/anthropic-format.ts +14 -2
package/src/providers/transform/format-conversion.test.ts +23 -0
package/src/providers/transform/gemini-format.ts +77 -1
package/src/providers/types/stream.ts +1 -1

package/src/models/models-dev-catalog.test.ts CHANGED Viewed

@@ -82,7 +82,7 @@ describe("models-dev-catalog", () => {
 					id: "claude-defaults",
 					name: "claude-defaults",
 					contextWindow: 4096,
-					maxTokens: 4096,
+					maxTokens: 204,
 					capabilities: ["tools"],
 					pricing: {
 						input: 0,
@@ -97,7 +97,7 @@ describe("models-dev-catalog", () => {
 					id: "claude-older",
 					name: "claude-older",
 					contextWindow: 4096,
-					maxTokens: 4096,
+					maxTokens: 204,
 					capabilities: ["tools"],
 					pricing: {
 						input: 0,

package/src/models/models-dev-catalog.ts CHANGED Viewed

@@ -93,11 +93,18 @@ function toStatus(status: string | undefined): ModelInfo["status"] {
 }
 function toModelInfo(modelId: string, model: ModelsDevModel): ModelInfo {
+	// If the context or output limit is missing, default to DEFAULT_CONTEXT_WINDOW or DEFAULT_MAX_TOKENS respectively.
+	// If the context window and output limit are the same value, assume max tokens should be 5% of that value to avoid overallocation.
+	const contextWindow = model.limit?.context ?? DEFAULT_CONTEXT_WINDOW;
+	const outputToken = model.limit?.output ?? DEFAULT_MAX_TOKENS;
+	const discounted =
+		contextWindow === outputToken ? outputToken * 0.05 : outputToken;
 	return {
 		id: modelId,
 		name: model.name || modelId,
-		contextWindow: model.limit?.context ?? DEFAULT_CONTEXT_WINDOW,
-		maxTokens: model.limit?.output ?? DEFAULT_MAX_TOKENS,
+		contextWindow,
+		maxTokens: Math.floor(discounted),
 		capabilities: toCapabilities(model),
 		pricing: {
 			input: model.cost?.input ?? 0,

package/src/providers/handlers/ai-sdk-community.ts CHANGED Viewed

@@ -41,6 +41,7 @@ export type EmitAiSdkStreamOptions = {
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens: number,
+		cacheWriteTokens?: number,
 	) => number | undefined;
 	reasoningTypes?: string[];
 	enableToolCalls?: boolean;
@@ -180,6 +181,7 @@ export async function* emitAiSdkStream(
 						usageMetrics.inputTokens,
 						usageMetrics.outputTokens,
 						usageMetrics.cacheReadTokens,
+						usageMetrics.cacheWriteTokens,
 					),
 					id: responseId,
 				};
@@ -217,6 +219,7 @@ export async function* emitAiSdkStream(
 				usageMetrics.inputTokens,
 				usageMetrics.outputTokens,
 				usageMetrics.cacheReadTokens,
+				usageMetrics.cacheWriteTokens,
 			),
 			id: responseId,
 		};

package/src/providers/handlers/ai-sdk-provider-base.ts CHANGED Viewed

@@ -185,8 +185,18 @@ export abstract class AiSdkProviderHandler extends BaseHandler {
 		yield* emitAiSdkStream(stream, {
 			responseId,
 			errorMessage: this.getStreamErrorMessage(),
-			calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
-				this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+			calculateCost: (
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+				cacheWriteTokens,
+			) =>
+				this.calculateCost(
+					inputTokens,
+					outputTokens,
+					cacheReadTokens,
+					cacheWriteTokens,
+				),
 			...this.getEmitStreamOptions(),
 		});
 	}

package/src/providers/handlers/anthropic-base.test.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import { describe, expect, it } from "vitest";
+import { AnthropicHandler } from "./anthropic-base";
+describe("AnthropicHandler prompt cache detection", () => {
+	it("enables prompt caching when model pricing includes cache pricing", () => {
+		const handler = new AnthropicHandler({
+			providerId: "anthropic",
+			modelId: "claude-sonnet-4-6",
+			apiKey: "test-key",
+			modelInfo: {
+				id: "claude-sonnet-4-6",
+				pricing: {
+					input: 3,
+					output: 15,
+					cacheRead: 0.3,
+					cacheWrite: 3.75,
+				},
+			},
+		});
+		const messages = handler.getMessages("system", [
+			{ role: "user", content: "Tell me about this repo" },
+		]);
+		const userTextBlock = messages[0]?.content?.[0] as
+			| { cache_control?: { type: string } }
+			| undefined;
+		expect(userTextBlock?.cache_control).toEqual({ type: "ephemeral" });
+	});
+});

package/src/providers/handlers/anthropic-base.ts CHANGED Viewed

@@ -17,7 +17,6 @@ import {
 import {
 	type ApiStream,
 	type HandlerModelInfo,
-	hasModelCapability,
 	type ProviderConfig,
 	supportsModelThinking,
 } from "../types";
@@ -76,10 +75,7 @@ export class AnthropicHandler extends BaseHandler {
 		_systemPrompt: string,
 		messages: Message[],
 	): Anthropic.MessageParam[] {
-		const supportsPromptCache = hasModelCapability(
-			this.getModel().info,
-			"prompt-cache",
-		);
+		const supportsPromptCache = this.supportsPromptCache(this.getModel().info);
 		return convertToAnthropicMessages(
 			messages,
 			supportsPromptCache,
@@ -113,7 +109,7 @@ export class AnthropicHandler extends BaseHandler {
 		const budgetTokens =
 			thinkingSupported && requestedBudget > 0 ? requestedBudget : 0;
 		const nativeToolsOn = tools && tools.length > 0;
-		const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
+		const supportsPromptCache = this.supportsPromptCache(model.info);
 		const reasoningOn = thinkingSupported && budgetTokens > 0;
 		const debugThinking = isThinkingDebugEnabled();
 		const debugChunkCounts: Record<string, number> = {};
@@ -139,30 +135,34 @@ export class AnthropicHandler extends BaseHandler {
 		const requestOptions = { signal: abortSignal };
 		// Create the request
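+		// Use top-level automatic caching so the entire prefix (system +
+		// messages) is cached and the breakpoint advances each turn.
+		// NOTE(review): as shown in this hunk, createParams sets cache_control only
+		// on the system block and not at the top level — confirm against the source.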
+		const createParams: Record<string, unknown> &
+			Anthropic.MessageCreateParamsStreaming = {
+			model: model.id,
+			thinking: reasoningOn
+				? { type: "enabled", budget_tokens: budgetTokens }
+				: undefined,
+			max_tokens:
+				model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
+			temperature: reasoningOn ? undefined : 0,
+			system: [
+				supportsPromptCache
+					? {
+							text: systemPrompt,
+							type: "text",
+							cache_control: { type: "ephemeral" },
+						}
+					: { text: systemPrompt, type: "text" },
+			],
+			messages: anthropicMessages as Anthropic.MessageParam[],
+			stream: true,
+			tools: anthropicTools,
+			tool_choice: nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
+		};
 		const stream = await client.messages.create(
-			{
-				model: model.id,
-				thinking: reasoningOn
-					? { type: "enabled", budget_tokens: budgetTokens }
-					: undefined,
-				max_tokens:
-					model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
-				temperature: reasoningOn ? undefined : 0,
-				system: supportsPromptCache
-					? [
-							{
-								text: systemPrompt,
-								type: "text",
-								cache_control: { type: "ephemeral" },
-							},
-						]
-					: [{ text: systemPrompt, type: "text" }],
-				messages: anthropicMessages as Anthropic.MessageParam[],
-				stream: true,
-				tools: anthropicTools,
-				tool_choice:
-					nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
-			},
+			createParams as Anthropic.MessageCreateParamsStreaming,
 			requestOptions,
 		);
@@ -244,6 +244,7 @@ export class AnthropicHandler extends BaseHandler {
 						usageSnapshot.inputTokens,
 						usageSnapshot.outputTokens,
 						usageSnapshot.cacheReadTokens,
+						usageSnapshot.cacheWriteTokens,
 					),
 					id: responseId,
 				};
@@ -263,6 +264,7 @@ export class AnthropicHandler extends BaseHandler {
 						usageSnapshot.inputTokens,
 						usageSnapshot.outputTokens,
 						usageSnapshot.cacheReadTokens,
+						usageSnapshot.cacheWriteTokens,
 					),
 					id: responseId,
 				};

package/src/providers/handlers/base.test.ts CHANGED Viewed

@@ -15,13 +15,37 @@ class TestHandler extends BaseHandler {
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
 	): number | undefined {
-		return this.calculateCost(inputTokens, outputTokens, cacheReadTokens);
+		return this.calculateCost(
+			inputTokens,
+			outputTokens,
+			cacheReadTokens,
+			cacheWriteTokens,
+		);
+	}
+	public computeCostFromInclusiveInput(
+		inputTokens: number,
+		outputTokens: number,
+		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
+	): number | undefined {
+		return this.calculateCostFromInclusiveInput(
+			inputTokens,
+			outputTokens,
+			cacheReadTokens,
+			cacheWriteTokens,
+		);
 	}
 	public exposeAbortSignal(): AbortSignal {
 		return this.getAbortSignal();
 	}
+	public normalizeBadRequest(error: unknown): Error | undefined {
+		return this.normalizeOpenAICompatibleBadRequest(error);
+	}
 }
 describe("BaseHandler.calculateCost", () => {
@@ -45,7 +69,54 @@ describe("BaseHandler.calculateCost", () => {
 		const cost = handler.computeCost(1_000_000, 1_000_000, 100_000);
-		expect(cost).toBeCloseTo(17.73, 6);
+		expect(cost).toBeCloseTo(18.03, 6);
+	});
+	it("does not charge cache reads twice when input already includes them", () => {
+		const config: ProviderConfig = {
+			providerId: "openai-native",
+			modelId: "gpt-test",
+			apiKey: "test-key",
+			knownModels: {
+				"gpt-test": {
+					id: "gpt-test",
+					pricing: {
+						input: 1,
+						output: 2,
+						cacheRead: 0.5,
+					},
+				},
+			},
+		};
+		const handler = new TestHandler(config);
+		const cost = handler.computeCostFromInclusiveInput(100, 40, 25);
+		expect(cost).toBeCloseTo(0.0001675, 10);
+	});
+	it("does not charge cache writes twice when input already includes them", () => {
+		const config: ProviderConfig = {
+			providerId: "openai-native",
+			modelId: "gpt-test",
+			apiKey: "test-key",
+			knownModels: {
+				"gpt-test": {
+					id: "gpt-test",
+					pricing: {
+						input: 1,
+						output: 2,
+						cacheRead: 0.5,
+						cacheWrite: 1.25,
+					},
+				},
+			},
+		};
+		const handler = new TestHandler(config);
+		const cost = handler.computeCostFromInclusiveInput(100, 40, 25, 10);
+		expect(cost).toBeCloseTo(0.00017, 10);
 	});
 });
@@ -103,3 +174,57 @@ describe("BaseHandler abort signal wiring", () => {
 		expect(signal2.aborted).toBe(false);
 	});
 });
+describe("BaseHandler.normalizeOpenAICompatibleBadRequest", () => {
+	it("rewrites provider metadata prompt-limit errors into a helpful message", () => {
+		const handler = new TestHandler({
+			providerId: "openrouter",
+			modelId: "anthropic/claude-sonnet-4.6",
+			apiKey: "test-key",
+			baseUrl: "https://openrouter.ai/api/v1",
+		});
+		const error = Object.assign(new Error("400 Provider returned error"), {
+			status: 400,
+			error: {
+				message: "Provider returned error",
+				code: 400,
+				metadata: {
+					provider_name: "Anthropic",
+					raw: JSON.stringify({
+						type: "error",
+						error: {
+							type: "invalid_request_error",
+							message: "prompt is too long: 1102640 tokens > 1000000 maximum",
+						},
+						request_id: "req_123",
+					}),
+				},
+			},
+		});
+		const normalized = handler.normalizeBadRequest(error);
+		expect(normalized?.message).toBe(
+			"Anthropic request was rejected (HTTP 400). Prompt is too long: 1102640 tokens exceeds the 1000000 token limit. Request ID: req_123.",
+		);
+		expect(normalized?.cause).toBe(error);
+	});
+	it("returns undefined for non-400 errors", () => {
+		const handler = new TestHandler({
+			providerId: "openrouter",
+			modelId: "anthropic/claude-sonnet-4.6",
+			apiKey: "test-key",
+			baseUrl: "https://openrouter.ai/api/v1",
+		});
+		const normalized = handler.normalizeBadRequest(
+			Object.assign(new Error("500 Provider returned error"), {
+				status: 500,
+			}),
+		);
+		expect(normalized).toBeUndefined();
+	});
+});

package/src/providers/handlers/base.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import type {
 	ApiStream,
 	ApiStreamUsageChunk,
 	HandlerModelInfo,
+	ModelInfo,
 	ProviderConfig,
 } from "../types";
 import type { Message, ToolDefinition } from "../types/messages";
@@ -22,6 +23,22 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
 	"X-CLIENT-TYPE": "cline-sdk",
 };
+interface OpenAICompatibleProviderErrorShape {
+	status?: number;
+	message?: string;
+	error?: {
+		message?: string;
+		code?: number;
+		metadata?: {
+			raw?: string;
+			provider_name?: string;
+		};
+	};
+	response?: {
+		status?: number;
+	};
+}
 const controllerIds = new WeakMap<AbortController, string>();
 let controllerIdCounter = 0;
@@ -146,10 +163,26 @@ export abstract class BaseHandler implements ApiHandler {
 		});
 	}
+	protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
+		const resolvedModelInfo =
+			modelInfo ??
+			this.config.modelInfo ??
+			this.config.knownModels?.[this.config.modelId];
+		const pricing = resolvedModelInfo?.pricing;
+		return (
+			resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
+			this.config.capabilities?.includes("prompt-cache") === true ||
+			typeof pricing?.cacheRead === "number" ||
+			typeof pricing?.cacheWrite === "number"
+		);
+	}
 	protected calculateCost(
 		inputTokens: number,
 		outputTokens: number,
 		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
 	): number | undefined {
 		const pricing = (
 			this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
@@ -159,14 +192,32 @@ export abstract class BaseHandler implements ApiHandler {
 		}
 		return (
-			((inputTokens - cacheReadTokens) / 1_000_000) * pricing.input +
+			(inputTokens / 1_000_000) * pricing.input +
 			(outputTokens / 1_000_000) * pricing.output +
 			(cacheReadTokens > 0
 				? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
+				: 0) +
+			(cacheWriteTokens > 0
+				? (cacheWriteTokens / 1_000_000) *
+					(pricing.cacheWrite ?? pricing.input * 1.25)
 				: 0)
 		);
 	}
+	protected calculateCostFromInclusiveInput(
+		inputTokens: number,
+		outputTokens: number,
+		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
+	): number | undefined {
+		return this.calculateCost(
+			Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens),
+			outputTokens,
+			cacheReadTokens,
+			cacheWriteTokens,
+		);
+	}
 	protected createResponseId(): string {
 		return nanoid();
 	}
@@ -193,4 +244,67 @@ export abstract class BaseHandler implements ApiHandler {
 			...(this.config.headers ?? {}),
 		};
 	}
+	protected normalizeOpenAICompatibleBadRequest(
+		error: unknown,
+	): Error | undefined {
+		const rawError = error as OpenAICompatibleProviderErrorShape | undefined;
+		const status =
+			rawError?.status ??
+			rawError?.response?.status ??
+			rawError?.error?.code ??
+			(typeof rawError?.message === "string" && rawError.message.includes("400")
+				? 400
+				: undefined);
+		if (status !== 400) {
+			return undefined;
+		}
+		const rawMetadata = rawError?.error?.metadata?.raw;
+		const parsedRaw = this.parseRawProviderError(rawMetadata);
+		const detail =
+			parsedRaw?.error?.message?.trim() ||
+			rawError?.error?.message?.trim() ||
+			rawError?.message?.trim() ||
+			"Provider returned error";
+		const providerName =
+			rawError?.error?.metadata?.provider_name?.trim() || "Provider";
+		const requestId = parsedRaw?.request_id?.trim();
+		const normalizedMessage = this.rewriteProviderBadRequestDetail(detail);
+		const suffix = requestId ? ` Request ID: ${requestId}.` : "";
+		return new Error(
+			`${providerName} request was rejected (HTTP 400). ${normalizedMessage}${suffix}`,
+			{
+				cause: error instanceof Error ? error : undefined,
+			},
+		);
+	}
+	private parseRawProviderError(
+		raw: string | undefined,
+	): { error?: { message?: string }; request_id?: string } | undefined {
+		if (!raw) {
+			return undefined;
+		}
+		try {
+			return JSON.parse(raw) as {
+				error?: { message?: string };
+				request_id?: string;
+			};
+		} catch {
+			return undefined;
+		}
+	}
+	private rewriteProviderBadRequestDetail(detail: string): string {
+		const promptTooLongMatch = detail.match(
+			/prompt is too long:\s*([\d,]+)\s*tokens?\s*>\s*([\d,]+)\s*maximum/i,
+		);
+		if (promptTooLongMatch) {
+			const actual = promptTooLongMatch[1];
+			const maximum = promptTooLongMatch[2];
+			return `Prompt is too long: ${actual} tokens exceeds the ${maximum} token limit.`;
+		}
+		return detail.endsWith(".") ? detail : `${detail}.`;
+	}
 }

package/src/providers/handlers/bedrock-base.ts CHANGED Viewed

@@ -216,11 +216,11 @@ export class BedrockHandler extends BaseHandler {
 					yield {
 						type: "usage",
-						inputTokens: inputTokens - cacheReadTokens,
+						inputTokens: Math.max(0, inputTokens - cacheReadTokens),
 						outputTokens,
 						thoughtsTokenCount,
 						cacheReadTokens,
-						totalCost: this.calculateCost(
+						totalCost: this.calculateCostFromInclusiveInput(
 							inputTokens,
 							outputTokens,
 							cacheReadTokens,
@@ -245,11 +245,11 @@ export class BedrockHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens: inputTokens - cacheReadTokens,
+				inputTokens: Math.max(0, inputTokens - cacheReadTokens),
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,
-				totalCost: this.calculateCost(
+				totalCost: this.calculateCostFromInclusiveInput(
 					inputTokens,
 					outputTokens,
 					cacheReadTokens,

package/src/providers/handlers/community-sdk.test.ts CHANGED Viewed

@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
 			expect(usageChunk?.outputTokens).toBe(3);
 		});
+		it("keeps cached input tokens separate from total input tokens", async () => {
+			streamTextSpy.mockReturnValue({
+				fullStream: makeStreamParts([
+					{
+						type: "finish",
+						usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
+					},
+				]),
+			});
+			const handler = new ClaudeCodeHandler({
+				providerId: "claude-code",
+				modelId: "sonnet",
+			});
+			const chunks: ApiStreamChunk[] = [];
+			for await (const chunk of handler.createMessage("System", [
+				{ role: "user", content: "Hi" },
+			])) {
+				chunks.push(chunk);
+			}
+			const usageChunk = chunks.find(
+				(chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
+					chunk.type === "usage",
+			);
+			expect(usageChunk).toMatchObject({
+				inputTokens: 6,
+				outputTokens: 3,
+				cacheReadTokens: 4,
+			});
+		});
 		it("uses a fallback model id when model is missing", () => {
 			const handler = new ClaudeCodeHandler({
 				providerId: "claude-code",

package/src/providers/handlers/gemini-base.ts CHANGED Viewed

@@ -18,7 +18,6 @@ import {
 import {
 	type ApiStream,
 	type HandlerModelInfo,
-	type ModelInfo,
 	type ProviderConfig,
 	supportsModelThinking,
 } from "../types";
@@ -258,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
 			// Yield final usage
 			const totalCost = this.calculateGeminiCost(
-				info,
 				promptTokens,
 				outputTokens,
 				thoughtsTokenCount,
@@ -267,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens: promptTokens - cacheReadTokens,
+				inputTokens: promptTokens,
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,
@@ -288,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
 	}
 	private calculateGeminiCost(
-		info: ModelInfo,
 		inputTokens: number,
 		outputTokens: number,
 		thoughtsTokenCount: number,
 		cacheReadTokens: number,
 	): number | undefined {
-		const pricing = info.pricing;
-		if (!pricing?.input || !pricing?.output) {
-			return undefined;
-		}
-		const uncachedInputTokens = inputTokens - cacheReadTokens;
-		const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
-		const outputCost =
-			pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
-		const cacheReadCost =
-			cacheReadTokens > 0
-				? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
-				: 0;
-		return inputCost + outputCost + cacheReadCost;
+		return this.calculateCost(
+			inputTokens,
+			outputTokens + thoughtsTokenCount,
+			cacheReadTokens,
+		);
 	}
 }