npm - @clinebot/llms - Versions diffs - 0.0.11 → 0.0.13 - Mend

@clinebot/llms 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/README.md +1 -1
package/dist/index.browser.js +14 -14
package/dist/index.js +13 -13
package/dist/providers/handlers/base.d.ts +4 -0
package/package.json +2 -2
package/src/models/generated.ts +223 -641
package/src/models/models-dev-catalog.test.ts +2 -2
package/src/models/models-dev-catalog.ts +9 -2
package/src/providers/handlers/ai-sdk-community.ts +8 -2
package/src/providers/handlers/base.test.ts +119 -0
package/src/providers/handlers/base.ts +93 -0
package/src/providers/handlers/bedrock-base.ts +4 -4
package/src/providers/handlers/community-sdk.test.ts +1 -1
package/src/providers/handlers/openai-base.ts +14 -6
package/src/providers/handlers/openai-responses.test.ts +2 -2
package/src/providers/handlers/openai-responses.ts +10 -2
package/src/providers/handlers/r1-base.ts +5 -2
package/src/providers/transform/gemini-format.ts +77 -1

package/src/models/models-dev-catalog.test.ts CHANGED Viewed

@@ -82,7 +82,7 @@ describe("models-dev-catalog", () => {
 					id: "claude-defaults",
 					name: "claude-defaults",
 					contextWindow: 4096,
-					maxTokens: 4096,
+					maxTokens: 204,
 					capabilities: ["tools"],
 					pricing: {
 						input: 0,
@@ -97,7 +97,7 @@ describe("models-dev-catalog", () => {
 					id: "claude-older",
 					name: "claude-older",
 					contextWindow: 4096,
-					maxTokens: 4096,
+					maxTokens: 204,
 					capabilities: ["tools"],
 					pricing: {
 						input: 0,

package/src/models/models-dev-catalog.ts CHANGED Viewed

@@ -93,11 +93,18 @@ function toStatus(status: string | undefined): ModelInfo["status"] {
 }
 function toModelInfo(modelId: string, model: ModelsDevModel): ModelInfo {
+	// If context or output limits are missing, default to DEFAULT_CONTEXT_WINDOW and DEFAULT_MAX_TOKENS respectively.
+	// If context and max are the same value, assume max tokens should be 5% of that value to avoid overallocation.
+	const contextWindow = model.limit?.context ?? DEFAULT_CONTEXT_WINDOW;
+	const outputToken = model.limit?.output ?? DEFAULT_MAX_TOKENS;
+	const discounted =
+		contextWindow === outputToken ? outputToken * 0.05 : outputToken;
 	return {
 		id: modelId,
 		name: model.name || modelId,
-		contextWindow: model.limit?.context ?? DEFAULT_CONTEXT_WINDOW,
-		maxTokens: model.limit?.output ?? DEFAULT_MAX_TOKENS,
+		contextWindow,
+		maxTokens: Math.floor(discounted),
 		capabilities: toCapabilities(model),
 		pricing: {
 			input: model.cost?.input ?? 0,

package/src/providers/handlers/ai-sdk-community.ts CHANGED Viewed

@@ -169,7 +169,10 @@ export async function* emitAiSdkStream(
 				yield {
 					type: "usage",
-					inputTokens: usageMetrics.inputTokens,
+					inputTokens: Math.max(
+						0,
+						usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
+					),
 					outputTokens: usageMetrics.outputTokens,
 					thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
 					cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -204,7 +207,10 @@ export async function* emitAiSdkStream(
 		const usageMetrics = resolveUsageMetrics(usage);
 		yield {
 			type: "usage",
-			inputTokens: usageMetrics.inputTokens,
+			inputTokens: Math.max(
+				0,
+				usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
+			),
 			outputTokens: usageMetrics.outputTokens,
 			thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
 			cacheReadTokens: usageMetrics.cacheReadTokens,

package/src/providers/handlers/base.test.ts CHANGED Viewed

@@ -25,9 +25,27 @@ class TestHandler extends BaseHandler {
 		);
 	}
+	/** Test-only bridge that makes the protected `calculateCostFromInclusiveInput` callable from specs. */
+	public computeCostFromInclusiveInput(
+		inputTokens: number,
+		outputTokens: number,
+		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
+	): number | undefined {
+		const cost = this.calculateCostFromInclusiveInput(
+			inputTokens,
+			outputTokens,
+			cacheReadTokens,
+			cacheWriteTokens,
+		);
+		return cost;
+	}
 	public exposeAbortSignal(): AbortSignal {
 		return this.getAbortSignal();
 	}
+	/** Test-only bridge that makes the protected `normalizeOpenAICompatibleBadRequest` callable from specs. */
+	public normalizeBadRequest(error: unknown): Error | undefined {
+		const normalized = this.normalizeOpenAICompatibleBadRequest(error);
+		return normalized;
+	}
 }
 describe("BaseHandler.calculateCost", () => {
@@ -53,6 +71,53 @@ describe("BaseHandler.calculateCost", () => {
 		expect(cost).toBeCloseTo(18.03, 6);
 	});
+	it("does not charge cache reads twice when input already includes them", () => {
+		const pricing = { input: 1, output: 2, cacheRead: 0.5 };
+		const handler = new TestHandler({
+			providerId: "openai-native",
+			modelId: "gpt-test",
+			apiKey: "test-key",
+			knownModels: {
+				"gpt-test": { id: "gpt-test", pricing },
+			},
+		});
+		// The 100 input tokens already contain the 25 cache-read tokens.
+		const totalCost = handler.computeCostFromInclusiveInput(100, 40, 25);
+		expect(totalCost).toBeCloseTo(0.0001675, 10);
+	});
+	it("does not charge cache writes twice when input already includes them", () => {
+		const pricing = { input: 1, output: 2, cacheRead: 0.5, cacheWrite: 1.25 };
+		const handler = new TestHandler({
+			providerId: "openai-native",
+			modelId: "gpt-test",
+			apiKey: "test-key",
+			knownModels: {
+				"gpt-test": { id: "gpt-test", pricing },
+			},
+		});
+		// The 100 input tokens already contain 25 cache-read and 10 cache-write tokens.
+		const totalCost = handler.computeCostFromInclusiveInput(100, 40, 25, 10);
+		expect(totalCost).toBeCloseTo(0.00017, 10);
+	});
 });
 describe("BaseHandler abort signal wiring", () => {
@@ -109,3 +174,57 @@ describe("BaseHandler abort signal wiring", () => {
 		expect(signal2.aborted).toBe(false);
 	});
 });
+describe("BaseHandler.normalizeOpenAICompatibleBadRequest", () => {
+	// Every case runs against a freshly built handler with the same OpenRouter configuration.
+	const createHandler = (): TestHandler =>
+		new TestHandler({
+			providerId: "openrouter",
+			modelId: "anthropic/claude-sonnet-4.6",
+			apiKey: "test-key",
+			baseUrl: "https://openrouter.ai/api/v1",
+		});
+	it("rewrites provider metadata prompt-limit errors into a helpful message", () => {
+		// Upstream error body, carried as a JSON string under `metadata.raw`.
+		const upstreamBody = JSON.stringify({
+			type: "error",
+			error: {
+				type: "invalid_request_error",
+				message: "prompt is too long: 1102640 tokens > 1000000 maximum",
+			},
+			request_id: "req_123",
+		});
+		const error = Object.assign(new Error("400 Provider returned error"), {
+			status: 400,
+			error: {
+				message: "Provider returned error",
+				code: 400,
+				metadata: {
+					provider_name: "Anthropic",
+					raw: upstreamBody,
+				},
+			},
+		});
+		const normalized = createHandler().normalizeBadRequest(error);
+		expect(normalized?.message).toBe(
+			"Anthropic request was rejected (HTTP 400). Prompt is too long: 1102640 tokens exceeds the 1000000 token limit. Request ID: req_123.",
+		);
+		expect(normalized?.cause).toBe(error);
+	});
+	it("returns undefined for non-400 errors", () => {
+		const serverError = Object.assign(
+			new Error("500 Provider returned error"),
+			{ status: 500 },
+		);
+		const normalized = createHandler().normalizeBadRequest(serverError);
+		expect(normalized).toBeUndefined();
+	});
+});

package/src/providers/handlers/base.ts CHANGED Viewed

@@ -23,6 +23,22 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
 	"X-CLIENT-TYPE": "cline-sdk",
 };
+/**
+ * Loosely typed view of an error thrown by an OpenAI-compatible client.
+ * Every field is optional because values are cast to this shape from `unknown`, not validated.
+ */
+interface OpenAICompatibleProviderErrorShape {
+	/** Top-level error text; used as a last-resort detail and scanned for "400" when no status is present. */
+	message?: string;
+	/** HTTP status set directly on the error object. */
+	status?: number;
+	/** Nested response object whose status is consulted when `status` is absent. */
+	response?: {
+		status?: number;
+	};
+	/** Provider error payload. */
+	error?: {
+		message?: string;
+		/** Consulted as a status code after `status` and `response.status`. */
+		code?: number;
+		metadata?: {
+			/** Provider name used as the prefix of the normalized message. */
+			provider_name?: string;
+			/** JSON-encoded upstream error body. */
+			raw?: string;
+		};
+	};
+}
 const controllerIds = new WeakMap<AbortController, string>();
 let controllerIdCounter = 0;
@@ -188,6 +204,20 @@ export abstract class BaseHandler implements ApiHandler {
 		);
 	}
+	/**
+	 * Computes cost for usage reports whose `inputTokens` already counts cache-read and cache-write tokens.
+	 *
+	 * The cache tokens are subtracted from the input count (never going below zero) and the
+	 * remainder is passed to `calculateCost` together with the cache counts, so cached tokens
+	 * are not also charged as regular input.
+	 *
+	 * @param inputTokens - Input token count including cache reads and writes.
+	 * @param outputTokens - Output token count.
+	 * @param cacheReadTokens - Tokens read from cache (defaults to 0).
+	 * @param cacheWriteTokens - Tokens written to cache (defaults to 0).
+	 * @returns Whatever `calculateCost` returns for the adjusted counts.
+	 */
+	protected calculateCostFromInclusiveInput(
+		inputTokens: number,
+		outputTokens: number,
+		cacheReadTokens = 0,
+		cacheWriteTokens = 0,
+	): number | undefined {
+		const uncachedInputTokens = Math.max(
+			0,
+			inputTokens - cacheReadTokens - cacheWriteTokens,
+		);
+		return this.calculateCost(
+			uncachedInputTokens,
+			outputTokens,
+			cacheReadTokens,
+			cacheWriteTokens,
+		);
+	}
 	protected createResponseId(): string {
 		return nanoid();
 	}
@@ -214,4 +244,67 @@ export abstract class BaseHandler implements ApiHandler {
 			...(this.config.headers ?? {}),
 		};
 	}
+	/**
+	 * Turns an HTTP 400 failure from an OpenAI-compatible endpoint into an Error with a readable message.
+	 *
+	 * The status is taken from the first numeric value among `status`, `response.status` and
+	 * `error.code`; when none is numeric, a standalone "400" in the error message is accepted.
+	 * The detail text prefers the message inside the JSON-encoded `error.metadata.raw` body,
+	 * then `error.message`, then the top-level message.
+	 *
+	 * @param error - Value thrown by the client call; may be anything.
+	 * @returns A new Error (with the original attached as `cause` when it is an Error),
+	 *   or `undefined` when the failure is not identified as HTTP 400.
+	 */
+	protected normalizeOpenAICompatibleBadRequest(
+		error: unknown,
+	): Error | undefined {
+		const rawError = error as OpenAICompatibleProviderErrorShape | undefined;
+		// Payload fields are untyped at runtime: only trust values that really are non-blank strings,
+		// so a malformed payload cannot throw from inside the error handler and mask the original error.
+		const asText = (value: unknown): string | undefined => {
+			if (typeof value !== "string") {
+				return undefined;
+			}
+			const trimmed = value.trim();
+			return trimmed === "" ? undefined : trimmed;
+		};
+		// Take the first *numeric* candidate so a string `error.code` cannot hide the remaining sources.
+		const statusCandidates: unknown[] = [
+			rawError?.status,
+			rawError?.response?.status,
+			rawError?.error?.code,
+		];
+		const explicitStatus = statusCandidates.find(
+			(candidate): candidate is number => typeof candidate === "number",
+		);
+		// Fallback: look for "400" in the message, but not as part of a longer number such as 1400 or 4000.
+		const topLevelMessage = rawError?.message;
+		const messageMentions400 =
+			typeof topLevelMessage === "string" &&
+			/(?<!\d)400(?!\d)/.test(topLevelMessage);
+		const status = explicitStatus ?? (messageMentions400 ? 400 : undefined);
+		if (status !== 400) {
+			return undefined;
+		}
+		const metadata = rawError?.error?.metadata;
+		const parsedRaw = this.parseRawProviderError(
+			typeof metadata?.raw === "string" ? metadata.raw : undefined,
+		);
+		const detail =
+			asText(parsedRaw?.error?.message) ??
+			asText(rawError?.error?.message) ??
+			asText(topLevelMessage) ??
+			"Provider returned error";
+		const providerName = asText(metadata?.provider_name) ?? "Provider";
+		const requestId = asText(parsedRaw?.request_id);
+		const normalizedMessage = this.rewriteProviderBadRequestDetail(detail);
+		const suffix = requestId ? ` Request ID: ${requestId}.` : "";
+		return new Error(
+			`${providerName} request was rejected (HTTP 400). ${normalizedMessage}${suffix}`,
+			{
+				cause: error instanceof Error ? error : undefined,
+			},
+		);
+	}
+	/**
+	 * Parses the JSON-encoded upstream error body and extracts the fields the normalizer reads.
+	 *
+	 * The parsed value is validated instead of cast: only a plain object is accepted, and
+	 * `error.message` / `request_id` are kept only when they are strings, so callers can rely
+	 * on the declared return type.
+	 *
+	 * @param raw - JSON text of the upstream error body, if any.
+	 * @returns The extracted fields, or `undefined` when `raw` is missing, is not valid JSON,
+	 *   or does not decode to an object.
+	 */
+	private parseRawProviderError(
+		raw: string | undefined,
+	): { error?: { message?: string }; request_id?: string } | undefined {
+		if (typeof raw !== "string" || raw === "") {
+			return undefined;
+		}
+		let parsed: unknown;
+		try {
+			parsed = JSON.parse(raw);
+		} catch {
+			return undefined;
+		}
+		// JSON text may decode to a primitive, null or an array; none of those carry the expected fields.
+		if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+			return undefined;
+		}
+		const body = parsed as { error?: unknown; request_id?: unknown };
+		const result: { error?: { message?: string }; request_id?: string } = {};
+		if (typeof body.error === "object" && body.error !== null) {
+			const message = (body.error as { message?: unknown }).message;
+			result.error = typeof message === "string" ? { message } : {};
+		}
+		if (typeof body.request_id === "string") {
+			result.request_id = body.request_id;
+		}
+		return result;
+	}
+	/**
+	 * Rephrases a provider's bad-request detail for display.
+	 *
+	 * A "prompt is too long: N tokens > M maximum" detail is rewritten with both numbers kept
+	 * as written; any other detail is returned unchanged apart from a guaranteed trailing period.
+	 *
+	 * @param detail - Detail text taken from the provider error.
+	 * @returns The sentence to embed in the normalized error message.
+	 */
+	private rewriteProviderBadRequestDetail(detail: string): string {
+		const promptLimitPattern =
+			/prompt is too long:\s*([\d,]+)\s*tokens?\s*>\s*([\d,]+)\s*maximum/i;
+		const limitMatch = promptLimitPattern.exec(detail);
+		if (limitMatch === null) {
+			return detail.endsWith(".") ? detail : `${detail}.`;
+		}
+		const [, actual, maximum] = limitMatch;
+		return `Prompt is too long: ${actual} tokens exceeds the ${maximum} token limit.`;
+	}
 }

package/src/providers/handlers/bedrock-base.ts CHANGED Viewed

@@ -216,11 +216,11 @@ export class BedrockHandler extends BaseHandler {
 					yield {
 						type: "usage",
-						inputTokens,
+						inputTokens: Math.max(0, inputTokens - cacheReadTokens),
 						outputTokens,
 						thoughtsTokenCount,
 						cacheReadTokens,
-						totalCost: this.calculateCost(
+						totalCost: this.calculateCostFromInclusiveInput(
 							inputTokens,
 							outputTokens,
 							cacheReadTokens,
@@ -245,11 +245,11 @@ export class BedrockHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens,
+				inputTokens: Math.max(0, inputTokens - cacheReadTokens),
 				outputTokens,
 				thoughtsTokenCount,
 				cacheReadTokens,
-				totalCost: this.calculateCost(
+				totalCost: this.calculateCostFromInclusiveInput(
 					inputTokens,
 					outputTokens,
 					cacheReadTokens,

package/src/providers/handlers/community-sdk.test.ts CHANGED Viewed

@@ -142,7 +142,7 @@ describe("Community SDK handlers", () => {
 					chunk.type === "usage",
 			);
 			expect(usageChunk).toMatchObject({
-				inputTokens: 10,
+				inputTokens: 6,
 				outputTokens: 3,
 				cacheReadTokens: 4,
 			});

package/src/providers/handlers/openai-base.ts CHANGED Viewed

@@ -217,10 +217,15 @@ export class OpenAIBaseHandler extends BaseHandler {
 			requestHeaders.Authorization = `Bearer ${apiKey}`;
 		}
 		const abortSignal = this.getAbortSignal();
-		const stream = await client.chat.completions.create(requestOptions, {
-			signal: abortSignal,
-			headers: requestHeaders,
-		});
+		let stream: AsyncIterable<ChatCompletionChunk>;
+		try {
+			stream = await client.chat.completions.create(requestOptions, {
+				signal: abortSignal,
+				headers: requestHeaders,
+			});
+		} catch (error) {
+			throw this.normalizeOpenAICompatibleBadRequest(error) ?? error;
+		}
 		const toolCallProcessor = new ToolCallProcessor();
 		let finishReason: string | null = null;
@@ -309,11 +314,14 @@ export class OpenAIBaseHandler extends BaseHandler {
 			yield {
 				type: "usage",
-				inputTokens,
+				inputTokens: Math.max(
+					0,
+					inputTokens - cacheReadTokens - cacheWriteTokens,
+				),
 				outputTokens,
 				cacheReadTokens,
 				cacheWriteTokens,
-				totalCost: this.calculateCost(
+				totalCost: this.calculateCostFromInclusiveInput(
 					inputTokens,
 					outputTokens,
 					cacheReadTokens,

package/src/providers/handlers/openai-responses.test.ts CHANGED Viewed

@@ -246,14 +246,14 @@ describe("OpenAIResponsesHandler", () => {
 		expect(chunks[0]).toMatchObject({
 			type: "usage",
-			inputTokens: 100,
+			inputTokens: 75,
 			outputTokens: 40,
 			cacheReadTokens: 25,
 			cacheWriteTokens: 0,
 		});
 		expect(chunks[0]?.type).toBe("usage");
 		if (chunks[0]?.type === "usage") {
-			expect(chunks[0].totalCost).toBeCloseTo(0.0001925, 10);
+			expect(chunks[0].totalCost).toBeCloseTo(0.0001675, 10);
 		}
 	});
 });

package/src/providers/handlers/openai-responses.ts CHANGED Viewed

@@ -330,6 +330,11 @@ export class OpenAIResponsesHandler extends BaseHandler {
 				{ signal: abortSignal, headers: requestHeaders },
 			);
 		} catch (error) {
+			const normalizedBadRequest =
+				this.normalizeOpenAICompatibleBadRequest(error);
+			if (normalizedBadRequest) {
+				throw normalizedBadRequest;
+			}
 			if (this.config.providerId === "openai-codex") {
 				const rawError = error as
 					| (Error & {
@@ -568,7 +573,7 @@ export class OpenAIResponsesHandler extends BaseHandler {
 						usage.input_tokens_details?.cached_tokens || 0;
 					const cacheWriteTokens = 0;
-					const totalCost = this.calculateCost(
+					const totalCost = this.calculateCostFromInclusiveInput(
 						inputTokens,
 						outputTokens,
 						cacheReadTokens,
@@ -577,7 +582,10 @@ export class OpenAIResponsesHandler extends BaseHandler {
 					yield {
 						type: "usage",
-						inputTokens,
+						inputTokens: Math.max(
+							0,
+							inputTokens - cacheReadTokens - cacheWriteTokens,
+						),
 						outputTokens,
 						cacheWriteTokens,
 						cacheReadTokens,

package/src/providers/handlers/r1-base.ts CHANGED Viewed

@@ -257,11 +257,14 @@ export class R1BaseHandler extends BaseHandler {
 		yield {
 			type: "usage",
-			inputTokens,
+			inputTokens: Math.max(
+				0,
+				inputTokens - cacheReadTokens - cacheWriteTokens,
+			),
 			outputTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
-			totalCost: this.calculateCost(
+			totalCost: this.calculateCostFromInclusiveInput(
 				inputTokens,
 				outputTokens,
 				cacheReadTokens,

package/src/providers/transform/gemini-format.ts CHANGED Viewed

@@ -172,6 +172,80 @@ function convertContentBlock(
 	}
 }
+/**
+ * JSON Schema keywords kept when a schema is sanitized for Gemini, grouped by the kind of
+ * value they describe.
+ * See: https://ai.google.dev/gemini-api/docs/structured-output
+ */
+const GEMINI_ALLOWED_PROPERTIES = new Set<string>([
+	...["type", "title", "description", "enum"], // common
+	...["properties", "required", "additionalProperties"], // object
+	...["format"], // string
+	...["minimum", "maximum"], // number / integer
+	...["items", "prefixItems", "minItems", "maxItems"], // array
+]);
+/**
+ * Recursively reduce a JSON Schema to the keywords listed in GEMINI_ALLOWED_PROPERTIES.
+ *
+ * Nested schemas under `properties`, `items`, `additionalProperties` and `prefixItems` are
+ * sanitized too; names inside `properties` are property names, not keywords, and are all kept.
+ * Numeric `exclusiveMinimum` / `exclusiveMaximum` become `minimum` / `maximum` as a
+ * best-effort fallback when no inclusive bound is present. The boolean form of those keywords
+ * is a modifier rather than a bound, so it is dropped instead of being copied into
+ * `minimum` / `maximum`.
+ *
+ * @param schema - Schema (or schema fragment) to sanitize.
+ * @returns A new sanitized object; non-object and array inputs are returned unchanged.
+ */
+function sanitizeSchemaForGemini(schema: unknown): unknown {
+	if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
+		return schema;
+	}
+	const input = schema as Record<string, unknown>;
+	const output: Record<string, unknown> = {};
+	for (const [key, value] of Object.entries(input)) {
+		if (!GEMINI_ALLOWED_PROPERTIES.has(key)) {
+			continue;
+		}
+		if (key === "properties" && value && typeof value === "object") {
+			const sanitized: Record<string, unknown> = {};
+			for (const [propName, propSchema] of Object.entries(
+				value as Record<string, unknown>,
+			)) {
+				sanitized[propName] = sanitizeSchemaForGemini(propSchema);
+			}
+			output[key] = sanitized;
+		} else if (key === "items" || key === "additionalProperties") {
+			// `additionalProperties` may be a boolean; only object values are schemas.
+			output[key] =
+				typeof value === "object" && value !== null
+					? sanitizeSchemaForGemini(value)
+					: value;
+		} else if (key === "prefixItems" && Array.isArray(value)) {
+			output[key] = value.map((item) => sanitizeSchemaForGemini(item));
+		} else {
+			output[key] = value;
+		}
+	}
+	// Convert numeric exclusiveMinimum/exclusiveMaximum to minimum/maximum.
+	// The typeof check rejects the boolean form, which would otherwise yield `minimum: true`.
+	if (
+		typeof input.exclusiveMinimum === "number" &&
+		output.minimum === undefined
+	) {
+		output.minimum = input.exclusiveMinimum;
+	}
+	if (
+		typeof input.exclusiveMaximum === "number" &&
+		output.maximum === undefined
+	) {
+		output.maximum = input.exclusiveMaximum;
+	}
+	return output;
+}
 /**
  * Convert tool definitions to Gemini format
  */
@@ -181,6 +255,8 @@ export function convertToolsToGemini(
 	return tools.map((tool) => ({
 		name: tool.name,
 		description: tool.description,
-		parameters: tool.inputSchema as FunctionDeclaration["parameters"],
+		parameters: sanitizeSchemaForGemini(
+			tool.inputSchema,
+		) as FunctionDeclaration["parameters"],
 	}));
 }