npm - @clinebot/llms - Versions diffs - 0.0.10 → 0.0.12 - Mend

@clinebot/llms 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/dist/index.browser.js +14 -14
package/dist/index.js +13 -13
package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
package/dist/providers/handlers/base.d.ts +7 -2
package/dist/providers/types/stream.d.ts +1 -1
package/package.json +2 -2
package/src/models/generated.ts +223 -641
package/src/models/models-dev-catalog.test.ts +2 -2
package/src/models/models-dev-catalog.ts +9 -2
package/src/providers/handlers/ai-sdk-community.ts +3 -0
package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
package/src/providers/handlers/anthropic-base.test.ts +30 -0
package/src/providers/handlers/anthropic-base.ts +31 -29
package/src/providers/handlers/base.test.ts +127 -2
package/src/providers/handlers/base.ts +115 -1
package/src/providers/handlers/bedrock-base.ts +4 -4
package/src/providers/handlers/community-sdk.test.ts +33 -0
package/src/providers/handlers/gemini-base.ts +6 -19
package/src/providers/handlers/openai-base.ts +33 -14
package/src/providers/handlers/openai-responses.test.ts +46 -0
package/src/providers/handlers/openai-responses.ts +12 -8
package/src/providers/handlers/r1-base.ts +10 -8
package/src/providers/handlers/vertex.ts +14 -4
package/src/providers/transform/anthropic-format.ts +14 -2
package/src/providers/transform/format-conversion.test.ts +23 -0
package/src/providers/transform/gemini-format.ts +77 -1
package/src/providers/types/stream.ts +1 -1

package/src/providers/handlers/openai-base.ts CHANGED Viewed

@@ -22,7 +22,6 @@ import type {
 	ModelInfo,
 	ProviderConfig,
 } from "../types";
-import { hasModelCapability } from "../types";
 import type { Message, ToolDefinition } from "../types/messages";
 import { retryStream } from "../utils/retry";
 import { ToolCallProcessor } from "../utils/tool-processor";
@@ -108,9 +107,7 @@ export class OpenAIBaseHandler extends BaseHandler {
 		messages: Message[],
 	): OpenAI.Chat.ChatCompletionMessageParam[] {
 		const model = this.getModel();
-		const supportsPromptCache =
-			hasModelCapability(model.info, "prompt-cache") ||
-			this.config.capabilities?.includes("prompt-cache") === true;
+		const supportsPromptCache = this.supportsPromptCache(model.info);
 		const systemMessage = supportsPromptCache
 			? ({
 					role: "system",
@@ -156,7 +153,8 @@ export class OpenAIBaseHandler extends BaseHandler {
 		const openAiMessages = this.getMessages(systemPrompt, messages);
 		// Build request options
-		const requestOptions: OpenAI.ChatCompletionCreateParamsStreaming = {
+		const requestOptions: Record<string, unknown> &
+			OpenAI.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
 			messages: openAiMessages,
 			stream: true,
@@ -167,6 +165,17 @@ export class OpenAIBaseHandler extends BaseHandler {
 			}),
 		};
+		// Add top-level cache_control for OpenRouter with Anthropic models.
+		// This enables automatic caching where the cache breakpoint advances
+		// as the conversation grows, rather than relying on explicit per-block
+		// breakpoints which are limited to 4.
+		if (
+			this.config.providerId === "openrouter" &&
+			modelId.startsWith("anthropic/")
+		) {
+			requestOptions.cache_control = { type: "ephemeral" };
+		}
 		// Add max tokens if configured
 		const maxTokens = modelInfo.maxTokens ?? this.config.maxOutputTokens;
 		if (maxTokens) {
@@ -208,10 +217,15 @@ export class OpenAIBaseHandler extends BaseHandler {
 			requestHeaders.Authorization = `Bearer ${apiKey}`;
 		}
 		const abortSignal = this.getAbortSignal();
-		const stream = await client.chat.completions.create(requestOptions, {
-			signal: abortSignal,
-			headers: requestHeaders,
-		});
+		let stream: AsyncIterable<ChatCompletionChunk>;
+		try {
+			stream = await client.chat.completions.create(requestOptions, {
+				signal: abortSignal,
+				headers: requestHeaders,
+			});
+		} catch (error) {
+			throw this.normalizeOpenAICompatibleBadRequest(error) ?? error;
+		}
 		const toolCallProcessor = new ToolCallProcessor();
 		let finishReason: string | null = null;
@@ -286,27 +300,32 @@ export class OpenAIBaseHandler extends BaseHandler {
 					cached_tokens?: number;
 					cache_write_tokens?: number;
 				};
-				prompt_cache_miss_tokens?: number;
 				cache_creation_input_tokens?: number;
 				cache_read_input_tokens?: number;
 			};
 			const cacheReadTokens =
-				usageWithCache.prompt_tokens_details?.cached_tokens ?? 0;
+				usageWithCache.prompt_tokens_details?.cached_tokens ??
+				usageWithCache.cache_read_input_tokens ??
+				0;
 			const cacheWriteTokens =
 				usageWithCache.prompt_tokens_details?.cache_write_tokens ??
-				usageWithCache.prompt_cache_miss_tokens ??
+				usageWithCache.cache_creation_input_tokens ??
 				0;
 			yield {
 				type: "usage",
-				inputTokens,
+				inputTokens: Math.max(
+					0,
+					inputTokens - cacheReadTokens - cacheWriteTokens,
+				),
 				outputTokens,
 				cacheReadTokens,
 				cacheWriteTokens,
-				totalCost: this.calculateCost(
+				totalCost: this.calculateCostFromInclusiveInput(
 					inputTokens,
 					outputTokens,
 					cacheReadTokens,
+					cacheWriteTokens,
 				),
 				id: responseId,
 			};

package/src/providers/handlers/openai-responses.test.ts CHANGED Viewed

@@ -210,4 +210,50 @@ describe("OpenAIResponsesHandler", () => {
 			},
 		});
 	});
+	it("keeps cached input tokens separate in usage chunks", () => {
+		const handler = new TestOpenAIResponsesHandler({
+			providerId: "openai-native",
+			modelId: "gpt-5.4",
+			apiKey: "test-key",
+			baseUrl: "https://example.com",
+			modelInfo: {
+				id: "gpt-5.4",
+				pricing: {
+					input: 1,
+					output: 2,
+					cacheRead: 0.5,
+				},
+			},
+		});
+		const chunks = handler.processChunkForTest({
+			type: "response.completed",
+			response: {
+				id: "resp_usage",
+				usage: {
+					input_tokens: 100,
+					output_tokens: 40,
+					input_tokens_details: {
+						cached_tokens: 25,
+					},
+					output_tokens_details: {
+						reasoning_tokens: 10,
+					},
+				},
+			},
+		});
+		expect(chunks[0]).toMatchObject({
+			type: "usage",
+			inputTokens: 75,
+			outputTokens: 40,
+			cacheReadTokens: 25,
+			cacheWriteTokens: 0,
+		});
+		expect(chunks[0]?.type).toBe("usage");
+		if (chunks[0]?.type === "usage") {
+			expect(chunks[0].totalCost).toBeCloseTo(0.0001675, 10);
+		}
+	});
 });

package/src/providers/handlers/openai-responses.ts CHANGED Viewed

@@ -330,6 +330,11 @@ export class OpenAIResponsesHandler extends BaseHandler {
 				{ signal: abortSignal, headers: requestHeaders },
 			);
 		} catch (error) {
+			const normalizedBadRequest =
+				this.normalizeOpenAICompatibleBadRequest(error);
+			if (normalizedBadRequest) {
+				throw normalizedBadRequest;
+			}
 			if (this.config.providerId === "openai-codex") {
 				const rawError = error as
 					| (Error & {
@@ -565,23 +570,22 @@ export class OpenAIResponsesHandler extends BaseHandler {
 					const inputTokens = usage.input_tokens || 0;
 					const outputTokens = usage.output_tokens || 0;
 					const cacheReadTokens =
-						usage.output_tokens_details?.reasoning_tokens || 0;
-					const cacheWriteTokens =
 						usage.input_tokens_details?.cached_tokens || 0;
+					const cacheWriteTokens = 0;
-					const totalCost = this.calculateCost(
+					const totalCost = this.calculateCostFromInclusiveInput(
 						inputTokens,
 						outputTokens,
 						cacheReadTokens,
-					);
-					const nonCachedInputTokens = Math.max(
-						0,
-						inputTokens - cacheReadTokens - cacheWriteTokens,
+						cacheWriteTokens,
 					);
 					yield {
 						type: "usage",
-						inputTokens: nonCachedInputTokens,
+						inputTokens: Math.max(
+							0,
+							inputTokens - cacheReadTokens - cacheWriteTokens,
+						),
 						outputTokens,
 						cacheWriteTokens,
 						cacheReadTokens,

package/src/providers/handlers/r1-base.ts CHANGED Viewed

@@ -255,19 +255,21 @@ export class R1BaseHandler extends BaseHandler {
 		const cacheReadTokens = r1Usage.prompt_cache_hit_tokens ?? 0;
 		const cacheWriteTokens = r1Usage.prompt_cache_miss_tokens ?? 0;
-		// Calculate non-cached input tokens (will always be 0 for DeepSeek since input = read + write)
-		const nonCachedInputTokens = Math.max(
-			0,
-			inputTokens - cacheReadTokens - cacheWriteTokens,
-		);
 		yield {
 			type: "usage",
-			inputTokens: nonCachedInputTokens,
+			inputTokens: Math.max(
+				0,
+				inputTokens - cacheReadTokens - cacheWriteTokens,
+			),
 			outputTokens,
 			cacheReadTokens,
 			cacheWriteTokens,
-			totalCost: this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+			totalCost: this.calculateCostFromInclusiveInput(
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+				cacheWriteTokens,
+			),
 			id: responseId,
 		};
 	}

package/src/providers/handlers/vertex.ts CHANGED Viewed

@@ -189,7 +189,7 @@ export class VertexHandler extends BaseHandler {
 		if (!isClaudeModel(model.id)) {
 			return this.ensureGeminiHandler().getMessages(systemPrompt, messages);
 		}
-		const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
+		const supportsPromptCache = this.supportsPromptCache(model.info);
 		return convertToAnthropicMessages(messages, supportsPromptCache);
 	}
@@ -226,7 +226,7 @@ export class VertexHandler extends BaseHandler {
 		const budgetTokens = this.config.thinkingBudgetTokens ?? 0;
 		const reasoningOn =
 			hasModelCapability(model.info, "reasoning") && budgetTokens > 0;
-		const promptCacheOn = hasModelCapability(model.info, "prompt-cache");
+		const promptCacheOn = this.supportsPromptCache(model.info);
 		const providerOptions: Record<string, unknown> = {};
 		if (reasoningOn) {
@@ -251,8 +251,18 @@ export class VertexHandler extends BaseHandler {
 		yield* emitAiSdkStream(stream, {
 			responseId,
 			errorMessage: "Vertex Anthropic stream failed",
-			calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
-				this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+			calculateCost: (
+				inputTokens,
+				outputTokens,
+				cacheReadTokens,
+				cacheWriteTokens,
+			) =>
+				this.calculateCost(
+					inputTokens,
+					outputTokens,
+					cacheReadTokens,
+					cacheWriteTokens,
+				),
 			reasoningTypes: ["reasoning-delta"],
 			enableToolCalls: true,
 			toolCallArgsOrder: ["input", "args"],

package/src/providers/transform/anthropic-format.ts CHANGED Viewed

@@ -32,12 +32,24 @@ export function convertToAnthropicMessages(
 	messages: Message[],
 	enableCaching = false,
 ): AnthropicMessage[] {
+	const userMessageIndices = messages.reduce<number[]>(
+		(indices, message, index) => {
+			if (message.role === "user") {
+				indices.push(index);
+			}
+			return indices;
+		},
+		[],
+	);
+	const cacheableMessageIndices = enableCaching
+		? new Set(userMessageIndices.slice(-2))
+		: new Set<number>();
 	const result: AnthropicMessage[] = [];
-	for (const message of messages) {
+	for (const [index, message] of messages.entries()) {
 		const converted = convertMessage(
 			message,
-			enableCaching && messages.indexOf(message) === messages.length - 1,
+			cacheableMessageIndices.has(index),
 		);
 		if (converted) {
 			result.push(converted);

package/src/providers/transform/format-conversion.test.ts CHANGED Viewed

@@ -285,10 +285,33 @@ describe("format conversion", () => {
 		];
 		const anthropic = convertToAnthropicMessages(messages, true) as any[];
+		expect(anthropic[0].content[0].cache_control).toEqual({
+			type: "ephemeral",
+		});
 		expect(anthropic[1].content[0].type).toBe("thinking");
 		expect(anthropic[1].content[0].signature).toBe("anthropic-sig");
 	});
+	it("applies anthropic cache markers to the last two user messages", () => {
+		const messages: Message[] = [
+			{ role: "user", content: "first prompt" },
+			{ role: "assistant", content: "intermediate response" },
+			{ role: "user", content: "second prompt" },
+			{ role: "assistant", content: "another response" },
+			{ role: "user", content: "third prompt" },
+		];
+		const anthropic = convertToAnthropicMessages(messages, true) as any[];
+		expect(anthropic[0].content[0].cache_control).toBeUndefined();
+		expect(anthropic[2].content[0].cache_control).toEqual({
+			type: "ephemeral",
+		});
+		expect(anthropic[4].content[0].cache_control).toEqual({
+			type: "ephemeral",
+		});
+	});
 	it("normalizes array-shaped tool_use input for anthropic replay", () => {
 		const messages: Message[] = [
 			{ role: "user", content: "run these" },

package/src/providers/transform/gemini-format.ts CHANGED Viewed

@@ -172,6 +172,80 @@ function convertContentBlock(
 	}
 }
/**
 * Allowed JSON Schema properties per Gemini's supported subset.
 * See: https://ai.google.dev/gemini-api/docs/structured-output
 *
 * Any schema keyword not listed here is dropped by `sanitizeSchemaForGemini`.
 */
const GEMINI_ALLOWED_PROPERTIES = new Set([
	// Common
	"type",
	"title",
	"description",
	"enum",
	// Object
	"properties",
	"required",
	"additionalProperties",
	// String
	"format",
	// Number / Integer
	"minimum",
	"maximum",
	// Array
	"items",
	"prefixItems",
	"minItems",
	"maxItems",
]);

/**
 * Recursively sanitize a JSON Schema so it only contains keywords listed in
 * `GEMINI_ALLOWED_PROPERTIES`.
 *
 * Nested schemas under `properties`, `items`, `additionalProperties` and
 * `prefixItems` are sanitized as well. The input is never mutated; a new
 * object is returned for every schema object encountered.
 *
 * Numeric `exclusiveMinimum` / `exclusiveMaximum` are converted to
 * `minimum` / `maximum` as a best-effort fallback when no inclusive bound is
 * present. Note this loosens the bound from exclusive to inclusive.
 *
 * @param schema - A JSON Schema node. Non-object values (including `null`,
 *   primitives and arrays) are returned unchanged.
 * @returns The sanitized schema, or the input itself when it is not a plain
 *   schema object.
 */
function sanitizeSchemaForGemini(schema: unknown): unknown {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
		return schema;
	}

	const input = schema as Record<string, unknown>;
	const output: Record<string, unknown> = {};

	for (const [key, value] of Object.entries(input)) {
		if (!GEMINI_ALLOWED_PROPERTIES.has(key)) {
			continue;
		}

		if (key === "properties" && value && typeof value === "object") {
			// Keys under `properties` are user-defined property names, not schema
			// keywords, so they are kept verbatim and only their schemas are cleaned.
			const sanitized: Record<string, unknown> = {};
			for (const [propName, propSchema] of Object.entries(
				value as Record<string, unknown>,
			)) {
				sanitized[propName] = sanitizeSchemaForGemini(propSchema);
			}
			output[key] = sanitized;
		} else if (key === "items" || key === "additionalProperties") {
			// Older JSON Schema drafts allow a tuple (array) of schemas here; each
			// element must be sanitized individually because the top-level guard
			// returns arrays untouched. Booleans and other non-objects pass through.
			output[key] = Array.isArray(value)
				? value.map((item) => sanitizeSchemaForGemini(item))
				: sanitizeSchemaForGemini(value);
		} else if (key === "prefixItems" && Array.isArray(value)) {
			output[key] = value.map((item) => sanitizeSchemaForGemini(item));
		} else {
			output[key] = value;
		}
	}

	// Convert exclusiveMinimum/exclusiveMaximum to minimum/maximum.
	// Only numeric values are converted: JSON Schema draft-04 uses a boolean
	// flag for these keywords, which must not be copied into a numeric bound.
	if (
		typeof input.exclusiveMinimum === "number" &&
		output.minimum === undefined
	) {
		output.minimum = input.exclusiveMinimum;
	}
	if (
		typeof input.exclusiveMaximum === "number" &&
		output.maximum === undefined
	) {
		output.maximum = input.exclusiveMaximum;
	}

	return output;
}
 /**
  * Convert tool definitions to Gemini format
  */
@@ -181,6 +255,8 @@ export function convertToolsToGemini(
 	return tools.map((tool) => ({
 		name: tool.name,
 		description: tool.description,
-		parameters: tool.inputSchema as FunctionDeclaration["parameters"],
+		parameters: sanitizeSchemaForGemini(
+			tool.inputSchema,
+		) as FunctionDeclaration["parameters"],
 	}));
 }

package/src/providers/types/stream.ts CHANGED Viewed

@@ -55,7 +55,7 @@ export interface ApiStreamReasoningChunk {
  */
 export interface ApiStreamUsageChunk {
 	type: "usage";
-	/** Number of input tokens (excluding cached) */
+	/** Number of non-cached input tokens (provider-reported input minus cache read and cache write tokens) */
 	inputTokens: number;
 	/** Number of output tokens */
 	outputTokens: number;