@clinebot/llms 0.0.11 → 0.0.13

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -82,7 +82,7 @@ describe("models-dev-catalog", () => {
82
82
  id: "claude-defaults",
83
83
  name: "claude-defaults",
84
84
  contextWindow: 4096,
85
- maxTokens: 4096,
85
+ maxTokens: 204,
86
86
  capabilities: ["tools"],
87
87
  pricing: {
88
88
  input: 0,
@@ -97,7 +97,7 @@ describe("models-dev-catalog", () => {
97
97
  id: "claude-older",
98
98
  name: "claude-older",
99
99
  contextWindow: 4096,
100
- maxTokens: 4096,
100
+ maxTokens: 204,
101
101
  capabilities: ["tools"],
102
102
  pricing: {
103
103
  input: 0,
@@ -93,11 +93,18 @@ function toStatus(status: string | undefined): ModelInfo["status"] {
93
93
  }
94
94
 
95
95
  function toModelInfo(modelId: string, model: ModelsDevModel): ModelInfo {
96
+ // If context or output limits are missing, default to DEFAULT_CONTEXT_WINDOW and DEFAULT_MAX_TOKENS respectively.
97
+ // If context and max are the same value, assume max tokens should be 5% of that value to avoid overallocation.
98
+ const contextWindow = model.limit?.context ?? DEFAULT_CONTEXT_WINDOW;
99
+ const outputToken = model.limit?.output ?? DEFAULT_MAX_TOKENS;
100
+ const discounted =
101
+ contextWindow === outputToken ? outputToken * 0.05 : outputToken;
102
+
96
103
  return {
97
104
  id: modelId,
98
105
  name: model.name || modelId,
99
- contextWindow: model.limit?.context ?? DEFAULT_CONTEXT_WINDOW,
100
- maxTokens: model.limit?.output ?? DEFAULT_MAX_TOKENS,
106
+ contextWindow,
107
+ maxTokens: Math.floor(discounted),
101
108
  capabilities: toCapabilities(model),
102
109
  pricing: {
103
110
  input: model.cost?.input ?? 0,
@@ -169,7 +169,10 @@ export async function* emitAiSdkStream(
169
169
 
170
170
  yield {
171
171
  type: "usage",
172
- inputTokens: usageMetrics.inputTokens,
172
+ inputTokens: Math.max(
173
+ 0,
174
+ usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
175
+ ),
173
176
  outputTokens: usageMetrics.outputTokens,
174
177
  thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
175
178
  cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -204,7 +207,10 @@ export async function* emitAiSdkStream(
204
207
  const usageMetrics = resolveUsageMetrics(usage);
205
208
  yield {
206
209
  type: "usage",
207
- inputTokens: usageMetrics.inputTokens,
210
+ inputTokens: Math.max(
211
+ 0,
212
+ usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
213
+ ),
208
214
  outputTokens: usageMetrics.outputTokens,
209
215
  thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
210
216
  cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -25,9 +25,27 @@ class TestHandler extends BaseHandler {
25
25
  );
26
26
  }
27
27
 
28
+ public computeCostFromInclusiveInput(
29
+ inputTokens: number,
30
+ outputTokens: number,
31
+ cacheReadTokens = 0,
32
+ cacheWriteTokens = 0,
33
+ ): number | undefined {
34
+ return this.calculateCostFromInclusiveInput(
35
+ inputTokens,
36
+ outputTokens,
37
+ cacheReadTokens,
38
+ cacheWriteTokens,
39
+ );
40
+ }
41
+
28
42
  public exposeAbortSignal(): AbortSignal {
29
43
  return this.getAbortSignal();
30
44
  }
45
+
46
+ public normalizeBadRequest(error: unknown): Error | undefined {
47
+ return this.normalizeOpenAICompatibleBadRequest(error);
48
+ }
31
49
  }
32
50
 
33
51
  describe("BaseHandler.calculateCost", () => {
@@ -53,6 +71,53 @@ describe("BaseHandler.calculateCost", () => {
53
71
 
54
72
  expect(cost).toBeCloseTo(18.03, 6);
55
73
  });
74
+
75
+ it("does not charge cache reads twice when input already includes them", () => {
76
+ const config: ProviderConfig = {
77
+ providerId: "openai-native",
78
+ modelId: "gpt-test",
79
+ apiKey: "test-key",
80
+ knownModels: {
81
+ "gpt-test": {
82
+ id: "gpt-test",
83
+ pricing: {
84
+ input: 1,
85
+ output: 2,
86
+ cacheRead: 0.5,
87
+ },
88
+ },
89
+ },
90
+ };
91
+ const handler = new TestHandler(config);
92
+
93
+ const cost = handler.computeCostFromInclusiveInput(100, 40, 25);
94
+
95
+ expect(cost).toBeCloseTo(0.0001675, 10);
96
+ });
97
+
98
+ it("does not charge cache writes twice when input already includes them", () => {
99
+ const config: ProviderConfig = {
100
+ providerId: "openai-native",
101
+ modelId: "gpt-test",
102
+ apiKey: "test-key",
103
+ knownModels: {
104
+ "gpt-test": {
105
+ id: "gpt-test",
106
+ pricing: {
107
+ input: 1,
108
+ output: 2,
109
+ cacheRead: 0.5,
110
+ cacheWrite: 1.25,
111
+ },
112
+ },
113
+ },
114
+ };
115
+ const handler = new TestHandler(config);
116
+
117
+ const cost = handler.computeCostFromInclusiveInput(100, 40, 25, 10);
118
+
119
+ expect(cost).toBeCloseTo(0.00017, 10);
120
+ });
56
121
  });
57
122
 
58
123
  describe("BaseHandler abort signal wiring", () => {
@@ -109,3 +174,57 @@ describe("BaseHandler abort signal wiring", () => {
109
174
  expect(signal2.aborted).toBe(false);
110
175
  });
111
176
  });
177
+
178
+ describe("BaseHandler.normalizeOpenAICompatibleBadRequest", () => {
179
+ it("rewrites provider metadata prompt-limit errors into a helpful message", () => {
180
+ const handler = new TestHandler({
181
+ providerId: "openrouter",
182
+ modelId: "anthropic/claude-sonnet-4.6",
183
+ apiKey: "test-key",
184
+ baseUrl: "https://openrouter.ai/api/v1",
185
+ });
186
+
187
+ const error = Object.assign(new Error("400 Provider returned error"), {
188
+ status: 400,
189
+ error: {
190
+ message: "Provider returned error",
191
+ code: 400,
192
+ metadata: {
193
+ provider_name: "Anthropic",
194
+ raw: JSON.stringify({
195
+ type: "error",
196
+ error: {
197
+ type: "invalid_request_error",
198
+ message: "prompt is too long: 1102640 tokens > 1000000 maximum",
199
+ },
200
+ request_id: "req_123",
201
+ }),
202
+ },
203
+ },
204
+ });
205
+
206
+ const normalized = handler.normalizeBadRequest(error);
207
+
208
+ expect(normalized?.message).toBe(
209
+ "Anthropic request was rejected (HTTP 400). Prompt is too long: 1102640 tokens exceeds the 1000000 token limit. Request ID: req_123.",
210
+ );
211
+ expect(normalized?.cause).toBe(error);
212
+ });
213
+
214
+ it("returns undefined for non-400 errors", () => {
215
+ const handler = new TestHandler({
216
+ providerId: "openrouter",
217
+ modelId: "anthropic/claude-sonnet-4.6",
218
+ apiKey: "test-key",
219
+ baseUrl: "https://openrouter.ai/api/v1",
220
+ });
221
+
222
+ const normalized = handler.normalizeBadRequest(
223
+ Object.assign(new Error("500 Provider returned error"), {
224
+ status: 500,
225
+ }),
226
+ );
227
+
228
+ expect(normalized).toBeUndefined();
229
+ });
230
+ });
@@ -23,6 +23,22 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
23
23
  "X-CLIENT-TYPE": "cline-sdk",
24
24
  };
25
25
 
26
/**
 * Loose structural view of errors thrown by OpenAI-compatible clients and
 * gateways. Every field is optional because values of this type are obtained
 * by casting a caught `unknown`; nothing here is validated at runtime, so
 * consumers must still guard field types before using them.
 */
interface OpenAICompatibleProviderErrorShape {
  /** HTTP status attached directly to the thrown error, when present. */
  status?: number;
  /** Top-level error message. */
  message?: string;
  /** Error payload taken from the response body. */
  error?: {
    message?: string;
    /**
     * Error code from the payload. Some gateways put the numeric HTTP status
     * here, while OpenAI-style payloads carry a string identifier or `null`.
     */
    code?: number | string | null;
    metadata?: {
      /** Upstream provider's own error body, serialized as a JSON string. */
      raw?: string;
      /** Display name of the upstream provider that rejected the request. */
      provider_name?: string;
    };
  };
  /** Raw HTTP response details, when the client exposes them. */
  response?: {
    status?: number;
  };
}
41
+
26
42
  const controllerIds = new WeakMap<AbortController, string>();
27
43
  let controllerIdCounter = 0;
28
44
 
@@ -188,6 +204,20 @@ export abstract class BaseHandler implements ApiHandler {
188
204
  );
189
205
  }
190
206
 
207
/**
 * Cost calculation for providers whose reported input-token count already
 * includes cached tokens.
 *
 * Cache reads and cache writes are subtracted from `inputTokens` (clamped at
 * zero) before delegating to `calculateCost`, so cached tokens are not
 * counted a second time as regular input.
 *
 * @param inputTokens Input tokens as reported, inclusive of cached tokens.
 * @param outputTokens Output tokens, forwarded unchanged.
 * @param cacheReadTokens Tokens read from cache (defaults to 0).
 * @param cacheWriteTokens Tokens written to cache (defaults to 0).
 * @returns Whatever `calculateCost` returns for the adjusted counts.
 */
protected calculateCostFromInclusiveInput(
  inputTokens: number,
  outputTokens: number,
  cacheReadTokens = 0,
  cacheWriteTokens = 0,
): number | undefined {
  // Never let inconsistent usage data produce a negative input count.
  const uncachedInputTokens = Math.max(
    0,
    inputTokens - cacheReadTokens - cacheWriteTokens,
  );

  return this.calculateCost(
    uncachedInputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens,
  );
}
220
+
191
221
  protected createResponseId(): string {
192
222
  return nanoid();
193
223
  }
@@ -214,4 +244,67 @@ export abstract class BaseHandler implements ApiHandler {
214
244
  ...(this.config.headers ?? {}),
215
245
  };
216
246
  }
247
+
248
/**
 * Turns an HTTP 400 failure from an OpenAI-compatible endpoint into an
 * `Error` with a readable message.
 *
 * The status is taken from `error.status`, then `error.response.status`,
 * then `error.error.code`, and finally from a standalone "400" token in the
 * error message. Anything that does not resolve to 400 is left alone.
 *
 * The detail text prefers the message inside the provider's raw JSON body
 * (`error.error.metadata.raw`), then the payload message, then the top-level
 * message. The provider's `request_id` is appended when available.
 *
 * @param error The caught value; may be anything.
 * @returns A new `Error` (with `cause` set to the original when it is an
 *   `Error`) for 400 responses, otherwise `undefined`.
 */
protected normalizeOpenAICompatibleBadRequest(
  error: unknown,
): Error | undefined {
  const rawError = error as OpenAICompatibleProviderErrorShape | undefined;
  // Match "400" only as a standalone token so that messages which merely
  // contain the digits (e.g. "4000 tokens", "14003") are not misclassified.
  const messageMentions400 =
    typeof rawError?.message === "string" &&
    /\b400\b/.test(rawError.message);
  const status =
    rawError?.status ??
    rawError?.response?.status ??
    rawError?.error?.code ??
    (messageMentions400 ? 400 : undefined);
  if (status !== 400) {
    return undefined;
  }

  // The shape above is only a cast of an unknown value, so every field is
  // checked before string methods are used; a malformed payload must not
  // throw a TypeError that would hide the real provider error.
  const cleanText = (value: unknown): string | undefined => {
    if (typeof value !== "string") {
      return undefined;
    }
    const trimmed = value.trim();
    return trimmed === "" ? undefined : trimmed;
  };

  const rawMetadata: unknown = rawError?.error?.metadata?.raw;
  const parsedRaw = this.parseRawProviderError(
    typeof rawMetadata === "string" ? rawMetadata : undefined,
  );
  const detail =
    cleanText(parsedRaw?.error?.message) ??
    cleanText(rawError?.error?.message) ??
    cleanText(rawError?.message) ??
    "Provider returned error";
  const providerName =
    cleanText(rawError?.error?.metadata?.provider_name) ?? "Provider";
  const requestId = cleanText(parsedRaw?.request_id);
  const normalizedMessage = this.rewriteProviderBadRequestDetail(detail);
  const suffix = requestId ? ` Request ID: ${requestId}.` : "";
  return new Error(
    `${providerName} request was rejected (HTTP 400). ${normalizedMessage}${suffix}`,
    {
      cause: error instanceof Error ? error : undefined,
    },
  );
}
282
+
283
/**
 * Parses the provider's raw error body (a JSON string) and extracts the
 * fields used for error reporting.
 *
 * The parsed value is validated rather than blindly cast: only a JSON object
 * is accepted, and `error.message` / `request_id` are kept only when they are
 * strings. This keeps the declared return type truthful for callers that go
 * on to call string methods on those fields.
 *
 * @param raw JSON text of the provider error, if any.
 * @returns The extracted fields, or `undefined` when `raw` is empty, is not
 *   valid JSON, or does not decode to a JSON object.
 */
private parseRawProviderError(
  raw: string | undefined,
): { error?: { message?: string }; request_id?: string } | undefined {
  if (!raw) {
    return undefined;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Best effort: an unparseable body simply yields no extra detail.
    return undefined;
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return undefined;
  }

  const body = parsed as Record<string, unknown>;
  const result: { error?: { message?: string }; request_id?: string } = {};

  const errorField = body.error;
  if (typeof errorField === "object" && errorField !== null) {
    const message = (errorField as Record<string, unknown>).message;
    result.error = typeof message === "string" ? { message } : {};
  }
  if (typeof body.request_id === "string") {
    result.request_id = body.request_id;
  }
  return result;
}
298
+
299
/**
 * Produces the sentence shown to the user for a provider's bad-request
 * detail.
 *
 * A "prompt is too long: N tokens > M maximum" detail is rephrased into a
 * plain-language sentence; any other detail is returned as is, with a
 * trailing period added when it does not already end with one.
 *
 * @param detail Detail text reported by the provider.
 * @returns The sentence to embed in the normalized error message.
 */
private rewriteProviderBadRequestDetail(detail: string): string {
  const tokenLimitPattern =
    /prompt is too long:\s*([\d,]+)\s*tokens?\s*>\s*([\d,]+)\s*maximum/i;
  const hit = tokenLimitPattern.exec(detail);

  if (hit === null) {
    return detail.endsWith(".") ? detail : `${detail}.`;
  }

  const [, actual, maximum] = hit;
  return `Prompt is too long: ${actual} tokens exceeds the ${maximum} token limit.`;
}
217
310
  }
@@ -216,11 +216,11 @@ export class BedrockHandler extends BaseHandler {
216
216
 
217
217
  yield {
218
218
  type: "usage",
219
- inputTokens,
219
+ inputTokens: Math.max(0, inputTokens - cacheReadTokens),
220
220
  outputTokens,
221
221
  thoughtsTokenCount,
222
222
  cacheReadTokens,
223
- totalCost: this.calculateCost(
223
+ totalCost: this.calculateCostFromInclusiveInput(
224
224
  inputTokens,
225
225
  outputTokens,
226
226
  cacheReadTokens,
@@ -245,11 +245,11 @@ export class BedrockHandler extends BaseHandler {
245
245
 
246
246
  yield {
247
247
  type: "usage",
248
- inputTokens,
248
+ inputTokens: Math.max(0, inputTokens - cacheReadTokens),
249
249
  outputTokens,
250
250
  thoughtsTokenCount,
251
251
  cacheReadTokens,
252
- totalCost: this.calculateCost(
252
+ totalCost: this.calculateCostFromInclusiveInput(
253
253
  inputTokens,
254
254
  outputTokens,
255
255
  cacheReadTokens,
@@ -142,7 +142,7 @@ describe("Community SDK handlers", () => {
142
142
  chunk.type === "usage",
143
143
  );
144
144
  expect(usageChunk).toMatchObject({
145
- inputTokens: 10,
145
+ inputTokens: 6,
146
146
  outputTokens: 3,
147
147
  cacheReadTokens: 4,
148
148
  });
@@ -217,10 +217,15 @@ export class OpenAIBaseHandler extends BaseHandler {
217
217
  requestHeaders.Authorization = `Bearer ${apiKey}`;
218
218
  }
219
219
  const abortSignal = this.getAbortSignal();
220
- const stream = await client.chat.completions.create(requestOptions, {
221
- signal: abortSignal,
222
- headers: requestHeaders,
223
- });
220
+ let stream: AsyncIterable<ChatCompletionChunk>;
221
+ try {
222
+ stream = await client.chat.completions.create(requestOptions, {
223
+ signal: abortSignal,
224
+ headers: requestHeaders,
225
+ });
226
+ } catch (error) {
227
+ throw this.normalizeOpenAICompatibleBadRequest(error) ?? error;
228
+ }
224
229
  const toolCallProcessor = new ToolCallProcessor();
225
230
  let finishReason: string | null = null;
226
231
 
@@ -309,11 +314,14 @@ export class OpenAIBaseHandler extends BaseHandler {
309
314
 
310
315
  yield {
311
316
  type: "usage",
312
- inputTokens,
317
+ inputTokens: Math.max(
318
+ 0,
319
+ inputTokens - cacheReadTokens - cacheWriteTokens,
320
+ ),
313
321
  outputTokens,
314
322
  cacheReadTokens,
315
323
  cacheWriteTokens,
316
- totalCost: this.calculateCost(
324
+ totalCost: this.calculateCostFromInclusiveInput(
317
325
  inputTokens,
318
326
  outputTokens,
319
327
  cacheReadTokens,
@@ -246,14 +246,14 @@ describe("OpenAIResponsesHandler", () => {
246
246
 
247
247
  expect(chunks[0]).toMatchObject({
248
248
  type: "usage",
249
- inputTokens: 100,
249
+ inputTokens: 75,
250
250
  outputTokens: 40,
251
251
  cacheReadTokens: 25,
252
252
  cacheWriteTokens: 0,
253
253
  });
254
254
  expect(chunks[0]?.type).toBe("usage");
255
255
  if (chunks[0]?.type === "usage") {
256
- expect(chunks[0].totalCost).toBeCloseTo(0.0001925, 10);
256
+ expect(chunks[0].totalCost).toBeCloseTo(0.0001675, 10);
257
257
  }
258
258
  });
259
259
  });
@@ -330,6 +330,11 @@ export class OpenAIResponsesHandler extends BaseHandler {
330
330
  { signal: abortSignal, headers: requestHeaders },
331
331
  );
332
332
  } catch (error) {
333
+ const normalizedBadRequest =
334
+ this.normalizeOpenAICompatibleBadRequest(error);
335
+ if (normalizedBadRequest) {
336
+ throw normalizedBadRequest;
337
+ }
333
338
  if (this.config.providerId === "openai-codex") {
334
339
  const rawError = error as
335
340
  | (Error & {
@@ -568,7 +573,7 @@ export class OpenAIResponsesHandler extends BaseHandler {
568
573
  usage.input_tokens_details?.cached_tokens || 0;
569
574
  const cacheWriteTokens = 0;
570
575
 
571
- const totalCost = this.calculateCost(
576
+ const totalCost = this.calculateCostFromInclusiveInput(
572
577
  inputTokens,
573
578
  outputTokens,
574
579
  cacheReadTokens,
@@ -577,7 +582,10 @@ export class OpenAIResponsesHandler extends BaseHandler {
577
582
 
578
583
  yield {
579
584
  type: "usage",
580
- inputTokens,
585
+ inputTokens: Math.max(
586
+ 0,
587
+ inputTokens - cacheReadTokens - cacheWriteTokens,
588
+ ),
581
589
  outputTokens,
582
590
  cacheWriteTokens,
583
591
  cacheReadTokens,
@@ -257,11 +257,14 @@ export class R1BaseHandler extends BaseHandler {
257
257
 
258
258
  yield {
259
259
  type: "usage",
260
- inputTokens,
260
+ inputTokens: Math.max(
261
+ 0,
262
+ inputTokens - cacheReadTokens - cacheWriteTokens,
263
+ ),
261
264
  outputTokens,
262
265
  cacheReadTokens,
263
266
  cacheWriteTokens,
264
- totalCost: this.calculateCost(
267
+ totalCost: this.calculateCostFromInclusiveInput(
265
268
  inputTokens,
266
269
  outputTokens,
267
270
  cacheReadTokens,
@@ -172,6 +172,80 @@ function convertContentBlock(
172
172
  }
173
173
  }
174
174
 
175
/**
 * Allowed JSON Schema properties per Gemini's supported subset.
 * See: https://ai.google.dev/gemini-api/docs/structured-output
 */
const GEMINI_ALLOWED_PROPERTIES = new Set([
  // Common
  "type",
  "title",
  "description",
  "enum",
  // Object
  "properties",
  "required",
  "additionalProperties",
  // String
  "format",
  // Number / Integer
  "minimum",
  "maximum",
  // Array
  "items",
  "prefixItems",
  "minItems",
  "maxItems",
]);

/**
 * Recursively sanitize a JSON Schema so that it only contains keywords listed
 * in `GEMINI_ALLOWED_PROPERTIES`.
 *
 * - Nested schemas under `properties`, `items`, `additionalProperties` and
 *   `prefixItems` are sanitized as well. `items` given as an array of schemas
 *   (tuple form) has each element sanitized.
 * - Numeric `exclusiveMinimum` / `exclusiveMaximum` are converted to
 *   `minimum` / `maximum` as a best-effort fallback when no inclusive bound
 *   is present. The boolean form of those keywords (JSON Schema draft-04) is
 *   only a modifier of an existing `minimum` / `maximum`, so it is dropped
 *   instead of being copied into a numeric bound.
 *
 * @param schema Any value; non-objects and arrays are returned unchanged.
 * @returns A new sanitized object, or the input itself when it is not a
 *   plain schema object. The input is never mutated.
 */
function sanitizeSchemaForGemini(schema: unknown): unknown {
  if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
    return schema;
  }

  const input = schema as Record<string, unknown>;
  const output: Record<string, unknown> = {};

  for (const [key, value] of Object.entries(input)) {
    if (!GEMINI_ALLOWED_PROPERTIES.has(key)) {
      continue;
    }

    if (key === "properties" && value && typeof value === "object") {
      // Keys here are user-chosen property names, not schema keywords, so
      // they are all kept; only their schemas are sanitized. fromEntries
      // defines own properties, so a property literally named "__proto__"
      // is preserved instead of replacing the object's prototype.
      output[key] = Object.fromEntries(
        Object.entries(value as Record<string, unknown>).map(
          ([propName, propSchema]) => [
            propName,
            sanitizeSchemaForGemini(propSchema),
          ],
        ),
      );
    } else if (key === "items" || key === "additionalProperties") {
      // Non-object values (e.g. `additionalProperties: false`) pass through
      // sanitizeSchemaForGemini unchanged.
      output[key] = Array.isArray(value)
        ? value.map((item) => sanitizeSchemaForGemini(item))
        : sanitizeSchemaForGemini(value);
    } else if (key === "prefixItems" && Array.isArray(value)) {
      output[key] = value.map((item) => sanitizeSchemaForGemini(item));
    } else {
      output[key] = value;
    }
  }

  // Convert numeric exclusiveMinimum/exclusiveMaximum to minimum/maximum.
  if (
    typeof input.exclusiveMinimum === "number" &&
    output.minimum === undefined
  ) {
    output.minimum = input.exclusiveMinimum;
  }
  if (
    typeof input.exclusiveMaximum === "number" &&
    output.maximum === undefined
  ) {
    output.maximum = input.exclusiveMaximum;
  }

  return output;
}
248
+
175
249
  /**
176
250
  * Convert tool definitions to Gemini format
177
251
  */
@@ -181,6 +255,8 @@ export function convertToolsToGemini(
181
255
  return tools.map((tool) => ({
182
256
  name: tool.name,
183
257
  description: tool.description,
184
- parameters: tool.inputSchema as FunctionDeclaration["parameters"],
258
+ parameters: sanitizeSchemaForGemini(
259
+ tool.inputSchema,
260
+ ) as FunctionDeclaration["parameters"],
185
261
  }));
186
262
  }