@clinebot/llms 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.browser.js +14 -14
- package/dist/index.js +13 -13
- package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
- package/dist/providers/handlers/base.d.ts +7 -2
- package/dist/providers/types/stream.d.ts +1 -1
- package/package.json +2 -2
- package/src/models/generated.ts +223 -641
- package/src/models/models-dev-catalog.test.ts +2 -2
- package/src/models/models-dev-catalog.ts +9 -2
- package/src/providers/handlers/ai-sdk-community.ts +3 -0
- package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
- package/src/providers/handlers/anthropic-base.test.ts +30 -0
- package/src/providers/handlers/anthropic-base.ts +31 -29
- package/src/providers/handlers/base.test.ts +127 -2
- package/src/providers/handlers/base.ts +115 -1
- package/src/providers/handlers/bedrock-base.ts +4 -4
- package/src/providers/handlers/community-sdk.test.ts +33 -0
- package/src/providers/handlers/gemini-base.ts +6 -19
- package/src/providers/handlers/openai-base.ts +33 -14
- package/src/providers/handlers/openai-responses.test.ts +46 -0
- package/src/providers/handlers/openai-responses.ts +12 -8
- package/src/providers/handlers/r1-base.ts +10 -8
- package/src/providers/handlers/vertex.ts +14 -4
- package/src/providers/transform/anthropic-format.ts +14 -2
- package/src/providers/transform/format-conversion.test.ts +23 -0
- package/src/providers/transform/gemini-format.ts +77 -1
- package/src/providers/types/stream.ts +1 -1
|
@@ -22,7 +22,6 @@ import type {
|
|
|
22
22
|
ModelInfo,
|
|
23
23
|
ProviderConfig,
|
|
24
24
|
} from "../types";
|
|
25
|
-
import { hasModelCapability } from "../types";
|
|
26
25
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
27
26
|
import { retryStream } from "../utils/retry";
|
|
28
27
|
import { ToolCallProcessor } from "../utils/tool-processor";
|
|
@@ -108,9 +107,7 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
108
107
|
messages: Message[],
|
|
109
108
|
): OpenAI.Chat.ChatCompletionMessageParam[] {
|
|
110
109
|
const model = this.getModel();
|
|
111
|
-
const supportsPromptCache =
|
|
112
|
-
hasModelCapability(model.info, "prompt-cache") ||
|
|
113
|
-
this.config.capabilities?.includes("prompt-cache") === true;
|
|
110
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
114
111
|
const systemMessage = supportsPromptCache
|
|
115
112
|
? ({
|
|
116
113
|
role: "system",
|
|
@@ -156,7 +153,8 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
156
153
|
const openAiMessages = this.getMessages(systemPrompt, messages);
|
|
157
154
|
|
|
158
155
|
// Build request options
|
|
159
|
-
const requestOptions:
|
|
156
|
+
const requestOptions: Record<string, unknown> &
|
|
157
|
+
OpenAI.ChatCompletionCreateParamsStreaming = {
|
|
160
158
|
model: modelId,
|
|
161
159
|
messages: openAiMessages,
|
|
162
160
|
stream: true,
|
|
@@ -167,6 +165,17 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
167
165
|
}),
|
|
168
166
|
};
|
|
169
167
|
|
|
168
|
+
// Add top-level cache_control for OpenRouter with Anthropic models.
|
|
169
|
+
// This enables automatic caching where the cache breakpoint advances
|
|
170
|
+
// as the conversation grows, rather than relying on explicit per-block
|
|
171
|
+
// breakpoints which are limited to 4.
|
|
172
|
+
if (
|
|
173
|
+
this.config.providerId === "openrouter" &&
|
|
174
|
+
modelId.startsWith("anthropic/")
|
|
175
|
+
) {
|
|
176
|
+
requestOptions.cache_control = { type: "ephemeral" };
|
|
177
|
+
}
|
|
178
|
+
|
|
170
179
|
// Add max tokens if configured
|
|
171
180
|
const maxTokens = modelInfo.maxTokens ?? this.config.maxOutputTokens;
|
|
172
181
|
if (maxTokens) {
|
|
@@ -208,10 +217,15 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
208
217
|
requestHeaders.Authorization = `Bearer ${apiKey}`;
|
|
209
218
|
}
|
|
210
219
|
const abortSignal = this.getAbortSignal();
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
220
|
+
let stream: AsyncIterable<ChatCompletionChunk>;
|
|
221
|
+
try {
|
|
222
|
+
stream = await client.chat.completions.create(requestOptions, {
|
|
223
|
+
signal: abortSignal,
|
|
224
|
+
headers: requestHeaders,
|
|
225
|
+
});
|
|
226
|
+
} catch (error) {
|
|
227
|
+
throw this.normalizeOpenAICompatibleBadRequest(error) ?? error;
|
|
228
|
+
}
|
|
215
229
|
const toolCallProcessor = new ToolCallProcessor();
|
|
216
230
|
let finishReason: string | null = null;
|
|
217
231
|
|
|
@@ -286,27 +300,32 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
286
300
|
cached_tokens?: number;
|
|
287
301
|
cache_write_tokens?: number;
|
|
288
302
|
};
|
|
289
|
-
prompt_cache_miss_tokens?: number;
|
|
290
303
|
cache_creation_input_tokens?: number;
|
|
291
304
|
cache_read_input_tokens?: number;
|
|
292
305
|
};
|
|
293
306
|
const cacheReadTokens =
|
|
294
|
-
usageWithCache.prompt_tokens_details?.cached_tokens ??
|
|
307
|
+
usageWithCache.prompt_tokens_details?.cached_tokens ??
|
|
308
|
+
usageWithCache.cache_read_input_tokens ??
|
|
309
|
+
0;
|
|
295
310
|
const cacheWriteTokens =
|
|
296
311
|
usageWithCache.prompt_tokens_details?.cache_write_tokens ??
|
|
297
|
-
usageWithCache.
|
|
312
|
+
usageWithCache.cache_creation_input_tokens ??
|
|
298
313
|
0;
|
|
299
314
|
|
|
300
315
|
yield {
|
|
301
316
|
type: "usage",
|
|
302
|
-
inputTokens
|
|
317
|
+
inputTokens: Math.max(
|
|
318
|
+
0,
|
|
319
|
+
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
320
|
+
),
|
|
303
321
|
outputTokens,
|
|
304
322
|
cacheReadTokens,
|
|
305
323
|
cacheWriteTokens,
|
|
306
|
-
totalCost: this.
|
|
324
|
+
totalCost: this.calculateCostFromInclusiveInput(
|
|
307
325
|
inputTokens,
|
|
308
326
|
outputTokens,
|
|
309
327
|
cacheReadTokens,
|
|
328
|
+
cacheWriteTokens,
|
|
310
329
|
),
|
|
311
330
|
id: responseId,
|
|
312
331
|
};
|
|
@@ -210,4 +210,50 @@ describe("OpenAIResponsesHandler", () => {
|
|
|
210
210
|
},
|
|
211
211
|
});
|
|
212
212
|
});
|
|
213
|
+
|
|
214
|
+
it("keeps cached input tokens separate in usage chunks", () => {
|
|
215
|
+
const handler = new TestOpenAIResponsesHandler({
|
|
216
|
+
providerId: "openai-native",
|
|
217
|
+
modelId: "gpt-5.4",
|
|
218
|
+
apiKey: "test-key",
|
|
219
|
+
baseUrl: "https://example.com",
|
|
220
|
+
modelInfo: {
|
|
221
|
+
id: "gpt-5.4",
|
|
222
|
+
pricing: {
|
|
223
|
+
input: 1,
|
|
224
|
+
output: 2,
|
|
225
|
+
cacheRead: 0.5,
|
|
226
|
+
},
|
|
227
|
+
},
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
const chunks = handler.processChunkForTest({
|
|
231
|
+
type: "response.completed",
|
|
232
|
+
response: {
|
|
233
|
+
id: "resp_usage",
|
|
234
|
+
usage: {
|
|
235
|
+
input_tokens: 100,
|
|
236
|
+
output_tokens: 40,
|
|
237
|
+
input_tokens_details: {
|
|
238
|
+
cached_tokens: 25,
|
|
239
|
+
},
|
|
240
|
+
output_tokens_details: {
|
|
241
|
+
reasoning_tokens: 10,
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
},
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
expect(chunks[0]).toMatchObject({
|
|
248
|
+
type: "usage",
|
|
249
|
+
inputTokens: 75,
|
|
250
|
+
outputTokens: 40,
|
|
251
|
+
cacheReadTokens: 25,
|
|
252
|
+
cacheWriteTokens: 0,
|
|
253
|
+
});
|
|
254
|
+
expect(chunks[0]?.type).toBe("usage");
|
|
255
|
+
if (chunks[0]?.type === "usage") {
|
|
256
|
+
expect(chunks[0].totalCost).toBeCloseTo(0.0001675, 10);
|
|
257
|
+
}
|
|
258
|
+
});
|
|
213
259
|
});
|
|
@@ -330,6 +330,11 @@ export class OpenAIResponsesHandler extends BaseHandler {
|
|
|
330
330
|
{ signal: abortSignal, headers: requestHeaders },
|
|
331
331
|
);
|
|
332
332
|
} catch (error) {
|
|
333
|
+
const normalizedBadRequest =
|
|
334
|
+
this.normalizeOpenAICompatibleBadRequest(error);
|
|
335
|
+
if (normalizedBadRequest) {
|
|
336
|
+
throw normalizedBadRequest;
|
|
337
|
+
}
|
|
333
338
|
if (this.config.providerId === "openai-codex") {
|
|
334
339
|
const rawError = error as
|
|
335
340
|
| (Error & {
|
|
@@ -565,23 +570,22 @@ export class OpenAIResponsesHandler extends BaseHandler {
|
|
|
565
570
|
const inputTokens = usage.input_tokens || 0;
|
|
566
571
|
const outputTokens = usage.output_tokens || 0;
|
|
567
572
|
const cacheReadTokens =
|
|
568
|
-
usage.output_tokens_details?.reasoning_tokens || 0;
|
|
569
|
-
const cacheWriteTokens =
|
|
570
573
|
usage.input_tokens_details?.cached_tokens || 0;
|
|
574
|
+
const cacheWriteTokens = 0;
|
|
571
575
|
|
|
572
|
-
const totalCost = this.
|
|
576
|
+
const totalCost = this.calculateCostFromInclusiveInput(
|
|
573
577
|
inputTokens,
|
|
574
578
|
outputTokens,
|
|
575
579
|
cacheReadTokens,
|
|
576
|
-
|
|
577
|
-
const nonCachedInputTokens = Math.max(
|
|
578
|
-
0,
|
|
579
|
-
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
580
|
+
cacheWriteTokens,
|
|
580
581
|
);
|
|
581
582
|
|
|
582
583
|
yield {
|
|
583
584
|
type: "usage",
|
|
584
|
-
inputTokens:
|
|
585
|
+
inputTokens: Math.max(
|
|
586
|
+
0,
|
|
587
|
+
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
588
|
+
),
|
|
585
589
|
outputTokens,
|
|
586
590
|
cacheWriteTokens,
|
|
587
591
|
cacheReadTokens,
|
|
@@ -255,19 +255,21 @@ export class R1BaseHandler extends BaseHandler {
|
|
|
255
255
|
const cacheReadTokens = r1Usage.prompt_cache_hit_tokens ?? 0;
|
|
256
256
|
const cacheWriteTokens = r1Usage.prompt_cache_miss_tokens ?? 0;
|
|
257
257
|
|
|
258
|
-
// Calculate non-cached input tokens (will always be 0 for DeepSeek since input = read + write)
|
|
259
|
-
const nonCachedInputTokens = Math.max(
|
|
260
|
-
0,
|
|
261
|
-
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
262
|
-
);
|
|
263
|
-
|
|
264
258
|
yield {
|
|
265
259
|
type: "usage",
|
|
266
|
-
inputTokens:
|
|
260
|
+
inputTokens: Math.max(
|
|
261
|
+
0,
|
|
262
|
+
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
263
|
+
),
|
|
267
264
|
outputTokens,
|
|
268
265
|
cacheReadTokens,
|
|
269
266
|
cacheWriteTokens,
|
|
270
|
-
totalCost: this.
|
|
267
|
+
totalCost: this.calculateCostFromInclusiveInput(
|
|
268
|
+
inputTokens,
|
|
269
|
+
outputTokens,
|
|
270
|
+
cacheReadTokens,
|
|
271
|
+
cacheWriteTokens,
|
|
272
|
+
),
|
|
271
273
|
id: responseId,
|
|
272
274
|
};
|
|
273
275
|
}
|
|
@@ -189,7 +189,7 @@ export class VertexHandler extends BaseHandler {
|
|
|
189
189
|
if (!isClaudeModel(model.id)) {
|
|
190
190
|
return this.ensureGeminiHandler().getMessages(systemPrompt, messages);
|
|
191
191
|
}
|
|
192
|
-
const supportsPromptCache =
|
|
192
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
193
193
|
return convertToAnthropicMessages(messages, supportsPromptCache);
|
|
194
194
|
}
|
|
195
195
|
|
|
@@ -226,7 +226,7 @@ export class VertexHandler extends BaseHandler {
|
|
|
226
226
|
const budgetTokens = this.config.thinkingBudgetTokens ?? 0;
|
|
227
227
|
const reasoningOn =
|
|
228
228
|
hasModelCapability(model.info, "reasoning") && budgetTokens > 0;
|
|
229
|
-
const promptCacheOn =
|
|
229
|
+
const promptCacheOn = this.supportsPromptCache(model.info);
|
|
230
230
|
|
|
231
231
|
const providerOptions: Record<string, unknown> = {};
|
|
232
232
|
if (reasoningOn) {
|
|
@@ -251,8 +251,18 @@ export class VertexHandler extends BaseHandler {
|
|
|
251
251
|
yield* emitAiSdkStream(stream, {
|
|
252
252
|
responseId,
|
|
253
253
|
errorMessage: "Vertex Anthropic stream failed",
|
|
254
|
-
calculateCost: (
|
|
255
|
-
|
|
254
|
+
calculateCost: (
|
|
255
|
+
inputTokens,
|
|
256
|
+
outputTokens,
|
|
257
|
+
cacheReadTokens,
|
|
258
|
+
cacheWriteTokens,
|
|
259
|
+
) =>
|
|
260
|
+
this.calculateCost(
|
|
261
|
+
inputTokens,
|
|
262
|
+
outputTokens,
|
|
263
|
+
cacheReadTokens,
|
|
264
|
+
cacheWriteTokens,
|
|
265
|
+
),
|
|
256
266
|
reasoningTypes: ["reasoning-delta"],
|
|
257
267
|
enableToolCalls: true,
|
|
258
268
|
toolCallArgsOrder: ["input", "args"],
|
|
@@ -32,12 +32,24 @@ export function convertToAnthropicMessages(
|
|
|
32
32
|
messages: Message[],
|
|
33
33
|
enableCaching = false,
|
|
34
34
|
): AnthropicMessage[] {
|
|
35
|
+
const userMessageIndices = messages.reduce<number[]>(
|
|
36
|
+
(indices, message, index) => {
|
|
37
|
+
if (message.role === "user") {
|
|
38
|
+
indices.push(index);
|
|
39
|
+
}
|
|
40
|
+
return indices;
|
|
41
|
+
},
|
|
42
|
+
[],
|
|
43
|
+
);
|
|
44
|
+
const cacheableMessageIndices = enableCaching
|
|
45
|
+
? new Set(userMessageIndices.slice(-2))
|
|
46
|
+
: new Set<number>();
|
|
35
47
|
const result: AnthropicMessage[] = [];
|
|
36
48
|
|
|
37
|
-
for (const message of messages) {
|
|
49
|
+
for (const [index, message] of messages.entries()) {
|
|
38
50
|
const converted = convertMessage(
|
|
39
51
|
message,
|
|
40
|
-
|
|
52
|
+
cacheableMessageIndices.has(index),
|
|
41
53
|
);
|
|
42
54
|
if (converted) {
|
|
43
55
|
result.push(converted);
|
|
@@ -285,10 +285,33 @@ describe("format conversion", () => {
|
|
|
285
285
|
];
|
|
286
286
|
|
|
287
287
|
const anthropic = convertToAnthropicMessages(messages, true) as any[];
|
|
288
|
+
expect(anthropic[0].content[0].cache_control).toEqual({
|
|
289
|
+
type: "ephemeral",
|
|
290
|
+
});
|
|
288
291
|
expect(anthropic[1].content[0].type).toBe("thinking");
|
|
289
292
|
expect(anthropic[1].content[0].signature).toBe("anthropic-sig");
|
|
290
293
|
});
|
|
291
294
|
|
|
295
|
+
it("applies anthropic cache markers to the last two user messages", () => {
|
|
296
|
+
const messages: Message[] = [
|
|
297
|
+
{ role: "user", content: "first prompt" },
|
|
298
|
+
{ role: "assistant", content: "intermediate response" },
|
|
299
|
+
{ role: "user", content: "second prompt" },
|
|
300
|
+
{ role: "assistant", content: "another response" },
|
|
301
|
+
{ role: "user", content: "third prompt" },
|
|
302
|
+
];
|
|
303
|
+
|
|
304
|
+
const anthropic = convertToAnthropicMessages(messages, true) as any[];
|
|
305
|
+
|
|
306
|
+
expect(anthropic[0].content[0].cache_control).toBeUndefined();
|
|
307
|
+
expect(anthropic[2].content[0].cache_control).toEqual({
|
|
308
|
+
type: "ephemeral",
|
|
309
|
+
});
|
|
310
|
+
expect(anthropic[4].content[0].cache_control).toEqual({
|
|
311
|
+
type: "ephemeral",
|
|
312
|
+
});
|
|
313
|
+
});
|
|
314
|
+
|
|
292
315
|
it("normalizes array-shaped tool_use input for anthropic replay", () => {
|
|
293
316
|
const messages: Message[] = [
|
|
294
317
|
{ role: "user", content: "run these" },
|
|
@@ -172,6 +172,80 @@ function convertContentBlock(
|
|
|
172
172
|
}
|
|
173
173
|
}
|
|
174
174
|
|
|
175
|
+
/**
 * Allowed JSON Schema properties per Gemini's supported subset.
 * See: https://ai.google.dev/gemini-api/docs/structured-output
 *
 * Any key not listed here is dropped by `sanitizeSchemaForGemini`.
 */
const GEMINI_ALLOWED_PROPERTIES = new Set([
	// Common
	"type",
	"title",
	"description",
	"enum",
	// Object
	"properties",
	"required",
	"additionalProperties",
	// String
	"format",
	// Number / Integer
	"minimum",
	"maximum",
	// Array
	"items",
	"prefixItems",
	"minItems",
	"maxItems",
]);

/**
 * Recursively sanitize a JSON Schema so it only contains the properties
 * listed in `GEMINI_ALLOWED_PROPERTIES`.
 *
 * - Nested schemas under `properties`, `items`, `additionalProperties` and
 *   `prefixItems` are sanitized as well, including the array (tuple) form
 *   of `items`.
 * - Numeric `exclusiveMinimum` / `exclusiveMaximum` are converted to
 *   `minimum` / `maximum` as a best-effort fallback when no inclusive bound
 *   is present. Non-numeric values (e.g. the boolean form used by JSON
 *   Schema draft-04) are dropped instead of being copied into a bound.
 *
 * @param schema - A JSON Schema node. Non-object values (primitives, `null`,
 *   arrays) are returned unchanged.
 * @returns A new object containing only supported keys, or the input itself
 *   when it is not a plain object. The input is never mutated.
 */
function sanitizeSchemaForGemini(schema: unknown): unknown {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
		return schema;
	}

	const input = schema as Record<string, unknown>;
	const output: Record<string, unknown> = {};

	for (const [key, value] of Object.entries(input)) {
		if (!GEMINI_ALLOWED_PROPERTIES.has(key)) {
			continue;
		}

		if (key === "properties" && value && typeof value === "object") {
			// Each property value is itself a schema.
			const sanitized: Record<string, unknown> = {};
			for (const [propName, propSchema] of Object.entries(
				value as Record<string, unknown>,
			)) {
				sanitized[propName] = sanitizeSchemaForGemini(propSchema);
			}
			output[key] = sanitized;
		} else if (
			key === "items" ||
			key === "additionalProperties" ||
			key === "prefixItems"
		) {
			// These hold either a single schema, a list of schemas (tuple form),
			// or a primitive such as `additionalProperties: false`. Lists must be
			// mapped element-wise because the top-level guard returns arrays as-is.
			output[key] = Array.isArray(value)
				? value.map((item) => sanitizeSchemaForGemini(item))
				: sanitizeSchemaForGemini(value);
		} else {
			output[key] = value;
		}
	}

	// Convert exclusiveMinimum/exclusiveMaximum to minimum/maximum.
	// Only numeric bounds are meaningful here; a boolean flag must not become
	// the value of `minimum`/`maximum`.
	if (
		typeof input.exclusiveMinimum === "number" &&
		output.minimum === undefined
	) {
		output.minimum = input.exclusiveMinimum;
	}
	if (
		typeof input.exclusiveMaximum === "number" &&
		output.maximum === undefined
	) {
		output.maximum = input.exclusiveMaximum;
	}

	return output;
}
|
|
248
|
+
|
|
175
249
|
/**
|
|
176
250
|
* Convert tool definitions to Gemini format
|
|
177
251
|
*/
|
|
@@ -181,6 +255,8 @@ export function convertToolsToGemini(
|
|
|
181
255
|
return tools.map((tool) => ({
|
|
182
256
|
name: tool.name,
|
|
183
257
|
description: tool.description,
|
|
184
|
-
parameters:
|
|
258
|
+
parameters: sanitizeSchemaForGemini(
|
|
259
|
+
tool.inputSchema,
|
|
260
|
+
) as FunctionDeclaration["parameters"],
|
|
185
261
|
}));
|
|
186
262
|
}
|
|
@@ -55,7 +55,7 @@ export interface ApiStreamReasoningChunk {
|
|
|
55
55
|
*/
|
|
56
56
|
export interface ApiStreamUsageChunk {
|
|
57
57
|
type: "usage";
|
|
58
|
-
/**
|
|
58
|
+
/** Number of input tokens excluding cache reads/writes (handlers subtract cacheReadTokens and cacheWriteTokens from the provider-reported total) */
|
|
59
59
|
inputTokens: number;
|
|
60
60
|
/** Number of output tokens */
|
|
61
61
|
outputTokens: number;
|