@clinebot/llms 0.0.7 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.browser.d.ts +2 -2
- package/dist/index.browser.js +40 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +12 -12
- package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
- package/dist/providers/handlers/base.d.ts +5 -29
- package/dist/providers/transform/openai-format.d.ts +1 -1
- package/dist/providers/types/config.d.ts +6 -0
- package/dist/providers/types/stream.d.ts +1 -1
- package/package.json +2 -1
- package/src/index.browser.ts +2 -2
- package/src/index.ts +2 -2
- package/src/models/providers/vercel-ai-gateway.ts +1 -1
- package/src/providers/handlers/ai-sdk-community.ts +5 -8
- package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
- package/src/providers/handlers/anthropic-base.test.ts +30 -0
- package/src/providers/handlers/anthropic-base.ts +43 -30
- package/src/providers/handlers/base.test.ts +68 -3
- package/src/providers/handlers/base.ts +104 -54
- package/src/providers/handlers/bedrock-base.ts +3 -3
- package/src/providers/handlers/community-sdk.test.ts +33 -0
- package/src/providers/handlers/gemini-base.test.ts +40 -0
- package/src/providers/handlers/gemini-base.ts +22 -20
- package/src/providers/handlers/openai-base.ts +67 -12
- package/src/providers/handlers/openai-responses.test.ts +46 -0
- package/src/providers/handlers/openai-responses.ts +3 -7
- package/src/providers/handlers/r1-base.ts +7 -8
- package/src/providers/handlers/vertex.ts +15 -5
- package/src/providers/transform/anthropic-format.ts +14 -2
- package/src/providers/transform/format-conversion.test.ts +49 -0
- package/src/providers/transform/openai-format.ts +50 -7
- package/src/providers/types/config.ts +8 -0
- package/src/providers/types/stream.ts +1 -1
package/src/providers/handlers/r1-base.ts

@@ -255,19 +255,18 @@ export class R1BaseHandler extends BaseHandler {
     const cacheReadTokens = r1Usage.prompt_cache_hit_tokens ?? 0;
     const cacheWriteTokens = r1Usage.prompt_cache_miss_tokens ?? 0;

-    // Calculate non-cached input tokens (will always be 0 for DeepSeek since input = read + write)
-    const nonCachedInputTokens = Math.max(
-      0,
-      inputTokens - cacheReadTokens - cacheWriteTokens,
-    );
-
     yield {
       type: "usage",
-      inputTokens
+      inputTokens,
       outputTokens,
       cacheReadTokens,
       cacheWriteTokens,
-      totalCost: this.calculateCost(
+      totalCost: this.calculateCost(
+        inputTokens,
+        outputTokens,
+        cacheReadTokens,
+        cacheWriteTokens,
+      ),
       id: responseId,
     };
   }
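
This hunk drops the locally computed nonCachedInputTokens and instead reports the provider's raw inputTokens, handing all four token counts to calculateCost. A minimal sketch of the cost split that call shape implies, assuming a per-model rate table; the ModelRates type and its field names are hypothetical, not part of this package:

    // Sketch only: ModelRates and its rate names are assumptions.
    interface ModelRates {
      inputPrice: number; // USD per million non-cached input tokens
      outputPrice: number; // USD per million output tokens
      cacheReadPrice: number; // USD per million cache-hit tokens
      cacheWritePrice: number; // USD per million cache-miss tokens
    }

    function calculateCostSketch(
      rates: ModelRates,
      inputTokens: number,
      outputTokens: number,
      cacheReadTokens: number,
      cacheWriteTokens: number,
    ): number {
      // DeepSeek reports input = cache hits + cache misses, so this
      // non-cached remainder is always 0 for that provider.
      const nonCached = Math.max(
        0,
        inputTokens - cacheReadTokens - cacheWriteTokens,
      );
      return (
        (nonCached * rates.inputPrice +
          outputTokens * rates.outputPrice +
          cacheReadTokens * rates.cacheReadPrice +
          cacheWriteTokens * rates.cacheWritePrice) /
        1_000_000
      );
    }

The apparent effect is that any cached-token discounting now happens inside calculateCost, while the usage chunk carries the provider's unadjusted totals.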
package/src/providers/handlers/vertex.ts

@@ -189,7 +189,7 @@ export class VertexHandler extends BaseHandler {
     if (!isClaudeModel(model.id)) {
       return this.ensureGeminiHandler().getMessages(systemPrompt, messages);
     }
-    const supportsPromptCache =
+    const supportsPromptCache = this.supportsPromptCache(model.info);
     return convertToAnthropicMessages(messages, supportsPromptCache);
   }

@@ -226,7 +226,7 @@ export class VertexHandler extends BaseHandler {
     const budgetTokens = this.config.thinkingBudgetTokens ?? 0;
     const reasoningOn =
       hasModelCapability(model.info, "reasoning") && budgetTokens > 0;
-    const promptCacheOn =
+    const promptCacheOn = this.supportsPromptCache(model.info);

     const providerOptions: Record<string, unknown> = {};
     if (reasoningOn) {
@@ -241,7 +241,7 @@ export class VertexHandler extends BaseHandler {
         promptCacheOn,
       }),
       tools: toAiSdkTools(tools),
-      maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ??
+      maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
       temperature: reasoningOn ? undefined : 0,
       providerOptions:
         Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
@@ -251,8 +251,18 @@ export class VertexHandler extends BaseHandler {
     yield* emitAiSdkStream(stream, {
       responseId,
       errorMessage: "Vertex Anthropic stream failed",
-      calculateCost: (
-
+      calculateCost: (
+        inputTokens,
+        outputTokens,
+        cacheReadTokens,
+        cacheWriteTokens,
+      ) =>
+        this.calculateCost(
+          inputTokens,
+          outputTokens,
+          cacheReadTokens,
+          cacheWriteTokens,
+        ),
       reasoningTypes: ["reasoning-delta"],
       enableToolCalls: true,
       toolCallArgsOrder: ["input", "args"],
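
These vertex.ts hunks share one theme: prompt-cache support now comes from a shared this.supportsPromptCache(model.info) helper, the maxTokens fallback chain bottoms out at a hard-coded 128_000, and per-chunk cost calculation is forwarded into emitAiSdkStream. A sketch of what the helper and fallback plausibly look like; both bodies below are assumptions about BaseHandler internals, not the package's actual code:

    // Assumed shapes, shown in isolation; real implementations may differ.
    interface ModelInfoSketch {
      supportsPromptCache?: boolean;
      maxTokens?: number;
    }

    function supportsPromptCache(info: ModelInfoSketch): boolean {
      // Assumption: model info carries a boolean capability flag.
      return info.supportsPromptCache === true;
    }

    // The fallback chain from the maxTokens hunk: model info wins,
    // then handler config, then the 128_000 ceiling.
    function resolveMaxTokens(info: ModelInfoSketch, configMax?: number): number {
      return info.maxTokens ?? configMax ?? 128_000;
    }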
package/src/providers/transform/anthropic-format.ts

@@ -32,12 +32,24 @@ export function convertToAnthropicMessages(
   messages: Message[],
   enableCaching = false,
 ): AnthropicMessage[] {
+  const userMessageIndices = messages.reduce<number[]>(
+    (indices, message, index) => {
+      if (message.role === "user") {
+        indices.push(index);
+      }
+      return indices;
+    },
+    [],
+  );
+  const cacheableMessageIndices = enableCaching
+    ? new Set(userMessageIndices.slice(-2))
+    : new Set<number>();
   const result: AnthropicMessage[] = [];

-  for (const message of messages) {
+  for (const [index, message] of messages.entries()) {
     const converted = convertMessage(
       message,
-
+      cacheableMessageIndices.has(index),
     );
     if (converted) {
       result.push(converted);
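
The converter now decides up front which messages may carry cache markers: it collects the index of every user message, keeps the last two in a Set, and passes a per-message flag into convertMessage. The selection step in isolation, as a self-contained sketch rather than package code:

    // Illustration of the index selection with plain string roles.
    const roles = ["user", "assistant", "user", "assistant", "user"];

    const userIndices = roles.reduce<number[]>((indices, role, index) => {
      if (role === "user") indices.push(index);
      return indices;
    }, []); // -> [0, 2, 4]

    const cacheable = new Set(userIndices.slice(-2)); // -> Set { 2, 4 }
    // Messages 2 and 4 get cache_control; message 0 does not.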
package/src/providers/transform/format-conversion.test.ts

@@ -216,6 +216,32 @@ describe("format conversion", () => {
     expect(openai[1].tool_calls[0].extra_content).toBeUndefined();
   });

+  it("applies OpenAI cache markers only to the final user message", () => {
+    const messages: Message[] = [
+      { role: "user", content: "first prompt" },
+      { role: "assistant", content: "intermediate response" },
+      { role: "user", content: "second prompt" },
+    ];
+
+    const openai = convertToOpenAIMessages(messages, true) as any[];
+    expect(openai[0]).toMatchObject({ role: "user", content: "first prompt" });
+    expect(openai[2].role).toBe("user");
+    expect(openai[2].content).toMatchObject([
+      {
+        type: "text",
+        text: "second prompt",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    const cacheMarkerCount = openai
+      .flatMap((message) =>
+        Array.isArray(message.content) ? message.content : [],
+      )
+      .filter((part) => part?.cache_control?.type === "ephemeral").length;
+    expect(cacheMarkerCount).toBe(1);
+  });
+
   it("normalizes array-shaped tool_use input for openai replay", () => {
     const messages: Message[] = [
       { role: "user", content: "run these" },
@@ -259,10 +285,33 @@ describe("format conversion", () => {
     ];

     const anthropic = convertToAnthropicMessages(messages, true) as any[];
+    expect(anthropic[0].content[0].cache_control).toEqual({
+      type: "ephemeral",
+    });
     expect(anthropic[1].content[0].type).toBe("thinking");
     expect(anthropic[1].content[0].signature).toBe("anthropic-sig");
   });

+  it("applies anthropic cache markers to the last two user messages", () => {
+    const messages: Message[] = [
+      { role: "user", content: "first prompt" },
+      { role: "assistant", content: "intermediate response" },
+      { role: "user", content: "second prompt" },
+      { role: "assistant", content: "another response" },
+      { role: "user", content: "third prompt" },
+    ];
+
+    const anthropic = convertToAnthropicMessages(messages, true) as any[];
+
+    expect(anthropic[0].content[0].cache_control).toBeUndefined();
+    expect(anthropic[2].content[0].cache_control).toEqual({
+      type: "ephemeral",
+    });
+    expect(anthropic[4].content[0].cache_control).toEqual({
+      type: "ephemeral",
+    });
+  });
+
   it("normalizes array-shaped tool_use input for anthropic replay", () => {
     const messages: Message[] = [
       { role: "user", content: "run these" },
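
Taken together, the new tests pin down two different breakpoint policies: the OpenAI conversion marks only the final user message, while the Anthropic conversion marks the last two. The diff does not state the rationale, but marking two trailing messages is a common way to keep the previous turn's cache point valid while writing a new one. A throwaway sketch of the two policies; the helper is illustrative only:

    // Compare which user-message indices each policy would mark.
    function markedIndices(
      roles: string[],
      policy: "openai" | "anthropic",
    ): number[] {
      const users = roles
        .map((role, index) => (role === "user" ? index : -1))
        .filter((index) => index >= 0);
      return policy === "openai" ? users.slice(-1) : users.slice(-2);
    }

    markedIndices(["user", "assistant", "user", "assistant", "user"], "openai"); // [4]
    markedIndices(["user", "assistant", "user", "assistant", "user"], "anthropic"); // [2, 4]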
package/src/providers/transform/openai-format.ts

@@ -26,23 +26,49 @@ type OpenAIContentPart = OpenAI.Chat.ChatCompletionContentPart;
 /**
  * Convert messages to OpenAI format
  */
-export function convertToOpenAIMessages(
-
+export function convertToOpenAIMessages(
+  messages: Message[],
+  enableCaching = false,
+): OpenAIMessage[] {
+  const lastUserIndex = enableCaching
+    ? messages.map((m) => m.role).lastIndexOf("user")
+    : -1;
+  return messages.flatMap((message, index) =>
+    convertMessage(message, enableCaching && index === lastUserIndex),
+  );
 }

-function convertMessage(
+function convertMessage(
+  message: Message,
+  addCacheControl: boolean,
+): OpenAIMessage[] {
   const { role, content } = message;

   // Simple string content
   if (typeof content === "string") {
-
+    if (role !== "user" || !addCacheControl) {
+      return [{ role, content } as OpenAIMessage];
+    }
+
+    return [
+      {
+        role,
+        content: [
+          {
+            type: "text",
+            text: content,
+            cache_control: { type: "ephemeral" },
+          },
+        ],
+      } as unknown as OpenAIMessage,
+    ];
   }

   // Array content - need to process blocks
   if (role === "assistant") {
     return [convertAssistantMessage(content)];
   } else {
-    return convertUserMessage(content);
+    return convertUserMessage(content, addCacheControl);
   }
 }

@@ -85,7 +111,10 @@ function convertAssistantMessage(content: ContentBlock[]): OpenAIMessage {
   return message;
 }

-function convertUserMessage(
+function convertUserMessage(
+  content: ContentBlock[],
+  addCacheControl: boolean,
+): OpenAIMessage[] {
   const messages: OpenAIMessage[] = [];

   // Convert all tool results to separate tool messages
@@ -137,10 +166,24 @@ function convertUserMessage(content: ContentBlock[]): OpenAIMessage[] {
     return messages;
   }

+  if (addCacheControl) {
+    for (let i = parts.length - 1; i >= 0; i--) {
+      if (parts[i].type === "text") {
+        parts[i] = {
+          ...(parts[i] as OpenAI.Chat.ChatCompletionContentPartText),
+          cache_control: { type: "ephemeral" },
+        } as unknown as OpenAIContentPart;
+        break;
+      }
+    }
+  }
+
   messages.push({
     role: "user",
     content:
-      parts.length === 1 && parts[0].type === "text"
+      parts.length === 1 && parts[0].type === "text" && !addCacheControl
+        ? parts[0].text
+        : (parts as unknown as OpenAI.Chat.ChatCompletionUserMessageParam["content"]),
   });

   return messages;
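
With caching enabled, the final user message's string content is wrapped into a single text part carrying an Anthropic-style cache_control marker, and for array content only the last text part is marked. The as unknown as casts are there because the upstream openai types do not declare cache_control; presumably the field targets gateways that accept it in OpenAI-shaped payloads. A shape sketch of the wrapping, with types elided:

    // Illustration only; mirrors the test expectation shown earlier.
    function toCachedUserMessage(text: string) {
      return {
        role: "user" as const,
        content: [
          {
            type: "text" as const,
            text,
            cache_control: { type: "ephemeral" as const },
          },
        ],
      };
    }

    // toCachedUserMessage("second prompt") yields the cached message shape.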
package/src/providers/types/config.ts

@@ -244,6 +244,11 @@ export interface ProviderOptions {
   modelCatalog?: ModelCatalogConfig;
 }

+/**
+ * Provider-specific options that don't fit other categories
+ */
+import type { BasicLogger } from "@clinebot/shared";
+
 /**
  * Runtime model catalog refresh options
  */
@@ -299,6 +304,9 @@ export interface ProviderConfig
   /** AbortSignal for cancelling requests */
   abortSignal?: AbortSignal;

+  /** Optional runtime logger for provider-level diagnostics */
+  logger?: BasicLogger;
+
   /** Codex CLI-specific options */
   codex?: CodexConfig;

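
The substantive addition here is the optional logger field on ProviderConfig, typed as BasicLogger from @clinebot/shared. A usage sketch; the method names assumed for BasicLogger below are unverified guesses, not a documented interface:

    // Hypothetical logger shape; check @clinebot/shared for the real one.
    const config = {
      abortSignal: new AbortController().signal,
      logger: {
        debug: (message: string) => console.debug(message),
        info: (message: string) => console.info(message),
        warn: (message: string) => console.warn(message),
        error: (message: string) => console.error(message),
      },
    };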
package/src/providers/types/stream.ts

@@ -55,7 +55,7 @@ export interface ApiStreamReasoningChunk {
  */
 export interface ApiStreamUsageChunk {
   type: "usage";
-  /**
+  /** Total number of input tokens reported by the provider */
   inputTokens: number;
   /** Number of output tokens */
   outputTokens: number;
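
Finally, the usage-chunk doc comment on inputTokens is completed. A consuming sketch; the interface below is re-declared locally from the fields visible in this diff rather than imported, and the optional cache and cost fields are assumptions, not confirmed signatures:

    // Local re-declaration based on this diff; the package may export
    // the same type under this name.
    interface ApiStreamUsageChunk {
      type: "usage";
      /** Total number of input tokens reported by the provider */
      inputTokens: number;
      /** Number of output tokens */
      outputTokens: number;
      cacheReadTokens?: number; // assumption: optional
      cacheWriteTokens?: number; // assumption: optional
      totalCost?: number; // assumption: optional
    }

    async function logUsage(
      stream: AsyncIterable<ApiStreamUsageChunk | { type: string }>,
    ): Promise<void> {
      for await (const chunk of stream) {
        if (chunk.type === "usage") {
          const usage = chunk as ApiStreamUsageChunk;
          console.log(`input=${usage.inputTokens} output=${usage.outputTokens}`);
        }
      }
    }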