@clinebot/llms 0.0.7 → 0.0.11

Files changed (33)
  1. package/dist/index.browser.d.ts +2 -2
  2. package/dist/index.browser.js +40 -1
  3. package/dist/index.d.ts +2 -2
  4. package/dist/index.js +12 -12
  5. package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
  6. package/dist/providers/handlers/base.d.ts +5 -29
  7. package/dist/providers/transform/openai-format.d.ts +1 -1
  8. package/dist/providers/types/config.d.ts +6 -0
  9. package/dist/providers/types/stream.d.ts +1 -1
  10. package/package.json +2 -1
  11. package/src/index.browser.ts +2 -2
  12. package/src/index.ts +2 -2
  13. package/src/models/providers/vercel-ai-gateway.ts +1 -1
  14. package/src/providers/handlers/ai-sdk-community.ts +5 -8
  15. package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
  16. package/src/providers/handlers/anthropic-base.test.ts +30 -0
  17. package/src/providers/handlers/anthropic-base.ts +43 -30
  18. package/src/providers/handlers/base.test.ts +68 -3
  19. package/src/providers/handlers/base.ts +104 -54
  20. package/src/providers/handlers/bedrock-base.ts +3 -3
  21. package/src/providers/handlers/community-sdk.test.ts +33 -0
  22. package/src/providers/handlers/gemini-base.test.ts +40 -0
  23. package/src/providers/handlers/gemini-base.ts +22 -20
  24. package/src/providers/handlers/openai-base.ts +67 -12
  25. package/src/providers/handlers/openai-responses.test.ts +46 -0
  26. package/src/providers/handlers/openai-responses.ts +3 -7
  27. package/src/providers/handlers/r1-base.ts +7 -8
  28. package/src/providers/handlers/vertex.ts +15 -5
  29. package/src/providers/transform/anthropic-format.ts +14 -2
  30. package/src/providers/transform/format-conversion.test.ts +49 -0
  31. package/src/providers/transform/openai-format.ts +50 -7
  32. package/src/providers/types/config.ts +8 -0
  33. package/src/providers/types/stream.ts +1 -1
package/src/providers/handlers/r1-base.ts
@@ -255,19 +255,18 @@ export class R1BaseHandler extends BaseHandler {
     const cacheReadTokens = r1Usage.prompt_cache_hit_tokens ?? 0;
     const cacheWriteTokens = r1Usage.prompt_cache_miss_tokens ?? 0;
 
-    // Calculate non-cached input tokens (will always be 0 for DeepSeek since input = read + write)
-    const nonCachedInputTokens = Math.max(
-      0,
-      inputTokens - cacheReadTokens - cacheWriteTokens,
-    );
-
     yield {
       type: "usage",
-      inputTokens: nonCachedInputTokens,
+      inputTokens,
       outputTokens,
       cacheReadTokens,
       cacheWriteTokens,
-      totalCost: this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+      totalCost: this.calculateCost(
+        inputTokens,
+        outputTokens,
+        cacheReadTokens,
+        cacheWriteTokens,
+      ),
       id: responseId,
     };
   }
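
Note: the usage chunk now carries the provider's total input token count instead of a derived non-cached figure, and cache write tokens flow into the cost calculation. Below is a minimal sketch of a four-argument cost calculation under these semantics; PricingPerMillion and the rates are illustrative assumptions, not the handler's actual pricing source.

// Hedged sketch: per-million-token rates are hypothetical; the real handler
// takes pricing from the model catalog.
interface PricingPerMillion {
  input: number; // uncached input tokens
  output: number;
  cacheRead: number; // discounted prompt-cache hits
  cacheWrite: number; // tokens written to the cache
}

function calculateCost(
  pricing: PricingPerMillion,
  inputTokens: number,
  outputTokens: number,
  cacheReadTokens: number,
  cacheWriteTokens: number,
): number {
  // inputTokens is the provider-reported total; bill the cached portions at
  // their own rates and only the remainder at the plain input rate. For
  // DeepSeek, read + write covers the whole input, so the remainder is 0.
  const uncachedInput = Math.max(
    0,
    inputTokens - cacheReadTokens - cacheWriteTokens,
  );
  return (
    (uncachedInput * pricing.input +
      outputTokens * pricing.output +
      cacheReadTokens * pricing.cacheRead +
      cacheWriteTokens * pricing.cacheWrite) /
    1_000_000
  );
}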
package/src/providers/handlers/vertex.ts
@@ -189,7 +189,7 @@ export class VertexHandler extends BaseHandler {
     if (!isClaudeModel(model.id)) {
       return this.ensureGeminiHandler().getMessages(systemPrompt, messages);
     }
-    const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
+    const supportsPromptCache = this.supportsPromptCache(model.info);
     return convertToAnthropicMessages(messages, supportsPromptCache);
   }
 
@@ -226,7 +226,7 @@ export class VertexHandler extends BaseHandler {
     const budgetTokens = this.config.thinkingBudgetTokens ?? 0;
     const reasoningOn =
      hasModelCapability(model.info, "reasoning") && budgetTokens > 0;
-    const promptCacheOn = hasModelCapability(model.info, "prompt-cache");
+    const promptCacheOn = this.supportsPromptCache(model.info);
 
     const providerOptions: Record<string, unknown> = {};
     if (reasoningOn) {
@@ -241,7 +241,7 @@ export class VertexHandler extends BaseHandler {
         promptCacheOn,
       }),
       tools: toAiSdkTools(tools),
-      maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
+      maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
       temperature: reasoningOn ? undefined : 0,
       providerOptions:
         Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
@@ -251,8 +251,18 @@ export class VertexHandler extends BaseHandler {
     yield* emitAiSdkStream(stream, {
       responseId,
       errorMessage: "Vertex Anthropic stream failed",
-      calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
-        this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
+      calculateCost: (
+        inputTokens,
+        outputTokens,
+        cacheReadTokens,
+        cacheWriteTokens,
+      ) =>
+        this.calculateCost(
+          inputTokens,
+          outputTokens,
+          cacheReadTokens,
+          cacheWriteTokens,
+        ),
       reasoningTypes: ["reasoning-delta"],
       enableToolCalls: true,
       toolCallArgsOrder: ["input", "args"],
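
Note: the stream's cost callback now forwards cache write tokens as a fourth argument, and the output-token fallback rises from 8192 to 128_000. Below is a sketch of the callback contract the hunks imply; CalculateCost and EmitAiSdkStreamOptions are illustrative names, not the package's actual exports.

// Assumed type names; the field names and example values come from the diff.
type CalculateCost = (
  inputTokens: number,
  outputTokens: number,
  cacheReadTokens: number,
  cacheWriteTokens: number,
) => number;

interface EmitAiSdkStreamOptions {
  responseId: string;
  errorMessage: string;
  calculateCost: CalculateCost;
  reasoningTypes: string[]; // e.g. ["reasoning-delta"]
  enableToolCalls: boolean;
  toolCallArgsOrder: string[]; // e.g. ["input", "args"]
}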
package/src/providers/transform/anthropic-format.ts
@@ -32,12 +32,24 @@ export function convertToAnthropicMessages(
   messages: Message[],
   enableCaching = false,
 ): AnthropicMessage[] {
+  const userMessageIndices = messages.reduce<number[]>(
+    (indices, message, index) => {
+      if (message.role === "user") {
+        indices.push(index);
+      }
+      return indices;
+    },
+    [],
+  );
+  const cacheableMessageIndices = enableCaching
+    ? new Set(userMessageIndices.slice(-2))
+    : new Set<number>();
   const result: AnthropicMessage[] = [];
 
-  for (const message of messages) {
+  for (const [index, message] of messages.entries()) {
     const converted = convertMessage(
       message,
-      enableCaching && messages.indexOf(message) === messages.length - 1,
+      cacheableMessageIndices.has(index),
     );
     if (converted) {
       result.push(converted);
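
Note: Anthropic cache breakpoints now land on the last two user messages rather than only the final one, so a follow-up turn can reuse the cache entry written at the previous turn's breakpoint. A minimal equivalent of the index bookkeeping, assuming roles are plain strings; the new tests in format-conversion.test.ts below verify this placement.

// Equivalent of the reduce + Set above: collect user-message indices and
// keep the last two as cache breakpoints.
function lastTwoUserIndices(roles: string[]): Set<number> {
  const userIndices = roles
    .map((role, index) => (role === "user" ? index : -1))
    .filter((index) => index !== -1);
  return new Set(userIndices.slice(-2));
}

lastTwoUserIndices(["user", "assistant", "user", "assistant", "user"]);
// => Set { 2, 4 }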
package/src/providers/transform/format-conversion.test.ts
@@ -216,6 +216,32 @@ describe("format conversion", () => {
     expect(openai[1].tool_calls[0].extra_content).toBeUndefined();
   });
 
+  it("applies OpenAI cache markers only to the final user message", () => {
+    const messages: Message[] = [
+      { role: "user", content: "first prompt" },
+      { role: "assistant", content: "intermediate response" },
+      { role: "user", content: "second prompt" },
+    ];
+
+    const openai = convertToOpenAIMessages(messages, true) as any[];
+    expect(openai[0]).toMatchObject({ role: "user", content: "first prompt" });
+    expect(openai[2].role).toBe("user");
+    expect(openai[2].content).toMatchObject([
+      {
+        type: "text",
+        text: "second prompt",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    const cacheMarkerCount = openai
+      .flatMap((message) =>
+        Array.isArray(message.content) ? message.content : [],
+      )
+      .filter((part) => part?.cache_control?.type === "ephemeral").length;
+    expect(cacheMarkerCount).toBe(1);
+  });
+
   it("normalizes array-shaped tool_use input for openai replay", () => {
     const messages: Message[] = [
       { role: "user", content: "run these" },
@@ -259,10 +285,33 @@ describe("format conversion", () => {
     ];
 
     const anthropic = convertToAnthropicMessages(messages, true) as any[];
+    expect(anthropic[0].content[0].cache_control).toEqual({
+      type: "ephemeral",
+    });
     expect(anthropic[1].content[0].type).toBe("thinking");
     expect(anthropic[1].content[0].signature).toBe("anthropic-sig");
   });
 
+  it("applies anthropic cache markers to the last two user messages", () => {
+    const messages: Message[] = [
+      { role: "user", content: "first prompt" },
+      { role: "assistant", content: "intermediate response" },
+      { role: "user", content: "second prompt" },
+      { role: "assistant", content: "another response" },
+      { role: "user", content: "third prompt" },
+    ];
+
+    const anthropic = convertToAnthropicMessages(messages, true) as any[];
+
+    expect(anthropic[0].content[0].cache_control).toBeUndefined();
+    expect(anthropic[2].content[0].cache_control).toEqual({
+      type: "ephemeral",
+    });
+    expect(anthropic[4].content[0].cache_control).toEqual({
+      type: "ephemeral",
+    });
+  });
+
   it("normalizes array-shaped tool_use input for anthropic replay", () => {
     const messages: Message[] = [
       { role: "user", content: "run these" },
package/src/providers/transform/openai-format.ts
@@ -26,23 +26,49 @@ type OpenAIContentPart = OpenAI.Chat.ChatCompletionContentPart;
 /**
  * Convert messages to OpenAI format
  */
-export function convertToOpenAIMessages(messages: Message[]): OpenAIMessage[] {
-  return messages.flatMap(convertMessage);
+export function convertToOpenAIMessages(
+  messages: Message[],
+  enableCaching = false,
+): OpenAIMessage[] {
+  const lastUserIndex = enableCaching
+    ? messages.map((m) => m.role).lastIndexOf("user")
+    : -1;
+  return messages.flatMap((message, index) =>
+    convertMessage(message, enableCaching && index === lastUserIndex),
+  );
 }
 
-function convertMessage(message: Message): OpenAIMessage[] {
+function convertMessage(
+  message: Message,
+  addCacheControl: boolean,
+): OpenAIMessage[] {
   const { role, content } = message;
 
   // Simple string content
   if (typeof content === "string") {
-    return [{ role, content } as OpenAIMessage];
+    if (role !== "user" || !addCacheControl) {
+      return [{ role, content } as OpenAIMessage];
+    }
+
+    return [
+      {
+        role,
+        content: [
+          {
+            type: "text",
+            text: content,
+            cache_control: { type: "ephemeral" },
+          },
+        ],
+      } as unknown as OpenAIMessage,
+    ];
   }
 
   // Array content - need to process blocks
   if (role === "assistant") {
     return [convertAssistantMessage(content)];
   } else {
-    return convertUserMessage(content);
+    return convertUserMessage(content, addCacheControl);
   }
 }
 
@@ -85,7 +111,10 @@ function convertAssistantMessage(content: ContentBlock[]): OpenAIMessage {
   return message;
 }
 
-function convertUserMessage(content: ContentBlock[]): OpenAIMessage[] {
+function convertUserMessage(
+  content: ContentBlock[],
+  addCacheControl: boolean,
+): OpenAIMessage[] {
   const messages: OpenAIMessage[] = [];
 
   // Convert all tool results to separate tool messages
@@ -137,10 +166,24 @@ function convertUserMessage(content: ContentBlock[]): OpenAIMessage[] {
     return messages;
   }
 
+  if (addCacheControl) {
+    for (let i = parts.length - 1; i >= 0; i--) {
+      if (parts[i].type === "text") {
+        parts[i] = {
+          ...(parts[i] as OpenAI.Chat.ChatCompletionContentPartText),
+          cache_control: { type: "ephemeral" },
+        } as unknown as OpenAIContentPart;
+        break;
+      }
+    }
+  }
+
   messages.push({
     role: "user",
     content:
-      parts.length === 1 && parts[0].type === "text" ? parts[0].text : parts,
+      parts.length === 1 && parts[0].type === "text" && !addCacheControl
+        ? parts[0].text
+        : (parts as unknown as OpenAI.Chat.ChatCompletionUserMessageParam["content"]),
   });
 
   return messages;
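
Note: callers opt in via the new second argument; only the final user message carries a cache marker, and string content is wrapped into a text part so it can hold the marker. A hedged usage sketch follows; the import path is an assumption, and cache_control is non-standard for OpenAI itself, so it only matters for OpenAI-compatible providers that honor it.

import { convertToOpenAIMessages, type Message } from "@clinebot/llms"; // assumed export path

const history: Message[] = [
  { role: "user", content: "summarize this repo" },
  { role: "assistant", content: "It converts chat formats." },
  { role: "user", content: "now list the handlers" },
];

// Only history[2] comes back with content wrapped as
// [{ type: "text", text: "...", cache_control: { type: "ephemeral" } }].
const openaiMessages = convertToOpenAIMessages(history, true);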
package/src/providers/types/config.ts
@@ -244,6 +244,11 @@ export interface ProviderOptions {
   modelCatalog?: ModelCatalogConfig;
 }
 
+/**
+ * Provider-specific options that don't fit other categories
+ */
+import type { BasicLogger } from "@clinebot/shared";
+
 /**
  * Runtime model catalog refresh options
  */
@@ -299,6 +304,9 @@ export interface ProviderConfig
   /** AbortSignal for cancelling requests */
   abortSignal?: AbortSignal;
 
+  /** Optional runtime logger for provider-level diagnostics */
+  logger?: BasicLogger;
+
   /** Codex CLI-specific options */
   codex?: CodexConfig;
 
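Note: ProviderConfig gains an optional logger. A wiring sketch, with the import path for ProviderConfig and the console cast both assumptions, since BasicLogger's actual surface lives in @clinebot/shared:

import type { BasicLogger } from "@clinebot/shared";
import type { ProviderConfig } from "@clinebot/llms"; // assumed export path

// Partial<> keeps the sketch honest about the elided required options.
const config: Partial<ProviderConfig> = {
  abortSignal: new AbortController().signal,
  logger: console as unknown as BasicLogger, // new in 0.0.11
};
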
package/src/providers/types/stream.ts
@@ -55,7 +55,7 @@ export interface ApiStreamReasoningChunk {
  */
 export interface ApiStreamUsageChunk {
   type: "usage";
-  /** Number of input tokens (excluding cached) */
+  /** Total number of input tokens reported by the provider */
   inputTokens: number;
   /** Number of output tokens */
   outputTokens: number;
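
Note: since inputTokens is now the provider-reported total, consumers that previously reconstructed a total as inputTokens + cacheReadTokens + cacheWriteTokens would double-count. A sketch of the adjusted accounting; the chunk shape comes from the diff, while the import path and the surrounding chunk union are assumptions.

import type { ApiStreamUsageChunk } from "@clinebot/llms"; // assumed export path

async function tallyUsage(
  stream: AsyncIterable<{ type: string } | ApiStreamUsageChunk>,
): Promise<{ totalInput: number; totalOutput: number }> {
  let totalInput = 0;
  let totalOutput = 0;
  for await (const chunk of stream) {
    if (chunk.type !== "usage") continue;
    const usage = chunk as ApiStreamUsageChunk;
    // inputTokens already includes the cached portions; do not add
    // cacheReadTokens or cacheWriteTokens back in.
    totalInput += usage.inputTokens;
    totalOutput += usage.outputTokens;
  }
  return { totalInput, totalOutput };
}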