@clinebot/llms 0.0.6 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/index.browser.d.ts +2 -2
  2. package/dist/index.browser.js +40 -1
  3. package/dist/index.d.ts +2 -2
  4. package/dist/index.js +12 -12
  5. package/dist/providers/handlers/anthropic-base.d.ts +11 -1
  6. package/dist/providers/handlers/base.d.ts +2 -27
  7. package/dist/providers/transform/openai-format.d.ts +1 -1
  8. package/dist/providers/types/config.d.ts +6 -0
  9. package/dist/providers/types/messages.d.ts +2 -0
  10. package/dist/providers/utils/tool-processor.d.ts +2 -1
  11. package/package.json +2 -1
  12. package/src/index.browser.ts +2 -2
  13. package/src/index.ts +2 -2
  14. package/src/models/generated.ts +15 -1
  15. package/src/models/providers/openai-codex.ts +19 -3
  16. package/src/models/providers/vercel-ai-gateway.ts +1 -1
  17. package/src/providers/handlers/anthropic-base.ts +19 -6
  18. package/src/providers/handlers/base.test.ts +60 -1
  19. package/src/providers/handlers/base.ts +83 -54
  20. package/src/providers/handlers/bedrock-base.ts +1 -1
  21. package/src/providers/handlers/codex.test.ts +37 -0
  22. package/src/providers/handlers/community-sdk.ts +0 -1
  23. package/src/providers/handlers/gemini-base.test.ts +40 -0
  24. package/src/providers/handlers/gemini-base.ts +16 -1
  25. package/src/providers/handlers/openai-base.ts +55 -11
  26. package/src/providers/handlers/vertex.ts +1 -1
  27. package/src/providers/transform/format-conversion.test.ts +26 -0
  28. package/src/providers/transform/openai-format.ts +50 -7
  29. package/src/providers/types/config.ts +8 -0
  30. package/src/providers/types/messages.ts +2 -0
  31. package/src/providers/utils/tool-processor.test.ts +60 -0
  32. package/src/providers/utils/tool-processor.ts +37 -2
@@ -218,4 +218,44 @@ describe("GeminiHandler", () => {
218
218
  expect(secondId).toBeTruthy();
219
219
  expect(firstId).not.toBe(secondId);
220
220
  });
221
+
222
+ it("defaults maxOutputTokens to 8192 for gemini-3-flash when no model or config limit is provided", async () => {
223
+ generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
224
+
225
+ const handler = new GeminiHandler({
226
+ providerId: "gemini",
227
+ modelId: "gemini-3-flash",
228
+ apiKey: "test-key",
229
+ });
230
+
231
+ await collectChunks(
232
+ handler.createMessage("System", [{ role: "user", content: "go" }]),
233
+ );
234
+
235
+ expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
236
+ const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
237
+ config?: { maxOutputTokens?: number };
238
+ };
239
+ expect(request.config?.maxOutputTokens).toBe(8192);
240
+ });
241
+
242
+ it("defaults maxOutputTokens to 128000 for non gemini-3-flash models when no model or config limit is provided", async () => {
243
+ generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
244
+
245
+ const handler = new GeminiHandler({
246
+ providerId: "gemini",
247
+ modelId: "gemini-2.5-flash",
248
+ apiKey: "test-key",
249
+ });
250
+
251
+ await collectChunks(
252
+ handler.createMessage("System", [{ role: "user", content: "go" }]),
253
+ );
254
+
255
+ expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
256
+ const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
257
+ config?: { maxOutputTokens?: number };
258
+ };
259
+ expect(request.config?.maxOutputTokens).toBe(128000);
260
+ });
221
261
  });
@@ -27,6 +27,16 @@ import { RetriableError, retryStream } from "../utils/retry";
27
27
  import { BaseHandler } from "./base";
28
28
 
29
29
  const DEFAULT_THINKING_BUDGET_TOKENS = 1024;
30
+ const DEFAULT_MAX_OUTPUT_TOKENS = 128_000;
31
+ const GEMINI_3_FLASH_MAX_OUTPUT_TOKENS = 8192;
32
+
33
+ function isGemini3FlashModel(modelId: string): boolean {
34
+ const normalized = modelId.toLowerCase();
35
+ return (
36
+ normalized.includes("gemini-3-flash") ||
37
+ normalized.includes("gemini-3.0-flash")
38
+ );
39
+ }
30
40
 
31
41
  /**
32
42
  * Handler for Google's Gemini API
@@ -131,6 +141,11 @@ export class GeminiHandler extends BaseHandler {
131
141
  }
132
142
 
133
143
  // Build request config with abort signal
144
+ const fallbackMaxOutputTokens = isGemini3FlashModel(modelId)
145
+ ? GEMINI_3_FLASH_MAX_OUTPUT_TOKENS
146
+ : DEFAULT_MAX_OUTPUT_TOKENS;
147
+ const maxOutputTokens =
148
+ info.maxTokens ?? this.config.maxOutputTokens ?? fallbackMaxOutputTokens;
134
149
  const requestConfig: GenerateContentConfig = {
135
150
  httpOptions: this.config.baseUrl
136
151
  ? { baseUrl: this.config.baseUrl, headers: this.getRequestHeaders() }
@@ -138,7 +153,7 @@ export class GeminiHandler extends BaseHandler {
138
153
  abortSignal,
139
154
  systemInstruction: systemPrompt,
140
155
  temperature: info.temperature ?? 1,
141
- maxOutputTokens: info.maxTokens ?? this.config.maxOutputTokens,
156
+ maxOutputTokens,
142
157
  };
143
158
 
144
159
  // Add thinking config only when explicitly requested and supported.
@@ -22,6 +22,7 @@ import type {
22
22
  ModelInfo,
23
23
  ProviderConfig,
24
24
  } from "../types";
25
+ import { hasModelCapability } from "../types";
25
26
  import type { Message, ToolDefinition } from "../types/messages";
26
27
  import { retryStream } from "../utils/retry";
27
28
  import { ToolCallProcessor } from "../utils/tool-processor";
@@ -106,9 +107,26 @@ export class OpenAIBaseHandler extends BaseHandler {
106
107
  systemPrompt: string,
107
108
  messages: Message[],
108
109
  ): OpenAI.Chat.ChatCompletionMessageParam[] {
110
+ const model = this.getModel();
111
+ const supportsPromptCache =
112
+ hasModelCapability(model.info, "prompt-cache") ||
113
+ this.config.capabilities?.includes("prompt-cache") === true;
114
+ const systemMessage = supportsPromptCache
115
+ ? ({
116
+ role: "system",
117
+ content: [
118
+ {
119
+ type: "text",
120
+ text: systemPrompt,
121
+ cache_control: { type: "ephemeral" },
122
+ },
123
+ ],
124
+ } as unknown as OpenAI.Chat.ChatCompletionMessageParam)
125
+ : { role: "system" as const, content: systemPrompt };
126
+
109
127
  return [
110
- { role: "system", content: systemPrompt },
111
- ...convertToOpenAIMessages(messages),
128
+ systemMessage,
129
+ ...convertToOpenAIMessages(messages, supportsPromptCache),
112
130
  ];
113
131
  }
114
132
 
@@ -171,7 +189,11 @@ export class OpenAIBaseHandler extends BaseHandler {
171
189
  this.config.reasoningEffort ??
172
190
  (this.config.thinking ? DEFAULT_REASONING_EFFORT : undefined);
173
191
  if (supportsReasoningEffort && effectiveReasoningEffort) {
174
- (requestOptions as any).reasoning_effort = effectiveReasoningEffort;
192
+ (
193
+ requestOptions as OpenAI.ChatCompletionCreateParamsStreaming & {
194
+ reasoning_effort?: string;
195
+ }
196
+ ).reasoning_effort = effectiveReasoningEffort;
175
197
  }
176
198
 
177
199
  const requestHeaders = this.getRequestHeaders();
@@ -191,16 +213,25 @@ export class OpenAIBaseHandler extends BaseHandler {
191
213
  headers: requestHeaders,
192
214
  });
193
215
  const toolCallProcessor = new ToolCallProcessor();
216
+ let finishReason: string | null = null;
194
217
 
195
218
  for await (const chunk of stream) {
219
+ const choice = chunk.choices?.[0];
220
+ if (choice?.finish_reason) {
221
+ finishReason = choice.finish_reason;
222
+ }
196
223
  yield* this.withResponseIdForAll(
197
224
  this.processChunk(chunk, toolCallProcessor, modelInfo, responseId),
198
225
  responseId,
199
226
  );
200
227
  }
201
228
 
202
- // Yield done chunk to indicate streaming completed successfully
203
- yield { type: "done", success: true, id: responseId };
229
+ yield {
230
+ type: "done",
231
+ success: true,
232
+ id: responseId,
233
+ incompleteReason: finishReason === "length" ? "max_tokens" : undefined,
234
+ };
204
235
  }
205
236
 
206
237
  /**
@@ -213,9 +244,11 @@ export class OpenAIBaseHandler extends BaseHandler {
213
244
  _modelInfo: ModelInfo,
214
245
  responseId: string,
215
246
  ): Generator<import("../types").ApiStreamChunk> {
216
- const delta = chunk.choices?.[0]?.delta && {
217
- ...chunk.choices[0].delta,
218
- reasoning_content: (chunk.choices[0].delta as any).reasoning_content,
247
+ const rawDelta = chunk.choices?.[0]?.delta;
248
+ const delta = rawDelta && {
249
+ ...rawDelta,
250
+ reasoning_content: (rawDelta as { reasoning_content?: string })
251
+ .reasoning_content,
219
252
  };
220
253
 
221
254
  // Handle text content
@@ -227,7 +260,7 @@ export class OpenAIBaseHandler extends BaseHandler {
227
260
  if (delta?.reasoning_content) {
228
261
  yield {
229
262
  type: "reasoning",
230
- reasoning: (delta as any).reasoning_content,
263
+ reasoning: delta.reasoning_content,
231
264
  id: responseId,
232
265
  };
233
266
  }
@@ -248,10 +281,21 @@ export class OpenAIBaseHandler extends BaseHandler {
248
281
  if (chunk.usage) {
249
282
  const inputTokens = chunk.usage.prompt_tokens ?? 0;
250
283
  const outputTokens = chunk.usage.completion_tokens ?? 0;
284
+ const usageWithCache = chunk.usage as typeof chunk.usage & {
285
+ prompt_tokens_details?: {
286
+ cached_tokens?: number;
287
+ cache_write_tokens?: number;
288
+ };
289
+ prompt_cache_miss_tokens?: number;
290
+ cache_creation_input_tokens?: number;
291
+ cache_read_input_tokens?: number;
292
+ };
251
293
  const cacheReadTokens =
252
- (chunk.usage as any).prompt_tokens_details?.cached_tokens ?? 0;
294
+ usageWithCache.prompt_tokens_details?.cached_tokens ?? 0;
253
295
  const cacheWriteTokens =
254
- (chunk.usage as any).prompt_cache_miss_tokens ?? 0;
296
+ usageWithCache.prompt_tokens_details?.cache_write_tokens ??
297
+ usageWithCache.prompt_cache_miss_tokens ??
298
+ 0;
255
299
 
256
300
  yield {
257
301
  type: "usage",
@@ -241,7 +241,7 @@ export class VertexHandler extends BaseHandler {
241
241
  promptCacheOn,
242
242
  }),
243
243
  tools: toAiSdkTools(tools),
244
- maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
244
+ maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
245
245
  temperature: reasoningOn ? undefined : 0,
246
246
  providerOptions:
247
247
  Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
@@ -216,6 +216,32 @@ describe("format conversion", () => {
216
216
  expect(openai[1].tool_calls[0].extra_content).toBeUndefined();
217
217
  });
218
218
 
219
+ it("applies OpenAI cache markers only to the final user message", () => {
220
+ const messages: Message[] = [
221
+ { role: "user", content: "first prompt" },
222
+ { role: "assistant", content: "intermediate response" },
223
+ { role: "user", content: "second prompt" },
224
+ ];
225
+
226
+ const openai = convertToOpenAIMessages(messages, true) as any[];
227
+ expect(openai[0]).toMatchObject({ role: "user", content: "first prompt" });
228
+ expect(openai[2].role).toBe("user");
229
+ expect(openai[2].content).toMatchObject([
230
+ {
231
+ type: "text",
232
+ text: "second prompt",
233
+ cache_control: { type: "ephemeral" },
234
+ },
235
+ ]);
236
+
237
+ const cacheMarkerCount = openai
238
+ .flatMap((message) =>
239
+ Array.isArray(message.content) ? message.content : [],
240
+ )
241
+ .filter((part) => part?.cache_control?.type === "ephemeral").length;
242
+ expect(cacheMarkerCount).toBe(1);
243
+ });
244
+
219
245
  it("normalizes array-shaped tool_use input for openai replay", () => {
220
246
  const messages: Message[] = [
221
247
  { role: "user", content: "run these" },
@@ -26,23 +26,49 @@ type OpenAIContentPart = OpenAI.Chat.ChatCompletionContentPart;
26
26
  /**
27
27
  * Convert messages to OpenAI format
28
28
  */
29
- export function convertToOpenAIMessages(messages: Message[]): OpenAIMessage[] {
30
- return messages.flatMap(convertMessage);
29
+ export function convertToOpenAIMessages(
30
+ messages: Message[],
31
+ enableCaching = false,
32
+ ): OpenAIMessage[] {
33
+ const lastUserIndex = enableCaching
34
+ ? messages.map((m) => m.role).lastIndexOf("user")
35
+ : -1;
36
+ return messages.flatMap((message, index) =>
37
+ convertMessage(message, enableCaching && index === lastUserIndex),
38
+ );
31
39
  }
32
40
 
33
- function convertMessage(message: Message): OpenAIMessage[] {
41
+ function convertMessage(
42
+ message: Message,
43
+ addCacheControl: boolean,
44
+ ): OpenAIMessage[] {
34
45
  const { role, content } = message;
35
46
 
36
47
  // Simple string content
37
48
  if (typeof content === "string") {
38
- return [{ role, content } as OpenAIMessage];
49
+ if (role !== "user" || !addCacheControl) {
50
+ return [{ role, content } as OpenAIMessage];
51
+ }
52
+
53
+ return [
54
+ {
55
+ role,
56
+ content: [
57
+ {
58
+ type: "text",
59
+ text: content,
60
+ cache_control: { type: "ephemeral" },
61
+ },
62
+ ],
63
+ } as unknown as OpenAIMessage,
64
+ ];
39
65
  }
40
66
 
41
67
  // Array content - need to process blocks
42
68
  if (role === "assistant") {
43
69
  return [convertAssistantMessage(content)];
44
70
  } else {
45
- return convertUserMessage(content);
71
+ return convertUserMessage(content, addCacheControl);
46
72
  }
47
73
  }
48
74
 
@@ -85,7 +111,10 @@ function convertAssistantMessage(content: ContentBlock[]): OpenAIMessage {
85
111
  return message;
86
112
  }
87
113
 
88
- function convertUserMessage(content: ContentBlock[]): OpenAIMessage[] {
114
+ function convertUserMessage(
115
+ content: ContentBlock[],
116
+ addCacheControl: boolean,
117
+ ): OpenAIMessage[] {
89
118
  const messages: OpenAIMessage[] = [];
90
119
 
91
120
  // Convert all tool results to separate tool messages
@@ -137,10 +166,24 @@ function convertUserMessage(content: ContentBlock[]): OpenAIMessage[] {
137
166
  return messages;
138
167
  }
139
168
 
169
+ if (addCacheControl) {
170
+ for (let i = parts.length - 1; i >= 0; i--) {
171
+ if (parts[i].type === "text") {
172
+ parts[i] = {
173
+ ...(parts[i] as OpenAI.Chat.ChatCompletionContentPartText),
174
+ cache_control: { type: "ephemeral" },
175
+ } as unknown as OpenAIContentPart;
176
+ break;
177
+ }
178
+ }
179
+ }
180
+
140
181
  messages.push({
141
182
  role: "user",
142
183
  content:
143
- parts.length === 1 && parts[0].type === "text" ? parts[0].text : parts,
184
+ parts.length === 1 && parts[0].type === "text" && !addCacheControl
185
+ ? parts[0].text
186
+ : (parts as unknown as OpenAI.Chat.ChatCompletionUserMessageParam["content"]),
144
187
  });
145
188
 
146
189
  return messages;
@@ -244,6 +244,11 @@ export interface ProviderOptions {
244
244
  modelCatalog?: ModelCatalogConfig;
245
245
  }
246
246
 
247
+ /**
248
+ * Provider-specific options that don't fit other categories
249
+ */
250
+ import type { BasicLogger } from "@clinebot/shared";
251
+
247
252
  /**
248
253
  * Runtime model catalog refresh options
249
254
  */
@@ -299,6 +304,9 @@ export interface ProviderConfig
299
304
  /** AbortSignal for cancelling requests */
300
305
  abortSignal?: AbortSignal;
301
306
 
307
+ /** Optional runtime logger for provider-level diagnostics */
308
+ logger?: BasicLogger;
309
+
302
310
  /** Codex CLI-specific options */
303
311
  codex?: CodexConfig;
304
312
 
@@ -121,6 +121,8 @@ export interface Message {
121
121
  export interface MessageWithMetadata extends Message {
122
122
  /** Unique message ID */
123
123
  id?: string;
124
+ /** Additional message metadata for storage/history consumers */
125
+ metadata?: Record<string, unknown>;
124
126
  /** Provider ID used to generate this message */
125
127
  providerId?: string;
126
128
  /** Model ID used to generate this message */
@@ -33,6 +33,66 @@ describe("ToolCallProcessor", () => {
33
33
  expect(second[0].tool_call.function.arguments).toBe(' -la"]}');
34
34
  });
35
35
 
36
+ it("normalizes cumulative argument snapshots into deltas", () => {
37
+ const processor = new ToolCallProcessor();
38
+
39
+ const first = processor.processToolCallDeltas(
40
+ [
41
+ {
42
+ index: 0,
43
+ id: "call_1",
44
+ function: { name: "editor", arguments: '{"command":"create"' },
45
+ },
46
+ ],
47
+ "resp_1",
48
+ );
49
+
50
+ const second = processor.processToolCallDeltas(
51
+ [
52
+ {
53
+ index: 0,
54
+ function: {
55
+ arguments: '{"command":"create","path":"/tmp/file.txt"}',
56
+ },
57
+ },
58
+ ],
59
+ "resp_1",
60
+ );
61
+
62
+ expect(first).toHaveLength(1);
63
+ expect(second).toHaveLength(1);
64
+ expect(first[0].tool_call.function.arguments).toBe('{"command":"create"');
65
+ expect(second[0].tool_call.function.arguments).toBe(
66
+ ',"path":"/tmp/file.txt"}',
67
+ );
68
+ });
69
+
70
+ it("serializes object-shaped arguments instead of concatenating [object Object]", () => {
71
+ const processor = new ToolCallProcessor();
72
+
73
+ const result = processor.processToolCallDeltas(
74
+ [
75
+ {
76
+ index: 0,
77
+ id: "call_1",
78
+ function: {
79
+ name: "editor",
80
+ arguments: {
81
+ command: "create",
82
+ path: "/tmp/file.txt",
83
+ },
84
+ },
85
+ },
86
+ ],
87
+ "resp_1",
88
+ );
89
+
90
+ expect(result).toHaveLength(1);
91
+ expect(result[0].tool_call.function.arguments).toBe(
92
+ '{"command":"create","path":"/tmp/file.txt"}',
93
+ );
94
+ });
95
+
36
96
  it("preserves tool call id/name for interleaved parallel deltas", () => {
37
97
  const processor = new ToolCallProcessor();
38
98
 
@@ -18,7 +18,7 @@ interface ToolCallDelta {
18
18
  id?: string;
19
19
  function?: {
20
20
  name?: string;
21
- arguments?: string;
21
+ arguments?: unknown;
22
22
  };
23
23
  }
24
24
 
@@ -61,7 +61,11 @@ export class ToolCallProcessor {
61
61
  if (fn?.name) {
62
62
  toolCall.name = fn.name;
63
63
  }
64
- const deltaArguments = fn?.arguments ?? "";
64
+ const rawArguments = fn?.arguments;
65
+ const deltaArguments = this.normalizeArgumentsDelta(
66
+ toolCall.arguments,
67
+ rawArguments,
68
+ );
65
69
  if (deltaArguments) {
66
70
  toolCall.arguments += deltaArguments;
67
71
  }
@@ -108,4 +112,35 @@ export class ToolCallProcessor {
108
112
  reset(): void {
109
113
  this.toolCalls.clear();
110
114
  }
115
+
116
+ private normalizeArgumentsDelta(
117
+ accumulatedArguments: string,
118
+ rawArguments: unknown,
119
+ ): string {
120
+ if (rawArguments == null) {
121
+ return "";
122
+ }
123
+
124
+ const nextArguments =
125
+ typeof rawArguments === "string"
126
+ ? rawArguments
127
+ : JSON.stringify(rawArguments);
128
+
129
+ if (!nextArguments) {
130
+ return "";
131
+ }
132
+
133
+ // Some OpenAI-compatible providers emit cumulative argument snapshots
134
+ // instead of true deltas. Convert those snapshots back into a suffix so
135
+ // downstream accumulation only happens once.
136
+ if (
137
+ accumulatedArguments &&
138
+ nextArguments.length >= accumulatedArguments.length &&
139
+ nextArguments.startsWith(accumulatedArguments)
140
+ ) {
141
+ return nextArguments.slice(accumulatedArguments.length);
142
+ }
143
+
144
+ return nextArguments;
145
+ }
111
146
  }