@clinebot/llms 0.0.7 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/dist/index.browser.d.ts +2 -2
  2. package/dist/index.browser.js +40 -1
  3. package/dist/index.d.ts +2 -2
  4. package/dist/index.js +12 -12
  5. package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
  6. package/dist/providers/handlers/base.d.ts +5 -29
  7. package/dist/providers/transform/openai-format.d.ts +1 -1
  8. package/dist/providers/types/config.d.ts +6 -0
  9. package/dist/providers/types/stream.d.ts +1 -1
  10. package/package.json +2 -1
  11. package/src/index.browser.ts +2 -2
  12. package/src/index.ts +2 -2
  13. package/src/models/providers/vercel-ai-gateway.ts +1 -1
  14. package/src/providers/handlers/ai-sdk-community.ts +5 -8
  15. package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
  16. package/src/providers/handlers/anthropic-base.test.ts +30 -0
  17. package/src/providers/handlers/anthropic-base.ts +43 -30
  18. package/src/providers/handlers/base.test.ts +68 -3
  19. package/src/providers/handlers/base.ts +104 -54
  20. package/src/providers/handlers/bedrock-base.ts +3 -3
  21. package/src/providers/handlers/community-sdk.test.ts +33 -0
  22. package/src/providers/handlers/gemini-base.test.ts +40 -0
  23. package/src/providers/handlers/gemini-base.ts +22 -20
  24. package/src/providers/handlers/openai-base.ts +67 -12
  25. package/src/providers/handlers/openai-responses.test.ts +46 -0
  26. package/src/providers/handlers/openai-responses.ts +3 -7
  27. package/src/providers/handlers/r1-base.ts +7 -8
  28. package/src/providers/handlers/vertex.ts +15 -5
  29. package/src/providers/transform/anthropic-format.ts +14 -2
  30. package/src/providers/transform/format-conversion.test.ts +49 -0
  31. package/src/providers/transform/openai-format.ts +50 -7
  32. package/src/providers/types/config.ts +8 -0
  33. package/src/providers/types/stream.ts +1 -1
@@ -28,7 +28,7 @@ type AiSdkUsageMetrics = {
28
28
  export type EmitAiSdkStreamOptions = {
29
29
  responseId: string;
30
30
  errorMessage: string;
31
- calculateCost: (inputTokens: number, outputTokens: number, cacheReadTokens: number) => number | undefined;
31
+ calculateCost: (inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens?: number) => number | undefined;
32
32
  reasoningTypes?: string[];
33
33
  enableToolCalls?: boolean;
34
34
  toolCallArgsOrder?: Array<"args" | "input">;
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * Abstract base class that provides common functionality for all handlers.
5
5
  */
6
- import type { ApiHandler, ApiStream, ApiStreamUsageChunk, HandlerModelInfo, ProviderConfig } from "../types";
6
+ import type { ApiHandler, ApiStream, ApiStreamUsageChunk, HandlerModelInfo, ModelInfo, ProviderConfig } from "../types";
7
7
  import type { Message, ToolDefinition } from "../types/messages";
8
8
  import type { ApiStreamChunk } from "../types/stream";
9
9
  export declare const DEFAULT_REQUEST_HEADERS: Record<string, string>;
@@ -13,42 +13,18 @@ export declare const DEFAULT_REQUEST_HEADERS: Record<string, string>;
13
13
  export declare abstract class BaseHandler implements ApiHandler {
14
14
  protected config: ProviderConfig;
15
15
  protected abortController: AbortController | undefined;
16
+ private abortSignalSequence;
16
17
  constructor(config: ProviderConfig);
17
- /**
18
- * Convert Cline messages to provider-specific format
19
- * Must be implemented by subclasses
20
- */
21
18
  abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
22
- /**
23
- * Create a streaming message completion
24
- * Must be implemented by subclasses
25
- */
26
19
  abstract createMessage(systemPrompt: string, messages: Message[], tools?: ToolDefinition[]): ApiStream;
27
- /**
28
- * Get the current model configuration
29
- * Can be overridden by subclasses for provider-specific logic
30
- */
31
20
  getModel(): HandlerModelInfo;
32
- /**
33
- * Get usage information (optional)
34
- * Override in subclasses that support this
35
- */
36
21
  getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined>;
37
- /**
38
- * Get the abort signal for the current request
39
- * Creates a new AbortController if one doesn't exist or was already aborted
40
- * Combines with config.abortSignal if provided
41
- */
42
22
  protected getAbortSignal(): AbortSignal;
43
- /**
44
- * Abort the current request
45
- */
46
23
  abort(): void;
47
24
  setAbortSignal(signal: AbortSignal | undefined): void;
48
- /**
49
- * Helper to calculate cost from usage
50
- */
51
- protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number): number | undefined;
25
+ private logAbort;
26
+ protected supportsPromptCache(modelInfo?: ModelInfo): boolean;
27
+ protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): number | undefined;
52
28
  protected createResponseId(): string;
53
29
  protected withResponseId<T extends ApiStreamChunk>(chunk: T, responseId: string): T;
54
30
  protected withResponseIdForAll(chunks: Iterable<ApiStreamChunk>, responseId: string): Generator<ApiStreamChunk>;
@@ -9,7 +9,7 @@ type OpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
9
9
  /**
10
10
  * Convert messages to OpenAI format
11
11
  */
12
- export declare function convertToOpenAIMessages(messages: Message[]): OpenAIMessage[];
12
+ export declare function convertToOpenAIMessages(messages: Message[], enableCaching?: boolean): OpenAIMessage[];
13
13
  /**
14
14
  * Convert tool definitions to OpenAI format
15
15
  */
@@ -187,6 +187,10 @@ export interface ProviderOptions {
187
187
  /** Runtime model catalog refresh configuration */
188
188
  modelCatalog?: ModelCatalogConfig;
189
189
  }
190
+ /**
191
+ * Provider-specific options that don't fit other categories
192
+ */
193
+ import type { BasicLogger } from "@clinebot/shared";
190
194
  /**
191
195
  * Runtime model catalog refresh options
192
196
  */
@@ -219,6 +223,8 @@ export interface ProviderConfig extends AuthConfig, EndpointConfig, ModelConfig,
219
223
  onRetryAttempt?: (attempt: number, maxRetries: number, delay: number, error: unknown) => void;
220
224
  /** AbortSignal for cancelling requests */
221
225
  abortSignal?: AbortSignal;
226
+ /** Optional runtime logger for provider-level diagnostics */
227
+ logger?: BasicLogger;
222
228
  /** Codex CLI-specific options */
223
229
  codex?: CodexConfig;
224
230
  /** Claude Code-specific options */
@@ -47,7 +47,7 @@ export interface ApiStreamReasoningChunk {
47
47
  */
48
48
  export interface ApiStreamUsageChunk {
49
49
  type: "usage";
50
- /** Number of input tokens (excluding cached) */
50
+ /** Total number of input tokens reported by the provider */
51
51
  inputTokens: number;
52
52
  /** Number of output tokens */
53
53
  outputTokens: number;
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "@clinebot/llms",
3
- "version": "0.0.7",
3
+ "version": "0.0.11",
4
4
  "description": "Config-driven SDK for selecting, extending, and instantiating LLM providers and models",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
7
7
  "dependencies": {
8
+ "@clinebot/shared": "0.0.11",
8
9
  "@ai-sdk/amazon-bedrock": "^4.0.67",
9
10
  "@ai-sdk/google-vertex": "^4.0.74",
10
11
  "@ai-sdk/mistral": "^3.0.24",
@@ -1,6 +1,6 @@
1
1
  export { defineLlmsConfig, loadLlmsConfigFromFile } from "./config-browser";
2
- export * as models from "./models/index";
3
- export * as providers from "./providers/public.browser";
2
+ export * as LlmsModels from "./models/index";
3
+ export * as LlmsProviders from "./providers/public.browser";
4
4
  export type {
5
5
  CustomProviderConfig,
6
6
  LlmsConfig,
package/src/index.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  export { defineLlmsConfig } from "./config";
2
- export * as models from "./models/index";
3
- export * as providers from "./providers/public";
2
+ export * as LlmsModels from "./models/index";
3
+ export * as LlmsProviders from "./providers/public";
4
4
  export { createLlmsSdk } from "./sdk";
5
5
  export type {
6
6
  CustomProviderConfig,
@@ -14,7 +14,7 @@ export const VERCEL_AI_GATEWAY_PROVIDER: ModelCollection = {
14
14
  name: "Vercel AI Gateway",
15
15
  description: "Vercel's AI gateway service",
16
16
  protocol: "openai-chat",
17
- baseUrl: "https://ai-gateway.vercel.app/v1",
17
+ baseUrl: "https://ai-gateway.vercel.sh/v1",
18
18
  defaultModelId: Object.keys(VERCEL_AI_GATEWAY_MODELS)[0],
19
19
  capabilities: ["reasoning"],
20
20
  env: ["AI_GATEWAY_API_KEY"],
@@ -41,6 +41,7 @@ export type EmitAiSdkStreamOptions = {
41
41
  inputTokens: number,
42
42
  outputTokens: number,
43
43
  cacheReadTokens: number,
44
+ cacheWriteTokens?: number,
44
45
  ) => number | undefined;
45
46
  reasoningTypes?: string[];
46
47
  enableToolCalls?: boolean;
@@ -168,10 +169,7 @@ export async function* emitAiSdkStream(
168
169
 
169
170
  yield {
170
171
  type: "usage",
171
- inputTokens: Math.max(
172
- 0,
173
- usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
174
- ),
172
+ inputTokens: usageMetrics.inputTokens,
175
173
  outputTokens: usageMetrics.outputTokens,
176
174
  thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
177
175
  cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -180,6 +178,7 @@ export async function* emitAiSdkStream(
180
178
  usageMetrics.inputTokens,
181
179
  usageMetrics.outputTokens,
182
180
  usageMetrics.cacheReadTokens,
181
+ usageMetrics.cacheWriteTokens,
183
182
  ),
184
183
  id: responseId,
185
184
  };
@@ -205,10 +204,7 @@ export async function* emitAiSdkStream(
205
204
  const usageMetrics = resolveUsageMetrics(usage);
206
205
  yield {
207
206
  type: "usage",
208
- inputTokens: Math.max(
209
- 0,
210
- usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
211
- ),
207
+ inputTokens: usageMetrics.inputTokens,
212
208
  outputTokens: usageMetrics.outputTokens,
213
209
  thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
214
210
  cacheReadTokens: usageMetrics.cacheReadTokens,
@@ -217,6 +213,7 @@ export async function* emitAiSdkStream(
217
213
  usageMetrics.inputTokens,
218
214
  usageMetrics.outputTokens,
219
215
  usageMetrics.cacheReadTokens,
216
+ usageMetrics.cacheWriteTokens,
220
217
  ),
221
218
  id: responseId,
222
219
  };
@@ -185,8 +185,18 @@ export abstract class AiSdkProviderHandler extends BaseHandler {
185
185
  yield* emitAiSdkStream(stream, {
186
186
  responseId,
187
187
  errorMessage: this.getStreamErrorMessage(),
188
- calculateCost: (inputTokens, outputTokens, cacheReadTokens) =>
189
- this.calculateCost(inputTokens, outputTokens, cacheReadTokens),
188
+ calculateCost: (
189
+ inputTokens,
190
+ outputTokens,
191
+ cacheReadTokens,
192
+ cacheWriteTokens,
193
+ ) =>
194
+ this.calculateCost(
195
+ inputTokens,
196
+ outputTokens,
197
+ cacheReadTokens,
198
+ cacheWriteTokens,
199
+ ),
190
200
  ...this.getEmitStreamOptions(),
191
201
  });
192
202
  }
@@ -0,0 +1,30 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { AnthropicHandler } from "./anthropic-base";
3
+
4
+ describe("AnthropicHandler prompt cache detection", () => {
5
+ it("enables prompt caching when model pricing includes cache pricing", () => {
6
+ const handler = new AnthropicHandler({
7
+ providerId: "anthropic",
8
+ modelId: "claude-sonnet-4-6",
9
+ apiKey: "test-key",
10
+ modelInfo: {
11
+ id: "claude-sonnet-4-6",
12
+ pricing: {
13
+ input: 3,
14
+ output: 15,
15
+ cacheRead: 0.3,
16
+ cacheWrite: 3.75,
17
+ },
18
+ },
19
+ });
20
+
21
+ const messages = handler.getMessages("system", [
22
+ { role: "user", content: "Tell me about this repo" },
23
+ ]);
24
+ const userTextBlock = messages[0]?.content?.[0] as
25
+ | { cache_control?: { type: string } }
26
+ | undefined;
27
+
28
+ expect(userTextBlock?.cache_control).toEqual({ type: "ephemeral" });
29
+ });
30
+ });
@@ -17,7 +17,6 @@ import {
17
17
  import {
18
18
  type ApiStream,
19
19
  type HandlerModelInfo,
20
- hasModelCapability,
21
20
  type ProviderConfig,
22
21
  supportsModelThinking,
23
22
  } from "../types";
@@ -76,10 +75,7 @@ export class AnthropicHandler extends BaseHandler {
76
75
  _systemPrompt: string,
77
76
  messages: Message[],
78
77
  ): Anthropic.MessageParam[] {
79
- const supportsPromptCache = hasModelCapability(
80
- this.getModel().info,
81
- "prompt-cache",
82
- );
78
+ const supportsPromptCache = this.supportsPromptCache(this.getModel().info);
83
79
  return convertToAnthropicMessages(
84
80
  messages,
85
81
  supportsPromptCache,
@@ -113,7 +109,7 @@ export class AnthropicHandler extends BaseHandler {
113
109
  const budgetTokens =
114
110
  thinkingSupported && requestedBudget > 0 ? requestedBudget : 0;
115
111
  const nativeToolsOn = tools && tools.length > 0;
116
- const supportsPromptCache = hasModelCapability(model.info, "prompt-cache");
112
+ const supportsPromptCache = this.supportsPromptCache(model.info);
117
113
  const reasoningOn = thinkingSupported && budgetTokens > 0;
118
114
  const debugThinking = isThinkingDebugEnabled();
119
115
  const debugChunkCounts: Record<string, number> = {};
@@ -139,29 +135,34 @@ export class AnthropicHandler extends BaseHandler {
139
135
  const requestOptions = { signal: abortSignal };
140
136
 
141
137
  // Create the request
138
+ // Use top-level automatic caching so the entire prefix (system +
139
+ // messages) is cached and the breakpoint advances each turn.
140
+ const createParams: Record<string, unknown> &
141
+ Anthropic.MessageCreateParamsStreaming = {
142
+ model: model.id,
143
+ thinking: reasoningOn
144
+ ? { type: "enabled", budget_tokens: budgetTokens }
145
+ : undefined,
146
+ max_tokens:
147
+ model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
148
+ temperature: reasoningOn ? undefined : 0,
149
+ system: [
150
+ supportsPromptCache
151
+ ? {
152
+ text: systemPrompt,
153
+ type: "text",
154
+ cache_control: { type: "ephemeral" },
155
+ }
156
+ : { text: systemPrompt, type: "text" },
157
+ ],
158
+ messages: anthropicMessages as Anthropic.MessageParam[],
159
+ stream: true,
160
+ tools: anthropicTools,
161
+ tool_choice: nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
162
+ };
163
+
142
164
  const stream = await client.messages.create(
143
- {
144
- model: model.id,
145
- thinking: reasoningOn
146
- ? { type: "enabled", budget_tokens: budgetTokens }
147
- : undefined,
148
- max_tokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
149
- temperature: reasoningOn ? undefined : 0,
150
- system: supportsPromptCache
151
- ? [
152
- {
153
- text: systemPrompt,
154
- type: "text",
155
- cache_control: { type: "ephemeral" },
156
- },
157
- ]
158
- : [{ text: systemPrompt, type: "text" }],
159
- messages: anthropicMessages as Anthropic.MessageParam[],
160
- stream: true,
161
- tools: anthropicTools,
162
- tool_choice:
163
- nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
164
- },
165
+ createParams as Anthropic.MessageCreateParamsStreaming,
165
166
  requestOptions,
166
167
  );
167
168
 
@@ -173,6 +174,7 @@ export class AnthropicHandler extends BaseHandler {
173
174
  cacheReadTokens: 0,
174
175
  cacheWriteTokens: 0,
175
176
  };
177
+ let stopReason: string | null = null;
176
178
 
177
179
  for await (const chunk of stream) {
178
180
  if (debugThinking) {
@@ -185,6 +187,11 @@ export class AnthropicHandler extends BaseHandler {
185
187
  countChunk(`content_block_delta:${chunk.delta?.type ?? "unknown"}`);
186
188
  }
187
189
  }
190
+ if (chunk.type === "message_delta") {
191
+ stopReason =
192
+ (chunk as { delta?: { stop_reason?: string } }).delta?.stop_reason ??
193
+ stopReason;
194
+ }
188
195
  yield* this.withResponseIdForAll(
189
196
  this.processChunk(chunk, currentToolCall, usageSnapshot, responseId),
190
197
  responseId,
@@ -199,8 +206,12 @@ export class AnthropicHandler extends BaseHandler {
199
206
  console.error(`[thinking-debug][anthropic][stream] ${summary}`);
200
207
  }
201
208
 
202
- // Yield done chunk to indicate streaming completed successfully
203
- yield { type: "done", success: true, id: responseId };
209
+ yield {
210
+ type: "done",
211
+ success: true,
212
+ id: responseId,
213
+ incompleteReason: stopReason === "max_tokens" ? "max_tokens" : undefined,
214
+ };
204
215
  }
205
216
 
206
217
  protected *processChunk(
@@ -233,6 +244,7 @@ export class AnthropicHandler extends BaseHandler {
233
244
  usageSnapshot.inputTokens,
234
245
  usageSnapshot.outputTokens,
235
246
  usageSnapshot.cacheReadTokens,
247
+ usageSnapshot.cacheWriteTokens,
236
248
  ),
237
249
  id: responseId,
238
250
  };
@@ -252,6 +264,7 @@ export class AnthropicHandler extends BaseHandler {
252
264
  usageSnapshot.inputTokens,
253
265
  usageSnapshot.outputTokens,
254
266
  usageSnapshot.cacheReadTokens,
267
+ usageSnapshot.cacheWriteTokens,
255
268
  ),
256
269
  id: responseId,
257
270
  };
@@ -1,4 +1,4 @@
1
- import { describe, expect, it } from "vitest";
1
+ import { describe, expect, it, vi } from "vitest";
2
2
  import type { ApiStream, ProviderConfig } from "../types/index";
3
3
  import { BaseHandler } from "./base";
4
4
 
@@ -15,8 +15,18 @@ class TestHandler extends BaseHandler {
15
15
  inputTokens: number,
16
16
  outputTokens: number,
17
17
  cacheReadTokens = 0,
18
+ cacheWriteTokens = 0,
18
19
  ): number | undefined {
19
- return this.calculateCost(inputTokens, outputTokens, cacheReadTokens);
20
+ return this.calculateCost(
21
+ inputTokens,
22
+ outputTokens,
23
+ cacheReadTokens,
24
+ cacheWriteTokens,
25
+ );
26
+ }
27
+
28
+ public exposeAbortSignal(): AbortSignal {
29
+ return this.getAbortSignal();
20
30
  }
21
31
  }
22
32
 
@@ -41,6 +51,61 @@ describe("BaseHandler.calculateCost", () => {
41
51
 
42
52
  const cost = handler.computeCost(1_000_000, 1_000_000, 100_000);
43
53
 
44
- expect(cost).toBeCloseTo(17.73, 6);
54
+ expect(cost).toBeCloseTo(18.03, 6);
55
+ });
56
+ });
57
+
58
+ describe("BaseHandler abort signal wiring", () => {
59
+ it("does not let a stale request signal abort a newer request", () => {
60
+ const logger = {
61
+ debug: vi.fn(),
62
+ warn: vi.fn(),
63
+ };
64
+ const request1 = new AbortController();
65
+ const handler = new TestHandler({
66
+ providerId: "openrouter",
67
+ modelId: "mock-model",
68
+ apiKey: "test-key",
69
+ baseUrl: "https://example.com/v1",
70
+ abortSignal: request1.signal,
71
+ logger,
72
+ });
73
+
74
+ const signal1 = handler.exposeAbortSignal();
75
+ expect(signal1.aborted).toBe(false);
76
+
77
+ const request2 = new AbortController();
78
+ handler.setAbortSignal(request2.signal);
79
+ const signal2 = handler.exposeAbortSignal();
80
+ expect(signal2).not.toBe(signal1);
81
+ expect(signal2.aborted).toBe(false);
82
+
83
+ request1.abort(new Error("stale timeout"));
84
+
85
+ expect(signal1.aborted).toBe(true);
86
+ expect(signal2.aborted).toBe(false);
87
+ expect(logger.warn).toHaveBeenCalledWith(
88
+ "Provider request abort signal fired",
89
+ expect.objectContaining({
90
+ reason: expect.objectContaining({ message: "stale timeout" }),
91
+ }),
92
+ );
93
+ });
94
+
95
+ it("creates a fresh controller for each request", () => {
96
+ const handler = new TestHandler({
97
+ providerId: "openrouter",
98
+ modelId: "mock-model",
99
+ apiKey: "test-key",
100
+ baseUrl: "https://example.com/v1",
101
+ abortSignal: new AbortController().signal,
102
+ });
103
+
104
+ const signal1 = handler.exposeAbortSignal();
105
+ const signal2 = handler.exposeAbortSignal();
106
+
107
+ expect(signal2).not.toBe(signal1);
108
+ expect(signal1.aborted).toBe(false);
109
+ expect(signal2.aborted).toBe(false);
45
110
  });
46
111
  });