@clinebot/llms 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.browser.js +14 -14
- package/dist/index.js +13 -13
- package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
- package/dist/providers/handlers/base.d.ts +7 -2
- package/dist/providers/types/stream.d.ts +1 -1
- package/package.json +2 -2
- package/src/models/generated.ts +223 -641
- package/src/models/models-dev-catalog.test.ts +2 -2
- package/src/models/models-dev-catalog.ts +9 -2
- package/src/providers/handlers/ai-sdk-community.ts +3 -0
- package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
- package/src/providers/handlers/anthropic-base.test.ts +30 -0
- package/src/providers/handlers/anthropic-base.ts +31 -29
- package/src/providers/handlers/base.test.ts +127 -2
- package/src/providers/handlers/base.ts +115 -1
- package/src/providers/handlers/bedrock-base.ts +4 -4
- package/src/providers/handlers/community-sdk.test.ts +33 -0
- package/src/providers/handlers/gemini-base.ts +6 -19
- package/src/providers/handlers/openai-base.ts +33 -14
- package/src/providers/handlers/openai-responses.test.ts +46 -0
- package/src/providers/handlers/openai-responses.ts +12 -8
- package/src/providers/handlers/r1-base.ts +10 -8
- package/src/providers/handlers/vertex.ts +14 -4
- package/src/providers/transform/anthropic-format.ts +14 -2
- package/src/providers/transform/format-conversion.test.ts +23 -0
- package/src/providers/transform/gemini-format.ts +77 -1
- package/src/providers/types/stream.ts +1 -1
|
@@ -82,7 +82,7 @@ describe("models-dev-catalog", () => {
|
|
|
82
82
|
id: "claude-defaults",
|
|
83
83
|
name: "claude-defaults",
|
|
84
84
|
contextWindow: 4096,
|
|
85
|
-
maxTokens:
|
|
85
|
+
maxTokens: 204,
|
|
86
86
|
capabilities: ["tools"],
|
|
87
87
|
pricing: {
|
|
88
88
|
input: 0,
|
|
@@ -97,7 +97,7 @@ describe("models-dev-catalog", () => {
|
|
|
97
97
|
id: "claude-older",
|
|
98
98
|
name: "claude-older",
|
|
99
99
|
contextWindow: 4096,
|
|
100
|
-
maxTokens:
|
|
100
|
+
maxTokens: 204,
|
|
101
101
|
capabilities: ["tools"],
|
|
102
102
|
pricing: {
|
|
103
103
|
input: 0,
|
|
@@ -93,11 +93,18 @@ function toStatus(status: string | undefined): ModelInfo["status"] {
|
|
|
93
93
|
}
|
|
94
94
|
|
|
95
95
|
function toModelInfo(modelId: string, model: ModelsDevModel): ModelInfo {
|
|
96
|
+
// If context or output limits are missing, default to DEFAULT_CONTEXT_WINDOW and DEFAULT_MAX_TOKENS respectively.
|
|
97
|
+
// If context and max are the same value, assume max tokens should be 5% of that value to avoid overallocation.
|
|
98
|
+
const contextWindow = model.limit?.context ?? DEFAULT_CONTEXT_WINDOW;
|
|
99
|
+
const outputToken = model.limit?.output ?? DEFAULT_MAX_TOKENS;
|
|
100
|
+
const discounted =
|
|
101
|
+
contextWindow === outputToken ? outputToken * 0.05 : outputToken;
|
|
102
|
+
|
|
96
103
|
return {
|
|
97
104
|
id: modelId,
|
|
98
105
|
name: model.name || modelId,
|
|
99
|
-
contextWindow
|
|
100
|
-
maxTokens:
|
|
106
|
+
contextWindow,
|
|
107
|
+
maxTokens: Math.floor(discounted),
|
|
101
108
|
capabilities: toCapabilities(model),
|
|
102
109
|
pricing: {
|
|
103
110
|
input: model.cost?.input ?? 0,
|
|
@@ -41,6 +41,7 @@ export type EmitAiSdkStreamOptions = {
|
|
|
41
41
|
inputTokens: number,
|
|
42
42
|
outputTokens: number,
|
|
43
43
|
cacheReadTokens: number,
|
|
44
|
+
cacheWriteTokens?: number,
|
|
44
45
|
) => number | undefined;
|
|
45
46
|
reasoningTypes?: string[];
|
|
46
47
|
enableToolCalls?: boolean;
|
|
@@ -180,6 +181,7 @@ export async function* emitAiSdkStream(
|
|
|
180
181
|
usageMetrics.inputTokens,
|
|
181
182
|
usageMetrics.outputTokens,
|
|
182
183
|
usageMetrics.cacheReadTokens,
|
|
184
|
+
usageMetrics.cacheWriteTokens,
|
|
183
185
|
),
|
|
184
186
|
id: responseId,
|
|
185
187
|
};
|
|
@@ -217,6 +219,7 @@ export async function* emitAiSdkStream(
|
|
|
217
219
|
usageMetrics.inputTokens,
|
|
218
220
|
usageMetrics.outputTokens,
|
|
219
221
|
usageMetrics.cacheReadTokens,
|
|
222
|
+
usageMetrics.cacheWriteTokens,
|
|
220
223
|
),
|
|
221
224
|
id: responseId,
|
|
222
225
|
};
|
|
@@ -185,8 +185,18 @@ export abstract class AiSdkProviderHandler extends BaseHandler {
|
|
|
185
185
|
yield* emitAiSdkStream(stream, {
|
|
186
186
|
responseId,
|
|
187
187
|
errorMessage: this.getStreamErrorMessage(),
|
|
188
|
-
calculateCost: (
|
|
189
|
-
|
|
188
|
+
calculateCost: (
|
|
189
|
+
inputTokens,
|
|
190
|
+
outputTokens,
|
|
191
|
+
cacheReadTokens,
|
|
192
|
+
cacheWriteTokens,
|
|
193
|
+
) =>
|
|
194
|
+
this.calculateCost(
|
|
195
|
+
inputTokens,
|
|
196
|
+
outputTokens,
|
|
197
|
+
cacheReadTokens,
|
|
198
|
+
cacheWriteTokens,
|
|
199
|
+
),
|
|
190
200
|
...this.getEmitStreamOptions(),
|
|
191
201
|
});
|
|
192
202
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { AnthropicHandler } from "./anthropic-base";
|
|
3
|
+
|
|
4
|
+
describe("AnthropicHandler prompt cache detection", () => {
|
|
5
|
+
it("enables prompt caching when model pricing includes cache pricing", () => {
|
|
6
|
+
const handler = new AnthropicHandler({
|
|
7
|
+
providerId: "anthropic",
|
|
8
|
+
modelId: "claude-sonnet-4-6",
|
|
9
|
+
apiKey: "test-key",
|
|
10
|
+
modelInfo: {
|
|
11
|
+
id: "claude-sonnet-4-6",
|
|
12
|
+
pricing: {
|
|
13
|
+
input: 3,
|
|
14
|
+
output: 15,
|
|
15
|
+
cacheRead: 0.3,
|
|
16
|
+
cacheWrite: 3.75,
|
|
17
|
+
},
|
|
18
|
+
},
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
const messages = handler.getMessages("system", [
|
|
22
|
+
{ role: "user", content: "Tell me about this repo" },
|
|
23
|
+
]);
|
|
24
|
+
const userTextBlock = messages[0]?.content?.[0] as
|
|
25
|
+
| { cache_control?: { type: string } }
|
|
26
|
+
| undefined;
|
|
27
|
+
|
|
28
|
+
expect(userTextBlock?.cache_control).toEqual({ type: "ephemeral" });
|
|
29
|
+
});
|
|
30
|
+
});
|
|
@@ -17,7 +17,6 @@ import {
|
|
|
17
17
|
import {
|
|
18
18
|
type ApiStream,
|
|
19
19
|
type HandlerModelInfo,
|
|
20
|
-
hasModelCapability,
|
|
21
20
|
type ProviderConfig,
|
|
22
21
|
supportsModelThinking,
|
|
23
22
|
} from "../types";
|
|
@@ -76,10 +75,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
76
75
|
_systemPrompt: string,
|
|
77
76
|
messages: Message[],
|
|
78
77
|
): Anthropic.MessageParam[] {
|
|
79
|
-
const supportsPromptCache =
|
|
80
|
-
this.getModel().info,
|
|
81
|
-
"prompt-cache",
|
|
82
|
-
);
|
|
78
|
+
const supportsPromptCache = this.supportsPromptCache(this.getModel().info);
|
|
83
79
|
return convertToAnthropicMessages(
|
|
84
80
|
messages,
|
|
85
81
|
supportsPromptCache,
|
|
@@ -113,7 +109,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
113
109
|
const budgetTokens =
|
|
114
110
|
thinkingSupported && requestedBudget > 0 ? requestedBudget : 0;
|
|
115
111
|
const nativeToolsOn = tools && tools.length > 0;
|
|
116
|
-
const supportsPromptCache =
|
|
112
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
117
113
|
const reasoningOn = thinkingSupported && budgetTokens > 0;
|
|
118
114
|
const debugThinking = isThinkingDebugEnabled();
|
|
119
115
|
const debugChunkCounts: Record<string, number> = {};
|
|
@@ -139,30 +135,34 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
139
135
|
const requestOptions = { signal: abortSignal };
|
|
140
136
|
|
|
141
137
|
// Create the request
|
|
138
|
+
// Use top-level automatic caching so the entire prefix (system +
|
|
139
|
+
// messages) is cached and the breakpoint advances each turn.
|
|
140
|
+
const createParams: Record<string, unknown> &
|
|
141
|
+
Anthropic.MessageCreateParamsStreaming = {
|
|
142
|
+
model: model.id,
|
|
143
|
+
thinking: reasoningOn
|
|
144
|
+
? { type: "enabled", budget_tokens: budgetTokens }
|
|
145
|
+
: undefined,
|
|
146
|
+
max_tokens:
|
|
147
|
+
model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
148
|
+
temperature: reasoningOn ? undefined : 0,
|
|
149
|
+
system: [
|
|
150
|
+
supportsPromptCache
|
|
151
|
+
? {
|
|
152
|
+
text: systemPrompt,
|
|
153
|
+
type: "text",
|
|
154
|
+
cache_control: { type: "ephemeral" },
|
|
155
|
+
}
|
|
156
|
+
: { text: systemPrompt, type: "text" },
|
|
157
|
+
],
|
|
158
|
+
messages: anthropicMessages as Anthropic.MessageParam[],
|
|
159
|
+
stream: true,
|
|
160
|
+
tools: anthropicTools,
|
|
161
|
+
tool_choice: nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
|
|
162
|
+
};
|
|
163
|
+
|
|
142
164
|
const stream = await client.messages.create(
|
|
143
|
-
|
|
144
|
-
model: model.id,
|
|
145
|
-
thinking: reasoningOn
|
|
146
|
-
? { type: "enabled", budget_tokens: budgetTokens }
|
|
147
|
-
: undefined,
|
|
148
|
-
max_tokens:
|
|
149
|
-
model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
150
|
-
temperature: reasoningOn ? undefined : 0,
|
|
151
|
-
system: supportsPromptCache
|
|
152
|
-
? [
|
|
153
|
-
{
|
|
154
|
-
text: systemPrompt,
|
|
155
|
-
type: "text",
|
|
156
|
-
cache_control: { type: "ephemeral" },
|
|
157
|
-
},
|
|
158
|
-
]
|
|
159
|
-
: [{ text: systemPrompt, type: "text" }],
|
|
160
|
-
messages: anthropicMessages as Anthropic.MessageParam[],
|
|
161
|
-
stream: true,
|
|
162
|
-
tools: anthropicTools,
|
|
163
|
-
tool_choice:
|
|
164
|
-
nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
|
|
165
|
-
},
|
|
165
|
+
createParams as Anthropic.MessageCreateParamsStreaming,
|
|
166
166
|
requestOptions,
|
|
167
167
|
);
|
|
168
168
|
|
|
@@ -244,6 +244,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
244
244
|
usageSnapshot.inputTokens,
|
|
245
245
|
usageSnapshot.outputTokens,
|
|
246
246
|
usageSnapshot.cacheReadTokens,
|
|
247
|
+
usageSnapshot.cacheWriteTokens,
|
|
247
248
|
),
|
|
248
249
|
id: responseId,
|
|
249
250
|
};
|
|
@@ -263,6 +264,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
263
264
|
usageSnapshot.inputTokens,
|
|
264
265
|
usageSnapshot.outputTokens,
|
|
265
266
|
usageSnapshot.cacheReadTokens,
|
|
267
|
+
usageSnapshot.cacheWriteTokens,
|
|
266
268
|
),
|
|
267
269
|
id: responseId,
|
|
268
270
|
};
|
|
@@ -15,13 +15,37 @@ class TestHandler extends BaseHandler {
|
|
|
15
15
|
inputTokens: number,
|
|
16
16
|
outputTokens: number,
|
|
17
17
|
cacheReadTokens = 0,
|
|
18
|
+
cacheWriteTokens = 0,
|
|
18
19
|
): number | undefined {
|
|
19
|
-
return this.calculateCost(
|
|
20
|
+
return this.calculateCost(
|
|
21
|
+
inputTokens,
|
|
22
|
+
outputTokens,
|
|
23
|
+
cacheReadTokens,
|
|
24
|
+
cacheWriteTokens,
|
|
25
|
+
);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
public computeCostFromInclusiveInput(
|
|
29
|
+
inputTokens: number,
|
|
30
|
+
outputTokens: number,
|
|
31
|
+
cacheReadTokens = 0,
|
|
32
|
+
cacheWriteTokens = 0,
|
|
33
|
+
): number | undefined {
|
|
34
|
+
return this.calculateCostFromInclusiveInput(
|
|
35
|
+
inputTokens,
|
|
36
|
+
outputTokens,
|
|
37
|
+
cacheReadTokens,
|
|
38
|
+
cacheWriteTokens,
|
|
39
|
+
);
|
|
20
40
|
}
|
|
21
41
|
|
|
22
42
|
public exposeAbortSignal(): AbortSignal {
|
|
23
43
|
return this.getAbortSignal();
|
|
24
44
|
}
|
|
45
|
+
|
|
46
|
+
public normalizeBadRequest(error: unknown): Error | undefined {
|
|
47
|
+
return this.normalizeOpenAICompatibleBadRequest(error);
|
|
48
|
+
}
|
|
25
49
|
}
|
|
26
50
|
|
|
27
51
|
describe("BaseHandler.calculateCost", () => {
|
|
@@ -45,7 +69,54 @@ describe("BaseHandler.calculateCost", () => {
|
|
|
45
69
|
|
|
46
70
|
const cost = handler.computeCost(1_000_000, 1_000_000, 100_000);
|
|
47
71
|
|
|
48
|
-
expect(cost).toBeCloseTo(
|
|
72
|
+
expect(cost).toBeCloseTo(18.03, 6);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
it("does not charge cache reads twice when input already includes them", () => {
|
|
76
|
+
const config: ProviderConfig = {
|
|
77
|
+
providerId: "openai-native",
|
|
78
|
+
modelId: "gpt-test",
|
|
79
|
+
apiKey: "test-key",
|
|
80
|
+
knownModels: {
|
|
81
|
+
"gpt-test": {
|
|
82
|
+
id: "gpt-test",
|
|
83
|
+
pricing: {
|
|
84
|
+
input: 1,
|
|
85
|
+
output: 2,
|
|
86
|
+
cacheRead: 0.5,
|
|
87
|
+
},
|
|
88
|
+
},
|
|
89
|
+
},
|
|
90
|
+
};
|
|
91
|
+
const handler = new TestHandler(config);
|
|
92
|
+
|
|
93
|
+
const cost = handler.computeCostFromInclusiveInput(100, 40, 25);
|
|
94
|
+
|
|
95
|
+
expect(cost).toBeCloseTo(0.0001675, 10);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it("does not charge cache writes twice when input already includes them", () => {
|
|
99
|
+
const config: ProviderConfig = {
|
|
100
|
+
providerId: "openai-native",
|
|
101
|
+
modelId: "gpt-test",
|
|
102
|
+
apiKey: "test-key",
|
|
103
|
+
knownModels: {
|
|
104
|
+
"gpt-test": {
|
|
105
|
+
id: "gpt-test",
|
|
106
|
+
pricing: {
|
|
107
|
+
input: 1,
|
|
108
|
+
output: 2,
|
|
109
|
+
cacheRead: 0.5,
|
|
110
|
+
cacheWrite: 1.25,
|
|
111
|
+
},
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
};
|
|
115
|
+
const handler = new TestHandler(config);
|
|
116
|
+
|
|
117
|
+
const cost = handler.computeCostFromInclusiveInput(100, 40, 25, 10);
|
|
118
|
+
|
|
119
|
+
expect(cost).toBeCloseTo(0.00017, 10);
|
|
49
120
|
});
|
|
50
121
|
});
|
|
51
122
|
|
|
@@ -103,3 +174,57 @@ describe("BaseHandler abort signal wiring", () => {
|
|
|
103
174
|
expect(signal2.aborted).toBe(false);
|
|
104
175
|
});
|
|
105
176
|
});
|
|
177
|
+
|
|
178
|
+
describe("BaseHandler.normalizeOpenAICompatibleBadRequest", () => {
|
|
179
|
+
it("rewrites provider metadata prompt-limit errors into a helpful message", () => {
|
|
180
|
+
const handler = new TestHandler({
|
|
181
|
+
providerId: "openrouter",
|
|
182
|
+
modelId: "anthropic/claude-sonnet-4.6",
|
|
183
|
+
apiKey: "test-key",
|
|
184
|
+
baseUrl: "https://openrouter.ai/api/v1",
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
const error = Object.assign(new Error("400 Provider returned error"), {
|
|
188
|
+
status: 400,
|
|
189
|
+
error: {
|
|
190
|
+
message: "Provider returned error",
|
|
191
|
+
code: 400,
|
|
192
|
+
metadata: {
|
|
193
|
+
provider_name: "Anthropic",
|
|
194
|
+
raw: JSON.stringify({
|
|
195
|
+
type: "error",
|
|
196
|
+
error: {
|
|
197
|
+
type: "invalid_request_error",
|
|
198
|
+
message: "prompt is too long: 1102640 tokens > 1000000 maximum",
|
|
199
|
+
},
|
|
200
|
+
request_id: "req_123",
|
|
201
|
+
}),
|
|
202
|
+
},
|
|
203
|
+
},
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
const normalized = handler.normalizeBadRequest(error);
|
|
207
|
+
|
|
208
|
+
expect(normalized?.message).toBe(
|
|
209
|
+
"Anthropic request was rejected (HTTP 400). Prompt is too long: 1102640 tokens exceeds the 1000000 token limit. Request ID: req_123.",
|
|
210
|
+
);
|
|
211
|
+
expect(normalized?.cause).toBe(error);
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
it("returns undefined for non-400 errors", () => {
|
|
215
|
+
const handler = new TestHandler({
|
|
216
|
+
providerId: "openrouter",
|
|
217
|
+
modelId: "anthropic/claude-sonnet-4.6",
|
|
218
|
+
apiKey: "test-key",
|
|
219
|
+
baseUrl: "https://openrouter.ai/api/v1",
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
const normalized = handler.normalizeBadRequest(
|
|
223
|
+
Object.assign(new Error("500 Provider returned error"), {
|
|
224
|
+
status: 500,
|
|
225
|
+
}),
|
|
226
|
+
);
|
|
227
|
+
|
|
228
|
+
expect(normalized).toBeUndefined();
|
|
229
|
+
});
|
|
230
|
+
});
|
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
ApiStream,
|
|
11
11
|
ApiStreamUsageChunk,
|
|
12
12
|
HandlerModelInfo,
|
|
13
|
+
ModelInfo,
|
|
13
14
|
ProviderConfig,
|
|
14
15
|
} from "../types";
|
|
15
16
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
@@ -22,6 +23,22 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
|
|
|
22
23
|
"X-CLIENT-TYPE": "cline-sdk",
|
|
23
24
|
};
|
|
24
25
|
|
|
26
|
+
interface OpenAICompatibleProviderErrorShape {
|
|
27
|
+
status?: number;
|
|
28
|
+
message?: string;
|
|
29
|
+
error?: {
|
|
30
|
+
message?: string;
|
|
31
|
+
code?: number;
|
|
32
|
+
metadata?: {
|
|
33
|
+
raw?: string;
|
|
34
|
+
provider_name?: string;
|
|
35
|
+
};
|
|
36
|
+
};
|
|
37
|
+
response?: {
|
|
38
|
+
status?: number;
|
|
39
|
+
};
|
|
40
|
+
}
|
|
41
|
+
|
|
25
42
|
const controllerIds = new WeakMap<AbortController, string>();
|
|
26
43
|
let controllerIdCounter = 0;
|
|
27
44
|
|
|
@@ -146,10 +163,26 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
146
163
|
});
|
|
147
164
|
}
|
|
148
165
|
|
|
166
|
+
protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
|
|
167
|
+
const resolvedModelInfo =
|
|
168
|
+
modelInfo ??
|
|
169
|
+
this.config.modelInfo ??
|
|
170
|
+
this.config.knownModels?.[this.config.modelId];
|
|
171
|
+
const pricing = resolvedModelInfo?.pricing;
|
|
172
|
+
|
|
173
|
+
return (
|
|
174
|
+
resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
|
|
175
|
+
this.config.capabilities?.includes("prompt-cache") === true ||
|
|
176
|
+
typeof pricing?.cacheRead === "number" ||
|
|
177
|
+
typeof pricing?.cacheWrite === "number"
|
|
178
|
+
);
|
|
179
|
+
}
|
|
180
|
+
|
|
149
181
|
protected calculateCost(
|
|
150
182
|
inputTokens: number,
|
|
151
183
|
outputTokens: number,
|
|
152
184
|
cacheReadTokens = 0,
|
|
185
|
+
cacheWriteTokens = 0,
|
|
153
186
|
): number | undefined {
|
|
154
187
|
const pricing = (
|
|
155
188
|
this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
|
|
@@ -159,14 +192,32 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
159
192
|
}
|
|
160
193
|
|
|
161
194
|
return (
|
|
162
|
-
(
|
|
195
|
+
(inputTokens / 1_000_000) * pricing.input +
|
|
163
196
|
(outputTokens / 1_000_000) * pricing.output +
|
|
164
197
|
(cacheReadTokens > 0
|
|
165
198
|
? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
|
|
199
|
+
: 0) +
|
|
200
|
+
(cacheWriteTokens > 0
|
|
201
|
+
? (cacheWriteTokens / 1_000_000) *
|
|
202
|
+
(pricing.cacheWrite ?? pricing.input * 1.25)
|
|
166
203
|
: 0)
|
|
167
204
|
);
|
|
168
205
|
}
|
|
169
206
|
|
|
207
|
+
protected calculateCostFromInclusiveInput(
|
|
208
|
+
inputTokens: number,
|
|
209
|
+
outputTokens: number,
|
|
210
|
+
cacheReadTokens = 0,
|
|
211
|
+
cacheWriteTokens = 0,
|
|
212
|
+
): number | undefined {
|
|
213
|
+
return this.calculateCost(
|
|
214
|
+
Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens),
|
|
215
|
+
outputTokens,
|
|
216
|
+
cacheReadTokens,
|
|
217
|
+
cacheWriteTokens,
|
|
218
|
+
);
|
|
219
|
+
}
|
|
220
|
+
|
|
170
221
|
protected createResponseId(): string {
|
|
171
222
|
return nanoid();
|
|
172
223
|
}
|
|
@@ -193,4 +244,67 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
193
244
|
...(this.config.headers ?? {}),
|
|
194
245
|
};
|
|
195
246
|
}
|
|
247
|
+
|
|
248
|
+
protected normalizeOpenAICompatibleBadRequest(
|
|
249
|
+
error: unknown,
|
|
250
|
+
): Error | undefined {
|
|
251
|
+
const rawError = error as OpenAICompatibleProviderErrorShape | undefined;
|
|
252
|
+
const status =
|
|
253
|
+
rawError?.status ??
|
|
254
|
+
rawError?.response?.status ??
|
|
255
|
+
rawError?.error?.code ??
|
|
256
|
+
(typeof rawError?.message === "string" && rawError.message.includes("400")
|
|
257
|
+
? 400
|
|
258
|
+
: undefined);
|
|
259
|
+
if (status !== 400) {
|
|
260
|
+
return undefined;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
const rawMetadata = rawError?.error?.metadata?.raw;
|
|
264
|
+
const parsedRaw = this.parseRawProviderError(rawMetadata);
|
|
265
|
+
const detail =
|
|
266
|
+
parsedRaw?.error?.message?.trim() ||
|
|
267
|
+
rawError?.error?.message?.trim() ||
|
|
268
|
+
rawError?.message?.trim() ||
|
|
269
|
+
"Provider returned error";
|
|
270
|
+
const providerName =
|
|
271
|
+
rawError?.error?.metadata?.provider_name?.trim() || "Provider";
|
|
272
|
+
const requestId = parsedRaw?.request_id?.trim();
|
|
273
|
+
const normalizedMessage = this.rewriteProviderBadRequestDetail(detail);
|
|
274
|
+
const suffix = requestId ? ` Request ID: ${requestId}.` : "";
|
|
275
|
+
return new Error(
|
|
276
|
+
`${providerName} request was rejected (HTTP 400). ${normalizedMessage}${suffix}`,
|
|
277
|
+
{
|
|
278
|
+
cause: error instanceof Error ? error : undefined,
|
|
279
|
+
},
|
|
280
|
+
);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
private parseRawProviderError(
|
|
284
|
+
raw: string | undefined,
|
|
285
|
+
): { error?: { message?: string }; request_id?: string } | undefined {
|
|
286
|
+
if (!raw) {
|
|
287
|
+
return undefined;
|
|
288
|
+
}
|
|
289
|
+
try {
|
|
290
|
+
return JSON.parse(raw) as {
|
|
291
|
+
error?: { message?: string };
|
|
292
|
+
request_id?: string;
|
|
293
|
+
};
|
|
294
|
+
} catch {
|
|
295
|
+
return undefined;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
private rewriteProviderBadRequestDetail(detail: string): string {
|
|
300
|
+
const promptTooLongMatch = detail.match(
|
|
301
|
+
/prompt is too long:\s*([\d,]+)\s*tokens?\s*>\s*([\d,]+)\s*maximum/i,
|
|
302
|
+
);
|
|
303
|
+
if (promptTooLongMatch) {
|
|
304
|
+
const actual = promptTooLongMatch[1];
|
|
305
|
+
const maximum = promptTooLongMatch[2];
|
|
306
|
+
return `Prompt is too long: ${actual} tokens exceeds the ${maximum} token limit.`;
|
|
307
|
+
}
|
|
308
|
+
return detail.endsWith(".") ? detail : `${detail}.`;
|
|
309
|
+
}
|
|
196
310
|
}
|
|
@@ -216,11 +216,11 @@ export class BedrockHandler extends BaseHandler {
|
|
|
216
216
|
|
|
217
217
|
yield {
|
|
218
218
|
type: "usage",
|
|
219
|
-
inputTokens: inputTokens - cacheReadTokens,
|
|
219
|
+
inputTokens: Math.max(0, inputTokens - cacheReadTokens),
|
|
220
220
|
outputTokens,
|
|
221
221
|
thoughtsTokenCount,
|
|
222
222
|
cacheReadTokens,
|
|
223
|
-
totalCost: this.
|
|
223
|
+
totalCost: this.calculateCostFromInclusiveInput(
|
|
224
224
|
inputTokens,
|
|
225
225
|
outputTokens,
|
|
226
226
|
cacheReadTokens,
|
|
@@ -245,11 +245,11 @@ export class BedrockHandler extends BaseHandler {
|
|
|
245
245
|
|
|
246
246
|
yield {
|
|
247
247
|
type: "usage",
|
|
248
|
-
inputTokens: inputTokens - cacheReadTokens,
|
|
248
|
+
inputTokens: Math.max(0, inputTokens - cacheReadTokens),
|
|
249
249
|
outputTokens,
|
|
250
250
|
thoughtsTokenCount,
|
|
251
251
|
cacheReadTokens,
|
|
252
|
-
totalCost: this.
|
|
252
|
+
totalCost: this.calculateCostFromInclusiveInput(
|
|
253
253
|
inputTokens,
|
|
254
254
|
outputTokens,
|
|
255
255
|
cacheReadTokens,
|
|
@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
|
|
|
115
115
|
expect(usageChunk?.outputTokens).toBe(3);
|
|
116
116
|
});
|
|
117
117
|
|
|
118
|
+
it("keeps cached input tokens separate from total input tokens", async () => {
|
|
119
|
+
streamTextSpy.mockReturnValue({
|
|
120
|
+
fullStream: makeStreamParts([
|
|
121
|
+
{
|
|
122
|
+
type: "finish",
|
|
123
|
+
usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
|
|
124
|
+
},
|
|
125
|
+
]),
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
const handler = new ClaudeCodeHandler({
|
|
129
|
+
providerId: "claude-code",
|
|
130
|
+
modelId: "sonnet",
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
const chunks: ApiStreamChunk[] = [];
|
|
134
|
+
for await (const chunk of handler.createMessage("System", [
|
|
135
|
+
{ role: "user", content: "Hi" },
|
|
136
|
+
])) {
|
|
137
|
+
chunks.push(chunk);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const usageChunk = chunks.find(
|
|
141
|
+
(chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
|
|
142
|
+
chunk.type === "usage",
|
|
143
|
+
);
|
|
144
|
+
expect(usageChunk).toMatchObject({
|
|
145
|
+
inputTokens: 6,
|
|
146
|
+
outputTokens: 3,
|
|
147
|
+
cacheReadTokens: 4,
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
|
|
118
151
|
it("uses a fallback model id when model is missing", () => {
|
|
119
152
|
const handler = new ClaudeCodeHandler({
|
|
120
153
|
providerId: "claude-code",
|
|
@@ -18,7 +18,6 @@ import {
|
|
|
18
18
|
import {
|
|
19
19
|
type ApiStream,
|
|
20
20
|
type HandlerModelInfo,
|
|
21
|
-
type ModelInfo,
|
|
22
21
|
type ProviderConfig,
|
|
23
22
|
supportsModelThinking,
|
|
24
23
|
} from "../types";
|
|
@@ -258,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
|
|
|
258
257
|
|
|
259
258
|
// Yield final usage
|
|
260
259
|
const totalCost = this.calculateGeminiCost(
|
|
261
|
-
info,
|
|
262
260
|
promptTokens,
|
|
263
261
|
outputTokens,
|
|
264
262
|
thoughtsTokenCount,
|
|
@@ -267,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
|
|
|
267
265
|
|
|
268
266
|
yield {
|
|
269
267
|
type: "usage",
|
|
270
|
-
inputTokens: promptTokens
|
|
268
|
+
inputTokens: promptTokens,
|
|
271
269
|
outputTokens,
|
|
272
270
|
thoughtsTokenCount,
|
|
273
271
|
cacheReadTokens,
|
|
@@ -288,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
|
|
|
288
286
|
}
|
|
289
287
|
|
|
290
288
|
private calculateGeminiCost(
|
|
291
|
-
info: ModelInfo,
|
|
292
289
|
inputTokens: number,
|
|
293
290
|
outputTokens: number,
|
|
294
291
|
thoughtsTokenCount: number,
|
|
295
292
|
cacheReadTokens: number,
|
|
296
293
|
): number | undefined {
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
const uncachedInputTokens = inputTokens - cacheReadTokens;
|
|
303
|
-
const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
|
|
304
|
-
const outputCost =
|
|
305
|
-
pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
|
|
306
|
-
const cacheReadCost =
|
|
307
|
-
cacheReadTokens > 0
|
|
308
|
-
? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
|
|
309
|
-
: 0;
|
|
310
|
-
|
|
311
|
-
return inputCost + outputCost + cacheReadCost;
|
|
294
|
+
return this.calculateCost(
|
|
295
|
+
inputTokens,
|
|
296
|
+
outputTokens + thoughtsTokenCount,
|
|
297
|
+
cacheReadTokens,
|
|
298
|
+
);
|
|
312
299
|
}
|
|
313
300
|
}
|
|
314
301
|
|