@clinebot/llms 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +10 -10
- package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
- package/dist/providers/handlers/base.d.ts +3 -2
- package/dist/providers/types/stream.d.ts +1 -1
- package/package.json +2 -2
- package/src/providers/handlers/ai-sdk-community.ts +5 -8
- package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
- package/src/providers/handlers/anthropic-base.test.ts +30 -0
- package/src/providers/handlers/anthropic-base.ts +31 -29
- package/src/providers/handlers/base.test.ts +8 -2
- package/src/providers/handlers/base.ts +22 -1
- package/src/providers/handlers/bedrock-base.ts +2 -2
- package/src/providers/handlers/community-sdk.test.ts +33 -0
- package/src/providers/handlers/gemini-base.ts +6 -19
- package/src/providers/handlers/openai-base.ts +19 -8
- package/src/providers/handlers/openai-responses.test.ts +46 -0
- package/src/providers/handlers/openai-responses.ts +3 -7
- package/src/providers/handlers/r1-base.ts +7 -8
- package/src/providers/handlers/vertex.ts +14 -4
- package/src/providers/transform/anthropic-format.ts +14 -2
- package/src/providers/transform/format-conversion.test.ts +23 -0
- package/src/providers/types/stream.ts +1 -1
|
@@ -28,7 +28,7 @@ type AiSdkUsageMetrics = {
|
|
|
28
28
|
export type EmitAiSdkStreamOptions = {
|
|
29
29
|
responseId: string;
|
|
30
30
|
errorMessage: string;
|
|
31
|
-
calculateCost: (inputTokens: number, outputTokens: number, cacheReadTokens: number) => number | undefined;
|
|
31
|
+
calculateCost: (inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens?: number) => number | undefined;
|
|
32
32
|
reasoningTypes?: string[];
|
|
33
33
|
enableToolCalls?: boolean;
|
|
34
34
|
toolCallArgsOrder?: Array<"args" | "input">;
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Abstract base class that provides common functionality for all handlers.
|
|
5
5
|
*/
|
|
6
|
-
import type { ApiHandler, ApiStream, ApiStreamUsageChunk, HandlerModelInfo, ProviderConfig } from "../types";
|
|
6
|
+
import type { ApiHandler, ApiStream, ApiStreamUsageChunk, HandlerModelInfo, ModelInfo, ProviderConfig } from "../types";
|
|
7
7
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
8
8
|
import type { ApiStreamChunk } from "../types/stream";
|
|
9
9
|
export declare const DEFAULT_REQUEST_HEADERS: Record<string, string>;
|
|
@@ -23,7 +23,8 @@ export declare abstract class BaseHandler implements ApiHandler {
|
|
|
23
23
|
abort(): void;
|
|
24
24
|
setAbortSignal(signal: AbortSignal | undefined): void;
|
|
25
25
|
private logAbort;
|
|
26
|
-
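protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number): number | undefined;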
|
|
26
|
+
protected supportsPromptCache(modelInfo?: ModelInfo): boolean;
|
|
27
|
+
protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): number | undefined;
|
|
27
28
|
protected createResponseId(): string;
|
|
28
29
|
protected withResponseId<T extends ApiStreamChunk>(chunk: T, responseId: string): T;
|
|
29
30
|
protected withResponseIdForAll(chunks: Iterable<ApiStreamChunk>, responseId: string): Generator<ApiStreamChunk>;
|
|
@@ -47,7 +47,7 @@ export interface ApiStreamReasoningChunk {
|
|
|
47
47
|
*/
|
|
48
48
|
export interface ApiStreamUsageChunk {
|
|
49
49
|
type: "usage";
|
|
50
|
-
/**
|
|
50
|
+
/** Total number of input tokens reported by the provider */
|
|
51
51
|
inputTokens: number;
|
|
52
52
|
/** Number of output tokens */
|
|
53
53
|
outputTokens: number;
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@clinebot/llms",
|
|
3
|
-
"version": "0.0.10",
|
|
3
|
+
"version": "0.0.11",
|
|
4
4
|
"description": "Config-driven SDK for selecting, extending, and instantiating LLM providers and models",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
7
7
|
"dependencies": {
|
|
8
|
-
"@clinebot/shared": "0.0.
|
|
8
|
+
"@clinebot/shared": "0.0.11",
|
|
9
9
|
"@ai-sdk/amazon-bedrock": "^4.0.67",
|
|
10
10
|
"@ai-sdk/google-vertex": "^4.0.74",
|
|
11
11
|
"@ai-sdk/mistral": "^3.0.24",
|
|
@@ -41,6 +41,7 @@ export type EmitAiSdkStreamOptions = {
|
|
|
41
41
|
inputTokens: number,
|
|
42
42
|
outputTokens: number,
|
|
43
43
|
cacheReadTokens: number,
|
|
44
|
+
cacheWriteTokens?: number,
|
|
44
45
|
) => number | undefined;
|
|
45
46
|
reasoningTypes?: string[];
|
|
46
47
|
enableToolCalls?: boolean;
|
|
@@ -168,10 +169,7 @@ export async function* emitAiSdkStream(
|
|
|
168
169
|
|
|
169
170
|
yield {
|
|
170
171
|
type: "usage",
|
|
171
|
-
inputTokens: Math.max(
|
|
172
|
-
0,
|
|
173
|
-
usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
|
|
174
|
-
),
|
|
172
|
+
inputTokens: usageMetrics.inputTokens,
|
|
175
173
|
outputTokens: usageMetrics.outputTokens,
|
|
176
174
|
thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
|
|
177
175
|
cacheReadTokens: usageMetrics.cacheReadTokens,
|
|
@@ -180,6 +178,7 @@ export async function* emitAiSdkStream(
|
|
|
180
178
|
usageMetrics.inputTokens,
|
|
181
179
|
usageMetrics.outputTokens,
|
|
182
180
|
usageMetrics.cacheReadTokens,
|
|
181
|
+
usageMetrics.cacheWriteTokens,
|
|
183
182
|
),
|
|
184
183
|
id: responseId,
|
|
185
184
|
};
|
|
@@ -205,10 +204,7 @@ export async function* emitAiSdkStream(
|
|
|
205
204
|
const usageMetrics = resolveUsageMetrics(usage);
|
|
206
205
|
yield {
|
|
207
206
|
type: "usage",
|
|
208
|
-
inputTokens: Math.max(
|
|
209
|
-
0,
|
|
210
|
-
usageMetrics.inputTokens - usageMetrics.cacheReadTokens,
|
|
211
|
-
),
|
|
207
|
+
inputTokens: usageMetrics.inputTokens,
|
|
212
208
|
outputTokens: usageMetrics.outputTokens,
|
|
213
209
|
thoughtsTokenCount: usageMetrics.thoughtsTokenCount,
|
|
214
210
|
cacheReadTokens: usageMetrics.cacheReadTokens,
|
|
@@ -217,6 +213,7 @@ export async function* emitAiSdkStream(
|
|
|
217
213
|
usageMetrics.inputTokens,
|
|
218
214
|
usageMetrics.outputTokens,
|
|
219
215
|
usageMetrics.cacheReadTokens,
|
|
216
|
+
usageMetrics.cacheWriteTokens,
|
|
220
217
|
),
|
|
221
218
|
id: responseId,
|
|
222
219
|
};
|
|
@@ -185,8 +185,18 @@ export abstract class AiSdkProviderHandler extends BaseHandler {
|
|
|
185
185
|
yield* emitAiSdkStream(stream, {
|
|
186
186
|
responseId,
|
|
187
187
|
errorMessage: this.getStreamErrorMessage(),
|
|
188
|
-
calculateCost: (
|
|
189
|
-
|
|
188
|
+
calculateCost: (
|
|
189
|
+
inputTokens,
|
|
190
|
+
outputTokens,
|
|
191
|
+
cacheReadTokens,
|
|
192
|
+
cacheWriteTokens,
|
|
193
|
+
) =>
|
|
194
|
+
this.calculateCost(
|
|
195
|
+
inputTokens,
|
|
196
|
+
outputTokens,
|
|
197
|
+
cacheReadTokens,
|
|
198
|
+
cacheWriteTokens,
|
|
199
|
+
),
|
|
190
200
|
...this.getEmitStreamOptions(),
|
|
191
201
|
});
|
|
192
202
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { AnthropicHandler } from "./anthropic-base";
|
|
3
|
+
|
|
4
|
+
describe("AnthropicHandler prompt cache detection", () => {
|
|
5
|
+
it("enables prompt caching when model pricing includes cache pricing", () => {
|
|
6
|
+
const handler = new AnthropicHandler({
|
|
7
|
+
providerId: "anthropic",
|
|
8
|
+
modelId: "claude-sonnet-4-6",
|
|
9
|
+
apiKey: "test-key",
|
|
10
|
+
modelInfo: {
|
|
11
|
+
id: "claude-sonnet-4-6",
|
|
12
|
+
pricing: {
|
|
13
|
+
input: 3,
|
|
14
|
+
output: 15,
|
|
15
|
+
cacheRead: 0.3,
|
|
16
|
+
cacheWrite: 3.75,
|
|
17
|
+
},
|
|
18
|
+
},
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
const messages = handler.getMessages("system", [
|
|
22
|
+
{ role: "user", content: "Tell me about this repo" },
|
|
23
|
+
]);
|
|
24
|
+
const userTextBlock = messages[0]?.content?.[0] as
|
|
25
|
+
| { cache_control?: { type: string } }
|
|
26
|
+
| undefined;
|
|
27
|
+
|
|
28
|
+
expect(userTextBlock?.cache_control).toEqual({ type: "ephemeral" });
|
|
29
|
+
});
|
|
30
|
+
});
|
|
@@ -17,7 +17,6 @@ import {
|
|
|
17
17
|
import {
|
|
18
18
|
type ApiStream,
|
|
19
19
|
type HandlerModelInfo,
|
|
20
|
-
hasModelCapability,
|
|
21
20
|
type ProviderConfig,
|
|
22
21
|
supportsModelThinking,
|
|
23
22
|
} from "../types";
|
|
@@ -76,10 +75,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
76
75
|
_systemPrompt: string,
|
|
77
76
|
messages: Message[],
|
|
78
77
|
): Anthropic.MessageParam[] {
|
|
79
|
-
const supportsPromptCache = hasModelCapability(
|
|
80
|
-
this.getModel().info,
|
|
81
|
-
"prompt-cache",
|
|
82
|
-
);
|
|
78
|
+
const supportsPromptCache = this.supportsPromptCache(this.getModel().info);
|
|
83
79
|
return convertToAnthropicMessages(
|
|
84
80
|
messages,
|
|
85
81
|
supportsPromptCache,
|
|
@@ -113,7 +109,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
113
109
|
const budgetTokens =
|
|
114
110
|
thinkingSupported && requestedBudget > 0 ? requestedBudget : 0;
|
|
115
111
|
const nativeToolsOn = tools && tools.length > 0;
|
|
116
|
-
const supportsPromptCache =
|
|
112
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
117
113
|
const reasoningOn = thinkingSupported && budgetTokens > 0;
|
|
118
114
|
const debugThinking = isThinkingDebugEnabled();
|
|
119
115
|
const debugChunkCounts: Record<string, number> = {};
|
|
@@ -139,30 +135,34 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
139
135
|
const requestOptions = { signal: abortSignal };
|
|
140
136
|
|
|
141
137
|
// Create the request
|
|
138
|
+
// Use top-level automatic caching so the entire prefix (system +
|
|
139
|
+
// messages) is cached and the breakpoint advances each turn.
|
|
140
|
+
const createParams: Record<string, unknown> &
|
|
141
|
+
Anthropic.MessageCreateParamsStreaming = {
|
|
142
|
+
model: model.id,
|
|
143
|
+
thinking: reasoningOn
|
|
144
|
+
? { type: "enabled", budget_tokens: budgetTokens }
|
|
145
|
+
: undefined,
|
|
146
|
+
max_tokens:
|
|
147
|
+
model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
148
|
+
temperature: reasoningOn ? undefined : 0,
|
|
149
|
+
system: [
|
|
150
|
+
supportsPromptCache
|
|
151
|
+
? {
|
|
152
|
+
text: systemPrompt,
|
|
153
|
+
type: "text",
|
|
154
|
+
cache_control: { type: "ephemeral" },
|
|
155
|
+
}
|
|
156
|
+
: { text: systemPrompt, type: "text" },
|
|
157
|
+
],
|
|
158
|
+
messages: anthropicMessages as Anthropic.MessageParam[],
|
|
159
|
+
stream: true,
|
|
160
|
+
tools: anthropicTools,
|
|
161
|
+
tool_choice: nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
|
|
162
|
+
};
|
|
163
|
+
|
|
142
164
|
const stream = await client.messages.create(
|
|
143
|
-
|
|
144
|
-
model: model.id,
|
|
145
|
-
thinking: reasoningOn
|
|
146
|
-
? { type: "enabled", budget_tokens: budgetTokens }
|
|
147
|
-
: undefined,
|
|
148
|
-
max_tokens:
|
|
149
|
-
model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
150
|
-
temperature: reasoningOn ? undefined : 0,
|
|
151
|
-
system: supportsPromptCache
|
|
152
|
-
? [
|
|
153
|
-
{
|
|
154
|
-
text: systemPrompt,
|
|
155
|
-
type: "text",
|
|
156
|
-
cache_control: { type: "ephemeral" },
|
|
157
|
-
},
|
|
158
|
-
]
|
|
159
|
-
: [{ text: systemPrompt, type: "text" }],
|
|
160
|
-
messages: anthropicMessages as Anthropic.MessageParam[],
|
|
161
|
-
stream: true,
|
|
162
|
-
tools: anthropicTools,
|
|
163
|
-
tool_choice:
|
|
164
|
-
nativeToolsOn && !reasoningOn ? { type: "auto" } : undefined,
|
|
165
|
-
},
|
|
165
|
+
createParams as Anthropic.MessageCreateParamsStreaming,
|
|
166
166
|
requestOptions,
|
|
167
167
|
);
|
|
168
168
|
|
|
@@ -244,6 +244,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
244
244
|
usageSnapshot.inputTokens,
|
|
245
245
|
usageSnapshot.outputTokens,
|
|
246
246
|
usageSnapshot.cacheReadTokens,
|
|
247
|
+
usageSnapshot.cacheWriteTokens,
|
|
247
248
|
),
|
|
248
249
|
id: responseId,
|
|
249
250
|
};
|
|
@@ -263,6 +264,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
263
264
|
usageSnapshot.inputTokens,
|
|
264
265
|
usageSnapshot.outputTokens,
|
|
265
266
|
usageSnapshot.cacheReadTokens,
|
|
267
|
+
usageSnapshot.cacheWriteTokens,
|
|
266
268
|
),
|
|
267
269
|
id: responseId,
|
|
268
270
|
};
|
|
@@ -15,8 +15,14 @@ class TestHandler extends BaseHandler {
|
|
|
15
15
|
inputTokens: number,
|
|
16
16
|
outputTokens: number,
|
|
17
17
|
cacheReadTokens = 0,
|
|
18
|
+
cacheWriteTokens = 0,
|
|
18
19
|
): number | undefined {
|
|
19
|
-
return this.calculateCost(
|
|
20
|
+
return this.calculateCost(
|
|
21
|
+
inputTokens,
|
|
22
|
+
outputTokens,
|
|
23
|
+
cacheReadTokens,
|
|
24
|
+
cacheWriteTokens,
|
|
25
|
+
);
|
|
20
26
|
}
|
|
21
27
|
|
|
22
28
|
public exposeAbortSignal(): AbortSignal {
|
|
@@ -45,7 +51,7 @@ describe("BaseHandler.calculateCost", () => {
|
|
|
45
51
|
|
|
46
52
|
const cost = handler.computeCost(1_000_000, 1_000_000, 100_000);
|
|
47
53
|
|
|
48
|
-
expect(cost).toBeCloseTo(
|
|
54
|
+
expect(cost).toBeCloseTo(18.03, 6);
|
|
49
55
|
});
|
|
50
56
|
});
|
|
51
57
|
|
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
ApiStream,
|
|
11
11
|
ApiStreamUsageChunk,
|
|
12
12
|
HandlerModelInfo,
|
|
13
|
+
ModelInfo,
|
|
13
14
|
ProviderConfig,
|
|
14
15
|
} from "../types";
|
|
15
16
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
@@ -146,10 +147,26 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
146
147
|
});
|
|
147
148
|
}
|
|
148
149
|
|
|
150
|
+
protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
|
|
151
|
+
const resolvedModelInfo =
|
|
152
|
+
modelInfo ??
|
|
153
|
+
this.config.modelInfo ??
|
|
154
|
+
this.config.knownModels?.[this.config.modelId];
|
|
155
|
+
const pricing = resolvedModelInfo?.pricing;
|
|
156
|
+
|
|
157
|
+
return (
|
|
158
|
+
resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
|
|
159
|
+
this.config.capabilities?.includes("prompt-cache") === true ||
|
|
160
|
+
typeof pricing?.cacheRead === "number" ||
|
|
161
|
+
typeof pricing?.cacheWrite === "number"
|
|
162
|
+
);
|
|
163
|
+
}
|
|
164
|
+
|
|
149
165
|
protected calculateCost(
|
|
150
166
|
inputTokens: number,
|
|
151
167
|
outputTokens: number,
|
|
152
168
|
cacheReadTokens = 0,
|
|
169
|
+
cacheWriteTokens = 0,
|
|
153
170
|
): number | undefined {
|
|
154
171
|
const pricing = (
|
|
155
172
|
this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
|
|
@@ -159,10 +176,14 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
159
176
|
}
|
|
160
177
|
|
|
161
178
|
return (
|
|
162
|
-
(
|
|
179
|
+
(inputTokens / 1_000_000) * pricing.input +
|
|
163
180
|
(outputTokens / 1_000_000) * pricing.output +
|
|
164
181
|
(cacheReadTokens > 0
|
|
165
182
|
? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
|
|
183
|
+
: 0) +
|
|
184
|
+
(cacheWriteTokens > 0
|
|
185
|
+
? (cacheWriteTokens / 1_000_000) *
|
|
186
|
+
(pricing.cacheWrite ?? pricing.input * 1.25)
|
|
166
187
|
: 0)
|
|
167
188
|
);
|
|
168
189
|
}
|
|
@@ -216,7 +216,7 @@ export class BedrockHandler extends BaseHandler {
|
|
|
216
216
|
|
|
217
217
|
yield {
|
|
218
218
|
type: "usage",
|
|
219
|
-
inputTokens
|
|
219
|
+
inputTokens,
|
|
220
220
|
outputTokens,
|
|
221
221
|
thoughtsTokenCount,
|
|
222
222
|
cacheReadTokens,
|
|
@@ -245,7 +245,7 @@ export class BedrockHandler extends BaseHandler {
|
|
|
245
245
|
|
|
246
246
|
yield {
|
|
247
247
|
type: "usage",
|
|
248
|
-
inputTokens
|
|
248
|
+
inputTokens,
|
|
249
249
|
outputTokens,
|
|
250
250
|
thoughtsTokenCount,
|
|
251
251
|
cacheReadTokens,
|
|
@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
|
|
|
115
115
|
expect(usageChunk?.outputTokens).toBe(3);
|
|
116
116
|
});
|
|
117
117
|
|
|
118
|
+
it("keeps cached input tokens separate from total input tokens", async () => {
|
|
119
|
+
streamTextSpy.mockReturnValue({
|
|
120
|
+
fullStream: makeStreamParts([
|
|
121
|
+
{
|
|
122
|
+
type: "finish",
|
|
123
|
+
usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
|
|
124
|
+
},
|
|
125
|
+
]),
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
const handler = new ClaudeCodeHandler({
|
|
129
|
+
providerId: "claude-code",
|
|
130
|
+
modelId: "sonnet",
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
const chunks: ApiStreamChunk[] = [];
|
|
134
|
+
for await (const chunk of handler.createMessage("System", [
|
|
135
|
+
{ role: "user", content: "Hi" },
|
|
136
|
+
])) {
|
|
137
|
+
chunks.push(chunk);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const usageChunk = chunks.find(
|
|
141
|
+
(chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
|
|
142
|
+
chunk.type === "usage",
|
|
143
|
+
);
|
|
144
|
+
expect(usageChunk).toMatchObject({
|
|
145
|
+
inputTokens: 10,
|
|
146
|
+
outputTokens: 3,
|
|
147
|
+
cacheReadTokens: 4,
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
|
|
118
151
|
it("uses a fallback model id when model is missing", () => {
|
|
119
152
|
const handler = new ClaudeCodeHandler({
|
|
120
153
|
providerId: "claude-code",
|
|
@@ -18,7 +18,6 @@ import {
|
|
|
18
18
|
import {
|
|
19
19
|
type ApiStream,
|
|
20
20
|
type HandlerModelInfo,
|
|
21
|
-
type ModelInfo,
|
|
22
21
|
type ProviderConfig,
|
|
23
22
|
supportsModelThinking,
|
|
24
23
|
} from "../types";
|
|
@@ -258,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
|
|
|
258
257
|
|
|
259
258
|
// Yield final usage
|
|
260
259
|
const totalCost = this.calculateGeminiCost(
|
|
261
|
-
info,
|
|
262
260
|
promptTokens,
|
|
263
261
|
outputTokens,
|
|
264
262
|
thoughtsTokenCount,
|
|
@@ -267,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
|
|
|
267
265
|
|
|
268
266
|
yield {
|
|
269
267
|
type: "usage",
|
|
270
|
-
inputTokens: promptTokens
|
|
268
|
+
inputTokens: promptTokens,
|
|
271
269
|
outputTokens,
|
|
272
270
|
thoughtsTokenCount,
|
|
273
271
|
cacheReadTokens,
|
|
@@ -288,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
|
|
|
288
286
|
}
|
|
289
287
|
|
|
290
288
|
private calculateGeminiCost(
|
|
291
|
-
info: ModelInfo,
|
|
292
289
|
inputTokens: number,
|
|
293
290
|
outputTokens: number,
|
|
294
291
|
thoughtsTokenCount: number,
|
|
295
292
|
cacheReadTokens: number,
|
|
296
293
|
): number | undefined {
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
const uncachedInputTokens = inputTokens - cacheReadTokens;
|
|
303
|
-
const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
|
|
304
|
-
const outputCost =
|
|
305
|
-
pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
|
|
306
|
-
const cacheReadCost =
|
|
307
|
-
cacheReadTokens > 0
|
|
308
|
-
? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
|
|
309
|
-
: 0;
|
|
310
|
-
|
|
311
|
-
return inputCost + outputCost + cacheReadCost;
|
|
294
|
+
return this.calculateCost(
|
|
295
|
+
inputTokens,
|
|
296
|
+
outputTokens + thoughtsTokenCount,
|
|
297
|
+
cacheReadTokens,
|
|
298
|
+
);
|
|
312
299
|
}
|
|
313
300
|
}
|
|
314
301
|
|
|
@@ -22,7 +22,6 @@ import type {
|
|
|
22
22
|
ModelInfo,
|
|
23
23
|
ProviderConfig,
|
|
24
24
|
} from "../types";
|
|
25
|
-
import { hasModelCapability } from "../types";
|
|
26
25
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
27
26
|
import { retryStream } from "../utils/retry";
|
|
28
27
|
import { ToolCallProcessor } from "../utils/tool-processor";
|
|
@@ -108,9 +107,7 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
108
107
|
messages: Message[],
|
|
109
108
|
): OpenAI.Chat.ChatCompletionMessageParam[] {
|
|
110
109
|
const model = this.getModel();
|
|
111
|
-
const supportsPromptCache =
|
|
112
|
-
hasModelCapability(model.info, "prompt-cache") ||
|
|
113
|
-
this.config.capabilities?.includes("prompt-cache") === true;
|
|
110
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
114
111
|
const systemMessage = supportsPromptCache
|
|
115
112
|
? ({
|
|
116
113
|
role: "system",
|
|
@@ -156,7 +153,8 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
156
153
|
const openAiMessages = this.getMessages(systemPrompt, messages);
|
|
157
154
|
|
|
158
155
|
// Build request options
|
|
159
|
-
const requestOptions:
|
|
156
|
+
const requestOptions: Record<string, unknown> &
|
|
157
|
+
OpenAI.ChatCompletionCreateParamsStreaming = {
|
|
160
158
|
model: modelId,
|
|
161
159
|
messages: openAiMessages,
|
|
162
160
|
stream: true,
|
|
@@ -167,6 +165,17 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
167
165
|
}),
|
|
168
166
|
};
|
|
169
167
|
|
|
168
|
+
// Add top-level cache_control for OpenRouter with Anthropic models.
|
|
169
|
+
// This enables automatic caching where the cache breakpoint advances
|
|
170
|
+
// as the conversation grows, rather than relying on explicit per-block
|
|
171
|
+
// breakpoints which are limited to 4.
|
|
172
|
+
if (
|
|
173
|
+
this.config.providerId === "openrouter" &&
|
|
174
|
+
modelId.startsWith("anthropic/")
|
|
175
|
+
) {
|
|
176
|
+
requestOptions.cache_control = { type: "ephemeral" };
|
|
177
|
+
}
|
|
178
|
+
|
|
170
179
|
// Add max tokens if configured
|
|
171
180
|
const maxTokens = modelInfo.maxTokens ?? this.config.maxOutputTokens;
|
|
172
181
|
if (maxTokens) {
|
|
@@ -286,15 +295,16 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
286
295
|
cached_tokens?: number;
|
|
287
296
|
cache_write_tokens?: number;
|
|
288
297
|
};
|
|
289
|
-
prompt_cache_miss_tokens?: number;
|
|
290
298
|
cache_creation_input_tokens?: number;
|
|
291
299
|
cache_read_input_tokens?: number;
|
|
292
300
|
};
|
|
293
301
|
const cacheReadTokens =
|
|
294
|
-
usageWithCache.prompt_tokens_details?.cached_tokens ??
|
|
302
|
+
usageWithCache.prompt_tokens_details?.cached_tokens ??
|
|
303
|
+
usageWithCache.cache_read_input_tokens ??
|
|
304
|
+
0;
|
|
295
305
|
const cacheWriteTokens =
|
|
296
306
|
usageWithCache.prompt_tokens_details?.cache_write_tokens ??
|
|
297
|
-
usageWithCache.
|
|
307
|
+
usageWithCache.cache_creation_input_tokens ??
|
|
298
308
|
0;
|
|
299
309
|
|
|
300
310
|
yield {
|
|
@@ -307,6 +317,7 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
307
317
|
inputTokens,
|
|
308
318
|
outputTokens,
|
|
309
319
|
cacheReadTokens,
|
|
320
|
+
cacheWriteTokens,
|
|
310
321
|
),
|
|
311
322
|
id: responseId,
|
|
312
323
|
};
|
|
@@ -210,4 +210,50 @@ describe("OpenAIResponsesHandler", () => {
|
|
|
210
210
|
},
|
|
211
211
|
});
|
|
212
212
|
});
|
|
213
|
+
|
|
214
|
+
it("keeps cached input tokens separate in usage chunks", () => {
|
|
215
|
+
const handler = new TestOpenAIResponsesHandler({
|
|
216
|
+
providerId: "openai-native",
|
|
217
|
+
modelId: "gpt-5.4",
|
|
218
|
+
apiKey: "test-key",
|
|
219
|
+
baseUrl: "https://example.com",
|
|
220
|
+
modelInfo: {
|
|
221
|
+
id: "gpt-5.4",
|
|
222
|
+
pricing: {
|
|
223
|
+
input: 1,
|
|
224
|
+
output: 2,
|
|
225
|
+
cacheRead: 0.5,
|
|
226
|
+
},
|
|
227
|
+
},
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
const chunks = handler.processChunkForTest({
|
|
231
|
+
type: "response.completed",
|
|
232
|
+
response: {
|
|
233
|
+
id: "resp_usage",
|
|
234
|
+
usage: {
|
|
235
|
+
input_tokens: 100,
|
|
236
|
+
output_tokens: 40,
|
|
237
|
+
input_tokens_details: {
|
|
238
|
+
cached_tokens: 25,
|
|
239
|
+
},
|
|
240
|
+
output_tokens_details: {
|
|
241
|
+
reasoning_tokens: 10,
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
},
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
expect(chunks[0]).toMatchObject({
|
|
248
|
+
type: "usage",
|
|
249
|
+
inputTokens: 100,
|
|
250
|
+
outputTokens: 40,
|
|
251
|
+
cacheReadTokens: 25,
|
|
252
|
+
cacheWriteTokens: 0,
|
|
253
|
+
});
|
|
254
|
+
expect(chunks[0]?.type).toBe("usage");
|
|
255
|
+
if (chunks[0]?.type === "usage") {
|
|
256
|
+
expect(chunks[0].totalCost).toBeCloseTo(0.0001925, 10);
|
|
257
|
+
}
|
|
258
|
+
});
|
|
213
259
|
});
|
|
@@ -565,23 +565,19 @@ export class OpenAIResponsesHandler extends BaseHandler {
|
|
|
565
565
|
const inputTokens = usage.input_tokens || 0;
|
|
566
566
|
const outputTokens = usage.output_tokens || 0;
|
|
567
567
|
const cacheReadTokens =
|
|
568
|
-
usage.output_tokens_details?.reasoning_tokens || 0;
|
|
569
|
-
const cacheWriteTokens =
|
|
570
568
|
usage.input_tokens_details?.cached_tokens || 0;
|
|
569
|
+
const cacheWriteTokens = 0;
|
|
571
570
|
|
|
572
571
|
const totalCost = this.calculateCost(
|
|
573
572
|
inputTokens,
|
|
574
573
|
outputTokens,
|
|
575
574
|
cacheReadTokens,
|
|
576
|
-
|
|
577
|
-
const nonCachedInputTokens = Math.max(
|
|
578
|
-
0,
|
|
579
|
-
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
575
|
+
cacheWriteTokens,
|
|
580
576
|
);
|
|
581
577
|
|
|
582
578
|
yield {
|
|
583
579
|
type: "usage",
|
|
584
|
-
inputTokens
|
|
580
|
+
inputTokens,
|
|
585
581
|
outputTokens,
|
|
586
582
|
cacheWriteTokens,
|
|
587
583
|
cacheReadTokens,
|
|
@@ -255,19 +255,18 @@ export class R1BaseHandler extends BaseHandler {
|
|
|
255
255
|
const cacheReadTokens = r1Usage.prompt_cache_hit_tokens ?? 0;
|
|
256
256
|
const cacheWriteTokens = r1Usage.prompt_cache_miss_tokens ?? 0;
|
|
257
257
|
|
|
258
|
-
// Calculate non-cached input tokens (will always be 0 for DeepSeek since input = read + write)
|
|
259
|
-
const nonCachedInputTokens = Math.max(
|
|
260
|
-
0,
|
|
261
|
-
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
262
|
-
);
|
|
263
|
-
|
|
264
258
|
yield {
|
|
265
259
|
type: "usage",
|
|
266
|
-
inputTokens
|
|
260
|
+
inputTokens,
|
|
267
261
|
outputTokens,
|
|
268
262
|
cacheReadTokens,
|
|
269
263
|
cacheWriteTokens,
|
|
270
|
-
totalCost: this.calculateCost(
|
|
264
|
+
totalCost: this.calculateCost(
|
|
265
|
+
inputTokens,
|
|
266
|
+
outputTokens,
|
|
267
|
+
cacheReadTokens,
|
|
268
|
+
cacheWriteTokens,
|
|
269
|
+
),
|
|
271
270
|
id: responseId,
|
|
272
271
|
};
|
|
273
272
|
}
|
|
@@ -189,7 +189,7 @@ export class VertexHandler extends BaseHandler {
|
|
|
189
189
|
if (!isClaudeModel(model.id)) {
|
|
190
190
|
return this.ensureGeminiHandler().getMessages(systemPrompt, messages);
|
|
191
191
|
}
|
|
192
|
-
const supportsPromptCache =
|
|
192
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
193
193
|
return convertToAnthropicMessages(messages, supportsPromptCache);
|
|
194
194
|
}
|
|
195
195
|
|
|
@@ -226,7 +226,7 @@ export class VertexHandler extends BaseHandler {
|
|
|
226
226
|
const budgetTokens = this.config.thinkingBudgetTokens ?? 0;
|
|
227
227
|
const reasoningOn =
|
|
228
228
|
hasModelCapability(model.info, "reasoning") && budgetTokens > 0;
|
|
229
|
-
const promptCacheOn =
|
|
229
|
+
const promptCacheOn = this.supportsPromptCache(model.info);
|
|
230
230
|
|
|
231
231
|
const providerOptions: Record<string, unknown> = {};
|
|
232
232
|
if (reasoningOn) {
|
|
@@ -251,8 +251,18 @@ export class VertexHandler extends BaseHandler {
|
|
|
251
251
|
yield* emitAiSdkStream(stream, {
|
|
252
252
|
responseId,
|
|
253
253
|
errorMessage: "Vertex Anthropic stream failed",
|
|
254
|
-
calculateCost: (
|
|
255
|
-
|
|
254
|
+
calculateCost: (
|
|
255
|
+
inputTokens,
|
|
256
|
+
outputTokens,
|
|
257
|
+
cacheReadTokens,
|
|
258
|
+
cacheWriteTokens,
|
|
259
|
+
) =>
|
|
260
|
+
this.calculateCost(
|
|
261
|
+
inputTokens,
|
|
262
|
+
outputTokens,
|
|
263
|
+
cacheReadTokens,
|
|
264
|
+
cacheWriteTokens,
|
|
265
|
+
),
|
|
256
266
|
reasoningTypes: ["reasoning-delta"],
|
|
257
267
|
enableToolCalls: true,
|
|
258
268
|
toolCallArgsOrder: ["input", "args"],
|