@clinebot/llms 0.0.7 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.browser.d.ts +2 -2
- package/dist/index.browser.js +40 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +12 -12
- package/dist/providers/handlers/ai-sdk-community.d.ts +1 -1
- package/dist/providers/handlers/base.d.ts +5 -29
- package/dist/providers/transform/openai-format.d.ts +1 -1
- package/dist/providers/types/config.d.ts +6 -0
- package/dist/providers/types/stream.d.ts +1 -1
- package/package.json +2 -1
- package/src/index.browser.ts +2 -2
- package/src/index.ts +2 -2
- package/src/models/providers/vercel-ai-gateway.ts +1 -1
- package/src/providers/handlers/ai-sdk-community.ts +5 -8
- package/src/providers/handlers/ai-sdk-provider-base.ts +12 -2
- package/src/providers/handlers/anthropic-base.test.ts +30 -0
- package/src/providers/handlers/anthropic-base.ts +43 -30
- package/src/providers/handlers/base.test.ts +68 -3
- package/src/providers/handlers/base.ts +104 -54
- package/src/providers/handlers/bedrock-base.ts +3 -3
- package/src/providers/handlers/community-sdk.test.ts +33 -0
- package/src/providers/handlers/gemini-base.test.ts +40 -0
- package/src/providers/handlers/gemini-base.ts +22 -20
- package/src/providers/handlers/openai-base.ts +67 -12
- package/src/providers/handlers/openai-responses.test.ts +46 -0
- package/src/providers/handlers/openai-responses.ts +3 -7
- package/src/providers/handlers/r1-base.ts +7 -8
- package/src/providers/handlers/vertex.ts +15 -5
- package/src/providers/transform/anthropic-format.ts +14 -2
- package/src/providers/transform/format-conversion.test.ts +49 -0
- package/src/providers/transform/openai-format.ts +50 -7
- package/src/providers/types/config.ts +8 -0
- package/src/providers/types/stream.ts +1 -1
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
ApiStream,
|
|
11
11
|
ApiStreamUsageChunk,
|
|
12
12
|
HandlerModelInfo,
|
|
13
|
+
ModelInfo,
|
|
13
14
|
ProviderConfig,
|
|
14
15
|
} from "../types";
|
|
15
16
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
@@ -22,37 +23,44 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
|
|
|
22
23
|
"X-CLIENT-TYPE": "cline-sdk",
|
|
23
24
|
};
|
|
24
25
|
|
|
26
|
+
const controllerIds = new WeakMap<AbortController, string>();
|
|
27
|
+
let controllerIdCounter = 0;
|
|
28
|
+
|
|
29
|
+
function getControllerId(controller: AbortController): string {
|
|
30
|
+
let id = controllerIds.get(controller);
|
|
31
|
+
if (!id) {
|
|
32
|
+
id = `abort_${++controllerIdCounter}`;
|
|
33
|
+
controllerIds.set(controller, id);
|
|
34
|
+
}
|
|
35
|
+
return id;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function serializeAbortReason(reason: unknown): unknown {
|
|
39
|
+
return reason instanceof Error
|
|
40
|
+
? { name: reason.name, message: reason.message }
|
|
41
|
+
: reason;
|
|
42
|
+
}
|
|
43
|
+
|
|
25
44
|
/**
|
|
26
45
|
* Base handler class with common functionality
|
|
27
46
|
*/
|
|
28
47
|
export abstract class BaseHandler implements ApiHandler {
|
|
29
48
|
protected config: ProviderConfig;
|
|
30
49
|
protected abortController: AbortController | undefined;
|
|
50
|
+
private abortSignalSequence = 0;
|
|
31
51
|
|
|
32
52
|
constructor(config: ProviderConfig) {
|
|
33
53
|
this.config = config;
|
|
34
54
|
}
|
|
35
55
|
|
|
36
|
-
/**
|
|
37
|
-
* Convert Cline messages to provider-specific format
|
|
38
|
-
* Must be implemented by subclasses
|
|
39
|
-
*/
|
|
40
56
|
abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
|
|
41
57
|
|
|
42
|
-
/**
|
|
43
|
-
* Create a streaming message completion
|
|
44
|
-
* Must be implemented by subclasses
|
|
45
|
-
*/
|
|
46
58
|
abstract createMessage(
|
|
47
59
|
systemPrompt: string,
|
|
48
60
|
messages: Message[],
|
|
49
61
|
tools?: ToolDefinition[],
|
|
50
62
|
): ApiStream;
|
|
51
63
|
|
|
52
|
-
/**
|
|
53
|
-
* Get the current model configuration
|
|
54
|
-
* Can be overridden by subclasses for provider-specific logic
|
|
55
|
-
*/
|
|
56
64
|
getModel(): HandlerModelInfo {
|
|
57
65
|
const modelId = this.config.modelId;
|
|
58
66
|
return {
|
|
@@ -61,43 +69,55 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
61
69
|
};
|
|
62
70
|
}
|
|
63
71
|
|
|
64
|
-
/**
|
|
65
|
-
* Get usage information (optional)
|
|
66
|
-
* Override in subclasses that support this
|
|
67
|
-
*/
|
|
68
72
|
async getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined> {
|
|
69
73
|
return undefined;
|
|
70
74
|
}
|
|
71
75
|
|
|
72
|
-
/**
|
|
73
|
-
* Get the abort signal for the current request
|
|
74
|
-
* Creates a new AbortController if one doesn't exist or was already aborted
|
|
75
|
-
* Combines with config.abortSignal if provided
|
|
76
|
-
*/
|
|
77
76
|
protected getAbortSignal(): AbortSignal {
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
77
|
+
const controller = new AbortController();
|
|
78
|
+
this.abortController = controller;
|
|
79
|
+
controller.signal.addEventListener(
|
|
80
|
+
"abort",
|
|
81
|
+
() => {
|
|
82
|
+
if (this.abortController === controller) {
|
|
83
|
+
this.abortController = undefined;
|
|
84
|
+
}
|
|
85
|
+
},
|
|
86
|
+
{ once: true },
|
|
87
|
+
);
|
|
88
|
+
|
|
89
|
+
const configSignal = this.config.abortSignal;
|
|
90
|
+
if (configSignal) {
|
|
86
91
|
if (configSignal.aborted) {
|
|
87
|
-
this.
|
|
92
|
+
this.logAbort("debug", "Provider request inherited aborted signal", {
|
|
93
|
+
controllerId: getControllerId(controller),
|
|
94
|
+
reason: serializeAbortReason(configSignal.reason),
|
|
95
|
+
});
|
|
96
|
+
controller.abort(configSignal.reason);
|
|
88
97
|
} else {
|
|
89
|
-
|
|
90
|
-
|
|
98
|
+
const signalId = ++this.abortSignalSequence;
|
|
99
|
+
configSignal.addEventListener(
|
|
100
|
+
"abort",
|
|
101
|
+
() => {
|
|
102
|
+
this.logAbort("warn", "Provider request abort signal fired", {
|
|
103
|
+
controllerId: getControllerId(controller),
|
|
104
|
+
signalId,
|
|
105
|
+
reason: serializeAbortReason(configSignal.reason),
|
|
106
|
+
});
|
|
107
|
+
controller.abort(configSignal.reason);
|
|
108
|
+
},
|
|
109
|
+
{ once: true },
|
|
110
|
+
);
|
|
111
|
+
this.logAbort("debug", "Provider request attached abort signal", {
|
|
112
|
+
controllerId: getControllerId(controller),
|
|
113
|
+
signalId,
|
|
91
114
|
});
|
|
92
115
|
}
|
|
93
116
|
}
|
|
94
117
|
|
|
95
|
-
return
|
|
118
|
+
return controller.signal;
|
|
96
119
|
}
|
|
97
120
|
|
|
98
|
-
/**
|
|
99
|
-
* Abort the current request
|
|
100
|
-
*/
|
|
101
121
|
abort(): void {
|
|
102
122
|
this.abortController?.abort();
|
|
103
123
|
}
|
|
@@ -105,37 +125,67 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
105
125
|
setAbortSignal(signal: AbortSignal | undefined): void {
|
|
106
126
|
this.config.abortSignal = signal;
|
|
107
127
|
if (signal?.aborted) {
|
|
128
|
+
this.logAbort("debug", "Provider handler received pre-aborted signal", {
|
|
129
|
+
controllerId: this.abortController
|
|
130
|
+
? getControllerId(this.abortController)
|
|
131
|
+
: undefined,
|
|
132
|
+
reason: serializeAbortReason(signal.reason),
|
|
133
|
+
});
|
|
108
134
|
this.abortController?.abort(signal.reason);
|
|
109
135
|
}
|
|
110
136
|
}
|
|
111
137
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
138
|
+
private logAbort(
|
|
139
|
+
level: "debug" | "warn",
|
|
140
|
+
message: string,
|
|
141
|
+
metadata?: Record<string, unknown>,
|
|
142
|
+
): void {
|
|
143
|
+
this.config.logger?.[level]?.(message, {
|
|
144
|
+
providerId: this.config.providerId,
|
|
145
|
+
modelId: this.config.modelId,
|
|
146
|
+
...metadata,
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
protected supportsPromptCache(modelInfo?: ModelInfo): boolean {
|
|
151
|
+
const resolvedModelInfo =
|
|
152
|
+
modelInfo ??
|
|
153
|
+
this.config.modelInfo ??
|
|
154
|
+
this.config.knownModels?.[this.config.modelId];
|
|
155
|
+
const pricing = resolvedModelInfo?.pricing;
|
|
156
|
+
|
|
157
|
+
return (
|
|
158
|
+
resolvedModelInfo?.capabilities?.includes("prompt-cache") === true ||
|
|
159
|
+
this.config.capabilities?.includes("prompt-cache") === true ||
|
|
160
|
+
typeof pricing?.cacheRead === "number" ||
|
|
161
|
+
typeof pricing?.cacheWrite === "number"
|
|
162
|
+
);
|
|
163
|
+
}
|
|
164
|
+
|
|
115
165
|
protected calculateCost(
|
|
116
166
|
inputTokens: number,
|
|
117
167
|
outputTokens: number,
|
|
118
168
|
cacheReadTokens = 0,
|
|
169
|
+
cacheWriteTokens = 0,
|
|
119
170
|
): number | undefined {
|
|
120
|
-
const
|
|
121
|
-
this.config.modelInfo ??
|
|
122
|
-
|
|
123
|
-
? this.config.knownModels?.[this.config.modelId]
|
|
124
|
-
: undefined);
|
|
125
|
-
const pricing = modelPricingSource?.pricing;
|
|
171
|
+
const pricing = (
|
|
172
|
+
this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
|
|
173
|
+
)?.pricing;
|
|
126
174
|
if (!pricing?.input || !pricing?.output) {
|
|
127
175
|
return undefined;
|
|
128
176
|
}
|
|
129
177
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
cacheReadTokens > 0
|
|
178
|
+
return (
|
|
179
|
+
(inputTokens / 1_000_000) * pricing.input +
|
|
180
|
+
(outputTokens / 1_000_000) * pricing.output +
|
|
181
|
+
(cacheReadTokens > 0
|
|
135
182
|
? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
|
|
136
|
-
: 0
|
|
137
|
-
|
|
138
|
-
|
|
183
|
+
: 0) +
|
|
184
|
+
(cacheWriteTokens > 0
|
|
185
|
+
? (cacheWriteTokens / 1_000_000) *
|
|
186
|
+
(pricing.cacheWrite ?? pricing.input * 1.25)
|
|
187
|
+
: 0)
|
|
188
|
+
);
|
|
139
189
|
}
|
|
140
190
|
|
|
141
191
|
protected createResponseId(): string {
|
|
@@ -154,7 +204,7 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
154
204
|
responseId: string,
|
|
155
205
|
): Generator<ApiStreamChunk> {
|
|
156
206
|
for (const chunk of chunks) {
|
|
157
|
-
yield
|
|
207
|
+
yield { ...chunk, id: responseId };
|
|
158
208
|
}
|
|
159
209
|
}
|
|
160
210
|
|
|
@@ -143,7 +143,7 @@ export class BedrockHandler extends BaseHandler {
|
|
|
143
143
|
model: factory(modelId),
|
|
144
144
|
messages: this.getMessages(systemPrompt, messages),
|
|
145
145
|
tools: toAiSdkTools(tools),
|
|
146
|
-
maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ??
|
|
146
|
+
maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
147
147
|
temperature: reasoningEnabled ? undefined : (model.info.temperature ?? 0),
|
|
148
148
|
providerOptions:
|
|
149
149
|
Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
|
|
@@ -216,7 +216,7 @@ export class BedrockHandler extends BaseHandler {
|
|
|
216
216
|
|
|
217
217
|
yield {
|
|
218
218
|
type: "usage",
|
|
219
|
-
inputTokens
|
|
219
|
+
inputTokens,
|
|
220
220
|
outputTokens,
|
|
221
221
|
thoughtsTokenCount,
|
|
222
222
|
cacheReadTokens,
|
|
@@ -245,7 +245,7 @@ export class BedrockHandler extends BaseHandler {
|
|
|
245
245
|
|
|
246
246
|
yield {
|
|
247
247
|
type: "usage",
|
|
248
|
-
inputTokens
|
|
248
|
+
inputTokens,
|
|
249
249
|
outputTokens,
|
|
250
250
|
thoughtsTokenCount,
|
|
251
251
|
cacheReadTokens,
|
|
@@ -115,6 +115,39 @@ describe("Community SDK handlers", () => {
|
|
|
115
115
|
expect(usageChunk?.outputTokens).toBe(3);
|
|
116
116
|
});
|
|
117
117
|
|
|
118
|
+
it("keeps cached input tokens separate from total input tokens", async () => {
|
|
119
|
+
streamTextSpy.mockReturnValue({
|
|
120
|
+
fullStream: makeStreamParts([
|
|
121
|
+
{
|
|
122
|
+
type: "finish",
|
|
123
|
+
usage: { inputTokens: 10, outputTokens: 3, cachedInputTokens: 4 },
|
|
124
|
+
},
|
|
125
|
+
]),
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
const handler = new ClaudeCodeHandler({
|
|
129
|
+
providerId: "claude-code",
|
|
130
|
+
modelId: "sonnet",
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
const chunks: ApiStreamChunk[] = [];
|
|
134
|
+
for await (const chunk of handler.createMessage("System", [
|
|
135
|
+
{ role: "user", content: "Hi" },
|
|
136
|
+
])) {
|
|
137
|
+
chunks.push(chunk);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const usageChunk = chunks.find(
|
|
141
|
+
(chunk): chunk is Extract<ApiStreamChunk, { type: "usage" }> =>
|
|
142
|
+
chunk.type === "usage",
|
|
143
|
+
);
|
|
144
|
+
expect(usageChunk).toMatchObject({
|
|
145
|
+
inputTokens: 10,
|
|
146
|
+
outputTokens: 3,
|
|
147
|
+
cacheReadTokens: 4,
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
|
|
118
151
|
it("uses a fallback model id when model is missing", () => {
|
|
119
152
|
const handler = new ClaudeCodeHandler({
|
|
120
153
|
providerId: "claude-code",
|
|
@@ -218,4 +218,44 @@ describe("GeminiHandler", () => {
|
|
|
218
218
|
expect(secondId).toBeTruthy();
|
|
219
219
|
expect(firstId).not.toBe(secondId);
|
|
220
220
|
});
|
|
221
|
+
|
|
222
|
+
it("defaults maxOutputTokens to 8192 for gemini-3-flash when no model or config limit is provided", async () => {
|
|
223
|
+
generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
|
|
224
|
+
|
|
225
|
+
const handler = new GeminiHandler({
|
|
226
|
+
providerId: "gemini",
|
|
227
|
+
modelId: "gemini-3-flash",
|
|
228
|
+
apiKey: "test-key",
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
await collectChunks(
|
|
232
|
+
handler.createMessage("System", [{ role: "user", content: "go" }]),
|
|
233
|
+
);
|
|
234
|
+
|
|
235
|
+
expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
|
|
236
|
+
const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
|
|
237
|
+
config?: { maxOutputTokens?: number };
|
|
238
|
+
};
|
|
239
|
+
expect(request.config?.maxOutputTokens).toBe(8192);
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
it("defaults maxOutputTokens to 128000 for non gemini-3-flash models when no model or config limit is provided", async () => {
|
|
243
|
+
generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
|
|
244
|
+
|
|
245
|
+
const handler = new GeminiHandler({
|
|
246
|
+
providerId: "gemini",
|
|
247
|
+
modelId: "gemini-2.5-flash",
|
|
248
|
+
apiKey: "test-key",
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
await collectChunks(
|
|
252
|
+
handler.createMessage("System", [{ role: "user", content: "go" }]),
|
|
253
|
+
);
|
|
254
|
+
|
|
255
|
+
expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
|
|
256
|
+
const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
|
|
257
|
+
config?: { maxOutputTokens?: number };
|
|
258
|
+
};
|
|
259
|
+
expect(request.config?.maxOutputTokens).toBe(128000);
|
|
260
|
+
});
|
|
221
261
|
});
|
|
@@ -18,7 +18,6 @@ import {
|
|
|
18
18
|
import {
|
|
19
19
|
type ApiStream,
|
|
20
20
|
type HandlerModelInfo,
|
|
21
|
-
type ModelInfo,
|
|
22
21
|
type ProviderConfig,
|
|
23
22
|
supportsModelThinking,
|
|
24
23
|
} from "../types";
|
|
@@ -27,6 +26,16 @@ import { RetriableError, retryStream } from "../utils/retry";
|
|
|
27
26
|
import { BaseHandler } from "./base";
|
|
28
27
|
|
|
29
28
|
const DEFAULT_THINKING_BUDGET_TOKENS = 1024;
|
|
29
|
+
const DEFAULT_MAX_OUTPUT_TOKENS = 128_000;
|
|
30
|
+
const GEMINI_3_FLASH_MAX_OUTPUT_TOKENS = 8192;
|
|
31
|
+
|
|
32
|
+
function isGemini3FlashModel(modelId: string): boolean {
|
|
33
|
+
const normalized = modelId.toLowerCase();
|
|
34
|
+
return (
|
|
35
|
+
normalized.includes("gemini-3-flash") ||
|
|
36
|
+
normalized.includes("gemini-3.0-flash")
|
|
37
|
+
);
|
|
38
|
+
}
|
|
30
39
|
|
|
31
40
|
/**
|
|
32
41
|
* Handler for Google's Gemini API
|
|
@@ -131,6 +140,11 @@ export class GeminiHandler extends BaseHandler {
|
|
|
131
140
|
}
|
|
132
141
|
|
|
133
142
|
// Build request config with abort signal
|
|
143
|
+
const fallbackMaxOutputTokens = isGemini3FlashModel(modelId)
|
|
144
|
+
? GEMINI_3_FLASH_MAX_OUTPUT_TOKENS
|
|
145
|
+
: DEFAULT_MAX_OUTPUT_TOKENS;
|
|
146
|
+
const maxOutputTokens =
|
|
147
|
+
info.maxTokens ?? this.config.maxOutputTokens ?? fallbackMaxOutputTokens;
|
|
134
148
|
const requestConfig: GenerateContentConfig = {
|
|
135
149
|
httpOptions: this.config.baseUrl
|
|
136
150
|
? { baseUrl: this.config.baseUrl, headers: this.getRequestHeaders() }
|
|
@@ -138,7 +152,7 @@ export class GeminiHandler extends BaseHandler {
|
|
|
138
152
|
abortSignal,
|
|
139
153
|
systemInstruction: systemPrompt,
|
|
140
154
|
temperature: info.temperature ?? 1,
|
|
141
|
-
maxOutputTokens
|
|
155
|
+
maxOutputTokens,
|
|
142
156
|
};
|
|
143
157
|
|
|
144
158
|
// Add thinking config only when explicitly requested and supported.
|
|
@@ -243,7 +257,6 @@ export class GeminiHandler extends BaseHandler {
|
|
|
243
257
|
|
|
244
258
|
// Yield final usage
|
|
245
259
|
const totalCost = this.calculateGeminiCost(
|
|
246
|
-
info,
|
|
247
260
|
promptTokens,
|
|
248
261
|
outputTokens,
|
|
249
262
|
thoughtsTokenCount,
|
|
@@ -252,7 +265,7 @@ export class GeminiHandler extends BaseHandler {
|
|
|
252
265
|
|
|
253
266
|
yield {
|
|
254
267
|
type: "usage",
|
|
255
|
-
inputTokens: promptTokens
|
|
268
|
+
inputTokens: promptTokens,
|
|
256
269
|
outputTokens,
|
|
257
270
|
thoughtsTokenCount,
|
|
258
271
|
cacheReadTokens,
|
|
@@ -273,27 +286,16 @@ export class GeminiHandler extends BaseHandler {
|
|
|
273
286
|
}
|
|
274
287
|
|
|
275
288
|
private calculateGeminiCost(
|
|
276
|
-
info: ModelInfo,
|
|
277
289
|
inputTokens: number,
|
|
278
290
|
outputTokens: number,
|
|
279
291
|
thoughtsTokenCount: number,
|
|
280
292
|
cacheReadTokens: number,
|
|
281
293
|
): number | undefined {
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
const uncachedInputTokens = inputTokens - cacheReadTokens;
|
|
288
|
-
const inputCost = pricing.input * (uncachedInputTokens / 1_000_000);
|
|
289
|
-
const outputCost =
|
|
290
|
-
pricing.output * ((outputTokens + thoughtsTokenCount) / 1_000_000);
|
|
291
|
-
const cacheReadCost =
|
|
292
|
-
cacheReadTokens > 0
|
|
293
|
-
? (pricing.cacheRead ?? 0) * (cacheReadTokens / 1_000_000)
|
|
294
|
-
: 0;
|
|
295
|
-
|
|
296
|
-
return inputCost + outputCost + cacheReadCost;
|
|
294
|
+
return this.calculateCost(
|
|
295
|
+
inputTokens,
|
|
296
|
+
outputTokens + thoughtsTokenCount,
|
|
297
|
+
cacheReadTokens,
|
|
298
|
+
);
|
|
297
299
|
}
|
|
298
300
|
}
|
|
299
301
|
|
|
@@ -106,9 +106,24 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
106
106
|
systemPrompt: string,
|
|
107
107
|
messages: Message[],
|
|
108
108
|
): OpenAI.Chat.ChatCompletionMessageParam[] {
|
|
109
|
+
const model = this.getModel();
|
|
110
|
+
const supportsPromptCache = this.supportsPromptCache(model.info);
|
|
111
|
+
const systemMessage = supportsPromptCache
|
|
112
|
+
? ({
|
|
113
|
+
role: "system",
|
|
114
|
+
content: [
|
|
115
|
+
{
|
|
116
|
+
type: "text",
|
|
117
|
+
text: systemPrompt,
|
|
118
|
+
cache_control: { type: "ephemeral" },
|
|
119
|
+
},
|
|
120
|
+
],
|
|
121
|
+
} as unknown as OpenAI.Chat.ChatCompletionMessageParam)
|
|
122
|
+
: { role: "system" as const, content: systemPrompt };
|
|
123
|
+
|
|
109
124
|
return [
|
|
110
|
-
|
|
111
|
-
...convertToOpenAIMessages(messages),
|
|
125
|
+
systemMessage,
|
|
126
|
+
...convertToOpenAIMessages(messages, supportsPromptCache),
|
|
112
127
|
];
|
|
113
128
|
}
|
|
114
129
|
|
|
@@ -138,7 +153,8 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
138
153
|
const openAiMessages = this.getMessages(systemPrompt, messages);
|
|
139
154
|
|
|
140
155
|
// Build request options
|
|
141
|
-
const requestOptions:
|
|
156
|
+
const requestOptions: Record<string, unknown> &
|
|
157
|
+
OpenAI.ChatCompletionCreateParamsStreaming = {
|
|
142
158
|
model: modelId,
|
|
143
159
|
messages: openAiMessages,
|
|
144
160
|
stream: true,
|
|
@@ -149,6 +165,17 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
149
165
|
}),
|
|
150
166
|
};
|
|
151
167
|
|
|
168
|
+
// Add top-level cache_control for OpenRouter with Anthropic models.
|
|
169
|
+
// This enables automatic caching where the cache breakpoint advances
|
|
170
|
+
// as the conversation grows, rather than relying on explicit per-block
|
|
171
|
+
// breakpoints which are limited to 4.
|
|
172
|
+
if (
|
|
173
|
+
this.config.providerId === "openrouter" &&
|
|
174
|
+
modelId.startsWith("anthropic/")
|
|
175
|
+
) {
|
|
176
|
+
requestOptions.cache_control = { type: "ephemeral" };
|
|
177
|
+
}
|
|
178
|
+
|
|
152
179
|
// Add max tokens if configured
|
|
153
180
|
const maxTokens = modelInfo.maxTokens ?? this.config.maxOutputTokens;
|
|
154
181
|
if (maxTokens) {
|
|
@@ -171,7 +198,11 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
171
198
|
this.config.reasoningEffort ??
|
|
172
199
|
(this.config.thinking ? DEFAULT_REASONING_EFFORT : undefined);
|
|
173
200
|
if (supportsReasoningEffort && effectiveReasoningEffort) {
|
|
174
|
-
(
|
|
201
|
+
(
|
|
202
|
+
requestOptions as OpenAI.ChatCompletionCreateParamsStreaming & {
|
|
203
|
+
reasoning_effort?: string;
|
|
204
|
+
}
|
|
205
|
+
).reasoning_effort = effectiveReasoningEffort;
|
|
175
206
|
}
|
|
176
207
|
|
|
177
208
|
const requestHeaders = this.getRequestHeaders();
|
|
@@ -191,16 +222,25 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
191
222
|
headers: requestHeaders,
|
|
192
223
|
});
|
|
193
224
|
const toolCallProcessor = new ToolCallProcessor();
|
|
225
|
+
let finishReason: string | null = null;
|
|
194
226
|
|
|
195
227
|
for await (const chunk of stream) {
|
|
228
|
+
const choice = chunk.choices?.[0];
|
|
229
|
+
if (choice?.finish_reason) {
|
|
230
|
+
finishReason = choice.finish_reason;
|
|
231
|
+
}
|
|
196
232
|
yield* this.withResponseIdForAll(
|
|
197
233
|
this.processChunk(chunk, toolCallProcessor, modelInfo, responseId),
|
|
198
234
|
responseId,
|
|
199
235
|
);
|
|
200
236
|
}
|
|
201
237
|
|
|
202
|
-
|
|
203
|
-
|
|
238
|
+
yield {
|
|
239
|
+
type: "done",
|
|
240
|
+
success: true,
|
|
241
|
+
id: responseId,
|
|
242
|
+
incompleteReason: finishReason === "length" ? "max_tokens" : undefined,
|
|
243
|
+
};
|
|
204
244
|
}
|
|
205
245
|
|
|
206
246
|
/**
|
|
@@ -213,9 +253,11 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
213
253
|
_modelInfo: ModelInfo,
|
|
214
254
|
responseId: string,
|
|
215
255
|
): Generator<import("../types").ApiStreamChunk> {
|
|
216
|
-
const
|
|
217
|
-
|
|
218
|
-
|
|
256
|
+
const rawDelta = chunk.choices?.[0]?.delta;
|
|
257
|
+
const delta = rawDelta && {
|
|
258
|
+
...rawDelta,
|
|
259
|
+
reasoning_content: (rawDelta as { reasoning_content?: string })
|
|
260
|
+
.reasoning_content,
|
|
219
261
|
};
|
|
220
262
|
|
|
221
263
|
// Handle text content
|
|
@@ -227,7 +269,7 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
227
269
|
if (delta?.reasoning_content) {
|
|
228
270
|
yield {
|
|
229
271
|
type: "reasoning",
|
|
230
|
-
reasoning:
|
|
272
|
+
reasoning: delta.reasoning_content,
|
|
231
273
|
id: responseId,
|
|
232
274
|
};
|
|
233
275
|
}
|
|
@@ -248,10 +290,22 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
248
290
|
if (chunk.usage) {
|
|
249
291
|
const inputTokens = chunk.usage.prompt_tokens ?? 0;
|
|
250
292
|
const outputTokens = chunk.usage.completion_tokens ?? 0;
|
|
293
|
+
const usageWithCache = chunk.usage as typeof chunk.usage & {
|
|
294
|
+
prompt_tokens_details?: {
|
|
295
|
+
cached_tokens?: number;
|
|
296
|
+
cache_write_tokens?: number;
|
|
297
|
+
};
|
|
298
|
+
cache_creation_input_tokens?: number;
|
|
299
|
+
cache_read_input_tokens?: number;
|
|
300
|
+
};
|
|
251
301
|
const cacheReadTokens =
|
|
252
|
-
|
|
302
|
+
usageWithCache.prompt_tokens_details?.cached_tokens ??
|
|
303
|
+
usageWithCache.cache_read_input_tokens ??
|
|
304
|
+
0;
|
|
253
305
|
const cacheWriteTokens =
|
|
254
|
-
|
|
306
|
+
usageWithCache.prompt_tokens_details?.cache_write_tokens ??
|
|
307
|
+
usageWithCache.cache_creation_input_tokens ??
|
|
308
|
+
0;
|
|
255
309
|
|
|
256
310
|
yield {
|
|
257
311
|
type: "usage",
|
|
@@ -263,6 +317,7 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
263
317
|
inputTokens,
|
|
264
318
|
outputTokens,
|
|
265
319
|
cacheReadTokens,
|
|
320
|
+
cacheWriteTokens,
|
|
266
321
|
),
|
|
267
322
|
id: responseId,
|
|
268
323
|
};
|
|
@@ -210,4 +210,50 @@ describe("OpenAIResponsesHandler", () => {
|
|
|
210
210
|
},
|
|
211
211
|
});
|
|
212
212
|
});
|
|
213
|
+
|
|
214
|
+
it("keeps cached input tokens separate in usage chunks", () => {
|
|
215
|
+
const handler = new TestOpenAIResponsesHandler({
|
|
216
|
+
providerId: "openai-native",
|
|
217
|
+
modelId: "gpt-5.4",
|
|
218
|
+
apiKey: "test-key",
|
|
219
|
+
baseUrl: "https://example.com",
|
|
220
|
+
modelInfo: {
|
|
221
|
+
id: "gpt-5.4",
|
|
222
|
+
pricing: {
|
|
223
|
+
input: 1,
|
|
224
|
+
output: 2,
|
|
225
|
+
cacheRead: 0.5,
|
|
226
|
+
},
|
|
227
|
+
},
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
const chunks = handler.processChunkForTest({
|
|
231
|
+
type: "response.completed",
|
|
232
|
+
response: {
|
|
233
|
+
id: "resp_usage",
|
|
234
|
+
usage: {
|
|
235
|
+
input_tokens: 100,
|
|
236
|
+
output_tokens: 40,
|
|
237
|
+
input_tokens_details: {
|
|
238
|
+
cached_tokens: 25,
|
|
239
|
+
},
|
|
240
|
+
output_tokens_details: {
|
|
241
|
+
reasoning_tokens: 10,
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
},
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
expect(chunks[0]).toMatchObject({
|
|
248
|
+
type: "usage",
|
|
249
|
+
inputTokens: 100,
|
|
250
|
+
outputTokens: 40,
|
|
251
|
+
cacheReadTokens: 25,
|
|
252
|
+
cacheWriteTokens: 0,
|
|
253
|
+
});
|
|
254
|
+
expect(chunks[0]?.type).toBe("usage");
|
|
255
|
+
if (chunks[0]?.type === "usage") {
|
|
256
|
+
expect(chunks[0].totalCost).toBeCloseTo(0.0001925, 10);
|
|
257
|
+
}
|
|
258
|
+
});
|
|
213
259
|
});
|
|
@@ -565,23 +565,19 @@ export class OpenAIResponsesHandler extends BaseHandler {
|
|
|
565
565
|
const inputTokens = usage.input_tokens || 0;
|
|
566
566
|
const outputTokens = usage.output_tokens || 0;
|
|
567
567
|
const cacheReadTokens =
|
|
568
|
-
usage.output_tokens_details?.reasoning_tokens || 0;
|
|
569
|
-
const cacheWriteTokens =
|
|
570
568
|
usage.input_tokens_details?.cached_tokens || 0;
|
|
569
|
+
const cacheWriteTokens = 0;
|
|
571
570
|
|
|
572
571
|
const totalCost = this.calculateCost(
|
|
573
572
|
inputTokens,
|
|
574
573
|
outputTokens,
|
|
575
574
|
cacheReadTokens,
|
|
576
|
-
|
|
577
|
-
const nonCachedInputTokens = Math.max(
|
|
578
|
-
0,
|
|
579
|
-
inputTokens - cacheReadTokens - cacheWriteTokens,
|
|
575
|
+
cacheWriteTokens,
|
|
580
576
|
);
|
|
581
577
|
|
|
582
578
|
yield {
|
|
583
579
|
type: "usage",
|
|
584
|
-
inputTokens
|
|
580
|
+
inputTokens,
|
|
585
581
|
outputTokens,
|
|
586
582
|
cacheWriteTokens,
|
|
587
583
|
cacheReadTokens,
|