@clinebot/llms 0.0.7 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.browser.d.ts +2 -2
- package/dist/index.browser.js +40 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +12 -12
- package/dist/providers/handlers/base.d.ts +2 -27
- package/dist/providers/transform/openai-format.d.ts +1 -1
- package/dist/providers/types/config.d.ts +6 -0
- package/package.json +2 -1
- package/src/index.browser.ts +2 -2
- package/src/index.ts +2 -2
- package/src/models/providers/vercel-ai-gateway.ts +1 -1
- package/src/providers/handlers/anthropic-base.ts +14 -3
- package/src/providers/handlers/base.test.ts +60 -1
- package/src/providers/handlers/base.ts +83 -54
- package/src/providers/handlers/bedrock-base.ts +1 -1
- package/src/providers/handlers/gemini-base.test.ts +40 -0
- package/src/providers/handlers/gemini-base.ts +16 -1
- package/src/providers/handlers/openai-base.ts +55 -11
- package/src/providers/handlers/vertex.ts +1 -1
- package/src/providers/transform/format-conversion.test.ts +26 -0
- package/src/providers/transform/openai-format.ts +50 -7
- package/src/providers/types/config.ts +8 -0
|
@@ -13,41 +13,16 @@ export declare const DEFAULT_REQUEST_HEADERS: Record<string, string>;
|
|
|
13
13
|
export declare abstract class BaseHandler implements ApiHandler {
|
|
14
14
|
protected config: ProviderConfig;
|
|
15
15
|
protected abortController: AbortController | undefined;
|
|
16
|
+
private abortSignalSequence;
|
|
16
17
|
constructor(config: ProviderConfig);
|
|
17
|
-
/**
|
|
18
|
-
* Convert Cline messages to provider-specific format
|
|
19
|
-
* Must be implemented by subclasses
|
|
20
|
-
*/
|
|
21
18
|
abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
|
|
22
|
-
/**
|
|
23
|
-
* Create a streaming message completion
|
|
24
|
-
* Must be implemented by subclasses
|
|
25
|
-
*/
|
|
26
19
|
abstract createMessage(systemPrompt: string, messages: Message[], tools?: ToolDefinition[]): ApiStream;
|
|
27
|
-
/**
|
|
28
|
-
* Get the current model configuration
|
|
29
|
-
* Can be overridden by subclasses for provider-specific logic
|
|
30
|
-
*/
|
|
31
20
|
getModel(): HandlerModelInfo;
|
|
32
|
-
/**
|
|
33
|
-
* Get usage information (optional)
|
|
34
|
-
* Override in subclasses that support this
|
|
35
|
-
*/
|
|
36
21
|
getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined>;
|
|
37
|
-
/**
|
|
38
|
-
* Get the abort signal for the current request
|
|
39
|
-
* Creates a new AbortController if one doesn't exist or was already aborted
|
|
40
|
-
* Combines with config.abortSignal if provided
|
|
41
|
-
*/
|
|
42
22
|
protected getAbortSignal(): AbortSignal;
|
|
43
|
-
/**
|
|
44
|
-
* Abort the current request
|
|
45
|
-
*/
|
|
46
23
|
abort(): void;
|
|
47
24
|
setAbortSignal(signal: AbortSignal | undefined): void;
|
|
48
|
-
|
|
49
|
-
* Helper to calculate cost from usage
|
|
50
|
-
*/
|
|
25
|
+
private logAbort;
|
|
51
26
|
protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number): number | undefined;
|
|
52
27
|
protected createResponseId(): string;
|
|
53
28
|
protected withResponseId<T extends ApiStreamChunk>(chunk: T, responseId: string): T;
|
|
@@ -9,7 +9,7 @@ type OpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
|
|
|
9
9
|
/**
|
|
10
10
|
* Convert messages to OpenAI format
|
|
11
11
|
*/
|
|
12
|
-
export declare function convertToOpenAIMessages(messages: Message[]): OpenAIMessage[];
|
|
12
|
+
export declare function convertToOpenAIMessages(messages: Message[], enableCaching?: boolean): OpenAIMessage[];
|
|
13
13
|
/**
|
|
14
14
|
* Convert tool definitions to OpenAI format
|
|
15
15
|
*/
|
|
@@ -187,6 +187,10 @@ export interface ProviderOptions {
|
|
|
187
187
|
/** Runtime model catalog refresh configuration */
|
|
188
188
|
modelCatalog?: ModelCatalogConfig;
|
|
189
189
|
}
|
|
190
|
+
/**
|
|
191
|
+
* Provider-specific options that don't fit other categories
|
|
192
|
+
*/
|
|
193
|
+
import type { BasicLogger } from "@clinebot/shared";
|
|
190
194
|
/**
|
|
191
195
|
* Runtime model catalog refresh options
|
|
192
196
|
*/
|
|
@@ -219,6 +223,8 @@ export interface ProviderConfig extends AuthConfig, EndpointConfig, ModelConfig,
|
|
|
219
223
|
onRetryAttempt?: (attempt: number, maxRetries: number, delay: number, error: unknown) => void;
|
|
220
224
|
/** AbortSignal for cancelling requests */
|
|
221
225
|
abortSignal?: AbortSignal;
|
|
226
|
+
/** Optional runtime logger for provider-level diagnostics */
|
|
227
|
+
logger?: BasicLogger;
|
|
222
228
|
/** Codex CLI-specific options */
|
|
223
229
|
codex?: CodexConfig;
|
|
224
230
|
/** Claude Code-specific options */
|
package/package.json
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@clinebot/llms",
|
|
3
|
-
"version": "0.0.7",
|
|
3
|
+
"version": "0.0.10",
|
|
4
4
|
"description": "Config-driven SDK for selecting, extending, and instantiating LLM providers and models",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
7
7
|
"dependencies": {
|
|
8
|
+
"@clinebot/shared": "0.0.10",
|
|
8
9
|
"@ai-sdk/amazon-bedrock": "^4.0.67",
|
|
9
10
|
"@ai-sdk/google-vertex": "^4.0.74",
|
|
10
11
|
"@ai-sdk/mistral": "^3.0.24",
|
package/src/index.browser.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export { defineLlmsConfig, loadLlmsConfigFromFile } from "./config-browser";
|
|
2
|
-
export * as
|
|
3
|
-
export * as
|
|
2
|
+
export * as LlmsModels from "./models/index";
|
|
3
|
+
export * as LlmsProviders from "./providers/public.browser";
|
|
4
4
|
export type {
|
|
5
5
|
CustomProviderConfig,
|
|
6
6
|
LlmsConfig,
|
package/src/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export { defineLlmsConfig } from "./config";
|
|
2
|
-
export * as
|
|
3
|
-
export * as
|
|
2
|
+
export * as LlmsModels from "./models/index";
|
|
3
|
+
export * as LlmsProviders from "./providers/public";
|
|
4
4
|
export { createLlmsSdk } from "./sdk";
|
|
5
5
|
export type {
|
|
6
6
|
CustomProviderConfig,
|
|
@@ -14,7 +14,7 @@ export const VERCEL_AI_GATEWAY_PROVIDER: ModelCollection = {
|
|
|
14
14
|
name: "Vercel AI Gateway",
|
|
15
15
|
description: "Vercel's AI gateway service",
|
|
16
16
|
protocol: "openai-chat",
|
|
17
|
-
baseUrl: "https://ai-gateway.vercel.
|
|
17
|
+
baseUrl: "https://ai-gateway.vercel.sh/v1",
|
|
18
18
|
defaultModelId: Object.keys(VERCEL_AI_GATEWAY_MODELS)[0],
|
|
19
19
|
capabilities: ["reasoning"],
|
|
20
20
|
env: ["AI_GATEWAY_API_KEY"],
|
|
@@ -145,7 +145,8 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
145
145
|
thinking: reasoningOn
|
|
146
146
|
? { type: "enabled", budget_tokens: budgetTokens }
|
|
147
147
|
: undefined,
|
|
148
|
-
max_tokens:
|
|
148
|
+
max_tokens:
|
|
149
|
+
model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
149
150
|
temperature: reasoningOn ? undefined : 0,
|
|
150
151
|
system: supportsPromptCache
|
|
151
152
|
? [
|
|
@@ -173,6 +174,7 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
173
174
|
cacheReadTokens: 0,
|
|
174
175
|
cacheWriteTokens: 0,
|
|
175
176
|
};
|
|
177
|
+
let stopReason: string | null = null;
|
|
176
178
|
|
|
177
179
|
for await (const chunk of stream) {
|
|
178
180
|
if (debugThinking) {
|
|
@@ -185,6 +187,11 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
185
187
|
countChunk(`content_block_delta:${chunk.delta?.type ?? "unknown"}`);
|
|
186
188
|
}
|
|
187
189
|
}
|
|
190
|
+
if (chunk.type === "message_delta") {
|
|
191
|
+
stopReason =
|
|
192
|
+
(chunk as { delta?: { stop_reason?: string } }).delta?.stop_reason ??
|
|
193
|
+
stopReason;
|
|
194
|
+
}
|
|
188
195
|
yield* this.withResponseIdForAll(
|
|
189
196
|
this.processChunk(chunk, currentToolCall, usageSnapshot, responseId),
|
|
190
197
|
responseId,
|
|
@@ -199,8 +206,12 @@ export class AnthropicHandler extends BaseHandler {
|
|
|
199
206
|
console.error(`[thinking-debug][anthropic][stream] ${summary}`);
|
|
200
207
|
}
|
|
201
208
|
|
|
202
|
-
|
|
203
|
-
|
|
209
|
+
yield {
|
|
210
|
+
type: "done",
|
|
211
|
+
success: true,
|
|
212
|
+
id: responseId,
|
|
213
|
+
incompleteReason: stopReason === "max_tokens" ? "max_tokens" : undefined,
|
|
214
|
+
};
|
|
204
215
|
}
|
|
205
216
|
|
|
206
217
|
protected *processChunk(
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { describe, expect, it } from "vitest";
|
|
1
|
+
import { describe, expect, it, vi } from "vitest";
|
|
2
2
|
import type { ApiStream, ProviderConfig } from "../types/index";
|
|
3
3
|
import { BaseHandler } from "./base";
|
|
4
4
|
|
|
@@ -18,6 +18,10 @@ class TestHandler extends BaseHandler {
|
|
|
18
18
|
): number | undefined {
|
|
19
19
|
return this.calculateCost(inputTokens, outputTokens, cacheReadTokens);
|
|
20
20
|
}
|
|
21
|
+
|
|
22
|
+
public exposeAbortSignal(): AbortSignal {
|
|
23
|
+
return this.getAbortSignal();
|
|
24
|
+
}
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
describe("BaseHandler.calculateCost", () => {
|
|
@@ -44,3 +48,58 @@ describe("BaseHandler.calculateCost", () => {
|
|
|
44
48
|
expect(cost).toBeCloseTo(17.73, 6);
|
|
45
49
|
});
|
|
46
50
|
});
|
|
51
|
+
|
|
52
|
+
describe("BaseHandler abort signal wiring", () => {
|
|
53
|
+
it("does not let a stale request signal abort a newer request", () => {
|
|
54
|
+
const logger = {
|
|
55
|
+
debug: vi.fn(),
|
|
56
|
+
warn: vi.fn(),
|
|
57
|
+
};
|
|
58
|
+
const request1 = new AbortController();
|
|
59
|
+
const handler = new TestHandler({
|
|
60
|
+
providerId: "openrouter",
|
|
61
|
+
modelId: "mock-model",
|
|
62
|
+
apiKey: "test-key",
|
|
63
|
+
baseUrl: "https://example.com/v1",
|
|
64
|
+
abortSignal: request1.signal,
|
|
65
|
+
logger,
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
const signal1 = handler.exposeAbortSignal();
|
|
69
|
+
expect(signal1.aborted).toBe(false);
|
|
70
|
+
|
|
71
|
+
const request2 = new AbortController();
|
|
72
|
+
handler.setAbortSignal(request2.signal);
|
|
73
|
+
const signal2 = handler.exposeAbortSignal();
|
|
74
|
+
expect(signal2).not.toBe(signal1);
|
|
75
|
+
expect(signal2.aborted).toBe(false);
|
|
76
|
+
|
|
77
|
+
request1.abort(new Error("stale timeout"));
|
|
78
|
+
|
|
79
|
+
expect(signal1.aborted).toBe(true);
|
|
80
|
+
expect(signal2.aborted).toBe(false);
|
|
81
|
+
expect(logger.warn).toHaveBeenCalledWith(
|
|
82
|
+
"Provider request abort signal fired",
|
|
83
|
+
expect.objectContaining({
|
|
84
|
+
reason: expect.objectContaining({ message: "stale timeout" }),
|
|
85
|
+
}),
|
|
86
|
+
);
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
it("creates a fresh controller for each request", () => {
|
|
90
|
+
const handler = new TestHandler({
|
|
91
|
+
providerId: "openrouter",
|
|
92
|
+
modelId: "mock-model",
|
|
93
|
+
apiKey: "test-key",
|
|
94
|
+
baseUrl: "https://example.com/v1",
|
|
95
|
+
abortSignal: new AbortController().signal,
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
const signal1 = handler.exposeAbortSignal();
|
|
99
|
+
const signal2 = handler.exposeAbortSignal();
|
|
100
|
+
|
|
101
|
+
expect(signal2).not.toBe(signal1);
|
|
102
|
+
expect(signal1.aborted).toBe(false);
|
|
103
|
+
expect(signal2.aborted).toBe(false);
|
|
104
|
+
});
|
|
105
|
+
});
|
|
@@ -22,37 +22,44 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
|
|
|
22
22
|
"X-CLIENT-TYPE": "cline-sdk",
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
const controllerIds = new WeakMap<AbortController, string>();
|
|
26
|
+
let controllerIdCounter = 0;
|
|
27
|
+
|
|
28
|
+
function getControllerId(controller: AbortController): string {
|
|
29
|
+
let id = controllerIds.get(controller);
|
|
30
|
+
if (!id) {
|
|
31
|
+
id = `abort_${++controllerIdCounter}`;
|
|
32
|
+
controllerIds.set(controller, id);
|
|
33
|
+
}
|
|
34
|
+
return id;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function serializeAbortReason(reason: unknown): unknown {
|
|
38
|
+
return reason instanceof Error
|
|
39
|
+
? { name: reason.name, message: reason.message }
|
|
40
|
+
: reason;
|
|
41
|
+
}
|
|
42
|
+
|
|
25
43
|
/**
|
|
26
44
|
* Base handler class with common functionality
|
|
27
45
|
*/
|
|
28
46
|
export abstract class BaseHandler implements ApiHandler {
|
|
29
47
|
protected config: ProviderConfig;
|
|
30
48
|
protected abortController: AbortController | undefined;
|
|
49
|
+
private abortSignalSequence = 0;
|
|
31
50
|
|
|
32
51
|
constructor(config: ProviderConfig) {
|
|
33
52
|
this.config = config;
|
|
34
53
|
}
|
|
35
54
|
|
|
36
|
-
/**
|
|
37
|
-
* Convert Cline messages to provider-specific format
|
|
38
|
-
* Must be implemented by subclasses
|
|
39
|
-
*/
|
|
40
55
|
abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
|
|
41
56
|
|
|
42
|
-
/**
|
|
43
|
-
* Create a streaming message completion
|
|
44
|
-
* Must be implemented by subclasses
|
|
45
|
-
*/
|
|
46
57
|
abstract createMessage(
|
|
47
58
|
systemPrompt: string,
|
|
48
59
|
messages: Message[],
|
|
49
60
|
tools?: ToolDefinition[],
|
|
50
61
|
): ApiStream;
|
|
51
62
|
|
|
52
|
-
/**
|
|
53
|
-
* Get the current model configuration
|
|
54
|
-
* Can be overridden by subclasses for provider-specific logic
|
|
55
|
-
*/
|
|
56
63
|
getModel(): HandlerModelInfo {
|
|
57
64
|
const modelId = this.config.modelId;
|
|
58
65
|
return {
|
|
@@ -61,43 +68,55 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
61
68
|
};
|
|
62
69
|
}
|
|
63
70
|
|
|
64
|
-
/**
|
|
65
|
-
* Get usage information (optional)
|
|
66
|
-
* Override in subclasses that support this
|
|
67
|
-
*/
|
|
68
71
|
async getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined> {
|
|
69
72
|
return undefined;
|
|
70
73
|
}
|
|
71
74
|
|
|
72
|
-
/**
|
|
73
|
-
* Get the abort signal for the current request
|
|
74
|
-
* Creates a new AbortController if one doesn't exist or was already aborted
|
|
75
|
-
* Combines with config.abortSignal if provided
|
|
76
|
-
*/
|
|
77
75
|
protected getAbortSignal(): AbortSignal {
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
76
|
+
const controller = new AbortController();
|
|
77
|
+
this.abortController = controller;
|
|
78
|
+
controller.signal.addEventListener(
|
|
79
|
+
"abort",
|
|
80
|
+
() => {
|
|
81
|
+
if (this.abortController === controller) {
|
|
82
|
+
this.abortController = undefined;
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
{ once: true },
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
const configSignal = this.config.abortSignal;
|
|
89
|
+
if (configSignal) {
|
|
86
90
|
if (configSignal.aborted) {
|
|
87
|
-
this.
|
|
91
|
+
this.logAbort("debug", "Provider request inherited aborted signal", {
|
|
92
|
+
controllerId: getControllerId(controller),
|
|
93
|
+
reason: serializeAbortReason(configSignal.reason),
|
|
94
|
+
});
|
|
95
|
+
controller.abort(configSignal.reason);
|
|
88
96
|
} else {
|
|
89
|
-
|
|
90
|
-
|
|
97
|
+
const signalId = ++this.abortSignalSequence;
|
|
98
|
+
configSignal.addEventListener(
|
|
99
|
+
"abort",
|
|
100
|
+
() => {
|
|
101
|
+
this.logAbort("warn", "Provider request abort signal fired", {
|
|
102
|
+
controllerId: getControllerId(controller),
|
|
103
|
+
signalId,
|
|
104
|
+
reason: serializeAbortReason(configSignal.reason),
|
|
105
|
+
});
|
|
106
|
+
controller.abort(configSignal.reason);
|
|
107
|
+
},
|
|
108
|
+
{ once: true },
|
|
109
|
+
);
|
|
110
|
+
this.logAbort("debug", "Provider request attached abort signal", {
|
|
111
|
+
controllerId: getControllerId(controller),
|
|
112
|
+
signalId,
|
|
91
113
|
});
|
|
92
114
|
}
|
|
93
115
|
}
|
|
94
116
|
|
|
95
|
-
return
|
|
117
|
+
return controller.signal;
|
|
96
118
|
}
|
|
97
119
|
|
|
98
|
-
/**
|
|
99
|
-
* Abort the current request
|
|
100
|
-
*/
|
|
101
120
|
abort(): void {
|
|
102
121
|
this.abortController?.abort();
|
|
103
122
|
}
|
|
@@ -105,37 +124,47 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
105
124
|
setAbortSignal(signal: AbortSignal | undefined): void {
|
|
106
125
|
this.config.abortSignal = signal;
|
|
107
126
|
if (signal?.aborted) {
|
|
127
|
+
this.logAbort("debug", "Provider handler received pre-aborted signal", {
|
|
128
|
+
controllerId: this.abortController
|
|
129
|
+
? getControllerId(this.abortController)
|
|
130
|
+
: undefined,
|
|
131
|
+
reason: serializeAbortReason(signal.reason),
|
|
132
|
+
});
|
|
108
133
|
this.abortController?.abort(signal.reason);
|
|
109
134
|
}
|
|
110
135
|
}
|
|
111
136
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
137
|
+
private logAbort(
|
|
138
|
+
level: "debug" | "warn",
|
|
139
|
+
message: string,
|
|
140
|
+
metadata?: Record<string, unknown>,
|
|
141
|
+
): void {
|
|
142
|
+
this.config.logger?.[level]?.(message, {
|
|
143
|
+
providerId: this.config.providerId,
|
|
144
|
+
modelId: this.config.modelId,
|
|
145
|
+
...metadata,
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
|
|
115
149
|
protected calculateCost(
|
|
116
150
|
inputTokens: number,
|
|
117
151
|
outputTokens: number,
|
|
118
152
|
cacheReadTokens = 0,
|
|
119
153
|
): number | undefined {
|
|
120
|
-
const
|
|
121
|
-
this.config.modelInfo ??
|
|
122
|
-
|
|
123
|
-
? this.config.knownModels?.[this.config.modelId]
|
|
124
|
-
: undefined);
|
|
125
|
-
const pricing = modelPricingSource?.pricing;
|
|
154
|
+
const pricing = (
|
|
155
|
+
this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
|
|
156
|
+
)?.pricing;
|
|
126
157
|
if (!pricing?.input || !pricing?.output) {
|
|
127
158
|
return undefined;
|
|
128
159
|
}
|
|
129
160
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
cacheReadTokens > 0
|
|
161
|
+
return (
|
|
162
|
+
((inputTokens - cacheReadTokens) / 1_000_000) * pricing.input +
|
|
163
|
+
(outputTokens / 1_000_000) * pricing.output +
|
|
164
|
+
(cacheReadTokens > 0
|
|
135
165
|
? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
|
|
136
|
-
: 0
|
|
137
|
-
|
|
138
|
-
return inputCost + outputCost + cacheReadCost;
|
|
166
|
+
: 0)
|
|
167
|
+
);
|
|
139
168
|
}
|
|
140
169
|
|
|
141
170
|
protected createResponseId(): string {
|
|
@@ -154,7 +183,7 @@ export abstract class BaseHandler implements ApiHandler {
|
|
|
154
183
|
responseId: string,
|
|
155
184
|
): Generator<ApiStreamChunk> {
|
|
156
185
|
for (const chunk of chunks) {
|
|
157
|
-
yield
|
|
186
|
+
yield { ...chunk, id: responseId };
|
|
158
187
|
}
|
|
159
188
|
}
|
|
160
189
|
|
|
@@ -143,7 +143,7 @@ export class BedrockHandler extends BaseHandler {
|
|
|
143
143
|
model: factory(modelId),
|
|
144
144
|
messages: this.getMessages(systemPrompt, messages),
|
|
145
145
|
tools: toAiSdkTools(tools),
|
|
146
|
-
maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ??
|
|
146
|
+
maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
147
147
|
temperature: reasoningEnabled ? undefined : (model.info.temperature ?? 0),
|
|
148
148
|
providerOptions:
|
|
149
149
|
Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
|
|
@@ -218,4 +218,44 @@ describe("GeminiHandler", () => {
|
|
|
218
218
|
expect(secondId).toBeTruthy();
|
|
219
219
|
expect(firstId).not.toBe(secondId);
|
|
220
220
|
});
|
|
221
|
+
|
|
222
|
+
it("defaults maxOutputTokens to 8192 for gemini-3-flash when no model or config limit is provided", async () => {
|
|
223
|
+
generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
|
|
224
|
+
|
|
225
|
+
const handler = new GeminiHandler({
|
|
226
|
+
providerId: "gemini",
|
|
227
|
+
modelId: "gemini-3-flash",
|
|
228
|
+
apiKey: "test-key",
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
await collectChunks(
|
|
232
|
+
handler.createMessage("System", [{ role: "user", content: "go" }]),
|
|
233
|
+
);
|
|
234
|
+
|
|
235
|
+
expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
|
|
236
|
+
const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
|
|
237
|
+
config?: { maxOutputTokens?: number };
|
|
238
|
+
};
|
|
239
|
+
expect(request.config?.maxOutputTokens).toBe(8192);
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
it("defaults maxOutputTokens to 128000 for non gemini-3-flash models when no model or config limit is provided", async () => {
|
|
243
|
+
generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
|
|
244
|
+
|
|
245
|
+
const handler = new GeminiHandler({
|
|
246
|
+
providerId: "gemini",
|
|
247
|
+
modelId: "gemini-2.5-flash",
|
|
248
|
+
apiKey: "test-key",
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
await collectChunks(
|
|
252
|
+
handler.createMessage("System", [{ role: "user", content: "go" }]),
|
|
253
|
+
);
|
|
254
|
+
|
|
255
|
+
expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
|
|
256
|
+
const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
|
|
257
|
+
config?: { maxOutputTokens?: number };
|
|
258
|
+
};
|
|
259
|
+
expect(request.config?.maxOutputTokens).toBe(128000);
|
|
260
|
+
});
|
|
221
261
|
});
|
|
@@ -27,6 +27,16 @@ import { RetriableError, retryStream } from "../utils/retry";
|
|
|
27
27
|
import { BaseHandler } from "./base";
|
|
28
28
|
|
|
29
29
|
const DEFAULT_THINKING_BUDGET_TOKENS = 1024;
|
|
30
|
+
const DEFAULT_MAX_OUTPUT_TOKENS = 128_000;
|
|
31
|
+
const GEMINI_3_FLASH_MAX_OUTPUT_TOKENS = 8192;
|
|
32
|
+
|
|
33
|
+
function isGemini3FlashModel(modelId: string): boolean {
|
|
34
|
+
const normalized = modelId.toLowerCase();
|
|
35
|
+
return (
|
|
36
|
+
normalized.includes("gemini-3-flash") ||
|
|
37
|
+
normalized.includes("gemini-3.0-flash")
|
|
38
|
+
);
|
|
39
|
+
}
|
|
30
40
|
|
|
31
41
|
/**
|
|
32
42
|
* Handler for Google's Gemini API
|
|
@@ -131,6 +141,11 @@ export class GeminiHandler extends BaseHandler {
|
|
|
131
141
|
}
|
|
132
142
|
|
|
133
143
|
// Build request config with abort signal
|
|
144
|
+
const fallbackMaxOutputTokens = isGemini3FlashModel(modelId)
|
|
145
|
+
? GEMINI_3_FLASH_MAX_OUTPUT_TOKENS
|
|
146
|
+
: DEFAULT_MAX_OUTPUT_TOKENS;
|
|
147
|
+
const maxOutputTokens =
|
|
148
|
+
info.maxTokens ?? this.config.maxOutputTokens ?? fallbackMaxOutputTokens;
|
|
134
149
|
const requestConfig: GenerateContentConfig = {
|
|
135
150
|
httpOptions: this.config.baseUrl
|
|
136
151
|
? { baseUrl: this.config.baseUrl, headers: this.getRequestHeaders() }
|
|
@@ -138,7 +153,7 @@ export class GeminiHandler extends BaseHandler {
|
|
|
138
153
|
abortSignal,
|
|
139
154
|
systemInstruction: systemPrompt,
|
|
140
155
|
temperature: info.temperature ?? 1,
|
|
141
|
-
maxOutputTokens
|
|
156
|
+
maxOutputTokens,
|
|
142
157
|
};
|
|
143
158
|
|
|
144
159
|
// Add thinking config only when explicitly requested and supported.
|
|
@@ -22,6 +22,7 @@ import type {
|
|
|
22
22
|
ModelInfo,
|
|
23
23
|
ProviderConfig,
|
|
24
24
|
} from "../types";
|
|
25
|
+
import { hasModelCapability } from "../types";
|
|
25
26
|
import type { Message, ToolDefinition } from "../types/messages";
|
|
26
27
|
import { retryStream } from "../utils/retry";
|
|
27
28
|
import { ToolCallProcessor } from "../utils/tool-processor";
|
|
@@ -106,9 +107,26 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
106
107
|
systemPrompt: string,
|
|
107
108
|
messages: Message[],
|
|
108
109
|
): OpenAI.Chat.ChatCompletionMessageParam[] {
|
|
110
|
+
const model = this.getModel();
|
|
111
|
+
const supportsPromptCache =
|
|
112
|
+
hasModelCapability(model.info, "prompt-cache") ||
|
|
113
|
+
this.config.capabilities?.includes("prompt-cache") === true;
|
|
114
|
+
const systemMessage = supportsPromptCache
|
|
115
|
+
? ({
|
|
116
|
+
role: "system",
|
|
117
|
+
content: [
|
|
118
|
+
{
|
|
119
|
+
type: "text",
|
|
120
|
+
text: systemPrompt,
|
|
121
|
+
cache_control: { type: "ephemeral" },
|
|
122
|
+
},
|
|
123
|
+
],
|
|
124
|
+
} as unknown as OpenAI.Chat.ChatCompletionMessageParam)
|
|
125
|
+
: { role: "system" as const, content: systemPrompt };
|
|
126
|
+
|
|
109
127
|
return [
|
|
110
|
-
|
|
111
|
-
...convertToOpenAIMessages(messages),
|
|
128
|
+
systemMessage,
|
|
129
|
+
...convertToOpenAIMessages(messages, supportsPromptCache),
|
|
112
130
|
];
|
|
113
131
|
}
|
|
114
132
|
|
|
@@ -171,7 +189,11 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
171
189
|
this.config.reasoningEffort ??
|
|
172
190
|
(this.config.thinking ? DEFAULT_REASONING_EFFORT : undefined);
|
|
173
191
|
if (supportsReasoningEffort && effectiveReasoningEffort) {
|
|
174
|
-
(
|
|
192
|
+
(
|
|
193
|
+
requestOptions as OpenAI.ChatCompletionCreateParamsStreaming & {
|
|
194
|
+
reasoning_effort?: string;
|
|
195
|
+
}
|
|
196
|
+
).reasoning_effort = effectiveReasoningEffort;
|
|
175
197
|
}
|
|
176
198
|
|
|
177
199
|
const requestHeaders = this.getRequestHeaders();
|
|
@@ -191,16 +213,25 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
191
213
|
headers: requestHeaders,
|
|
192
214
|
});
|
|
193
215
|
const toolCallProcessor = new ToolCallProcessor();
|
|
216
|
+
let finishReason: string | null = null;
|
|
194
217
|
|
|
195
218
|
for await (const chunk of stream) {
|
|
219
|
+
const choice = chunk.choices?.[0];
|
|
220
|
+
if (choice?.finish_reason) {
|
|
221
|
+
finishReason = choice.finish_reason;
|
|
222
|
+
}
|
|
196
223
|
yield* this.withResponseIdForAll(
|
|
197
224
|
this.processChunk(chunk, toolCallProcessor, modelInfo, responseId),
|
|
198
225
|
responseId,
|
|
199
226
|
);
|
|
200
227
|
}
|
|
201
228
|
|
|
202
|
-
|
|
203
|
-
|
|
229
|
+
yield {
|
|
230
|
+
type: "done",
|
|
231
|
+
success: true,
|
|
232
|
+
id: responseId,
|
|
233
|
+
incompleteReason: finishReason === "length" ? "max_tokens" : undefined,
|
|
234
|
+
};
|
|
204
235
|
}
|
|
205
236
|
|
|
206
237
|
/**
|
|
@@ -213,9 +244,11 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
213
244
|
_modelInfo: ModelInfo,
|
|
214
245
|
responseId: string,
|
|
215
246
|
): Generator<import("../types").ApiStreamChunk> {
|
|
216
|
-
const
|
|
217
|
-
|
|
218
|
-
|
|
247
|
+
const rawDelta = chunk.choices?.[0]?.delta;
|
|
248
|
+
const delta = rawDelta && {
|
|
249
|
+
...rawDelta,
|
|
250
|
+
reasoning_content: (rawDelta as { reasoning_content?: string })
|
|
251
|
+
.reasoning_content,
|
|
219
252
|
};
|
|
220
253
|
|
|
221
254
|
// Handle text content
|
|
@@ -227,7 +260,7 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
227
260
|
if (delta?.reasoning_content) {
|
|
228
261
|
yield {
|
|
229
262
|
type: "reasoning",
|
|
230
|
-
reasoning:
|
|
263
|
+
reasoning: delta.reasoning_content,
|
|
231
264
|
id: responseId,
|
|
232
265
|
};
|
|
233
266
|
}
|
|
@@ -248,10 +281,21 @@ export class OpenAIBaseHandler extends BaseHandler {
|
|
|
248
281
|
if (chunk.usage) {
|
|
249
282
|
const inputTokens = chunk.usage.prompt_tokens ?? 0;
|
|
250
283
|
const outputTokens = chunk.usage.completion_tokens ?? 0;
|
|
284
|
+
const usageWithCache = chunk.usage as typeof chunk.usage & {
|
|
285
|
+
prompt_tokens_details?: {
|
|
286
|
+
cached_tokens?: number;
|
|
287
|
+
cache_write_tokens?: number;
|
|
288
|
+
};
|
|
289
|
+
prompt_cache_miss_tokens?: number;
|
|
290
|
+
cache_creation_input_tokens?: number;
|
|
291
|
+
cache_read_input_tokens?: number;
|
|
292
|
+
};
|
|
251
293
|
const cacheReadTokens =
|
|
252
|
-
|
|
294
|
+
usageWithCache.prompt_tokens_details?.cached_tokens ?? 0;
|
|
253
295
|
const cacheWriteTokens =
|
|
254
|
-
|
|
296
|
+
usageWithCache.prompt_tokens_details?.cache_write_tokens ??
|
|
297
|
+
usageWithCache.prompt_cache_miss_tokens ??
|
|
298
|
+
0;
|
|
255
299
|
|
|
256
300
|
yield {
|
|
257
301
|
type: "usage",
|
|
@@ -241,7 +241,7 @@ export class VertexHandler extends BaseHandler {
|
|
|
241
241
|
promptCacheOn,
|
|
242
242
|
}),
|
|
243
243
|
tools: toAiSdkTools(tools),
|
|
244
|
-
maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ??
|
|
244
|
+
maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
|
|
245
245
|
temperature: reasoningOn ? undefined : 0,
|
|
246
246
|
providerOptions:
|
|
247
247
|
Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
|