@clinebot/llms 0.0.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,41 +13,16 @@ export declare const DEFAULT_REQUEST_HEADERS: Record<string, string>;
13
13
  export declare abstract class BaseHandler implements ApiHandler {
14
14
  protected config: ProviderConfig;
15
15
  protected abortController: AbortController | undefined;
16
+ private abortSignalSequence;
16
17
  constructor(config: ProviderConfig);
17
- /**
18
- * Convert Cline messages to provider-specific format
19
- * Must be implemented by subclasses
20
- */
21
18
  abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
22
- /**
23
- * Create a streaming message completion
24
- * Must be implemented by subclasses
25
- */
26
19
  abstract createMessage(systemPrompt: string, messages: Message[], tools?: ToolDefinition[]): ApiStream;
27
- /**
28
- * Get the current model configuration
29
- * Can be overridden by subclasses for provider-specific logic
30
- */
31
20
  getModel(): HandlerModelInfo;
32
- /**
33
- * Get usage information (optional)
34
- * Override in subclasses that support this
35
- */
36
21
  getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined>;
37
- /**
38
- * Get the abort signal for the current request
39
- * Creates a new AbortController if one doesn't exist or was already aborted
40
- * Combines with config.abortSignal if provided
41
- */
42
22
  protected getAbortSignal(): AbortSignal;
43
- /**
44
- * Abort the current request
45
- */
46
23
  abort(): void;
47
24
  setAbortSignal(signal: AbortSignal | undefined): void;
48
- /**
49
- * Helper to calculate cost from usage
50
- */
25
+ private logAbort;
51
26
  protected calculateCost(inputTokens: number, outputTokens: number, cacheReadTokens?: number): number | undefined;
52
27
  protected createResponseId(): string;
53
28
  protected withResponseId<T extends ApiStreamChunk>(chunk: T, responseId: string): T;
@@ -9,7 +9,7 @@ type OpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
9
9
  /**
10
10
  * Convert messages to OpenAI format
11
11
  */
12
- export declare function convertToOpenAIMessages(messages: Message[]): OpenAIMessage[];
12
+ export declare function convertToOpenAIMessages(messages: Message[], enableCaching?: boolean): OpenAIMessage[];
13
13
  /**
14
14
  * Convert tool definitions to OpenAI format
15
15
  */
@@ -187,6 +187,10 @@ export interface ProviderOptions {
187
187
  /** Runtime model catalog refresh configuration */
188
188
  modelCatalog?: ModelCatalogConfig;
189
189
  }
190
+ /**
191
+ * Provider-specific options that don't fit other categories
192
+ */
193
+ import type { BasicLogger } from "@clinebot/shared";
190
194
  /**
191
195
  * Runtime model catalog refresh options
192
196
  */
@@ -219,6 +223,8 @@ export interface ProviderConfig extends AuthConfig, EndpointConfig, ModelConfig,
219
223
  onRetryAttempt?: (attempt: number, maxRetries: number, delay: number, error: unknown) => void;
220
224
  /** AbortSignal for cancelling requests */
221
225
  abortSignal?: AbortSignal;
226
+ /** Optional runtime logger for provider-level diagnostics */
227
+ logger?: BasicLogger;
222
228
  /** Codex CLI-specific options */
223
229
  codex?: CodexConfig;
224
230
  /** Claude Code-specific options */
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "@clinebot/llms",
3
- "version": "0.0.7",
3
+ "version": "0.0.10",
4
4
  "description": "Config-driven SDK for selecting, extending, and instantiating LLM providers and models",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
7
7
  "dependencies": {
8
+ "@clinebot/shared": "0.0.10",
8
9
  "@ai-sdk/amazon-bedrock": "^4.0.67",
9
10
  "@ai-sdk/google-vertex": "^4.0.74",
10
11
  "@ai-sdk/mistral": "^3.0.24",
@@ -1,6 +1,6 @@
1
1
  export { defineLlmsConfig, loadLlmsConfigFromFile } from "./config-browser";
2
- export * as models from "./models/index";
3
- export * as providers from "./providers/public.browser";
2
+ export * as LlmsModels from "./models/index";
3
+ export * as LlmsProviders from "./providers/public.browser";
4
4
  export type {
5
5
  CustomProviderConfig,
6
6
  LlmsConfig,
package/src/index.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  export { defineLlmsConfig } from "./config";
2
- export * as models from "./models/index";
3
- export * as providers from "./providers/public";
2
+ export * as LlmsModels from "./models/index";
3
+ export * as LlmsProviders from "./providers/public";
4
4
  export { createLlmsSdk } from "./sdk";
5
5
  export type {
6
6
  CustomProviderConfig,
@@ -14,7 +14,7 @@ export const VERCEL_AI_GATEWAY_PROVIDER: ModelCollection = {
14
14
  name: "Vercel AI Gateway",
15
15
  description: "Vercel's AI gateway service",
16
16
  protocol: "openai-chat",
17
- baseUrl: "https://ai-gateway.vercel.app/v1",
17
+ baseUrl: "https://ai-gateway.vercel.sh/v1",
18
18
  defaultModelId: Object.keys(VERCEL_AI_GATEWAY_MODELS)[0],
19
19
  capabilities: ["reasoning"],
20
20
  env: ["AI_GATEWAY_API_KEY"],
@@ -145,7 +145,8 @@ export class AnthropicHandler extends BaseHandler {
145
145
  thinking: reasoningOn
146
146
  ? { type: "enabled", budget_tokens: budgetTokens }
147
147
  : undefined,
148
- max_tokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
148
+ max_tokens:
149
+ model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
149
150
  temperature: reasoningOn ? undefined : 0,
150
151
  system: supportsPromptCache
151
152
  ? [
@@ -173,6 +174,7 @@ export class AnthropicHandler extends BaseHandler {
173
174
  cacheReadTokens: 0,
174
175
  cacheWriteTokens: 0,
175
176
  };
177
+ let stopReason: string | null = null;
176
178
 
177
179
  for await (const chunk of stream) {
178
180
  if (debugThinking) {
@@ -185,6 +187,11 @@ export class AnthropicHandler extends BaseHandler {
185
187
  countChunk(`content_block_delta:${chunk.delta?.type ?? "unknown"}`);
186
188
  }
187
189
  }
190
+ if (chunk.type === "message_delta") {
191
+ stopReason =
192
+ (chunk as { delta?: { stop_reason?: string } }).delta?.stop_reason ??
193
+ stopReason;
194
+ }
188
195
  yield* this.withResponseIdForAll(
189
196
  this.processChunk(chunk, currentToolCall, usageSnapshot, responseId),
190
197
  responseId,
@@ -199,8 +206,12 @@ export class AnthropicHandler extends BaseHandler {
199
206
  console.error(`[thinking-debug][anthropic][stream] ${summary}`);
200
207
  }
201
208
 
202
- // Yield done chunk to indicate streaming completed successfully
203
- yield { type: "done", success: true, id: responseId };
209
+ yield {
210
+ type: "done",
211
+ success: true,
212
+ id: responseId,
213
+ incompleteReason: stopReason === "max_tokens" ? "max_tokens" : undefined,
214
+ };
204
215
  }
205
216
 
206
217
  protected *processChunk(
@@ -1,4 +1,4 @@
1
- import { describe, expect, it } from "vitest";
1
+ import { describe, expect, it, vi } from "vitest";
2
2
  import type { ApiStream, ProviderConfig } from "../types/index";
3
3
  import { BaseHandler } from "./base";
4
4
 
@@ -18,6 +18,10 @@ class TestHandler extends BaseHandler {
18
18
  ): number | undefined {
19
19
  return this.calculateCost(inputTokens, outputTokens, cacheReadTokens);
20
20
  }
21
+
22
+ public exposeAbortSignal(): AbortSignal {
23
+ return this.getAbortSignal();
24
+ }
21
25
  }
22
26
 
23
27
  describe("BaseHandler.calculateCost", () => {
@@ -44,3 +48,58 @@ describe("BaseHandler.calculateCost", () => {
44
48
  expect(cost).toBeCloseTo(17.73, 6);
45
49
  });
46
50
  });
51
+
52
+ describe("BaseHandler abort signal wiring", () => {
53
+ it("does not let a stale request signal abort a newer request", () => {
54
+ const logger = {
55
+ debug: vi.fn(),
56
+ warn: vi.fn(),
57
+ };
58
+ const request1 = new AbortController();
59
+ const handler = new TestHandler({
60
+ providerId: "openrouter",
61
+ modelId: "mock-model",
62
+ apiKey: "test-key",
63
+ baseUrl: "https://example.com/v1",
64
+ abortSignal: request1.signal,
65
+ logger,
66
+ });
67
+
68
+ const signal1 = handler.exposeAbortSignal();
69
+ expect(signal1.aborted).toBe(false);
70
+
71
+ const request2 = new AbortController();
72
+ handler.setAbortSignal(request2.signal);
73
+ const signal2 = handler.exposeAbortSignal();
74
+ expect(signal2).not.toBe(signal1);
75
+ expect(signal2.aborted).toBe(false);
76
+
77
+ request1.abort(new Error("stale timeout"));
78
+
79
+ expect(signal1.aborted).toBe(true);
80
+ expect(signal2.aborted).toBe(false);
81
+ expect(logger.warn).toHaveBeenCalledWith(
82
+ "Provider request abort signal fired",
83
+ expect.objectContaining({
84
+ reason: expect.objectContaining({ message: "stale timeout" }),
85
+ }),
86
+ );
87
+ });
88
+
89
+ it("creates a fresh controller for each request", () => {
90
+ const handler = new TestHandler({
91
+ providerId: "openrouter",
92
+ modelId: "mock-model",
93
+ apiKey: "test-key",
94
+ baseUrl: "https://example.com/v1",
95
+ abortSignal: new AbortController().signal,
96
+ });
97
+
98
+ const signal1 = handler.exposeAbortSignal();
99
+ const signal2 = handler.exposeAbortSignal();
100
+
101
+ expect(signal2).not.toBe(signal1);
102
+ expect(signal1.aborted).toBe(false);
103
+ expect(signal2.aborted).toBe(false);
104
+ });
105
+ });
@@ -22,37 +22,44 @@ export const DEFAULT_REQUEST_HEADERS: Record<string, string> = {
22
22
  "X-CLIENT-TYPE": "cline-sdk",
23
23
  };
24
24
 
25
+ const controllerIds = new WeakMap<AbortController, string>();
26
+ let controllerIdCounter = 0;
27
+
28
+ function getControllerId(controller: AbortController): string {
29
+ let id = controllerIds.get(controller);
30
+ if (!id) {
31
+ id = `abort_${++controllerIdCounter}`;
32
+ controllerIds.set(controller, id);
33
+ }
34
+ return id;
35
+ }
36
+
37
+ function serializeAbortReason(reason: unknown): unknown {
38
+ return reason instanceof Error
39
+ ? { name: reason.name, message: reason.message }
40
+ : reason;
41
+ }
42
+
25
43
  /**
26
44
  * Base handler class with common functionality
27
45
  */
28
46
  export abstract class BaseHandler implements ApiHandler {
29
47
  protected config: ProviderConfig;
30
48
  protected abortController: AbortController | undefined;
49
+ private abortSignalSequence = 0;
31
50
 
32
51
  constructor(config: ProviderConfig) {
33
52
  this.config = config;
34
53
  }
35
54
 
36
- /**
37
- * Convert Cline messages to provider-specific format
38
- * Must be implemented by subclasses
39
- */
40
55
  abstract getMessages(systemPrompt: string, messages: Message[]): unknown;
41
56
 
42
- /**
43
- * Create a streaming message completion
44
- * Must be implemented by subclasses
45
- */
46
57
  abstract createMessage(
47
58
  systemPrompt: string,
48
59
  messages: Message[],
49
60
  tools?: ToolDefinition[],
50
61
  ): ApiStream;
51
62
 
52
- /**
53
- * Get the current model configuration
54
- * Can be overridden by subclasses for provider-specific logic
55
- */
56
63
  getModel(): HandlerModelInfo {
57
64
  const modelId = this.config.modelId;
58
65
  return {
@@ -61,43 +68,55 @@ export abstract class BaseHandler implements ApiHandler {
61
68
  };
62
69
  }
63
70
 
64
- /**
65
- * Get usage information (optional)
66
- * Override in subclasses that support this
67
- */
68
71
  async getApiStreamUsage(): Promise<ApiStreamUsageChunk | undefined> {
69
72
  return undefined;
70
73
  }
71
74
 
72
- /**
73
- * Get the abort signal for the current request
74
- * Creates a new AbortController if one doesn't exist or was already aborted
75
- * Combines with config.abortSignal if provided
76
- */
77
75
  protected getAbortSignal(): AbortSignal {
78
- // Create a new controller if needed
79
- if (!this.abortController || this.abortController.signal.aborted) {
80
- this.abortController = new AbortController();
81
- }
82
-
83
- // If a signal was provided in config, chain it
84
- if (this.config.abortSignal) {
85
- const configSignal = this.config.abortSignal;
76
+ const controller = new AbortController();
77
+ this.abortController = controller;
78
+ controller.signal.addEventListener(
79
+ "abort",
80
+ () => {
81
+ if (this.abortController === controller) {
82
+ this.abortController = undefined;
83
+ }
84
+ },
85
+ { once: true },
86
+ );
87
+
88
+ const configSignal = this.config.abortSignal;
89
+ if (configSignal) {
86
90
  if (configSignal.aborted) {
87
- this.abortController.abort(configSignal.reason);
91
+ this.logAbort("debug", "Provider request inherited aborted signal", {
92
+ controllerId: getControllerId(controller),
93
+ reason: serializeAbortReason(configSignal.reason),
94
+ });
95
+ controller.abort(configSignal.reason);
88
96
  } else {
89
- configSignal.addEventListener("abort", () => {
90
- this.abortController?.abort(configSignal.reason);
97
+ const signalId = ++this.abortSignalSequence;
98
+ configSignal.addEventListener(
99
+ "abort",
100
+ () => {
101
+ this.logAbort("warn", "Provider request abort signal fired", {
102
+ controllerId: getControllerId(controller),
103
+ signalId,
104
+ reason: serializeAbortReason(configSignal.reason),
105
+ });
106
+ controller.abort(configSignal.reason);
107
+ },
108
+ { once: true },
109
+ );
110
+ this.logAbort("debug", "Provider request attached abort signal", {
111
+ controllerId: getControllerId(controller),
112
+ signalId,
91
113
  });
92
114
  }
93
115
  }
94
116
 
95
- return this.abortController.signal;
117
+ return controller.signal;
96
118
  }
97
119
 
98
- /**
99
- * Abort the current request
100
- */
101
120
  abort(): void {
102
121
  this.abortController?.abort();
103
122
  }
@@ -105,37 +124,47 @@ export abstract class BaseHandler implements ApiHandler {
105
124
  setAbortSignal(signal: AbortSignal | undefined): void {
106
125
  this.config.abortSignal = signal;
107
126
  if (signal?.aborted) {
127
+ this.logAbort("debug", "Provider handler received pre-aborted signal", {
128
+ controllerId: this.abortController
129
+ ? getControllerId(this.abortController)
130
+ : undefined,
131
+ reason: serializeAbortReason(signal.reason),
132
+ });
108
133
  this.abortController?.abort(signal.reason);
109
134
  }
110
135
  }
111
136
 
112
- /**
113
- * Helper to calculate cost from usage
114
- */
137
+ private logAbort(
138
+ level: "debug" | "warn",
139
+ message: string,
140
+ metadata?: Record<string, unknown>,
141
+ ): void {
142
+ this.config.logger?.[level]?.(message, {
143
+ providerId: this.config.providerId,
144
+ modelId: this.config.modelId,
145
+ ...metadata,
146
+ });
147
+ }
148
+
115
149
  protected calculateCost(
116
150
  inputTokens: number,
117
151
  outputTokens: number,
118
152
  cacheReadTokens = 0,
119
153
  ): number | undefined {
120
- const modelPricingSource =
121
- this.config.modelInfo ??
122
- (this.config.modelId
123
- ? this.config.knownModels?.[this.config.modelId]
124
- : undefined);
125
- const pricing = modelPricingSource?.pricing;
154
+ const pricing = (
155
+ this.config.modelInfo ?? this.config.knownModels?.[this.config.modelId]
156
+ )?.pricing;
126
157
  if (!pricing?.input || !pricing?.output) {
127
158
  return undefined;
128
159
  }
129
160
 
130
- const uncachedInputTokens = inputTokens - cacheReadTokens;
131
- const inputCost = (uncachedInputTokens / 1_000_000) * pricing.input;
132
- const outputCost = (outputTokens / 1_000_000) * pricing.output;
133
- const cacheReadCost =
134
- cacheReadTokens > 0
161
+ return (
162
+ ((inputTokens - cacheReadTokens) / 1_000_000) * pricing.input +
163
+ (outputTokens / 1_000_000) * pricing.output +
164
+ (cacheReadTokens > 0
135
165
  ? (cacheReadTokens / 1_000_000) * (pricing.cacheRead ?? 0)
136
- : 0;
137
-
138
- return inputCost + outputCost + cacheReadCost;
166
+ : 0)
167
+ );
139
168
  }
140
169
 
141
170
  protected createResponseId(): string {
@@ -154,7 +183,7 @@ export abstract class BaseHandler implements ApiHandler {
154
183
  responseId: string,
155
184
  ): Generator<ApiStreamChunk> {
156
185
  for (const chunk of chunks) {
157
- yield this.withResponseId(chunk, responseId);
186
+ yield { ...chunk, id: responseId };
158
187
  }
159
188
  }
160
189
 
@@ -143,7 +143,7 @@ export class BedrockHandler extends BaseHandler {
143
143
  model: factory(modelId),
144
144
  messages: this.getMessages(systemPrompt, messages),
145
145
  tools: toAiSdkTools(tools),
146
- maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
146
+ maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
147
147
  temperature: reasoningEnabled ? undefined : (model.info.temperature ?? 0),
148
148
  providerOptions:
149
149
  Object.keys(providerOptions).length > 0 ? providerOptions : undefined,
@@ -218,4 +218,44 @@ describe("GeminiHandler", () => {
218
218
  expect(secondId).toBeTruthy();
219
219
  expect(firstId).not.toBe(secondId);
220
220
  });
221
+
222
+ it("defaults maxOutputTokens to 8192 for gemini-3-flash when no model or config limit is provided", async () => {
223
+ generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
224
+
225
+ const handler = new GeminiHandler({
226
+ providerId: "gemini",
227
+ modelId: "gemini-3-flash",
228
+ apiKey: "test-key",
229
+ });
230
+
231
+ await collectChunks(
232
+ handler.createMessage("System", [{ role: "user", content: "go" }]),
233
+ );
234
+
235
+ expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
236
+ const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
237
+ config?: { maxOutputTokens?: number };
238
+ };
239
+ expect(request.config?.maxOutputTokens).toBe(8192);
240
+ });
241
+
242
+ it("defaults maxOutputTokens to 128000 for non gemini-3-flash models when no model or config limit is provided", async () => {
243
+ generateContentStreamSpy.mockResolvedValue(createAsyncIterable([]));
244
+
245
+ const handler = new GeminiHandler({
246
+ providerId: "gemini",
247
+ modelId: "gemini-2.5-flash",
248
+ apiKey: "test-key",
249
+ });
250
+
251
+ await collectChunks(
252
+ handler.createMessage("System", [{ role: "user", content: "go" }]),
253
+ );
254
+
255
+ expect(generateContentStreamSpy).toHaveBeenCalledTimes(1);
256
+ const request = generateContentStreamSpy.mock.calls[0]?.[0] as {
257
+ config?: { maxOutputTokens?: number };
258
+ };
259
+ expect(request.config?.maxOutputTokens).toBe(128000);
260
+ });
221
261
  });
@@ -27,6 +27,16 @@ import { RetriableError, retryStream } from "../utils/retry";
27
27
  import { BaseHandler } from "./base";
28
28
 
29
29
  const DEFAULT_THINKING_BUDGET_TOKENS = 1024;
30
+ const DEFAULT_MAX_OUTPUT_TOKENS = 128_000;
31
+ const GEMINI_3_FLASH_MAX_OUTPUT_TOKENS = 8192;
32
+
33
+ function isGemini3FlashModel(modelId: string): boolean {
34
+ const normalized = modelId.toLowerCase();
35
+ return (
36
+ normalized.includes("gemini-3-flash") ||
37
+ normalized.includes("gemini-3.0-flash")
38
+ );
39
+ }
30
40
 
31
41
  /**
32
42
  * Handler for Google's Gemini API
@@ -131,6 +141,11 @@ export class GeminiHandler extends BaseHandler {
131
141
  }
132
142
 
133
143
  // Build request config with abort signal
144
+ const fallbackMaxOutputTokens = isGemini3FlashModel(modelId)
145
+ ? GEMINI_3_FLASH_MAX_OUTPUT_TOKENS
146
+ : DEFAULT_MAX_OUTPUT_TOKENS;
147
+ const maxOutputTokens =
148
+ info.maxTokens ?? this.config.maxOutputTokens ?? fallbackMaxOutputTokens;
134
149
  const requestConfig: GenerateContentConfig = {
135
150
  httpOptions: this.config.baseUrl
136
151
  ? { baseUrl: this.config.baseUrl, headers: this.getRequestHeaders() }
@@ -138,7 +153,7 @@ export class GeminiHandler extends BaseHandler {
138
153
  abortSignal,
139
154
  systemInstruction: systemPrompt,
140
155
  temperature: info.temperature ?? 1,
141
- maxOutputTokens: info.maxTokens ?? this.config.maxOutputTokens,
156
+ maxOutputTokens,
142
157
  };
143
158
 
144
159
  // Add thinking config only when explicitly requested and supported.
@@ -22,6 +22,7 @@ import type {
22
22
  ModelInfo,
23
23
  ProviderConfig,
24
24
  } from "../types";
25
+ import { hasModelCapability } from "../types";
25
26
  import type { Message, ToolDefinition } from "../types/messages";
26
27
  import { retryStream } from "../utils/retry";
27
28
  import { ToolCallProcessor } from "../utils/tool-processor";
@@ -106,9 +107,26 @@ export class OpenAIBaseHandler extends BaseHandler {
106
107
  systemPrompt: string,
107
108
  messages: Message[],
108
109
  ): OpenAI.Chat.ChatCompletionMessageParam[] {
110
+ const model = this.getModel();
111
+ const supportsPromptCache =
112
+ hasModelCapability(model.info, "prompt-cache") ||
113
+ this.config.capabilities?.includes("prompt-cache") === true;
114
+ const systemMessage = supportsPromptCache
115
+ ? ({
116
+ role: "system",
117
+ content: [
118
+ {
119
+ type: "text",
120
+ text: systemPrompt,
121
+ cache_control: { type: "ephemeral" },
122
+ },
123
+ ],
124
+ } as unknown as OpenAI.Chat.ChatCompletionMessageParam)
125
+ : { role: "system" as const, content: systemPrompt };
126
+
109
127
  return [
110
- { role: "system", content: systemPrompt },
111
- ...convertToOpenAIMessages(messages),
128
+ systemMessage,
129
+ ...convertToOpenAIMessages(messages, supportsPromptCache),
112
130
  ];
113
131
  }
114
132
 
@@ -171,7 +189,11 @@ export class OpenAIBaseHandler extends BaseHandler {
171
189
  this.config.reasoningEffort ??
172
190
  (this.config.thinking ? DEFAULT_REASONING_EFFORT : undefined);
173
191
  if (supportsReasoningEffort && effectiveReasoningEffort) {
174
- (requestOptions as any).reasoning_effort = effectiveReasoningEffort;
192
+ (
193
+ requestOptions as OpenAI.ChatCompletionCreateParamsStreaming & {
194
+ reasoning_effort?: string;
195
+ }
196
+ ).reasoning_effort = effectiveReasoningEffort;
175
197
  }
176
198
 
177
199
  const requestHeaders = this.getRequestHeaders();
@@ -191,16 +213,25 @@ export class OpenAIBaseHandler extends BaseHandler {
191
213
  headers: requestHeaders,
192
214
  });
193
215
  const toolCallProcessor = new ToolCallProcessor();
216
+ let finishReason: string | null = null;
194
217
 
195
218
  for await (const chunk of stream) {
219
+ const choice = chunk.choices?.[0];
220
+ if (choice?.finish_reason) {
221
+ finishReason = choice.finish_reason;
222
+ }
196
223
  yield* this.withResponseIdForAll(
197
224
  this.processChunk(chunk, toolCallProcessor, modelInfo, responseId),
198
225
  responseId,
199
226
  );
200
227
  }
201
228
 
202
- // Yield done chunk to indicate streaming completed successfully
203
- yield { type: "done", success: true, id: responseId };
229
+ yield {
230
+ type: "done",
231
+ success: true,
232
+ id: responseId,
233
+ incompleteReason: finishReason === "length" ? "max_tokens" : undefined,
234
+ };
204
235
  }
205
236
 
206
237
  /**
@@ -213,9 +244,11 @@ export class OpenAIBaseHandler extends BaseHandler {
213
244
  _modelInfo: ModelInfo,
214
245
  responseId: string,
215
246
  ): Generator<import("../types").ApiStreamChunk> {
216
- const delta = chunk.choices?.[0]?.delta && {
217
- ...chunk.choices[0].delta,
218
- reasoning_content: (chunk.choices[0].delta as any).reasoning_content,
247
+ const rawDelta = chunk.choices?.[0]?.delta;
248
+ const delta = rawDelta && {
249
+ ...rawDelta,
250
+ reasoning_content: (rawDelta as { reasoning_content?: string })
251
+ .reasoning_content,
219
252
  };
220
253
 
221
254
  // Handle text content
@@ -227,7 +260,7 @@ export class OpenAIBaseHandler extends BaseHandler {
227
260
  if (delta?.reasoning_content) {
228
261
  yield {
229
262
  type: "reasoning",
230
- reasoning: (delta as any).reasoning_content,
263
+ reasoning: delta.reasoning_content,
231
264
  id: responseId,
232
265
  };
233
266
  }
@@ -248,10 +281,21 @@ export class OpenAIBaseHandler extends BaseHandler {
248
281
  if (chunk.usage) {
249
282
  const inputTokens = chunk.usage.prompt_tokens ?? 0;
250
283
  const outputTokens = chunk.usage.completion_tokens ?? 0;
284
+ const usageWithCache = chunk.usage as typeof chunk.usage & {
285
+ prompt_tokens_details?: {
286
+ cached_tokens?: number;
287
+ cache_write_tokens?: number;
288
+ };
289
+ prompt_cache_miss_tokens?: number;
290
+ cache_creation_input_tokens?: number;
291
+ cache_read_input_tokens?: number;
292
+ };
251
293
  const cacheReadTokens =
252
- (chunk.usage as any).prompt_tokens_details?.cached_tokens ?? 0;
294
+ usageWithCache.prompt_tokens_details?.cached_tokens ?? 0;
253
295
  const cacheWriteTokens =
254
- (chunk.usage as any).prompt_cache_miss_tokens ?? 0;
296
+ usageWithCache.prompt_tokens_details?.cache_write_tokens ??
297
+ usageWithCache.prompt_cache_miss_tokens ??
298
+ 0;
255
299
 
256
300
  yield {
257
301
  type: "usage",
@@ -241,7 +241,7 @@ export class VertexHandler extends BaseHandler {
241
241
  promptCacheOn,
242
242
  }),
243
243
  tools: toAiSdkTools(tools),
244
- maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 8192,
244
+ maxTokens: model.info.maxTokens ?? this.config.maxOutputTokens ?? 128_000,
245
245
  temperature: reasoningOn ? undefined : 0,
246
246
  providerOptions:
247
247
  Object.keys(providerOptions).length > 0 ? providerOptions : undefined,