workers-ai-provider 3.1.13 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -18,6 +18,7 @@ import type { WorkersAIRerankingSettings } from "./workersai-reranking-settings"
18
18
  import type {
19
19
  EmbeddingModels,
20
20
  ImageGenerationModels,
21
+ KnownTextGenerationModels,
21
22
  TextGenerationModels,
22
23
  TranscriptionModels,
23
24
  SpeechModels,
@@ -40,10 +41,69 @@ export type { WorkersAISpeechSettings } from "./workersai-speech-settings";
40
41
  export { WorkersAIRerankingModel } from "./workersai-reranking-model";
41
42
  export type { WorkersAIRerankingSettings } from "./workersai-reranking-settings";
42
43
 
44
+ // ---------------------------------------------------------------------------
45
+ // AI Gateway delegate (route catalog models through AI Gateway)
46
+ //
47
+ // The delegate factory itself is internal — it's wired through
48
+ // `createWorkersAI({ providers })` (see below), so `createWorkersAI` is the
49
+ // single public entry point. The transport types, error classes, registry, and
50
+ // resume helpers are safe to re-export here (no optional `@ai-sdk/*` peer
51
+ // imports). The provider plugins (`openai`, `anthropic`, `google`) stay
52
+ // sub-path-only so those packages remain optional.
53
+ // ---------------------------------------------------------------------------
54
+
55
+ export {
56
+ type Billing,
57
+ createClientFallbackModel,
58
+ type DelegateCallOptions,
59
+ type DispatchInfo,
60
+ type FallbackAttempt,
61
+ type FallbackLeg,
62
+ type FallbackOptions,
63
+ GatewayDelegateError,
64
+ type GatewayErrorCode,
65
+ type GatewayErrorContext,
66
+ GATEWAY_PROVIDERS,
67
+ type GatewayProviderInfo,
68
+ type ParsedSlug,
69
+ type ProviderPlugin,
70
+ type ResumableStreamOptions,
71
+ type ResumeExpiredPolicy,
72
+ type Transport,
73
+ type WireFormat,
74
+ WorkersAIFallbackError,
75
+ WorkersAIGatewayError,
76
+ createResumableStream,
77
+ detectProviderByUrl,
78
+ findProviderBySlug,
79
+ parseSlug,
80
+ selectTransport,
81
+ wireableProviders,
82
+ } from "./gateway-delegate";
83
+ export {
84
+ createGatewayFetch,
85
+ createGatewayProvider,
86
+ type GatewayFetchConfig,
87
+ } from "./gateway-provider";
88
+
89
+ import {
90
+ createGatewayDelegate,
91
+ type DelegateCallOptions,
92
+ type GatewayDelegate,
93
+ type ProviderPlugin,
94
+ type ResumeExpiredPolicy,
95
+ } from "./gateway-delegate";
96
+
43
97
  // ---------------------------------------------------------------------------
44
98
  // Workers AI
45
99
  // ---------------------------------------------------------------------------
46
100
 
101
+ /**
102
+ * The account-wide AI Gateway used for catalog routing when no `gateway` is
103
+ * configured. Every Cloudflare account has a `"default"` gateway.
104
+ */
105
+ const DEFAULT_GATEWAY_ID = "default";
106
+
47
107
  export type WorkersAISettings = (
48
108
  | {
49
109
  /**
@@ -77,19 +137,72 @@ export type WorkersAISettings = (
77
137
  }
78
138
  ) & {
79
139
  /**
80
- * Optionally specify a gateway.
140
+ * Optionally specify a gateway. For third-party catalog routing (see
141
+ * `providers`) this defaults to the account's `"default"` gateway when unset.
81
142
  */
82
143
  gateway?: GatewayOptions;
144
+
145
+ /**
146
+ * Provider plugins that enable routing third-party catalog models
147
+ * (e.g. `"openai/gpt-5-mini"`) through AI Gateway. Supply them from the
148
+ * sub-path modules, e.g. `import { openai } from "workers-ai-provider/openai"`.
149
+ *
150
+ * When set, calling the provider with a `"<provider>/<model>"` slug (anything
151
+ * that is not a `@cf/...` Workers AI model id) is automatically dispatched
152
+ * through the {@link createGatewayDelegate | gateway delegate}. Leaving this
153
+ * unset preserves the exact prior behavior — only Workers AI models are built.
154
+ *
155
+ * @experimental The gateway delegate is an experimental surface.
156
+ */
157
+ providers?: ProviderPlugin[];
158
+
159
+ /**
160
+ * Default resume behavior for gateway-routed catalog models. Defaults to
161
+ * `true`. Overridable per call. Only relevant when `providers` is set.
162
+ */
163
+ resume?: boolean;
164
+
165
+ /**
166
+ * Default resume-expiry policy for gateway-routed catalog models (run path).
167
+ * Defaults to `"error"`. Only relevant when `providers` is set.
168
+ */
169
+ onResumeExpired?: ResumeExpiredPolicy;
83
170
  };
84
171
 
172
/**
 * Type-level test for a `"<provider>/<model>"` AI Gateway catalog slug.
 *
 * - The wide `string` type (for example a model id held in a variable) yields
 *   `false`, so non-literal ids keep the chat-settings shape.
 * - A literal that contains `/` and does not start with `@` yields `true`.
 * - Everything else (`@`-prefixed Workers AI ids, ids without a slash) yields
 *   `false`.
 */
type IsCatalogSlug<M extends string> = string extends M
	? false
	: M extends `${string}/${string}`
		? M extends `@${string}`
			? false
			: true
		: false;
184
+
185
/**
 * Resolves the per-model settings type from the captured literal model id:
 * `DelegateCallOptions` when `IsCatalogSlug<M>` is exactly `true`, otherwise
 * `WorkersAIChatSettings`. This drives autocomplete: `workersai("openai/gpt-5", { … })`
 * offers delegate options, `workersai("@cf/…", { … })` offers chat settings.
 *
 * The check is written in tuple form to make explicit that it is evaluated once
 * on the whole result, not per union member.
 */
type ModelSettings<M extends string> = [IsCatalogSlug<M>] extends [true]
	? DelegateCallOptions
	: WorkersAIChatSettings;
193
+
85
194
  export interface WorkersAI {
86
- (modelId: TextGenerationModels, settings?: WorkersAIChatSettings): WorkersAIChatLanguageModel;
195
+ <M extends string>(
196
+ modelId: M | KnownTextGenerationModels,
197
+ settings?: ModelSettings<M>,
198
+ ): WorkersAIChatLanguageModel;
87
199
  /**
88
- * Creates a model for text generation.
200
+ * Creates a model for text generation. Accepts a `@cf/...` Workers AI id, or
201
+ * a `"<provider>/<model>"` catalog slug when `providers` is configured.
89
202
  **/
90
- chat(
91
- modelId: TextGenerationModels,
92
- settings?: WorkersAIChatSettings,
203
+ chat<M extends string>(
204
+ modelId: M | KnownTextGenerationModels,
205
+ settings?: ModelSettings<M>,
93
206
  ): WorkersAIChatLanguageModel;
94
207
 
95
208
  embedding(
@@ -178,6 +291,61 @@ export function createWorkersAI(options: WorkersAISettings): WorkersAI {
178
291
  isBinding,
179
292
  });
180
293
 
294
// Third-party catalog routing. With `providers` configured, a non-`@`-prefixed
// `"<provider>/<model>"` slug goes through the gateway delegate rather than
// being built as a Workers AI model. The delegate is created on first use and
// cached for every later call.
let delegate: GatewayDelegate | undefined;
const getDelegate = (slug: string): GatewayDelegate => {
	const plugins = options.providers;
	if (!plugins?.length) {
		// No plugins: explain how to enable catalog routing instead of failing
		// later with an opaque "unknown model" error.
		throw new Error(
			`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI ` +
				"provider was not configured to route them. Pass provider plugins, e.g.:\n" +
				' import { openai } from "workers-ai-provider/openai";\n' +
				" createWorkersAI({ binding: env.AI, providers: [openai] });\n" +
				'A gateway defaults to "default" but can be set via `gateway`. ' +
				'Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").',
		);
	}
	if (delegate === undefined) {
		delegate = createGatewayDelegate({
			binding,
			// Use the configured gateway when present; otherwise fall back to the
			// account's `"default"` gateway so `createWorkersAI({ providers })`
			// works without extra setup.
			gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
			providers: plugins,
			resume: options.resume,
			onResumeExpired: options.onResumeExpired,
		});
	}
	return delegate;
};
323
+
324
// Runtime counterpart of the catalog-slug check: a string that contains a
// slash and does not start with `@` is treated as a `"<provider>/<model>"`
// gateway catalog slug. `@`-prefixed ids and slash-less ids are not.
const isGatewaySlug = (id: unknown): id is string => {
	if (typeof id !== "string") return false;
	if (id.startsWith("@")) return false;
	return id.includes("/");
};
329
+
330
// Single implementation behind both `provider(...)` and `provider.chat(...)`.
// `settings` is typed as the union of both shapes here; the public `WorkersAI`
// interface narrows it per call via `ModelSettings<M>`, so each branch casts to
// the shape its target expects.
const buildChat = (
	modelId: TextGenerationModels,
	settings?: WorkersAIChatSettings | DelegateCallOptions,
): WorkersAIChatLanguageModel => {
	if (!isGatewaySlug(modelId)) {
		return createChatModel(modelId, settings as WorkersAIChatSettings | undefined);
	}

	// Catalog slug: the delegate returns the model built by the configured
	// plugin. The cast keeps the public return type unchanged.
	const routeThroughGateway = getDelegate(modelId);
	const gatewayModel = routeThroughGateway(modelId, settings as DelegateCallOptions);
	return gatewayModel as unknown as WorkersAIChatLanguageModel;
};
348
+
181
349
  const createImageModel = (
182
350
  modelId: ImageGenerationModels,
183
351
  settings: WorkersAIImageSettings = {},
@@ -229,14 +397,17 @@ export function createWorkersAI(options: WorkersAISettings): WorkersAI {
229
397
  provider: "workersai.reranking",
230
398
  });
231
399
 
232
- const provider = (modelId: TextGenerationModels, settings?: WorkersAIChatSettings) => {
400
+ const provider = (
401
+ modelId: TextGenerationModels,
402
+ settings?: WorkersAIChatSettings | DelegateCallOptions,
403
+ ) => {
233
404
  if (new.target) {
234
405
  throw new Error("The WorkersAI model function cannot be called with the new keyword.");
235
406
  }
236
- return createChatModel(modelId, settings);
407
+ return buildChat(modelId, settings);
237
408
  };
238
409
 
239
- provider.chat = createChatModel;
410
+ provider.chat = buildChat;
240
411
  provider.embedding = createEmbeddingModel;
241
412
  provider.textEmbedding = createEmbeddingModel;
242
413
  provider.textEmbeddingModel = createEmbeddingModel;
package/src/openai.ts ADDED
@@ -0,0 +1,25 @@
1
+ import { createOpenAI } from "@ai-sdk/openai";
2
+ import type { ProviderPlugin } from "./gateway-delegate";
3
+
4
/**
 * OpenAI-wire provider plugin for AI Gateway catalog routing. Register it with
 * `createWorkersAI({ binding: env.AI, providers: [openai] })` to cover every
 * OpenAI-compatible provider at once — `openai/…` plus the OpenAI-compatible
 * long tail (`deepseek/…`, `xai/…`, `groq/…`, `mistral/…`, `perplexity/…`,
 * `openrouter/…`, `cohere/…`, …). The registry maps each slug to its gateway
 * provider id; this plugin only supplies the response parser.
 *
 * Requires `@ai-sdk/openai` (an optional peer dependency — install it yourself).
 *
 * The model is built with `.chat()` (Chat Completions) on purpose: AI SDK v6's
 * bare `openai()` defaults to the Responses API, which the AI Gateway run
 * catalog does not serve.
 */
export const openai: ProviderPlugin = {
	wireFormat: "openai",
	create: ({ modelId, fetch, baseURL }) => {
		// `apiKey` is a placeholder: the gateway handles auth (unified billing /
		// BYOK) and the delegate strips the Authorization header on the gateway
		// path. `baseURL` is only forwarded when the registry supplied one (for
		// non-OpenAI openai-wire providers), so the generated URL host-strips to
		// the right gateway-native endpoint.
		const client = createOpenAI(
			baseURL ? { apiKey: "unused", fetch, baseURL } : { apiKey: "unused", fetch },
		);
		return client.chat(modelId);
	},
};
@@ -0,0 +1,223 @@
1
+ import { GatewayDelegateError } from "./gateway-delegate";
2
+
3
+ /**
4
+ * Resumable run-path stream (RFC §7.1).
5
+ *
6
+ * Wraps the byte stream from a run-path response (`env.AI.run(..., {
7
+ * returnRawResponse })`) so a transient mid-stream drop is recovered
8
+ * transparently: the wrapper reconnects to the gateway resume endpoint and keeps
9
+ * feeding bytes to the same consumer, so the downstream `@ai-sdk/*` parser never
10
+ * sees the break.
11
+ *
12
+ * Byte alignment is the one correctness subtlety. The gateway `resume?from=N`
13
+ * endpoint takes an SSE *event index* (count of `\n\n` terminators) and replays
14
+ * whole events from that index. So the wrapper only ever emits *complete* events
15
+ * downstream and buffers any trailing partial event. On a drop the buffered
16
+ * partial is discarded and resume starts from the count of complete events
17
+ * already emitted — landing exactly on the next event boundary, with no
18
+ * duplicated or truncated bytes.
19
+ *
20
+ * Expiry: once the gateway buffer TTL (~5.5 min) elapses, resume returns 404
21
+ * `{"error":"Request not found"}`. Behavior is governed by `onResumeExpired`:
22
+ * `"error"` (default) surfaces a `GatewayDelegateError("resume-expired")` into
23
+ * the stream; `"accept-partial"` ends the stream cleanly with whatever was
24
+ * already delivered (the caller's higher layer — e.g. Think — can then continue
25
+ * or regenerate).
26
+ */
27
+
28
/**
 * The `Ai` binding type widened with the `fetch` method that the resume
 * request is issued through.
 */
type AiWithFetch = Ai & {
	fetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response>;
};

/**
 * What to do when a resume request is answered with 404 (buffer expired):
 * `"error"` fails the stream with a `resume-expired` error, `"accept-partial"`
 * closes it cleanly with whatever was already delivered.
 */
export type ResumeExpiredPolicy = "error" | "accept-partial";
33
+
34
/** Configuration for {@link createResumableStream}. */
export interface ResumableStreamOptions {
	/** Cloudflare AI binding (e.g. `env.AI`); its `fetch` performs the resume request. */
	binding: Ai;
	/** Id of the gateway the run was issued under; becomes part of the resume URL. */
	gateway: string;
	/** The `cf-aig-run-id` of the run to resume; becomes part of the resume URL. */
	runId: string;
	/**
	 * Body of the initial run-path response. Leave it out for **cross-invocation
	 * re-attach** (e.g. a new Durable Object invocation picking a run back up
	 * after eviction): the stream then opens by requesting
	 * `resume?from={fromEvent}` directly.
	 */
	initial?: ReadableStream<Uint8Array>;
	/**
	 * Absolute SSE event index to start counting from. Defaults to `0`. It is the
	 * first `from` value when `initial` is omitted, and the base of the running
	 * event counter, so later reconnects ask for the correct absolute index.
	 */
	fromEvent?: number;
	/** Policy applied when the resume endpoint answers 404. Defaults to `"error"`. */
	onResumeExpired?: ResumeExpiredPolicy;
	/** Number of reconnects allowed before the stream gives up. Defaults to 5. */
	maxReconnects?: number;
	/** Invoked just before each reconnect with the resume `from` index and the attempt number. */
	onReconnect?: (fromEvent: number, attempt: number) => void;
	/**
	 * Invoked with the cumulative SSE event offset each time complete events are
	 * forwarded. Persist `{ runId, eventOffset }` from here to support
	 * cross-invocation re-attach (throttle your own writes — this can fire per
	 * chunk).
	 */
	onProgress?: (eventOffset: number) => void;
}
66
+
67
/** Returns a freshly allocated array holding the bytes of `a` followed by the bytes of `b`. */
function concat(a: Uint8Array, b: Uint8Array): Uint8Array<ArrayBuffer> {
	const joined = new Uint8Array(new ArrayBuffer(a.length + b.length));
	joined.set(a);
	joined.set(b, a.length);
	return joined;
}
73
+
74
/**
 * Scans `buf` from the end for the last `\n\n` pair and returns the index just
 * past it, or -1 when `buf` contains no such pair.
 */
function lastEventBoundary(buf: Uint8Array): number {
	const LF = 0x0a;
	// `second` is the candidate index of the second LF of a terminator.
	let second = buf.length - 1;
	while (second >= 1) {
		if (buf[second] === LF && buf[second - 1] === LF) {
			return second + 1;
		}
		second -= 1;
	}
	return -1;
}
81
+
82
/**
 * Counts non-overlapping `\n\n` pairs in `buf`, scanning left to right. Each
 * pair is one SSE event terminator; a matched pair is skipped in full, so
 * `"\n\n\n"` counts once.
 */
function countEvents(buf: Uint8Array): number {
	const LF = 0x0a;
	let total = 0;
	let pos = 0;
	while (pos + 1 < buf.length) {
		if (buf[pos] === LF && buf[pos + 1] === LF) {
			total += 1;
			pos += 2;
		} else {
			pos += 1;
		}
	}
	return total;
}
93
+
94
/**
 * Builds the gateway resume URL for a run.
 *
 * `gateway` and `runId` are percent-encoded before being placed in the path, so
 * a value containing `/`, `?`, `#` or whitespace cannot change the URL's
 * structure. Ids made only of unreserved characters are emitted unchanged.
 *
 * @param gateway Gateway id the run was issued under.
 * @param runId The run id to resume.
 * @param from SSE event index to resume from.
 * @returns The absolute resume URL.
 */
function resumeUrl(gateway: string, runId: string, from: number): string {
	const gatewaySegment = encodeURIComponent(gateway);
	const runSegment = encodeURIComponent(runId);
	return `https://workers-binding.ai/ai-gateway/gateways/${gatewaySegment}/run/${runSegment}/resume?from=${from}`;
}
97
+
98
/**
 * Wraps a run-path SSE byte stream so that a mid-stream read failure is
 * recovered by requesting the gateway resume endpoint and continuing to feed
 * the same consumer.
 *
 * While the upstream is live, only whole SSE events (terminated by `\n\n`) are
 * forwarded; a trailing partial event is held back. On a read failure the
 * held-back partial is dropped and the resume request asks for
 * `from=<number of complete events already forwarded>`. When the upstream ends
 * normally, any held-back bytes are flushed before the stream closes.
 *
 * Terminal outcomes:
 * - Resume answers 404: the stream closes cleanly when `onResumeExpired` is
 *   `"accept-partial"`, otherwise it errors with
 *   `GatewayDelegateError("resume-expired")`.
 * - The resume fetch throws, or answers non-OK / without a body: the stream
 *   errors with `GatewayDelegateError("dispatch")`.
 * - A read fails after `maxReconnects` (default 5) reconnects were already
 *   used: the stream errors with `GatewayDelegateError("resume-expired")`.
 *
 * Cancellation: when the consumer cancels, the active upstream reader is
 * cancelled and the pump stops. Without this, `enqueue`/`close` on the
 * cancelled stream would throw inside the read loop and be mistaken for a
 * transport drop, triggering pointless resume requests.
 *
 * NOTE: the pump runs inside `start`, so chunks are forwarded as fast as the
 * upstream produces them, independent of consumer demand.
 *
 * @param options See {@link ResumableStreamOptions}.
 * @returns A byte stream that continues across reconnects.
 */
export function createResumableStream(options: ResumableStreamOptions): ReadableStream<Uint8Array> {
	const { binding, gateway, runId } = options;
	const maxReconnects = options.maxReconnects ?? 5;
	const onExpired = options.onResumeExpired ?? "error";

	let emittedEvents = options.fromEvent ?? 0; // absolute SSE event index reached
	let pending: Uint8Array<ArrayBuffer> = new Uint8Array(new ArrayBuffer(0));
	let reconnects = 0;
	// Set by the `cancel` handler; checked after every await in the pump.
	let cancelled = false;
	// Reader currently attached to the upstream body, so `cancel` can reach it.
	let activeReader: ReadableStreamDefaultReader<Uint8Array> | undefined;

	// Fetch `resume?from={emittedEvents}`. On a terminal outcome (expiry, error,
	// network throw, or consumer cancellation) it settles the controller where
	// applicable and returns null.
	async function fetchResume(
		controller: ReadableStreamDefaultController<Uint8Array>,
	): Promise<ReadableStream<Uint8Array> | null> {
		let res: Response;
		try {
			res = await (binding as AiWithFetch).fetch(resumeUrl(gateway, runId, emittedEvents), {
				method: "GET",
			});
		} catch (fetchErr) {
			if (!cancelled) {
				controller.error(
					new GatewayDelegateError(
						"dispatch",
						`Resume request threw at event ${emittedEvents}.`,
						fetchErr,
					),
				);
			}
			return null;
		}

		if (cancelled) {
			// The consumer went away while the request was in flight: drop the
			// response instead of touching the (already closed) controller.
			void res.body?.cancel().catch(() => {});
			return null;
		}

		if (res.status === 404) {
			if (onExpired === "accept-partial") {
				controller.close();
				return null;
			}
			controller.error(
				new GatewayDelegateError(
					"resume-expired",
					`Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer ` +
						"TTL (~5.5 min) elapsed; fall back to continuation or regeneration.",
				),
			);
			return null;
		}
		if (!res.ok || !res.body) {
			controller.error(
				new GatewayDelegateError(
					"dispatch",
					`Resume failed (${res.status}) at event ${emittedEvents}.`,
				),
			);
			return null;
		}
		return res.body;
	}

	return new ReadableStream<Uint8Array>({
		async start(controller) {
			// In-stream wrap starts from the live body; cross-invocation re-attach
			// (no `initial`) starts by resuming from `fromEvent`. An initial-attach
			// failure is terminal — it is not charged against the reconnect budget.
			let current: ReadableStream<Uint8Array>;
			if (options.initial) {
				current = options.initial;
			} else {
				const body = await fetchResume(controller);
				if (!body) return;
				current = body;
			}

			for (;;) {
				const reader = current.getReader();
				activeReader = reader;
				try {
					for (;;) {
						const { done, value } = await reader.read();
						// After cancellation the controller can no longer accept
						// enqueue/close; stop before touching it.
						if (cancelled) return;
						if (done) {
							// Clean end of upstream: flush any held-back partial bytes.
							if (pending.length > 0) {
								controller.enqueue(pending);
								pending = new Uint8Array(new ArrayBuffer(0));
							}
							controller.close();
							return;
						}
						if (!value || value.length === 0) continue;

						pending = concat(pending, value);
						const boundary = lastEventBoundary(pending);
						if (boundary > 0) {
							// Forward only whole events; keep the trailing partial.
							const complete = pending.slice(0, boundary);
							controller.enqueue(complete);
							emittedEvents += countEvents(complete);
							options.onProgress?.(emittedEvents);
							pending = pending.slice(boundary);
						}
					}
				} catch (err) {
					try {
						reader.releaseLock();
					} catch {
						// reader may already be released
					}
					activeReader = undefined;

					// A failure observed after cancellation is not a transport drop.
					if (cancelled) return;

					if (reconnects >= maxReconnects) {
						controller.error(
							new GatewayDelegateError(
								"resume-expired",
								`Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`,
								err,
							),
						);
						return;
					}

					// Discard the unfinished partial — resume realigns on the boundary.
					pending = new Uint8Array(new ArrayBuffer(0));
					reconnects++;
					options.onReconnect?.(emittedEvents, reconnects);

					const body = await fetchResume(controller);
					if (!body) return;
					current = body;
				}
			}
		},
		async cancel(reason) {
			cancelled = true;
			pending = new Uint8Array(new ArrayBuffer(0));
			try {
				// Propagate the cancellation upstream so the connection is released.
				await activeReader?.cancel(reason);
			} catch {
				// Best effort: the reader may already be released or the upstream
				// may already have failed; there is nothing left to clean up.
			}
		},
	});
}
package/src/streaming.ts CHANGED
@@ -6,6 +6,12 @@ import type {
6
6
  import { generateId } from "ai";
7
7
  import { mapWorkersAIFinishReason } from "./map-workersai-finish-reason";
8
8
  import { mapWorkersAIUsage } from "./map-workersai-usage";
9
+ import {
10
+ createAISDKToolCallId,
11
+ getToolNames,
12
+ isForcedToolChoice,
13
+ parseLeakedToolCalls,
14
+ } from "./utils";
9
15
 
10
16
  /**
11
17
  * Prepend a stream-start event to an existing LanguageModelV3 stream.
@@ -63,6 +69,10 @@ function isNullFinalizationChunk(tc: Record<string, unknown>): boolean {
63
69
  */
64
70
  export function getMappedStream(
65
71
  response: Response | ReadableStream<Uint8Array>,
72
+ salvageContext?: {
73
+ tools: Array<{ function: { name?: string } }> | undefined;
74
+ toolChoice: unknown;
75
+ },
66
76
  ): ReadableStream<LanguageModelV3StreamPart> {
67
77
  const rawStream =
68
78
  response instanceof ReadableStream
@@ -73,6 +83,18 @@ export function getMappedStream(
73
83
  throw new Error("No readable stream available for SSE parsing.");
74
84
  }
75
85
 
86
+ // gpt-oss harmony quirk: a forced tool call can be streamed as `content`
87
+ // text deltas instead of structured tool calls. When a tool was forced,
88
+ // buffer the text content (rather than emitting it incrementally) so we can
89
+ // reinterpret it as a tool call at flush time. Text is unexpected in forced
90
+ // mode anyway, so buffering it does not regress a useful stream.
91
+ // See https://github.com/cloudflare/ai/issues/560.
92
+ const knownToolNames = getToolNames(salvageContext?.tools);
93
+ const bufferContentForSalvage =
94
+ isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
95
+ let contentBuffer = "";
96
+ let anyToolCallStarted = false;
97
+
76
98
  // State shared across the transform
77
99
  let usage: LanguageModelV3Usage = {
78
100
  outputTokens: { total: 0, text: undefined, reasoning: undefined },
@@ -145,20 +167,24 @@ export function getMappedStream(
145
167
  if (nativeResponse != null && nativeResponse !== "") {
146
168
  const responseText = String(nativeResponse);
147
169
  if (responseText.length > 0) {
148
- // Close active reasoning block before text starts
149
- if (reasoningId) {
150
- controller.enqueue({ type: "reasoning-end", id: reasoningId });
151
- reasoningId = null;
152
- }
153
- if (!textId) {
154
- textId = generateId();
155
- controller.enqueue({ type: "text-start", id: textId });
170
+ if (bufferContentForSalvage) {
171
+ contentBuffer += responseText;
172
+ } else {
173
+ // Close active reasoning block before text starts
174
+ if (reasoningId) {
175
+ controller.enqueue({ type: "reasoning-end", id: reasoningId });
176
+ reasoningId = null;
177
+ }
178
+ if (!textId) {
179
+ textId = generateId();
180
+ controller.enqueue({ type: "text-start", id: textId });
181
+ }
182
+ controller.enqueue({
183
+ type: "text-delta",
184
+ id: textId,
185
+ delta: responseText,
186
+ });
156
187
  }
157
- controller.enqueue({
158
- type: "text-delta",
159
- id: textId,
160
- delta: responseText,
161
- });
162
188
  }
163
189
  }
164
190
 
@@ -196,20 +222,24 @@ export function getMappedStream(
196
222
 
197
223
  const textDelta = delta.content as string | undefined;
198
224
  if (textDelta && textDelta.length > 0) {
199
- // Close active reasoning block before text starts
200
- if (reasoningId) {
201
- controller.enqueue({ type: "reasoning-end", id: reasoningId });
202
- reasoningId = null;
203
- }
204
- if (!textId) {
205
- textId = generateId();
206
- controller.enqueue({ type: "text-start", id: textId });
225
+ if (bufferContentForSalvage) {
226
+ contentBuffer += textDelta;
227
+ } else {
228
+ // Close active reasoning block before text starts
229
+ if (reasoningId) {
230
+ controller.enqueue({ type: "reasoning-end", id: reasoningId });
231
+ reasoningId = null;
232
+ }
233
+ if (!textId) {
234
+ textId = generateId();
235
+ controller.enqueue({ type: "text-start", id: textId });
236
+ }
237
+ controller.enqueue({
238
+ type: "text-delta",
239
+ id: textId,
240
+ delta: textDelta,
241
+ });
207
242
  }
208
- controller.enqueue({
209
- type: "text-delta",
210
- id: textId,
211
- delta: textDelta,
212
- });
213
243
  }
214
244
 
215
245
  const deltaToolCalls = delta.tool_calls as
@@ -233,17 +263,59 @@ export function getMappedStream(
233
263
  closeToolCall(idx, controller);
234
264
  }
235
265
 
236
- // Close open text/reasoning blocks
266
+ // Close open reasoning block before any salvaged tool calls.
237
267
  if (reasoningId) {
238
268
  controller.enqueue({ type: "reasoning-end", id: reasoningId });
239
269
  }
270
+
271
+ // Salvage a forced tool call that streamed as buffered text.
272
+ let salvagedToolCalls = false;
273
+ if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
274
+ const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
275
+ if (salvaged.length > 0) {
276
+ for (const call of salvaged) {
277
+ controller.enqueue({
278
+ type: "tool-input-start",
279
+ id: call.toolCallId,
280
+ toolName: call.toolName,
281
+ });
282
+ controller.enqueue({
283
+ type: "tool-input-delta",
284
+ id: call.toolCallId,
285
+ delta: call.input,
286
+ });
287
+ controller.enqueue({ type: "tool-input-end", id: call.toolCallId });
288
+ controller.enqueue(call);
289
+ }
290
+ salvagedToolCalls = true;
291
+ // Stream warnings are fixed at stream-start, so surface the
292
+ // reinterpretation here for observability instead.
293
+ console.warn(
294
+ `[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`,
295
+ );
296
+ } else {
297
+ // Not a recoverable tool call — emit the buffered text as-is.
298
+ const id = generateId();
299
+ controller.enqueue({ type: "text-start", id });
300
+ controller.enqueue({ type: "text-delta", id, delta: contentBuffer });
301
+ controller.enqueue({ type: "text-end", id });
302
+ }
303
+ } else if (bufferContentForSalvage && contentBuffer.trim()) {
304
+ // Real tool calls were present alongside buffered text — emit text.
305
+ const id = generateId();
306
+ controller.enqueue({ type: "text-start", id });
307
+ controller.enqueue({ type: "text-delta", id, delta: contentBuffer });
308
+ controller.enqueue({ type: "text-end", id });
309
+ }
310
+
240
311
  if (textId) {
241
312
  controller.enqueue({ type: "text-end", id: textId });
242
313
  }
243
314
 
244
315
  // Detect premature termination
245
- const effectiveFinishReason =
246
- !receivedDone && receivedAnyData && !finishReason
316
+ const effectiveFinishReason = salvagedToolCalls
317
+ ? ({ unified: "tool-calls", raw: "stop" } as LanguageModelV3FinishReason)
318
+ : !receivedDone && receivedAnyData && !finishReason
247
319
  ? ({
248
320
  unified: "error",
249
321
  raw: "stream-truncated",
@@ -317,10 +389,11 @@ export function getMappedStream(
317
389
  closeToolCall(lastActiveToolIndex, controller);
318
390
  }
319
391
 
320
- const id = tcId || generateId();
392
+ const id = createAISDKToolCallId(tcId);
321
393
  const toolName = tcName || "";
322
394
  activeToolCalls.set(tcIndex, { id, toolName, args: "" });
323
395
  lastActiveToolIndex = tcIndex;
396
+ anyToolCallStarted = true;
324
397
 
325
398
  controller.enqueue({
326
399
  type: "tool-input-start",