workers-ai-provider 3.1.13 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,5 +1,344 @@
1
+ import { a as findProviderBySlug, c as WorkersAIGatewayError, i as detectProviderByUrl, l as _defineProperty, n as createGatewayProvider, o as wireableProviders, r as GATEWAY_PROVIDERS, s as WorkersAIFallbackError, t as createGatewayFetch } from "./gateway-provider-1USFWm7c.mjs";
2
+ import { TooManyEmbeddingValuesForCallError, UnsupportedFunctionalityError } from "@ai-sdk/provider";
1
3
  import { generateId } from "ai";
2
- import { TooManyEmbeddingValuesForCallError } from "@ai-sdk/provider";
4
+ //#region src/utils.ts
5
+ /**
6
+ * Normalize messages before passing to the Workers AI binding.
7
+ *
8
+ * The binding has strict schema validation that differs from the OpenAI API:
9
+ * - `content` must not be null
10
+ */
11
+ function normalizeMessagesForBinding(messages) {
12
+ return messages.map((msg) => {
13
+ const normalized = { ...msg };
14
+ if (normalized.content === null || normalized.content === void 0) normalized.content = "";
15
+ return normalized;
16
+ });
17
+ }
18
+ /**
19
+ * Creates a run method that emulates the Cloudflare Workers AI binding,
20
+ * but uses the Cloudflare REST API under the hood.
21
+ */
22
+ function createRun(config) {
23
+ const { accountId, apiKey } = config;
24
+ const fetchFn = config.fetch ?? globalThis.fetch;
25
+ return async function run(model, inputs, options) {
26
+ const { gateway, prefix: _prefix, extraHeaders, returnRawResponse, signal, ...passthroughOptions } = options || {};
27
+ const urlParams = new URLSearchParams();
28
+ for (const [key, value] of Object.entries(passthroughOptions)) {
29
+ if (value === void 0 || value === null) throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
30
+ try {
31
+ const valueStr = String(value);
32
+ if (!valueStr) continue;
33
+ urlParams.append(key, valueStr);
34
+ } catch {
35
+ throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
36
+ }
37
+ }
38
+ const queryString = urlParams.toString();
39
+ const modelPath = String(model).startsWith("run/") ? model : `run/${model}`;
40
+ const url = gateway?.id ? `https://gateway.ai.cloudflare.com/v1/${accountId}/${gateway.id}/workers-ai/${modelPath}${queryString ? `?${queryString}` : ""}` : `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/${modelPath}${queryString ? `?${queryString}` : ""}`;
41
+ const headers = {
42
+ Authorization: `Bearer ${apiKey}`,
43
+ "Content-Type": "application/json",
44
+ ...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {}
45
+ };
46
+ if (gateway) {
47
+ if (gateway.skipCache) headers["cf-aig-skip-cache"] = "true";
48
+ if (typeof gateway.cacheTtl === "number") headers["cf-aig-cache-ttl"] = String(gateway.cacheTtl);
49
+ if (gateway.cacheKey) headers["cf-aig-cache-key"] = gateway.cacheKey;
50
+ if (gateway.metadata) headers["cf-aig-metadata"] = JSON.stringify(gateway.metadata);
51
+ }
52
+ const response = await fetchFn(url, {
53
+ body: JSON.stringify(inputs),
54
+ headers,
55
+ method: "POST",
56
+ signal
57
+ });
58
+ if (!response.ok && !returnRawResponse) {
59
+ let errorBody;
60
+ try {
61
+ errorBody = await response.text();
62
+ } catch {
63
+ errorBody = "<unable to read response body>";
64
+ }
65
+ throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
66
+ }
67
+ if (returnRawResponse) return response;
68
+ if (inputs.stream === true) {
69
+ const contentType = response.headers.get("content-type") || "";
70
+ if (contentType.includes("event-stream") && response.body) return response.body;
71
+ if (response.body && !contentType.includes("json")) return response.body;
72
+ const retryResponse = await fetchFn(url, {
73
+ body: JSON.stringify({
74
+ ...inputs,
75
+ stream: false
76
+ }),
77
+ headers,
78
+ method: "POST",
79
+ signal
80
+ });
81
+ if (!retryResponse.ok) {
82
+ let errorBody;
83
+ try {
84
+ errorBody = await retryResponse.text();
85
+ } catch {
86
+ errorBody = "<unable to read response body>";
87
+ }
88
+ throw new Error(`Workers AI API error (${retryResponse.status} ${retryResponse.statusText}): ${errorBody}`);
89
+ }
90
+ return (await retryResponse.json()).result;
91
+ }
92
+ return (await response.json()).result;
93
+ };
94
+ }
95
+ /**
96
+ * Make a binary REST API call to Workers AI.
97
+ *
98
+ * Some models (e.g. `@cf/deepgram/nova-3`) require raw audio bytes
99
+ * with an appropriate `Content-Type` header instead of JSON.
100
+ *
101
+ * @param config Credentials config
102
+ * @param model Workers AI model name
103
+ * @param audioBytes Raw audio bytes
104
+ * @param contentType MIME type (e.g. "audio/wav")
105
+ * @param signal Optional AbortSignal
106
+ * @returns The parsed JSON response body
107
+ */
108
+ async function createRunBinary(config, model, audioBytes, contentType, signal) {
109
+ const url = `https://api.cloudflare.com/client/v4/accounts/${config.accountId}/ai/run/${model}`;
110
+ const response = await fetch(url, {
111
+ method: "POST",
112
+ headers: {
113
+ Authorization: `Bearer ${config.apiKey}`,
114
+ "Content-Type": contentType
115
+ },
116
+ body: audioBytes,
117
+ signal
118
+ });
119
+ if (!response.ok) {
120
+ let errorBody;
121
+ try {
122
+ errorBody = await response.text();
123
+ } catch {
124
+ errorBody = "<unable to read response body>";
125
+ }
126
+ throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
127
+ }
128
+ const data = await response.json();
129
+ return data.result ?? data;
130
+ }
131
+ /**
132
+ * Build the `response_format.json_schema` payload for native Workers AI models.
133
+ *
134
+ * Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
135
+ * Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
136
+ * only required by partner-model routes (e.g. `openai/...`), which never reach
137
+ * this code — they go through the gateway delegate and the real `@ai-sdk/*`
138
+ * providers, which build the envelope themselves. Wrapping the schema here would
139
+ * break native models, so we must keep the bare shape.
140
+ *
141
+ * The AI SDK's structured-output `name` / `description` (from
142
+ * `Output.object({ schema, name, description })` / `generateObject`) would
143
+ * otherwise be silently dropped on this path. We preserve them as the standard
144
+ * JSON Schema `title` (from `name`) and `description` keywords, which keeps the
145
+ * payload a valid bare schema while still passing the LLM guidance through.
146
+ *
147
+ * Existing schema-level `title` / `description` are never overwritten, empty
148
+ * strings are ignored, and the input schema object is never mutated.
149
+ *
150
+ * See https://github.com/cloudflare/ai/issues/559.
151
+ */
152
+ function buildJsonSchemaPayload(schema, name, description) {
153
+ if (typeof schema !== "object" || schema === null || Array.isArray(schema)) return schema;
154
+ const record = schema;
155
+ const addTitle = !!name && record.title === void 0;
156
+ const addDescription = !!description && record.description === void 0;
157
+ if (!addTitle && !addDescription) return schema;
158
+ return {
159
+ ...record,
160
+ ...addTitle ? { title: name } : {},
161
+ ...addDescription ? { description } : {}
162
+ };
163
+ }
164
+ function prepareToolsAndToolChoice(tools, toolChoice) {
165
+ if (tools == null) return {
166
+ tool_choice: void 0,
167
+ tools: void 0
168
+ };
169
+ const mappedTools = tools.map((tool) => ({
170
+ function: {
171
+ description: tool.type === "function" ? tool.description : void 0,
172
+ name: tool.name,
173
+ parameters: tool.type === "function" ? tool.inputSchema : void 0
174
+ },
175
+ type: "function"
176
+ }));
177
+ if (toolChoice == null) return {
178
+ tool_choice: void 0,
179
+ tools: mappedTools
180
+ };
181
+ const type = toolChoice.type;
182
+ switch (type) {
183
+ case "auto": return {
184
+ tool_choice: type,
185
+ tools: mappedTools
186
+ };
187
+ case "none": return {
188
+ tool_choice: type,
189
+ tools: mappedTools
190
+ };
191
+ case "required": return {
192
+ tool_choice: "required",
193
+ tools: mappedTools
194
+ };
195
+ case "tool": return {
196
+ tool_choice: {
197
+ type: "function",
198
+ function: { name: toolChoice.toolName }
199
+ },
200
+ tools: mappedTools
201
+ };
202
+ default: throw new Error(`Unsupported tool choice type: ${type}`);
203
+ }
204
+ }
205
+ const TOOL_CALL_ID_MARKER = "::cf-wai-tool-call::";
206
+ function createAISDKToolCallId(toolCallId) {
207
+ return `${toolCallId || generateId()}${TOOL_CALL_ID_MARKER}${generateId()}`;
208
+ }
209
+ function toWorkersAIToolCallId(toolCallId) {
210
+ const markerIndex = toolCallId.lastIndexOf(TOOL_CALL_ID_MARKER);
211
+ if (markerIndex === -1) return toolCallId;
212
+ if (markerIndex + 20 >= toolCallId.length) return toolCallId;
213
+ return toolCallId.slice(0, markerIndex);
214
+ }
215
+ function processToolCall(toolCall) {
216
+ const fn = "function" in toolCall && typeof toolCall.function === "object" && toolCall.function ? toolCall.function : null;
217
+ if (fn?.name) return {
218
+ input: typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments || {}),
219
+ toolCallId: createAISDKToolCallId(toolCall.id),
220
+ type: "tool-call",
221
+ toolName: fn.name
222
+ };
223
+ const flat = toolCall;
224
+ return {
225
+ input: typeof flat.arguments === "string" ? flat.arguments : JSON.stringify(flat.arguments || {}),
226
+ toolCallId: createAISDKToolCallId(flat.id),
227
+ type: "tool-call",
228
+ toolName: flat.name
229
+ };
230
+ }
231
+ function processToolCalls(output) {
232
+ if (output.tool_calls && Array.isArray(output.tool_calls)) return output.tool_calls.map((toolCall) => processToolCall(toolCall));
233
+ const choices = output.choices;
234
+ if (choices?.[0]?.message?.tool_calls && Array.isArray(choices[0].message.tool_calls)) return choices[0].message.tool_calls.map((toolCall) => processToolCall(toolCall));
235
+ return [];
236
+ }
237
+ /**
238
+ * Was a specific tool forced for this request?
239
+ *
240
+ * True for both `tool_choice: "required"` and the named-function form
241
+ * `{ type: "function", function: { name } }`.
242
+ */
243
+ function isForcedToolChoice(toolChoice) {
244
+ if (toolChoice === "required") return true;
245
+ return typeof toolChoice === "object" && toolChoice !== null && toolChoice.type === "function";
246
+ }
247
+ /**
248
+ * Parse tool calls that a model leaked as JSON text instead of structured
249
+ * `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
250
+ *
251
+ * Only JSON objects whose `name` is one of `knownToolNames` are recovered;
252
+ * everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
253
+ * hallucinated names) is ignored to avoid fabricating bogus calls.
254
+ */
255
+ function parseLeakedToolCalls(text, knownToolNames) {
256
+ let parsed;
257
+ try {
258
+ parsed = JSON.parse(text.trim());
259
+ } catch {
260
+ return [];
261
+ }
262
+ const candidates = Array.isArray(parsed) ? parsed : [parsed];
263
+ const salvaged = [];
264
+ for (const candidate of candidates) {
265
+ if (typeof candidate !== "object" || candidate === null) continue;
266
+ const obj = candidate;
267
+ const name = obj.name;
268
+ if (typeof name !== "string" || !knownToolNames.has(name)) continue;
269
+ let args;
270
+ if ("arguments" in obj) args = obj.arguments;
271
+ else if ("parameters" in obj) args = obj.parameters;
272
+ else {
273
+ const { name: _name, ...rest } = obj;
274
+ args = rest;
275
+ }
276
+ salvaged.push({
277
+ input: typeof args === "string" ? args : JSON.stringify(args ?? {}),
278
+ toolCallId: createAISDKToolCallId(void 0),
279
+ type: "tool-call",
280
+ toolName: name
281
+ });
282
+ }
283
+ return salvaged;
284
+ }
285
+ /** Collect the requested tool names from mapped tools. */
286
+ function getToolNames(tools) {
287
+ return new Set((tools ?? []).map((tool) => tool.function?.name).filter((name) => typeof name === "string"));
288
+ }
289
+ /**
290
+ * Salvage a tool call that a model leaked into text content instead of the
291
+ * structured `tool_calls` field.
292
+ *
293
+ * Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
294
+ * call as raw JSON in `message.content` with an empty `tool_calls` array and
295
+ * `finish_reason: "stop"` — typically when the forced tool is a poor fit for
296
+ * the conversation. The content looks like one of:
297
+ *
298
+ * {"name":"read_skill_resource","path":"feedback.txt"} (flat args)
299
+ * {"name":"calc","arguments":{"a":1}} (wrapped args)
300
+ * [{"name":"calc","parameters":{"a":1}}] (array form)
301
+ *
302
+ * This reinterprets that text as a structured tool call. It is intentionally
303
+ * narrow to avoid false positives:
304
+ * - only runs when a tool was *forced* (required / named-function), so a
305
+ * tool call was explicitly demanded by the caller;
306
+ * - only runs when there are no real structured tool calls to override;
307
+ * - only matches JSON objects whose `name` is one of the requested tools.
308
+ *
309
+ * Returns the salvaged tool calls, or `null` when nothing was salvaged.
310
+ *
311
+ * See https://github.com/cloudflare/ai/issues/560.
312
+ */
313
+ function salvageToolCallsFromText(output, context) {
314
+ if (!isForcedToolChoice(context.toolChoice)) return null;
315
+ if (processToolCalls(output).length > 0) return null;
316
+ const knownToolNames = getToolNames(context.tools);
317
+ if (knownToolNames.size === 0) return null;
318
+ const text = processText(output);
319
+ if (!text) return null;
320
+ const salvaged = parseLeakedToolCalls(text, knownToolNames);
321
+ return salvaged.length > 0 ? salvaged : null;
322
+ }
323
+ /**
324
+ * Extract text from a Workers AI response, handling multiple response formats:
325
+ * - OpenAI format: { choices: [{ message: { content: "..." } }] }
326
+ * - Native format: { response: "..." }
327
+ * - Structured output quirk: { response: { ... } } (object instead of string)
328
+ * - Structured output quirk: { response: "{ ... }" } (JSON string)
329
+ */
330
+ function processText(output) {
331
+ const choiceContent = output.choices?.[0]?.message?.content;
332
+ if (choiceContent != null && String(choiceContent).length > 0) return String(choiceContent);
333
+ if ("response" in output) {
334
+ const response = output.response;
335
+ if (typeof response === "object" && response !== null) return JSON.stringify(response);
336
+ if (typeof response === "number") return String(response);
337
+ if (response === null || response === void 0) return;
338
+ return String(response);
339
+ }
340
+ }
341
+ //#endregion
3
342
  //#region src/convert-to-workersai-chat-messages.ts
4
343
  /**
5
344
  * Normalise any LanguageModelV3DataContent value to a Uint8Array.
@@ -25,6 +364,17 @@ function toUint8Array$2(data) {
25
364
  if (data instanceof URL) throw new Error("URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead.");
26
365
  return null;
27
366
  }
367
+ function assertImageMediaType(mediaType) {
368
+ if (!mediaType) throw new UnsupportedFunctionalityError({
369
+ functionality: "file-part-without-media-type",
370
+ message: "Workers AI chat only supports image file parts with an image/* mediaType. Received a file part without a mediaType."
371
+ });
372
+ if (!mediaType.toLowerCase().startsWith("image/")) throw new UnsupportedFunctionalityError({
373
+ functionality: "non-image-file-part",
374
+ message: `Workers AI chat only supports image file parts with an image/* mediaType. Received mediaType "${mediaType}".`
375
+ });
376
+ return mediaType;
377
+ }
28
378
  function uint8ArrayToBase64$1(bytes) {
29
379
  let binary = "";
30
380
  const chunkSize = 8192;
@@ -51,10 +401,11 @@ function convertToWorkersAIChatMessages(prompt) {
51
401
  textParts.push(part.text);
52
402
  break;
53
403
  case "file": {
404
+ const mediaType = assertImageMediaType(part.mediaType);
54
405
  const imageBytes = toUint8Array$2(part.data);
55
406
  if (imageBytes) imageParts.push({
56
407
  image: imageBytes,
57
- mediaType: part.mediaType
408
+ mediaType
58
409
  });
59
410
  break;
60
411
  }
@@ -67,10 +418,9 @@ function convertToWorkersAIChatMessages(prompt) {
67
418
  });
68
419
  for (const img of imageParts) {
69
420
  const base64 = uint8ArrayToBase64$1(img.image);
70
- const mediaType = img.mediaType || "image/png";
71
421
  contentArray.push({
72
422
  type: "image_url",
73
- image_url: { url: `data:${mediaType};base64,${base64}` }
423
+ image_url: { url: `data:${img.mediaType};base64,${base64}` }
74
424
  });
75
425
  }
76
426
  messages.push({
@@ -101,7 +451,7 @@ function convertToWorkersAIChatMessages(prompt) {
101
451
  arguments: JSON.stringify(part.input),
102
452
  name: part.toolName
103
453
  },
104
- id: part.toolCallId,
454
+ id: toWorkersAIToolCallId(part.toolCallId),
105
455
  type: "function"
106
456
  });
107
457
  break;
@@ -149,7 +499,7 @@ function convertToWorkersAIChatMessages(prompt) {
149
499
  messages.push({
150
500
  content,
151
501
  name: toolResponse.toolName,
152
- tool_call_id: toolResponse.toolCallId,
502
+ tool_call_id: toWorkersAIToolCallId(toolResponse.toolCallId),
153
503
  role: "tool"
154
504
  });
155
505
  }
@@ -288,9 +638,13 @@ function isNullFinalizationChunk(tc) {
288
638
  * 1. Native format: { response: "chunk", tool_calls: [...] }
289
639
  * 2. OpenAI format: { choices: [{ delta: { content: "chunk" } }] }
290
640
  */
291
- function getMappedStream(response) {
641
+ function getMappedStream(response, salvageContext) {
292
642
  const rawStream = response instanceof ReadableStream ? response : response.body;
293
643
  if (!rawStream) throw new Error("No readable stream available for SSE parsing.");
644
+ const knownToolNames = getToolNames(salvageContext?.tools);
645
+ const bufferContentForSalvage = isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
646
+ let contentBuffer = "";
647
+ let anyToolCallStarted = false;
294
648
  let usage = {
295
649
  outputTokens: {
296
650
  total: 0,
@@ -336,7 +690,8 @@ function getMappedStream(response) {
336
690
  const nativeResponse = chunk.response;
337
691
  if (nativeResponse != null && nativeResponse !== "") {
338
692
  const responseText = String(nativeResponse);
339
- if (responseText.length > 0) {
693
+ if (responseText.length > 0) if (bufferContentForSalvage) contentBuffer += responseText;
694
+ else {
340
695
  if (reasoningId) {
341
696
  controller.enqueue({
342
697
  type: "reasoning-end",
@@ -386,7 +741,8 @@ function getMappedStream(response) {
386
741
  });
387
742
  }
388
743
  const textDelta = delta.content;
389
- if (textDelta && textDelta.length > 0) {
744
+ if (textDelta && textDelta.length > 0) if (bufferContentForSalvage) contentBuffer += textDelta;
745
+ else {
390
746
  if (reasoningId) {
391
747
  controller.enqueue({
392
748
  type: "reasoning-end",
@@ -429,11 +785,69 @@ function getMappedStream(response) {
429
785
  type: "reasoning-end",
430
786
  id: reasoningId
431
787
  });
788
+ let salvagedToolCalls = false;
789
+ if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
790
+ const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
791
+ if (salvaged.length > 0) {
792
+ for (const call of salvaged) {
793
+ controller.enqueue({
794
+ type: "tool-input-start",
795
+ id: call.toolCallId,
796
+ toolName: call.toolName
797
+ });
798
+ controller.enqueue({
799
+ type: "tool-input-delta",
800
+ id: call.toolCallId,
801
+ delta: call.input
802
+ });
803
+ controller.enqueue({
804
+ type: "tool-input-end",
805
+ id: call.toolCallId
806
+ });
807
+ controller.enqueue(call);
808
+ }
809
+ salvagedToolCalls = true;
810
+ console.warn(`[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`);
811
+ } else {
812
+ const id = generateId();
813
+ controller.enqueue({
814
+ type: "text-start",
815
+ id
816
+ });
817
+ controller.enqueue({
818
+ type: "text-delta",
819
+ id,
820
+ delta: contentBuffer
821
+ });
822
+ controller.enqueue({
823
+ type: "text-end",
824
+ id
825
+ });
826
+ }
827
+ } else if (bufferContentForSalvage && contentBuffer.trim()) {
828
+ const id = generateId();
829
+ controller.enqueue({
830
+ type: "text-start",
831
+ id
832
+ });
833
+ controller.enqueue({
834
+ type: "text-delta",
835
+ id,
836
+ delta: contentBuffer
837
+ });
838
+ controller.enqueue({
839
+ type: "text-end",
840
+ id
841
+ });
842
+ }
432
843
  if (textId) controller.enqueue({
433
844
  type: "text-end",
434
845
  id: textId
435
846
  });
436
- const effectiveFinishReason = !receivedDone && receivedAnyData && !finishReason ? {
847
+ const effectiveFinishReason = salvagedToolCalls ? {
848
+ unified: "tool-calls",
849
+ raw: "stop"
850
+ } : !receivedDone && receivedAnyData && !finishReason ? {
437
851
  unified: "error",
438
852
  raw: "stream-truncated"
439
853
  } : finishReason ?? {
@@ -492,7 +906,7 @@ function getMappedStream(response) {
492
906
  const tcId = tc.id;
493
907
  if (!activeToolCalls.has(tcIndex)) {
494
908
  if (lastActiveToolIndex != null && lastActiveToolIndex !== tcIndex) closeToolCall(lastActiveToolIndex, controller);
495
- const id = tcId || generateId();
909
+ const id = createAISDKToolCallId(tcId);
496
910
  const toolName = tcName || "";
497
911
  activeToolCalls.set(tcIndex, {
498
912
  id,
@@ -500,6 +914,7 @@ function getMappedStream(response) {
500
914
  args: ""
501
915
  });
502
916
  lastActiveToolIndex = tcIndex;
917
+ anyToolCallStarted = true;
503
918
  controller.enqueue({
504
919
  type: "tool-input-start",
505
920
  id,
@@ -508,303 +923,59 @@ function getMappedStream(response) {
508
923
  if (tcArgs != null && tcArgs !== "") {
509
924
  const delta = typeof tcArgs === "string" ? tcArgs : JSON.stringify(tcArgs);
510
925
  activeToolCalls.get(tcIndex).args += delta;
511
- controller.enqueue({
512
- type: "tool-input-delta",
513
- id,
514
- delta
515
- });
516
- }
517
- } else {
518
- const active = activeToolCalls.get(tcIndex);
519
- lastActiveToolIndex = tcIndex;
520
- if (tcArgs != null && tcArgs !== "") {
521
- const delta = typeof tcArgs === "string" ? tcArgs : JSON.stringify(tcArgs);
522
- active.args += delta;
523
- controller.enqueue({
524
- type: "tool-input-delta",
525
- id: active.id,
526
- delta
527
- });
528
- }
529
- }
530
- }
531
- }
532
- }
533
- /**
534
- * TransformStream that decodes a raw byte stream into SSE `data:` payloads.
535
- * Each output chunk is the string content after "data: " (one per SSE event).
536
- * Handles line buffering for partial chunks.
537
- */
538
- var SSEDecoder = class extends TransformStream {
539
- constructor() {
540
- let buffer = "";
541
- const decoder = new TextDecoder();
542
- super({
543
- transform(chunk, controller) {
544
- buffer += decoder.decode(chunk, { stream: true });
545
- const lines = buffer.split("\n");
546
- buffer = lines.pop() || "";
547
- for (const line of lines) {
548
- const trimmed = line.trim();
549
- if (!trimmed) continue;
550
- if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
551
- else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
552
- }
553
- },
554
- flush(controller) {
555
- if (buffer.trim()) {
556
- const trimmed = buffer.trim();
557
- if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
558
- else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
559
- }
560
- }
561
- });
562
- }
563
- };
564
- //#endregion
565
- //#region src/utils.ts
566
- /**
567
- * Normalize messages before passing to the Workers AI binding.
568
- *
569
- * The binding has strict schema validation that differs from the OpenAI API:
570
- * - `content` must not be null
571
- */
572
- function normalizeMessagesForBinding(messages) {
573
- return messages.map((msg) => {
574
- const normalized = { ...msg };
575
- if (normalized.content === null || normalized.content === void 0) normalized.content = "";
576
- return normalized;
577
- });
578
- }
579
- /**
580
- * Creates a run method that emulates the Cloudflare Workers AI binding,
581
- * but uses the Cloudflare REST API under the hood.
582
- */
583
- function createRun(config) {
584
- const { accountId, apiKey } = config;
585
- const fetchFn = config.fetch ?? globalThis.fetch;
586
- return async function run(model, inputs, options) {
587
- const { gateway, prefix: _prefix, extraHeaders, returnRawResponse, signal, ...passthroughOptions } = options || {};
588
- const urlParams = new URLSearchParams();
589
- for (const [key, value] of Object.entries(passthroughOptions)) {
590
- if (value === void 0 || value === null) throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
591
- try {
592
- const valueStr = String(value);
593
- if (!valueStr) continue;
594
- urlParams.append(key, valueStr);
595
- } catch {
596
- throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
597
- }
598
- }
599
- const queryString = urlParams.toString();
600
- const modelPath = String(model).startsWith("run/") ? model : `run/${model}`;
601
- const url = gateway?.id ? `https://gateway.ai.cloudflare.com/v1/${accountId}/${gateway.id}/workers-ai/${modelPath}${queryString ? `?${queryString}` : ""}` : `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/${modelPath}${queryString ? `?${queryString}` : ""}`;
602
- const headers = {
603
- Authorization: `Bearer ${apiKey}`,
604
- "Content-Type": "application/json",
605
- ...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {}
606
- };
607
- if (gateway) {
608
- if (gateway.skipCache) headers["cf-aig-skip-cache"] = "true";
609
- if (typeof gateway.cacheTtl === "number") headers["cf-aig-cache-ttl"] = String(gateway.cacheTtl);
610
- if (gateway.cacheKey) headers["cf-aig-cache-key"] = gateway.cacheKey;
611
- if (gateway.metadata) headers["cf-aig-metadata"] = JSON.stringify(gateway.metadata);
612
- }
613
- const response = await fetchFn(url, {
614
- body: JSON.stringify(inputs),
615
- headers,
616
- method: "POST",
617
- signal
618
- });
619
- if (!response.ok && !returnRawResponse) {
620
- let errorBody;
621
- try {
622
- errorBody = await response.text();
623
- } catch {
624
- errorBody = "<unable to read response body>";
625
- }
626
- throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
627
- }
628
- if (returnRawResponse) return response;
629
- if (inputs.stream === true) {
630
- const contentType = response.headers.get("content-type") || "";
631
- if (contentType.includes("event-stream") && response.body) return response.body;
632
- if (response.body && !contentType.includes("json")) return response.body;
633
- const retryResponse = await fetchFn(url, {
634
- body: JSON.stringify({
635
- ...inputs,
636
- stream: false
637
- }),
638
- headers,
639
- method: "POST",
640
- signal
641
- });
642
- if (!retryResponse.ok) {
643
- let errorBody;
644
- try {
645
- errorBody = await retryResponse.text();
646
- } catch {
647
- errorBody = "<unable to read response body>";
648
- }
649
- throw new Error(`Workers AI API error (${retryResponse.status} ${retryResponse.statusText}): ${errorBody}`);
650
- }
651
- return (await retryResponse.json()).result;
652
- }
653
- return (await response.json()).result;
654
- };
655
- }
656
- /**
657
- * Make a binary REST API call to Workers AI.
658
- *
659
- * Some models (e.g. `@cf/deepgram/nova-3`) require raw audio bytes
660
- * with an appropriate `Content-Type` header instead of JSON.
661
- *
662
- * @param config Credentials config
663
- * @param model Workers AI model name
664
- * @param audioBytes Raw audio bytes
665
- * @param contentType MIME type (e.g. "audio/wav")
666
- * @param signal Optional AbortSignal
667
- * @returns The parsed JSON response body
668
- */
669
- async function createRunBinary(config, model, audioBytes, contentType, signal) {
670
- const url = `https://api.cloudflare.com/client/v4/accounts/${config.accountId}/ai/run/${model}`;
671
- const response = await fetch(url, {
672
- method: "POST",
673
- headers: {
674
- Authorization: `Bearer ${config.apiKey}`,
675
- "Content-Type": contentType
676
- },
677
- body: audioBytes,
678
- signal
679
- });
680
- if (!response.ok) {
681
- let errorBody;
682
- try {
683
- errorBody = await response.text();
684
- } catch {
685
- errorBody = "<unable to read response body>";
926
+ controller.enqueue({
927
+ type: "tool-input-delta",
928
+ id,
929
+ delta
930
+ });
931
+ }
932
+ } else {
933
+ const active = activeToolCalls.get(tcIndex);
934
+ lastActiveToolIndex = tcIndex;
935
+ if (tcArgs != null && tcArgs !== "") {
936
+ const delta = typeof tcArgs === "string" ? tcArgs : JSON.stringify(tcArgs);
937
+ active.args += delta;
938
+ controller.enqueue({
939
+ type: "tool-input-delta",
940
+ id: active.id,
941
+ delta
942
+ });
943
+ }
944
+ }
686
945
  }
687
- throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
688
- }
689
- const data = await response.json();
690
- return data.result ?? data;
691
- }
692
- function prepareToolsAndToolChoice(tools, toolChoice) {
693
- if (tools == null) return {
694
- tool_choice: void 0,
695
- tools: void 0
696
- };
697
- const mappedTools = tools.map((tool) => ({
698
- function: {
699
- description: tool.type === "function" ? tool.description : void 0,
700
- name: tool.name,
701
- parameters: tool.type === "function" ? tool.inputSchema : void 0
702
- },
703
- type: "function"
704
- }));
705
- if (toolChoice == null) return {
706
- tool_choice: void 0,
707
- tools: mappedTools
708
- };
709
- const type = toolChoice.type;
710
- switch (type) {
711
- case "auto": return {
712
- tool_choice: type,
713
- tools: mappedTools
714
- };
715
- case "none": return {
716
- tool_choice: type,
717
- tools: mappedTools
718
- };
719
- case "required": return {
720
- tool_choice: "required",
721
- tools: mappedTools
722
- };
723
- case "tool": return {
724
- tool_choice: "required",
725
- tools: mappedTools.filter((tool) => tool.function.name === toolChoice.toolName)
726
- };
727
- default: throw new Error(`Unsupported tool choice type: ${type}`);
728
946
  }
729
947
  }
730
- function processToolCall(toolCall) {
731
- const fn = "function" in toolCall && typeof toolCall.function === "object" && toolCall.function ? toolCall.function : null;
732
- if (fn?.name) return {
733
- input: typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments || {}),
734
- toolCallId: toolCall.id || generateId(),
735
- type: "tool-call",
736
- toolName: fn.name
737
- };
738
- const flat = toolCall;
739
- return {
740
- input: typeof flat.arguments === "string" ? flat.arguments : JSON.stringify(flat.arguments || {}),
741
- toolCallId: flat.id || generateId(),
742
- type: "tool-call",
743
- toolName: flat.name
744
- };
745
- }
746
- function processToolCalls(output) {
747
- if (output.tool_calls && Array.isArray(output.tool_calls)) return output.tool_calls.map((toolCall) => processToolCall(toolCall));
748
- const choices = output.choices;
749
- if (choices?.[0]?.message?.tool_calls && Array.isArray(choices[0].message.tool_calls)) return choices[0].message.tool_calls.map((toolCall) => processToolCall(toolCall));
750
- return [];
751
- }
752
948
  /**
753
- * Extract text from a Workers AI response, handling multiple response formats:
754
- * - OpenAI format: { choices: [{ message: { content: "..." } }] }
755
- * - Native format: { response: "..." }
756
- * - Structured output quirk: { response: { ... } } (object instead of string)
757
- * - Structured output quirk: { response: "{ ... }" } (JSON string)
949
+ * TransformStream that decodes a raw byte stream into SSE `data:` payloads.
950
+ * Each output chunk is the string content after "data: " (one per SSE event).
951
+ * Handles line buffering for partial chunks.
758
952
  */
759
- function processText(output) {
760
- const choiceContent = output.choices?.[0]?.message?.content;
761
- if (choiceContent != null && String(choiceContent).length > 0) return String(choiceContent);
762
- if ("response" in output) {
763
- const response = output.response;
764
- if (typeof response === "object" && response !== null) return JSON.stringify(response);
765
- if (typeof response === "number") return String(response);
766
- if (response === null || response === void 0) return;
767
- return String(response);
768
- }
769
- }
770
- //#endregion
771
- //#region \0@oxc-project+runtime@0.122.0/helpers/typeof.js
772
- function _typeof(o) {
773
- "@babel/helpers - typeof";
774
- return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function(o) {
775
- return typeof o;
776
- } : function(o) {
777
- return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o;
778
- }, _typeof(o);
779
- }
780
- //#endregion
781
- //#region \0@oxc-project+runtime@0.122.0/helpers/toPrimitive.js
782
- function toPrimitive(t, r) {
783
- if ("object" != _typeof(t) || !t) return t;
784
- var e = t[Symbol.toPrimitive];
785
- if (void 0 !== e) {
786
- var i = e.call(t, r || "default");
787
- if ("object" != _typeof(i)) return i;
788
- throw new TypeError("@@toPrimitive must return a primitive value.");
953
+ var SSEDecoder = class extends TransformStream {
954
+ constructor() {
955
+ let buffer = "";
956
+ const decoder = new TextDecoder();
957
+ super({
958
+ transform(chunk, controller) {
959
+ buffer += decoder.decode(chunk, { stream: true });
960
+ const lines = buffer.split("\n");
961
+ buffer = lines.pop() || "";
962
+ for (const line of lines) {
963
+ const trimmed = line.trim();
964
+ if (!trimmed) continue;
965
+ if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
966
+ else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
967
+ }
968
+ },
969
+ flush(controller) {
970
+ if (buffer.trim()) {
971
+ const trimmed = buffer.trim();
972
+ if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
973
+ else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
974
+ }
975
+ }
976
+ });
789
977
  }
790
- return ("string" === r ? String : Number)(t);
791
- }
792
- //#endregion
793
- //#region \0@oxc-project+runtime@0.122.0/helpers/toPropertyKey.js
794
- function toPropertyKey(t) {
795
- var i = toPrimitive(t, "string");
796
- return "symbol" == _typeof(i) ? i : i + "";
797
- }
798
- //#endregion
799
- //#region \0@oxc-project+runtime@0.122.0/helpers/defineProperty.js
800
- function _defineProperty(e, r, t) {
801
- return (r = toPropertyKey(r)) in e ? Object.defineProperty(e, r, {
802
- value: t,
803
- enumerable: !0,
804
- configurable: !0,
805
- writable: !0
806
- }) : e[r] = t, e;
807
- }
978
+ };
808
979
  //#endregion
809
980
  //#region src/aisearch-chat-language-model.ts
810
981
  var AISearchChatLanguageModel = class {
@@ -973,18 +1144,21 @@ var WorkersAIChatLanguageModel = class {
973
1144
  },
974
1145
  warnings
975
1146
  };
976
- case "json": return {
977
- args: {
978
- ...baseArgs,
979
- response_format: {
980
- type: "json_schema",
981
- json_schema: responseFormat?.type === "json" ? responseFormat.schema : void 0
1147
+ case "json": {
1148
+ const json = responseFormat?.type === "json" ? responseFormat : void 0;
1149
+ return {
1150
+ args: {
1151
+ ...baseArgs,
1152
+ response_format: {
1153
+ type: "json_schema",
1154
+ json_schema: buildJsonSchemaPayload(json?.schema, json?.name, json?.description)
1155
+ },
1156
+ tools: void 0,
1157
+ tool_choice: void 0
982
1158
  },
983
- tools: void 0,
984
- tool_choice: void 0
985
- },
986
- warnings
987
- };
1159
+ warnings
1160
+ };
1161
+ }
988
1162
  default: throw new Error(`Unsupported type: ${type}`);
989
1163
  }
990
1164
  }
@@ -1043,6 +1217,38 @@ var WorkersAIChatLanguageModel = class {
1043
1217
  ...passthroughOptions
1044
1218
  };
1045
1219
  }
1220
+ /**
1221
+ * Extract reasoning, text, and tool calls from a non-streaming response.
1222
+ *
1223
+ * Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
1224
+ * path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
1225
+ * and is retried non-streaming). When a forced tool call was leaked into
1226
+ * text content (gpt-oss harmony quirk), it is salvaged into a structured
1227
+ * tool call and the leaked JSON text is suppressed. A warning is appended in
1228
+ * place so callers can observe the reinterpretation.
1229
+ */
1230
+ extractContent(outputRecord, args, warnings) {
1231
+ const choices = outputRecord.choices;
1232
+ const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
1233
+ const toolCalls = processToolCalls(outputRecord);
1234
+ const salvaged = toolCalls.length === 0 ? salvageToolCallsFromText(outputRecord, {
1235
+ tools: args.tools,
1236
+ toolChoice: args.tool_choice
1237
+ }) : null;
1238
+ if (salvaged) warnings.push({
1239
+ type: "other",
1240
+ message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`
1241
+ });
1242
+ return {
1243
+ reasoningContent,
1244
+ text: salvaged ? "" : processText(outputRecord) ?? "",
1245
+ toolCalls: salvaged ?? toolCalls,
1246
+ finishReason: salvaged ? {
1247
+ unified: "tool-calls",
1248
+ raw: "stop"
1249
+ } : mapWorkersAIFinishReason(outputRecord)
1250
+ };
1251
+ }
1046
1252
  async doGenerate(options) {
1047
1253
  const { args, warnings } = this.getArgs(options);
1048
1254
  const { messages } = convertToWorkersAIChatMessages(options.prompt);
@@ -1054,10 +1260,9 @@ var WorkersAIChatLanguageModel = class {
1054
1260
  });
1055
1261
  if (output instanceof ReadableStream) throw new Error("Unexpected streaming response from non-streaming request. Check that `stream: true` was not passed.");
1056
1262
  const outputRecord = output;
1057
- const choices = outputRecord.choices;
1058
- const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
1263
+ const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
1059
1264
  return {
1060
- finishReason: mapWorkersAIFinishReason(outputRecord),
1265
+ finishReason,
1061
1266
  content: [
1062
1267
  ...reasoningContent ? [{
1063
1268
  type: "reasoning",
@@ -1065,9 +1270,9 @@ var WorkersAIChatLanguageModel = class {
1065
1270
  }] : [],
1066
1271
  {
1067
1272
  type: "text",
1068
- text: processText(outputRecord) ?? ""
1273
+ text
1069
1274
  },
1070
- ...processToolCalls(outputRecord)
1275
+ ...toolCalls
1071
1276
  ],
1072
1277
  usage: mapWorkersAIUsage(output),
1073
1278
  warnings
@@ -1085,10 +1290,12 @@ var WorkersAIChatLanguageModel = class {
1085
1290
  ...runOptions,
1086
1291
  signal: options.abortSignal
1087
1292
  });
1088
- if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response), warnings) };
1293
+ if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response, {
1294
+ tools: args.tools,
1295
+ toolChoice: args.tool_choice
1296
+ }), warnings) };
1089
1297
  const outputRecord = response;
1090
- const choices = outputRecord.choices;
1091
- const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
1298
+ const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
1092
1299
  let textId = null;
1093
1300
  let reasoningId = null;
1094
1301
  return { stream: new ReadableStream({ start(controller) {
@@ -1112,7 +1319,6 @@ var WorkersAIChatLanguageModel = class {
1112
1319
  id: reasoningId
1113
1320
  });
1114
1321
  }
1115
- const text = processText(outputRecord);
1116
1322
  if (text) {
1117
1323
  textId = generateId();
1118
1324
  controller.enqueue({
@@ -1129,10 +1335,10 @@ var WorkersAIChatLanguageModel = class {
1129
1335
  id: textId
1130
1336
  });
1131
1337
  }
1132
- for (const toolCall of processToolCalls(outputRecord)) controller.enqueue(toolCall);
1338
+ for (const toolCall of toolCalls) controller.enqueue(toolCall);
1133
1339
  controller.enqueue({
1134
1340
  type: "finish",
1135
- finishReason: mapWorkersAIFinishReason(outputRecord),
1341
+ finishReason,
1136
1342
  usage: mapWorkersAIUsage(response)
1137
1343
  });
1138
1344
  controller.close();
@@ -1495,8 +1701,422 @@ function documentsToContexts(documents, warnings) {
1495
1701
  */
1496
1702
  var AutoRAGChatLanguageModel = class extends AISearchChatLanguageModel {};
1497
1703
  //#endregion
1704
+ //#region src/client-fallback.ts
1705
/**
 * Wrap a chain of models so a failed *pre-stream* dispatch falls through to the
 * next model, preserving resume on each leg's own transport. If every leg fails,
 * throws a {@link WorkersAIFallbackError} carrying the full attempt tree.
 *
 * Fallback triggers on `doGenerate`/`doStream` rejection (the dispatch never
 * produced a stream). Errors that surface *mid-stream* — after content has
 * already been emitted — are not recoverable here and propagate as-is.
 *
 * A rejection that happens while the caller's `abortSignal` is already aborted
 * is NOT treated as a leg failure: the original error is rethrown immediately so
 * the remaining legs are not dispatched with a dead signal and the caller still
 * sees its abort error rather than a fallback error.
 *
 * @param legs - Non-empty array of `{ slug, model, transport }`; the first leg
 *   is the primary and supplies `provider`, `modelId` and `supportedUrls`.
 * @returns A `specificationVersion: "v3"` model object whose `doGenerate` /
 *   `doStream` try each leg in order.
 * @throws {Error} When `legs` is empty.
 */
function createClientFallbackModel(legs) {
	if (legs.length === 0) throw new Error("createClientFallbackModel requires at least one model leg.");
	const primary = legs[0].model;

	/** Run `run` against each leg in order, returning the first success. */
	async function attempt(run, abortSignal) {
		const attempts = [];
		for (const leg of legs) {
			try {
				return await run(leg.model);
			} catch (e) {
				// The caller gave up: trying further legs would only re-fail on the
				// same aborted signal and hide the abort behind a fallback error.
				if (abortSignal?.aborted) throw e;
				const err = WorkersAIGatewayError.fromUnknown(e);
				attempts.push({
					model: leg.slug,
					transport: leg.transport,
					ok: false,
					status: err.status,
					error: err
				});
			}
		}
		throw new WorkersAIFallbackError(attempts);
	}

	return {
		specificationVersion: "v3",
		provider: primary.provider,
		modelId: primary.modelId,
		supportedUrls: primary.supportedUrls,
		doGenerate(options) {
			return attempt((m) => m.doGenerate(options), options?.abortSignal);
		},
		doStream(options) {
			return attempt((m) => m.doStream(options), options?.abortSignal);
		}
	};
}
1752
+ //#endregion
1753
+ //#region src/resumable-stream.ts
1754
/**
 * Join two byte arrays into a newly allocated `Uint8Array`.
 *
 * @param a - Leading bytes.
 * @param b - Trailing bytes.
 * @returns A fresh array of length `a.length + b.length`; inputs are untouched.
 */
function concat(a, b) {
	const merged = new Uint8Array(a.length + b.length);
	merged.set(a);
	merged.set(b, a.length);
	return merged;
}
1760
/**
 * Locate the end of the last complete SSE event in `buf`.
 *
 * @param buf - Byte array to scan (10 is the byte value of `\n`).
 * @returns The index just past the final `\n\n` pair, or -1 when `buf`
 *   contains no such pair.
 */
function lastEventBoundary(buf) {
	// Walk backwards, treating `idx` as the second newline of a candidate pair.
	let idx = buf.length - 1;
	while (idx >= 1) {
		if (buf[idx] === 10 && buf[idx - 1] === 10) return idx + 1;
		idx -= 1;
	}
	return -1;
}
1765
/**
 * Count the `\n\n` terminators (one per complete SSE event) in `buf`.
 *
 * Pairs are counted without overlap: after a match the scan skips both
 * newline bytes, so `\n\n\n` counts as one terminator.
 *
 * @param buf - Byte array to scan (10 is the byte value of `\n`).
 * @returns Number of non-overlapping `\n\n` pairs.
 */
function countEvents(buf) {
	let total = 0;
	let pos = 0;
	while (pos + 1 < buf.length) {
		if (buf[pos] === 10 && buf[pos + 1] === 10) {
			total += 1;
			pos += 2;
		} else {
			pos += 1;
		}
	}
	return total;
}
1774
/**
 * Build the resume endpoint URL for a gateway run.
 *
 * @param gateway - Gateway id, interpolated into the path as-is.
 * @param runId - Run id, interpolated into the path as-is.
 * @param from - Value for the `from` query parameter.
 * @returns The resume URL string.
 */
function resumeUrl(gateway, runId, from) {
	const path = `ai-gateway/gateways/${gateway}/run/${runId}/resume`;
	return `https://workers-binding.ai/${path}?from=${from}`;
}
1777
/**
 * Create a byte stream that forwards complete SSE events from a gateway run
 * and transparently reconnects through the resume endpoint when the upstream
 * read fails.
 *
 * Only whole events (everything up to the last `\n\n`) are forwarded; a partial
 * trailing event is buffered. On a read error the partial buffer is discarded
 * and the stream is re-requested from the number of events already emitted.
 *
 * Cancelling the returned stream cancels the upstream body currently being
 * read and stops any further reconnect attempts.
 *
 * @param options.binding - Object whose `fetch(url, init)` performs the resume request.
 * @param options.gateway - Gateway id used in the resume URL.
 * @param options.runId - Run id used in the resume URL.
 * @param [options.initial] - Upstream body to start from; when absent the
 *   first body is fetched from the resume endpoint.
 * @param [options.fromEvent=0] - Number of events already delivered.
 * @param [options.maxReconnects=5] - Reconnect budget before the stream errors.
 * @param [options.onResumeExpired="error"] - `"accept-partial"` closes the
 *   stream on a 404 resume; anything else errors it.
 * @param [options.onProgress] - Called with the running event count after each forward.
 * @param [options.onReconnect] - Called with `(emittedEvents, reconnects)` before each resume.
 * @returns A `ReadableStream` of `Uint8Array` chunks.
 */
function createResumableStream(options) {
	const { binding, gateway, runId } = options;
	const maxReconnects = options.maxReconnects ?? 5;
	const onExpired = options.onResumeExpired ?? "error";
	let emittedEvents = options.fromEvent ?? 0;
	let pending = new Uint8Array(0);
	let reconnects = 0;
	// Set once the consumer cancels; every later step must become a no-op.
	let cancelled = false;
	// Reader currently attached to the upstream body, so cancel() can reach it.
	let activeReader = null;

	/** Request the resume body; errors/closes `controller` and returns null on failure. */
	async function fetchResume(controller) {
		let res;
		try {
			res = await binding.fetch(resumeUrl(gateway, runId, emittedEvents), { method: "GET" });
		} catch (fetchErr) {
			controller.error(new GatewayDelegateError("dispatch", `Resume request threw at event ${emittedEvents}.`, fetchErr));
			return null;
		}
		if (cancelled) {
			// The consumer went away while the request was in flight: release the
			// body (best effort) instead of touching the already-closed controller.
			await res.body?.cancel().catch(() => {});
			return null;
		}
		if (res.status === 404) {
			if (onExpired === "accept-partial") {
				controller.close();
				return null;
			}
			controller.error(new GatewayDelegateError("resume-expired", `Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer TTL (~5.5 min) elapsed; fall back to continuation or regeneration.`));
			return null;
		}
		if (!res.ok || !res.body) {
			controller.error(new GatewayDelegateError("dispatch", `Resume failed (${res.status}) at event ${emittedEvents}.`));
			return null;
		}
		return res.body;
	}

	return new ReadableStream({
		async start(controller) {
			let current;
			if (options.initial) current = options.initial;
			else {
				const body = await fetchResume(controller);
				if (!body) return;
				current = body;
			}
			for (;;) {
				const reader = current.getReader();
				activeReader = reader;
				try {
					for (;;) {
						const { done, value } = await reader.read();
						// After cancel the controller is closed; enqueue/close would throw.
						if (cancelled) return;
						if (done) {
							if (pending.length > 0) {
								controller.enqueue(pending);
								pending = new Uint8Array(0);
							}
							controller.close();
							return;
						}
						if (!value || value.length === 0) continue;
						pending = concat(pending, value);
						const boundary = lastEventBoundary(pending);
						if (boundary > 0) {
							const complete = pending.slice(0, boundary);
							controller.enqueue(complete);
							emittedEvents += countEvents(complete);
							options.onProgress?.(emittedEvents);
							pending = pending.slice(boundary);
						}
					}
				} catch (err) {
					activeReader = null;
					try {
						reader.releaseLock();
					} catch {}
					// A failure observed after cancel is not a transport error; do not reconnect.
					if (cancelled) return;
					if (reconnects >= maxReconnects) {
						controller.error(new GatewayDelegateError("resume-expired", `Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`, err));
						return;
					}
					// The partial event will be re-sent by the resume endpoint.
					pending = new Uint8Array(0);
					reconnects++;
					options.onReconnect?.(emittedEvents, reconnects);
					const body = await fetchResume(controller);
					if (!body) return;
					current = body;
				}
			}
		},
		async cancel(reason) {
			cancelled = true;
			const reader = activeReader;
			activeReader = null;
			// Propagate the cancel upstream so the source body is released.
			if (reader) await reader.cancel(reason);
		}
	});
}
1856
+ //#endregion
1857
+ //#region src/gateway-delegate.ts
1858
/**
 * Split a `vendor/model` slug at its first `/`.
 *
 * The part before the first slash is the resolver key (which registry entry
 * handles the slug); everything after it is the provider-native model id, so
 * multi-segment ids such as `openrouter/anthropic/claude` keep their inner
 * slashes.
 *
 * @param slug - Slug of the form `"<provider>/<model>"`.
 * @returns `{ resolverKey, modelId }`.
 * @throws {GatewayDelegateError} kind `"config"` when the slug has no `/` or
 *   either side of the first `/` is empty.
 */
function parseSlug(slug) {
	const [resolverKey, ...modelSegments] = slug.split("/");
	if (modelSegments.length === 0) {
		throw new GatewayDelegateError("config", `Model slug "${slug}" has no resolver key. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
	}
	const modelId = modelSegments.join("/");
	if (!resolverKey || !modelId) {
		throw new GatewayDelegateError("config", `Model slug "${slug}" is malformed. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
	}
	return { resolverKey, modelId };
}
1874
/**
 * Look up the registry entry for a parsed slug.
 *
 * @param slug - Original slug, used only in error messages.
 * @param parsed - Result of `parseSlug`; `resolverKey` is the lookup key.
 * @returns The registry entry returned by `findProviderBySlug`.
 * @throws {GatewayDelegateError} kind `"config"` when no entry exists, or when
 *   the entry has no `wireFormat`.
 */
function resolveProvider(slug, parsed) {
	const { resolverKey } = parsed;
	const info = findProviderBySlug(resolverKey);
	if (info && info.wireFormat) return info;
	const message = info
		? `Provider "${resolverKey}" is not chat/completions-shaped and has no built-in parser. Reach it with createGatewayProvider (bring your own @ai-sdk provider).`
		: `Unknown gateway provider "${resolverKey}" (from slug "${slug}"). See the AI Gateway provider directory for valid slugs, or use createGatewayProvider to bring your own @ai-sdk provider.`;
	throw new GatewayDelegateError("config", message);
}
1884
/**
 * Decide which transport (`"run"` or `"gateway"`) a call uses and whether
 * resumable streaming is enabled, validating the requested option combination.
 *
 * Server-side fallback and caching are gateway-only features: requesting either
 * forces the gateway path and turns resume off. If resume was only defaulted, a
 * warning string is returned; if it was explicitly requested, an error is thrown.
 *
 * @param opts - Effective call options (`transport`, `resume`, `fallback`,
 *   `cacheTtl`, `skipCache` are read).
 * @param resumeExplicitlyTrue - Whether the caller explicitly asked for resume.
 * @param [runCatalog=true] - Whether the provider is reachable on the run path.
 * @param [gatewayAvailable=true] - Whether the provider has a gateway path.
 * @returns `{ transport, resumeEnabled, warnings }`.
 * @throws {GatewayDelegateError} kind `"config"` for contradictory options.
 */
function selectTransport(opts, resumeExplicitlyTrue, runCatalog = true, gatewayAvailable = true) {
	const warnings = [];
	const serverFallback = opts.fallback?.mode === "server";
	const caching = opts.cacheTtl !== void 0 || opts.skipCache === true;
	const needsGateway = serverFallback || caching;
	const feature = serverFallback ? "fallback.mode:\"server\"" : "caching (cacheTtl/skipCache)";
	const forcedGateway = opts.transport === "gateway";
	const forcedRun = opts.transport === "run";

	const reject = (message) => {
		throw new GatewayDelegateError("config", message);
	};
	const gatewayWithoutResume = () => ({
		transport: "gateway",
		resumeEnabled: false,
		warnings
	});

	// Run-catalog provider with no gateway path: anything needing the gateway is impossible.
	if (runCatalog && !gatewayAvailable && (forcedGateway || needsGateway)) {
		reject(`${forcedGateway ? "transport:\"gateway\"" : feature} is unavailable: this provider is on the unified run catalog but is not a native gateway provider, so it has no gateway path (no caching, server-side fallback, or transport:"gateway"). Use the default run path, or fallback.mode:"client".`);
	}

	// Provider only reachable through the gateway path.
	if (!runCatalog) {
		if (forcedRun) reject("transport:\"run\" is unavailable: this provider is not on the unified-billing run catalog, so it can only be reached through the gateway path (BYOK).");
		if (resumeExplicitlyTrue) reject("resume:true is unavailable: this provider is not on the resumable run catalog (cf-aig-run-id requires the unified-billing run path).");
		return gatewayWithoutResume();
	}

	if (forcedRun && needsGateway) {
		reject(`transport:"run" cannot satisfy ${feature}: those features are only available on the gateway path. Use the gateway transport, or fallback.mode:"client".`);
	}
	if (forcedGateway && resumeExplicitlyTrue) {
		reject("transport:\"gateway\" cannot provide resume — cf-aig-run-id is only on the run path.");
	}

	if (needsGateway) {
		if (resumeExplicitlyTrue) {
			reject(`resume:true conflicts with ${feature}: resume (cf-aig-run-id) is only on the run path, which does not support ${serverFallback ? "server-side fallback" : "caching"}. Use fallback.mode:"client" to keep resume, or drop resume.`);
		}
		warnings.push(`[workers-ai-provider] resume disabled: ${feature} requires the gateway path, which does not surface cf-aig-run-id. Use fallback.mode:"client" to keep resumable streaming.`);
		return gatewayWithoutResume();
	}

	const transport = opts.transport ?? "run";
	return {
		transport,
		resumeEnabled: transport === "run" && opts.resume !== false,
		warnings
	};
}
1924
/**
 * Error raised by the gateway delegate and resumable stream.
 *
 * `kind` classifies the failure; the values used in this file are `"config"`,
 * `"dispatch"` and `"resume-expired"`. `cause` carries the underlying error
 * when one exists.
 */
var GatewayDelegateError = class extends Error {
	/**
	 * @param kind - Failure category.
	 * @param message - Human-readable description.
	 * @param [cause] - Underlying error, if any.
	 */
	constructor(kind, message, cause) {
		super(message);
		// Declare both fields on the instance first, then fill them in below.
		for (const field of ["kind", "cause"]) _defineProperty(this, field, void 0);
		this.name = "GatewayDelegateError";
		Object.assign(this, { kind, cause });
	}
};
1934
/**
 * Lower-cased header names that `makeGatewayFetch` always drops from the
 * provider request before forwarding it; provider auth headers are added to a
 * copy of this set unless `byok` is set.
 */
const STRIP_HEADERS_BASE = new Set(["content-length", "host"]);
1935
/**
 * Convert a fetch request body into text so it can be JSON-parsed.
 *
 * Strings are returned unchanged. An `ArrayBuffer` or any `ArrayBufferView`
 * (typed arrays, `DataView`) is decoded as UTF-8. Every other value —
 * including `undefined`/`null` — yields `"{}"`, i.e. an empty JSON object.
 *
 * @param body - The `init.body` passed to the fetch implementation.
 * @returns The body as a string, or `"{}"` when it cannot be read as text.
 */
function asText(body) {
	if (typeof body === "string") return body;
	// isView covers every typed array and DataView, not just Uint8Array, so a
	// binary body is never silently replaced by the "{}" fallback.
	if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) return new TextDecoder().decode(body);
	return "{}";
}
1941
/**
 * Flatten any accepted headers value into a plain object.
 *
 * @param h - A `Headers` instance, an array of `[name, value]` pairs, a plain
 *   object, or a falsy value.
 * @returns A new object of header name → value; empty when `h` is falsy.
 */
function headersToObject(h) {
	if (!h) return {};
	// Plain-object form: shallow copy of its own enumerable properties.
	if (!(h instanceof Headers) && !Array.isArray(h)) return Object.assign({}, h);
	// `Headers` and pair arrays both iterate as [name, value].
	const result = {};
	for (const [name, value] of h) result[name] = value;
	return result;
}
1949
/**
 * Normalise a gateway setting into its id plus the full options object.
 *
 * @param gateway - Either a gateway id string or an options object with `id`.
 * @returns `{ id, options }`; for a string input `options` is `{ id }`, for an
 *   object input `options` is that same object.
 * @throws {GatewayDelegateError} kind `"config"` when `gateway` is falsy.
 */
function normalizeGateway(gateway) {
	if (!gateway) throw new GatewayDelegateError("config", "A gateway is required for the delegate (resume needs a gateway). Pass `gateway: \"<gateway-id>\"` to createGatewayDelegate or per call.");
	const options = typeof gateway === "string" ? { id: gateway } : gateway;
	return {
		id: options.id,
		options
	};
}
1960
/**
 * Create a gateway delegate: a factory that turns a `"<provider>/<model>"` slug
 * into an AI SDK model dispatched through AI Gateway.
 *
 * For each slug the delegate parses it, resolves the registry entry, picks the
 * transport with `selectTransport` (logging any warnings via `console.warn`),
 * finds the provider plugin registered for the resulting wire format, and hands
 * that plugin a custom `fetch` bound to the chosen transport.
 *
 * When a call passes `fallback.mode: "client"`, one model is built for the
 * slug and for each `fallback.models` entry, and they are chained with
 * `createClientFallbackModel`.
 *
 * @param config.binding - Required binding (e.g. `env.AI`).
 * @param config.providers - Required non-empty list of provider plugins, keyed
 *   by their `wireFormat`.
 * @param [config.gateway] - Default gateway; a per-call `gateway` wins.
 * @param [config.resume=true] - Default for the per-call `resume` option.
 * @param [config.onResumeExpired] - Default for the per-call option of the same name.
 * @returns `(slug, options?) => model`.
 * @throws {GatewayDelegateError} kind `"config"` for a missing binding, missing
 *   plugins, or (per call) a missing plugin for the required wire format.
 */
function createGatewayDelegate(config) {
	if (!config?.binding) throw new GatewayDelegateError("config", "createGatewayDelegate requires a `binding` (e.g. { binding: env.AI }).");
	if (!config.providers?.length) throw new GatewayDelegateError("config", "createGatewayDelegate requires at least one provider plugin, e.g. `providers: [openai]` from \"workers-ai-provider/openai\".");

	const pluginsByWire = new Map();
	for (const provider of config.providers) pluginsByWire.set(provider.wireFormat, provider);
	const resumeDefault = config.resume ?? true;

	/** Build the model and report the transport for a single slug. */
	function buildLeg(slug, options) {
		const parsed = parseSlug(slug);
		const info = resolveProvider(slug, parsed);
		const effectiveOptions = {
			...options,
			resume: options.resume ?? resumeDefault,
			onResumeExpired: options.onResumeExpired ?? config.onResumeExpired
		};
		// "Explicit" means the caller set resume:true on this call, not the default.
		const selection = selectTransport(effectiveOptions, options.resume === true, info.runCatalog, info.gatewayPath !== false);
		selection.warnings.forEach((warning) => console.warn(warning));

		const onRunPath = selection.transport === "run";
		const wire = onRunPath ? info.runWireFormat ?? "openai" : info.wireFormat;
		const plugin = pluginsByWire.get(wire);
		if (!plugin) {
			const registered = [...pluginsByWire.keys()].join(", ") || "<none>";
			const message = onRunPath
				? `The run path for "${parsed.resolverKey}" (from slug "${slug}") returns "${wire}"-wire responses, so it needs the "${wire}" plugin. Install + pass it from "workers-ai-provider/${wire}". Registered: ${registered}.`
				: `No provider plugin for wire format "${wire}" (needed by "${parsed.resolverKey}" on the gateway path from slug "${slug}"). Registered: ${registered}. Install + pass the matching plugin from "workers-ai-provider/${wire}".`;
			throw new GatewayDelegateError("config", message);
		}

		const { id: gatewayId, options: gatewayOptions } = normalizeGateway(options.gateway ?? config.gateway);
		const fetchImpl = onRunPath
			? makeRunFetch(config.binding, `${info.resolverKey}/${parsed.modelId}`, gatewayOptions, effectiveOptions, selection, options)
			: makeGatewayFetch(config.binding, info, gatewayId, gatewayOptions, effectiveOptions, selection, options);

		const pluginArgs = {
			modelId: parsed.modelId,
			fetch: fetchImpl
		};
		// Only the gateway path passes the registry's baseURL through to the plugin.
		if (selection.transport === "gateway" && info.baseURL) pluginArgs.baseURL = info.baseURL;

		return {
			model: plugin.create(pluginArgs),
			transport: selection.transport
		};
	}

	return (slug, options = {}) => {
		if (options.fallback?.mode !== "client") return buildLeg(slug, options).model;
		// Client-side fallback: each leg is built without the fallback option itself.
		const { fallback, ...rest } = options;
		const legs = [slug, ...fallback.models].map((legSlug) => ({
			slug: legSlug,
			...buildLeg(legSlug, rest)
		}));
		return createClientFallbackModel(legs);
	};
}
2011
/**
 * Report a completed dispatch to the caller's `onDispatch` hook, if one is set.
 *
 * @param resp - Response from the dispatch; `status` and four `cf-aig-*`
 *   headers are read.
 * @param selection - Transport selection (`transport`, `resumeEnabled`, `warnings`).
 * @param options - Call options; does nothing when `onDispatch` is falsy.
 */
function fireDispatch(resp, selection, options) {
	if (!options.onDispatch) return;
	const header = (name) => resp.headers.get(name);
	const { transport, resumeEnabled, warnings } = selection;
	options.onDispatch({
		transport,
		resumeEnabled,
		warnings,
		status: resp.status,
		runId: header("cf-aig-run-id"),
		cfStep: header("cf-aig-step"),
		cacheStatus: header("cf-aig-cache-status"),
		logId: header("cf-aig-log-id")
	});
}
2024
/**
 * Merge call-level metadata over gateway-option metadata.
 *
 * @param base - Gateway-level metadata (may be nullish).
 * @param override - Call-level metadata (may be nullish); its keys win.
 * @returns A new merged object, or `undefined` when both inputs are falsy.
 */
function mergeMetadata(base, override) {
	return base || override ? { ...base, ...override } : void 0;
}
2032
/**
 * JSON-encode metadata for the `cf-aig-metadata` header.
 *
 * `JSON.stringify` cannot serialise bigint values, so they are written as
 * their decimal string form.
 *
 * @param metadata - Metadata object to encode.
 * @returns The JSON string.
 */
function serializeMetadata(metadata) {
	const bigintsAsStrings = (_key, value) => {
		if (typeof value === "bigint") return value.toString();
		return value;
	};
	return JSON.stringify(metadata, bigintsAsStrings);
}
2036
/**
 * Build a `fetch`-shaped function that dispatches a provider request through
 * `binding.run` instead of the network.
 *
 * The request URL is ignored. The JSON body is parsed, its `model` field is
 * removed (the model is addressed by `slug`), and the call is made with
 * `returnRawResponse: true`. When resume is enabled and the response carries a
 * `cf-aig-run-id` header and a body, the body is wrapped in a resumable stream.
 *
 * @param binding - Binding exposing `run(slug, body, options)`.
 * @param slug - Model slug passed to `binding.run`.
 * @param gatewayOptions - Normalised gateway options (`id`, `metadata`, ...).
 * @param opts - Effective call options (`metadata`, `collectLog`,
 *   `extraHeaders`, `onResumeExpired`, `onProgress` are read).
 * @param selection - Transport selection; `resumeEnabled` gates the wrapping.
 * @param callOptions - Original call options, forwarded to `fireDispatch`.
 * @returns `async (input, init) => Response`.
 */
function makeRunFetch(binding, slug, gatewayOptions, opts, selection, callOptions) {
	return async (_input, init) => {
		const payload = JSON.parse(asText(init?.body));
		delete payload.model;

		// Copy so per-call metadata / collectLog never leak into the shared options.
		const gatewayForRun = { ...gatewayOptions };
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (metadata) gatewayForRun.metadata = metadata;
		if (opts.collectLog !== void 0) gatewayForRun.collectLog = opts.collectLog;

		const runOptions = {
			gateway: gatewayForRun,
			returnRawResponse: true
		};
		if (opts.extraHeaders) runOptions.extraHeaders = opts.extraHeaders;
		if (init?.signal) runOptions.signal = init.signal;

		const resp = await binding.run(slug, payload, runOptions);
		fireDispatch(resp, selection, callOptions);

		const runId = resp.headers.get("cf-aig-run-id");
		if (!selection.resumeEnabled || !runId || !resp.body) return resp;

		const streamOptions = {
			binding,
			gateway: gatewayOptions.id,
			runId,
			initial: resp.body,
			onResumeExpired: opts.onResumeExpired
		};
		if (opts.onProgress) streamOptions.onProgress = opts.onProgress;
		return new Response(createResumableStream(streamOptions), {
			status: resp.status,
			headers: resp.headers
		});
	};
}
2070
/**
 * Build a `fetch`-shaped function that dispatches a provider request through
 * `binding.gateway(id).run(entries)`.
 *
 * The provider request is translated into a gateway entry: the endpoint comes
 * from `info.transformEndpoint` (or the URL's path without its leading slash,
 * plus query string), headers are filtered through the strip set, and
 * gateway control headers are added for caching, metadata and log collection.
 * With `fallback.mode: "server"`, one extra entry per fallback model is
 * appended, differing from the primary only in `query.model`.
 *
 * @param binding - Binding exposing `gateway(id)`.
 * @param info - Registry entry (`authHeaders`, `gatewayProviderId`,
 *   optional `transformEndpoint`).
 * @param gatewayId - Gateway id passed to `binding.gateway`.
 * @param gatewayOptions - Normalised gateway options; `metadata` is read.
 * @param opts - Effective call options.
 * @param selection - Transport selection, forwarded to `fireDispatch`.
 * @param callOptions - Original call options, forwarded to `fireDispatch`.
 * @returns `async (input, init) => Response`.
 * @throws {GatewayDelegateError} kind `"config"` (from the returned function)
 *   when a server-side fallback model belongs to a different gateway provider.
 */
function makeGatewayFetch(binding, info, gatewayId, gatewayOptions, opts, selection, callOptions) {
	const strip = new Set(STRIP_HEADERS_BASE);
	// Unless the caller brings its own key, the provider's auth headers are dropped.
	if (!opts.byok) for (const authHeader of info.authHeaders) strip.add(authHeader.toLowerCase());

	return async (input, init) => {
		const rawUrl = typeof input === "string" ? input : input.toString();
		let endpoint;
		if (info.transformEndpoint) {
			endpoint = info.transformEndpoint(rawUrl);
		} else {
			const url = new URL(rawUrl);
			endpoint = url.pathname.replace(/^\//, "") + (url.search || "");
		}

		const body = JSON.parse(asText(init?.body));

		const headers = {};
		for (const [name, value] of Object.entries(headersToObject(init?.headers))) {
			if (strip.has(name.toLowerCase())) continue;
			headers[name] = value;
		}
		// Caller-supplied extras are applied after stripping, so they always win.
		if (opts.extraHeaders) Object.assign(headers, opts.extraHeaders);
		if (opts.cacheTtl !== void 0) headers["cf-aig-cache-ttl"] = String(opts.cacheTtl);
		if (opts.skipCache) headers["cf-aig-skip-cache"] = "true";
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (metadata) headers["cf-aig-metadata"] = serializeMetadata(metadata);
		if (opts.collectLog !== void 0) headers["cf-aig-collect-log"] = String(opts.collectLog);

		const primary = {
			provider: info.gatewayProviderId,
			endpoint,
			headers,
			query: body
		};
		const entries = [primary];

		const fallbackSlugs = opts.fallback?.mode === "server" ? opts.fallback.models : [];
		for (const fallbackSlug of fallbackSlugs) {
			const fbParsed = parseSlug(fallbackSlug);
			const fbInfo = resolveProvider(fallbackSlug, fbParsed);
			if (fbInfo.gatewayProviderId !== info.gatewayProviderId) {
				throw new GatewayDelegateError("config", `Cross-vendor server-side fallback (${info.gatewayProviderId} → ${fbInfo.gatewayProviderId}) is not supported yet. Use fallback.mode:"client", or same-vendor fallback models.`);
			}
			entries.push({
				...primary,
				query: {
					...body,
					model: fbParsed.modelId
				}
			});
		}

		const gw = binding.gateway(gatewayId);
		const runOptions = {};
		if (init?.signal) runOptions.signal = init.signal;
		const resp = await gw.run(entries, runOptions);
		fireDispatch(resp, selection, callOptions);
		return resp;
	};
}
2112
+ //#endregion
1498
2113
  //#region src/index.ts
1499
2114
  /**
2115
+ * The account-wide AI Gateway used for catalog routing when no `gateway` is
2116
+ * configured. Every Cloudflare account has a `"default"` gateway.
2117
+ */
2118
+ const DEFAULT_GATEWAY_ID = "default";
2119
+ /**
1500
2120
  * Create a Workers AI provider instance.
1501
2121
  */
1502
2122
  function createWorkersAI(options) {
@@ -1518,6 +2138,26 @@ function createWorkersAI(options) {
1518
2138
  provider: "workersai.chat",
1519
2139
  isBinding
1520
2140
  });
2141
+ let delegate;
2142
+ const getDelegate = (slug) => {
2143
+ if (!options.providers?.length) throw new Error(`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI provider was not configured to route them. Pass provider plugins, e.g.:
2144
+ import { openai } from "workers-ai-provider/openai";
2145
+ createWorkersAI({ binding: env.AI, providers: [openai] });
2146
+ A gateway defaults to "default" but can be set via \`gateway\`. Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").`);
2147
+ delegate ?? (delegate = createGatewayDelegate({
2148
+ binding,
2149
+ gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
2150
+ providers: options.providers,
2151
+ resume: options.resume,
2152
+ onResumeExpired: options.onResumeExpired
2153
+ }));
2154
+ return delegate;
2155
+ };
2156
+ const isGatewaySlug = (id) => typeof id === "string" && !id.startsWith("@") && id.includes("/");
2157
+ const buildChat = (modelId, settings) => {
2158
+ if (isGatewaySlug(modelId)) return getDelegate(modelId)(modelId, settings);
2159
+ return createChatModel(modelId, settings);
2160
+ };
1521
2161
  const createImageModel = (modelId, settings = {}) => new WorkersAIImageModel(modelId, settings, {
1522
2162
  binding,
1523
2163
  gateway: options.gateway,
@@ -1550,9 +2190,9 @@ function createWorkersAI(options) {
1550
2190
  });
1551
2191
  const provider = (modelId, settings) => {
1552
2192
  if (new.target) throw new Error("The WorkersAI model function cannot be called with the new keyword.");
1553
- return createChatModel(modelId, settings);
2193
+ return buildChat(modelId, settings);
1554
2194
  };
1555
- provider.chat = createChatModel;
2195
+ provider.chat = buildChat;
1556
2196
  provider.embedding = createEmbeddingModel;
1557
2197
  provider.textEmbedding = createEmbeddingModel;
1558
2198
  provider.textEmbeddingModel = createEmbeddingModel;
@@ -1598,6 +2238,6 @@ function createAutoRAG(options) {
1598
2238
  return createAISearch(options, "autorag.chat");
1599
2239
  }
1600
2240
  //#endregion
1601
- export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createWorkersAI };
2241
+ export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, GATEWAY_PROVIDERS, GatewayDelegateError, WorkersAIFallbackError, WorkersAIGatewayError, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createClientFallbackModel, createGatewayFetch, createGatewayProvider, createResumableStream, createWorkersAI, detectProviderByUrl, findProviderBySlug, parseSlug, selectTransport, wireableProviders };
1602
2242
 
1603
2243
  //# sourceMappingURL=index.mjs.map