workers-ai-provider 3.1.14 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,5 +1,6 @@
1
+ import { a as findProviderBySlug, c as WorkersAIGatewayError, i as detectProviderByUrl, l as _defineProperty, n as createGatewayProvider, o as wireableProviders, r as GATEWAY_PROVIDERS, s as WorkersAIFallbackError, t as createGatewayFetch } from "./gateway-provider-1USFWm7c.mjs";
2
+ import { TooManyEmbeddingValuesForCallError, UnsupportedFunctionalityError } from "@ai-sdk/provider";
1
3
  import { generateId } from "ai";
2
- import { TooManyEmbeddingValuesForCallError } from "@ai-sdk/provider";
3
4
  //#region src/utils.ts
4
5
  /**
5
6
  * Normalize messages before passing to the Workers AI binding.
@@ -127,6 +128,39 @@ async function createRunBinary(config, model, audioBytes, contentType, signal) {
127
128
  const data = await response.json();
128
129
  return data.result ?? data;
129
130
  }
131
+ /**
132
+ * Build the `response_format.json_schema` payload for native Workers AI models.
133
+ *
134
+ * Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
135
+ * Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
136
+ * only required by partner-model routes (e.g. `openai/...`), which never reach
137
+ * this code — they go through the gateway delegate and the real `@ai-sdk/*`
138
+ * providers, which build the envelope themselves. Wrapping the schema here would
139
+ * break native models, so we must keep the bare shape.
140
+ *
141
+ * The AI SDK's structured-output `name` / `description` (from
142
+ * `Output.object({ schema, name, description })` / `generateObject`) would
143
+ * otherwise be silently dropped on this path. We preserve them as the standard
144
+ * JSON Schema `title` (from `name`) and `description` keywords, which keeps the
145
+ * payload a valid bare schema while still passing the LLM guidance through.
146
+ *
147
+ * Existing schema-level `title` / `description` are never overwritten, empty
148
+ * strings are ignored, and the input schema object is never mutated.
149
+ *
150
+ * See https://github.com/cloudflare/ai/issues/559.
151
+ */
152
+ function buildJsonSchemaPayload(schema, name, description) {
153
+ if (typeof schema !== "object" || schema === null || Array.isArray(schema)) return schema;
154
+ const record = schema;
155
+ const addTitle = !!name && record.title === void 0;
156
+ const addDescription = !!description && record.description === void 0;
157
+ if (!addTitle && !addDescription) return schema;
158
+ return {
159
+ ...record,
160
+ ...addTitle ? { title: name } : {},
161
+ ...addDescription ? { description } : {}
162
+ };
163
+ }
130
164
  function prepareToolsAndToolChoice(tools, toolChoice) {
131
165
  if (tools == null) return {
132
166
  tool_choice: void 0,
@@ -159,8 +193,11 @@ function prepareToolsAndToolChoice(tools, toolChoice) {
159
193
  tools: mappedTools
160
194
  };
161
195
  case "tool": return {
162
- tool_choice: "required",
163
- tools: mappedTools.filter((tool) => tool.function.name === toolChoice.toolName)
196
+ tool_choice: {
197
+ type: "function",
198
+ function: { name: toolChoice.toolName }
199
+ },
200
+ tools: mappedTools
164
201
  };
165
202
  default: throw new Error(`Unsupported tool choice type: ${type}`);
166
203
  }
@@ -198,6 +235,92 @@ function processToolCalls(output) {
198
235
  return [];
199
236
  }
200
237
  /**
238
+ * Was a specific tool forced for this request?
239
+ *
240
+ * True for both `tool_choice: "required"` and the named-function form
241
+ * `{ type: "function", function: { name } }`.
242
+ */
243
+ function isForcedToolChoice(toolChoice) {
244
+ if (toolChoice === "required") return true;
245
+ return typeof toolChoice === "object" && toolChoice !== null && toolChoice.type === "function";
246
+ }
247
+ /**
248
+ * Parse tool calls that a model leaked as JSON text instead of structured
249
+ * `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
250
+ *
251
+ * Only JSON objects whose `name` is one of `knownToolNames` are recovered;
252
+ * everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
253
+ * hallucinated names) is ignored to avoid fabricating bogus calls.
254
+ */
255
+ function parseLeakedToolCalls(text, knownToolNames) {
256
+ let parsed;
257
+ try {
258
+ parsed = JSON.parse(text.trim());
259
+ } catch {
260
+ return [];
261
+ }
262
+ const candidates = Array.isArray(parsed) ? parsed : [parsed];
263
+ const salvaged = [];
264
+ for (const candidate of candidates) {
265
+ if (typeof candidate !== "object" || candidate === null) continue;
266
+ const obj = candidate;
267
+ const name = obj.name;
268
+ if (typeof name !== "string" || !knownToolNames.has(name)) continue;
269
+ let args;
270
+ if ("arguments" in obj) args = obj.arguments;
271
+ else if ("parameters" in obj) args = obj.parameters;
272
+ else {
273
+ const { name: _name, ...rest } = obj;
274
+ args = rest;
275
+ }
276
+ salvaged.push({
277
+ input: typeof args === "string" ? args : JSON.stringify(args ?? {}),
278
+ toolCallId: createAISDKToolCallId(void 0),
279
+ type: "tool-call",
280
+ toolName: name
281
+ });
282
+ }
283
+ return salvaged;
284
+ }
285
+ /** Collect the requested tool names from mapped tools. */
286
+ function getToolNames(tools) {
287
+ return new Set((tools ?? []).map((tool) => tool.function?.name).filter((name) => typeof name === "string"));
288
+ }
289
+ /**
290
+ * Salvage a tool call that a model leaked into text content instead of the
291
+ * structured `tool_calls` field.
292
+ *
293
+ * Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
294
+ * call as raw JSON in `message.content` with an empty `tool_calls` array and
295
+ * `finish_reason: "stop"` — typically when the forced tool is a poor fit for
296
+ * the conversation. The content looks like one of:
297
+ *
298
+ * {"name":"read_skill_resource","path":"feedback.txt"} (flat args)
299
+ * {"name":"calc","arguments":{"a":1}} (wrapped args)
300
+ * [{"name":"calc","parameters":{"a":1}}] (array form)
301
+ *
302
+ * This reinterprets that text as a structured tool call. It is intentionally
303
+ * narrow to avoid false positives:
304
+ * - only runs when a tool was *forced* (required / named-function), so a
305
+ * tool call was explicitly demanded by the caller;
306
+ * - only runs when there are no real structured tool calls to override;
307
+ * - only matches JSON objects whose `name` is one of the requested tools.
308
+ *
309
+ * Returns the salvaged tool calls, or `null` when nothing was salvaged.
310
+ *
311
+ * See https://github.com/cloudflare/ai/issues/560.
312
+ */
313
+ function salvageToolCallsFromText(output, context) {
314
+ if (!isForcedToolChoice(context.toolChoice)) return null;
315
+ if (processToolCalls(output).length > 0) return null;
316
+ const knownToolNames = getToolNames(context.tools);
317
+ if (knownToolNames.size === 0) return null;
318
+ const text = processText(output);
319
+ if (!text) return null;
320
+ const salvaged = parseLeakedToolCalls(text, knownToolNames);
321
+ return salvaged.length > 0 ? salvaged : null;
322
+ }
323
+ /**
201
324
  * Extract text from a Workers AI response, handling multiple response formats:
202
325
  * - OpenAI format: { choices: [{ message: { content: "..." } }] }
203
326
  * - Native format: { response: "..." }
@@ -241,6 +364,17 @@ function toUint8Array$2(data) {
241
364
  if (data instanceof URL) throw new Error("URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead.");
242
365
  return null;
243
366
  }
367
+ function assertImageMediaType(mediaType) {
368
+ if (!mediaType) throw new UnsupportedFunctionalityError({
369
+ functionality: "file-part-without-media-type",
370
+ message: "Workers AI chat only supports image file parts with an image/* mediaType. Received a file part without a mediaType."
371
+ });
372
+ if (!mediaType.toLowerCase().startsWith("image/")) throw new UnsupportedFunctionalityError({
373
+ functionality: "non-image-file-part",
374
+ message: `Workers AI chat only supports image file parts with an image/* mediaType. Received mediaType "${mediaType}".`
375
+ });
376
+ return mediaType;
377
+ }
244
378
  function uint8ArrayToBase64$1(bytes) {
245
379
  let binary = "";
246
380
  const chunkSize = 8192;
@@ -267,10 +401,11 @@ function convertToWorkersAIChatMessages(prompt) {
267
401
  textParts.push(part.text);
268
402
  break;
269
403
  case "file": {
404
+ const mediaType = assertImageMediaType(part.mediaType);
270
405
  const imageBytes = toUint8Array$2(part.data);
271
406
  if (imageBytes) imageParts.push({
272
407
  image: imageBytes,
273
- mediaType: part.mediaType
408
+ mediaType
274
409
  });
275
410
  break;
276
411
  }
@@ -283,10 +418,9 @@ function convertToWorkersAIChatMessages(prompt) {
283
418
  });
284
419
  for (const img of imageParts) {
285
420
  const base64 = uint8ArrayToBase64$1(img.image);
286
- const mediaType = img.mediaType || "image/png";
287
421
  contentArray.push({
288
422
  type: "image_url",
289
- image_url: { url: `data:${mediaType};base64,${base64}` }
423
+ image_url: { url: `data:${img.mediaType};base64,${base64}` }
290
424
  });
291
425
  }
292
426
  messages.push({
@@ -504,9 +638,13 @@ function isNullFinalizationChunk(tc) {
504
638
  * 1. Native format: { response: "chunk", tool_calls: [...] }
505
639
  * 2. OpenAI format: { choices: [{ delta: { content: "chunk" } }] }
506
640
  */
507
- function getMappedStream(response) {
641
+ function getMappedStream(response, salvageContext) {
508
642
  const rawStream = response instanceof ReadableStream ? response : response.body;
509
643
  if (!rawStream) throw new Error("No readable stream available for SSE parsing.");
644
+ const knownToolNames = getToolNames(salvageContext?.tools);
645
+ const bufferContentForSalvage = isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
646
+ let contentBuffer = "";
647
+ let anyToolCallStarted = false;
510
648
  let usage = {
511
649
  outputTokens: {
512
650
  total: 0,
@@ -552,7 +690,8 @@ function getMappedStream(response) {
552
690
  const nativeResponse = chunk.response;
553
691
  if (nativeResponse != null && nativeResponse !== "") {
554
692
  const responseText = String(nativeResponse);
555
- if (responseText.length > 0) {
693
+ if (responseText.length > 0) if (bufferContentForSalvage) contentBuffer += responseText;
694
+ else {
556
695
  if (reasoningId) {
557
696
  controller.enqueue({
558
697
  type: "reasoning-end",
@@ -602,7 +741,8 @@ function getMappedStream(response) {
602
741
  });
603
742
  }
604
743
  const textDelta = delta.content;
605
- if (textDelta && textDelta.length > 0) {
744
+ if (textDelta && textDelta.length > 0) if (bufferContentForSalvage) contentBuffer += textDelta;
745
+ else {
606
746
  if (reasoningId) {
607
747
  controller.enqueue({
608
748
  type: "reasoning-end",
@@ -645,11 +785,69 @@ function getMappedStream(response) {
645
785
  type: "reasoning-end",
646
786
  id: reasoningId
647
787
  });
788
+ let salvagedToolCalls = false;
789
+ if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
790
+ const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
791
+ if (salvaged.length > 0) {
792
+ for (const call of salvaged) {
793
+ controller.enqueue({
794
+ type: "tool-input-start",
795
+ id: call.toolCallId,
796
+ toolName: call.toolName
797
+ });
798
+ controller.enqueue({
799
+ type: "tool-input-delta",
800
+ id: call.toolCallId,
801
+ delta: call.input
802
+ });
803
+ controller.enqueue({
804
+ type: "tool-input-end",
805
+ id: call.toolCallId
806
+ });
807
+ controller.enqueue(call);
808
+ }
809
+ salvagedToolCalls = true;
810
+ console.warn(`[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`);
811
+ } else {
812
+ const id = generateId();
813
+ controller.enqueue({
814
+ type: "text-start",
815
+ id
816
+ });
817
+ controller.enqueue({
818
+ type: "text-delta",
819
+ id,
820
+ delta: contentBuffer
821
+ });
822
+ controller.enqueue({
823
+ type: "text-end",
824
+ id
825
+ });
826
+ }
827
+ } else if (bufferContentForSalvage && contentBuffer.trim()) {
828
+ const id = generateId();
829
+ controller.enqueue({
830
+ type: "text-start",
831
+ id
832
+ });
833
+ controller.enqueue({
834
+ type: "text-delta",
835
+ id,
836
+ delta: contentBuffer
837
+ });
838
+ controller.enqueue({
839
+ type: "text-end",
840
+ id
841
+ });
842
+ }
648
843
  if (textId) controller.enqueue({
649
844
  type: "text-end",
650
845
  id: textId
651
846
  });
652
- const effectiveFinishReason = !receivedDone && receivedAnyData && !finishReason ? {
847
+ const effectiveFinishReason = salvagedToolCalls ? {
848
+ unified: "tool-calls",
849
+ raw: "stop"
850
+ } : !receivedDone && receivedAnyData && !finishReason ? {
653
851
  unified: "error",
654
852
  raw: "stream-truncated"
655
853
  } : finishReason ?? {
@@ -716,6 +914,7 @@ function getMappedStream(response) {
716
914
  args: ""
717
915
  });
718
916
  lastActiveToolIndex = tcIndex;
917
+ anyToolCallStarted = true;
719
918
  controller.enqueue({
720
919
  type: "tool-input-start",
721
920
  id,
@@ -778,44 +977,6 @@ var SSEDecoder = class extends TransformStream {
778
977
  }
779
978
  };
780
979
  //#endregion
781
- //#region \0@oxc-project+runtime@0.122.0/helpers/typeof.js
782
- function _typeof(o) {
783
- "@babel/helpers - typeof";
784
- return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function(o) {
785
- return typeof o;
786
- } : function(o) {
787
- return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o;
788
- }, _typeof(o);
789
- }
790
- //#endregion
791
- //#region \0@oxc-project+runtime@0.122.0/helpers/toPrimitive.js
792
- function toPrimitive(t, r) {
793
- if ("object" != _typeof(t) || !t) return t;
794
- var e = t[Symbol.toPrimitive];
795
- if (void 0 !== e) {
796
- var i = e.call(t, r || "default");
797
- if ("object" != _typeof(i)) return i;
798
- throw new TypeError("@@toPrimitive must return a primitive value.");
799
- }
800
- return ("string" === r ? String : Number)(t);
801
- }
802
- //#endregion
803
- //#region \0@oxc-project+runtime@0.122.0/helpers/toPropertyKey.js
804
- function toPropertyKey(t) {
805
- var i = toPrimitive(t, "string");
806
- return "symbol" == _typeof(i) ? i : i + "";
807
- }
808
- //#endregion
809
- //#region \0@oxc-project+runtime@0.122.0/helpers/defineProperty.js
810
- function _defineProperty(e, r, t) {
811
- return (r = toPropertyKey(r)) in e ? Object.defineProperty(e, r, {
812
- value: t,
813
- enumerable: !0,
814
- configurable: !0,
815
- writable: !0
816
- }) : e[r] = t, e;
817
- }
818
- //#endregion
819
980
  //#region src/aisearch-chat-language-model.ts
820
981
  var AISearchChatLanguageModel = class {
821
982
  constructor(modelId, settings, config) {
@@ -983,18 +1144,21 @@ var WorkersAIChatLanguageModel = class {
983
1144
  },
984
1145
  warnings
985
1146
  };
986
- case "json": return {
987
- args: {
988
- ...baseArgs,
989
- response_format: {
990
- type: "json_schema",
991
- json_schema: responseFormat?.type === "json" ? responseFormat.schema : void 0
1147
+ case "json": {
1148
+ const json = responseFormat?.type === "json" ? responseFormat : void 0;
1149
+ return {
1150
+ args: {
1151
+ ...baseArgs,
1152
+ response_format: {
1153
+ type: "json_schema",
1154
+ json_schema: buildJsonSchemaPayload(json?.schema, json?.name, json?.description)
1155
+ },
1156
+ tools: void 0,
1157
+ tool_choice: void 0
992
1158
  },
993
- tools: void 0,
994
- tool_choice: void 0
995
- },
996
- warnings
997
- };
1159
+ warnings
1160
+ };
1161
+ }
998
1162
  default: throw new Error(`Unsupported type: ${type}`);
999
1163
  }
1000
1164
  }
@@ -1053,6 +1217,38 @@ var WorkersAIChatLanguageModel = class {
1053
1217
  ...passthroughOptions
1054
1218
  };
1055
1219
  }
1220
+ /**
1221
+ * Extract reasoning, text, and tool calls from a non-streaming response.
1222
+ *
1223
+ * Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
1224
+ * path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
1225
+ * and is retried non-streaming). When a forced tool call was leaked into
1226
+ * text content (gpt-oss harmony quirk), it is salvaged into a structured
1227
+ * tool call and the leaked JSON text is suppressed. A warning is appended in
1228
+ * place so callers can observe the reinterpretation.
1229
+ */
1230
+ extractContent(outputRecord, args, warnings) {
1231
+ const choices = outputRecord.choices;
1232
+ const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
1233
+ const toolCalls = processToolCalls(outputRecord);
1234
+ const salvaged = toolCalls.length === 0 ? salvageToolCallsFromText(outputRecord, {
1235
+ tools: args.tools,
1236
+ toolChoice: args.tool_choice
1237
+ }) : null;
1238
+ if (salvaged) warnings.push({
1239
+ type: "other",
1240
+ message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`
1241
+ });
1242
+ return {
1243
+ reasoningContent,
1244
+ text: salvaged ? "" : processText(outputRecord) ?? "",
1245
+ toolCalls: salvaged ?? toolCalls,
1246
+ finishReason: salvaged ? {
1247
+ unified: "tool-calls",
1248
+ raw: "stop"
1249
+ } : mapWorkersAIFinishReason(outputRecord)
1250
+ };
1251
+ }
1056
1252
  async doGenerate(options) {
1057
1253
  const { args, warnings } = this.getArgs(options);
1058
1254
  const { messages } = convertToWorkersAIChatMessages(options.prompt);
@@ -1064,10 +1260,9 @@ var WorkersAIChatLanguageModel = class {
1064
1260
  });
1065
1261
  if (output instanceof ReadableStream) throw new Error("Unexpected streaming response from non-streaming request. Check that `stream: true` was not passed.");
1066
1262
  const outputRecord = output;
1067
- const choices = outputRecord.choices;
1068
- const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
1263
+ const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
1069
1264
  return {
1070
- finishReason: mapWorkersAIFinishReason(outputRecord),
1265
+ finishReason,
1071
1266
  content: [
1072
1267
  ...reasoningContent ? [{
1073
1268
  type: "reasoning",
@@ -1075,9 +1270,9 @@ var WorkersAIChatLanguageModel = class {
1075
1270
  }] : [],
1076
1271
  {
1077
1272
  type: "text",
1078
- text: processText(outputRecord) ?? ""
1273
+ text
1079
1274
  },
1080
- ...processToolCalls(outputRecord)
1275
+ ...toolCalls
1081
1276
  ],
1082
1277
  usage: mapWorkersAIUsage(output),
1083
1278
  warnings
@@ -1095,10 +1290,12 @@ var WorkersAIChatLanguageModel = class {
1095
1290
  ...runOptions,
1096
1291
  signal: options.abortSignal
1097
1292
  });
1098
- if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response), warnings) };
1293
+ if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response, {
1294
+ tools: args.tools,
1295
+ toolChoice: args.tool_choice
1296
+ }), warnings) };
1099
1297
  const outputRecord = response;
1100
- const choices = outputRecord.choices;
1101
- const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
1298
+ const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
1102
1299
  let textId = null;
1103
1300
  let reasoningId = null;
1104
1301
  return { stream: new ReadableStream({ start(controller) {
@@ -1122,7 +1319,6 @@ var WorkersAIChatLanguageModel = class {
1122
1319
  id: reasoningId
1123
1320
  });
1124
1321
  }
1125
- const text = processText(outputRecord);
1126
1322
  if (text) {
1127
1323
  textId = generateId();
1128
1324
  controller.enqueue({
@@ -1139,10 +1335,10 @@ var WorkersAIChatLanguageModel = class {
1139
1335
  id: textId
1140
1336
  });
1141
1337
  }
1142
- for (const toolCall of processToolCalls(outputRecord)) controller.enqueue(toolCall);
1338
+ for (const toolCall of toolCalls) controller.enqueue(toolCall);
1143
1339
  controller.enqueue({
1144
1340
  type: "finish",
1145
- finishReason: mapWorkersAIFinishReason(outputRecord),
1341
+ finishReason,
1146
1342
  usage: mapWorkersAIUsage(response)
1147
1343
  });
1148
1344
  controller.close();
@@ -1505,8 +1701,422 @@ function documentsToContexts(documents, warnings) {
1505
1701
  */
1506
1702
  var AutoRAGChatLanguageModel = class extends AISearchChatLanguageModel {};
1507
1703
  //#endregion
1704
+ //#region src/client-fallback.ts
1705
+ /**
1706
+ * Wrap a chain of models so a failed *pre-stream* dispatch falls through to the
1707
+ * next model, preserving resume on each leg's own transport. If every leg fails,
1708
+ * throws a {@link WorkersAIFallbackError} carrying the full attempt tree.
1709
+ *
1710
+ * Fallback triggers on `doGenerate`/`doStream` rejection (the dispatch never
1711
+ * produced a stream). Errors that surface *mid-stream* — after content has
1712
+ * already been emitted — are not recoverable here and propagate as-is.
1713
+ */
1714
+ function createClientFallbackModel(legs) {
1715
+ if (legs.length === 0) throw new Error("createClientFallbackModel requires at least one model leg.");
1716
+ const primary = legs[0].model;
1717
+ async function attempt(run) {
1718
+ const attempts = [];
1719
+ for (const leg of legs) try {
1720
+ const result = await run(leg.model);
1721
+ attempts.push({
1722
+ model: leg.slug,
1723
+ transport: leg.transport,
1724
+ ok: true
1725
+ });
1726
+ return result;
1727
+ } catch (e) {
1728
+ const err = WorkersAIGatewayError.fromUnknown(e);
1729
+ attempts.push({
1730
+ model: leg.slug,
1731
+ transport: leg.transport,
1732
+ ok: false,
1733
+ status: err.status,
1734
+ error: err
1735
+ });
1736
+ }
1737
+ throw new WorkersAIFallbackError(attempts);
1738
+ }
1739
+ return {
1740
+ specificationVersion: "v3",
1741
+ provider: primary.provider,
1742
+ modelId: primary.modelId,
1743
+ supportedUrls: primary.supportedUrls,
1744
+ doGenerate(options) {
1745
+ return attempt((m) => m.doGenerate(options));
1746
+ },
1747
+ doStream(options) {
1748
+ return attempt((m) => m.doStream(options));
1749
+ }
1750
+ };
1751
+ }
1752
+ //#endregion
1753
+ //#region src/resumable-stream.ts
1754
+ function concat(a, b) {
1755
+ const out = new Uint8Array(new ArrayBuffer(a.length + b.length));
1756
+ out.set(a, 0);
1757
+ out.set(b, a.length);
1758
+ return out;
1759
+ }
1760
+ /** Index just past the last `\n\n` in `buf`, or -1 if there is no complete event. */
1761
+ function lastEventBoundary(buf) {
1762
+ for (let i = buf.length - 2; i >= 0; i--) if (buf[i] === 10 && buf[i + 1] === 10) return i + 2;
1763
+ return -1;
1764
+ }
1765
+ /** Count of `\n\n` terminators (= complete SSE events) in `buf`. */
1766
+ function countEvents(buf) {
1767
+ let n = 0;
1768
+ for (let i = 0; i + 1 < buf.length; i++) if (buf[i] === 10 && buf[i + 1] === 10) {
1769
+ n++;
1770
+ i++;
1771
+ }
1772
+ return n;
1773
+ }
1774
+ function resumeUrl(gateway, runId, from) {
1775
+ return `https://workers-binding.ai/ai-gateway/gateways/${gateway}/run/${runId}/resume?from=${from}`;
1776
+ }
1777
+ function createResumableStream(options) {
1778
+ const { binding, gateway, runId } = options;
1779
+ const maxReconnects = options.maxReconnects ?? 5;
1780
+ const onExpired = options.onResumeExpired ?? "error";
1781
+ let emittedEvents = options.fromEvent ?? 0;
1782
+ let pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
1783
+ let reconnects = 0;
1784
+ async function fetchResume(controller) {
1785
+ let res;
1786
+ try {
1787
+ res = await binding.fetch(resumeUrl(gateway, runId, emittedEvents), { method: "GET" });
1788
+ } catch (fetchErr) {
1789
+ controller.error(new GatewayDelegateError("dispatch", `Resume request threw at event ${emittedEvents}.`, fetchErr));
1790
+ return null;
1791
+ }
1792
+ if (res.status === 404) {
1793
+ if (onExpired === "accept-partial") {
1794
+ controller.close();
1795
+ return null;
1796
+ }
1797
+ controller.error(new GatewayDelegateError("resume-expired", `Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer TTL (~5.5 min) elapsed; fall back to continuation or regeneration.`));
1798
+ return null;
1799
+ }
1800
+ if (!res.ok || !res.body) {
1801
+ controller.error(new GatewayDelegateError("dispatch", `Resume failed (${res.status}) at event ${emittedEvents}.`));
1802
+ return null;
1803
+ }
1804
+ return res.body;
1805
+ }
1806
+ return new ReadableStream({ async start(controller) {
1807
+ let current;
1808
+ if (options.initial) current = options.initial;
1809
+ else {
1810
+ const body = await fetchResume(controller);
1811
+ if (!body) return;
1812
+ current = body;
1813
+ }
1814
+ for (;;) {
1815
+ const reader = current.getReader();
1816
+ try {
1817
+ for (;;) {
1818
+ const { done, value } = await reader.read();
1819
+ if (done) {
1820
+ if (pending.length > 0) {
1821
+ controller.enqueue(pending);
1822
+ pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
1823
+ }
1824
+ controller.close();
1825
+ return;
1826
+ }
1827
+ if (!value || value.length === 0) continue;
1828
+ pending = concat(pending, value);
1829
+ const boundary = lastEventBoundary(pending);
1830
+ if (boundary > 0) {
1831
+ const complete = pending.slice(0, boundary);
1832
+ controller.enqueue(complete);
1833
+ emittedEvents += countEvents(complete);
1834
+ options.onProgress?.(emittedEvents);
1835
+ pending = pending.slice(boundary);
1836
+ }
1837
+ }
1838
+ } catch (err) {
1839
+ try {
1840
+ reader.releaseLock();
1841
+ } catch {}
1842
+ if (reconnects >= maxReconnects) {
1843
+ controller.error(new GatewayDelegateError("resume-expired", `Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`, err));
1844
+ return;
1845
+ }
1846
+ pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
1847
+ reconnects++;
1848
+ options.onReconnect?.(emittedEvents, reconnects);
1849
+ const body = await fetchResume(controller);
1850
+ if (!body) return;
1851
+ current = body;
1852
+ }
1853
+ }
1854
+ } });
1855
+ }
1856
+ //#endregion
1857
+ //#region src/gateway-delegate.ts
1858
+ /**
1859
+ * Parse a `vendor/model` slug. The first segment is the resolver key (which
1860
+ * registry entry handles it); the rest is the provider-native model id. Routing
1861
+ * providers keep multi-segment model ids, e.g. `openrouter/anthropic/claude`.
1862
+ */
1863
+ function parseSlug(slug) {
1864
+ const slash = slug.indexOf("/");
1865
+ if (slash === -1) throw new GatewayDelegateError("config", `Model slug "${slug}" has no resolver key. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
1866
+ const resolverKey = slug.slice(0, slash);
1867
+ const modelId = slug.slice(slash + 1);
1868
+ if (!resolverKey || !modelId) throw new GatewayDelegateError("config", `Model slug "${slug}" is malformed. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
1869
+ return {
1870
+ resolverKey,
1871
+ modelId
1872
+ };
1873
+ }
1874
+ /**
1875
+ * Resolve a slug to its registry entry, raising a helpful error for unknown or
1876
+ * bring-your-own-provider-only providers.
1877
+ */
1878
+ function resolveProvider(slug, parsed) {
1879
+ const info = findProviderBySlug(parsed.resolverKey);
1880
+ if (!info) throw new GatewayDelegateError("config", `Unknown gateway provider "${parsed.resolverKey}" (from slug "${slug}"). See the AI Gateway provider directory for valid slugs, or use createGatewayProvider to bring your own @ai-sdk provider.`);
1881
+ if (!info.wireFormat) throw new GatewayDelegateError("config", `Provider "${parsed.resolverKey}" is not chat/completions-shaped and has no built-in parser. Reach it with createGatewayProvider (bring your own @ai-sdk provider).`);
1882
+ return info;
1883
+ }
1884
+ /**
1885
+ * Resolve the transport from the requested options. Gateway-only features (server
1886
+ * fallback, caching) force the gateway path and disable resume — with a loud
1887
+ * warning if resume was merely defaulted, or a thrown error if it was explicitly
1888
+ * requested.
1889
+ */
1890
+ function selectTransport(opts, resumeExplicitlyTrue, runCatalog = true, gatewayAvailable = true) {
1891
+ const warnings = [];
1892
+ const wantsServerFallback = opts.fallback?.mode === "server";
1893
+ const wantsCaching = opts.cacheTtl !== void 0 || opts.skipCache === true;
1894
+ const gatewayOnly = wantsServerFallback || wantsCaching;
1895
+ const feature = wantsServerFallback ? "fallback.mode:\"server\"" : "caching (cacheTtl/skipCache)";
1896
+ if (runCatalog && !gatewayAvailable && (opts.transport === "gateway" || gatewayOnly)) throw new GatewayDelegateError("config", `${opts.transport === "gateway" ? "transport:\"gateway\"" : feature} is unavailable: this provider is on the unified run catalog but is not a native gateway provider, so it has no gateway path (no caching, server-side fallback, or transport:"gateway"). Use the default run path, or fallback.mode:"client".`);
1897
+ if (!runCatalog) {
1898
+ if (opts.transport === "run") throw new GatewayDelegateError("config", "transport:\"run\" is unavailable: this provider is not on the unified-billing run catalog, so it can only be reached through the gateway path (BYOK).");
1899
+ if (resumeExplicitlyTrue) throw new GatewayDelegateError("config", "resume:true is unavailable: this provider is not on the resumable run catalog (cf-aig-run-id requires the unified-billing run path).");
1900
+ return {
1901
+ transport: "gateway",
1902
+ resumeEnabled: false,
1903
+ warnings
1904
+ };
1905
+ }
1906
+ if (opts.transport === "run" && gatewayOnly) throw new GatewayDelegateError("config", `transport:"run" cannot satisfy ${feature}: those features are only available on the gateway path. Use the gateway transport, or fallback.mode:"client".`);
1907
+ if (opts.transport === "gateway" && resumeExplicitlyTrue) throw new GatewayDelegateError("config", "transport:\"gateway\" cannot provide resume — cf-aig-run-id is only on the run path.");
1908
+ if (gatewayOnly) {
1909
+ if (resumeExplicitlyTrue) throw new GatewayDelegateError("config", `resume:true conflicts with ${feature}: resume (cf-aig-run-id) is only on the run path, which does not support ${wantsServerFallback ? "server-side fallback" : "caching"}. Use fallback.mode:"client" to keep resume, or drop resume.`);
1910
+ warnings.push(`[workers-ai-provider] resume disabled: ${feature} requires the gateway path, which does not surface cf-aig-run-id. Use fallback.mode:"client" to keep resumable streaming.`);
1911
+ return {
1912
+ transport: "gateway",
1913
+ resumeEnabled: false,
1914
+ warnings
1915
+ };
1916
+ }
1917
+ const transport = opts.transport ?? "run";
1918
+ return {
1919
+ transport,
1920
+ resumeEnabled: transport === "run" && opts.resume !== false,
1921
+ warnings
1922
+ };
1923
+ }
1924
+ var GatewayDelegateError = class extends Error {
1925
+ constructor(kind, message, cause) {
1926
+ super(message);
1927
+ _defineProperty(this, "kind", void 0);
1928
+ _defineProperty(this, "cause", void 0);
1929
+ this.name = "GatewayDelegateError";
1930
+ this.kind = kind;
1931
+ this.cause = cause;
1932
+ }
1933
+ };
1934
+ const STRIP_HEADERS_BASE = new Set(["content-length", "host"]);
1935
/**
 * Decode a fetch-style request body into text so it can be JSON-parsed.
 *
 * @param body A string, an `ArrayBuffer`, or any ArrayBuffer view (typed array
 *   or `DataView`). Anything else (including `undefined` / `null`) is treated
 *   as an absent body.
 * @returns The body as a string; strings pass through unchanged, binary input
 *   is decoded with a default `TextDecoder` (UTF-8), and unsupported input
 *   yields the literal `"{}"` so callers can always `JSON.parse` the result.
 */
function asText(body) {
	if (typeof body === "string") return body;
	// TextDecoder#decode accepts any BufferSource, so cover every view type
	// rather than only Uint8Array; otherwise such a body would be silently
	// replaced by "{}".
	if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) return new TextDecoder().decode(body);
	return "{}";
}
1941
/**
 * Flatten any fetch `HeadersInit` shape into a fresh plain object.
 *
 * @param h A `Headers` instance, an array of `[name, value]` pairs, a plain
 *   record, or a falsy value.
 * @returns A new object with one property per header; empty when `h` is falsy.
 *   For pair-based input a later duplicate name overwrites an earlier one.
 */
function headersToObject(h) {
	const result = {};
	if (!h) return result;
	const yieldsPairs = h instanceof Headers || Array.isArray(h);
	// Plain records are copied wholesale; the other shapes are walked pair by pair.
	if (!yieldsPairs) return Object.assign(result, h);
	for (const pair of h) {
		const [name, value] = pair;
		result[name] = value;
	}
	return result;
}
1949
/**
 * Normalize the two accepted gateway shapes into `{ id, options }`.
 *
 * @param gateway Either a gateway id string or a gateway options object that
 *   carries an `id` property.
 * @returns `{ id, options }`: for a string, `options` is a new `{ id }` object;
 *   for an object, `options` is that same object (not a copy).
 * @throws {GatewayDelegateError} kind "config" when `gateway` is falsy.
 */
function normalizeGateway(gateway) {
	if (gateway) {
		const isBareId = typeof gateway === "string";
		return {
			id: isBareId ? gateway : gateway.id,
			options: isBareId ? { id: gateway } : gateway
		};
	}
	throw new GatewayDelegateError("config", "A gateway is required for the delegate (resume needs a gateway). Pass `gateway: \"<gateway-id>\"` to createGatewayDelegate or per call.");
}
1960
/**
 * Create a gateway delegate: a function that turns a `"<provider>/<model>"`
 * slug into an AI SDK model whose requests are dispatched through AI Gateway
 * on whichever transport ("run" or "gateway") the requested options select.
 *
 * @param config Delegate configuration. `binding` and a non-empty `providers`
 *   list are required; `gateway`, `resume` (defaults to `true`) and
 *   `onResumeExpired` act as defaults that per-call options may override.
 * @returns `(slug, options = {}) => model`. With `options.fallback.mode ===
 *   "client"` the primary slug and every `fallback.models` entry are built
 *   individually and combined via `createClientFallbackModel`.
 * @throws {GatewayDelegateError} kind "config" when `binding` or `providers`
 *   is missing, or (at model-build time) when no registered plugin matches the
 *   wire format the selected transport needs.
 */
function createGatewayDelegate(config) {
	if (!config?.binding) throw new GatewayDelegateError("config", "createGatewayDelegate requires a `binding` (e.g. { binding: env.AI }).");
	if (!config.providers?.length) throw new GatewayDelegateError("config", "createGatewayDelegate requires at least one provider plugin, e.g. `providers: [openai]` from \"workers-ai-provider/openai\".");
	// Index the plugins by wire format; a later plugin with the same wire format replaces an earlier one.
	const registry = new Map();
	for (const provider of config.providers) registry.set(provider.wireFormat, provider);
	const resumeByDefault = config.resume ?? true;
	const listRegistered = () => [...registry.keys()].join(", ") || "<none>";

	/** Build one model (plus the transport it ended up on) for a single slug. */
	const buildOne = (slug, options) => {
		const parsed = parseSlug(slug);
		const info = resolveProvider(slug, parsed);
		// Captured before defaults are applied: only a caller-supplied `resume: true` counts as explicit.
		const resumeExplicitlyTrue = options.resume === true;
		const effectiveOptions = {
			...options,
			resume: options.resume ?? resumeByDefault,
			onResumeExpired: options.onResumeExpired ?? config.onResumeExpired
		};
		const gatewayPathAvailable = info.gatewayPath !== false;
		const selection = selectTransport(effectiveOptions, resumeExplicitlyTrue, info.runCatalog, gatewayPathAvailable);
		for (const warning of selection.warnings) console.warn(warning);

		const onRunPath = selection.transport === "run";
		// The run path answers in its own wire format ("openai" unless the provider says otherwise).
		const wire = onRunPath ? (info.runWireFormat ?? "openai") : info.wireFormat;
		const plugin = registry.get(wire);
		if (!plugin) {
			const detail = onRunPath
				? `The run path for "${parsed.resolverKey}" (from slug "${slug}") returns "${wire}"-wire responses, so it needs the "${wire}" plugin. Install + pass it from "workers-ai-provider/${wire}". Registered: ${listRegistered()}.`
				: `No provider plugin for wire format "${wire}" (needed by "${parsed.resolverKey}" on the gateway path from slug "${slug}"). Registered: ${listRegistered()}. Install + pass the matching plugin from "workers-ai-provider/${wire}".`;
			throw new GatewayDelegateError("config", detail);
		}

		const { id: gatewayId, options: gatewayOptions } = normalizeGateway(options.gateway ?? config.gateway);
		let fetchImpl;
		if (onRunPath) {
			const runSlug = `${info.resolverKey}/${parsed.modelId}`;
			fetchImpl = makeRunFetch(config.binding, runSlug, gatewayOptions, effectiveOptions, selection, options);
		} else {
			fetchImpl = makeGatewayFetch(config.binding, info, gatewayId, gatewayOptions, effectiveOptions, selection, options);
		}

		const pluginArgs = {
			modelId: parsed.modelId,
			fetch: fetchImpl
		};
		// Only the gateway transport forwards the provider's base URL to the plugin.
		if (selection.transport === "gateway" && info.baseURL) pluginArgs.baseURL = info.baseURL;
		return {
			model: plugin.create(pluginArgs),
			transport: selection.transport
		};
	};

	return (slug, options = {}) => {
		if (options.fallback?.mode !== "client") return buildOne(slug, options).model;
		// Client-side fallback: every candidate is built without the `fallback` option itself.
		const { fallback, ...rest } = options;
		const candidates = [slug, ...fallback.models];
		return createClientFallbackModel(candidates.map((candidate) => {
			const { model, transport } = buildOne(candidate, rest);
			return {
				slug: candidate,
				model,
				transport
			};
		}));
	};
}
2011
/**
 * Report a completed dispatch to the caller's optional `onDispatch` hook.
 *
 * @param resp Response whose `status` and `cf-aig-*` headers are reported.
 * @param selection Transport selection (`transport`, `resumeEnabled`, `warnings`).
 * @param options Call options; nothing happens unless `options.onDispatch` is set.
 * @returns `undefined`. Header fields that are absent are reported as `null`.
 */
function fireDispatch(resp, selection, options) {
	if (options.onDispatch) {
		const { transport, resumeEnabled, warnings } = selection;
		const status = resp.status;
		const header = (name) => resp.headers.get(name);
		options.onDispatch({
			transport,
			resumeEnabled,
			warnings,
			status,
			runId: header("cf-aig-run-id"),
			cfStep: header("cf-aig-step"),
			cacheStatus: header("cf-aig-cache-status"),
			logId: header("cf-aig-log-id")
		});
	}
}
2024
/**
 * Combine gateway-option metadata with call-level metadata.
 *
 * @param base Metadata from the gateway options (may be absent).
 * @param override Metadata from the call (may be absent); its keys win on conflict.
 * @returns A new merged object, or `undefined` when both inputs are falsy.
 */
function mergeMetadata(base, override) {
	const hasAny = Boolean(base) || Boolean(override);
	if (!hasAny) return void 0;
	const merged = {
		...base,
		...override
	};
	return merged;
}
2032
/**
 * JSON-encode metadata for the `cf-aig-metadata` header.
 *
 * `JSON.stringify` throws on bigint values, so each bigint is emitted as its
 * decimal string instead.
 *
 * @param metadata Value to encode.
 * @returns The JSON text (whatever `JSON.stringify` returns for `metadata`).
 */
function serializeMetadata(metadata) {
	const bigintToString = (_key, value) => {
		if (typeof value === "bigint") return value.toString();
		return value;
	};
	return JSON.stringify(metadata, bigintToString);
}
2036
/**
 * Build a fetch-compatible function for the "run" transport.
 *
 * The returned function ignores the request URL, parses the JSON request body,
 * removes its `model` field, and calls `binding.run(slug, body, runOptions)`
 * with `returnRawResponse: true`. When resume is enabled and the response has
 * both a `cf-aig-run-id` header and a body, the body is wrapped with
 * `createResumableStream` and returned in a new `Response` that keeps the
 * original status and headers; otherwise the binding's response is returned
 * unchanged.
 *
 * @param binding Object exposing `run(slug, body, options)`.
 * @param slug Model identifier passed to `binding.run`.
 * @param gatewayOptions Gateway options object (its `id` and `metadata` are read).
 * @param opts Effective call options (`metadata`, `collectLog`, `extraHeaders`,
 *   `onResumeExpired`, `onProgress`).
 * @param selection Transport selection reported to `onDispatch`.
 * @param callOptions Original call options, consulted for `onDispatch`.
 * @returns An async `(input, init) => Response` function.
 */
function makeRunFetch(binding, slug, gatewayOptions, opts, selection, callOptions) {
	return async (_input, init) => {
		const body = JSON.parse(asText(init?.body));
		// The model is addressed through `slug`, so it must not also travel in the payload.
		delete body.model;

		// Call-level metadata / collectLog take precedence over the gateway-level values.
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		const gateway = {
			...gatewayOptions,
			...(metadata ? { metadata } : {}),
			...(opts.collectLog !== void 0 ? { collectLog: opts.collectLog } : {})
		};
		const runOptions = {
			gateway,
			returnRawResponse: true
		};
		if (opts.extraHeaders) runOptions.extraHeaders = opts.extraHeaders;
		if (init?.signal) runOptions.signal = init.signal;

		const resp = await binding.run(slug, body, runOptions);
		fireDispatch(resp, selection, callOptions);

		const runId = resp.headers.get("cf-aig-run-id");
		const canResume = selection.resumeEnabled && runId && resp.body;
		if (!canResume) return resp;

		const streamConfig = {
			binding,
			gateway: gatewayOptions.id,
			runId,
			initial: resp.body,
			onResumeExpired: opts.onResumeExpired
		};
		if (opts.onProgress) streamConfig.onProgress = opts.onProgress;
		return new Response(createResumableStream(streamConfig), {
			status: resp.status,
			headers: resp.headers
		});
	};
}
2070
/**
 * Build a fetch-compatible function for the "gateway" transport, i.e. one that
 * sends provider requests via `binding.gateway(gatewayId).run(entries)`.
 *
 * Per call it:
 *  - derives the provider endpoint from the request URL (through
 *    `info.transformEndpoint` when present, otherwise path without the leading
 *    slash plus the query string);
 *  - copies the request headers, dropping STRIP_HEADERS_BASE and, unless
 *    `opts.byok` is set, the provider's auth headers (`info.authHeaders`);
 *  - layers on `opts.extraHeaders` and the `cf-aig-*` control headers implied
 *    by `cacheTtl`, `skipCache`, merged metadata and `collectLog`;
 *  - appends one entry per `opts.fallback.models` item when
 *    `opts.fallback.mode` is "server" (same request, different `model`).
 *
 * `input` may be a string, a `URL`, or a `Request`. For a `Request`, its own
 * URL is used, and its body / headers / signal are used whenever `init` does
 * not provide them.
 *
 * @param binding Object exposing `gateway(id)`, whose result exposes `run(entries, options)`.
 * @param info Resolved provider info (`authHeaders`, `gatewayProviderId`, optional `transformEndpoint`).
 * @param gatewayId Gateway id passed to `binding.gateway`.
 * @param gatewayOptions Gateway options object (its `metadata` is read).
 * @param opts Effective call options.
 * @param selection Transport selection reported to `onDispatch`.
 * @param callOptions Original call options, consulted for `onDispatch`.
 * @returns An async `(input, init) => Response` function.
 * @throws {GatewayDelegateError} kind "config" (at call time) when a
 *   server-side fallback model resolves to a different gateway provider than
 *   the primary model.
 */
function makeGatewayFetch(binding, info, gatewayId, gatewayOptions, opts, selection, callOptions) {
	const strip = new Set(STRIP_HEADERS_BASE);
	if (!opts.byok) for (const h of info.authHeaders) strip.add(h.toLowerCase());
	return async (input, init) => {
		// Request#toString() yields "[object Request]", which is not a URL, so
		// read the fields of a Request explicitly instead of stringifying it.
		const request = typeof Request !== "undefined" && input instanceof Request ? input : void 0;
		const rawUrl = request ? request.url : String(input);
		let endpoint;
		if (info.transformEndpoint) endpoint = info.transformEndpoint(rawUrl);
		else {
			const url = new URL(rawUrl);
			endpoint = url.pathname.replace(/^\//, "") + url.search;
		}

		// `init` wins over the Request, mirroring fetch(); an empty Request body counts as absent.
		let rawBody = init?.body;
		if (rawBody == null && request?.body) rawBody = await request.clone().text() || void 0;
		const body = JSON.parse(asText(rawBody));

		const headers = {};
		const incomingHeaders = headersToObject(init?.headers ?? request?.headers);
		for (const [k, v] of Object.entries(incomingHeaders)) if (!strip.has(k.toLowerCase())) headers[k] = v;
		// Applied after stripping, so explicitly supplied extra headers are never filtered out.
		if (opts.extraHeaders) Object.assign(headers, opts.extraHeaders);
		if (opts.cacheTtl !== void 0) headers["cf-aig-cache-ttl"] = String(opts.cacheTtl);
		if (opts.skipCache) headers["cf-aig-skip-cache"] = "true";
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (metadata) headers["cf-aig-metadata"] = serializeMetadata(metadata);
		if (opts.collectLog !== void 0) headers["cf-aig-collect-log"] = String(opts.collectLog);

		const primary = {
			provider: info.gatewayProviderId,
			endpoint,
			headers,
			query: body
		};
		const entries = [primary];
		if (opts.fallback?.mode === "server") for (const fb of opts.fallback.models) {
			const fbParsed = parseSlug(fb);
			const fbInfo = resolveProvider(fb, fbParsed);
			// Fallback entries reuse the primary endpoint and headers, so they must target the same provider.
			if (fbInfo.gatewayProviderId !== info.gatewayProviderId) throw new GatewayDelegateError("config", `Cross-vendor server-side fallback (${info.gatewayProviderId} → ${fbInfo.gatewayProviderId}) is not supported yet. Use fallback.mode:"client", or same-vendor fallback models.`);
			entries.push({
				...primary,
				query: {
					...body,
					model: fbParsed.modelId
				}
			});
		}

		const gw = binding.gateway(gatewayId);
		const runOptions = {};
		const signal = init?.signal ?? request?.signal;
		if (signal) runOptions.signal = signal;
		const resp = await gw.run(entries, runOptions);
		fireDispatch(resp, selection, callOptions);
		return resp;
	};
}
2112
+ //#endregion
1508
2113
  //#region src/index.ts
1509
2114
  /**
2115
+ * The account-wide AI Gateway used for catalog routing when no `gateway` is
2116
+ * configured. Every Cloudflare account has a `"default"` gateway.
2117
+ */
2118
+ const DEFAULT_GATEWAY_ID = "default";
2119
+ /**
1510
2120
  * Create a Workers AI provider instance.
1511
2121
  */
1512
2122
  function createWorkersAI(options) {
@@ -1528,6 +2138,26 @@ function createWorkersAI(options) {
1528
2138
  provider: "workersai.chat",
1529
2139
  isBinding
1530
2140
  });
2141
+ let delegate;
2142
+ const getDelegate = (slug) => {
2143
+ if (!options.providers?.length) throw new Error(`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI provider was not configured to route them. Pass provider plugins, e.g.:
2144
+ import { openai } from "workers-ai-provider/openai";
2145
+ createWorkersAI({ binding: env.AI, providers: [openai] });
2146
+ A gateway defaults to "default" but can be set via \`gateway\`. Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").`);
2147
+ delegate ?? (delegate = createGatewayDelegate({
2148
+ binding,
2149
+ gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
2150
+ providers: options.providers,
2151
+ resume: options.resume,
2152
+ onResumeExpired: options.onResumeExpired
2153
+ }));
2154
+ return delegate;
2155
+ };
2156
/**
 * Whether a model id should be routed to the gateway delegate: a string that
 * contains "/" and does not start with "@".
 */
const isGatewaySlug = (id) => {
	if (typeof id !== "string") return false;
	return id.includes("/") && !id.startsWith("@");
};
/** Build a chat model, sending gateway slugs to the delegate and every other id to createChatModel. */
const buildChat = (modelId, settings) => {
	if (!isGatewaySlug(modelId)) return createChatModel(modelId, settings);
	const delegateFor = getDelegate(modelId);
	return delegateFor(modelId, settings);
};
1531
2161
  const createImageModel = (modelId, settings = {}) => new WorkersAIImageModel(modelId, settings, {
1532
2162
  binding,
1533
2163
  gateway: options.gateway,
@@ -1560,9 +2190,9 @@ function createWorkersAI(options) {
1560
2190
  });
1561
2191
  const provider = (modelId, settings) => {
1562
2192
  if (new.target) throw new Error("The WorkersAI model function cannot be called with the new keyword.");
1563
- return createChatModel(modelId, settings);
2193
+ return buildChat(modelId, settings);
1564
2194
  };
1565
- provider.chat = createChatModel;
2195
+ provider.chat = buildChat;
1566
2196
  provider.embedding = createEmbeddingModel;
1567
2197
  provider.textEmbedding = createEmbeddingModel;
1568
2198
  provider.textEmbeddingModel = createEmbeddingModel;
@@ -1608,6 +2238,6 @@ function createAutoRAG(options) {
1608
2238
  return createAISearch(options, "autorag.chat");
1609
2239
  }
1610
2240
  //#endregion
1611
- export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createWorkersAI };
2241
+ export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, GATEWAY_PROVIDERS, GatewayDelegateError, WorkersAIFallbackError, WorkersAIGatewayError, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createClientFallbackModel, createGatewayFetch, createGatewayProvider, createResumableStream, createWorkersAI, detectProviderByUrl, findProviderBySlug, parseSlug, selectTransport, wireableProviders };
1612
2242
 
1613
2243
  //# sourceMappingURL=index.mjs.map