npm - workers-ai-provider - Versions diffs - 3.1.14 → 3.2.0 - Mend

workers-ai-provider 3.1.14 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.md +183 -31
package/dist/anthropic.d.mts +14 -0
package/dist/anthropic.mjs +21 -0
package/dist/anthropic.mjs.map +1 -0
package/dist/gateway-delegate-BfaUTwDZ.d.mts +385 -0
package/dist/gateway-provider-1USFWm7c.mjs +583 -0
package/dist/gateway-provider-1USFWm7c.mjs.map +1 -0
package/dist/gateway-provider.d.mts +80 -0
package/dist/gateway-provider.mjs +2 -0
package/dist/google.d.mts +14 -0
package/dist/google.mjs +21 -0
package/dist/google.mjs.map +1 -0
package/dist/index.d.mts +64 -7
package/dist/index.mjs +703 -73
package/dist/index.mjs.map +1 -1
package/dist/openai.d.mts +20 -0
package/dist/openai.mjs +27 -0
package/dist/openai.mjs.map +1 -0
package/package.json +47 -6
package/src/anthropic.ts +17 -0
package/src/client-fallback.ts +70 -0
package/src/convert-to-workersai-chat-messages.ts +30 -5
package/src/errors.ts +216 -0
package/src/gateway-delegate.ts +696 -0
package/src/gateway-provider.ts +167 -0
package/src/gateway-providers.ts +457 -0
package/src/google.ts +19 -0
package/src/index.ts +180 -9
package/src/openai.ts +25 -0
package/src/resumable-stream.ts +223 -0
package/src/streaming.ts +102 -30
package/src/utils.ts +187 -4
package/src/workersai-chat-language-model.ts +87 -26
package/src/workersai-chat-settings.ts +1 -1
package/src/workersai-models.ts +11 -3

package/dist/index.mjs CHANGED Viewed

@@ -1,5 +1,6 @@
+import { a as findProviderBySlug, c as WorkersAIGatewayError, i as detectProviderByUrl, l as _defineProperty, n as createGatewayProvider, o as wireableProviders, r as GATEWAY_PROVIDERS, s as WorkersAIFallbackError, t as createGatewayFetch } from "./gateway-provider-1USFWm7c.mjs";
+import { TooManyEmbeddingValuesForCallError, UnsupportedFunctionalityError } from "@ai-sdk/provider";
 import { generateId } from "ai";
-import { TooManyEmbeddingValuesForCallError } from "@ai-sdk/provider";
 //#region src/utils.ts
 /**
 * Normalize messages before passing to the Workers AI binding.
@@ -127,6 +128,39 @@ async function createRunBinary(config, model, audioBytes, contentType, signal) {
 	const data = await response.json();
 	return data.result ?? data;
 }
+/**
+* Build the `response_format.json_schema` payload for native Workers AI models.
+*
+* Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
+* Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
+* only required by partner-model routes (e.g. `openai/...`), which never reach
+* this code — they go through the gateway delegate and the real `@ai-sdk/*`
+* providers, which build the envelope themselves. Wrapping the schema here would
+* break native models, so we must keep the bare shape.
+*
+* The AI SDK's structured-output `name` / `description` (from
+* `Output.object({ schema, name, description })` / `generateObject`) would
+* otherwise be silently dropped on this path. We preserve them as the standard
+* JSON Schema `title` (from `name`) and `description` keywords, which keeps the
+* payload a valid bare schema while still passing the LLM guidance through.
+*
+* Existing schema-level `title` / `description` are never overwritten, empty
+* strings are ignored, and the input schema object is never mutated.
+*
+* See https://github.com/cloudflare/ai/issues/559.
+*/
+function buildJsonSchemaPayload(schema, name, description) {
+	if (typeof schema !== "object" || schema === null || Array.isArray(schema)) return schema;
+	const record = schema;
+	const addTitle = !!name && record.title === void 0;
+	const addDescription = !!description && record.description === void 0;
+	if (!addTitle && !addDescription) return schema;
+	return {
+		...record,
+		...addTitle ? { title: name } : {},
+		...addDescription ? { description } : {}
+	};
+}
 function prepareToolsAndToolChoice(tools, toolChoice) {
 	if (tools == null) return {
 		tool_choice: void 0,
@@ -159,8 +193,11 @@ function prepareToolsAndToolChoice(tools, toolChoice) {
 			tools: mappedTools
 		};
 		case "tool": return {
-			tool_choice: "required",
-			tools: mappedTools.filter((tool) => tool.function.name === toolChoice.toolName)
+			tool_choice: {
+				type: "function",
+				function: { name: toolChoice.toolName }
+			},
+			tools: mappedTools
 		};
 		default: throw new Error(`Unsupported tool choice type: ${type}`);
 	}
@@ -198,6 +235,92 @@ function processToolCalls(output) {
 	return [];
 }
 /**
+* Was a specific tool forced for this request?
+*
+* True for both `tool_choice: "required"` and the named-function form
+* `{ type: "function", function: { name } }`.
+*/
+function isForcedToolChoice(toolChoice) {
+	if (toolChoice === "required") return true;
+	return typeof toolChoice === "object" && toolChoice !== null && toolChoice.type === "function";
+}
+/**
+* Parse tool calls that a model leaked as JSON text instead of structured
+* `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
+*
+* Only JSON objects whose `name` is one of `knownToolNames` are recovered;
+* everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
+* hallucinated names) is ignored to avoid fabricating bogus calls.
+*/
+function parseLeakedToolCalls(text, knownToolNames) {
+	let parsed;
+	try {
+		parsed = JSON.parse(text.trim());
+	} catch {
+		return [];
+	}
+	const candidates = Array.isArray(parsed) ? parsed : [parsed];
+	const salvaged = [];
+	for (const candidate of candidates) {
+		if (typeof candidate !== "object" || candidate === null) continue;
+		const obj = candidate;
+		const name = obj.name;
+		if (typeof name !== "string" || !knownToolNames.has(name)) continue;
+		let args;
+		if ("arguments" in obj) args = obj.arguments;
+		else if ("parameters" in obj) args = obj.parameters;
+		else {
+			const { name: _name, ...rest } = obj;
+			args = rest;
+		}
+		salvaged.push({
+			input: typeof args === "string" ? args : JSON.stringify(args ?? {}),
+			toolCallId: createAISDKToolCallId(void 0),
+			type: "tool-call",
+			toolName: name
+		});
+	}
+	return salvaged;
+}
+/** Collect the requested tool names from mapped tools. */
+function getToolNames(tools) {
+	return new Set((tools ?? []).map((tool) => tool.function?.name).filter((name) => typeof name === "string"));
+}
+/**
+* Salvage a tool call that a model leaked into text content instead of the
+* structured `tool_calls` field.
+*
+* Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
+* call as raw JSON in `message.content` with an empty `tool_calls` array and
+* `finish_reason: "stop"` — typically when the forced tool is a poor fit for
+* the conversation. The content looks like one of:
+*
+*   {"name":"read_skill_resource","path":"feedback.txt"}        (flat args)
+*   {"name":"calc","arguments":{"a":1}}                          (wrapped args)
+*   [{"name":"calc","parameters":{"a":1}}]                       (array form)
+*
+* This reinterprets that text as a structured tool call. It is intentionally
+* narrow to avoid false positives:
+*   - only runs when a tool was *forced* (required / named-function), so a
+*     tool call was explicitly demanded by the caller;
+*   - only runs when there are no real structured tool calls to override;
+*   - only matches JSON objects whose `name` is one of the requested tools.
+*
+* Returns the salvaged tool calls, or `null` when nothing was salvaged.
+*
+* See https://github.com/cloudflare/ai/issues/560.
+*/
+function salvageToolCallsFromText(output, context) {
+	if (!isForcedToolChoice(context.toolChoice)) return null;
+	if (processToolCalls(output).length > 0) return null;
+	const knownToolNames = getToolNames(context.tools);
+	if (knownToolNames.size === 0) return null;
+	const text = processText(output);
+	if (!text) return null;
+	const salvaged = parseLeakedToolCalls(text, knownToolNames);
+	return salvaged.length > 0 ? salvaged : null;
+}
+/**
 * Extract text from a Workers AI response, handling multiple response formats:
 * - OpenAI format: { choices: [{ message: { content: "..." } }] }
 * - Native format: { response: "..." }
@@ -241,6 +364,17 @@ function toUint8Array$2(data) {
 	if (data instanceof URL) throw new Error("URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead.");
 	return null;
 }
+function assertImageMediaType(mediaType) {
+	if (!mediaType) throw new UnsupportedFunctionalityError({
+		functionality: "file-part-without-media-type",
+		message: "Workers AI chat only supports image file parts with an image/* mediaType. Received a file part without a mediaType."
+	});
+	if (!mediaType.toLowerCase().startsWith("image/")) throw new UnsupportedFunctionalityError({
+		functionality: "non-image-file-part",
+		message: `Workers AI chat only supports image file parts with an image/* mediaType. Received mediaType "${mediaType}".`
+	});
+	return mediaType;
+}
 function uint8ArrayToBase64$1(bytes) {
 	let binary = "";
 	const chunkSize = 8192;
@@ -267,10 +401,11 @@ function convertToWorkersAIChatMessages(prompt) {
 					textParts.push(part.text);
 					break;
 				case "file": {
+					const mediaType = assertImageMediaType(part.mediaType);
 					const imageBytes = toUint8Array$2(part.data);
 					if (imageBytes) imageParts.push({
 						image: imageBytes,
-						mediaType: part.mediaType
+						mediaType
 					});
 					break;
 				}
@@ -283,10 +418,9 @@ function convertToWorkersAIChatMessages(prompt) {
 				});
 				for (const img of imageParts) {
 					const base64 = uint8ArrayToBase64$1(img.image);
-					const mediaType = img.mediaType || "image/png";
 					contentArray.push({
 						type: "image_url",
-						image_url: { url: `data:${mediaType};base64,${base64}` }
+						image_url: { url: `data:${img.mediaType};base64,${base64}` }
 					});
 				}
 				messages.push({
@@ -504,9 +638,13 @@ function isNullFinalizationChunk(tc) {
 * 1. Native format:  { response: "chunk", tool_calls: [...] }
 * 2. OpenAI format:  { choices: [{ delta: { content: "chunk" } }] }
 */
-function getMappedStream(response) {
+function getMappedStream(response, salvageContext) {
 	const rawStream = response instanceof ReadableStream ? response : response.body;
 	if (!rawStream) throw new Error("No readable stream available for SSE parsing.");
+	const knownToolNames = getToolNames(salvageContext?.tools);
+	const bufferContentForSalvage = isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
+	let contentBuffer = "";
+	let anyToolCallStarted = false;
 	let usage = {
 		outputTokens: {
 			total: 0,
@@ -552,7 +690,8 @@ function getMappedStream(response) {
 			const nativeResponse = chunk.response;
 			if (nativeResponse != null && nativeResponse !== "") {
 				const responseText = String(nativeResponse);
-				if (responseText.length > 0) {
+				if (responseText.length > 0) if (bufferContentForSalvage) contentBuffer += responseText;
+				else {
 					if (reasoningId) {
 						controller.enqueue({
 							type: "reasoning-end",
@@ -602,7 +741,8 @@ function getMappedStream(response) {
 					});
 				}
 				const textDelta = delta.content;
-				if (textDelta && textDelta.length > 0) {
+				if (textDelta && textDelta.length > 0) if (bufferContentForSalvage) contentBuffer += textDelta;
+				else {
 					if (reasoningId) {
 						controller.enqueue({
 							type: "reasoning-end",
@@ -645,11 +785,69 @@ function getMappedStream(response) {
 				type: "reasoning-end",
 				id: reasoningId
 			});
+			let salvagedToolCalls = false;
+			if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
+				const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
+				if (salvaged.length > 0) {
+					for (const call of salvaged) {
+						controller.enqueue({
+							type: "tool-input-start",
+							id: call.toolCallId,
+							toolName: call.toolName
+						});
+						controller.enqueue({
+							type: "tool-input-delta",
+							id: call.toolCallId,
+							delta: call.input
+						});
+						controller.enqueue({
+							type: "tool-input-end",
+							id: call.toolCallId
+						});
+						controller.enqueue(call);
+					}
+					salvagedToolCalls = true;
+					console.warn(`[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`);
+				} else {
+					const id = generateId();
+					controller.enqueue({
+						type: "text-start",
+						id
+					});
+					controller.enqueue({
+						type: "text-delta",
+						id,
+						delta: contentBuffer
+					});
+					controller.enqueue({
+						type: "text-end",
+						id
+					});
+				}
+			} else if (bufferContentForSalvage && contentBuffer.trim()) {
+				const id = generateId();
+				controller.enqueue({
+					type: "text-start",
+					id
+				});
+				controller.enqueue({
+					type: "text-delta",
+					id,
+					delta: contentBuffer
+				});
+				controller.enqueue({
+					type: "text-end",
+					id
+				});
+			}
 			if (textId) controller.enqueue({
 				type: "text-end",
 				id: textId
 			});
-			const effectiveFinishReason = !receivedDone && receivedAnyData && !finishReason ? {
+			const effectiveFinishReason = salvagedToolCalls ? {
+				unified: "tool-calls",
+				raw: "stop"
+			} : !receivedDone && receivedAnyData && !finishReason ? {
 				unified: "error",
 				raw: "stream-truncated"
 			} : finishReason ?? {
@@ -716,6 +914,7 @@ function getMappedStream(response) {
 					args: ""
 				});
 				lastActiveToolIndex = tcIndex;
+				anyToolCallStarted = true;
 				controller.enqueue({
 					type: "tool-input-start",
 					id,
@@ -778,44 +977,6 @@ var SSEDecoder = class extends TransformStream {
 	}
 };
 //#endregion
-//#region \0@oxc-project+runtime@0.122.0/helpers/typeof.js
-function _typeof(o) {
-	"@babel/helpers - typeof";
-	return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function(o) {
-		return typeof o;
-	} : function(o) {
-		return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o;
-	}, _typeof(o);
-}
-//#endregion
-//#region \0@oxc-project+runtime@0.122.0/helpers/toPrimitive.js
-function toPrimitive(t, r) {
-	if ("object" != _typeof(t) || !t) return t;
-	var e = t[Symbol.toPrimitive];
-	if (void 0 !== e) {
-		var i = e.call(t, r || "default");
-		if ("object" != _typeof(i)) return i;
-		throw new TypeError("@@toPrimitive must return a primitive value.");
-	}
-	return ("string" === r ? String : Number)(t);
-}
-//#endregion
-//#region \0@oxc-project+runtime@0.122.0/helpers/toPropertyKey.js
-function toPropertyKey(t) {
-	var i = toPrimitive(t, "string");
-	return "symbol" == _typeof(i) ? i : i + "";
-}
-//#endregion
-//#region \0@oxc-project+runtime@0.122.0/helpers/defineProperty.js
-function _defineProperty(e, r, t) {
-	return (r = toPropertyKey(r)) in e ? Object.defineProperty(e, r, {
-		value: t,
-		enumerable: !0,
-		configurable: !0,
-		writable: !0
-	}) : e[r] = t, e;
-}
-//#endregion
 //#region src/aisearch-chat-language-model.ts
 var AISearchChatLanguageModel = class {
 	constructor(modelId, settings, config) {
@@ -983,18 +1144,21 @@ var WorkersAIChatLanguageModel = class {
 				},
 				warnings
 			};
-			case "json": return {
-				args: {
-					...baseArgs,
-					response_format: {
-						type: "json_schema",
-						json_schema: responseFormat?.type === "json" ? responseFormat.schema : void 0
+			case "json": {
+				const json = responseFormat?.type === "json" ? responseFormat : void 0;
+				return {
+					args: {
+						...baseArgs,
+						response_format: {
+							type: "json_schema",
+							json_schema: buildJsonSchemaPayload(json?.schema, json?.name, json?.description)
+						},
+						tools: void 0,
+						tool_choice: void 0
 					},
-					tools: void 0,
-					tool_choice: void 0
-				},
-				warnings
-			};
+					warnings
+				};
+			}
 			default: throw new Error(`Unsupported type: ${type}`);
 		}
 	}
@@ -1053,6 +1217,38 @@ var WorkersAIChatLanguageModel = class {
 			...passthroughOptions
 		};
 	}
+	/**
+	* Extract reasoning, text, and tool calls from a non-streaming response.
+	*
+	* Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
+	* path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
+	* and is retried non-streaming). When a forced tool call was leaked into
+	* text content (gpt-oss harmony quirk), it is salvaged into a structured
+	* tool call and the leaked JSON text is suppressed. A warning is appended in
+	* place so callers can observe the reinterpretation.
+	*/
+	extractContent(outputRecord, args, warnings) {
+		const choices = outputRecord.choices;
+		const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
+		const toolCalls = processToolCalls(outputRecord);
+		const salvaged = toolCalls.length === 0 ? salvageToolCallsFromText(outputRecord, {
+			tools: args.tools,
+			toolChoice: args.tool_choice
+		}) : null;
+		if (salvaged) warnings.push({
+			type: "other",
+			message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`
+		});
+		return {
+			reasoningContent,
+			text: salvaged ? "" : processText(outputRecord) ?? "",
+			toolCalls: salvaged ?? toolCalls,
+			finishReason: salvaged ? {
+				unified: "tool-calls",
+				raw: "stop"
+			} : mapWorkersAIFinishReason(outputRecord)
+		};
+	}
 	async doGenerate(options) {
 		const { args, warnings } = this.getArgs(options);
 		const { messages } = convertToWorkersAIChatMessages(options.prompt);
@@ -1064,10 +1260,9 @@ var WorkersAIChatLanguageModel = class {
 		});
 		if (output instanceof ReadableStream) throw new Error("Unexpected streaming response from non-streaming request. Check that `stream: true` was not passed.");
 		const outputRecord = output;
-		const choices = outputRecord.choices;
-		const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
+		const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
 		return {
-			finishReason: mapWorkersAIFinishReason(outputRecord),
+			finishReason,
 			content: [
 				...reasoningContent ? [{
 					type: "reasoning",
@@ -1075,9 +1270,9 @@ var WorkersAIChatLanguageModel = class {
 				}] : [],
 				{
 					type: "text",
-					text: processText(outputRecord) ?? ""
+					text
 				},
-				...processToolCalls(outputRecord)
+				...toolCalls
 			],
 			usage: mapWorkersAIUsage(output),
 			warnings
@@ -1095,10 +1290,12 @@ var WorkersAIChatLanguageModel = class {
 			...runOptions,
 			signal: options.abortSignal
 		});
-		if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response), warnings) };
+		if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response, {
+			tools: args.tools,
+			toolChoice: args.tool_choice
+		}), warnings) };
 		const outputRecord = response;
-		const choices = outputRecord.choices;
-		const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
+		const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
 		let textId = null;
 		let reasoningId = null;
 		return { stream: new ReadableStream({ start(controller) {
@@ -1122,7 +1319,6 @@ var WorkersAIChatLanguageModel = class {
 					id: reasoningId
 				});
 			}
-			const text = processText(outputRecord);
 			if (text) {
 				textId = generateId();
 				controller.enqueue({
@@ -1139,10 +1335,10 @@ var WorkersAIChatLanguageModel = class {
 					id: textId
 				});
 			}
-			for (const toolCall of processToolCalls(outputRecord)) controller.enqueue(toolCall);
+			for (const toolCall of toolCalls) controller.enqueue(toolCall);
 			controller.enqueue({
 				type: "finish",
-				finishReason: mapWorkersAIFinishReason(outputRecord),
+				finishReason,
 				usage: mapWorkersAIUsage(response)
 			});
 			controller.close();
@@ -1505,8 +1701,422 @@ function documentsToContexts(documents, warnings) {
 */
 var AutoRAGChatLanguageModel = class extends AISearchChatLanguageModel {};
 //#endregion
+//#region src/client-fallback.ts
+/**
+* Wrap a chain of models so a failed *pre-stream* dispatch falls through to the
+* next model, preserving resume on each leg's own transport. If every leg fails,
+* throws a {@link WorkersAIFallbackError} carrying the full attempt tree.
+*
+* Fallback triggers on `doGenerate`/`doStream` rejection (the dispatch never
+* produced a stream). Errors that surface *mid-stream* — after content has
+* already been emitted — are not recoverable here and propagate as-is.
+*/
+function createClientFallbackModel(legs) {
+	if (legs.length === 0) throw new Error("createClientFallbackModel requires at least one model leg.");
+	const primary = legs[0].model;
+	async function attempt(run) {
+		const attempts = [];
+		for (const leg of legs) try {
+			const result = await run(leg.model);
+			attempts.push({
+				model: leg.slug,
+				transport: leg.transport,
+				ok: true
+			});
+			return result;
+		} catch (e) {
+			const err = WorkersAIGatewayError.fromUnknown(e);
+			attempts.push({
+				model: leg.slug,
+				transport: leg.transport,
+				ok: false,
+				status: err.status,
+				error: err
+			});
+		}
+		throw new WorkersAIFallbackError(attempts);
+	}
+	return {
+		specificationVersion: "v3",
+		provider: primary.provider,
+		modelId: primary.modelId,
+		supportedUrls: primary.supportedUrls,
+		doGenerate(options) {
+			return attempt((m) => m.doGenerate(options));
+		},
+		doStream(options) {
+			return attempt((m) => m.doStream(options));
+		}
+	};
+}
+//#endregion
+//#region src/resumable-stream.ts
+function concat(a, b) {
+	const out = new Uint8Array(new ArrayBuffer(a.length + b.length));
+	out.set(a, 0);
+	out.set(b, a.length);
+	return out;
+}
+/** Index just past the last `\n\n` in `buf`, or -1 if there is no complete event. */
+function lastEventBoundary(buf) {
+	for (let i = buf.length - 2; i >= 0; i--) if (buf[i] === 10 && buf[i + 1] === 10) return i + 2;
+	return -1;
+}
+/** Count of `\n\n` terminators (= complete SSE events) in `buf`. */
+function countEvents(buf) {
+	let n = 0;
+	for (let i = 0; i + 1 < buf.length; i++) if (buf[i] === 10 && buf[i + 1] === 10) {
+		n++;
+		i++;
+	}
+	return n;
+}
+function resumeUrl(gateway, runId, from) {
+	return `https://workers-binding.ai/ai-gateway/gateways/${gateway}/run/${runId}/resume?from=${from}`;
+}
+function createResumableStream(options) {
+	const { binding, gateway, runId } = options;
+	const maxReconnects = options.maxReconnects ?? 5;
+	const onExpired = options.onResumeExpired ?? "error";
+	let emittedEvents = options.fromEvent ?? 0;
+	let pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
+	let reconnects = 0;
+	async function fetchResume(controller) {
+		let res;
+		try {
+			res = await binding.fetch(resumeUrl(gateway, runId, emittedEvents), { method: "GET" });
+		} catch (fetchErr) {
+			controller.error(new GatewayDelegateError("dispatch", `Resume request threw at event ${emittedEvents}.`, fetchErr));
+			return null;
+		}
+		if (res.status === 404) {
+			if (onExpired === "accept-partial") {
+				controller.close();
+				return null;
+			}
+			controller.error(new GatewayDelegateError("resume-expired", `Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer TTL (~5.5 min) elapsed; fall back to continuation or regeneration.`));
+			return null;
+		}
+		if (!res.ok || !res.body) {
+			controller.error(new GatewayDelegateError("dispatch", `Resume failed (${res.status}) at event ${emittedEvents}.`));
+			return null;
+		}
+		return res.body;
+	}
+	return new ReadableStream({ async start(controller) {
+		let current;
+		if (options.initial) current = options.initial;
+		else {
+			const body = await fetchResume(controller);
+			if (!body) return;
+			current = body;
+		}
+		for (;;) {
+			const reader = current.getReader();
+			try {
+				for (;;) {
+					const { done, value } = await reader.read();
+					if (done) {
+						if (pending.length > 0) {
+							controller.enqueue(pending);
+							pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
+						}
+						controller.close();
+						return;
+					}
+					if (!value || value.length === 0) continue;
+					pending = concat(pending, value);
+					const boundary = lastEventBoundary(pending);
+					if (boundary > 0) {
+						const complete = pending.slice(0, boundary);
+						controller.enqueue(complete);
+						emittedEvents += countEvents(complete);
+						options.onProgress?.(emittedEvents);
+						pending = pending.slice(boundary);
+					}
+				}
+			} catch (err) {
+				try {
+					reader.releaseLock();
+				} catch {}
+				if (reconnects >= maxReconnects) {
+					controller.error(new GatewayDelegateError("resume-expired", `Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`, err));
+					return;
+				}
+				pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
+				reconnects++;
+				options.onReconnect?.(emittedEvents, reconnects);
+				const body = await fetchResume(controller);
+				if (!body) return;
+				current = body;
+			}
+		}
+	} });
+}
+//#endregion
+//#region src/gateway-delegate.ts
+/**
+* Parse a `vendor/model` slug. The first segment is the resolver key (which
+* registry entry handles it); the rest is the provider-native model id. Routing
+* providers keep multi-segment model ids, e.g. `openrouter/anthropic/claude`.
+*/
+function parseSlug(slug) {
+	const slash = slug.indexOf("/");
+	if (slash === -1) throw new GatewayDelegateError("config", `Model slug "${slug}" has no resolver key. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
+	const resolverKey = slug.slice(0, slash);
+	const modelId = slug.slice(slash + 1);
+	if (!resolverKey || !modelId) throw new GatewayDelegateError("config", `Model slug "${slug}" is malformed. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
+	return {
+		resolverKey,
+		modelId
+	};
+}
+/**
+* Resolve a slug to its registry entry, raising a helpful error for unknown or
+* bring-your-own-provider-only providers.
+*/
+function resolveProvider(slug, parsed) {
+	const info = findProviderBySlug(parsed.resolverKey);
+	if (!info) throw new GatewayDelegateError("config", `Unknown gateway provider "${parsed.resolverKey}" (from slug "${slug}"). See the AI Gateway provider directory for valid slugs, or use createGatewayProvider to bring your own @ai-sdk provider.`);
+	if (!info.wireFormat) throw new GatewayDelegateError("config", `Provider "${parsed.resolverKey}" is not chat/completions-shaped and has no built-in parser. Reach it with createGatewayProvider (bring your own @ai-sdk provider).`);
+	return info;
+}
+/**
+* Resolve the transport from the requested options. Gateway-only features (server
+* fallback, caching) force the gateway path and disable resume — with a loud
+* warning if resume was merely defaulted, or a thrown error if it was explicitly
+* requested.
+*/
+function selectTransport(opts, resumeExplicitlyTrue, runCatalog = true, gatewayAvailable = true) {
+	const warnings = [];
+	const wantsServerFallback = opts.fallback?.mode === "server";
+	const wantsCaching = opts.cacheTtl !== void 0 || opts.skipCache === true;
+	const gatewayOnly = wantsServerFallback || wantsCaching;
+	const feature = wantsServerFallback ? "fallback.mode:\"server\"" : "caching (cacheTtl/skipCache)";
+	if (runCatalog && !gatewayAvailable && (opts.transport === "gateway" || gatewayOnly)) throw new GatewayDelegateError("config", `${opts.transport === "gateway" ? "transport:\"gateway\"" : feature} is unavailable: this provider is on the unified run catalog but is not a native gateway provider, so it has no gateway path (no caching, server-side fallback, or transport:"gateway"). Use the default run path, or fallback.mode:"client".`);
+	if (!runCatalog) {
+		if (opts.transport === "run") throw new GatewayDelegateError("config", "transport:\"run\" is unavailable: this provider is not on the unified-billing run catalog, so it can only be reached through the gateway path (BYOK).");
+		if (resumeExplicitlyTrue) throw new GatewayDelegateError("config", "resume:true is unavailable: this provider is not on the resumable run catalog (cf-aig-run-id requires the unified-billing run path).");
+		return {
+			transport: "gateway",
+			resumeEnabled: false,
+			warnings
+		};
+	}
+	if (opts.transport === "run" && gatewayOnly) throw new GatewayDelegateError("config", `transport:"run" cannot satisfy ${feature}: those features are only available on the gateway path. Use the gateway transport, or fallback.mode:"client".`);
+	if (opts.transport === "gateway" && resumeExplicitlyTrue) throw new GatewayDelegateError("config", "transport:\"gateway\" cannot provide resume — cf-aig-run-id is only on the run path.");
+	if (gatewayOnly) {
+		if (resumeExplicitlyTrue) throw new GatewayDelegateError("config", `resume:true conflicts with ${feature}: resume (cf-aig-run-id) is only on the run path, which does not support ${wantsServerFallback ? "server-side fallback" : "caching"}. Use fallback.mode:"client" to keep resume, or drop resume.`);
+		warnings.push(`[workers-ai-provider] resume disabled: ${feature} requires the gateway path, which does not surface cf-aig-run-id. Use fallback.mode:"client" to keep resumable streaming.`);
+		return {
+			transport: "gateway",
+			resumeEnabled: false,
+			warnings
+		};
+	}
+	const transport = opts.transport ?? "run";
+	return {
+		transport,
+		resumeEnabled: transport === "run" && opts.resume !== false,
+		warnings
+	};
+}
+var GatewayDelegateError = class extends Error {
+	constructor(kind, message, cause) {
+		super(message);
+		_defineProperty(this, "kind", void 0);
+		_defineProperty(this, "cause", void 0);
+		this.name = "GatewayDelegateError";
+		this.kind = kind;
+		this.cause = cause;
+	}
+};
+const STRIP_HEADERS_BASE = new Set(["content-length", "host"]);
+function asText(body) {
+	if (typeof body === "string") return body;
+	if (body instanceof Uint8Array) return new TextDecoder().decode(body);
+	if (body instanceof ArrayBuffer) return new TextDecoder().decode(body);
+	return "{}";
+}
+function headersToObject(h) {
+	const out = {};
+	if (!h) return out;
+	if (h instanceof Headers) for (const [k, v] of h) out[k] = v;
+	else if (Array.isArray(h)) for (const [k, v] of h) out[k] = v;
+	else Object.assign(out, h);
+	return out;
+}
+function normalizeGateway(gateway) {
+	if (!gateway) throw new GatewayDelegateError("config", "A gateway is required for the delegate (resume needs a gateway). Pass `gateway: \"<gateway-id>\"` to createGatewayDelegate or per call.");
+	if (typeof gateway === "string") return {
+		id: gateway,
+		options: { id: gateway }
+	};
+	return {
+		id: gateway.id,
+		options: gateway
+	};
+}
+/**
+* Create a gateway delegate. Returns a function that builds an AI SDK model for a
+* `"<provider>/<model>"` slug, dispatched through AI Gateway on the transport the
+* requested options imply.
+*
+* `config.binding` and a non-empty `config.providers` are mandatory. `config.gateway`,
+* `config.resume` (treated as true when nullish) and `config.onResumeExpired` serve as
+* defaults that the per-call options take precedence over.
+*
+* @throws {GatewayDelegateError} kind "config" for a missing binding, missing plugins,
+* an unregistered wire format, or a missing gateway.
+*/
+function createGatewayDelegate(config) {
+	if (!config?.binding) throw new GatewayDelegateError("config", "createGatewayDelegate requires a `binding` (e.g. { binding: env.AI }).");
+	if (!config.providers?.length) throw new GatewayDelegateError("config", "createGatewayDelegate requires at least one provider plugin, e.g. `providers: [openai]` from \"workers-ai-provider/openai\".");
+	// Index plugins by the wire format they speak; a later duplicate replaces an earlier one.
+	const plugins = /* @__PURE__ */ new Map();
+	for (const provider of config.providers) plugins.set(provider.wireFormat, provider);
+	const resumeByDefault = config.resume ?? true;
+	// Builds the model for a single slug and reports which transport was chosen.
+	const buildOne = (slug, options) => {
+		const parsed = parseSlug(slug);
+		const info = resolveProvider(slug, parsed);
+		const effectiveOptions = {
+			...options,
+			resume: options.resume ?? resumeByDefault,
+			onResumeExpired: options.onResumeExpired ?? config.onResumeExpired
+		};
+		// Second argument is true only when the caller passed `resume: true` itself
+		// (as opposed to inheriting the delegate-level default).
+		const selection = selectTransport(effectiveOptions, options.resume === true, info.runCatalog, info.gatewayPath !== false);
+		for (const warning of selection.warnings) console.warn(warning);
+		const usesRun = selection.transport === "run";
+		const wire = usesRun ? (info.runWireFormat ?? "openai") : info.wireFormat;
+		const plugin = plugins.get(wire);
+		if (!plugin) {
+			const registered = [...plugins.keys()].join(", ") || "<none>";
+			const detail = usesRun
+				? `The run path for "${parsed.resolverKey}" (from slug "${slug}") returns "${wire}"-wire responses, so it needs the "${wire}" plugin. Install + pass it from "workers-ai-provider/${wire}". Registered: ${registered}.`
+				: `No provider plugin for wire format "${wire}" (needed by "${parsed.resolverKey}" on the gateway path from slug "${slug}"). Registered: ${registered}. Install + pass the matching plugin from "workers-ai-provider/${wire}".`;
+			throw new GatewayDelegateError("config", detail);
+		}
+		// The gateway is resolved only after the plugin check, so a missing plugin is reported first.
+		const { id: gatewayId, options: gatewayOptions } = normalizeGateway(options.gateway ?? config.gateway);
+		const fetchImpl = usesRun
+			? makeRunFetch(config.binding, `${info.resolverKey}/${parsed.modelId}`, gatewayOptions, effectiveOptions, selection, options)
+			: makeGatewayFetch(config.binding, info, gatewayId, gatewayOptions, effectiveOptions, selection, options);
+		const modelConfig = {
+			modelId: parsed.modelId,
+			fetch: fetchImpl
+		};
+		if (selection.transport === "gateway" && info.baseURL) modelConfig.baseURL = info.baseURL;
+		return {
+			model: plugin.create(modelConfig),
+			transport: selection.transport
+		};
+	};
+	return (slug, options = {}) => {
+		if (options.fallback?.mode !== "client") return buildOne(slug, options).model;
+		// Client-side fallback: build one model per candidate slug (primary first),
+		// with the `fallback` option itself removed from what each model receives.
+		const { fallback, ...rest } = options;
+		const chain = [];
+		for (const candidate of [slug, ...fallback.models]) {
+			const { model, transport } = buildOne(candidate, rest);
+			chain.push({
+				slug: candidate,
+				model,
+				transport
+			});
+		}
+		return createClientFallbackModel(chain);
+	};
+}
+/**
+* Invoke the caller's `onDispatch` hook (when present) with a summary of the
+* dispatch: the selected transport, resume flag and warnings, plus the response
+* status and the `cf-aig-*` response headers read below.
+*/
+function fireDispatch(resp, selection, options) {
+	if (!options.onDispatch) return;
+	const header = (name) => resp.headers.get(name);
+	const event = {
+		transport: selection.transport,
+		resumeEnabled: selection.resumeEnabled,
+		warnings: selection.warnings,
+		status: resp.status,
+		runId: header("cf-aig-run-id"),
+		cfStep: header("cf-aig-step"),
+		cacheStatus: header("cf-aig-cache-status"),
+		logId: header("cf-aig-log-id")
+	};
+	options.onDispatch(event);
+}
+/**
+* Combine gateway-option metadata with call-level metadata; on a key clash the
+* call-level (`override`) value wins. Yields undefined when both inputs are falsy.
+*/
+function mergeMetadata(base, override) {
+	const hasAny = Boolean(base) || Boolean(override);
+	return hasAny ? {
+		...base,
+		...override
+	} : void 0;
+}
+/**
+* Produce the JSON text used for the `cf-aig-metadata` header. JSON.stringify
+* cannot encode bigint values, so each one is emitted as its decimal string.
+*/
+function serializeMetadata(metadata) {
+	const bigintToString = (_key, value) => {
+		if (typeof value === "bigint") return value.toString();
+		return value;
+	};
+	return JSON.stringify(metadata, bigintToString);
+}
+/**
+* Build a fetch-compatible function that sends the request through
+* `binding.run(slug, body, options)` instead of over HTTP.
+*
+* The request URL is ignored. The JSON body is forwarded without its `model`
+* field. Gateway options are copied and extended with the merged metadata and
+* the call's `collectLog` setting. When resume is enabled and the response
+* carries both a `cf-aig-run-id` header and a body, the body is wrapped by
+* createResumableStream; otherwise the raw response is returned.
+*/
+function makeRunFetch(binding, slug, gatewayOptions, opts, selection, callOptions) {
+	return (async (_input, init) => {
+		const payload = JSON.parse(asText(init?.body));
+		delete payload.model;
+		// Work on a copy so the shared gateway options are never mutated per call.
+		const gateway = { ...gatewayOptions };
+		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
+		if (metadata) gateway.metadata = metadata;
+		if (opts.collectLog !== void 0) gateway.collectLog = opts.collectLog;
+		const runOptions = {
+			gateway,
+			returnRawResponse: true
+		};
+		if (opts.extraHeaders) runOptions.extraHeaders = opts.extraHeaders;
+		if (init?.signal) runOptions.signal = init.signal;
+		const response = await binding.run(slug, payload, runOptions);
+		fireDispatch(response, selection, callOptions);
+		const runId = response.headers.get("cf-aig-run-id");
+		const canResume = selection.resumeEnabled && runId && response.body;
+		if (!canResume) return response;
+		const streamConfig = {
+			binding,
+			gateway: gatewayOptions.id,
+			runId,
+			initial: response.body,
+			onResumeExpired: opts.onResumeExpired
+		};
+		if (opts.onProgress) streamConfig.onProgress = opts.onProgress;
+		// Re-wrap so the consumer reads from the resumable stream while keeping
+		// the original status and headers.
+		return new Response(createResumableStream(streamConfig), {
+			status: response.status,
+			headers: response.headers
+		});
+	});
+}
+/**
+* Build a fetch-compatible function that sends the request through
+* `binding.gateway(gatewayId).run(entries, options)`.
+*
+* Each call produces one primary entry (provider id, endpoint, filtered headers,
+* JSON body as `query`). With `fallback.mode === "server"`, one further entry per
+* fallback model is appended, identical except for `query.model`.
+*
+* @throws {GatewayDelegateError} kind "config" when a server-side fallback model
+* resolves to a different gateway provider than the primary.
+*/
+function makeGatewayFetch(binding, info, gatewayId, gatewayOptions, opts, selection, callOptions) {
+	// Headers removed from the outgoing request; the provider's auth headers
+	// are added to the list unless `byok` is set.
+	const blocked = new Set(STRIP_HEADERS_BASE);
+	if (!opts.byok) for (const name of info.authHeaders) blocked.add(name.toLowerCase());
+	return (async (input, init) => {
+		const rawUrl = typeof input === "string" ? input : input.toString();
+		let endpoint;
+		if (info.transformEndpoint) endpoint = info.transformEndpoint(rawUrl);
+		else {
+			// Default endpoint: the URL path without its leading slash, plus any query string.
+			const { pathname, search } = new URL(rawUrl);
+			endpoint = pathname.replace(/^\//, "") + (search || "");
+		}
+		const body = JSON.parse(asText(init?.body));
+		const headers = {};
+		for (const [name, value] of Object.entries(headersToObject(init?.headers))) {
+			if (blocked.has(name.toLowerCase())) continue;
+			headers[name] = value;
+		}
+		// Applied after filtering, so extraHeaders are never stripped.
+		if (opts.extraHeaders) Object.assign(headers, opts.extraHeaders);
+		if (opts.cacheTtl !== void 0) headers["cf-aig-cache-ttl"] = String(opts.cacheTtl);
+		if (opts.skipCache) headers["cf-aig-skip-cache"] = "true";
+		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
+		if (metadata) headers["cf-aig-metadata"] = serializeMetadata(metadata);
+		if (opts.collectLog !== void 0) headers["cf-aig-collect-log"] = String(opts.collectLog);
+		const primary = {
+			provider: info.gatewayProviderId,
+			endpoint,
+			headers,
+			query: body
+		};
+		const entries = [primary];
+		const serverFallbacks = opts.fallback?.mode === "server" ? opts.fallback.models : [];
+		for (const fb of serverFallbacks) {
+			const fbParsed = parseSlug(fb);
+			const fbInfo = resolveProvider(fb, fbParsed);
+			if (fbInfo.gatewayProviderId !== info.gatewayProviderId) throw new GatewayDelegateError("config", `Cross-vendor server-side fallback (${info.gatewayProviderId} → ${fbInfo.gatewayProviderId}) is not supported yet. Use fallback.mode:"client", or same-vendor fallback models.`);
+			entries.push({
+				...primary,
+				query: {
+					...body,
+					model: fbParsed.modelId
+				}
+			});
+		}
+		const gw = binding.gateway(gatewayId);
+		const runOptions = {};
+		if (init?.signal) runOptions.signal = init.signal;
+		const response = await gw.run(entries, runOptions);
+		fireDispatch(response, selection, callOptions);
+		return response;
+	});
+}
+//#endregion
 //#region src/index.ts
 /**
+* The account-wide AI Gateway used for catalog routing when no `gateway` is
+* configured. Every Cloudflare account has a `"default"` gateway.
+*
+* `createWorkersAI` passes `{ id: DEFAULT_GATEWAY_ID }` to the gateway delegate
+* when `options.gateway` is nullish.
+*/
+const DEFAULT_GATEWAY_ID = "default";
+/**
 * Create a Workers AI provider instance.
 */
 function createWorkersAI(options) {
@@ -1528,6 +2138,26 @@ function createWorkersAI(options) {
 		provider: "workersai.chat",
 		isBinding
 	});
+	// Cached gateway delegate; created on first use by getDelegate.
+	let delegate;
+	// Returns the shared gateway delegate, creating it on the first call.
+	// Throws when no provider plugins were configured; `slug` is only used in that message.
+	const getDelegate = (slug) => {
+		const { providers } = options;
+		if (!providers?.length) {
+			throw new Error(`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI provider was not configured to route them. Pass provider plugins, e.g.:
+  import { openai } from "workers-ai-provider/openai";
+  createWorkersAI({ binding: env.AI, providers: [openai] });
+A gateway defaults to "default" but can be set via \`gateway\`. Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").`);
+		}
+		if (delegate == null) {
+			delegate = createGatewayDelegate({
+				binding,
+				gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
+				providers,
+				resume: options.resume,
+				onResumeExpired: options.onResumeExpired
+			});
+		}
+		return delegate;
+	};
+	// A gateway slug is a string id that contains "/" and does not start with "@".
+	const isGatewaySlug = (id) => {
+		if (typeof id !== "string") return false;
+		return id.includes("/") && !id.startsWith("@");
+	};
+	// Gateway slugs are built by the gateway delegate; every other id goes to createChatModel.
+	const buildChat = (modelId, settings) => isGatewaySlug(modelId)
+		? getDelegate(modelId)(modelId, settings)
+		: createChatModel(modelId, settings);
 	const createImageModel = (modelId, settings = {}) => new WorkersAIImageModel(modelId, settings, {
 		binding,
 		gateway: options.gateway,
@@ -1560,9 +2190,9 @@ function createWorkersAI(options) {
 	});
+	// Callable provider entry point: `provider(modelId, settings)` returns a chat model.
+	// NOTE(review): arrow functions have no `new.target` of their own, so this check
+	// observes the enclosing function's invocation rather than a `new provider(...)`
+	// call — confirm that is the intended guard.
 	const provider = (modelId, settings) => {
 		if (new.target) throw new Error("The WorkersAI model function cannot be called with the new keyword.");
-		return createChatModel(modelId, settings);
+		return buildChat(modelId, settings);
 	};
-	provider.chat = createChatModel;
+	provider.chat = buildChat;
 	provider.embedding = createEmbeddingModel;
 	provider.textEmbedding = createEmbeddingModel;
 	provider.textEmbeddingModel = createEmbeddingModel;
@@ -1608,6 +2238,6 @@ function createAutoRAG(options) {
 	return createAISearch(options, "autorag.chat");
 }
 //#endregion
-export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createWorkersAI };
+export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, GATEWAY_PROVIDERS, GatewayDelegateError, WorkersAIFallbackError, WorkersAIGatewayError, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createClientFallbackModel, createGatewayFetch, createGatewayProvider, createResumableStream, createWorkersAI, detectProviderByUrl, findProviderBySlug, parseSlug, selectTransport, wireableProviders };
 //# sourceMappingURL=index.mjs.map