workers-ai-provider 3.1.14 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -31
- package/dist/anthropic.d.mts +14 -0
- package/dist/anthropic.mjs +21 -0
- package/dist/anthropic.mjs.map +1 -0
- package/dist/gateway-delegate-BfaUTwDZ.d.mts +385 -0
- package/dist/gateway-provider-1USFWm7c.mjs +583 -0
- package/dist/gateway-provider-1USFWm7c.mjs.map +1 -0
- package/dist/gateway-provider.d.mts +80 -0
- package/dist/gateway-provider.mjs +2 -0
- package/dist/google.d.mts +14 -0
- package/dist/google.mjs +21 -0
- package/dist/google.mjs.map +1 -0
- package/dist/index.d.mts +64 -7
- package/dist/index.mjs +703 -73
- package/dist/index.mjs.map +1 -1
- package/dist/openai.d.mts +20 -0
- package/dist/openai.mjs +27 -0
- package/dist/openai.mjs.map +1 -0
- package/package.json +47 -6
- package/src/anthropic.ts +17 -0
- package/src/client-fallback.ts +70 -0
- package/src/convert-to-workersai-chat-messages.ts +30 -5
- package/src/errors.ts +216 -0
- package/src/gateway-delegate.ts +696 -0
- package/src/gateway-provider.ts +167 -0
- package/src/gateway-providers.ts +457 -0
- package/src/google.ts +19 -0
- package/src/index.ts +180 -9
- package/src/openai.ts +25 -0
- package/src/resumable-stream.ts +223 -0
- package/src/streaming.ts +102 -30
- package/src/utils.ts +187 -4
- package/src/workersai-chat-language-model.ts +87 -26
- package/src/workersai-chat-settings.ts +1 -1
- package/src/workersai-models.ts +11 -3
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import { a as findProviderBySlug, c as WorkersAIGatewayError, i as detectProviderByUrl, l as _defineProperty, n as createGatewayProvider, o as wireableProviders, r as GATEWAY_PROVIDERS, s as WorkersAIFallbackError, t as createGatewayFetch } from "./gateway-provider-1USFWm7c.mjs";
|
|
2
|
+
import { TooManyEmbeddingValuesForCallError, UnsupportedFunctionalityError } from "@ai-sdk/provider";
|
|
1
3
|
import { generateId } from "ai";
|
|
2
|
-
import { TooManyEmbeddingValuesForCallError } from "@ai-sdk/provider";
|
|
3
4
|
//#region src/utils.ts
|
|
4
5
|
/**
|
|
5
6
|
* Normalize messages before passing to the Workers AI binding.
|
|
@@ -127,6 +128,39 @@ async function createRunBinary(config, model, audioBytes, contentType, signal) {
|
|
|
127
128
|
const data = await response.json();
|
|
128
129
|
return data.result ?? data;
|
|
129
130
|
}
|
|
131
|
+
/**
|
|
132
|
+
* Build the `response_format.json_schema` payload for native Workers AI models.
|
|
133
|
+
*
|
|
134
|
+
* Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
|
|
135
|
+
* Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
|
|
136
|
+
* only required by partner-model routes (e.g. `openai/...`), which never reach
|
|
137
|
+
* this code — they go through the gateway delegate and the real `@ai-sdk/*`
|
|
138
|
+
* providers, which build the envelope themselves. Wrapping the schema here would
|
|
139
|
+
* break native models, so we must keep the bare shape.
|
|
140
|
+
*
|
|
141
|
+
* The AI SDK's structured-output `name` / `description` (from
|
|
142
|
+
* `Output.object({ schema, name, description })` / `generateObject`) would
|
|
143
|
+
* otherwise be silently dropped on this path. We preserve them as the standard
|
|
144
|
+
* JSON Schema `title` (from `name`) and `description` keywords, which keeps the
|
|
145
|
+
* payload a valid bare schema while still passing the LLM guidance through.
|
|
146
|
+
*
|
|
147
|
+
* Existing schema-level `title` / `description` are never overwritten, empty
|
|
148
|
+
* strings are ignored, and the input schema object is never mutated.
|
|
149
|
+
*
|
|
150
|
+
* See https://github.com/cloudflare/ai/issues/559.
|
|
151
|
+
*/
|
|
152
|
+
function buildJsonSchemaPayload(schema, name, description) {
|
|
153
|
+
if (typeof schema !== "object" || schema === null || Array.isArray(schema)) return schema;
|
|
154
|
+
const record = schema;
|
|
155
|
+
const addTitle = !!name && record.title === void 0;
|
|
156
|
+
const addDescription = !!description && record.description === void 0;
|
|
157
|
+
if (!addTitle && !addDescription) return schema;
|
|
158
|
+
return {
|
|
159
|
+
...record,
|
|
160
|
+
...addTitle ? { title: name } : {},
|
|
161
|
+
...addDescription ? { description } : {}
|
|
162
|
+
};
|
|
163
|
+
}
|
|
130
164
|
function prepareToolsAndToolChoice(tools, toolChoice) {
|
|
131
165
|
if (tools == null) return {
|
|
132
166
|
tool_choice: void 0,
|
|
@@ -159,8 +193,11 @@ function prepareToolsAndToolChoice(tools, toolChoice) {
|
|
|
159
193
|
tools: mappedTools
|
|
160
194
|
};
|
|
161
195
|
case "tool": return {
|
|
162
|
-
tool_choice:
|
|
163
|
-
|
|
196
|
+
tool_choice: {
|
|
197
|
+
type: "function",
|
|
198
|
+
function: { name: toolChoice.toolName }
|
|
199
|
+
},
|
|
200
|
+
tools: mappedTools
|
|
164
201
|
};
|
|
165
202
|
default: throw new Error(`Unsupported tool choice type: ${type}`);
|
|
166
203
|
}
|
|
@@ -198,6 +235,92 @@ function processToolCalls(output) {
|
|
|
198
235
|
return [];
|
|
199
236
|
}
|
|
200
237
|
/**
|
|
238
|
+
* Was a specific tool forced for this request?
|
|
239
|
+
*
|
|
240
|
+
* True for both `tool_choice: "required"` and the named-function form
|
|
241
|
+
* `{ type: "function", function: { name } }`.
|
|
242
|
+
*/
|
|
243
|
+
function isForcedToolChoice(toolChoice) {
|
|
244
|
+
if (toolChoice === "required") return true;
|
|
245
|
+
return typeof toolChoice === "object" && toolChoice !== null && toolChoice.type === "function";
|
|
246
|
+
}
|
|
247
|
+
/**
|
|
248
|
+
* Parse tool calls that a model leaked as JSON text instead of structured
|
|
249
|
+
* `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
|
|
250
|
+
*
|
|
251
|
+
* Only JSON objects whose `name` is one of `knownToolNames` are recovered;
|
|
252
|
+
* everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
|
|
253
|
+
* hallucinated names) is ignored to avoid fabricating bogus calls.
|
|
254
|
+
*/
|
|
255
|
+
function parseLeakedToolCalls(text, knownToolNames) {
|
|
256
|
+
let parsed;
|
|
257
|
+
try {
|
|
258
|
+
parsed = JSON.parse(text.trim());
|
|
259
|
+
} catch {
|
|
260
|
+
return [];
|
|
261
|
+
}
|
|
262
|
+
const candidates = Array.isArray(parsed) ? parsed : [parsed];
|
|
263
|
+
const salvaged = [];
|
|
264
|
+
for (const candidate of candidates) {
|
|
265
|
+
if (typeof candidate !== "object" || candidate === null) continue;
|
|
266
|
+
const obj = candidate;
|
|
267
|
+
const name = obj.name;
|
|
268
|
+
if (typeof name !== "string" || !knownToolNames.has(name)) continue;
|
|
269
|
+
let args;
|
|
270
|
+
if ("arguments" in obj) args = obj.arguments;
|
|
271
|
+
else if ("parameters" in obj) args = obj.parameters;
|
|
272
|
+
else {
|
|
273
|
+
const { name: _name, ...rest } = obj;
|
|
274
|
+
args = rest;
|
|
275
|
+
}
|
|
276
|
+
salvaged.push({
|
|
277
|
+
input: typeof args === "string" ? args : JSON.stringify(args ?? {}),
|
|
278
|
+
toolCallId: createAISDKToolCallId(void 0),
|
|
279
|
+
type: "tool-call",
|
|
280
|
+
toolName: name
|
|
281
|
+
});
|
|
282
|
+
}
|
|
283
|
+
return salvaged;
|
|
284
|
+
}
|
|
285
|
+
/** Collect the requested tool names from mapped tools. */
|
|
286
|
+
function getToolNames(tools) {
|
|
287
|
+
return new Set((tools ?? []).map((tool) => tool.function?.name).filter((name) => typeof name === "string"));
|
|
288
|
+
}
|
|
289
|
+
/**
|
|
290
|
+
* Salvage a tool call that a model leaked into text content instead of the
|
|
291
|
+
* structured `tool_calls` field.
|
|
292
|
+
*
|
|
293
|
+
* Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
|
|
294
|
+
* call as raw JSON in `message.content` with an empty `tool_calls` array and
|
|
295
|
+
* `finish_reason: "stop"` — typically when the forced tool is a poor fit for
|
|
296
|
+
* the conversation. The content looks like one of:
|
|
297
|
+
*
|
|
298
|
+
* {"name":"read_skill_resource","path":"feedback.txt"} (flat args)
|
|
299
|
+
* {"name":"calc","arguments":{"a":1}} (wrapped args)
|
|
300
|
+
* [{"name":"calc","parameters":{"a":1}}] (array form)
|
|
301
|
+
*
|
|
302
|
+
* This reinterprets that text as a structured tool call. It is intentionally
|
|
303
|
+
* narrow to avoid false positives:
|
|
304
|
+
* - only runs when a tool was *forced* (required / named-function), so a
|
|
305
|
+
* tool call was explicitly demanded by the caller;
|
|
306
|
+
* - only runs when there are no real structured tool calls to override;
|
|
307
|
+
* - only matches JSON objects whose `name` is one of the requested tools.
|
|
308
|
+
*
|
|
309
|
+
* Returns the salvaged tool calls, or `null` when nothing was salvaged.
|
|
310
|
+
*
|
|
311
|
+
* See https://github.com/cloudflare/ai/issues/560.
|
|
312
|
+
*/
|
|
313
|
+
function salvageToolCallsFromText(output, context) {
|
|
314
|
+
if (!isForcedToolChoice(context.toolChoice)) return null;
|
|
315
|
+
if (processToolCalls(output).length > 0) return null;
|
|
316
|
+
const knownToolNames = getToolNames(context.tools);
|
|
317
|
+
if (knownToolNames.size === 0) return null;
|
|
318
|
+
const text = processText(output);
|
|
319
|
+
if (!text) return null;
|
|
320
|
+
const salvaged = parseLeakedToolCalls(text, knownToolNames);
|
|
321
|
+
return salvaged.length > 0 ? salvaged : null;
|
|
322
|
+
}
|
|
323
|
+
/**
|
|
201
324
|
* Extract text from a Workers AI response, handling multiple response formats:
|
|
202
325
|
* - OpenAI format: { choices: [{ message: { content: "..." } }] }
|
|
203
326
|
* - Native format: { response: "..." }
|
|
@@ -241,6 +364,17 @@ function toUint8Array$2(data) {
|
|
|
241
364
|
if (data instanceof URL) throw new Error("URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead.");
|
|
242
365
|
return null;
|
|
243
366
|
}
|
|
367
|
+
function assertImageMediaType(mediaType) {
|
|
368
|
+
if (!mediaType) throw new UnsupportedFunctionalityError({
|
|
369
|
+
functionality: "file-part-without-media-type",
|
|
370
|
+
message: "Workers AI chat only supports image file parts with an image/* mediaType. Received a file part without a mediaType."
|
|
371
|
+
});
|
|
372
|
+
if (!mediaType.toLowerCase().startsWith("image/")) throw new UnsupportedFunctionalityError({
|
|
373
|
+
functionality: "non-image-file-part",
|
|
374
|
+
message: `Workers AI chat only supports image file parts with an image/* mediaType. Received mediaType "${mediaType}".`
|
|
375
|
+
});
|
|
376
|
+
return mediaType;
|
|
377
|
+
}
|
|
244
378
|
function uint8ArrayToBase64$1(bytes) {
|
|
245
379
|
let binary = "";
|
|
246
380
|
const chunkSize = 8192;
|
|
@@ -267,10 +401,11 @@ function convertToWorkersAIChatMessages(prompt) {
|
|
|
267
401
|
textParts.push(part.text);
|
|
268
402
|
break;
|
|
269
403
|
case "file": {
|
|
404
|
+
const mediaType = assertImageMediaType(part.mediaType);
|
|
270
405
|
const imageBytes = toUint8Array$2(part.data);
|
|
271
406
|
if (imageBytes) imageParts.push({
|
|
272
407
|
image: imageBytes,
|
|
273
|
-
mediaType
|
|
408
|
+
mediaType
|
|
274
409
|
});
|
|
275
410
|
break;
|
|
276
411
|
}
|
|
@@ -283,10 +418,9 @@ function convertToWorkersAIChatMessages(prompt) {
|
|
|
283
418
|
});
|
|
284
419
|
for (const img of imageParts) {
|
|
285
420
|
const base64 = uint8ArrayToBase64$1(img.image);
|
|
286
|
-
const mediaType = img.mediaType || "image/png";
|
|
287
421
|
contentArray.push({
|
|
288
422
|
type: "image_url",
|
|
289
|
-
image_url: { url: `data:${mediaType};base64,${base64}` }
|
|
423
|
+
image_url: { url: `data:${img.mediaType};base64,${base64}` }
|
|
290
424
|
});
|
|
291
425
|
}
|
|
292
426
|
messages.push({
|
|
@@ -504,9 +638,13 @@ function isNullFinalizationChunk(tc) {
|
|
|
504
638
|
* 1. Native format: { response: "chunk", tool_calls: [...] }
|
|
505
639
|
* 2. OpenAI format: { choices: [{ delta: { content: "chunk" } }] }
|
|
506
640
|
*/
|
|
507
|
-
function getMappedStream(response) {
|
|
641
|
+
function getMappedStream(response, salvageContext) {
|
|
508
642
|
const rawStream = response instanceof ReadableStream ? response : response.body;
|
|
509
643
|
if (!rawStream) throw new Error("No readable stream available for SSE parsing.");
|
|
644
|
+
const knownToolNames = getToolNames(salvageContext?.tools);
|
|
645
|
+
const bufferContentForSalvage = isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
|
|
646
|
+
let contentBuffer = "";
|
|
647
|
+
let anyToolCallStarted = false;
|
|
510
648
|
let usage = {
|
|
511
649
|
outputTokens: {
|
|
512
650
|
total: 0,
|
|
@@ -552,7 +690,8 @@ function getMappedStream(response) {
|
|
|
552
690
|
const nativeResponse = chunk.response;
|
|
553
691
|
if (nativeResponse != null && nativeResponse !== "") {
|
|
554
692
|
const responseText = String(nativeResponse);
|
|
555
|
-
if (responseText.length > 0)
|
|
693
|
+
if (responseText.length > 0) if (bufferContentForSalvage) contentBuffer += responseText;
|
|
694
|
+
else {
|
|
556
695
|
if (reasoningId) {
|
|
557
696
|
controller.enqueue({
|
|
558
697
|
type: "reasoning-end",
|
|
@@ -602,7 +741,8 @@ function getMappedStream(response) {
|
|
|
602
741
|
});
|
|
603
742
|
}
|
|
604
743
|
const textDelta = delta.content;
|
|
605
|
-
if (textDelta && textDelta.length > 0)
|
|
744
|
+
if (textDelta && textDelta.length > 0) if (bufferContentForSalvage) contentBuffer += textDelta;
|
|
745
|
+
else {
|
|
606
746
|
if (reasoningId) {
|
|
607
747
|
controller.enqueue({
|
|
608
748
|
type: "reasoning-end",
|
|
@@ -645,11 +785,69 @@ function getMappedStream(response) {
|
|
|
645
785
|
type: "reasoning-end",
|
|
646
786
|
id: reasoningId
|
|
647
787
|
});
|
|
788
|
+
let salvagedToolCalls = false;
|
|
789
|
+
if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
|
|
790
|
+
const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
|
|
791
|
+
if (salvaged.length > 0) {
|
|
792
|
+
for (const call of salvaged) {
|
|
793
|
+
controller.enqueue({
|
|
794
|
+
type: "tool-input-start",
|
|
795
|
+
id: call.toolCallId,
|
|
796
|
+
toolName: call.toolName
|
|
797
|
+
});
|
|
798
|
+
controller.enqueue({
|
|
799
|
+
type: "tool-input-delta",
|
|
800
|
+
id: call.toolCallId,
|
|
801
|
+
delta: call.input
|
|
802
|
+
});
|
|
803
|
+
controller.enqueue({
|
|
804
|
+
type: "tool-input-end",
|
|
805
|
+
id: call.toolCallId
|
|
806
|
+
});
|
|
807
|
+
controller.enqueue(call);
|
|
808
|
+
}
|
|
809
|
+
salvagedToolCalls = true;
|
|
810
|
+
console.warn(`[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`);
|
|
811
|
+
} else {
|
|
812
|
+
const id = generateId();
|
|
813
|
+
controller.enqueue({
|
|
814
|
+
type: "text-start",
|
|
815
|
+
id
|
|
816
|
+
});
|
|
817
|
+
controller.enqueue({
|
|
818
|
+
type: "text-delta",
|
|
819
|
+
id,
|
|
820
|
+
delta: contentBuffer
|
|
821
|
+
});
|
|
822
|
+
controller.enqueue({
|
|
823
|
+
type: "text-end",
|
|
824
|
+
id
|
|
825
|
+
});
|
|
826
|
+
}
|
|
827
|
+
} else if (bufferContentForSalvage && contentBuffer.trim()) {
|
|
828
|
+
const id = generateId();
|
|
829
|
+
controller.enqueue({
|
|
830
|
+
type: "text-start",
|
|
831
|
+
id
|
|
832
|
+
});
|
|
833
|
+
controller.enqueue({
|
|
834
|
+
type: "text-delta",
|
|
835
|
+
id,
|
|
836
|
+
delta: contentBuffer
|
|
837
|
+
});
|
|
838
|
+
controller.enqueue({
|
|
839
|
+
type: "text-end",
|
|
840
|
+
id
|
|
841
|
+
});
|
|
842
|
+
}
|
|
648
843
|
if (textId) controller.enqueue({
|
|
649
844
|
type: "text-end",
|
|
650
845
|
id: textId
|
|
651
846
|
});
|
|
652
|
-
const effectiveFinishReason =
|
|
847
|
+
const effectiveFinishReason = salvagedToolCalls ? {
|
|
848
|
+
unified: "tool-calls",
|
|
849
|
+
raw: "stop"
|
|
850
|
+
} : !receivedDone && receivedAnyData && !finishReason ? {
|
|
653
851
|
unified: "error",
|
|
654
852
|
raw: "stream-truncated"
|
|
655
853
|
} : finishReason ?? {
|
|
@@ -716,6 +914,7 @@ function getMappedStream(response) {
|
|
|
716
914
|
args: ""
|
|
717
915
|
});
|
|
718
916
|
lastActiveToolIndex = tcIndex;
|
|
917
|
+
anyToolCallStarted = true;
|
|
719
918
|
controller.enqueue({
|
|
720
919
|
type: "tool-input-start",
|
|
721
920
|
id,
|
|
@@ -778,44 +977,6 @@ var SSEDecoder = class extends TransformStream {
|
|
|
778
977
|
}
|
|
779
978
|
};
|
|
780
979
|
//#endregion
|
|
781
|
-
//#region \0@oxc-project+runtime@0.122.0/helpers/typeof.js
|
|
782
|
-
function _typeof(o) {
|
|
783
|
-
"@babel/helpers - typeof";
|
|
784
|
-
return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function(o) {
|
|
785
|
-
return typeof o;
|
|
786
|
-
} : function(o) {
|
|
787
|
-
return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o;
|
|
788
|
-
}, _typeof(o);
|
|
789
|
-
}
|
|
790
|
-
//#endregion
|
|
791
|
-
//#region \0@oxc-project+runtime@0.122.0/helpers/toPrimitive.js
|
|
792
|
-
function toPrimitive(t, r) {
|
|
793
|
-
if ("object" != _typeof(t) || !t) return t;
|
|
794
|
-
var e = t[Symbol.toPrimitive];
|
|
795
|
-
if (void 0 !== e) {
|
|
796
|
-
var i = e.call(t, r || "default");
|
|
797
|
-
if ("object" != _typeof(i)) return i;
|
|
798
|
-
throw new TypeError("@@toPrimitive must return a primitive value.");
|
|
799
|
-
}
|
|
800
|
-
return ("string" === r ? String : Number)(t);
|
|
801
|
-
}
|
|
802
|
-
//#endregion
|
|
803
|
-
//#region \0@oxc-project+runtime@0.122.0/helpers/toPropertyKey.js
|
|
804
|
-
function toPropertyKey(t) {
|
|
805
|
-
var i = toPrimitive(t, "string");
|
|
806
|
-
return "symbol" == _typeof(i) ? i : i + "";
|
|
807
|
-
}
|
|
808
|
-
//#endregion
|
|
809
|
-
//#region \0@oxc-project+runtime@0.122.0/helpers/defineProperty.js
|
|
810
|
-
function _defineProperty(e, r, t) {
|
|
811
|
-
return (r = toPropertyKey(r)) in e ? Object.defineProperty(e, r, {
|
|
812
|
-
value: t,
|
|
813
|
-
enumerable: !0,
|
|
814
|
-
configurable: !0,
|
|
815
|
-
writable: !0
|
|
816
|
-
}) : e[r] = t, e;
|
|
817
|
-
}
|
|
818
|
-
//#endregion
|
|
819
980
|
//#region src/aisearch-chat-language-model.ts
|
|
820
981
|
var AISearchChatLanguageModel = class {
|
|
821
982
|
constructor(modelId, settings, config) {
|
|
@@ -983,18 +1144,21 @@ var WorkersAIChatLanguageModel = class {
|
|
|
983
1144
|
},
|
|
984
1145
|
warnings
|
|
985
1146
|
};
|
|
986
|
-
case "json":
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
1147
|
+
case "json": {
|
|
1148
|
+
const json = responseFormat?.type === "json" ? responseFormat : void 0;
|
|
1149
|
+
return {
|
|
1150
|
+
args: {
|
|
1151
|
+
...baseArgs,
|
|
1152
|
+
response_format: {
|
|
1153
|
+
type: "json_schema",
|
|
1154
|
+
json_schema: buildJsonSchemaPayload(json?.schema, json?.name, json?.description)
|
|
1155
|
+
},
|
|
1156
|
+
tools: void 0,
|
|
1157
|
+
tool_choice: void 0
|
|
992
1158
|
},
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
warnings
|
|
997
|
-
};
|
|
1159
|
+
warnings
|
|
1160
|
+
};
|
|
1161
|
+
}
|
|
998
1162
|
default: throw new Error(`Unsupported type: ${type}`);
|
|
999
1163
|
}
|
|
1000
1164
|
}
|
|
@@ -1053,6 +1217,38 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1053
1217
|
...passthroughOptions
|
|
1054
1218
|
};
|
|
1055
1219
|
}
|
|
1220
|
+
/**
|
|
1221
|
+
* Extract reasoning, text, and tool calls from a non-streaming response.
|
|
1222
|
+
*
|
|
1223
|
+
* Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
|
|
1224
|
+
* path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
|
|
1225
|
+
* and is retried non-streaming). When a forced tool call was leaked into
|
|
1226
|
+
* text content (gpt-oss harmony quirk), it is salvaged into a structured
|
|
1227
|
+
* tool call and the leaked JSON text is suppressed. A warning is appended in
|
|
1228
|
+
* place so callers can observe the reinterpretation.
|
|
1229
|
+
*/
|
|
1230
|
+
extractContent(outputRecord, args, warnings) {
|
|
1231
|
+
const choices = outputRecord.choices;
|
|
1232
|
+
const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
|
|
1233
|
+
const toolCalls = processToolCalls(outputRecord);
|
|
1234
|
+
const salvaged = toolCalls.length === 0 ? salvageToolCallsFromText(outputRecord, {
|
|
1235
|
+
tools: args.tools,
|
|
1236
|
+
toolChoice: args.tool_choice
|
|
1237
|
+
}) : null;
|
|
1238
|
+
if (salvaged) warnings.push({
|
|
1239
|
+
type: "other",
|
|
1240
|
+
message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`
|
|
1241
|
+
});
|
|
1242
|
+
return {
|
|
1243
|
+
reasoningContent,
|
|
1244
|
+
text: salvaged ? "" : processText(outputRecord) ?? "",
|
|
1245
|
+
toolCalls: salvaged ?? toolCalls,
|
|
1246
|
+
finishReason: salvaged ? {
|
|
1247
|
+
unified: "tool-calls",
|
|
1248
|
+
raw: "stop"
|
|
1249
|
+
} : mapWorkersAIFinishReason(outputRecord)
|
|
1250
|
+
};
|
|
1251
|
+
}
|
|
1056
1252
|
async doGenerate(options) {
|
|
1057
1253
|
const { args, warnings } = this.getArgs(options);
|
|
1058
1254
|
const { messages } = convertToWorkersAIChatMessages(options.prompt);
|
|
@@ -1064,10 +1260,9 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1064
1260
|
});
|
|
1065
1261
|
if (output instanceof ReadableStream) throw new Error("Unexpected streaming response from non-streaming request. Check that `stream: true` was not passed.");
|
|
1066
1262
|
const outputRecord = output;
|
|
1067
|
-
const
|
|
1068
|
-
const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
|
|
1263
|
+
const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
|
|
1069
1264
|
return {
|
|
1070
|
-
finishReason
|
|
1265
|
+
finishReason,
|
|
1071
1266
|
content: [
|
|
1072
1267
|
...reasoningContent ? [{
|
|
1073
1268
|
type: "reasoning",
|
|
@@ -1075,9 +1270,9 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1075
1270
|
}] : [],
|
|
1076
1271
|
{
|
|
1077
1272
|
type: "text",
|
|
1078
|
-
text
|
|
1273
|
+
text
|
|
1079
1274
|
},
|
|
1080
|
-
...
|
|
1275
|
+
...toolCalls
|
|
1081
1276
|
],
|
|
1082
1277
|
usage: mapWorkersAIUsage(output),
|
|
1083
1278
|
warnings
|
|
@@ -1095,10 +1290,12 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1095
1290
|
...runOptions,
|
|
1096
1291
|
signal: options.abortSignal
|
|
1097
1292
|
});
|
|
1098
|
-
if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response
|
|
1293
|
+
if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response, {
|
|
1294
|
+
tools: args.tools,
|
|
1295
|
+
toolChoice: args.tool_choice
|
|
1296
|
+
}), warnings) };
|
|
1099
1297
|
const outputRecord = response;
|
|
1100
|
-
const
|
|
1101
|
-
const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
|
|
1298
|
+
const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
|
|
1102
1299
|
let textId = null;
|
|
1103
1300
|
let reasoningId = null;
|
|
1104
1301
|
return { stream: new ReadableStream({ start(controller) {
|
|
@@ -1122,7 +1319,6 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1122
1319
|
id: reasoningId
|
|
1123
1320
|
});
|
|
1124
1321
|
}
|
|
1125
|
-
const text = processText(outputRecord);
|
|
1126
1322
|
if (text) {
|
|
1127
1323
|
textId = generateId();
|
|
1128
1324
|
controller.enqueue({
|
|
@@ -1139,10 +1335,10 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1139
1335
|
id: textId
|
|
1140
1336
|
});
|
|
1141
1337
|
}
|
|
1142
|
-
for (const toolCall of
|
|
1338
|
+
for (const toolCall of toolCalls) controller.enqueue(toolCall);
|
|
1143
1339
|
controller.enqueue({
|
|
1144
1340
|
type: "finish",
|
|
1145
|
-
finishReason
|
|
1341
|
+
finishReason,
|
|
1146
1342
|
usage: mapWorkersAIUsage(response)
|
|
1147
1343
|
});
|
|
1148
1344
|
controller.close();
|
|
@@ -1505,8 +1701,422 @@ function documentsToContexts(documents, warnings) {
|
|
|
1505
1701
|
*/
|
|
1506
1702
|
var AutoRAGChatLanguageModel = class extends AISearchChatLanguageModel {};
|
|
1507
1703
|
//#endregion
|
|
1704
|
+
//#region src/client-fallback.ts
|
|
1705
|
+
/**
|
|
1706
|
+
* Wrap a chain of models so a failed *pre-stream* dispatch falls through to the
|
|
1707
|
+
* next model, preserving resume on each leg's own transport. If every leg fails,
|
|
1708
|
+
* throws a {@link WorkersAIFallbackError} carrying the full attempt tree.
|
|
1709
|
+
*
|
|
1710
|
+
* Fallback triggers on `doGenerate`/`doStream` rejection (the dispatch never
|
|
1711
|
+
* produced a stream). Errors that surface *mid-stream* — after content has
|
|
1712
|
+
* already been emitted — are not recoverable here and propagate as-is.
|
|
1713
|
+
*/
|
|
1714
|
+
function createClientFallbackModel(legs) {
|
|
1715
|
+
if (legs.length === 0) throw new Error("createClientFallbackModel requires at least one model leg.");
|
|
1716
|
+
const primary = legs[0].model;
|
|
1717
|
+
async function attempt(run) {
|
|
1718
|
+
const attempts = [];
|
|
1719
|
+
for (const leg of legs) try {
|
|
1720
|
+
const result = await run(leg.model);
|
|
1721
|
+
attempts.push({
|
|
1722
|
+
model: leg.slug,
|
|
1723
|
+
transport: leg.transport,
|
|
1724
|
+
ok: true
|
|
1725
|
+
});
|
|
1726
|
+
return result;
|
|
1727
|
+
} catch (e) {
|
|
1728
|
+
const err = WorkersAIGatewayError.fromUnknown(e);
|
|
1729
|
+
attempts.push({
|
|
1730
|
+
model: leg.slug,
|
|
1731
|
+
transport: leg.transport,
|
|
1732
|
+
ok: false,
|
|
1733
|
+
status: err.status,
|
|
1734
|
+
error: err
|
|
1735
|
+
});
|
|
1736
|
+
}
|
|
1737
|
+
throw new WorkersAIFallbackError(attempts);
|
|
1738
|
+
}
|
|
1739
|
+
return {
|
|
1740
|
+
specificationVersion: "v3",
|
|
1741
|
+
provider: primary.provider,
|
|
1742
|
+
modelId: primary.modelId,
|
|
1743
|
+
supportedUrls: primary.supportedUrls,
|
|
1744
|
+
doGenerate(options) {
|
|
1745
|
+
return attempt((m) => m.doGenerate(options));
|
|
1746
|
+
},
|
|
1747
|
+
doStream(options) {
|
|
1748
|
+
return attempt((m) => m.doStream(options));
|
|
1749
|
+
}
|
|
1750
|
+
};
|
|
1751
|
+
}
|
|
1752
|
+
//#endregion
|
|
1753
|
+
//#region src/resumable-stream.ts
|
|
1754
|
+
function concat(a, b) {
|
|
1755
|
+
const out = new Uint8Array(new ArrayBuffer(a.length + b.length));
|
|
1756
|
+
out.set(a, 0);
|
|
1757
|
+
out.set(b, a.length);
|
|
1758
|
+
return out;
|
|
1759
|
+
}
|
|
1760
|
+
/** Index just past the last `\n\n` in `buf`, or -1 if there is no complete event. */
|
|
1761
|
+
function lastEventBoundary(buf) {
|
|
1762
|
+
for (let i = buf.length - 2; i >= 0; i--) if (buf[i] === 10 && buf[i + 1] === 10) return i + 2;
|
|
1763
|
+
return -1;
|
|
1764
|
+
}
|
|
1765
|
+
/** Count of `\n\n` terminators (= complete SSE events) in `buf`. */
|
|
1766
|
+
function countEvents(buf) {
|
|
1767
|
+
let n = 0;
|
|
1768
|
+
for (let i = 0; i + 1 < buf.length; i++) if (buf[i] === 10 && buf[i + 1] === 10) {
|
|
1769
|
+
n++;
|
|
1770
|
+
i++;
|
|
1771
|
+
}
|
|
1772
|
+
return n;
|
|
1773
|
+
}
|
|
1774
|
+
function resumeUrl(gateway, runId, from) {
|
|
1775
|
+
return `https://workers-binding.ai/ai-gateway/gateways/${gateway}/run/${runId}/resume?from=${from}`;
|
|
1776
|
+
}
|
|
1777
|
+
function createResumableStream(options) {
|
|
1778
|
+
const { binding, gateway, runId } = options;
|
|
1779
|
+
const maxReconnects = options.maxReconnects ?? 5;
|
|
1780
|
+
const onExpired = options.onResumeExpired ?? "error";
|
|
1781
|
+
let emittedEvents = options.fromEvent ?? 0;
|
|
1782
|
+
let pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
|
|
1783
|
+
let reconnects = 0;
|
|
1784
|
+
async function fetchResume(controller) {
|
|
1785
|
+
let res;
|
|
1786
|
+
try {
|
|
1787
|
+
res = await binding.fetch(resumeUrl(gateway, runId, emittedEvents), { method: "GET" });
|
|
1788
|
+
} catch (fetchErr) {
|
|
1789
|
+
controller.error(new GatewayDelegateError("dispatch", `Resume request threw at event ${emittedEvents}.`, fetchErr));
|
|
1790
|
+
return null;
|
|
1791
|
+
}
|
|
1792
|
+
if (res.status === 404) {
|
|
1793
|
+
if (onExpired === "accept-partial") {
|
|
1794
|
+
controller.close();
|
|
1795
|
+
return null;
|
|
1796
|
+
}
|
|
1797
|
+
controller.error(new GatewayDelegateError("resume-expired", `Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer TTL (~5.5 min) elapsed; fall back to continuation or regeneration.`));
|
|
1798
|
+
return null;
|
|
1799
|
+
}
|
|
1800
|
+
if (!res.ok || !res.body) {
|
|
1801
|
+
controller.error(new GatewayDelegateError("dispatch", `Resume failed (${res.status}) at event ${emittedEvents}.`));
|
|
1802
|
+
return null;
|
|
1803
|
+
}
|
|
1804
|
+
return res.body;
|
|
1805
|
+
}
|
|
1806
|
+
return new ReadableStream({ async start(controller) {
|
|
1807
|
+
let current;
|
|
1808
|
+
if (options.initial) current = options.initial;
|
|
1809
|
+
else {
|
|
1810
|
+
const body = await fetchResume(controller);
|
|
1811
|
+
if (!body) return;
|
|
1812
|
+
current = body;
|
|
1813
|
+
}
|
|
1814
|
+
for (;;) {
|
|
1815
|
+
const reader = current.getReader();
|
|
1816
|
+
try {
|
|
1817
|
+
for (;;) {
|
|
1818
|
+
const { done, value } = await reader.read();
|
|
1819
|
+
if (done) {
|
|
1820
|
+
if (pending.length > 0) {
|
|
1821
|
+
controller.enqueue(pending);
|
|
1822
|
+
pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
|
|
1823
|
+
}
|
|
1824
|
+
controller.close();
|
|
1825
|
+
return;
|
|
1826
|
+
}
|
|
1827
|
+
if (!value || value.length === 0) continue;
|
|
1828
|
+
pending = concat(pending, value);
|
|
1829
|
+
const boundary = lastEventBoundary(pending);
|
|
1830
|
+
if (boundary > 0) {
|
|
1831
|
+
const complete = pending.slice(0, boundary);
|
|
1832
|
+
controller.enqueue(complete);
|
|
1833
|
+
emittedEvents += countEvents(complete);
|
|
1834
|
+
options.onProgress?.(emittedEvents);
|
|
1835
|
+
pending = pending.slice(boundary);
|
|
1836
|
+
}
|
|
1837
|
+
}
|
|
1838
|
+
} catch (err) {
|
|
1839
|
+
try {
|
|
1840
|
+
reader.releaseLock();
|
|
1841
|
+
} catch {}
|
|
1842
|
+
if (reconnects >= maxReconnects) {
|
|
1843
|
+
controller.error(new GatewayDelegateError("resume-expired", `Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`, err));
|
|
1844
|
+
return;
|
|
1845
|
+
}
|
|
1846
|
+
pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
|
|
1847
|
+
reconnects++;
|
|
1848
|
+
options.onReconnect?.(emittedEvents, reconnects);
|
|
1849
|
+
const body = await fetchResume(controller);
|
|
1850
|
+
if (!body) return;
|
|
1851
|
+
current = body;
|
|
1852
|
+
}
|
|
1853
|
+
}
|
|
1854
|
+
} });
|
|
1855
|
+
}
|
|
1856
|
+
//#endregion
|
|
1857
|
+
//#region src/gateway-delegate.ts
|
|
1858
|
+
/**
|
|
1859
|
+
* Parse a `vendor/model` slug. The first segment is the resolver key (which
|
|
1860
|
+
* registry entry handles it); the rest is the provider-native model id. Routing
|
|
1861
|
+
* providers keep multi-segment model ids, e.g. `openrouter/anthropic/claude`.
|
|
1862
|
+
*/
|
|
1863
|
+
function parseSlug(slug) {
|
|
1864
|
+
const slash = slug.indexOf("/");
|
|
1865
|
+
if (slash === -1) throw new GatewayDelegateError("config", `Model slug "${slug}" has no resolver key. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
|
|
1866
|
+
const resolverKey = slug.slice(0, slash);
|
|
1867
|
+
const modelId = slug.slice(slash + 1);
|
|
1868
|
+
if (!resolverKey || !modelId) throw new GatewayDelegateError("config", `Model slug "${slug}" is malformed. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
|
|
1869
|
+
return {
|
|
1870
|
+
resolverKey,
|
|
1871
|
+
modelId
|
|
1872
|
+
};
|
|
1873
|
+
}
|
|
1874
|
+
/**
 * Look up the registry entry for an already-parsed slug.
 *
 * @param {string} slug - The original slug, used only in error messages.
 * @param {{ resolverKey: string, modelId: string }} parsed - Result of `parseSlug(slug)`.
 * @returns {object} The entry returned by `findProviderBySlug` for `parsed.resolverKey`.
 * @throws {GatewayDelegateError} kind `"config"` when no entry is found, or when
 *   the entry has a falsy `wireFormat`.
 */
function resolveProvider(slug, parsed) {
	const { resolverKey } = parsed;
	const entry = findProviderBySlug(resolverKey);
	if (!entry) {
		throw new GatewayDelegateError("config", `Unknown gateway provider "${resolverKey}" (from slug "${slug}"). See the AI Gateway provider directory for valid slugs, or use createGatewayProvider to bring your own @ai-sdk provider.`);
	}
	if (!entry.wireFormat) {
		throw new GatewayDelegateError("config", `Provider "${resolverKey}" is not chat/completions-shaped and has no built-in parser. Reach it with createGatewayProvider (bring your own @ai-sdk provider).`);
	}
	return entry;
}
|
|
1884
|
+
/**
 * Decide which transport (`"run"` or `"gateway"`) a call uses and whether
 * resumable streaming stays enabled.
 *
 * Server-side fallback (`fallback.mode === "server"`) and caching (`cacheTtl`
 * set, or `skipCache === true`) are treated as gateway-only features: they
 * select the gateway transport and turn resume off. When that happens with
 * resume merely defaulted a warning string is recorded; when resume was
 * explicitly requested an error is thrown instead.
 *
 * @param {object} opts - Effective call options (`transport`, `fallback`,
 *   `cacheTtl`, `skipCache`, `resume` are read).
 * @param {boolean} resumeExplicitlyTrue - Whether the caller passed `resume: true`.
 * @param {boolean} [runCatalog=true] - Whether the provider is reachable on the run path.
 * @param {boolean} [gatewayAvailable=true] - Whether the provider has a gateway path.
 * @returns {{ transport: string, resumeEnabled: boolean, warnings: string[] }}
 * @throws {GatewayDelegateError} kind `"config"` for any contradictory combination.
 */
function selectTransport(opts, resumeExplicitlyTrue, runCatalog = true, gatewayAvailable = true) {
	const requested = opts.transport;
	const serverFallback = opts.fallback?.mode === "server";
	const caching = opts.cacheTtl !== void 0 || opts.skipCache === true;
	const needsGateway = serverFallback || caching;
	// Human-readable name of the gateway-only feature, used in messages below.
	const feature = serverFallback ? "fallback.mode:\"server\"" : "caching (cacheTtl/skipCache)";
	const warnings = [];

	// Run-catalog provider without a gateway path: anything gateway-bound is impossible.
	if (runCatalog && !gatewayAvailable && (requested === "gateway" || needsGateway)) {
		const blocked = requested === "gateway" ? "transport:\"gateway\"" : feature;
		throw new GatewayDelegateError("config", `${blocked} is unavailable: this provider is on the unified run catalog but is not a native gateway provider, so it has no gateway path (no caching, server-side fallback, or transport:"gateway"). Use the default run path, or fallback.mode:"client".`);
	}

	// Provider is not on the run catalog: gateway is the only option, resume is off.
	if (!runCatalog) {
		if (requested === "run") {
			throw new GatewayDelegateError("config", "transport:\"run\" is unavailable: this provider is not on the unified-billing run catalog, so it can only be reached through the gateway path (BYOK).");
		}
		if (resumeExplicitlyTrue) {
			throw new GatewayDelegateError("config", "resume:true is unavailable: this provider is not on the resumable run catalog (cf-aig-run-id requires the unified-billing run path).");
		}
		return { transport: "gateway", resumeEnabled: false, warnings };
	}

	if (requested === "run" && needsGateway) {
		throw new GatewayDelegateError("config", `transport:"run" cannot satisfy ${feature}: those features are only available on the gateway path. Use the gateway transport, or fallback.mode:"client".`);
	}
	if (requested === "gateway" && resumeExplicitlyTrue) {
		throw new GatewayDelegateError("config", "transport:\"gateway\" cannot provide resume — cf-aig-run-id is only on the run path.");
	}

	if (needsGateway) {
		if (resumeExplicitlyTrue) {
			const unsupported = serverFallback ? "server-side fallback" : "caching";
			throw new GatewayDelegateError("config", `resume:true conflicts with ${feature}: resume (cf-aig-run-id) is only on the run path, which does not support ${unsupported}. Use fallback.mode:"client" to keep resume, or drop resume.`);
		}
		// Resume was only defaulted: downgrade quietly but leave a warning for the caller.
		warnings.push(`[workers-ai-provider] resume disabled: ${feature} requires the gateway path, which does not surface cf-aig-run-id. Use fallback.mode:"client" to keep resumable streaming.`);
		return { transport: "gateway", resumeEnabled: false, warnings };
	}

	const transport = requested ?? "run";
	const resumeEnabled = transport === "run" && opts.resume !== false;
	return { transport, resumeEnabled, warnings };
}
|
|
1924
|
+
/**
 * Error type thrown by the gateway delegate helpers in this module.
 *
 * Instances carry a `kind` tag (the code in this file uses `"config"` and
 * `"resume-expired"`) and an optional `cause` holding the underlying error.
 */
var GatewayDelegateError = class extends Error {
	/**
	 * @param {string} kind - Category tag stored on `this.kind`.
	 * @param {string} message - Human-readable description.
	 * @param {unknown} [cause] - Underlying error, stored on `this.cause`.
	 */
	constructor(kind, message, cause) {
		super(message);
		// Declare both fields first so their position among own keys is fixed
		// before `name` is written.
		_defineProperty(this, "kind", void 0);
		_defineProperty(this, "cause", void 0);
		this.name = "GatewayDelegateError";
		Object.assign(this, { kind, cause });
	}
};
|
|
1934
|
+
// Lower-case header names that makeGatewayFetch always drops from the incoming
// request headers before forwarding (it copies this set and may add more).
const STRIP_HEADERS_BASE = new Set(["content-length", "host"]);
|
|
1935
|
+
/**
 * Turn a fetch-style request body into text suitable for `JSON.parse`.
 *
 * Strings are returned as-is. Any binary buffer source — an `ArrayBuffer`, or
 * any `ArrayBufferView` (every typed array and `DataView`, not just
 * `Uint8Array`) — is decoded as UTF-8. Everything else (including
 * `undefined`/`null`) yields `"{}"` so callers always get parseable JSON.
 *
 * @param {unknown} body - The `init.body` handed to the fetch replacement.
 * @returns {string} The body text, or `"{}"` when it cannot be read synchronously.
 */
function asText(body) {
	if (typeof body === "string") return body;
	// TextDecoder.decode accepts any BufferSource, so views other than
	// Uint8Array must not fall through to the "{}" default.
	if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) {
		return new TextDecoder().decode(body);
	}
	return "{}";
}
|
|
1941
|
+
/**
 * Flatten any fetch `HeadersInit` shape into a fresh plain object.
 *
 * @param {Headers | Array<[string, string]> | Record<string, string> | undefined | null} h
 *   Headers as a `Headers` instance, an array of `[name, value]` pairs, or a
 *   plain record. Falsy input produces an empty object.
 * @returns {Record<string, string>} A new object; the input is never returned.
 */
function headersToObject(h) {
	const collected = {};
	if (!h) return collected;
	// Headers instances and pair arrays both iterate as [name, value] tuples.
	const iteratesAsPairs = h instanceof Headers || Array.isArray(h);
	if (iteratesAsPairs) {
		for (const [name, value] of h) collected[name] = value;
	} else {
		Object.assign(collected, h);
	}
	return collected;
}
|
|
1949
|
+
/**
 * Normalize a gateway setting into `{ id, options }`.
 *
 * A string is treated as the gateway id and expanded to `{ id }`; an object is
 * used as the options as-is (same reference) and its `id` is lifted out.
 *
 * @param {string | { id: string } | undefined | null} gateway - Gateway id or options object.
 * @returns {{ id: string, options: object }}
 * @throws {GatewayDelegateError} kind `"config"` when `gateway` is falsy.
 */
function normalizeGateway(gateway) {
	if (!gateway) {
		throw new GatewayDelegateError("config", "A gateway is required for the delegate (resume needs a gateway). Pass `gateway: \"<gateway-id>\"` to createGatewayDelegate or per call.");
	}
	const options = typeof gateway === "string" ? { id: gateway } : gateway;
	return { id: options.id, options };
}
|
|
1960
|
+
/**
 * Build a gateway delegate: a function `(slug, options?) => model` that turns a
 * `"<provider>/<model>"` slug into an AI SDK model whose HTTP traffic is routed
 * through the configured binding.
 *
 * Per call it parses the slug, resolves the registry entry, picks a transport
 * via `selectTransport`, finds the plugin registered for the resulting wire
 * format, and hands that plugin a custom `fetch`. With
 * `options.fallback.mode === "client"` one model is built per slug (primary
 * first, then `fallback.models`) and wrapped by `createClientFallbackModel`.
 *
 * @param {object} config - Requires `binding` and a non-empty `providers` list;
 *   `gateway`, `resume` (defaults to `true`) and `onResumeExpired` are optional
 *   defaults that per-call options override.
 * @returns {(slug: string, options?: object) => object} The delegate function.
 * @throws {GatewayDelegateError} kind `"config"` when `binding` or `providers`
 *   is missing, and (from the returned function) for any per-call
 *   misconfiguration.
 */
function createGatewayDelegate(config) {
	if (!config?.binding) {
		throw new GatewayDelegateError("config", "createGatewayDelegate requires a `binding` (e.g. { binding: env.AI }).");
	}
	if (!config.providers?.length) {
		throw new GatewayDelegateError("config", "createGatewayDelegate requires at least one provider plugin, e.g. `providers: [openai]` from \"workers-ai-provider/openai\".");
	}
	// Plugins are looked up by the wire format they can parse; later duplicates win.
	const pluginsByWire = new Map([...config.providers].map((plugin) => [plugin.wireFormat, plugin]));
	const defaultResume = config.resume ?? true;

	/** Build a single model (and report its transport) for one slug. */
	const buildOne = (slug, options) => {
		const parsed = parseSlug(slug);
		const info = resolveProvider(slug, parsed);
		const resumeExplicitlyTrue = options.resume === true;
		const effectiveOptions = {
			...options,
			resume: options.resume ?? defaultResume,
			onResumeExpired: options.onResumeExpired ?? config.onResumeExpired
		};
		const selection = selectTransport(effectiveOptions, resumeExplicitlyTrue, info.runCatalog, info.gatewayPath !== false);
		selection.warnings.forEach((warning) => console.warn(warning));

		const viaRun = selection.transport === "run";
		// The run path answers in `runWireFormat` (falling back to "openai");
		// the gateway path answers in the provider's own wire format.
		const wire = viaRun ? info.runWireFormat ?? "openai" : info.wireFormat;
		const plugin = pluginsByWire.get(wire);
		if (!plugin) {
			const registered = [...pluginsByWire.keys()].join(", ") || "<none>";
			const message = viaRun
				? `The run path for "${parsed.resolverKey}" (from slug "${slug}") returns "${wire}"-wire responses, so it needs the "${wire}" plugin. Install + pass it from "workers-ai-provider/${wire}". Registered: ${registered}.`
				: `No provider plugin for wire format "${wire}" (needed by "${parsed.resolverKey}" on the gateway path from slug "${slug}"). Registered: ${registered}. Install + pass the matching plugin from "workers-ai-provider/${wire}".`;
			throw new GatewayDelegateError("config", message);
		}

		const { id: gatewayId, options: gatewayOptions } = normalizeGateway(options.gateway ?? config.gateway);
		const fetchImpl = viaRun
			? makeRunFetch(config.binding, `${info.resolverKey}/${parsed.modelId}`, gatewayOptions, effectiveOptions, selection, options)
			: makeGatewayFetch(config.binding, info, gatewayId, gatewayOptions, effectiveOptions, selection, options);

		const createArgs = { modelId: parsed.modelId, fetch: fetchImpl };
		// Only the gateway transport gets the provider's base URL, when it has one.
		if (selection.transport === "gateway" && info.baseURL) createArgs.baseURL = info.baseURL;
		return { model: plugin.create(createArgs), transport: selection.transport };
	};

	return (slug, options = {}) => {
		if (options.fallback?.mode !== "client") return buildOne(slug, options).model;
		// Client-side fallback: each candidate is built WITHOUT the fallback option.
		const { fallback, ...rest } = options;
		const candidates = [slug, ...fallback.models].map((candidate) => ({
			slug: candidate,
			...buildOne(candidate, rest)
		}));
		return createClientFallbackModel(candidates);
	};
}
|
|
2011
|
+
/**
 * Report a completed dispatch to the caller's `onDispatch` hook, if one is set.
 *
 * The hook receives the transport selection plus the response status and four
 * `cf-aig-*` response headers (each `null` when absent, per `headers.get`).
 *
 * @param {{ status: number, headers: { get(name: string): string | null } }} resp - Response to report on.
 * @param {{ transport: string, resumeEnabled: boolean, warnings: string[] }} selection - Transport selection for the call.
 * @param {{ onDispatch?: (event: object) => void }} options - Call options holding the hook.
 * @returns {void}
 */
function fireDispatch(resp, selection, options) {
	if (options.onDispatch) {
		const { transport, resumeEnabled, warnings } = selection;
		const header = (name) => resp.headers.get(name);
		// Invoked as a method so the hook keeps `options` as its receiver.
		options.onDispatch({
			transport,
			resumeEnabled,
			warnings,
			status: resp.status,
			runId: header("cf-aig-run-id"),
			cfStep: header("cf-aig-step"),
			cacheStatus: header("cf-aig-cache-status"),
			logId: header("cf-aig-log-id")
		});
	}
}
|
|
2024
|
+
/**
 * Shallow-merge two metadata records; keys in `override` win over `base`.
 *
 * @param {object | undefined | null} base - Gateway-level metadata.
 * @param {object | undefined | null} override - Call-level metadata.
 * @returns {object | undefined} A new merged object, or `undefined` when both inputs are falsy.
 */
function mergeMetadata(base, override) {
	return base || override ? { ...base, ...override } : void 0;
}
|
|
2032
|
+
/**
 * JSON-encode metadata for use as a header value.
 *
 * `JSON.stringify` throws on `bigint`, so every bigint (at any depth) is
 * replaced by its decimal string form first.
 *
 * @param {unknown} metadata - Value to serialize.
 * @returns {string} The JSON text.
 */
function serializeMetadata(metadata) {
	const bigintAsString = (_key, value) => {
		if (typeof value === "bigint") return value.toString();
		return value;
	};
	return JSON.stringify(metadata, bigintAsString);
}
|
|
2036
|
+
/**
 * Build the `fetch` replacement used on the run transport.
 *
 * The returned function ignores the requested URL. It parses the JSON request
 * body, removes its `model` field, and calls `binding.run(slug, body, …)` with
 * `returnRawResponse: true`. After reporting the response through
 * `fireDispatch`, it wraps the body in a resumable stream when resume is
 * enabled, the response carries a `cf-aig-run-id` header, and a body exists;
 * otherwise the raw response is returned unchanged.
 *
 * @param {object} binding - Binding exposing `run(slug, body, options)`.
 * @param {string} slug - Model slug passed to `binding.run`.
 * @param {object} gatewayOptions - Normalized gateway options (`id`, optional `metadata`, …).
 * @param {object} opts - Effective call options (`metadata`, `collectLog`,
 *   `extraHeaders`, `onResumeExpired`, `onProgress` are read).
 * @param {{ transport: string, resumeEnabled: boolean, warnings: string[] }} selection - Transport selection.
 * @param {object} callOptions - Original call options, forwarded to `fireDispatch`.
 * @returns {(input: unknown, init?: object) => Promise<Response>} fetch-compatible function.
 */
function makeRunFetch(binding, slug, gatewayOptions, opts, selection, callOptions) {
	return async function runFetch(_input, init) {
		const payload = JSON.parse(asText(init?.body));
		// The model travels in the slug argument, not in the body.
		delete payload.model;

		// Copy so per-call metadata/collectLog never leak into the shared gateway options.
		const gateway = { ...gatewayOptions };
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (metadata) gateway.metadata = metadata;
		if (opts.collectLog !== void 0) gateway.collectLog = opts.collectLog;

		const runOptions = { gateway, returnRawResponse: true };
		if (opts.extraHeaders) runOptions.extraHeaders = opts.extraHeaders;
		if (init?.signal) runOptions.signal = init.signal;

		const resp = await binding.run(slug, payload, runOptions);
		fireDispatch(resp, selection, callOptions);

		const runId = resp.headers.get("cf-aig-run-id");
		const canResume = selection.resumeEnabled && runId && resp.body;
		if (!canResume) return resp;

		const streamOptions = {
			binding,
			gateway: gatewayOptions.id,
			runId,
			initial: resp.body,
			onResumeExpired: opts.onResumeExpired
		};
		if (opts.onProgress) streamOptions.onProgress = opts.onProgress;
		// Re-wrap so the consumer reads the resumable stream but still sees the
		// original status and headers.
		return new Response(createResumableStream(streamOptions), {
			status: resp.status,
			headers: resp.headers
		});
	};
}
|
|
2070
|
+
/**
 * Build the `fetch` replacement used on the gateway transport.
 *
 * The returned function converts an outgoing provider request into one or more
 * gateway entries (`{ provider, endpoint, headers, query }`) and sends them via
 * `binding.gateway(gatewayId).run(entries, …)`:
 *
 * - `endpoint` comes from `info.transformEndpoint(url)` when defined, otherwise
 *   from the URL's path (leading slash removed) plus its query string.
 * - Incoming headers are copied minus `content-length`, `host`, and — unless
 *   `opts.byok` is set — the provider's `info.authHeaders`. `opts.extraHeaders`
 *   and the `cf-aig-*` option headers are applied on top.
 * - With `opts.fallback.mode === "server"`, one extra entry per fallback model
 *   is appended, identical to the primary except for `query.model`.
 *
 * The request URL may be given as a string, a `URL`, or a `Request`; body and
 * headers are read from `init` only.
 *
 * @param {object} binding - Binding exposing `gateway(id).run(entries, options)`.
 * @param {object} info - Registry entry (`gatewayProviderId`, `authHeaders`, optional `transformEndpoint`).
 * @param {string} gatewayId - Gateway to dispatch through.
 * @param {object} gatewayOptions - Normalized gateway options (optional `metadata` is read).
 * @param {object} opts - Effective call options.
 * @param {{ transport: string, resumeEnabled: boolean, warnings: string[] }} selection - Transport selection.
 * @param {object} callOptions - Original call options, forwarded to `fireDispatch`.
 * @returns {(input: string | URL | Request, init?: object) => Promise<Response>} fetch-compatible function.
 * @throws {GatewayDelegateError} (from the returned function) kind `"config"`
 *   when a server-side fallback model belongs to a different gateway provider.
 */
function makeGatewayFetch(binding, info, gatewayId, gatewayOptions, opts, selection, callOptions) {
	const strip = new Set(STRIP_HEADERS_BASE);
	// Without BYOK the provider's own auth headers must not be forwarded.
	if (!opts.byok) for (const h of info.authHeaders) strip.add(h.toLowerCase());
	return (async (input, init) => {
		// A Request stringifies to "[object Request]", so read its `url` instead;
		// strings and URL objects stringify to the URL itself.
		let rawUrl;
		if (typeof input === "string") rawUrl = input;
		else if (typeof Request !== "undefined" && input instanceof Request) rawUrl = input.url;
		else rawUrl = input.toString();

		let endpoint;
		if (info.transformEndpoint) {
			endpoint = info.transformEndpoint(rawUrl);
		} else {
			// Parse once; `search` is "" when there is no query string.
			const parsedUrl = new URL(rawUrl);
			endpoint = parsedUrl.pathname.replace(/^\//, "") + parsedUrl.search;
		}

		const body = JSON.parse(asText(init?.body));
		const headers = {};
		for (const [k, v] of Object.entries(headersToObject(init?.headers))) if (!strip.has(k.toLowerCase())) headers[k] = v;
		if (opts.extraHeaders) Object.assign(headers, opts.extraHeaders);
		if (opts.cacheTtl !== void 0) headers["cf-aig-cache-ttl"] = String(opts.cacheTtl);
		if (opts.skipCache) headers["cf-aig-skip-cache"] = "true";
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (metadata) headers["cf-aig-metadata"] = serializeMetadata(metadata);
		if (opts.collectLog !== void 0) headers["cf-aig-collect-log"] = String(opts.collectLog);

		const primary = {
			provider: info.gatewayProviderId,
			endpoint,
			headers,
			query: body
		};
		const entries = [primary];
		if (opts.fallback?.mode === "server") for (const fb of opts.fallback.models) {
			const fbParsed = parseSlug(fb);
			const fbInfo = resolveProvider(fb, fbParsed);
			// Fallback entries reuse the primary's endpoint and headers, so they
			// are only valid for the same gateway provider.
			if (fbInfo.gatewayProviderId !== info.gatewayProviderId) throw new GatewayDelegateError("config", `Cross-vendor server-side fallback (${info.gatewayProviderId} → ${fbInfo.gatewayProviderId}) is not supported yet. Use fallback.mode:"client", or same-vendor fallback models.`);
			entries.push({
				...primary,
				query: {
					...body,
					model: fbParsed.modelId
				}
			});
		}

		const gw = binding.gateway(gatewayId);
		const runOptions = {};
		if (init?.signal) runOptions.signal = init.signal;
		const resp = await gw.run(entries, runOptions);
		fireDispatch(resp, selection, callOptions);
		return resp;
	});
}
|
|
2112
|
+
//#endregion
|
|
1508
2113
|
//#region src/index.ts
|
|
1509
2114
|
/**
|
|
2115
|
+
* The account-wide AI Gateway used for catalog routing when no `gateway` is
|
|
2116
|
+
* configured. Every Cloudflare account has a `"default"` gateway.
|
|
2117
|
+
*/
|
|
2118
|
+
const DEFAULT_GATEWAY_ID = "default";
|
|
2119
|
+
/**
|
|
1510
2120
|
* Create a Workers AI provider instance.
|
|
1511
2121
|
*/
|
|
1512
2122
|
function createWorkersAI(options) {
|
|
@@ -1528,6 +2138,26 @@ function createWorkersAI(options) {
|
|
|
1528
2138
|
provider: "workersai.chat",
|
|
1529
2139
|
isBinding
|
|
1530
2140
|
});
|
|
2141
|
+
let delegate;
|
|
2142
|
+
const getDelegate = (slug) => {
|
|
2143
|
+
if (!options.providers?.length) throw new Error(`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI provider was not configured to route them. Pass provider plugins, e.g.:
|
|
2144
|
+
import { openai } from "workers-ai-provider/openai";
|
|
2145
|
+
createWorkersAI({ binding: env.AI, providers: [openai] });
|
|
2146
|
+
A gateway defaults to "default" but can be set via \`gateway\`. Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").`);
|
|
2147
|
+
delegate ?? (delegate = createGatewayDelegate({
|
|
2148
|
+
binding,
|
|
2149
|
+
gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
|
|
2150
|
+
providers: options.providers,
|
|
2151
|
+
resume: options.resume,
|
|
2152
|
+
onResumeExpired: options.onResumeExpired
|
|
2153
|
+
}));
|
|
2154
|
+
return delegate;
|
|
2155
|
+
};
|
|
2156
|
+
const isGatewaySlug = (id) => typeof id === "string" && !id.startsWith("@") && id.includes("/");
|
|
2157
|
+
const buildChat = (modelId, settings) => {
|
|
2158
|
+
if (isGatewaySlug(modelId)) return getDelegate(modelId)(modelId, settings);
|
|
2159
|
+
return createChatModel(modelId, settings);
|
|
2160
|
+
};
|
|
1531
2161
|
const createImageModel = (modelId, settings = {}) => new WorkersAIImageModel(modelId, settings, {
|
|
1532
2162
|
binding,
|
|
1533
2163
|
gateway: options.gateway,
|
|
@@ -1560,9 +2190,9 @@ function createWorkersAI(options) {
|
|
|
1560
2190
|
});
|
|
1561
2191
|
const provider = (modelId, settings) => {
|
|
1562
2192
|
if (new.target) throw new Error("The WorkersAI model function cannot be called with the new keyword.");
|
|
1563
|
-
return
|
|
2193
|
+
return buildChat(modelId, settings);
|
|
1564
2194
|
};
|
|
1565
|
-
provider.chat =
|
|
2195
|
+
provider.chat = buildChat;
|
|
1566
2196
|
provider.embedding = createEmbeddingModel;
|
|
1567
2197
|
provider.textEmbedding = createEmbeddingModel;
|
|
1568
2198
|
provider.textEmbeddingModel = createEmbeddingModel;
|
|
@@ -1608,6 +2238,6 @@ function createAutoRAG(options) {
|
|
|
1608
2238
|
return createAISearch(options, "autorag.chat");
|
|
1609
2239
|
}
|
|
1610
2240
|
//#endregion
|
|
1611
|
-
export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createWorkersAI };
|
|
2241
|
+
export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, GATEWAY_PROVIDERS, GatewayDelegateError, WorkersAIFallbackError, WorkersAIGatewayError, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createClientFallbackModel, createGatewayFetch, createGatewayProvider, createResumableStream, createWorkersAI, detectProviderByUrl, findProviderBySlug, parseSlug, selectTransport, wireableProviders };
|
|
1612
2242
|
|
|
1613
2243
|
//# sourceMappingURL=index.mjs.map
|