workers-ai-provider 3.1.13 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -31
- package/dist/anthropic.d.mts +14 -0
- package/dist/anthropic.mjs +21 -0
- package/dist/anthropic.mjs.map +1 -0
- package/dist/gateway-delegate-BfaUTwDZ.d.mts +385 -0
- package/dist/gateway-provider-1USFWm7c.mjs +583 -0
- package/dist/gateway-provider-1USFWm7c.mjs.map +1 -0
- package/dist/gateway-provider.d.mts +80 -0
- package/dist/gateway-provider.mjs +2 -0
- package/dist/google.d.mts +14 -0
- package/dist/google.mjs +21 -0
- package/dist/google.mjs.map +1 -0
- package/dist/index.d.mts +64 -7
- package/dist/index.mjs +967 -327
- package/dist/index.mjs.map +1 -1
- package/dist/openai.d.mts +20 -0
- package/dist/openai.mjs +27 -0
- package/dist/openai.mjs.map +1 -0
- package/package.json +47 -6
- package/src/anthropic.ts +17 -0
- package/src/client-fallback.ts +70 -0
- package/src/convert-to-workersai-chat-messages.ts +33 -7
- package/src/errors.ts +216 -0
- package/src/gateway-delegate.ts +696 -0
- package/src/gateway-provider.ts +167 -0
- package/src/gateway-providers.ts +457 -0
- package/src/google.ts +19 -0
- package/src/index.ts +180 -9
- package/src/openai.ts +25 -0
- package/src/resumable-stream.ts +223 -0
- package/src/streaming.ts +103 -30
- package/src/utils.ts +206 -6
- package/src/workersai-chat-language-model.ts +87 -26
- package/src/workersai-chat-settings.ts +1 -1
- package/src/workersai-models.ts +11 -3
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,344 @@
|
|
|
1
|
+
import { a as findProviderBySlug, c as WorkersAIGatewayError, i as detectProviderByUrl, l as _defineProperty, n as createGatewayProvider, o as wireableProviders, r as GATEWAY_PROVIDERS, s as WorkersAIFallbackError, t as createGatewayFetch } from "./gateway-provider-1USFWm7c.mjs";
|
|
2
|
+
import { TooManyEmbeddingValuesForCallError, UnsupportedFunctionalityError } from "@ai-sdk/provider";
|
|
1
3
|
import { generateId } from "ai";
|
|
2
|
-
|
|
4
|
+
//#region src/utils.ts
|
|
5
|
+
/**
 * Normalize messages before passing them to the Workers AI binding.
 *
 * The binding has strict schema validation that differs from the OpenAI API:
 * - `content` must not be null
 *
 * Each message is shallow-copied; a `null` or `undefined` `content` becomes
 * the empty string. The input array and its message objects are not mutated.
 *
 * @param messages Array of chat message objects.
 * @returns A new array of shallow copies with `content` always defined.
 */
function normalizeMessagesForBinding(messages) {
	return messages.map((message) => ({
		...message,
		content: message.content ?? ""
	}));
}
|
|
18
|
+
/**
 * Creates a run method that emulates the Cloudflare Workers AI binding,
 * but uses the Cloudflare REST API under the hood.
 *
 * @param config Credentials config. Reads `accountId`, `apiKey` and an
 *   optional `fetch` implementation (falls back to `globalThis.fetch`).
 * @returns An async `run(model, inputs, options)` function that:
 *   - returns the raw `Response` when `options.returnRawResponse` is set;
 *   - returns `response.body` for streaming requests (`inputs.stream === true`)
 *     when the server answers with a non-JSON body;
 *   - otherwise returns the `result` field of the parsed JSON body.
 * @throws {Error} When a passthrough option is `null`/`undefined` or cannot be
 *   stringified, or when the API answers with a non-2xx status (unless
 *   `returnRawResponse` is set).
 */
function createRun(config) {
	const { accountId, apiKey } = config;
	const fetchFn = config.fetch ?? globalThis.fetch;

	// Single place that turns a failed response into the Error surfaced to
	// callers, so the first attempt and the retry report failures identically.
	const buildApiError = async (failedResponse) => {
		let errorBody;
		try {
			errorBody = await failedResponse.text();
		} catch {
			errorBody = "<unable to read response body>";
		}
		return new Error(`Workers AI API error (${failedResponse.status} ${failedResponse.statusText}): ${errorBody}`);
	};

	return async function run(model, inputs, options) {
		const { gateway, prefix: _prefix, extraHeaders, returnRawResponse, signal, ...passthroughOptions } = options || {};

		// Every remaining option is forwarded as a URL query parameter.
		const urlParams = new URLSearchParams();
		for (const [key, value] of Object.entries(passthroughOptions)) {
			if (value === void 0 || value === null) throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
			let valueStr;
			try {
				valueStr = String(value);
			} catch {
				throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
			}
			// Values that stringify to "" are skipped rather than sent as `key=`.
			if (!valueStr) continue;
			urlParams.append(key, valueStr);
		}
		const queryString = urlParams.toString();
		const querySuffix = queryString ? `?${queryString}` : "";
		const modelPath = String(model).startsWith("run/") ? model : `run/${model}`;

		// A gateway id routes the call through AI Gateway instead of the REST API.
		const url = gateway?.id
			? `https://gateway.ai.cloudflare.com/v1/${accountId}/${gateway.id}/workers-ai/${modelPath}${querySuffix}`
			: `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/${modelPath}${querySuffix}`;

		const headers = {
			Authorization: `Bearer ${apiKey}`,
			"Content-Type": "application/json",
			...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {}
		};
		if (gateway) {
			if (gateway.skipCache) headers["cf-aig-skip-cache"] = "true";
			if (typeof gateway.cacheTtl === "number") headers["cf-aig-cache-ttl"] = String(gateway.cacheTtl);
			if (gateway.cacheKey) headers["cf-aig-cache-key"] = gateway.cacheKey;
			if (gateway.metadata) headers["cf-aig-metadata"] = JSON.stringify(gateway.metadata);
		}

		const post = (payload) => fetchFn(url, {
			body: JSON.stringify(payload),
			headers,
			method: "POST",
			signal
		});

		const response = await post(inputs);
		// Raw mode hands back the response untouched, even for error statuses.
		if (returnRawResponse) return response;
		if (!response.ok) throw await buildApiError(response);

		if (inputs.stream === true) {
			const contentType = response.headers.get("content-type") || "";
			if (response.body && (contentType.includes("event-stream") || !contentType.includes("json"))) return response.body;

			// The server answered a streaming request with JSON (or no body):
			// retry without streaming. Release the unread first body so it is
			// not left dangling; cleanup is best-effort and must not mask the
			// retry result.
			try {
				await response.body?.cancel();
			} catch {
				// Ignored on purpose: failing to cancel does not affect the retry.
			}
			const retryResponse = await post({
				...inputs,
				stream: false
			});
			if (!retryResponse.ok) throw await buildApiError(retryResponse);
			return (await retryResponse.json()).result;
		}
		return (await response.json()).result;
	};
}
|
|
95
|
+
/**
 * Make a binary REST API call to Workers AI.
 *
 * Some models (e.g. `@cf/deepgram/nova-3`) require raw audio bytes
 * with an appropriate `Content-Type` header instead of JSON.
 *
 * @param config Credentials config. Reads `accountId` and `apiKey`; an
 *   optional `fetch` implementation is honoured when present (falls back to
 *   `globalThis.fetch`), matching how `createRun` resolves its fetch function.
 * @param model Workers AI model name
 * @param audioBytes Raw audio bytes
 * @param contentType MIME type (e.g. "audio/wav")
 * @param signal Optional AbortSignal
 * @returns The `result` field of the parsed JSON response body, or the whole
 *   parsed body when `result` is null/undefined.
 * @throws {Error} When the API answers with a non-2xx status.
 */
async function createRunBinary(config, model, audioBytes, contentType, signal) {
	// Use the caller-supplied fetch when there is one so custom transports
	// and test doubles apply to binary calls as well as JSON calls.
	const fetchFn = config.fetch ?? globalThis.fetch;
	const url = `https://api.cloudflare.com/client/v4/accounts/${config.accountId}/ai/run/${model}`;
	const response = await fetchFn(url, {
		method: "POST",
		headers: {
			Authorization: `Bearer ${config.apiKey}`,
			"Content-Type": contentType
		},
		body: audioBytes,
		signal
	});
	if (!response.ok) {
		let errorBody;
		try {
			errorBody = await response.text();
		} catch {
			errorBody = "<unable to read response body>";
		}
		throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
	}
	const data = await response.json();
	return data.result ?? data;
}
|
|
131
|
+
/**
 * Build the `response_format.json_schema` payload for native Workers AI models.
 *
 * Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
 * Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
 * only required by partner-model routes (e.g. `openai/...`), which never reach
 * this code — they go through the gateway delegate and the real `@ai-sdk/*`
 * providers, which build the envelope themselves. Wrapping the schema here
 * would break native models, so the bare shape must be kept.
 *
 * The AI SDK's structured-output `name` / `description` (from
 * `Output.object({ schema, name, description })` / `generateObject`) would
 * otherwise be silently dropped on this path. They are preserved as the
 * standard JSON Schema `title` (from `name`) and `description` keywords, which
 * keeps the payload a valid bare schema while still passing the LLM guidance
 * through.
 *
 * Existing schema-level `title` / `description` are never overwritten, empty
 * strings are ignored, and the input schema object is never mutated.
 *
 * See https://github.com/cloudflare/ai/issues/559.
 *
 * @param schema The JSON Schema; non-object, `null`, and array values are
 *   returned as-is.
 * @param name Optional structured-output name, used as `title`.
 * @param description Optional structured-output description.
 * @returns The same `schema` reference when nothing is added, otherwise a
 *   shallow copy carrying the extra keyword(s).
 */
function buildJsonSchemaPayload(schema, name, description) {
	const isSchemaObject = typeof schema === "object" && schema !== null && !Array.isArray(schema);
	if (!isSchemaObject) return schema;

	// Collect only the keywords that are both provided and not already set.
	const additions = {};
	if (name && schema.title === void 0) additions.title = name;
	if (description && schema.description === void 0) additions.description = description;

	if (Object.keys(additions).length === 0) return schema;
	return {
		...schema,
		...additions
	};
}
|
|
164
|
+
/**
 * Translate tool definitions and a tool choice into the `tools` /
 * `tool_choice` request fields.
 *
 * Every tool is emitted as `{ type: "function", function: { ... } }`. Only
 * tools whose own `type` is `"function"` carry over `description` and
 * `inputSchema` (as `parameters`); other tools keep just their `name`.
 *
 * @param tools Tool definitions, or null/undefined when no tools are used.
 * @param toolChoice Tool choice object with a `type` of `"auto"`, `"none"`,
 *   `"required"` or `"tool"` (the latter also carrying `toolName`), or
 *   null/undefined.
 * @returns `{ tool_choice, tools }`; both are `undefined` when `tools` is
 *   null/undefined, and `tool_choice` is `undefined` when no choice is given.
 * @throws {Error} When `toolChoice.type` is not one of the supported values.
 */
function prepareToolsAndToolChoice(tools, toolChoice) {
	if (tools == null) {
		return {
			tool_choice: void 0,
			tools: void 0
		};
	}

	const mappedTools = tools.map((tool) => {
		const isFunctionTool = tool.type === "function";
		return {
			function: {
				description: isFunctionTool ? tool.description : void 0,
				name: tool.name,
				parameters: isFunctionTool ? tool.inputSchema : void 0
			},
			type: "function"
		};
	});

	if (toolChoice == null) {
		return {
			tool_choice: void 0,
			tools: mappedTools
		};
	}

	const { type } = toolChoice;
	switch (type) {
		// These choices are passed through under the same string value.
		case "auto":
		case "none":
		case "required":
			return {
				tool_choice: type,
				tools: mappedTools
			};
		case "tool":
			return {
				tool_choice: {
					type: "function",
					function: { name: toolChoice.toolName }
				},
				tools: mappedTools
			};
		default:
			throw new Error(`Unsupported tool choice type: ${type}`);
	}
}
|
|
205
|
+
/** Separator between the upstream tool call id and the generated suffix. */
const TOOL_CALL_ID_MARKER = "::cf-wai-tool-call::";

/**
 * Build the tool call id exposed to the AI SDK:
 * `<toolCallId or generated id><marker><generated id>`.
 *
 * @param toolCallId Upstream tool call id; a fresh id is generated when it is
 *   missing or empty.
 * @returns The composite id.
 */
function createAISDKToolCallId(toolCallId) {
	return `${toolCallId || generateId()}${TOOL_CALL_ID_MARKER}${generateId()}`;
}

/**
 * Recover the upstream tool call id from a composite id produced by
 * `createAISDKToolCallId`.
 *
 * The id is cut at the LAST marker occurrence. Ids are returned unchanged
 * when they contain no marker, when nothing follows the marker, or when
 * nothing precedes it (there is no upstream id to recover, and stripping
 * would yield an empty id).
 *
 * @param toolCallId Tool call id as seen by the AI SDK.
 * @returns The part before the last marker, or the input unchanged.
 */
function toWorkersAIToolCallId(toolCallId) {
	const markerIndex = toolCallId.lastIndexOf(TOOL_CALL_ID_MARKER);
	// No marker, or marker at position 0: nothing meaningful to strip to.
	if (markerIndex <= 0) return toolCallId;
	// Derive the offset from the marker itself so the check cannot drift if
	// the marker text ever changes.
	if (markerIndex + TOOL_CALL_ID_MARKER.length >= toolCallId.length) return toolCallId;
	return toolCallId.slice(0, markerIndex);
}
|
|
215
|
+
/**
 * Convert one tool call from a Workers AI response into a `tool-call` part.
 *
 * Two shapes are accepted:
 * - nested: `{ id, function: { name, arguments } }` — used when `function`
 *   is an object with a truthy `name`;
 * - flat: `{ id, name, arguments }` — used otherwise.
 *
 * String arguments are passed through verbatim; anything else is JSON
 * stringified, with falsy values becoming `{}`.
 *
 * @param toolCall Raw tool call object.
 * @returns `{ input, toolCallId, type: "tool-call", toolName }`.
 */
function processToolCall(toolCall) {
	const nested = "function" in toolCall && typeof toolCall.function === "object" && toolCall.function ? toolCall.function : null;
	const useNested = Boolean(nested?.name);

	const rawArguments = useNested ? nested.arguments : toolCall.arguments;
	const toolName = useNested ? nested.name : toolCall.name;
	const input = typeof rawArguments === "string" ? rawArguments : JSON.stringify(rawArguments || {});

	return {
		input,
		toolCallId: createAISDKToolCallId(toolCall.id),
		type: "tool-call",
		toolName
	};
}
|
|
231
|
+
/**
 * Extract the tool calls from a Workers AI response.
 *
 * Looks first at the top-level `tool_calls` array, then at
 * `choices[0].message.tool_calls`; each entry is converted with
 * `processToolCall`.
 *
 * @param output Parsed response object.
 * @returns The converted tool calls, or an empty array when neither location
 *   holds an array.
 */
function processToolCalls(output) {
	const convert = (toolCall) => processToolCall(toolCall);

	const topLevelCalls = output.tool_calls;
	if (Array.isArray(topLevelCalls)) return topLevelCalls.map(convert);

	const choiceCalls = output.choices?.[0]?.message?.tool_calls;
	if (Array.isArray(choiceCalls)) return choiceCalls.map(convert);

	return [];
}
|
|
237
|
+
/**
 * Was a tool call forced for this request?
 *
 * True for both `tool_choice: "required"` and the named-function form
 * `{ type: "function", function: { name } }`.
 *
 * @param toolChoice The mapped `tool_choice` value (string, object, or
 *   undefined).
 * @returns `true` when a tool call was demanded, `false` otherwise.
 */
function isForcedToolChoice(toolChoice) {
	switch (typeof toolChoice) {
		case "string":
			return toolChoice === "required";
		case "object":
			// `typeof null` is "object", so null has to be excluded explicitly.
			return toolChoice !== null && toolChoice.type === "function";
		default:
			return false;
	}
}
|
|
247
|
+
/**
 * Parse tool calls that a model leaked as JSON text instead of structured
 * `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
 *
 * Only JSON objects whose `name` is one of `knownToolNames` are recovered;
 * everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
 * hallucinated names) is ignored to avoid fabricating bogus calls.
 *
 * Arguments are taken from `arguments`, else `parameters`, else every
 * property other than `name`.
 *
 * @param text Text content that may hold a JSON object or array of objects.
 * @param knownToolNames Set of tool names that may be recovered.
 * @returns The recovered `tool-call` parts; empty when the text is not valid
 *   JSON or no entry names a known tool.
 */
function parseLeakedToolCalls(text, knownToolNames) {
	let decoded;
	try {
		decoded = JSON.parse(text.trim());
	} catch {
		return [];
	}

	const entries = Array.isArray(decoded) ? decoded : [decoded];
	const recovered = [];
	for (const entry of entries) {
		const isObject = typeof entry === "object" && entry !== null;
		if (!isObject) continue;

		const toolName = entry.name;
		if (typeof toolName !== "string" || !knownToolNames.has(toolName)) continue;

		let payload;
		if ("arguments" in entry) {
			payload = entry.arguments;
		} else if ("parameters" in entry) {
			payload = entry.parameters;
		} else {
			// Flat form: everything except `name` is the argument object.
			const { name: _ignored, ...remainder } = entry;
			payload = remainder;
		}

		recovered.push({
			input: typeof payload === "string" ? payload : JSON.stringify(payload ?? {}),
			toolCallId: createAISDKToolCallId(void 0),
			type: "tool-call",
			toolName
		});
	}
	return recovered;
}
|
|
285
|
+
/**
 * Collect the requested tool names from mapped tools.
 *
 * @param tools Mapped tools (`{ function: { name } }`), or null/undefined.
 * @returns A Set of every `function.name` that is a string.
 */
function getToolNames(tools) {
	const names = new Set();
	for (const tool of tools ?? []) {
		const candidate = tool.function?.name;
		if (typeof candidate === "string") names.add(candidate);
	}
	return names;
}
|
|
289
|
+
/**
 * Salvage a tool call that a model leaked into text content instead of the
 * structured `tool_calls` field.
 *
 * Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
 * call as raw JSON in `message.content` with an empty `tool_calls` array and
 * `finish_reason: "stop"` — typically when the forced tool is a poor fit for
 * the conversation. The content looks like one of:
 *
 *   {"name":"read_skill_resource","path":"feedback.txt"}   (flat args)
 *   {"name":"calc","arguments":{"a":1}}                    (wrapped args)
 *   [{"name":"calc","parameters":{"a":1}}]                 (array form)
 *
 * This reinterprets that text as a structured tool call. It is intentionally
 * narrow to avoid false positives:
 *   - only runs when a tool was *forced* (required / named-function), so a
 *     tool call was explicitly demanded by the caller;
 *   - only runs when there are no real structured tool calls to override;
 *   - only matches JSON objects whose `name` is one of the requested tools.
 *
 * See https://github.com/cloudflare/ai/issues/560.
 *
 * @param output Parsed response object.
 * @param context Request context; reads `toolChoice` and `tools`.
 * @returns The salvaged tool calls, or `null` when nothing was salvaged.
 */
function salvageToolCallsFromText(output, context) {
	const wasForced = isForcedToolChoice(context.toolChoice);
	if (!wasForced || processToolCalls(output).length > 0) return null;

	const knownToolNames = getToolNames(context.tools);
	// Without known tool names nothing can be matched, so the text is not read.
	const text = knownToolNames.size > 0 ? processText(output) : void 0;
	if (!text) return null;

	const salvaged = parseLeakedToolCalls(text, knownToolNames);
	return salvaged.length === 0 ? null : salvaged;
}
|
|
323
|
+
/**
 * Extract text from a Workers AI response, handling multiple response formats:
 * - OpenAI format: { choices: [{ message: { content: "..." } }] }
 * - Native format: { response: "..." }
 * - Structured output quirk: { response: { ... } } (object instead of string)
 * - Structured output quirk: { response: "{ ... }" } (JSON string)
 *
 * Non-empty `choices[0].message.content` wins; otherwise `response` is used.
 *
 * @param output Parsed response object.
 * @returns The text; objects are JSON-stringified and other values coerced
 *   with `String`. Returns `undefined` when there is no `response` key or its
 *   value is null/undefined.
 */
function processText(output) {
	const choiceContent = output.choices?.[0]?.message?.content;
	if (choiceContent != null) {
		const choiceText = String(choiceContent);
		if (choiceText.length > 0) return choiceText;
	}

	if (!("response" in output)) return;

	const { response } = output;
	if (response == null) return;
	return typeof response === "object" ? JSON.stringify(response) : String(response);
}
|
|
341
|
+
//#endregion
|
|
3
342
|
//#region src/convert-to-workersai-chat-messages.ts
|
|
4
343
|
/**
|
|
5
344
|
* Normalise any LanguageModelV3DataContent value to a Uint8Array.
|
|
@@ -25,6 +364,17 @@ function toUint8Array$2(data) {
|
|
|
25
364
|
if (data instanceof URL) throw new Error("URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead.");
|
|
26
365
|
return null;
|
|
27
366
|
}
|
|
367
|
+
/**
 * Ensure a file part's media type is an image type.
 *
 * @param mediaType The file part's `mediaType`.
 * @returns The unchanged `mediaType` when it starts with `image/`
 *   (case-insensitive).
 * @throws {UnsupportedFunctionalityError} When `mediaType` is missing/empty or
 *   is not an `image/*` type.
 */
function assertImageMediaType(mediaType) {
	let problem = null;
	if (!mediaType) {
		problem = {
			functionality: "file-part-without-media-type",
			message: "Workers AI chat only supports image file parts with an image/* mediaType. Received a file part without a mediaType."
		};
	} else if (!mediaType.toLowerCase().startsWith("image/")) {
		problem = {
			functionality: "non-image-file-part",
			message: `Workers AI chat only supports image file parts with an image/* mediaType. Received mediaType "${mediaType}".`
		};
	}
	if (problem !== null) throw new UnsupportedFunctionalityError(problem);
	return mediaType;
}
|
|
28
378
|
function uint8ArrayToBase64$1(bytes) {
|
|
29
379
|
let binary = "";
|
|
30
380
|
const chunkSize = 8192;
|
|
@@ -51,10 +401,11 @@ function convertToWorkersAIChatMessages(prompt) {
|
|
|
51
401
|
textParts.push(part.text);
|
|
52
402
|
break;
|
|
53
403
|
case "file": {
|
|
404
|
+
const mediaType = assertImageMediaType(part.mediaType);
|
|
54
405
|
const imageBytes = toUint8Array$2(part.data);
|
|
55
406
|
if (imageBytes) imageParts.push({
|
|
56
407
|
image: imageBytes,
|
|
57
|
-
mediaType
|
|
408
|
+
mediaType
|
|
58
409
|
});
|
|
59
410
|
break;
|
|
60
411
|
}
|
|
@@ -67,10 +418,9 @@ function convertToWorkersAIChatMessages(prompt) {
|
|
|
67
418
|
});
|
|
68
419
|
for (const img of imageParts) {
|
|
69
420
|
const base64 = uint8ArrayToBase64$1(img.image);
|
|
70
|
-
const mediaType = img.mediaType || "image/png";
|
|
71
421
|
contentArray.push({
|
|
72
422
|
type: "image_url",
|
|
73
|
-
image_url: { url: `data:${mediaType};base64,${base64}` }
|
|
423
|
+
image_url: { url: `data:${img.mediaType};base64,${base64}` }
|
|
74
424
|
});
|
|
75
425
|
}
|
|
76
426
|
messages.push({
|
|
@@ -101,7 +451,7 @@ function convertToWorkersAIChatMessages(prompt) {
|
|
|
101
451
|
arguments: JSON.stringify(part.input),
|
|
102
452
|
name: part.toolName
|
|
103
453
|
},
|
|
104
|
-
id: part.toolCallId,
|
|
454
|
+
id: toWorkersAIToolCallId(part.toolCallId),
|
|
105
455
|
type: "function"
|
|
106
456
|
});
|
|
107
457
|
break;
|
|
@@ -149,7 +499,7 @@ function convertToWorkersAIChatMessages(prompt) {
|
|
|
149
499
|
messages.push({
|
|
150
500
|
content,
|
|
151
501
|
name: toolResponse.toolName,
|
|
152
|
-
tool_call_id: toolResponse.toolCallId,
|
|
502
|
+
tool_call_id: toWorkersAIToolCallId(toolResponse.toolCallId),
|
|
153
503
|
role: "tool"
|
|
154
504
|
});
|
|
155
505
|
}
|
|
@@ -288,9 +638,13 @@ function isNullFinalizationChunk(tc) {
|
|
|
288
638
|
* 1. Native format: { response: "chunk", tool_calls: [...] }
|
|
289
639
|
* 2. OpenAI format: { choices: [{ delta: { content: "chunk" } }] }
|
|
290
640
|
*/
|
|
291
|
-
function getMappedStream(response) {
|
|
641
|
+
function getMappedStream(response, salvageContext) {
|
|
292
642
|
const rawStream = response instanceof ReadableStream ? response : response.body;
|
|
293
643
|
if (!rawStream) throw new Error("No readable stream available for SSE parsing.");
|
|
644
|
+
const knownToolNames = getToolNames(salvageContext?.tools);
|
|
645
|
+
const bufferContentForSalvage = isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
|
|
646
|
+
let contentBuffer = "";
|
|
647
|
+
let anyToolCallStarted = false;
|
|
294
648
|
let usage = {
|
|
295
649
|
outputTokens: {
|
|
296
650
|
total: 0,
|
|
@@ -336,7 +690,8 @@ function getMappedStream(response) {
|
|
|
336
690
|
const nativeResponse = chunk.response;
|
|
337
691
|
if (nativeResponse != null && nativeResponse !== "") {
|
|
338
692
|
const responseText = String(nativeResponse);
|
|
339
|
-
if (responseText.length > 0)
|
|
693
|
+
if (responseText.length > 0) if (bufferContentForSalvage) contentBuffer += responseText;
|
|
694
|
+
else {
|
|
340
695
|
if (reasoningId) {
|
|
341
696
|
controller.enqueue({
|
|
342
697
|
type: "reasoning-end",
|
|
@@ -386,7 +741,8 @@ function getMappedStream(response) {
|
|
|
386
741
|
});
|
|
387
742
|
}
|
|
388
743
|
const textDelta = delta.content;
|
|
389
|
-
if (textDelta && textDelta.length > 0)
|
|
744
|
+
if (textDelta && textDelta.length > 0) if (bufferContentForSalvage) contentBuffer += textDelta;
|
|
745
|
+
else {
|
|
390
746
|
if (reasoningId) {
|
|
391
747
|
controller.enqueue({
|
|
392
748
|
type: "reasoning-end",
|
|
@@ -429,11 +785,69 @@ function getMappedStream(response) {
|
|
|
429
785
|
type: "reasoning-end",
|
|
430
786
|
id: reasoningId
|
|
431
787
|
});
|
|
788
|
+
let salvagedToolCalls = false;
|
|
789
|
+
if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
|
|
790
|
+
const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
|
|
791
|
+
if (salvaged.length > 0) {
|
|
792
|
+
for (const call of salvaged) {
|
|
793
|
+
controller.enqueue({
|
|
794
|
+
type: "tool-input-start",
|
|
795
|
+
id: call.toolCallId,
|
|
796
|
+
toolName: call.toolName
|
|
797
|
+
});
|
|
798
|
+
controller.enqueue({
|
|
799
|
+
type: "tool-input-delta",
|
|
800
|
+
id: call.toolCallId,
|
|
801
|
+
delta: call.input
|
|
802
|
+
});
|
|
803
|
+
controller.enqueue({
|
|
804
|
+
type: "tool-input-end",
|
|
805
|
+
id: call.toolCallId
|
|
806
|
+
});
|
|
807
|
+
controller.enqueue(call);
|
|
808
|
+
}
|
|
809
|
+
salvagedToolCalls = true;
|
|
810
|
+
console.warn(`[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`);
|
|
811
|
+
} else {
|
|
812
|
+
const id = generateId();
|
|
813
|
+
controller.enqueue({
|
|
814
|
+
type: "text-start",
|
|
815
|
+
id
|
|
816
|
+
});
|
|
817
|
+
controller.enqueue({
|
|
818
|
+
type: "text-delta",
|
|
819
|
+
id,
|
|
820
|
+
delta: contentBuffer
|
|
821
|
+
});
|
|
822
|
+
controller.enqueue({
|
|
823
|
+
type: "text-end",
|
|
824
|
+
id
|
|
825
|
+
});
|
|
826
|
+
}
|
|
827
|
+
} else if (bufferContentForSalvage && contentBuffer.trim()) {
|
|
828
|
+
const id = generateId();
|
|
829
|
+
controller.enqueue({
|
|
830
|
+
type: "text-start",
|
|
831
|
+
id
|
|
832
|
+
});
|
|
833
|
+
controller.enqueue({
|
|
834
|
+
type: "text-delta",
|
|
835
|
+
id,
|
|
836
|
+
delta: contentBuffer
|
|
837
|
+
});
|
|
838
|
+
controller.enqueue({
|
|
839
|
+
type: "text-end",
|
|
840
|
+
id
|
|
841
|
+
});
|
|
842
|
+
}
|
|
432
843
|
if (textId) controller.enqueue({
|
|
433
844
|
type: "text-end",
|
|
434
845
|
id: textId
|
|
435
846
|
});
|
|
436
|
-
const effectiveFinishReason =
|
|
847
|
+
const effectiveFinishReason = salvagedToolCalls ? {
|
|
848
|
+
unified: "tool-calls",
|
|
849
|
+
raw: "stop"
|
|
850
|
+
} : !receivedDone && receivedAnyData && !finishReason ? {
|
|
437
851
|
unified: "error",
|
|
438
852
|
raw: "stream-truncated"
|
|
439
853
|
} : finishReason ?? {
|
|
@@ -492,7 +906,7 @@ function getMappedStream(response) {
|
|
|
492
906
|
const tcId = tc.id;
|
|
493
907
|
if (!activeToolCalls.has(tcIndex)) {
|
|
494
908
|
if (lastActiveToolIndex != null && lastActiveToolIndex !== tcIndex) closeToolCall(lastActiveToolIndex, controller);
|
|
495
|
-
const id = tcId
|
|
909
|
+
const id = createAISDKToolCallId(tcId);
|
|
496
910
|
const toolName = tcName || "";
|
|
497
911
|
activeToolCalls.set(tcIndex, {
|
|
498
912
|
id,
|
|
@@ -500,6 +914,7 @@ function getMappedStream(response) {
|
|
|
500
914
|
args: ""
|
|
501
915
|
});
|
|
502
916
|
lastActiveToolIndex = tcIndex;
|
|
917
|
+
anyToolCallStarted = true;
|
|
503
918
|
controller.enqueue({
|
|
504
919
|
type: "tool-input-start",
|
|
505
920
|
id,
|
|
@@ -508,303 +923,59 @@ function getMappedStream(response) {
|
|
|
508
923
|
if (tcArgs != null && tcArgs !== "") {
|
|
509
924
|
const delta = typeof tcArgs === "string" ? tcArgs : JSON.stringify(tcArgs);
|
|
510
925
|
activeToolCalls.get(tcIndex).args += delta;
|
|
511
|
-
controller.enqueue({
|
|
512
|
-
type: "tool-input-delta",
|
|
513
|
-
id,
|
|
514
|
-
delta
|
|
515
|
-
});
|
|
516
|
-
}
|
|
517
|
-
} else {
|
|
518
|
-
const active = activeToolCalls.get(tcIndex);
|
|
519
|
-
lastActiveToolIndex = tcIndex;
|
|
520
|
-
if (tcArgs != null && tcArgs !== "") {
|
|
521
|
-
const delta = typeof tcArgs === "string" ? tcArgs : JSON.stringify(tcArgs);
|
|
522
|
-
active.args += delta;
|
|
523
|
-
controller.enqueue({
|
|
524
|
-
type: "tool-input-delta",
|
|
525
|
-
id: active.id,
|
|
526
|
-
delta
|
|
527
|
-
});
|
|
528
|
-
}
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
|
-
}
|
|
533
|
-
/**
|
|
534
|
-
* TransformStream that decodes a raw byte stream into SSE `data:` payloads.
|
|
535
|
-
* Each output chunk is the string content after "data: " (one per SSE event).
|
|
536
|
-
* Handles line buffering for partial chunks.
|
|
537
|
-
*/
|
|
538
|
-
var SSEDecoder = class extends TransformStream {
|
|
539
|
-
constructor() {
|
|
540
|
-
let buffer = "";
|
|
541
|
-
const decoder = new TextDecoder();
|
|
542
|
-
super({
|
|
543
|
-
transform(chunk, controller) {
|
|
544
|
-
buffer += decoder.decode(chunk, { stream: true });
|
|
545
|
-
const lines = buffer.split("\n");
|
|
546
|
-
buffer = lines.pop() || "";
|
|
547
|
-
for (const line of lines) {
|
|
548
|
-
const trimmed = line.trim();
|
|
549
|
-
if (!trimmed) continue;
|
|
550
|
-
if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
|
|
551
|
-
else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
|
|
552
|
-
}
|
|
553
|
-
},
|
|
554
|
-
flush(controller) {
|
|
555
|
-
if (buffer.trim()) {
|
|
556
|
-
const trimmed = buffer.trim();
|
|
557
|
-
if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
|
|
558
|
-
else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
|
|
559
|
-
}
|
|
560
|
-
}
|
|
561
|
-
});
|
|
562
|
-
}
|
|
563
|
-
};
|
|
564
|
-
//#endregion
|
|
565
|
-
//#region src/utils.ts
|
|
566
|
-
/**
|
|
567
|
-
* Normalize messages before passing to the Workers AI binding.
|
|
568
|
-
*
|
|
569
|
-
* The binding has strict schema validation that differs from the OpenAI API:
|
|
570
|
-
* - `content` must not be null
|
|
571
|
-
*/
|
|
572
|
-
function normalizeMessagesForBinding(messages) {
|
|
573
|
-
return messages.map((msg) => {
|
|
574
|
-
const normalized = { ...msg };
|
|
575
|
-
if (normalized.content === null || normalized.content === void 0) normalized.content = "";
|
|
576
|
-
return normalized;
|
|
577
|
-
});
|
|
578
|
-
}
|
|
579
|
-
/**
|
|
580
|
-
* Creates a run method that emulates the Cloudflare Workers AI binding,
|
|
581
|
-
* but uses the Cloudflare REST API under the hood.
|
|
582
|
-
*/
|
|
583
|
-
function createRun(config) {
|
|
584
|
-
const { accountId, apiKey } = config;
|
|
585
|
-
const fetchFn = config.fetch ?? globalThis.fetch;
|
|
586
|
-
return async function run(model, inputs, options) {
|
|
587
|
-
const { gateway, prefix: _prefix, extraHeaders, returnRawResponse, signal, ...passthroughOptions } = options || {};
|
|
588
|
-
const urlParams = new URLSearchParams();
|
|
589
|
-
for (const [key, value] of Object.entries(passthroughOptions)) {
|
|
590
|
-
if (value === void 0 || value === null) throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
|
|
591
|
-
try {
|
|
592
|
-
const valueStr = String(value);
|
|
593
|
-
if (!valueStr) continue;
|
|
594
|
-
urlParams.append(key, valueStr);
|
|
595
|
-
} catch {
|
|
596
|
-
throw new Error(`Value for option '${key}' is not able to be coerced into a string.`);
|
|
597
|
-
}
|
|
598
|
-
}
|
|
599
|
-
const queryString = urlParams.toString();
|
|
600
|
-
const modelPath = String(model).startsWith("run/") ? model : `run/${model}`;
|
|
601
|
-
const url = gateway?.id ? `https://gateway.ai.cloudflare.com/v1/${accountId}/${gateway.id}/workers-ai/${modelPath}${queryString ? `?${queryString}` : ""}` : `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/${modelPath}${queryString ? `?${queryString}` : ""}`;
|
|
602
|
-
const headers = {
|
|
603
|
-
Authorization: `Bearer ${apiKey}`,
|
|
604
|
-
"Content-Type": "application/json",
|
|
605
|
-
...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {}
|
|
606
|
-
};
|
|
607
|
-
if (gateway) {
|
|
608
|
-
if (gateway.skipCache) headers["cf-aig-skip-cache"] = "true";
|
|
609
|
-
if (typeof gateway.cacheTtl === "number") headers["cf-aig-cache-ttl"] = String(gateway.cacheTtl);
|
|
610
|
-
if (gateway.cacheKey) headers["cf-aig-cache-key"] = gateway.cacheKey;
|
|
611
|
-
if (gateway.metadata) headers["cf-aig-metadata"] = JSON.stringify(gateway.metadata);
|
|
612
|
-
}
|
|
613
|
-
const response = await fetchFn(url, {
|
|
614
|
-
body: JSON.stringify(inputs),
|
|
615
|
-
headers,
|
|
616
|
-
method: "POST",
|
|
617
|
-
signal
|
|
618
|
-
});
|
|
619
|
-
if (!response.ok && !returnRawResponse) {
|
|
620
|
-
let errorBody;
|
|
621
|
-
try {
|
|
622
|
-
errorBody = await response.text();
|
|
623
|
-
} catch {
|
|
624
|
-
errorBody = "<unable to read response body>";
|
|
625
|
-
}
|
|
626
|
-
throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
|
|
627
|
-
}
|
|
628
|
-
if (returnRawResponse) return response;
|
|
629
|
-
if (inputs.stream === true) {
|
|
630
|
-
const contentType = response.headers.get("content-type") || "";
|
|
631
|
-
if (contentType.includes("event-stream") && response.body) return response.body;
|
|
632
|
-
if (response.body && !contentType.includes("json")) return response.body;
|
|
633
|
-
const retryResponse = await fetchFn(url, {
|
|
634
|
-
body: JSON.stringify({
|
|
635
|
-
...inputs,
|
|
636
|
-
stream: false
|
|
637
|
-
}),
|
|
638
|
-
headers,
|
|
639
|
-
method: "POST",
|
|
640
|
-
signal
|
|
641
|
-
});
|
|
642
|
-
if (!retryResponse.ok) {
|
|
643
|
-
let errorBody;
|
|
644
|
-
try {
|
|
645
|
-
errorBody = await retryResponse.text();
|
|
646
|
-
} catch {
|
|
647
|
-
errorBody = "<unable to read response body>";
|
|
648
|
-
}
|
|
649
|
-
throw new Error(`Workers AI API error (${retryResponse.status} ${retryResponse.statusText}): ${errorBody}`);
|
|
650
|
-
}
|
|
651
|
-
return (await retryResponse.json()).result;
|
|
652
|
-
}
|
|
653
|
-
return (await response.json()).result;
|
|
654
|
-
};
|
|
655
|
-
}
|
|
656
|
-
/**
|
|
657
|
-
* Make a binary REST API call to Workers AI.
|
|
658
|
-
*
|
|
659
|
-
* Some models (e.g. `@cf/deepgram/nova-3`) require raw audio bytes
|
|
660
|
-
* with an appropriate `Content-Type` header instead of JSON.
|
|
661
|
-
*
|
|
662
|
-
* @param config Credentials config
|
|
663
|
-
* @param model Workers AI model name
|
|
664
|
-
* @param audioBytes Raw audio bytes
|
|
665
|
-
* @param contentType MIME type (e.g. "audio/wav")
|
|
666
|
-
* @param signal Optional AbortSignal
|
|
667
|
-
* @returns The parsed JSON response body
|
|
668
|
-
*/
|
|
669
|
-
async function createRunBinary(config, model, audioBytes, contentType, signal) {
|
|
670
|
-
const url = `https://api.cloudflare.com/client/v4/accounts/${config.accountId}/ai/run/${model}`;
|
|
671
|
-
const response = await fetch(url, {
|
|
672
|
-
method: "POST",
|
|
673
|
-
headers: {
|
|
674
|
-
Authorization: `Bearer ${config.apiKey}`,
|
|
675
|
-
"Content-Type": contentType
|
|
676
|
-
},
|
|
677
|
-
body: audioBytes,
|
|
678
|
-
signal
|
|
679
|
-
});
|
|
680
|
-
if (!response.ok) {
|
|
681
|
-
let errorBody;
|
|
682
|
-
try {
|
|
683
|
-
errorBody = await response.text();
|
|
684
|
-
} catch {
|
|
685
|
-
errorBody = "<unable to read response body>";
|
|
926
|
+
controller.enqueue({
|
|
927
|
+
type: "tool-input-delta",
|
|
928
|
+
id,
|
|
929
|
+
delta
|
|
930
|
+
});
|
|
931
|
+
}
|
|
932
|
+
} else {
|
|
933
|
+
const active = activeToolCalls.get(tcIndex);
|
|
934
|
+
lastActiveToolIndex = tcIndex;
|
|
935
|
+
if (tcArgs != null && tcArgs !== "") {
|
|
936
|
+
const delta = typeof tcArgs === "string" ? tcArgs : JSON.stringify(tcArgs);
|
|
937
|
+
active.args += delta;
|
|
938
|
+
controller.enqueue({
|
|
939
|
+
type: "tool-input-delta",
|
|
940
|
+
id: active.id,
|
|
941
|
+
delta
|
|
942
|
+
});
|
|
943
|
+
}
|
|
944
|
+
}
|
|
686
945
|
}
|
|
687
|
-
throw new Error(`Workers AI API error (${response.status} ${response.statusText}): ${errorBody}`);
|
|
688
|
-
}
|
|
689
|
-
const data = await response.json();
|
|
690
|
-
return data.result ?? data;
|
|
691
|
-
}
|
|
692
|
-
function prepareToolsAndToolChoice(tools, toolChoice) {
|
|
693
|
-
if (tools == null) return {
|
|
694
|
-
tool_choice: void 0,
|
|
695
|
-
tools: void 0
|
|
696
|
-
};
|
|
697
|
-
const mappedTools = tools.map((tool) => ({
|
|
698
|
-
function: {
|
|
699
|
-
description: tool.type === "function" ? tool.description : void 0,
|
|
700
|
-
name: tool.name,
|
|
701
|
-
parameters: tool.type === "function" ? tool.inputSchema : void 0
|
|
702
|
-
},
|
|
703
|
-
type: "function"
|
|
704
|
-
}));
|
|
705
|
-
if (toolChoice == null) return {
|
|
706
|
-
tool_choice: void 0,
|
|
707
|
-
tools: mappedTools
|
|
708
|
-
};
|
|
709
|
-
const type = toolChoice.type;
|
|
710
|
-
switch (type) {
|
|
711
|
-
case "auto": return {
|
|
712
|
-
tool_choice: type,
|
|
713
|
-
tools: mappedTools
|
|
714
|
-
};
|
|
715
|
-
case "none": return {
|
|
716
|
-
tool_choice: type,
|
|
717
|
-
tools: mappedTools
|
|
718
|
-
};
|
|
719
|
-
case "required": return {
|
|
720
|
-
tool_choice: "required",
|
|
721
|
-
tools: mappedTools
|
|
722
|
-
};
|
|
723
|
-
case "tool": return {
|
|
724
|
-
tool_choice: "required",
|
|
725
|
-
tools: mappedTools.filter((tool) => tool.function.name === toolChoice.toolName)
|
|
726
|
-
};
|
|
727
|
-
default: throw new Error(`Unsupported tool choice type: ${type}`);
|
|
728
946
|
}
|
|
729
947
|
}
|
|
730
|
-
/**
 * Normalize one raw tool call into a `tool-call` content part.
 *
 * Two shapes are accepted: the nested form (`{ id, function: { name, arguments } }`)
 * and the flat form (`{ id, name, arguments }`). The nested form is used only
 * when `function` is a non-null object with a truthy `name`; otherwise the
 * flat fields on the tool call itself are read.
 *
 * @param toolCall Raw tool call object from a model response
 * @returns `{ input, toolCallId, type: "tool-call", toolName }`, where `input`
 *          is always a string (non-string arguments are JSON-encoded, and
 *          falsy arguments become `"{}"`).
 */
function processToolCall(toolCall) {
	const encodeArgs = (rawArgs) => typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs || {});
	let nested = null;
	if ("function" in toolCall && typeof toolCall.function === "object" && toolCall.function) nested = toolCall.function;
	// Fall back to the flat shape whenever the nested object carries no name.
	const source = nested?.name ? nested : toolCall;
	return {
		input: encodeArgs(source.arguments),
		// The id always lives on the outer tool call; mint one when it is missing.
		toolCallId: toolCall.id || generateId(),
		type: "tool-call",
		toolName: source.name
	};
}
|
|
746
|
-
/**
 * Collect every tool call from a model response.
 *
 * Looks first at a top-level `tool_calls` array, then at
 * `choices[0].message.tool_calls`; the first one that is an array wins.
 *
 * @param output Raw response record
 * @returns Array of normalized tool calls (empty when none are present)
 */
function processToolCalls(output) {
	const candidates = [output.tool_calls, output.choices?.[0]?.message?.tool_calls];
	for (const candidate of candidates) {
		if (Array.isArray(candidate)) return candidate.map((toolCall) => processToolCall(toolCall));
	}
	return [];
}
|
|
752
948
|
/**
|
|
753
|
-
*
|
|
754
|
-
*
|
|
755
|
-
*
|
|
756
|
-
* - Structured output quirk: { response: { ... } } (object instead of string)
|
|
757
|
-
* - Structured output quirk: { response: "{ ... }" } (JSON string)
|
|
949
|
+
* TransformStream that decodes a raw byte stream into SSE `data:` payloads.
|
|
950
|
+
* Each output chunk is the string content after "data: " (one per SSE event).
|
|
951
|
+
* Handles line buffering for partial chunks.
|
|
758
952
|
*/
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
if ("object" != _typeof(t) || !t) return t;
|
|
784
|
-
var e = t[Symbol.toPrimitive];
|
|
785
|
-
if (void 0 !== e) {
|
|
786
|
-
var i = e.call(t, r || "default");
|
|
787
|
-
if ("object" != _typeof(i)) return i;
|
|
788
|
-
throw new TypeError("@@toPrimitive must return a primitive value.");
|
|
953
|
+
var SSEDecoder = class extends TransformStream {
|
|
954
|
+
constructor() {
|
|
955
|
+
let buffer = "";
|
|
956
|
+
const decoder = new TextDecoder();
|
|
957
|
+
super({
|
|
958
|
+
transform(chunk, controller) {
|
|
959
|
+
buffer += decoder.decode(chunk, { stream: true });
|
|
960
|
+
const lines = buffer.split("\n");
|
|
961
|
+
buffer = lines.pop() || "";
|
|
962
|
+
for (const line of lines) {
|
|
963
|
+
const trimmed = line.trim();
|
|
964
|
+
if (!trimmed) continue;
|
|
965
|
+
if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
|
|
966
|
+
else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
|
|
967
|
+
}
|
|
968
|
+
},
|
|
969
|
+
flush(controller) {
|
|
970
|
+
if (buffer.trim()) {
|
|
971
|
+
const trimmed = buffer.trim();
|
|
972
|
+
if (trimmed.startsWith("data: ")) controller.enqueue(trimmed.slice(6));
|
|
973
|
+
else if (trimmed.startsWith("data:")) controller.enqueue(trimmed.slice(5));
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
});
|
|
789
977
|
}
|
|
790
|
-
|
|
791
|
-
}
|
|
792
|
-
//#endregion
|
|
793
|
-
//#region \0@oxc-project+runtime@0.122.0/helpers/toPropertyKey.js
|
|
794
|
-
function toPropertyKey(t) {
|
|
795
|
-
var i = toPrimitive(t, "string");
|
|
796
|
-
return "symbol" == _typeof(i) ? i : i + "";
|
|
797
|
-
}
|
|
798
|
-
//#endregion
|
|
799
|
-
//#region \0@oxc-project+runtime@0.122.0/helpers/defineProperty.js
|
|
800
|
-
function _defineProperty(e, r, t) {
|
|
801
|
-
return (r = toPropertyKey(r)) in e ? Object.defineProperty(e, r, {
|
|
802
|
-
value: t,
|
|
803
|
-
enumerable: !0,
|
|
804
|
-
configurable: !0,
|
|
805
|
-
writable: !0
|
|
806
|
-
}) : e[r] = t, e;
|
|
807
|
-
}
|
|
978
|
+
};
|
|
808
979
|
//#endregion
|
|
809
980
|
//#region src/aisearch-chat-language-model.ts
|
|
810
981
|
var AISearchChatLanguageModel = class {
|
|
@@ -973,18 +1144,21 @@ var WorkersAIChatLanguageModel = class {
|
|
|
973
1144
|
},
|
|
974
1145
|
warnings
|
|
975
1146
|
};
|
|
976
|
-
case "json":
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
1147
|
+
case "json": {
|
|
1148
|
+
const json = responseFormat?.type === "json" ? responseFormat : void 0;
|
|
1149
|
+
return {
|
|
1150
|
+
args: {
|
|
1151
|
+
...baseArgs,
|
|
1152
|
+
response_format: {
|
|
1153
|
+
type: "json_schema",
|
|
1154
|
+
json_schema: buildJsonSchemaPayload(json?.schema, json?.name, json?.description)
|
|
1155
|
+
},
|
|
1156
|
+
tools: void 0,
|
|
1157
|
+
tool_choice: void 0
|
|
982
1158
|
},
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
warnings
|
|
987
|
-
};
|
|
1159
|
+
warnings
|
|
1160
|
+
};
|
|
1161
|
+
}
|
|
988
1162
|
default: throw new Error(`Unsupported type: ${type}`);
|
|
989
1163
|
}
|
|
990
1164
|
}
|
|
@@ -1043,6 +1217,38 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1043
1217
|
...passthroughOptions
|
|
1044
1218
|
};
|
|
1045
1219
|
}
|
|
1220
|
+
/**
|
|
1221
|
+
* Extract reasoning, text, and tool calls from a non-streaming response.
|
|
1222
|
+
*
|
|
1223
|
+
* Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
|
|
1224
|
+
* path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
|
|
1225
|
+
* and is retried non-streaming). When a forced tool call was leaked into
|
|
1226
|
+
* text content (gpt-oss harmony quirk), it is salvaged into a structured
|
|
1227
|
+
* tool call and the leaked JSON text is suppressed. A warning is appended in
|
|
1228
|
+
* place so callers can observe the reinterpretation.
|
|
1229
|
+
*/
|
|
1230
|
+
extractContent(outputRecord, args, warnings) {
|
|
1231
|
+
const choices = outputRecord.choices;
|
|
1232
|
+
const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
|
|
1233
|
+
const toolCalls = processToolCalls(outputRecord);
|
|
1234
|
+
const salvaged = toolCalls.length === 0 ? salvageToolCallsFromText(outputRecord, {
|
|
1235
|
+
tools: args.tools,
|
|
1236
|
+
toolChoice: args.tool_choice
|
|
1237
|
+
}) : null;
|
|
1238
|
+
if (salvaged) warnings.push({
|
|
1239
|
+
type: "other",
|
|
1240
|
+
message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`
|
|
1241
|
+
});
|
|
1242
|
+
return {
|
|
1243
|
+
reasoningContent,
|
|
1244
|
+
text: salvaged ? "" : processText(outputRecord) ?? "",
|
|
1245
|
+
toolCalls: salvaged ?? toolCalls,
|
|
1246
|
+
finishReason: salvaged ? {
|
|
1247
|
+
unified: "tool-calls",
|
|
1248
|
+
raw: "stop"
|
|
1249
|
+
} : mapWorkersAIFinishReason(outputRecord)
|
|
1250
|
+
};
|
|
1251
|
+
}
|
|
1046
1252
|
async doGenerate(options) {
|
|
1047
1253
|
const { args, warnings } = this.getArgs(options);
|
|
1048
1254
|
const { messages } = convertToWorkersAIChatMessages(options.prompt);
|
|
@@ -1054,10 +1260,9 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1054
1260
|
});
|
|
1055
1261
|
if (output instanceof ReadableStream) throw new Error("Unexpected streaming response from non-streaming request. Check that `stream: true` was not passed.");
|
|
1056
1262
|
const outputRecord = output;
|
|
1057
|
-
const
|
|
1058
|
-
const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
|
|
1263
|
+
const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
|
|
1059
1264
|
return {
|
|
1060
|
-
finishReason
|
|
1265
|
+
finishReason,
|
|
1061
1266
|
content: [
|
|
1062
1267
|
...reasoningContent ? [{
|
|
1063
1268
|
type: "reasoning",
|
|
@@ -1065,9 +1270,9 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1065
1270
|
}] : [],
|
|
1066
1271
|
{
|
|
1067
1272
|
type: "text",
|
|
1068
|
-
text
|
|
1273
|
+
text
|
|
1069
1274
|
},
|
|
1070
|
-
...
|
|
1275
|
+
...toolCalls
|
|
1071
1276
|
],
|
|
1072
1277
|
usage: mapWorkersAIUsage(output),
|
|
1073
1278
|
warnings
|
|
@@ -1085,10 +1290,12 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1085
1290
|
...runOptions,
|
|
1086
1291
|
signal: options.abortSignal
|
|
1087
1292
|
});
|
|
1088
|
-
if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response
|
|
1293
|
+
if (response instanceof ReadableStream) return { stream: prependStreamStart(getMappedStream(response, {
|
|
1294
|
+
tools: args.tools,
|
|
1295
|
+
toolChoice: args.tool_choice
|
|
1296
|
+
}), warnings) };
|
|
1089
1297
|
const outputRecord = response;
|
|
1090
|
-
const
|
|
1091
|
-
const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
|
|
1298
|
+
const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(outputRecord, args, warnings);
|
|
1092
1299
|
let textId = null;
|
|
1093
1300
|
let reasoningId = null;
|
|
1094
1301
|
return { stream: new ReadableStream({ start(controller) {
|
|
@@ -1112,7 +1319,6 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1112
1319
|
id: reasoningId
|
|
1113
1320
|
});
|
|
1114
1321
|
}
|
|
1115
|
-
const text = processText(outputRecord);
|
|
1116
1322
|
if (text) {
|
|
1117
1323
|
textId = generateId();
|
|
1118
1324
|
controller.enqueue({
|
|
@@ -1129,10 +1335,10 @@ var WorkersAIChatLanguageModel = class {
|
|
|
1129
1335
|
id: textId
|
|
1130
1336
|
});
|
|
1131
1337
|
}
|
|
1132
|
-
for (const toolCall of
|
|
1338
|
+
for (const toolCall of toolCalls) controller.enqueue(toolCall);
|
|
1133
1339
|
controller.enqueue({
|
|
1134
1340
|
type: "finish",
|
|
1135
|
-
finishReason
|
|
1341
|
+
finishReason,
|
|
1136
1342
|
usage: mapWorkersAIUsage(response)
|
|
1137
1343
|
});
|
|
1138
1344
|
controller.close();
|
|
@@ -1495,8 +1701,422 @@ function documentsToContexts(documents, warnings) {
|
|
|
1495
1701
|
*/
|
|
1496
1702
|
var AutoRAGChatLanguageModel = class extends AISearchChatLanguageModel {};
|
|
1497
1703
|
//#endregion
|
|
1704
|
+
//#region src/client-fallback.ts
|
|
1705
|
+
/**
 * Build a model that tries each leg in order and returns the first result
 * whose `doGenerate` / `doStream` call resolves.
 *
 * Only a rejected dispatch triggers the next leg. Once a leg has resolved
 * (for `doStream`, once it has handed back its stream), later errors are not
 * handled here. If every leg rejects, a {@link WorkersAIFallbackError} is
 * thrown with one attempt record per leg.
 *
 * The returned model reports the first leg's `provider`, `modelId` and
 * `supportedUrls`.
 *
 * @param legs Ordered `{ slug, model, transport }` entries; must be non-empty
 * @returns A `specificationVersion: "v3"` model object
 * @throws Error when `legs` is empty
 */
function createClientFallbackModel(legs) {
	if (legs.length === 0) throw new Error("createClientFallbackModel requires at least one model leg.");
	const head = legs[0].model;
	const attempt = async (run) => {
		const attempts = [];
		for (const leg of legs) {
			let outcome;
			try {
				outcome = await run(leg.model);
			} catch (e) {
				// Record the failure and move on to the next leg.
				const err = WorkersAIGatewayError.fromUnknown(e);
				attempts.push({
					model: leg.slug,
					transport: leg.transport,
					ok: false,
					status: err.status,
					error: err
				});
				continue;
			}
			attempts.push({
				model: leg.slug,
				transport: leg.transport,
				ok: true
			});
			return outcome;
		}
		throw new WorkersAIFallbackError(attempts);
	};
	return {
		specificationVersion: "v3",
		provider: head.provider,
		modelId: head.modelId,
		supportedUrls: head.supportedUrls,
		doGenerate(options) {
			return attempt((model) => model.doGenerate(options));
		},
		doStream(options) {
			return attempt((model) => model.doStream(options));
		}
	};
}
|
|
1752
|
+
//#endregion
|
|
1753
|
+
//#region src/resumable-stream.ts
|
|
1754
|
+
/**
 * Join two byte arrays into a freshly allocated `Uint8Array`.
 *
 * @param a Leading bytes
 * @param b Trailing bytes
 * @returns New array containing `a` followed by `b`
 */
function concat(a, b) {
	const joined = new Uint8Array(a.length + b.length);
	joined.set(a);
	joined.set(b, a.length);
	return joined;
}
|
|
1760
|
+
/**
 * Find the end of the last complete SSE event in `buf`.
 *
 * @param buf Byte buffer to scan
 * @returns Index just past the final `\n\n` pair, or -1 when there is none
 */
function lastEventBoundary(buf) {
	const LF = 10;
	let cursor = buf.length - 2;
	// Walk backwards so the first hit is the last terminator.
	while (cursor >= 0) {
		if (buf[cursor] === LF && buf[cursor + 1] === LF) return cursor + 2;
		cursor -= 1;
	}
	return -1;
}
|
|
1765
|
+
/**
 * Count the `\n\n` terminators (complete SSE events) in `buf`.
 *
 * Terminators do not overlap: after a match the scan resumes past both
 * newline bytes, so three consecutive newlines count as one event.
 *
 * @param buf Byte buffer to scan
 * @returns Number of terminators found
 */
function countEvents(buf) {
	const LF = 10;
	let total = 0;
	let pos = 0;
	while (pos + 1 < buf.length) {
		if (buf[pos] === LF && buf[pos + 1] === LF) {
			total += 1;
			pos += 2;
		} else pos += 1;
	}
	return total;
}
|
|
1774
|
+
/**
 * Build the resume endpoint URL for a gateway run.
 *
 * Every interpolated value is percent-encoded so that a gateway id or run id
 * containing `/`, `?`, `#` or similar characters cannot change the path or
 * query of the request. Values made only of unreserved characters
 * (letters, digits, `-`, `_`, `.`, `~`) produce the same URL as before.
 *
 * @param gateway Gateway id
 * @param runId Run id to resume
 * @param from Event index to resume from
 * @returns Absolute resume URL
 */
function resumeUrl(gateway, runId, from) {
	const gatewaySegment = encodeURIComponent(gateway);
	const runSegment = encodeURIComponent(runId);
	return `https://workers-binding.ai/ai-gateway/gateways/${gatewaySegment}/run/${runSegment}/resume?from=${encodeURIComponent(from)}`;
}
|
|
1777
|
+
/**
 * Wrap an SSE byte stream so that a mid-stream read failure is recovered by
 * re-fetching the remainder from the gateway's resume endpoint.
 *
 * Only whole events (terminated by `\n\n`) are forwarded and counted; a
 * partial trailing event is held back and discarded on reconnect, because the
 * resume request restarts from the last fully emitted event.
 *
 * Consumer cancellation is handled explicitly: the active upstream reader is
 * cancelled and the pump stops. Without this, the `enqueue` failure that
 * follows a cancel would be mistaken for a dropped connection and trigger
 * needless resume requests.
 *
 * @param options.binding Object whose `fetch` is used for resume requests
 * @param options.gateway Gateway id used in the resume URL
 * @param options.runId Run id used in the resume URL
 * @param options.initial Optional already-open body to read first; when absent
 *        the stream starts with a resume request
 * @param options.fromEvent Event index to start counting from (default 0)
 * @param options.maxReconnects Total reconnect budget (default 5)
 * @param options.onResumeExpired `"error"` (default) or `"accept-partial"`:
 *        what to do when the resume endpoint answers 404
 * @param options.onProgress Called with the emitted-event count after each flush
 * @param options.onReconnect Called with (emittedEvents, reconnects) before each reconnect
 * @returns ReadableStream of bytes
 */
function createResumableStream(options) {
	const { binding, gateway, runId } = options;
	const maxReconnects = options.maxReconnects ?? 5;
	const onExpired = options.onResumeExpired ?? "error";
	let emittedEvents = options.fromEvent ?? 0;
	let pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
	let reconnects = 0;
	// Set once the consumer cancels; checked after every await in the pump.
	let cancelled = false;
	// Reader currently attached to the upstream body, so cancel() can reach it.
	let activeReader = null;
	async function fetchResume(controller) {
		let res;
		try {
			res = await binding.fetch(resumeUrl(gateway, runId, emittedEvents), { method: "GET" });
		} catch (fetchErr) {
			controller.error(new GatewayDelegateError("dispatch", `Resume request threw at event ${emittedEvents}.`, fetchErr));
			return null;
		}
		if (cancelled) {
			// The consumer went away while the request was in flight: drop the body.
			try {
				await res.body?.cancel();
			} catch {}
			return null;
		}
		if (res.status === 404) {
			if (onExpired === "accept-partial") {
				controller.close();
				return null;
			}
			controller.error(new GatewayDelegateError("resume-expired", `Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer TTL (~5.5 min) elapsed; fall back to continuation or regeneration.`));
			return null;
		}
		if (!res.ok || !res.body) {
			controller.error(new GatewayDelegateError("dispatch", `Resume failed (${res.status}) at event ${emittedEvents}.`));
			return null;
		}
		return res.body;
	}
	return new ReadableStream({
		async start(controller) {
			let current;
			if (options.initial) current = options.initial;
			else {
				const body = await fetchResume(controller);
				if (!body) return;
				current = body;
			}
			for (;;) {
				const reader = current.getReader();
				activeReader = reader;
				try {
					for (;;) {
						const { done, value } = await reader.read();
						// After a cancel the controller rejects close/enqueue, so stop first.
						if (cancelled) return;
						if (done) {
							if (pending.length > 0) {
								controller.enqueue(pending);
								pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
							}
							controller.close();
							return;
						}
						if (!value || value.length === 0) continue;
						pending = concat(pending, value);
						const boundary = lastEventBoundary(pending);
						if (boundary > 0) {
							const complete = pending.slice(0, boundary);
							controller.enqueue(complete);
							emittedEvents += countEvents(complete);
							options.onProgress?.(emittedEvents);
							pending = pending.slice(boundary);
						}
					}
				} catch (err) {
					try {
						reader.releaseLock();
					} catch {}
					// A failure caused by our own cancellation is not a dropped connection.
					if (cancelled) return;
					if (reconnects >= maxReconnects) {
						controller.error(new GatewayDelegateError("resume-expired", `Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`, err));
						return;
					}
					// The partial event is re-sent by the resume endpoint, so drop it.
					pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
					reconnects++;
					options.onReconnect?.(emittedEvents, reconnects);
					const body = await fetchResume(controller);
					if (!body) return;
					current = body;
				}
			}
		},
		async cancel(reason) {
			cancelled = true;
			pending = new Uint8Array(/* @__PURE__ */ new ArrayBuffer(0));
			// Best effort: the reader may already be released or errored.
			try {
				await activeReader?.cancel(reason);
			} catch {}
		}
	});
}
|
|
1856
|
+
//#endregion
|
|
1857
|
+
//#region src/gateway-delegate.ts
|
|
1858
|
+
/**
 * Split a `vendor/model` slug at its first `/`.
 *
 * Everything before the first slash is the resolver key; everything after it
 * (further slashes included, e.g. `openrouter/anthropic/claude`) is the model id.
 *
 * @param slug Slug of the form `<provider>/<model>`
 * @returns `{ resolverKey, modelId }`
 * @throws GatewayDelegateError (`config`) when the slug has no slash or an empty side
 */
function parseSlug(slug) {
	const cut = slug.indexOf("/");
	if (cut < 0) throw new GatewayDelegateError("config", `Model slug "${slug}" has no resolver key. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
	const parts = {
		resolverKey: slug.slice(0, cut),
		modelId: slug.slice(cut + 1)
	};
	if (parts.resolverKey && parts.modelId) return parts;
	throw new GatewayDelegateError("config", `Model slug "${slug}" is malformed. Use "<provider>/<model>" (e.g. "openai/gpt-5").`);
}
|
|
1874
|
+
/**
 * Look up the registry entry for a parsed slug.
 *
 * @param slug Original slug, used only in error messages
 * @param parsed Result of `parseSlug`; `resolverKey` is the lookup key
 * @returns The registry entry returned by `findProviderBySlug`
 * @throws GatewayDelegateError (`config`) when the provider is unknown, or
 *         when its entry has no `wireFormat`
 */
function resolveProvider(slug, parsed) {
	const entry = findProviderBySlug(parsed.resolverKey);
	if (entry && entry.wireFormat) return entry;
	const reason = entry
		? `Provider "${parsed.resolverKey}" is not chat/completions-shaped and has no built-in parser. Reach it with createGatewayProvider (bring your own @ai-sdk provider).`
		: `Unknown gateway provider "${parsed.resolverKey}" (from slug "${slug}"). See the AI Gateway provider directory for valid slugs, or use createGatewayProvider to bring your own @ai-sdk provider.`;
	throw new GatewayDelegateError("config", reason);
}
|
|
1884
|
+
/**
 * Decide which transport a call uses and whether resume stays enabled.
 *
 * Server-side fallback and caching exist only on the gateway path, so
 * requesting either forces `transport: "gateway"` and turns resume off. When
 * resume was only defaulted, a warning string is returned; when it was
 * requested explicitly, the conflict is thrown instead.
 *
 * @param opts Effective call options (`transport`, `fallback`, `cacheTtl`, `skipCache`, `resume`)
 * @param resumeExplicitlyTrue Whether the caller passed `resume: true` for this call
 * @param runCatalog Whether the provider can be reached on the run path (default true)
 * @param gatewayAvailable Whether the provider has a gateway path (default true)
 * @returns `{ transport, resumeEnabled, warnings }`
 * @throws GatewayDelegateError (`config`) for any unsatisfiable combination
 */
function selectTransport(opts, resumeExplicitlyTrue, runCatalog = true, gatewayAvailable = true) {
	const warnings = [];
	const reject = (message) => {
		throw new GatewayDelegateError("config", message);
	};
	const gatewayWithoutResume = () => ({
		transport: "gateway",
		resumeEnabled: false,
		warnings
	});
	const requested = opts.transport;
	const wantsServerFallback = opts.fallback?.mode === "server";
	const wantsCaching = opts.cacheTtl !== void 0 || opts.skipCache === true;
	const gatewayOnly = wantsServerFallback || wantsCaching;
	const feature = wantsServerFallback ? 'fallback.mode:"server"' : "caching (cacheTtl/skipCache)";
	// Run-catalog provider with no gateway path: nothing gateway-bound can work.
	if (runCatalog && !gatewayAvailable && (requested === "gateway" || gatewayOnly)) reject(`${requested === "gateway" ? 'transport:"gateway"' : feature} is unavailable: this provider is on the unified run catalog but is not a native gateway provider, so it has no gateway path (no caching, server-side fallback, or transport:"gateway"). Use the default run path, or fallback.mode:"client".`);
	if (!runCatalog) {
		if (requested === "run") reject('transport:"run" is unavailable: this provider is not on the unified-billing run catalog, so it can only be reached through the gateway path (BYOK).');
		if (resumeExplicitlyTrue) reject("resume:true is unavailable: this provider is not on the resumable run catalog (cf-aig-run-id requires the unified-billing run path).");
		return gatewayWithoutResume();
	}
	if (requested === "run" && gatewayOnly) reject(`transport:"run" cannot satisfy ${feature}: those features are only available on the gateway path. Use the gateway transport, or fallback.mode:"client".`);
	if (requested === "gateway" && resumeExplicitlyTrue) reject('transport:"gateway" cannot provide resume — cf-aig-run-id is only on the run path.');
	if (gatewayOnly) {
		if (resumeExplicitlyTrue) reject(`resume:true conflicts with ${feature}: resume (cf-aig-run-id) is only on the run path, which does not support ${wantsServerFallback ? "server-side fallback" : "caching"}. Use fallback.mode:"client" to keep resume, or drop resume.`);
		warnings.push(`[workers-ai-provider] resume disabled: ${feature} requires the gateway path, which does not surface cf-aig-run-id. Use fallback.mode:"client" to keep resumable streaming.`);
		return gatewayWithoutResume();
	}
	const transport = requested ?? "run";
	return {
		transport,
		resumeEnabled: transport === "run" && opts.resume !== false,
		warnings
	};
}
|
|
1924
|
+
/**
 * Error raised by the gateway delegate and the resumable stream.
 *
 * `kind` is a short category string; values used in this file are
 * `"config"`, `"dispatch"` and `"resume-expired"`. `cause` carries the
 * underlying error when there is one.
 */
var GatewayDelegateError = class extends Error {
	/**
	 * @param kind Error category
	 * @param message Human-readable description
	 * @param cause Optional underlying error
	 */
	constructor(kind, message, cause) {
		super(message);
		_defineProperty(this, "kind", void 0);
		_defineProperty(this, "cause", void 0);
		Object.assign(this, {
			name: "GatewayDelegateError",
			kind,
			cause
		});
	}
};
|
|
1934
|
+
const STRIP_HEADERS_BASE = new Set(["content-length", "host"]);
|
|
1935
|
+
/**
 * Decode a request body into text.
 *
 * Strings are returned unchanged. An `ArrayBuffer` or any ArrayBuffer view
 * (`Uint8Array`, other typed arrays, `DataView`) is decoded as UTF-8.
 * Previously only `Uint8Array` and `ArrayBuffer` were decoded, so other views
 * were silently replaced by `"{}"`. Any other body (including
 * `undefined`/`null`) still yields `"{}"`.
 *
 * @param body Request body as passed in `init.body`
 * @returns The body text, or `"{}"` when it cannot be decoded here
 */
function asText(body) {
	if (typeof body === "string") return body;
	if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) return new TextDecoder().decode(body);
	return "{}";
}
|
|
1941
|
+
/**
 * Flatten a headers value into a plain object.
 *
 * Accepts a `Headers` instance, an array of `[name, value]` pairs, or a plain
 * object. A falsy input yields an empty object.
 *
 * @param h Headers in any of the accepted shapes
 * @returns New plain object of header name to value
 */
function headersToObject(h) {
	if (!h) return {};
	if (!(h instanceof Headers) && !Array.isArray(h)) return Object.assign({}, h);
	// Headers instances and pair arrays both iterate as [name, value].
	const flat = {};
	for (const [name, value] of h) flat[name] = value;
	return flat;
}
|
|
1949
|
+
/**
 * Normalize a gateway setting into `{ id, options }`.
 *
 * A string is treated as the gateway id and expanded to `{ id }`; an object
 * is used as the options as-is and its `id` is lifted out.
 *
 * @param gateway Gateway id string or gateway options object
 * @returns `{ id, options }`
 * @throws GatewayDelegateError (`config`) when no gateway is given
 */
function normalizeGateway(gateway) {
	if (!gateway) throw new GatewayDelegateError("config", "A gateway is required for the delegate (resume needs a gateway). Pass `gateway: \"<gateway-id>\"` to createGatewayDelegate or per call.");
	const options = typeof gateway === "string" ? { id: gateway } : gateway;
	return {
		id: options.id,
		options
	};
}
|
|
1960
|
+
/**
 * Create a gateway delegate: a function that turns a `"<provider>/<model>"`
 * slug (plus per-call options) into a model built by one of the registered
 * provider plugins.
 *
 * For each slug the delegate resolves the provider, picks the transport with
 * `selectTransport`, chooses the plugin matching the wire format of that
 * transport, and passes the plugin a custom `fetch` for the chosen path.
 * With `fallback.mode: "client"`, one model is built per slug (the primary
 * followed by `fallback.models`) and the chain is wrapped by
 * `createClientFallbackModel`.
 *
 * @param config.binding Required binding (e.g. `env.AI`)
 * @param config.providers Required non-empty list of provider plugins, keyed by `wireFormat`
 * @param config.gateway Default gateway, overridable per call
 * @param config.resume Default for `resume` when a call does not set it (default true)
 * @param config.onResumeExpired Default for `onResumeExpired` when a call does not set it
 * @returns `(slug, options?) => model`
 * @throws GatewayDelegateError (`config`) when `binding` or `providers` is missing
 */
function createGatewayDelegate(config) {
	if (!config?.binding) throw new GatewayDelegateError("config", "createGatewayDelegate requires a `binding` (e.g. { binding: env.AI }).");
	if (!config.providers?.length) throw new GatewayDelegateError("config", "createGatewayDelegate requires at least one provider plugin, e.g. `providers: [openai]` from \"workers-ai-provider/openai\".");
	const pluginsByWire = /* @__PURE__ */ new Map();
	for (const candidate of config.providers) pluginsByWire.set(candidate.wireFormat, candidate);
	const defaultResume = config.resume ?? true;
	function buildOne(slug, options) {
		const parsed = parseSlug(slug);
		const info = resolveProvider(slug, parsed);
		const effectiveOptions = {
			...options,
			resume: options.resume ?? defaultResume,
			onResumeExpired: options.onResumeExpired ?? config.onResumeExpired
		};
		// Only a per-call `resume: true` counts as explicit; the config default does not.
		const selection = selectTransport(effectiveOptions, options.resume === true, info.runCatalog, info.gatewayPath !== false);
		selection.warnings.forEach((warning) => console.warn(warning));
		const viaRun = selection.transport === "run";
		const wire = viaRun ? info.runWireFormat ?? "openai" : info.wireFormat;
		const plugin = pluginsByWire.get(wire);
		if (!plugin) {
			const registered = [...pluginsByWire.keys()].join(", ") || "<none>";
			throw new GatewayDelegateError("config", viaRun ? `The run path for "${parsed.resolverKey}" (from slug "${slug}") returns "${wire}"-wire responses, so it needs the "${wire}" plugin. Install + pass it from "workers-ai-provider/${wire}". Registered: ${registered}.` : `No provider plugin for wire format "${wire}" (needed by "${parsed.resolverKey}" on the gateway path from slug "${slug}"). Registered: ${registered}. Install + pass the matching plugin from "workers-ai-provider/${wire}".`);
		}
		const gateway = normalizeGateway(options.gateway ?? config.gateway);
		const fetchImpl = viaRun
			? makeRunFetch(config.binding, `${info.resolverKey}/${parsed.modelId}`, gateway.options, effectiveOptions, selection, options)
			: makeGatewayFetch(config.binding, info, gateway.id, gateway.options, effectiveOptions, selection, options);
		const createArgs = {
			modelId: parsed.modelId,
			fetch: fetchImpl
		};
		// The provider's base URL is only passed on the gateway path.
		if (selection.transport === "gateway" && info.baseURL) createArgs.baseURL = info.baseURL;
		return {
			model: plugin.create(createArgs),
			transport: selection.transport
		};
	}
	return (slug, options = {}) => {
		if (options.fallback?.mode !== "client") return buildOne(slug, options).model;
		// Client fallback: each leg is built without the fallback option itself.
		const { fallback, ...rest } = options;
		const legs = [slug, ...fallback.models].map((legSlug) => {
			const built = buildOne(legSlug, rest);
			return {
				slug: legSlug,
				model: built.model,
				transport: built.transport
			};
		});
		return createClientFallbackModel(legs);
	};
}
|
|
2011
|
+
/**
 * Report a completed dispatch to the caller's `onDispatch` hook, if any.
 *
 * The event combines the transport selection with the response status and
 * four `cf-aig-*` response headers (each `null` when absent).
 *
 * @param resp Response whose `status` and `headers` are reported
 * @param selection Result of `selectTransport`
 * @param options Call options; nothing happens unless `onDispatch` is set
 */
function fireDispatch(resp, selection, options) {
	if (!options.onDispatch) return;
	const header = (name) => resp.headers.get(name);
	const event = {
		transport: selection.transport,
		resumeEnabled: selection.resumeEnabled,
		warnings: selection.warnings,
		status: resp.status,
		runId: header("cf-aig-run-id"),
		cfStep: header("cf-aig-step"),
		cacheStatus: header("cf-aig-cache-status"),
		logId: header("cf-aig-log-id")
	};
	options.onDispatch(event);
}
|
|
2024
|
+
/**
 * Combine gateway-level and call-level metadata; call-level keys win.
 *
 * @param base Metadata from the gateway options
 * @param override Metadata from the call options
 * @returns A new merged object, or `undefined` when both inputs are falsy
 */
function mergeMetadata(base, override) {
	const hasAny = Boolean(base) || Boolean(override);
	return hasAny ? {
		...base,
		...override
	} : void 0;
}
|
|
2032
|
+
/**
 * Produce the JSON text placed in the `cf-aig-metadata` header.
 * `JSON.stringify` cannot encode bigint values, so they are emitted as
 * their decimal string form.
 */
function serializeMetadata(metadata) {
	const stringifyBigInts = (_key, value) => {
		if (typeof value === "bigint") return value.toString();
		return value;
	};
	return JSON.stringify(metadata, stringifyBigInts);
}
|
|
2036
|
+
/**
 * Build a `fetch`-shaped function that sends the request through
 * `binding.run(slug, body, options)` instead of over HTTP.
 *
 * @param binding - Binding whose `run` method performs the call.
 * @param slug - Model identifier passed as the first argument to `binding.run`.
 * @param gatewayOptions - Gateway options; copied, then overlaid with merged
 *   metadata and `collectLog` from `opts`. `gatewayOptions.id` is also handed
 *   to the resumable stream.
 * @param opts - Effective per-model options (`metadata`, `collectLog`,
 *   `extraHeaders`, `onResumeExpired`, `onProgress` are read here).
 * @param selection - Transport selection; `resumeEnabled` gates stream wrapping
 *   and the whole object is forwarded to `fireDispatch`.
 * @param callOptions - Caller options forwarded to `fireDispatch`.
 * @returns An async `(input, init) => Response` function. The `input` URL is
 *   ignored; only `init.body` and `init.signal` are used.
 */
function makeRunFetch(binding, slug, gatewayOptions, opts, selection, callOptions) {
	return (async (_input, init) => {
		const body = JSON.parse(asText(init?.body));
		// The model is already identified by `slug`, so it is removed from the payload.
		delete body.model;
		// Work on a copy so the shared gateway options are never mutated per request.
		const mergedGateway = { ...gatewayOptions };
		const mergedMeta = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (mergedMeta) mergedGateway.metadata = mergedMeta;
		if (opts.collectLog !== void 0) mergedGateway.collectLog = opts.collectLog;
		const runOptions = {
			gateway: mergedGateway,
			returnRawResponse: true,
			...opts.extraHeaders ? { extraHeaders: opts.extraHeaders } : {},
			...init?.signal ? { signal: init.signal } : {}
		};
		const resp = await binding.run(slug, body, runOptions);
		fireDispatch(resp, selection, callOptions);
		const runId = resp.headers.get("cf-aig-run-id");
		// Wrap the body only when resume is enabled, the response carries a run id,
		// and there is a body to wrap.
		if (selection.resumeEnabled && runId && resp.body) {
			const resumable = createResumableStream({
				binding,
				gateway: gatewayOptions.id,
				runId,
				initial: resp.body,
				onResumeExpired: opts.onResumeExpired,
				...opts.onProgress ? { onProgress: opts.onProgress } : {}
			});
			// Re-create the response around the wrapped body, carrying over the status
			// line (including statusText, which was previously dropped) and headers.
			return new Response(resumable, {
				status: resp.status,
				statusText: resp.statusText,
				headers: resp.headers
			});
		}
		return resp;
	});
}
|
|
2070
|
+
/**
 * Build a `fetch`-shaped function that sends the request through
 * `binding.gateway(gatewayId).run(entries)`.
 *
 * @param binding - Binding exposing `gateway(id)`.
 * @param info - Resolved provider info (`authHeaders`, `transformEndpoint`,
 *   `gatewayProviderId` are read here).
 * @param gatewayId - Gateway id passed to `binding.gateway`.
 * @param gatewayOptions - Gateway options; only `metadata` is read here.
 * @param opts - Effective per-model options (`byok`, `extraHeaders`,
 *   `cacheTtl`, `skipCache`, `metadata`, `collectLog`, `fallback`).
 * @param selection - Transport selection forwarded to `fireDispatch`.
 * @param callOptions - Caller options forwarded to `fireDispatch`.
 * @returns An async `(input, init) => Response` function.
 * @throws {GatewayDelegateError} at request time when a server-side fallback
 *   model resolves to a different gateway provider than the primary.
 */
function makeGatewayFetch(binding, info, gatewayId, gatewayOptions, opts, selection, callOptions) {
	const strip = new Set(STRIP_HEADERS_BASE);
	// Unless the caller brings their own key, the provider's auth headers are
	// removed from the outgoing request as well.
	if (!opts.byok) for (const h of info.authHeaders) strip.add(h.toLowerCase());
	return (async (input, init) => {
		// `input` may be a string, a URL, or a Request. A Request stringifies to
		// "[object Request]", so read its `url` property instead.
		let rawUrl;
		if (typeof input === "string") rawUrl = input;
		else if (typeof input?.url === "string") rawUrl = input.url;
		else rawUrl = input.toString();
		let endpoint;
		if (info.transformEndpoint) endpoint = info.transformEndpoint(rawUrl);
		else {
			// Default endpoint: path without the leading slash, plus any query string.
			const parsedUrl = new URL(rawUrl);
			endpoint = parsedUrl.pathname.replace(/^\//, "") + parsedUrl.search;
		}
		const body = JSON.parse(asText(init?.body));
		const headers = {};
		for (const [k, v] of Object.entries(headersToObject(init?.headers))) if (!strip.has(k.toLowerCase())) headers[k] = v;
		// Extra headers are applied after stripping, so they are never filtered out.
		if (opts.extraHeaders) Object.assign(headers, opts.extraHeaders);
		if (opts.cacheTtl !== void 0) headers["cf-aig-cache-ttl"] = String(opts.cacheTtl);
		if (opts.skipCache) headers["cf-aig-skip-cache"] = "true";
		const metadata = mergeMetadata(gatewayOptions.metadata, opts.metadata);
		if (metadata) headers["cf-aig-metadata"] = serializeMetadata(metadata);
		if (opts.collectLog !== void 0) headers["cf-aig-collect-log"] = String(opts.collectLog);
		const primary = {
			provider: info.gatewayProviderId,
			endpoint,
			headers,
			query: body
		};
		const entries = [primary];
		// Server-side fallback: each fallback entry reuses the primary's provider,
		// endpoint and headers, and only swaps the `model` field of the body.
		if (opts.fallback?.mode === "server") for (const fb of opts.fallback.models) {
			const fbParsed = parseSlug(fb);
			const fbInfo = resolveProvider(fb, fbParsed);
			if (fbInfo.gatewayProviderId !== info.gatewayProviderId) throw new GatewayDelegateError("config", `Cross-vendor server-side fallback (${info.gatewayProviderId} → ${fbInfo.gatewayProviderId}) is not supported yet. Use fallback.mode:"client", or same-vendor fallback models.`);
			entries.push({
				...primary,
				query: {
					...body,
					model: fbParsed.modelId
				}
			});
		}
		const gw = binding.gateway(gatewayId);
		const runOptions = {};
		if (init?.signal) runOptions.signal = init.signal;
		const resp = await gw.run(entries, runOptions);
		fireDispatch(resp, selection, callOptions);
		return resp;
	});
}
|
|
2112
|
+
//#endregion
|
|
1498
2113
|
//#region src/index.ts
|
|
1499
2114
|
/**
|
|
2115
|
+
* The account-wide AI Gateway used for catalog routing when no `gateway` is
|
|
2116
|
+
* configured. Every Cloudflare account has a `"default"` gateway.
|
|
2117
|
+
*/
|
|
2118
|
+
const DEFAULT_GATEWAY_ID = "default";
|
|
2119
|
+
/**
|
|
1500
2120
|
* Create a Workers AI provider instance.
|
|
1501
2121
|
*/
|
|
1502
2122
|
function createWorkersAI(options) {
|
|
@@ -1518,6 +2138,26 @@ function createWorkersAI(options) {
|
|
|
1518
2138
|
provider: "workersai.chat",
|
|
1519
2139
|
isBinding
|
|
1520
2140
|
});
|
|
2141
|
+
let delegate;
|
|
2142
|
+
const getDelegate = (slug) => {
|
|
2143
|
+
if (!options.providers?.length) throw new Error(`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI provider was not configured to route them. Pass provider plugins, e.g.:
|
|
2144
|
+
import { openai } from "workers-ai-provider/openai";
|
|
2145
|
+
createWorkersAI({ binding: env.AI, providers: [openai] });
|
|
2146
|
+
A gateway defaults to "default" but can be set via \`gateway\`. Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").`);
|
|
2147
|
+
delegate ?? (delegate = createGatewayDelegate({
|
|
2148
|
+
binding,
|
|
2149
|
+
gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
|
|
2150
|
+
providers: options.providers,
|
|
2151
|
+
resume: options.resume,
|
|
2152
|
+
onResumeExpired: options.onResumeExpired
|
|
2153
|
+
}));
|
|
2154
|
+
return delegate;
|
|
2155
|
+
};
|
|
2156
|
+
const isGatewaySlug = (id) => typeof id === "string" && !id.startsWith("@") && id.includes("/");
|
|
2157
|
+
const buildChat = (modelId, settings) => {
|
|
2158
|
+
if (isGatewaySlug(modelId)) return getDelegate(modelId)(modelId, settings);
|
|
2159
|
+
return createChatModel(modelId, settings);
|
|
2160
|
+
};
|
|
1521
2161
|
const createImageModel = (modelId, settings = {}) => new WorkersAIImageModel(modelId, settings, {
|
|
1522
2162
|
binding,
|
|
1523
2163
|
gateway: options.gateway,
|
|
@@ -1550,9 +2190,9 @@ function createWorkersAI(options) {
|
|
|
1550
2190
|
});
|
|
1551
2191
|
const provider = (modelId, settings) => {
|
|
1552
2192
|
if (new.target) throw new Error("The WorkersAI model function cannot be called with the new keyword.");
|
|
1553
|
-
return
|
|
2193
|
+
return buildChat(modelId, settings);
|
|
1554
2194
|
};
|
|
1555
|
-
provider.chat =
|
|
2195
|
+
provider.chat = buildChat;
|
|
1556
2196
|
provider.embedding = createEmbeddingModel;
|
|
1557
2197
|
provider.textEmbedding = createEmbeddingModel;
|
|
1558
2198
|
provider.textEmbeddingModel = createEmbeddingModel;
|
|
@@ -1598,6 +2238,6 @@ function createAutoRAG(options) {
|
|
|
1598
2238
|
return createAISearch(options, "autorag.chat");
|
|
1599
2239
|
}
|
|
1600
2240
|
//#endregion
|
|
1601
|
-
export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createWorkersAI };
|
|
2241
|
+
export { AISearchChatLanguageModel, AutoRAGChatLanguageModel, GATEWAY_PROVIDERS, GatewayDelegateError, WorkersAIFallbackError, WorkersAIGatewayError, WorkersAIRerankingModel, WorkersAISpeechModel, WorkersAITranscriptionModel, createAISearch, createAutoRAG, createClientFallbackModel, createGatewayFetch, createGatewayProvider, createResumableStream, createWorkersAI, detectProviderByUrl, findProviderBySlug, parseSlug, selectTransport, wireableProviders };
|
|
1602
2242
|
|
|
1603
2243
|
//# sourceMappingURL=index.mjs.map
|