workers-ai-provider 3.1.13 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -31
- package/dist/anthropic.d.mts +14 -0
- package/dist/anthropic.mjs +21 -0
- package/dist/anthropic.mjs.map +1 -0
- package/dist/gateway-delegate-BfaUTwDZ.d.mts +385 -0
- package/dist/gateway-provider-1USFWm7c.mjs +583 -0
- package/dist/gateway-provider-1USFWm7c.mjs.map +1 -0
- package/dist/gateway-provider.d.mts +80 -0
- package/dist/gateway-provider.mjs +2 -0
- package/dist/google.d.mts +14 -0
- package/dist/google.mjs +21 -0
- package/dist/google.mjs.map +1 -0
- package/dist/index.d.mts +64 -7
- package/dist/index.mjs +967 -327
- package/dist/index.mjs.map +1 -1
- package/dist/openai.d.mts +20 -0
- package/dist/openai.mjs +27 -0
- package/dist/openai.mjs.map +1 -0
- package/package.json +47 -6
- package/src/anthropic.ts +17 -0
- package/src/client-fallback.ts +70 -0
- package/src/convert-to-workersai-chat-messages.ts +33 -7
- package/src/errors.ts +216 -0
- package/src/gateway-delegate.ts +696 -0
- package/src/gateway-provider.ts +167 -0
- package/src/gateway-providers.ts +457 -0
- package/src/google.ts +19 -0
- package/src/index.ts +180 -9
- package/src/openai.ts +25 -0
- package/src/resumable-stream.ts +223 -0
- package/src/streaming.ts +103 -30
- package/src/utils.ts +206 -6
- package/src/workersai-chat-language-model.ts +87 -26
- package/src/workersai-chat-settings.ts +1 -1
- package/src/workersai-models.ts +11 -3
package/src/index.ts
CHANGED
|
@@ -18,6 +18,7 @@ import type { WorkersAIRerankingSettings } from "./workersai-reranking-settings"
|
|
|
18
18
|
import type {
|
|
19
19
|
EmbeddingModels,
|
|
20
20
|
ImageGenerationModels,
|
|
21
|
+
KnownTextGenerationModels,
|
|
21
22
|
TextGenerationModels,
|
|
22
23
|
TranscriptionModels,
|
|
23
24
|
SpeechModels,
|
|
@@ -40,10 +41,69 @@ export type { WorkersAISpeechSettings } from "./workersai-speech-settings";
|
|
|
40
41
|
export { WorkersAIRerankingModel } from "./workersai-reranking-model";
|
|
41
42
|
export type { WorkersAIRerankingSettings } from "./workersai-reranking-settings";
|
|
42
43
|
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// AI Gateway delegate (route catalog models through AI Gateway)
|
|
46
|
+
//
|
|
47
|
+
// The delegate factory itself is internal — it's wired through
|
|
48
|
+
// `createWorkersAI({ providers })` (see below), so `createWorkersAI` is the
|
|
49
|
+
// single public entry point. The transport types, error classes, registry, and
|
|
50
|
+
// resume helpers are safe to re-export here (no optional `@ai-sdk/*` peer
|
|
51
|
+
// imports). The provider plugins (`openai`, `anthropic`, `google`) stay
|
|
52
|
+
// sub-path-only so those packages remain optional.
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
export {
|
|
56
|
+
type Billing,
|
|
57
|
+
createClientFallbackModel,
|
|
58
|
+
type DelegateCallOptions,
|
|
59
|
+
type DispatchInfo,
|
|
60
|
+
type FallbackAttempt,
|
|
61
|
+
type FallbackLeg,
|
|
62
|
+
type FallbackOptions,
|
|
63
|
+
GatewayDelegateError,
|
|
64
|
+
type GatewayErrorCode,
|
|
65
|
+
type GatewayErrorContext,
|
|
66
|
+
GATEWAY_PROVIDERS,
|
|
67
|
+
type GatewayProviderInfo,
|
|
68
|
+
type ParsedSlug,
|
|
69
|
+
type ProviderPlugin,
|
|
70
|
+
type ResumableStreamOptions,
|
|
71
|
+
type ResumeExpiredPolicy,
|
|
72
|
+
type Transport,
|
|
73
|
+
type WireFormat,
|
|
74
|
+
WorkersAIFallbackError,
|
|
75
|
+
WorkersAIGatewayError,
|
|
76
|
+
createResumableStream,
|
|
77
|
+
detectProviderByUrl,
|
|
78
|
+
findProviderBySlug,
|
|
79
|
+
parseSlug,
|
|
80
|
+
selectTransport,
|
|
81
|
+
wireableProviders,
|
|
82
|
+
} from "./gateway-delegate";
|
|
83
|
+
export {
|
|
84
|
+
createGatewayFetch,
|
|
85
|
+
createGatewayProvider,
|
|
86
|
+
type GatewayFetchConfig,
|
|
87
|
+
} from "./gateway-provider";
|
|
88
|
+
|
|
89
|
+
import {
|
|
90
|
+
createGatewayDelegate,
|
|
91
|
+
type DelegateCallOptions,
|
|
92
|
+
type GatewayDelegate,
|
|
93
|
+
type ProviderPlugin,
|
|
94
|
+
type ResumeExpiredPolicy,
|
|
95
|
+
} from "./gateway-delegate";
|
|
96
|
+
|
|
43
97
|
// ---------------------------------------------------------------------------
|
|
44
98
|
// Workers AI
|
|
45
99
|
// ---------------------------------------------------------------------------
|
|
46
100
|
|
|
101
|
+
/**
|
|
102
|
+
* The account-wide AI Gateway used for catalog routing when no `gateway` is
|
|
103
|
+
* configured. Every Cloudflare account has a `"default"` gateway.
|
|
104
|
+
*/
|
|
105
|
+
const DEFAULT_GATEWAY_ID = "default";
|
|
106
|
+
|
|
47
107
|
export type WorkersAISettings = (
|
|
48
108
|
| {
|
|
49
109
|
/**
|
|
@@ -77,19 +137,72 @@ export type WorkersAISettings = (
|
|
|
77
137
|
}
|
|
78
138
|
) & {
|
|
79
139
|
/**
|
|
80
|
-
* Optionally specify a gateway.
|
|
140
|
+
* Optionally specify a gateway. For third-party catalog routing (see
|
|
141
|
+
* `providers`) this defaults to the account's `"default"` gateway when unset.
|
|
81
142
|
*/
|
|
82
143
|
gateway?: GatewayOptions;
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Provider plugins that enable routing third-party catalog models
|
|
147
|
+
* (e.g. `"openai/gpt-5-mini"`) through AI Gateway. Supply them from the
|
|
148
|
+
* sub-path modules, e.g. `import { openai } from "workers-ai-provider/openai"`.
|
|
149
|
+
*
|
|
150
|
+
* When set, calling the provider with a `"<provider>/<model>"` slug (anything
|
|
151
|
+
* that is not a `@cf/...` Workers AI model id) is automatically dispatched
|
|
152
|
+
* through the {@link createGatewayDelegate | gateway delegate}. Leaving this
|
|
153
|
+
* unset preserves the exact prior behavior — only Workers AI models are built.
|
|
154
|
+
*
|
|
155
|
+
* @experimental The gateway delegate is an experimental surface.
|
|
156
|
+
*/
|
|
157
|
+
providers?: ProviderPlugin[];
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Default resume behavior for gateway-routed catalog models. Defaults to
|
|
161
|
+
* `true`. Overridable per call. Only relevant when `providers` is set.
|
|
162
|
+
*/
|
|
163
|
+
resume?: boolean;
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Default resume-expiry policy for gateway-routed catalog models (run path).
|
|
167
|
+
* Defaults to `"error"`. Only relevant when `providers` is set.
|
|
168
|
+
*/
|
|
169
|
+
onResumeExpired?: ResumeExpiredPolicy;
|
|
83
170
|
};
|
|
84
171
|
|
|
172
|
+
/**
|
|
173
|
+
* True when a literal model id is a `"<provider>/<model>"` AI Gateway catalog
|
|
174
|
+
* slug rather than a `@cf/...` Workers AI id. Bare `string` (a non-literal,
|
|
175
|
+
* e.g. a variable) resolves to `false` so the common path keeps chat settings.
|
|
176
|
+
*/
|
|
177
|
+
type IsCatalogSlug<M extends string> = string extends M
|
|
178
|
+
? false
|
|
179
|
+
: M extends `@${string}`
|
|
180
|
+
? false
|
|
181
|
+
: M extends `${string}/${string}`
|
|
182
|
+
? true
|
|
183
|
+
: false;
|
|
184
|
+
|
|
185
|
+
/**
|
|
186
|
+
* Picks the per-model settings type from the (captured) literal model id:
|
|
187
|
+
* `DelegateCallOptions` for catalog slugs, `WorkersAIChatSettings` otherwise.
|
|
188
|
+
* This is what lets `workersai("openai/gpt-5", { … })` autocomplete delegate
|
|
189
|
+
* options while `workersai("@cf/…", { … })` autocompletes chat settings.
|
|
190
|
+
*/
|
|
191
|
+
type ModelSettings<M extends string> =
|
|
192
|
+
IsCatalogSlug<M> extends true ? DelegateCallOptions : WorkersAIChatSettings;
|
|
193
|
+
|
|
85
194
|
export interface WorkersAI {
|
|
86
|
-
|
|
195
|
+
<M extends string>(
|
|
196
|
+
modelId: M | KnownTextGenerationModels,
|
|
197
|
+
settings?: ModelSettings<M>,
|
|
198
|
+
): WorkersAIChatLanguageModel;
|
|
87
199
|
/**
|
|
88
|
-
* Creates a model for text generation.
|
|
200
|
+
* Creates a model for text generation. Accepts a `@cf/...` Workers AI id, or
|
|
201
|
+
* a `"<provider>/<model>"` catalog slug when `providers` is configured.
|
|
89
202
|
**/
|
|
90
|
-
chat(
|
|
91
|
-
modelId:
|
|
92
|
-
settings?:
|
|
203
|
+
chat<M extends string>(
|
|
204
|
+
modelId: M | KnownTextGenerationModels,
|
|
205
|
+
settings?: ModelSettings<M>,
|
|
93
206
|
): WorkersAIChatLanguageModel;
|
|
94
207
|
|
|
95
208
|
embedding(
|
|
@@ -178,6 +291,61 @@ export function createWorkersAI(options: WorkersAISettings): WorkersAI {
|
|
|
178
291
|
isBinding,
|
|
179
292
|
});
|
|
180
293
|
|
|
294
|
+
// Catalog routing for third-party models. A `"<provider>/<model>"` slug that
// is not `@cf/`-prefixed goes through the gateway delegate rather than being
// built as a Workers AI model. The delegate is created on first use and then
// cached, so nothing is constructed unless a catalog slug is actually requested.
let delegate: GatewayDelegate | undefined;

/**
 * Returns the cached gateway delegate, creating it on the first call.
 *
 * @param slug - The requested model id; only used to build the error message.
 * @throws Error when no provider plugins were passed to `createWorkersAI`.
 */
const getDelegate = (slug: string): GatewayDelegate => {
	const plugins = options.providers;
	if (!plugins || plugins.length === 0) {
		const message =
			`"${slug}" looks like a third-party AI Gateway catalog model, but this Workers AI ` +
			"provider was not configured to route them. Pass provider plugins, e.g.:\n" +
			' import { openai } from "workers-ai-provider/openai";\n' +
			" createWorkersAI({ binding: env.AI, providers: [openai] });\n" +
			'A gateway defaults to "default" but can be set via `gateway`. ' +
			'Otherwise use a Workers AI model id (e.g. "@cf/meta/llama-3.1-8b-instruct").';
		throw new Error(message);
	}

	if (delegate === undefined) {
		delegate = createGatewayDelegate({
			binding,
			// Without an explicit `gateway`, use the account's `"default"` gateway
			// so `createWorkersAI({ providers })` needs no further configuration.
			// A configured `gateway` takes precedence.
			gateway: options.gateway ?? { id: DEFAULT_GATEWAY_ID },
			providers: plugins,
			resume: options.resume,
			onResumeExpired: options.onResumeExpired,
		});
	}
	return delegate;
};
|
|
323
|
+
|
|
324
|
+
/**
 * Type guard for AI Gateway catalog slugs (`"<provider>/<model>"`).
 *
 * A value qualifies when it is a string, does not start with `@` (which marks
 * a `@cf/...` Workers AI id), and contains at least one `/`.
 */
const isGatewaySlug = (id: unknown): id is string => {
	if (typeof id !== "string") return false;
	if (id.startsWith("@")) return false;
	return id.includes("/");
};
|
|
329
|
+
|
|
330
|
+
/**
 * Builds the chat model for `modelId`, choosing the path at runtime.
 *
 * `settings` is typed as the union of both shapes here and cast to whichever
 * one the selected path expects.
 *
 * - Catalog slug: handed to the gateway delegate. The delegate's result is
 *   cast to `WorkersAIChatLanguageModel` so the declared return type stays the
 *   same for both paths.
 * - Anything else: built as a Workers AI chat model.
 */
const buildChat = (
	modelId: TextGenerationModels,
	settings?: WorkersAIChatSettings | DelegateCallOptions,
): WorkersAIChatLanguageModel => {
	if (!isGatewaySlug(modelId)) {
		return createChatModel(modelId, settings as WorkersAIChatSettings | undefined);
	}

	const routeThroughGateway = getDelegate(modelId);
	const delegated = routeThroughGateway(modelId, settings as DelegateCallOptions);
	return delegated as unknown as WorkersAIChatLanguageModel;
};
|
|
348
|
+
|
|
181
349
|
const createImageModel = (
|
|
182
350
|
modelId: ImageGenerationModels,
|
|
183
351
|
settings: WorkersAIImageSettings = {},
|
|
@@ -229,14 +397,17 @@ export function createWorkersAI(options: WorkersAISettings): WorkersAI {
|
|
|
229
397
|
provider: "workersai.reranking",
|
|
230
398
|
});
|
|
231
399
|
|
|
232
|
-
const provider = (
|
|
400
|
+
const provider = (
|
|
401
|
+
modelId: TextGenerationModels,
|
|
402
|
+
settings?: WorkersAIChatSettings | DelegateCallOptions,
|
|
403
|
+
) => {
|
|
233
404
|
if (new.target) {
|
|
234
405
|
throw new Error("The WorkersAI model function cannot be called with the new keyword.");
|
|
235
406
|
}
|
|
236
|
-
return
|
|
407
|
+
return buildChat(modelId, settings);
|
|
237
408
|
};
|
|
238
409
|
|
|
239
|
-
provider.chat =
|
|
410
|
+
provider.chat = buildChat;
|
|
240
411
|
provider.embedding = createEmbeddingModel;
|
|
241
412
|
provider.textEmbedding = createEmbeddingModel;
|
|
242
413
|
provider.textEmbeddingModel = createEmbeddingModel;
|
package/src/openai.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
|
+
import type { ProviderPlugin } from "./gateway-delegate";
|
|
3
|
+
|
|
4
|
+
/**
 * Provider plugin for the OpenAI wire format, used by the gateway delegate.
 *
 * Supply it via `createWorkersAI({ providers: [openai] })`. One plugin covers
 * `openai/…` and the other OpenAI-compatible providers (`deepseek/…`, `xai/…`,
 * `groq/…`, `mistral/…`, `perplexity/…`, `openrouter/…`, `cohere/…`, …): the
 * registry maps each slug to its gateway provider id, and this plugin only
 * contributes the response parser.
 *
 * `@ai-sdk/openai` is an optional peer dependency and must be installed by the
 * consumer.
 *
 * The model is built with `.chat()` (Chat Completions) on purpose: in AI SDK
 * v6 a bare `openai()` call targets the Responses API, which the AI Gateway
 * run catalog does not serve.
 */
export const openai: ProviderPlugin = {
	wireFormat: "openai",
	create({ modelId, fetch, baseURL }) {
		// `apiKey` is only a placeholder: auth is handled by the gateway (unified
		// billing / BYOK) and the delegate strips the Authorization header on the
		// gateway path.
		// `baseURL` is set by the registry for non-OpenAI openai-wire providers so
		// the generated URL host-strips to the right gateway-native endpoint. It
		// is only forwarded when truthy.
		const clientSettings = baseURL
			? { apiKey: "unused", fetch, baseURL }
			: { apiKey: "unused", fetch };
		return createOpenAI(clientSettings).chat(modelId);
	},
};
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import { GatewayDelegateError } from "./gateway-delegate";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Resumable run-path stream (RFC §7.1).
|
|
5
|
+
*
|
|
6
|
+
* Wraps the byte stream from a run-path response (`env.AI.run(..., {
|
|
7
|
+
* returnRawResponse })`) so a transient mid-stream drop is recovered
|
|
8
|
+
* transparently: the wrapper reconnects to the gateway resume endpoint and keeps
|
|
9
|
+
* feeding bytes to the same consumer, so the downstream `@ai-sdk/*` parser never
|
|
10
|
+
* sees the break.
|
|
11
|
+
*
|
|
12
|
+
* Byte alignment is the one correctness subtlety. The gateway `resume?from=N`
|
|
13
|
+
* endpoint takes an SSE *event index* (count of `\n\n` terminators) and replays
|
|
14
|
+
* whole events from that index. So the wrapper only ever emits *complete* events
|
|
15
|
+
* downstream and buffers any trailing partial event. On a drop the buffered
|
|
16
|
+
* partial is discarded and resume starts from the count of complete events
|
|
17
|
+
* already emitted — landing exactly on the next event boundary, with no
|
|
18
|
+
* duplicated or truncated bytes.
|
|
19
|
+
*
|
|
20
|
+
* Expiry: once the gateway buffer TTL (~5.5 min) elapses, resume returns 404
|
|
21
|
+
* `{"error":"Request not found"}`. Behavior is governed by `onResumeExpired`:
|
|
22
|
+
* `"error"` (default) surfaces a `GatewayDelegateError("resume-expired")` into
|
|
23
|
+
* the stream; `"accept-partial"` ends the stream cleanly with whatever was
|
|
24
|
+
* already delivered (the caller's higher layer — e.g. Think — can then continue
|
|
25
|
+
* or regenerate).
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
type AiWithFetch = Ai & {
|
|
29
|
+
fetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response>;
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
export type ResumeExpiredPolicy = "error" | "accept-partial";
|
|
33
|
+
|
|
34
|
+
export interface ResumableStreamOptions {
|
|
35
|
+
/** Cloudflare AI binding (e.g. `env.AI`) — used for the resume fetch. */
|
|
36
|
+
binding: Ai;
|
|
37
|
+
/** Gateway id the run was issued under. */
|
|
38
|
+
gateway: string;
|
|
39
|
+
/** The `cf-aig-run-id` of the run to resume. */
|
|
40
|
+
runId: string;
|
|
41
|
+
/**
|
|
42
|
+
* Initial run-path response body. Omit for **cross-invocation re-attach**: the
|
|
43
|
+
* stream then starts by fetching `resume?from={fromEvent}` directly (e.g. a new
|
|
44
|
+
* Durable Object invocation re-attaching to a run after eviction).
|
|
45
|
+
*/
|
|
46
|
+
initial?: ReadableStream<Uint8Array>;
|
|
47
|
+
/**
|
|
48
|
+
* SSE event index to (re-)attach from. Defaults to `0`. Used as the starting
|
|
49
|
+
* `from` when `initial` is omitted, and as the base offset for the event
|
|
50
|
+
* counter (so a later reconnect resumes from the correct absolute index).
|
|
51
|
+
*/
|
|
52
|
+
fromEvent?: number;
|
|
53
|
+
/** What to do when the resume buffer has expired (404). Defaults to `"error"`. */
|
|
54
|
+
onResumeExpired?: ResumeExpiredPolicy;
|
|
55
|
+
/** Max reconnect attempts before giving up. Defaults to 5. */
|
|
56
|
+
maxReconnects?: number;
|
|
57
|
+
/** Fired before each reconnect with the resume `from` index and attempt number. */
|
|
58
|
+
onReconnect?: (fromEvent: number, attempt: number) => void;
|
|
59
|
+
/**
|
|
60
|
+
* Fired with the cumulative SSE event offset whenever complete events are
|
|
61
|
+
* emitted. Use it to persist `{ runId, eventOffset }` for cross-invocation
|
|
62
|
+
* re-attach (throttle your own writes — this can fire per chunk).
|
|
63
|
+
*/
|
|
64
|
+
onProgress?: (eventOffset: number) => void;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/** Returns a new array, backed by a fresh `ArrayBuffer`, holding the bytes of `a` followed by `b`. */
function concat(a: Uint8Array, b: Uint8Array): Uint8Array<ArrayBuffer> {
	const joined = new Uint8Array(new ArrayBuffer(a.length + b.length));
	joined.set(a);
	joined.set(b, a.length);
	return joined;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Finds the end of the last complete SSE event in `buf`.
 *
 * @returns The index immediately after the final `\n\n` pair, or -1 when `buf`
 * contains no `\n\n` at all.
 */
function lastEventBoundary(buf: Uint8Array): number {
	const LF = 0x0a;
	// `end` is the candidate boundary: the two bytes before it must both be LF.
	let end = buf.length;
	while (end >= 2) {
		if (buf[end - 2] === LF && buf[end - 1] === LF) return end;
		end -= 1;
	}
	return -1;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Counts the `\n\n` terminators in `buf`, i.e. the number of complete SSE
 * events. Pairs do not overlap: `"\n\n\n"` counts once, `"\n\n\n\n"` twice.
 */
function countEvents(buf: Uint8Array): number {
	const LF = 0x0a;
	let total = 0;
	let pos = 0;
	while (pos + 1 < buf.length) {
		if (buf[pos] === LF && buf[pos + 1] === LF) {
			total += 1;
			// Skip both bytes of the terminator so it is not counted twice.
			pos += 2;
		} else {
			pos += 1;
		}
	}
	return total;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Builds the gateway resume URL for a run.
 *
 * `gateway` and `runId` are each percent-encoded as a single path segment, so
 * an id containing `/`, `?`, `#` or whitespace cannot change the path or query
 * of the request. Ids made only of unreserved characters produce the same URL
 * as plain interpolation.
 *
 * @param gateway - Gateway id the run was issued under.
 * @param runId - The run id to resume.
 * @param from - SSE event index to resume from.
 */
function resumeUrl(gateway: string, runId: string, from: number): string {
	const gatewaySegment = encodeURIComponent(gateway);
	const runSegment = encodeURIComponent(runId);
	return `https://workers-binding.ai/ai-gateway/gateways/${gatewaySegment}/run/${runSegment}/resume?from=${from}`;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Wraps a run-path byte stream so that a mid-stream read failure is recovered
 * by re-fetching from the gateway resume endpoint.
 *
 * Only complete SSE events (terminated by `\n\n`) are forwarded. A trailing
 * partial event is buffered and, on a drop, discarded, so the resume request
 * starts at the count of complete events already emitted.
 *
 * Terminal outcomes:
 * - upstream ends normally: any buffered remainder is flushed and the stream closes;
 * - resume returns 404: closes (`"accept-partial"`) or errors with
 *   `GatewayDelegateError("resume-expired")` (`"error"`, the default);
 * - resume request throws or returns a non-OK / body-less response: errors with
 *   `GatewayDelegateError("dispatch")`;
 * - more than `maxReconnects` drops: errors with `GatewayDelegateError("resume-expired")`;
 * - consumer cancels: the active upstream body is cancelled and no further
 *   resume requests are made.
 *
 * @param options - See {@link ResumableStreamOptions}.
 * @returns A byte stream that yields whole SSE events.
 */
export function createResumableStream(options: ResumableStreamOptions): ReadableStream<Uint8Array> {
	const { binding, gateway, runId } = options;
	const maxReconnects = options.maxReconnects ?? 5;
	const onExpired = options.onResumeExpired ?? "error";

	let emittedEvents = options.fromEvent ?? 0; // absolute SSE event index reached
	let pending: Uint8Array<ArrayBuffer> = new Uint8Array(new ArrayBuffer(0));
	let reconnects = 0;

	// Set once the consumer cancels. Without this, `controller.enqueue()` /
	// `controller.close()` throw on the already-closed stream and the read loop
	// would mistake that for a network drop and keep issuing resume requests.
	let cancelled = false;
	// Reader for the upstream body currently being pumped, so `cancel()` can
	// propagate cancellation to it.
	let activeReader: ReadableStreamDefaultReader<Uint8Array> | undefined;

	// Fetch `resume?from={emittedEvents}`; on a terminal outcome (expiry / error /
	// network throw / consumer cancellation) it settles the controller where
	// applicable and returns null.
	async function fetchResume(
		controller: ReadableStreamDefaultController<Uint8Array>,
	): Promise<ReadableStream<Uint8Array> | null> {
		let res: Response;
		try {
			res = await (binding as AiWithFetch).fetch(resumeUrl(gateway, runId, emittedEvents), {
				method: "GET",
			});
		} catch (fetchErr) {
			controller.error(
				new GatewayDelegateError(
					"dispatch",
					`Resume request threw at event ${emittedEvents}.`,
					fetchErr,
				),
			);
			return null;
		}

		if (cancelled) {
			// The consumer went away while the request was in flight: drop the
			// response instead of touching the (already closed) controller.
			await res.body?.cancel().catch(() => undefined);
			return null;
		}

		if (res.status === 404) {
			if (onExpired === "accept-partial") {
				controller.close();
				return null;
			}
			controller.error(
				new GatewayDelegateError(
					"resume-expired",
					`Resume buffer expired (404) at event ${emittedEvents}. The gateway buffer ` +
						"TTL (~5.5 min) elapsed; fall back to continuation or regeneration.",
				),
			);
			return null;
		}
		if (!res.ok || !res.body) {
			controller.error(
				new GatewayDelegateError(
					"dispatch",
					`Resume failed (${res.status}) at event ${emittedEvents}.`,
				),
			);
			return null;
		}
		return res.body;
	}

	return new ReadableStream<Uint8Array>({
		async start(controller) {
			// In-stream wrap starts from the live body; cross-invocation re-attach
			// (no `initial`) starts by resuming from `fromEvent`. An initial-attach
			// failure is terminal — it is not charged against the reconnect budget.
			let current: ReadableStream<Uint8Array>;
			if (options.initial) {
				current = options.initial;
			} else {
				const body = await fetchResume(controller);
				if (!body) return;
				current = body;
			}

			for (;;) {
				if (cancelled) {
					// Cancelled between obtaining the body and locking it.
					await current.cancel().catch(() => undefined);
					return;
				}

				const reader = current.getReader();
				activeReader = reader;
				try {
					for (;;) {
						const { done, value } = await reader.read();
						// `cancel()` resolves the pending read; the outer stream is
						// already closed, so there is nothing left to enqueue or close.
						if (cancelled) return;
						if (done) {
							if (pending.length > 0) {
								controller.enqueue(pending);
								pending = new Uint8Array(new ArrayBuffer(0));
							}
							controller.close();
							return;
						}
						if (!value || value.length === 0) continue;

						pending = concat(pending, value);
						const boundary = lastEventBoundary(pending);
						if (boundary > 0) {
							const complete = pending.slice(0, boundary);
							controller.enqueue(complete);
							emittedEvents += countEvents(complete);
							options.onProgress?.(emittedEvents);
							pending = pending.slice(boundary);
						}
					}
				} catch (err) {
					activeReader = undefined;
					try {
						reader.releaseLock();
					} catch {
						// reader may already be released
					}

					// A failure observed after cancellation is not a drop — stop here
					// rather than spending the reconnect budget on resume requests.
					if (cancelled) return;

					if (reconnects >= maxReconnects) {
						controller.error(
							new GatewayDelegateError(
								"resume-expired",
								`Exceeded ${maxReconnects} reconnect attempts at event ${emittedEvents}.`,
								err,
							),
						);
						return;
					}

					// Discard the unfinished partial — resume realigns on the boundary.
					pending = new Uint8Array(new ArrayBuffer(0));
					reconnects++;
					options.onReconnect?.(emittedEvents, reconnects);

					const body = await fetchResume(controller);
					if (!body) return;
					current = body;
				}
			}
		},

		// Consumer cancelled: stop pumping and release the upstream body.
		async cancel(reason) {
			cancelled = true;
			const reader = activeReader;
			activeReader = undefined;
			if (!reader) return;
			try {
				await reader.cancel(reason);
			} catch {
				// Best effort: the upstream may already have errored, and the
				// consumer is abandoning the stream either way.
			}
		},
	});
}
|
package/src/streaming.ts
CHANGED
|
@@ -6,6 +6,12 @@ import type {
|
|
|
6
6
|
import { generateId } from "ai";
|
|
7
7
|
import { mapWorkersAIFinishReason } from "./map-workersai-finish-reason";
|
|
8
8
|
import { mapWorkersAIUsage } from "./map-workersai-usage";
|
|
9
|
+
import {
|
|
10
|
+
createAISDKToolCallId,
|
|
11
|
+
getToolNames,
|
|
12
|
+
isForcedToolChoice,
|
|
13
|
+
parseLeakedToolCalls,
|
|
14
|
+
} from "./utils";
|
|
9
15
|
|
|
10
16
|
/**
|
|
11
17
|
* Prepend a stream-start event to an existing LanguageModelV3 stream.
|
|
@@ -63,6 +69,10 @@ function isNullFinalizationChunk(tc: Record<string, unknown>): boolean {
|
|
|
63
69
|
*/
|
|
64
70
|
export function getMappedStream(
|
|
65
71
|
response: Response | ReadableStream<Uint8Array>,
|
|
72
|
+
salvageContext?: {
|
|
73
|
+
tools: Array<{ function: { name?: string } }> | undefined;
|
|
74
|
+
toolChoice: unknown;
|
|
75
|
+
},
|
|
66
76
|
): ReadableStream<LanguageModelV3StreamPart> {
|
|
67
77
|
const rawStream =
|
|
68
78
|
response instanceof ReadableStream
|
|
@@ -73,6 +83,18 @@ export function getMappedStream(
|
|
|
73
83
|
throw new Error("No readable stream available for SSE parsing.");
|
|
74
84
|
}
|
|
75
85
|
|
|
86
|
+
// gpt-oss harmony quirk: a forced tool call can be streamed as `content`
|
|
87
|
+
// text deltas instead of structured tool calls. When a tool was forced,
|
|
88
|
+
// buffer the text content (rather than emitting it incrementally) so we can
|
|
89
|
+
// reinterpret it as a tool call at flush time. Text is unexpected in forced
|
|
90
|
+
// mode anyway, so buffering it does not regress a useful stream.
|
|
91
|
+
// See https://github.com/cloudflare/ai/issues/560.
|
|
92
|
+
const knownToolNames = getToolNames(salvageContext?.tools);
|
|
93
|
+
const bufferContentForSalvage =
|
|
94
|
+
isForcedToolChoice(salvageContext?.toolChoice) && knownToolNames.size > 0;
|
|
95
|
+
let contentBuffer = "";
|
|
96
|
+
let anyToolCallStarted = false;
|
|
97
|
+
|
|
76
98
|
// State shared across the transform
|
|
77
99
|
let usage: LanguageModelV3Usage = {
|
|
78
100
|
outputTokens: { total: 0, text: undefined, reasoning: undefined },
|
|
@@ -145,20 +167,24 @@ export function getMappedStream(
|
|
|
145
167
|
if (nativeResponse != null && nativeResponse !== "") {
|
|
146
168
|
const responseText = String(nativeResponse);
|
|
147
169
|
if (responseText.length > 0) {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
170
|
+
if (bufferContentForSalvage) {
|
|
171
|
+
contentBuffer += responseText;
|
|
172
|
+
} else {
|
|
173
|
+
// Close active reasoning block before text starts
|
|
174
|
+
if (reasoningId) {
|
|
175
|
+
controller.enqueue({ type: "reasoning-end", id: reasoningId });
|
|
176
|
+
reasoningId = null;
|
|
177
|
+
}
|
|
178
|
+
if (!textId) {
|
|
179
|
+
textId = generateId();
|
|
180
|
+
controller.enqueue({ type: "text-start", id: textId });
|
|
181
|
+
}
|
|
182
|
+
controller.enqueue({
|
|
183
|
+
type: "text-delta",
|
|
184
|
+
id: textId,
|
|
185
|
+
delta: responseText,
|
|
186
|
+
});
|
|
156
187
|
}
|
|
157
|
-
controller.enqueue({
|
|
158
|
-
type: "text-delta",
|
|
159
|
-
id: textId,
|
|
160
|
-
delta: responseText,
|
|
161
|
-
});
|
|
162
188
|
}
|
|
163
189
|
}
|
|
164
190
|
|
|
@@ -196,20 +222,24 @@ export function getMappedStream(
|
|
|
196
222
|
|
|
197
223
|
const textDelta = delta.content as string | undefined;
|
|
198
224
|
if (textDelta && textDelta.length > 0) {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
225
|
+
if (bufferContentForSalvage) {
|
|
226
|
+
contentBuffer += textDelta;
|
|
227
|
+
} else {
|
|
228
|
+
// Close active reasoning block before text starts
|
|
229
|
+
if (reasoningId) {
|
|
230
|
+
controller.enqueue({ type: "reasoning-end", id: reasoningId });
|
|
231
|
+
reasoningId = null;
|
|
232
|
+
}
|
|
233
|
+
if (!textId) {
|
|
234
|
+
textId = generateId();
|
|
235
|
+
controller.enqueue({ type: "text-start", id: textId });
|
|
236
|
+
}
|
|
237
|
+
controller.enqueue({
|
|
238
|
+
type: "text-delta",
|
|
239
|
+
id: textId,
|
|
240
|
+
delta: textDelta,
|
|
241
|
+
});
|
|
207
242
|
}
|
|
208
|
-
controller.enqueue({
|
|
209
|
-
type: "text-delta",
|
|
210
|
-
id: textId,
|
|
211
|
-
delta: textDelta,
|
|
212
|
-
});
|
|
213
243
|
}
|
|
214
244
|
|
|
215
245
|
const deltaToolCalls = delta.tool_calls as
|
|
@@ -233,17 +263,59 @@ export function getMappedStream(
|
|
|
233
263
|
closeToolCall(idx, controller);
|
|
234
264
|
}
|
|
235
265
|
|
|
236
|
-
// Close open
|
|
266
|
+
// Close open reasoning block before any salvaged tool calls.
|
|
237
267
|
if (reasoningId) {
|
|
238
268
|
controller.enqueue({ type: "reasoning-end", id: reasoningId });
|
|
239
269
|
}
|
|
270
|
+
|
|
271
|
+
// Salvage a forced tool call that streamed as buffered text.
|
|
272
|
+
let salvagedToolCalls = false;
|
|
273
|
+
if (bufferContentForSalvage && !anyToolCallStarted && contentBuffer.trim()) {
|
|
274
|
+
const salvaged = parseLeakedToolCalls(contentBuffer, knownToolNames);
|
|
275
|
+
if (salvaged.length > 0) {
|
|
276
|
+
for (const call of salvaged) {
|
|
277
|
+
controller.enqueue({
|
|
278
|
+
type: "tool-input-start",
|
|
279
|
+
id: call.toolCallId,
|
|
280
|
+
toolName: call.toolName,
|
|
281
|
+
});
|
|
282
|
+
controller.enqueue({
|
|
283
|
+
type: "tool-input-delta",
|
|
284
|
+
id: call.toolCallId,
|
|
285
|
+
delta: call.input,
|
|
286
|
+
});
|
|
287
|
+
controller.enqueue({ type: "tool-input-end", id: call.toolCallId });
|
|
288
|
+
controller.enqueue(call);
|
|
289
|
+
}
|
|
290
|
+
salvagedToolCalls = true;
|
|
291
|
+
// Stream warnings are fixed at stream-start, so surface the
|
|
292
|
+
// reinterpretation here for observability instead.
|
|
293
|
+
console.warn(
|
|
294
|
+
`[workers-ai-provider] Recovered ${salvaged.length} forced tool call(s) that the model streamed as text content instead of structured tool calls.`,
|
|
295
|
+
);
|
|
296
|
+
} else {
|
|
297
|
+
// Not a recoverable tool call — emit the buffered text as-is.
|
|
298
|
+
const id = generateId();
|
|
299
|
+
controller.enqueue({ type: "text-start", id });
|
|
300
|
+
controller.enqueue({ type: "text-delta", id, delta: contentBuffer });
|
|
301
|
+
controller.enqueue({ type: "text-end", id });
|
|
302
|
+
}
|
|
303
|
+
} else if (bufferContentForSalvage && contentBuffer.trim()) {
|
|
304
|
+
// Real tool calls were present alongside buffered text — emit text.
|
|
305
|
+
const id = generateId();
|
|
306
|
+
controller.enqueue({ type: "text-start", id });
|
|
307
|
+
controller.enqueue({ type: "text-delta", id, delta: contentBuffer });
|
|
308
|
+
controller.enqueue({ type: "text-end", id });
|
|
309
|
+
}
|
|
310
|
+
|
|
240
311
|
if (textId) {
|
|
241
312
|
controller.enqueue({ type: "text-end", id: textId });
|
|
242
313
|
}
|
|
243
314
|
|
|
244
315
|
// Detect premature termination
|
|
245
|
-
const effectiveFinishReason =
|
|
246
|
-
|
|
316
|
+
const effectiveFinishReason = salvagedToolCalls
|
|
317
|
+
? ({ unified: "tool-calls", raw: "stop" } as LanguageModelV3FinishReason)
|
|
318
|
+
: !receivedDone && receivedAnyData && !finishReason
|
|
247
319
|
? ({
|
|
248
320
|
unified: "error",
|
|
249
321
|
raw: "stream-truncated",
|
|
@@ -317,10 +389,11 @@ export function getMappedStream(
|
|
|
317
389
|
closeToolCall(lastActiveToolIndex, controller);
|
|
318
390
|
}
|
|
319
391
|
|
|
320
|
-
const id = tcId
|
|
392
|
+
const id = createAISDKToolCallId(tcId);
|
|
321
393
|
const toolName = tcName || "";
|
|
322
394
|
activeToolCalls.set(tcIndex, { id, toolName, args: "" });
|
|
323
395
|
lastActiveToolIndex = tcIndex;
|
|
396
|
+
anyToolCallStarted = true;
|
|
324
397
|
|
|
325
398
|
controller.enqueue({
|
|
326
399
|
type: "tool-input-start",
|