workers-ai-provider 3.1.13 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/utils.ts CHANGED
@@ -266,6 +266,57 @@ export async function createRunBinary(
266
266
  return (data.result ?? data) as Record<string, unknown>;
267
267
  }
268
268
 
269
+ // ---------------------------------------------------------------------------
270
+ // Structured output (JSON mode)
271
+ // ---------------------------------------------------------------------------
272
+
273
+ /**
274
+ * Build the `response_format.json_schema` payload for native Workers AI models.
275
+ *
276
+ * Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
277
+ * Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
278
+ * only required by partner-model routes (e.g. `openai/...`), which never reach
279
+ * this code — they go through the gateway delegate and the real `@ai-sdk/*`
280
+ * providers, which build the envelope themselves. Wrapping the schema here would
281
+ * break native models, so we must keep the bare shape.
282
+ *
283
+ * The AI SDK's structured-output `name` / `description` (from
284
+ * `Output.object({ schema, name, description })` / `generateObject`) would
285
+ * otherwise be silently dropped on this path. We preserve them as the standard
286
+ * JSON Schema `title` (from `name`) and `description` keywords, which keeps the
287
+ * payload a valid bare schema while still passing the LLM guidance through.
288
+ *
289
+ * Existing schema-level `title` / `description` are never overwritten, empty
290
+ * strings are ignored, and the input schema object is never mutated: the result
291
+ * is either `schema` itself (same reference) or a shallow copy with the keywords added.
292
+ * See https://github.com/cloudflare/ai/issues/559.
293
+ */
294
+ export function buildJsonSchemaPayload(
295
+ schema: unknown,
296
+ name?: string,
297
+ description?: string,
298
+ ): unknown {
299
+ // Only plain (non-array) objects can carry JSON Schema keywords. Anything else
300
+ // (incl. `undefined` when no schema was supplied) passes through untouched.
301
+ if (typeof schema !== "object" || schema === null || Array.isArray(schema)) {
302
+ return schema;
303
+ }
304
+
305
+ const record = schema as Record<string, unknown>;
306
+ const addTitle = !!name && record.title === undefined;
307
+ const addDescription = !!description && record.description === undefined;
308
+ // Nothing to add: return the caller's object as-is rather than a copy.
309
+ if (!addTitle && !addDescription) {
310
+ return schema;
311
+ }
312
+ // Later spreads cannot clobber existing values: each key is added only when it is `undefined` on the schema.
313
+ return {
314
+ ...record,
315
+ ...(addTitle ? { title: name } : {}),
316
+ ...(addDescription ? { description } : {}),
317
+ };
318
+ }
319
+
269
320
  // ---------------------------------------------------------------------------
270
321
  // Tool preparation
271
322
  // ---------------------------------------------------------------------------
@@ -301,12 +352,17 @@ export function prepareToolsAndToolChoice(
301
352
  case "required":
302
353
  return { tool_choice: "required", tools: mappedTools };
303
354
 
304
- // Workers AI does not support tool mode directly,
305
- // so we filter the tools and force the tool choice through 'required'
355
+ // Force a specific tool via the OpenAI-style named-function form.
356
+ // Workers AI enforces this server-side, unlike "required" which is
357
+ // advisory and "fails open" on long contexts / reasoning models (the
358
+ // model can answer in prose instead of calling the tool). The full tool
359
+ // list is kept (not filtered to the single function) to match OpenAI
360
+ // semantics and preserve tool-result context fidelity.
361
+ // See https://github.com/cloudflare/ai/issues/560.
306
362
  case "tool":
307
363
  return {
308
- tool_choice: "required",
309
- tools: mappedTools.filter((tool) => tool.function.name === toolChoice.toolName),
364
+ tool_choice: { type: "function", function: { name: toolChoice.toolName } },
365
+ tools: mappedTools,
310
366
  };
311
367
  default: {
312
368
  const exhaustiveCheck = type satisfies never;
@@ -323,6 +379,23 @@ export function prepareToolsAndToolChoice(
323
379
  // Tool call processing
324
380
  // ---------------------------------------------------------------------------
325
381
 
382
+ const TOOL_CALL_ID_MARKER = "::cf-wai-tool-call::"; // separates the upstream id from the suffix appended by createAISDKToolCallId
383
+ /** Build the id exposed on a tool call: `<upstream id><marker><fresh generateId()>`. NOTE(review): presumably the suffix keeps ids unique when the upstream id repeats — confirm. */
384
+ export function createAISDKToolCallId(toolCallId: string | null | undefined): string {
385
+ const originalId = toolCallId || generateId(); // `||`, not `??`: an empty-string id is replaced by a generated one too
386
+ return `${originalId}${TOOL_CALL_ID_MARKER}${generateId()}`;
387
+ }
388
+ /** Strip the last `TOOL_CALL_ID_MARKER` and everything after it, recovering the part that `createAISDKToolCallId` put before the marker. */
389
+ export function toWorkersAIToolCallId(toolCallId: string): string {
390
+ const markerIndex = toolCallId.lastIndexOf(TOOL_CALL_ID_MARKER);
391
+ if (markerIndex === -1) return toolCallId; // no marker: returned unchanged
392
+
393
+ const suffixIndex = markerIndex + TOOL_CALL_ID_MARKER.length;
394
+ if (suffixIndex >= toolCallId.length) return toolCallId; // marker with nothing after it: returned unchanged
395
+
396
+ return toolCallId.slice(0, markerIndex);
397
+ }
398
+
326
399
  /** Workers AI flat tool call format (non-streaming, native) */
327
400
  interface FlatToolCall {
328
401
  name: string;
@@ -406,7 +479,7 @@ function processToolCall(toolCall: FlatToolCall | OpenAIToolCall): LanguageModel
406
479
  typeof fn.arguments === "string"
407
480
  ? fn.arguments
408
481
  : JSON.stringify(fn.arguments || {}),
409
- toolCallId: toolCall.id || generateId(),
482
+ toolCallId: createAISDKToolCallId(toolCall.id),
410
483
  type: "tool-call",
411
484
  toolName: fn.name,
412
485
  };
@@ -419,7 +492,7 @@ function processToolCall(toolCall: FlatToolCall | OpenAIToolCall): LanguageModel
419
492
  typeof flat.arguments === "string"
420
493
  ? flat.arguments
421
494
  : JSON.stringify(flat.arguments || {}),
422
- toolCallId: flat.id || generateId(),
495
+ toolCallId: createAISDKToolCallId(flat.id),
423
496
  type: "tool-call",
424
497
  toolName: flat.name,
425
498
  };
@@ -447,6 +520,133 @@ export function processPartialToolCalls(partialToolCalls: PartialToolCall[]) {
447
520
  return processToolCalls({ tool_calls: mergedToolCalls });
448
521
  }
449
522
 
523
+ // ---------------------------------------------------------------------------
524
+ // Forced tool-call salvage (gpt-oss harmony quirk)
525
+ // ---------------------------------------------------------------------------
526
+
527
+ /**
528
+ * Was a tool call forced for this request?
529
+ *
530
+ * True for both `tool_choice: "required"` (any tool) and the named-function
531
+ * form `{ type: "function", function: { name } }` (one specific tool).
532
+ */
533
+ export function isForcedToolChoice(toolChoice: unknown): boolean {
534
+ if (toolChoice === "required") return true;
535
+ return (
536
+ typeof toolChoice === "object" &&
537
+ toolChoice !== null &&
538
+ (toolChoice as { type?: unknown }).type === "function" // only `type` is inspected; `function.name` is not validated
539
+ );
540
+ }
541
+
542
+ /**
543
+ * Parse tool calls that a model leaked as JSON text instead of structured
544
+ * `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
545
+ * The trimmed text must be valid JSON as a whole; anything else yields `[]`.
546
+ * Only JSON objects whose `name` is one of `knownToolNames` are recovered;
547
+ * everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
548
+ * hallucinated names) is ignored to avoid fabricating bogus calls.
549
+ */
550
+ export function parseLeakedToolCalls(
551
+ text: string,
552
+ knownToolNames: Set<string>,
553
+ ): LanguageModelV3ToolCall[] {
554
+ let parsed: unknown;
555
+ try {
556
+ parsed = JSON.parse(text.trim()); // JSON embedded in surrounding prose is not extracted
557
+ } catch {
558
+ return [];
559
+ }
560
+ // A single object and an array of objects are handled alike.
561
+ const candidates = Array.isArray(parsed) ? parsed : [parsed];
562
+ const salvaged: LanguageModelV3ToolCall[] = [];
563
+
564
+ for (const candidate of candidates) {
565
+ if (typeof candidate !== "object" || candidate === null) continue;
566
+ const obj = candidate as Record<string, unknown>;
567
+ const name = obj.name;
568
+ if (typeof name !== "string" || !knownToolNames.has(name)) continue;
569
+
570
+ // Arguments may be wrapped (`arguments` is checked first, then `parameters`)
571
+ // or flattened as siblings of `name`.
572
+ let args: unknown;
573
+ if ("arguments" in obj) {
574
+ args = obj.arguments;
575
+ } else if ("parameters" in obj) {
576
+ args = obj.parameters;
577
+ } else {
578
+ const { name: _name, ...rest } = obj;
579
+ args = rest;
580
+ }
581
+ // String arguments pass through as-is (not re-parsed); a null value becomes "{}".
582
+ salvaged.push({
583
+ input: typeof args === "string" ? args : JSON.stringify(args ?? {}),
584
+ toolCallId: createAISDKToolCallId(undefined), // leaked text carries no upstream id, so one is generated
585
+ type: "tool-call",
586
+ toolName: name,
587
+ });
588
+ }
589
+
590
+ return salvaged;
591
+ }
592
+
593
+ /** Collect the requested tool names from mapped tools; `undefined` yields an empty set and entries without a string name are dropped. */
594
+ export function getToolNames(
595
+ tools: Array<{ function: { name?: string } }> | undefined,
596
+ ): Set<string> {
597
+ return new Set(
598
+ (tools ?? [])
599
+ .map((tool) => tool.function?.name) // `?.` tolerates an entry lacking `function` despite the declared type
600
+ .filter((name): name is string => typeof name === "string"),
601
+ );
602
+ }
603
+
604
+ /**
605
+ * Salvage a tool call that a model leaked into text content instead of the
606
+ * structured `tool_calls` field.
607
+ *
608
+ * Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
609
+ * call as raw JSON in `message.content` with an empty `tool_calls` array and
610
+ * `finish_reason: "stop"` — typically when the forced tool is a poor fit for
611
+ * the conversation. The content looks like one of:
612
+ *
613
+ * {"name":"read_skill_resource","path":"feedback.txt"} (flat args)
614
+ * {"name":"calc","arguments":{"a":1}} (wrapped args)
615
+ * [{"name":"calc","parameters":{"a":1}}] (array form)
616
+ *
617
+ * This reinterprets that text as a structured tool call. It is intentionally
618
+ * narrow to avoid false positives:
619
+ * - only runs when a tool was *forced* (required / named-function), so a
620
+ * tool call was explicitly demanded by the caller;
621
+ * - only runs when there are no real structured tool calls to override;
622
+ * - only matches JSON objects whose `name` is one of the tools in `context.tools`.
623
+ *
624
+ * Returns the salvaged tool calls, or `null` when nothing was salvaged.
625
+ *
626
+ * See https://github.com/cloudflare/ai/issues/560.
627
+ */
628
+ export function salvageToolCallsFromText(
629
+ output: Record<string, unknown>,
630
+ context: {
631
+ tools: Array<{ function: { name?: string } }> | undefined;
632
+ toolChoice: unknown;
633
+ },
634
+ ): LanguageModelV3ToolCall[] | null {
635
+ if (!isForcedToolChoice(context.toolChoice)) return null;
636
+
637
+ // Never override real tool calls.
638
+ if (processToolCalls(output).length > 0) return null;
639
+ // NOTE(review): with the named-function form, any tool in `context.tools` is still accepted, not only the forced one — confirm this is intended.
640
+ const knownToolNames = getToolNames(context.tools);
641
+ if (knownToolNames.size === 0) return null;
642
+
643
+ const text = processText(output);
644
+ if (!text) return null; // falsy text (missing or empty): nothing to parse
645
+
646
+ const salvaged = parseLeakedToolCalls(text, knownToolNames);
647
+ return salvaged.length > 0 ? salvaged : null;
648
+ }
649
+
450
650
  // ---------------------------------------------------------------------------
451
651
  // Text extraction
452
652
  // ---------------------------------------------------------------------------
@@ -5,10 +5,12 @@ import { mapWorkersAIFinishReason } from "./map-workersai-finish-reason";
5
5
  import { mapWorkersAIUsage } from "./map-workersai-usage";
6
6
  import { getMappedStream, prependStreamStart } from "./streaming";
7
7
  import {
8
+ buildJsonSchemaPayload,
8
9
  normalizeMessagesForBinding,
9
10
  prepareToolsAndToolChoice,
10
11
  processText,
11
12
  processToolCalls,
13
+ salvageToolCallsFromText,
12
14
  } from "./utils";
13
15
  import type { WorkersAIChatSettings } from "./workersai-chat-settings";
14
16
  import type { TextGenerationModels } from "./workersai-models";
@@ -93,13 +95,23 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
93
95
  }
94
96
 
95
97
  case "json": {
98
+ // Native Workers AI expects a BARE JSON Schema under `json_schema`
99
+ // (not OpenAI's `{ name, schema, strict }` envelope — partner models
100
+ // that need that go through the gateway delegate, not this path). We
101
+ // fold the AI SDK's `name`/`description` into the schema as `title`/
102
+ // `description` so they aren't lost. See
103
+ // https://github.com/cloudflare/ai/issues/559.
104
+ const json = responseFormat?.type === "json" ? responseFormat : undefined;
96
105
  return {
97
106
  args: {
98
107
  ...baseArgs,
99
108
  response_format: {
100
109
  type: "json_schema",
101
- json_schema:
102
- responseFormat?.type === "json" ? responseFormat.schema : undefined,
110
+ json_schema: buildJsonSchemaPayload(
111
+ json?.schema,
112
+ json?.name,
113
+ json?.description,
114
+ ),
103
115
  },
104
116
  tools: undefined,
105
117
  tool_choice: undefined,
@@ -203,6 +215,57 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
203
215
  };
204
216
  }
205
217
 
218
+ /**
219
+ * Extract reasoning, text, and tool calls from a non-streaming response.
220
+ *
221
+ * Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
222
+ * path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
223
+ * and is retried non-streaming). When a forced tool call was leaked into
224
+ * text content (gpt-oss harmony quirk), it is salvaged into a structured
225
+ * tool call and the leaked JSON text is suppressed. A warning is pushed onto
226
+ * the caller's `warnings` array (mutated) so callers can observe the reinterpretation.
227
+ */
228
+ private extractContent(
229
+ outputRecord: Record<string, unknown>,
230
+ args: ReturnType<typeof this.getArgs>["args"],
231
+ warnings: SharedV3Warning[],
232
+ ) {
233
+ const choices = outputRecord.choices as
234
+ | Array<{ message?: { reasoning_content?: string; reasoning?: string } }>
235
+ | undefined;
236
+ const reasoningContent =
237
+ choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
238
+ // Real structured tool calls win; salvage is attempted only when there are none.
239
+ const toolCalls = processToolCalls(outputRecord);
240
+ const salvaged =
241
+ toolCalls.length === 0
242
+ ? salvageToolCallsFromText(outputRecord, {
243
+ tools: args.tools,
244
+ toolChoice: args.tool_choice,
245
+ })
246
+ : null;
247
+
248
+ if (salvaged) {
249
+ warnings.push({
250
+ type: "other",
251
+ message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`,
252
+ });
253
+ }
254
+
255
+ return {
256
+ reasoningContent,
257
+ // Suppress the leaked JSON text when we salvaged a tool call from it.
258
+ text: salvaged ? "" : (processText(outputRecord) ?? ""),
259
+ toolCalls: salvaged ?? toolCalls,
260
+ // When salvaged, the upstream finish_reason is expected to be "stop" (the raw
261
+ // value is hard-coded here, not read from the response); report "tool-calls"
262
+ // so the unified reason matches a native tool call and the agentic loop continues.
263
+ finishReason: salvaged
264
+ ? ({ unified: "tool-calls", raw: "stop" } as const)
265
+ : mapWorkersAIFinishReason(outputRecord),
266
+ };
267
+ }
268
+
206
269
  async doGenerate(
207
270
  options: Parameters<LanguageModelV3["doGenerate"]>[0],
208
271
  ): Promise<Awaited<ReturnType<LanguageModelV3["doGenerate"]>>> {
@@ -230,25 +293,20 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
230
293
  }
231
294
 
232
295
  const outputRecord = output as Record<string, unknown>;
233
- const choices = outputRecord.choices as
234
- | Array<{
235
- message?: { reasoning_content?: string; reasoning?: string };
236
- }>
237
- | undefined;
238
- const reasoningContent =
239
- choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
296
+ const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(
297
+ outputRecord,
298
+ args,
299
+ warnings,
300
+ );
240
301
 
241
302
  return {
242
- finishReason: mapWorkersAIFinishReason(outputRecord),
303
+ finishReason,
243
304
  content: [
244
305
  ...(reasoningContent
245
306
  ? [{ type: "reasoning" as const, text: reasoningContent }]
246
307
  : []),
247
- {
248
- type: "text",
249
- text: processText(outputRecord) ?? "",
250
- },
251
- ...processToolCalls(outputRecord),
308
+ { type: "text" as const, text },
309
+ ...toolCalls,
252
310
  ],
253
311
  usage: mapWorkersAIUsage(output as Record<string, unknown>),
254
312
  warnings,
@@ -279,20 +337,24 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
279
337
  // If the binding returned a stream, pipe it through the SSE mapper
280
338
  if (response instanceof ReadableStream) {
281
339
  return {
282
- stream: prependStreamStart(getMappedStream(response), warnings),
340
+ stream: prependStreamStart(
341
+ getMappedStream(response, {
342
+ tools: args.tools,
343
+ toolChoice: args.tool_choice,
344
+ }),
345
+ warnings,
346
+ ),
283
347
  };
284
348
  }
285
349
 
286
350
  // Graceful degradation: some models return a non-streaming response even
287
351
  // when stream:true is requested. Wrap the complete response as a stream.
288
352
  const outputRecord = response as Record<string, unknown>;
289
- const choices = outputRecord.choices as
290
- | Array<{
291
- message?: { reasoning_content?: string; reasoning?: string };
292
- }>
293
- | undefined;
294
- const reasoningContent =
295
- choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
353
+ const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(
354
+ outputRecord,
355
+ args,
356
+ warnings,
357
+ );
296
358
 
297
359
  let textId: string | null = null;
298
360
  let reasoningId: string | null = null;
@@ -316,7 +378,6 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
316
378
  controller.enqueue({ type: "reasoning-end", id: reasoningId });
317
379
  }
318
380
 
319
- const text = processText(outputRecord);
320
381
  if (text) {
321
382
  textId = generateId();
322
383
  controller.enqueue({ type: "text-start", id: textId });
@@ -324,13 +385,13 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
324
385
  controller.enqueue({ type: "text-end", id: textId });
325
386
  }
326
387
 
327
- for (const toolCall of processToolCalls(outputRecord)) {
388
+ for (const toolCall of toolCalls) {
328
389
  controller.enqueue(toolCall);
329
390
  }
330
391
 
331
392
  controller.enqueue({
332
393
  type: "finish",
333
- finishReason: mapWorkersAIFinishReason(outputRecord),
394
+ finishReason,
334
395
  usage: mapWorkersAIUsage(response as Record<string, unknown>),
335
396
  });
336
397
  controller.close();
@@ -18,7 +18,7 @@ export type WorkersAIChatSettings = {
18
18
 
19
19
  /**
20
20
  * Controls the reasoning budget for reasoning-capable Workers AI models
21
- * (e.g. `@cf/zai-org/glm-4.7-flash`, `@cf/moonshotai/kimi-k2.5`,
21
+ * (e.g. `@cf/zai-org/glm-4.7-flash`, `@cf/moonshotai/kimi-k2.7-code`,
22
22
  * `@cf/openai/gpt-oss-120b`).
23
23
  *
24
24
  * `null` is a valid value and disables reasoning for models that support it.
@@ -1,11 +1,19 @@
1
+ /**
2
+ * The known (typed) BaseAiTextGeneration model ids, minus any that also match
3
+ * BaseAiTextToImage, and without the `(string & {})` escape hatch. Used to drive
4
+ * editor autocomplete while still capturing the exact literal a caller passed (see `WorkersAI`).
5
+ */
6
+ export type KnownTextGenerationModels = Exclude<
7
+ value2key<AiModels, BaseAiTextGeneration>, // NOTE(review): `value2key` is declared elsewhere — presumably the `AiModels` keys whose value matches; confirm
8
+ value2key<AiModels, BaseAiTextToImage> // union members assignable to this are removed by `Exclude`
9
+ >;
10
+
1
11
  /**
2
12
  * The names of the BaseAiTextGeneration models.
3
13
  *
4
14
  * Accepts any string at runtime, but provides autocomplete for known models.
5
15
  */
6
- export type TextGenerationModels =
7
- | Exclude<value2key<AiModels, BaseAiTextGeneration>, value2key<AiModels, BaseAiTextToImage>>
8
- | (string & {});
16
+ export type TextGenerationModels = KnownTextGenerationModels | (string & {});
9
17
 
10
18
  /*
11
19
  * The names of the BaseAiTextToImage models.