npm - workers-ai-provider - Versions diffs - 3.1.13 → 3.2.0 - Mend

workers-ai-provider 3.1.13 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/README.md +183 -31
package/dist/anthropic.d.mts +14 -0
package/dist/anthropic.mjs +21 -0
package/dist/anthropic.mjs.map +1 -0
package/dist/gateway-delegate-BfaUTwDZ.d.mts +385 -0
package/dist/gateway-provider-1USFWm7c.mjs +583 -0
package/dist/gateway-provider-1USFWm7c.mjs.map +1 -0
package/dist/gateway-provider.d.mts +80 -0
package/dist/gateway-provider.mjs +2 -0
package/dist/google.d.mts +14 -0
package/dist/google.mjs +21 -0
package/dist/google.mjs.map +1 -0
package/dist/index.d.mts +64 -7
package/dist/index.mjs +967 -327
package/dist/index.mjs.map +1 -1
package/dist/openai.d.mts +20 -0
package/dist/openai.mjs +27 -0
package/dist/openai.mjs.map +1 -0
package/package.json +47 -6
package/src/anthropic.ts +17 -0
package/src/client-fallback.ts +70 -0
package/src/convert-to-workersai-chat-messages.ts +33 -7
package/src/errors.ts +216 -0
package/src/gateway-delegate.ts +696 -0
package/src/gateway-provider.ts +167 -0
package/src/gateway-providers.ts +457 -0
package/src/google.ts +19 -0
package/src/index.ts +180 -9
package/src/openai.ts +25 -0
package/src/resumable-stream.ts +223 -0
package/src/streaming.ts +103 -30
package/src/utils.ts +206 -6
package/src/workersai-chat-language-model.ts +87 -26
package/src/workersai-chat-settings.ts +1 -1
package/src/workersai-models.ts +11 -3

package/src/utils.ts CHANGED Viewed

@@ -266,6 +266,57 @@ export async function createRunBinary(
 	return (data.result ?? data) as Record<string, unknown>;
 }
+// ---------------------------------------------------------------------------
+// Structured output (JSON mode)
+// ---------------------------------------------------------------------------
+/**
+ * Build the `response_format.json_schema` payload for native Workers AI models.
+ *
+ * Native Workers AI (`@cf/...`) expects `json_schema` to be a **bare** JSON
+ * Schema, NOT OpenAI's `{ name, schema, strict }` envelope. That envelope is
+ * only required by partner-model routes (e.g. `openai/...`), which never reach
+ * this code — they go through the gateway delegate and the real `@ai-sdk/*`
+ * providers, which build the envelope themselves. Wrapping the schema here would
+ * break native models, so we must keep the bare shape.
+ *
+ * The AI SDK's structured-output `name` / `description` (from
+ * `Output.object({ schema, name, description })` / `generateObject`) would
+ * otherwise be silently dropped on this path. We preserve them as the standard
+ * JSON Schema `title` (from `name`) and `description` keywords, which keeps the
+ * payload a valid bare schema while still passing the LLM guidance through.
+ *
+ * Existing schema-level `title` / `description` are never overwritten, an empty
+ * `name` / `description` argument is ignored, and the input schema is never mutated.
+ *
+ * See https://github.com/cloudflare/ai/issues/559.
+ */
+export function buildJsonSchemaPayload(
+	schema: unknown,
+	name?: string,
+	description?: string,
+): unknown {
+	// Only plain objects can carry JSON Schema keywords. Anything else (arrays,
+	// primitives, `undefined` when no schema was supplied) passes through untouched.
+	if (typeof schema !== "object" || schema === null || Array.isArray(schema)) {
+		return schema;
+	}
+	const record = schema as Record<string, unknown>;
+	const addTitle = !!name && record.title === undefined; // non-empty name and no title set yet
+	const addDescription = !!description && record.description === undefined; // same rule for description
+	if (!addTitle && !addDescription) {
+		return schema; // nothing to add: hand back the very same object, uncopied
+	}
+	return { // shallow copy, so the caller's schema object is left unmodified
+		...record,
+		...(addTitle ? { title: name } : {}),
+		...(addDescription ? { description } : {}),
+	};
+}
 // ---------------------------------------------------------------------------
 // Tool preparation
 // ---------------------------------------------------------------------------
@@ -301,12 +352,17 @@ export function prepareToolsAndToolChoice(
 		case "required":
 			return { tool_choice: "required", tools: mappedTools };
-		// Workers AI does not support tool mode directly,
-		// so we filter the tools and force the tool choice through 'required'
+		// Force a specific tool via the OpenAI-style named-function form.
+		// Workers AI enforces this server-side, unlike "required" which is
+		// advisory and "fails open" on long contexts / reasoning models (the
+		// model can answer in prose instead of calling the tool). The full tool
+		// list is kept (not filtered to the single function) to match OpenAI
+		// semantics and preserve tool-result context fidelity.
+		// See https://github.com/cloudflare/ai/issues/560.
 		case "tool":
 			return {
-				tool_choice: "required",
-				tools: mappedTools.filter((tool) => tool.function.name === toolChoice.toolName),
+				tool_choice: { type: "function", function: { name: toolChoice.toolName } },
+				tools: mappedTools,
 			};
 		default: {
 			const exhaustiveCheck = type satisfies never;
@@ -323,6 +379,23 @@ export function prepareToolsAndToolChoice(
 // Tool call processing
 // ---------------------------------------------------------------------------
+const TOOL_CALL_ID_MARKER = "::cf-wai-tool-call::"; // separator between the upstream tool-call id and the appended generateId() suffix
+export function createAISDKToolCallId(toolCallId: string | null | undefined): string { // builds `<id><marker><suffix>`
+	const originalId = toolCallId || generateId(); // null, undefined or "" upstream id: substitute a generated one
+	return `${originalId}${TOOL_CALL_ID_MARKER}${generateId()}`; // a second generateId() call supplies the suffix
+}
+export function toWorkersAIToolCallId(toolCallId: string): string { // strips the marker and suffix added by createAISDKToolCallId
+	const markerIndex = toolCallId.lastIndexOf(TOOL_CALL_ID_MARKER); // last occurrence, in case the original id contains the marker text
+	if (markerIndex === -1) return toolCallId; // no marker: return the id unchanged
+	const suffixIndex = markerIndex + TOOL_CALL_ID_MARKER.length;
+	if (suffixIndex >= toolCallId.length) return toolCallId; // marker with nothing after it: return the id unchanged
+	return toolCallId.slice(0, markerIndex); // everything before the last marker
+}
 /** Workers AI flat tool call format (non-streaming, native) */
 interface FlatToolCall {
 	name: string;
@@ -406,7 +479,7 @@ function processToolCall(toolCall: FlatToolCall | OpenAIToolCall): LanguageModel
 				typeof fn.arguments === "string"
 					? fn.arguments
 					: JSON.stringify(fn.arguments || {}),
-			toolCallId: toolCall.id || generateId(),
+			toolCallId: createAISDKToolCallId(toolCall.id),
 			type: "tool-call",
 			toolName: fn.name,
 		};
@@ -419,7 +492,7 @@ function processToolCall(toolCall: FlatToolCall | OpenAIToolCall): LanguageModel
 			typeof flat.arguments === "string"
 				? flat.arguments
 				: JSON.stringify(flat.arguments || {}),
-		toolCallId: flat.id || generateId(),
+		toolCallId: createAISDKToolCallId(flat.id),
 		type: "tool-call",
 		toolName: flat.name,
 	};
@@ -447,6 +520,133 @@ export function processPartialToolCalls(partialToolCalls: PartialToolCall[]) {
 	return processToolCalls({ tool_calls: mergedToolCalls });
 }
+// ---------------------------------------------------------------------------
+// Forced tool-call salvage (gpt-oss harmony quirk)
+// ---------------------------------------------------------------------------
+/**
+ * Did this request force the model to call a tool?
+ *
+ * True for both `tool_choice: "required"` (any tool) and the named-function
+ * form `{ type: "function", function: { name } }` (one specific tool).
+ */
+export function isForcedToolChoice(toolChoice: unknown): boolean {
+	if (toolChoice === "required") return true;
+	return (
+		typeof toolChoice === "object" &&
+		toolChoice !== null &&
+		(toolChoice as { type?: unknown }).type === "function" // only the `type` tag is checked, not `function.name`
+	);
+}
+/**
+ * Parse tool calls that a model leaked as JSON text instead of structured
+ * `tool_calls`. Shared by the non-streaming salvage and the streaming buffer.
+ *
+ * Only JSON objects whose `name` is one of `knownToolNames` are recovered;
+ * everything else (prose, harmony channel/role leaks like `{"name":"analysis"}`,
+ * hallucinated names) is ignored to avoid fabricating bogus calls.
+ */
+export function parseLeakedToolCalls(
+	text: string,
+	knownToolNames: Set<string>,
+): LanguageModelV3ToolCall[] {
+	let parsed: unknown;
+	try {
+		parsed = JSON.parse(text.trim()); // the whole trimmed text must be a single JSON value
+	} catch {
+		return []; // not JSON (e.g. prose): nothing to recover
+	}
+	const candidates = Array.isArray(parsed) ? parsed : [parsed]; // treat a lone object as a one-element list
+	const salvaged: LanguageModelV3ToolCall[] = [];
+	for (const candidate of candidates) {
+		if (typeof candidate !== "object" || candidate === null) continue;
+		const obj = candidate as Record<string, unknown>;
+		const name = obj.name;
+		if (typeof name !== "string" || !knownToolNames.has(name)) continue;
+		// Arguments may be wrapped (`arguments`/`parameters`) or flattened as
+		// siblings of `name`. `arguments` wins when both wrappers are present.
+		let args: unknown;
+		if ("arguments" in obj) {
+			args = obj.arguments;
+		} else if ("parameters" in obj) {
+			args = obj.parameters;
+		} else {
+			const { name: _name, ...rest } = obj;
+			args = rest;
+		}
+		salvaged.push({
+			input: typeof args === "string" ? args : JSON.stringify(args ?? {}), // string args pass through; null/undefined become "{}"
+			toolCallId: createAISDKToolCallId(undefined), // no id is read from the leaked JSON, so one is generated
+			type: "tool-call",
+			toolName: name,
+		});
+	}
+	return salvaged;
+}
+/** Collect the function names of the mapped tools into a Set; `undefined` tools yield an empty Set and non-string names are skipped. */
+export function getToolNames(
+	tools: Array<{ function: { name?: string } }> | undefined,
+): Set<string> {
+	return new Set(
+		(tools ?? [])
+			.map((tool) => tool.function?.name)
+			.filter((name): name is string => typeof name === "string"),
+	);
+}
+/**
+ * Salvage a tool call that a model leaked into text content instead of the
+ * structured `tool_calls` field.
+ *
+ * Workers AI's gpt-oss models (harmony format) sometimes emit a forced tool
+ * call as raw JSON in `message.content` with an empty `tool_calls` array and
+ * `finish_reason: "stop"` — typically when the forced tool is a poor fit for
+ * the conversation. The content looks like one of:
+ *
+ *   {"name":"read_skill_resource","path":"feedback.txt"}        (flat args)
+ *   {"name":"calc","arguments":{"a":1}}                          (wrapped args)
+ *   [{"name":"calc","parameters":{"a":1}}]                       (array form)
+ *
+ * This reinterprets that text as a structured tool call. It is intentionally
+ * narrow to avoid false positives:
+ *   - only runs when a tool was *forced* (required / named-function), so a
+ *     tool call was explicitly demanded by the caller;
+ *   - only runs when there are no real structured tool calls to override;
+ *   - only matches JSON objects whose `name` is one of the requested tools.
+ *
+ * Returns the salvaged tool calls, or `null` when nothing was salvaged.
+ *
+ * See https://github.com/cloudflare/ai/issues/560.
+ */
+export function salvageToolCallsFromText(
+	output: Record<string, unknown>,
+	context: {
+		tools: Array<{ function: { name?: string } }> | undefined;
+		toolChoice: unknown;
+	},
+): LanguageModelV3ToolCall[] | null {
+	if (!isForcedToolChoice(context.toolChoice)) return null;
+	// Never override real tool calls.
+	if (processToolCalls(output).length > 0) return null;
+	const knownToolNames = getToolNames(context.tools);
+	if (knownToolNames.size === 0) return null; // no tool names to match the leaked `name` against
+	const text = processText(output);
+	if (!text) return null; // no text content to reinterpret
+	const salvaged = parseLeakedToolCalls(text, knownToolNames);
+	return salvaged.length > 0 ? salvaged : null; // empty result is reported as null, never as []
+}
 // ---------------------------------------------------------------------------
 // Text extraction
 // ---------------------------------------------------------------------------

package/src/workersai-chat-language-model.ts CHANGED Viewed

@@ -5,10 +5,12 @@ import { mapWorkersAIFinishReason } from "./map-workersai-finish-reason";
 import { mapWorkersAIUsage } from "./map-workersai-usage";
 import { getMappedStream, prependStreamStart } from "./streaming";
 import {
+	buildJsonSchemaPayload,
 	normalizeMessagesForBinding,
 	prepareToolsAndToolChoice,
 	processText,
 	processToolCalls,
+	salvageToolCallsFromText,
 } from "./utils";
 import type { WorkersAIChatSettings } from "./workersai-chat-settings";
 import type { TextGenerationModels } from "./workersai-models";
@@ -93,13 +95,23 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 			}
 			case "json": {
+				// Native Workers AI expects a BARE JSON Schema under `json_schema`
+				// (not OpenAI's `{ name, schema, strict }` envelope — partner models
+				// that need that go through the gateway delegate, not this path). We
+				// fold the AI SDK's `name`/`description` into the schema as `title`/
+				// `description` so they aren't lost. See
+				// https://github.com/cloudflare/ai/issues/559.
+				const json = responseFormat?.type === "json" ? responseFormat : undefined;
 				return {
 					args: {
 						...baseArgs,
 						response_format: {
 							type: "json_schema",
-							json_schema:
-								responseFormat?.type === "json" ? responseFormat.schema : undefined,
+							json_schema: buildJsonSchemaPayload(
+								json?.schema,
+								json?.name,
+								json?.description,
+							),
 						},
 						tools: undefined,
 						tool_choice: undefined,
@@ -203,6 +215,57 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 		};
 	}
+	/**
+	 * Extract reasoning, text, tool calls, and finish reason from a non-streaming response.
+	 *
+	 * Shared by `doGenerate` and `doStream`'s graceful-degradation branch (the
+	 * path gpt-oss falls through, since it doesn't support `/ai/run/` streaming
+	 * and is retried non-streaming). When a forced tool call was leaked into
+	 * text content (gpt-oss harmony quirk), it is salvaged into a structured
+	 * tool call and the leaked JSON text is suppressed. In that case a warning is
+	 * pushed onto the passed-in `warnings` array so callers can observe it.
+	 */
+	private extractContent(
+		outputRecord: Record<string, unknown>,
+		args: ReturnType<typeof this.getArgs>["args"],
+		warnings: SharedV3Warning[],
+	) {
+		const choices = outputRecord.choices as
+			| Array<{ message?: { reasoning_content?: string; reasoning?: string } }>
+			| undefined;
+		const reasoningContent =
+			choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning; // first choice only; `reasoning_content` preferred over `reasoning`
+		const toolCalls = processToolCalls(outputRecord);
+		const salvaged =
+			toolCalls.length === 0 // attempt salvage only when there are no structured tool calls
+				? salvageToolCallsFromText(outputRecord, {
+						tools: args.tools,
+						toolChoice: args.tool_choice,
+					})
+				: null;
+		if (salvaged) {
+			warnings.push({
+				type: "other",
+				message: `Recovered ${salvaged.length} forced tool call(s) that the model emitted as text content instead of structured tool calls (model: ${this.modelId}).`,
+			});
+		}
+		return {
+			reasoningContent,
+			// Suppress the leaked JSON text when we salvaged a tool call from it.
+			text: salvaged ? "" : (processText(outputRecord) ?? ""),
+			toolCalls: salvaged ?? toolCalls,
+			// When salvaged, the upstream finish_reason is "stop"; report
+			// "tool-calls" so the response is indistinguishable from a native
+			// tool call and the agentic loop continues correctly.
+			finishReason: salvaged
+				? ({ unified: "tool-calls", raw: "stop" } as const)
+				: mapWorkersAIFinishReason(outputRecord),
+		};
+	}
 	async doGenerate(
 		options: Parameters<LanguageModelV3["doGenerate"]>[0],
 	): Promise<Awaited<ReturnType<LanguageModelV3["doGenerate"]>>> {
@@ -230,25 +293,20 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 		}
 		const outputRecord = output as Record<string, unknown>;
-		const choices = outputRecord.choices as
-			| Array<{
-					message?: { reasoning_content?: string; reasoning?: string };
-			  }>
-			| undefined;
-		const reasoningContent =
-			choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
+		const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(
+			outputRecord,
+			args,
+			warnings,
+		);
 		return {
-			finishReason: mapWorkersAIFinishReason(outputRecord),
+			finishReason,
 			content: [
 				...(reasoningContent
 					? [{ type: "reasoning" as const, text: reasoningContent }]
 					: []),
-				{
-					type: "text",
-					text: processText(outputRecord) ?? "",
-				},
-				...processToolCalls(outputRecord),
+				{ type: "text" as const, text },
+				...toolCalls,
 			],
 			usage: mapWorkersAIUsage(output as Record<string, unknown>),
 			warnings,
@@ -279,20 +337,24 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 		// If the binding returned a stream, pipe it through the SSE mapper
 		if (response instanceof ReadableStream) {
 			return {
-				stream: prependStreamStart(getMappedStream(response), warnings),
+				stream: prependStreamStart(
+					getMappedStream(response, {
+						tools: args.tools,
+						toolChoice: args.tool_choice,
+					}),
+					warnings,
+				),
 			};
 		}
 		// Graceful degradation: some models return a non-streaming response even
 		// when stream:true is requested. Wrap the complete response as a stream.
 		const outputRecord = response as Record<string, unknown>;
-		const choices = outputRecord.choices as
-			| Array<{
-					message?: { reasoning_content?: string; reasoning?: string };
-			  }>
-			| undefined;
-		const reasoningContent =
-			choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
+		const { reasoningContent, text, toolCalls, finishReason } = this.extractContent(
+			outputRecord,
+			args,
+			warnings,
+		);
 		let textId: string | null = null;
 		let reasoningId: string | null = null;
@@ -316,7 +378,6 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 						controller.enqueue({ type: "reasoning-end", id: reasoningId });
 					}
-					const text = processText(outputRecord);
 					if (text) {
 						textId = generateId();
 						controller.enqueue({ type: "text-start", id: textId });
@@ -324,13 +385,13 @@ export class WorkersAIChatLanguageModel implements LanguageModelV3 {
 						controller.enqueue({ type: "text-end", id: textId });
 					}
-					for (const toolCall of processToolCalls(outputRecord)) {
+					for (const toolCall of toolCalls) {
 						controller.enqueue(toolCall);
 					}
 					controller.enqueue({
 						type: "finish",
-						finishReason: mapWorkersAIFinishReason(outputRecord),
+						finishReason,
 						usage: mapWorkersAIUsage(response as Record<string, unknown>),
 					});
 					controller.close();

package/src/workersai-chat-settings.ts CHANGED Viewed

@@ -18,7 +18,7 @@ export type WorkersAIChatSettings = {
 	/**
 	 * Controls the reasoning budget for reasoning-capable Workers AI models
-	 * (e.g. `@cf/zai-org/glm-4.7-flash`, `@cf/moonshotai/kimi-k2.5`,
+	 * (e.g. `@cf/zai-org/glm-4.7-flash`, `@cf/moonshotai/kimi-k2.7-code`,
 	 * `@cf/openai/gpt-oss-120b`).
 	 *
 	 * `null` is a valid value and disables reasoning for models that support it.

package/src/workersai-models.ts CHANGED Viewed

@@ -1,11 +1,19 @@
+/**
+ * The known (typed) BaseAiTextGeneration model ids — the literal union without
+ * the `(string & {})` escape hatch. Used to drive editor autocomplete while
+ * still capturing the exact literal a caller passed (see `WorkersAI`).
+ */
+export type KnownTextGenerationModels = Exclude<
+	value2key<AiModels, BaseAiTextGeneration>,
+	value2key<AiModels, BaseAiTextToImage> // drop keys that also match the text-to-image model type
+>;
 /**
  * The names of the BaseAiTextGeneration models.
  *
  * Accepts any string at runtime, but provides autocomplete for known models.
  */
-export type TextGenerationModels =
-	| Exclude<value2key<AiModels, BaseAiTextGeneration>, value2key<AiModels, BaseAiTextToImage>>
-	| (string & {});
+export type TextGenerationModels = KnownTextGenerationModels | (string & {});
 /*
  * The names of the BaseAiTextToImage models.