npm - workers-ai-provider - Versions diffs - 3.1.2 → 3.1.4 - Mend

workers-ai-provider 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/README.md +37 -1
package/dist/index.d.ts +11 -0
package/dist/index.js +122 -54
package/dist/index.js.map +1 -1
package/package.json +3 -3
package/src/convert-to-workersai-chat-messages.ts +78 -22
package/src/streaming.ts +3 -1
package/src/utils.ts +38 -41
package/src/workersai-chat-language-model.ts +45 -26
package/src/workersai-chat-prompt.ts +5 -1
package/src/workersai-chat-settings.ts +6 -0

package/README.md CHANGED

@@ -111,6 +111,29 @@ for await (const chunk of result.textStream) {
 }
 ```
+## Vision (Image Inputs)
+Send images to vision-capable models like Llama 4 Scout and Kimi K2.5:
+```ts
+import { generateText } from "ai";
+const { text } = await generateText({
+	model: workersai("@cf/meta/llama-4-scout-17b-16e-instruct"),
+	messages: [
+		{
+			role: "user",
+			content: [
+				{ type: "text", text: "What's in this image?" },
+				{ type: "image", image: imageUint8Array },
+			],
+		},
+	],
+});
+```
+Images can be provided as `Uint8Array`, base64 strings, or data URLs. Multiple images per message are supported. Works with both the binding and REST API configurations.
 ## Tool Calling
 ```ts
@@ -287,7 +310,20 @@ Streaming works the same way — use `streamText` instead of `generateText`.
 | `apiKey`    | `string`         | Cloudflare API token. Required with `accountId`.                             |
 | `gateway`   | `GatewayOptions` | Optional [AI Gateway](https://developers.cloudflare.com/ai-gateway/) config. |
-Returns a provider with model factories:
+Returns a provider with model factories. Each factory accepts an optional second argument for per-model settings:
+```ts
+workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast", {
+	sessionAffinity: "my-unique-session-id",
+});
+```
+| Setting           | Type      | Description                                                                                  |
+| ----------------- | --------- | -------------------------------------------------------------------------------------------- |
+| `safePrompt`      | `boolean` | Inject a safety prompt before all conversations.                                             |
+| `sessionAffinity` | `string`  | Routes requests with the same key to the same backend replica for prefix-cache optimization. |
+Model factories:
 ```ts
 // Chat — for generateText / streamText

package/dist/index.d.ts CHANGED

@@ -111,6 +111,11 @@ type WorkersAIChatSettings = {
      * Optionally set Cloudflare AI Gateway options.
      */
     gateway?: GatewayOptions;
+    /**
+     * Session affinity key for prefix-cache optimization.
+     * Routes requests with the same key to the same backend replica.
+     */
+    sessionAffinity?: string;
     /**
      * Passthrough settings that are provided directly to the run function.
      * Use this for any provider-specific options not covered by the typed fields.
@@ -137,6 +142,12 @@ declare class WorkersAIChatLanguageModel implements LanguageModelV3 {
     private getArgs;
     /**
      * Build the inputs object for `binding.run()`, shared by doGenerate and doStream.
+     *
+     * Images are embedded inline in messages as OpenAI-compatible content
+     * arrays with `image_url` parts. Both the REST API and the binding
+     * accept this format at runtime.
+     *
+     * The binding path additionally normalises null content to empty strings.
      */
     private buildRunInputs;
     /**

package/dist/index.js CHANGED

@@ -3,9 +3,43 @@ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { en
 var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
 // src/convert-to-workersai-chat-messages.ts
+function toUint8Array(data) {
+  if (data instanceof Uint8Array) {
+    return data;
+  }
+  if (typeof data === "string") {
+    let base64 = data;
+    if (base64.startsWith("data:")) {
+      const commaIndex = base64.indexOf(",");
+      if (commaIndex >= 0) {
+        base64 = base64.slice(commaIndex + 1);
+      }
+    }
+    const binaryString = atob(base64);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+      bytes[i] = binaryString.charCodeAt(i);
+    }
+    return bytes;
+  }
+  if (data instanceof URL) {
+    throw new Error(
+      "URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead."
+    );
+  }
+  return null;
+}
+function uint8ArrayToBase64(bytes) {
+  let binary = "";
+  const chunkSize = 8192;
+  for (let i = 0; i < bytes.length; i += chunkSize) {
+    const chunk = bytes.subarray(i, Math.min(i + chunkSize, bytes.length));
+    binary += String.fromCharCode(...chunk);
+  }
+  return btoa(binary);
+}
 function convertToWorkersAIChatMessages(prompt) {
   const messages = [];
-  const images = [];
   for (const { role, content } of prompt) {
     switch (role) {
       case "system": {
@@ -14,6 +48,7 @@ function convertToWorkersAIChatMessages(prompt) {
       }
       case "user": {
         const textParts = [];
+        const imageParts = [];
         for (const part of content) {
           switch (part.type) {
             case "text": {
@@ -21,21 +56,34 @@ function convertToWorkersAIChatMessages(prompt) {
               break;
             }
             case "file": {
-              if (part.data instanceof Uint8Array) {
-                images.push({
-                  image: part.data,
-                  mediaType: part.mediaType,
-                  providerOptions: part.providerOptions
+              const imageBytes = toUint8Array(part.data);
+              if (imageBytes) {
+                imageParts.push({
+                  image: imageBytes,
+                  mediaType: part.mediaType
                 });
               }
               break;
             }
           }
         }
-        messages.push({
-          content: textParts.join("\n"),
-          role: "user"
-        });
+        if (imageParts.length > 0) {
+          const contentArray = [];
+          if (textParts.length > 0) {
+            contentArray.push({ type: "text", text: textParts.join("\n") });
+          }
+          for (const img of imageParts) {
+            const base64 = uint8ArrayToBase64(img.image);
+            const mediaType = img.mediaType || "image/png";
+            contentArray.push({
+              type: "image_url",
+              image_url: { url: `data:${mediaType};base64,${base64}` }
+            });
+          }
+          messages.push({ content: contentArray, role: "user" });
+        } else {
+          messages.push({ content: textParts.join("\n"), role: "user" });
+        }
         break;
       }
       case "assistant": {
@@ -106,7 +154,7 @@ function convertToWorkersAIChatMessages(prompt) {
       }
     }
   }
-  return { images, messages };
+  return { messages };
 }
 // src/map-workersai-usage.ts
@@ -274,7 +322,7 @@ function getMappedStream(response) {
         }
         if (choices?.[0]?.delta) {
           const delta = choices[0].delta;
-          const reasoningDelta = delta.reasoning_content;
+          const reasoningDelta = delta.reasoning_content ?? delta.reasoning;
           if (reasoningDelta && reasoningDelta.length > 0) {
             if (!reasoningId) {
               reasoningId = generateId();
@@ -411,25 +459,12 @@ var SSEDecoder = class extends TransformStream {
 // src/utils.ts
 import { generateId as generateId2 } from "ai";
-function sanitizeToolCallId(id) {
-  const alphanumeric = id.replace(/[^a-zA-Z0-9]/g, "");
-  return alphanumeric.slice(0, 9).padEnd(9, "0");
-}
 function normalizeMessagesForBinding(messages) {
   return messages.map((msg) => {
     const normalized = { ...msg };
     if (normalized.content === null || normalized.content === void 0) {
       normalized.content = "";
     }
-    if ("tool_call_id" in normalized && typeof normalized.tool_call_id === "string") {
-      normalized.tool_call_id = sanitizeToolCallId(normalized.tool_call_id);
-    }
-    if ("tool_calls" in normalized && Array.isArray(normalized.tool_calls)) {
-      normalized.tool_calls = normalized.tool_calls.map((tc) => ({
-        ...tc,
-        id: sanitizeToolCallId(tc.id)
-      }));
-    }
     return normalized;
   });
 }
@@ -437,9 +472,9 @@ function createRun(config) {
   const { accountId, apiKey } = config;
   return async function run(model, inputs, options) {
     const {
-      gateway: _gateway,
+      gateway,
       prefix: _prefix,
-      extraHeaders: _extraHeaders,
+      extraHeaders,
       returnRawResponse,
       signal,
       // AbortSignal — not serializable as a query parameter
@@ -465,11 +500,27 @@ function createRun(config) {
       }
     }
     const queryString = urlParams.toString();
-    const url = `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/run/${model}${queryString ? `?${queryString}` : ""}`;
+    const modelPath = String(model).startsWith("run/") ? model : `run/${model}`;
+    const url = gateway?.id ? `https://gateway.ai.cloudflare.com/v1/${accountId}/${gateway.id}/workers-ai/${modelPath}${queryString ? `?${queryString}` : ""}` : `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/${modelPath}${queryString ? `?${queryString}` : ""}`;
     const headers = {
       Authorization: `Bearer ${apiKey}`,
-      "Content-Type": "application/json"
+      "Content-Type": "application/json",
+      ...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {}
     };
+    if (gateway) {
+      if (gateway.skipCache) {
+        headers["cf-aig-skip-cache"] = "true";
+      }
+      if (typeof gateway.cacheTtl === "number") {
+        headers["cf-aig-cache-ttl"] = String(gateway.cacheTtl);
+      }
+      if (gateway.cacheKey) {
+        headers["cf-aig-cache-key"] = gateway.cacheKey;
+      }
+      if (gateway.metadata) {
+        headers["cf-aig-metadata"] = JSON.stringify(gateway.metadata);
+      }
+    }
     const body = JSON.stringify(inputs);
     const response = await fetch(url, {
       body,
@@ -500,7 +551,10 @@ function createRun(config) {
         return response.body;
       }
       const retryResponse = await fetch(url, {
-        body: JSON.stringify({ ...inputs, stream: false }),
+        body: JSON.stringify({
+          ...inputs,
+          stream: false
+        }),
         headers,
         method: "POST",
         signal
@@ -842,7 +896,8 @@ var WorkersAIChatLanguageModel = class {
               type: "json_schema",
               json_schema: responseFormat?.type === "json" ? responseFormat.schema : void 0
             },
-            tools: void 0
+            tools: void 0,
+            tool_choice: void 0
           },
           warnings
         };
@@ -855,21 +910,21 @@ var WorkersAIChatLanguageModel = class {
   }
   /**
    * Build the inputs object for `binding.run()`, shared by doGenerate and doStream.
+   *
+   * Images are embedded inline in messages as OpenAI-compatible content
+   * arrays with `image_url` parts. Both the REST API and the binding
+   * accept this format at runtime.
+   *
+   * The binding path additionally normalises null content to empty strings.
    */
-  buildRunInputs(args, messages, images, options) {
-    if (images.length > 1) {
-      throw new Error("Multiple images are not yet supported as input");
-    }
-    const imagePart = images[0];
-    const finalMessages = this.config.isBinding ? normalizeMessagesForBinding(messages) : messages;
+  buildRunInputs(args, messages, options) {
     return {
       max_tokens: args.max_tokens,
-      messages: finalMessages,
+      messages: this.config.isBinding ? normalizeMessagesForBinding(messages) : messages,
       temperature: args.temperature,
       tools: args.tools,
+      ...args.tool_choice ? { tool_choice: args.tool_choice } : {},
       top_p: args.top_p,
-      ...imagePart ? { image: Array.from(imagePart.image) } : {},
-      // Only include response_format when actually set
       ...args.response_format ? { response_format: args.response_format } : {},
       ...options?.stream ? { stream: true } : {}
     };
@@ -878,19 +933,32 @@ var WorkersAIChatLanguageModel = class {
    * Get passthrough options for binding.run() from settings.
    */
   getRunOptions() {
-    const { gateway, safePrompt: _safePrompt, ...passthroughOptions } = this.settings;
+    const {
+      gateway,
+      safePrompt: _safePrompt,
+      sessionAffinity,
+      extraHeaders,
+      ...passthroughOptions
+    } = this.settings;
+    const mergedHeaders = {
+      ...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {},
+      ...sessionAffinity ? { "x-session-affinity": sessionAffinity } : {}
+    };
     return {
       gateway: this.config.gateway ?? gateway,
+      ...Object.keys(mergedHeaders).length > 0 ? { extraHeaders: mergedHeaders } : {},
       ...passthroughOptions
     };
   }
   async doGenerate(options) {
     const { args, warnings } = this.getArgs(options);
-    const { messages, images } = convertToWorkersAIChatMessages(options.prompt);
-    const inputs = this.buildRunInputs(args, messages, images);
+    const { messages } = convertToWorkersAIChatMessages(options.prompt);
+    const inputs = this.buildRunInputs(args, messages);
     const runOptions = this.getRunOptions();
     const output = await this.config.binding.run(
       args.model,
+      // Content arrays for vision are valid at runtime but not in the
+      // binding's strict TypeScript definitions (which expect string content).
       inputs,
       runOptions
     );
@@ -901,7 +969,7 @@ var WorkersAIChatLanguageModel = class {
     }
     const outputRecord = output;
     const choices = outputRecord.choices;
-    const reasoningContent = choices?.[0]?.message?.reasoning_content;
+    const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
     return {
       finishReason: mapWorkersAIFinishReason(outputRecord),
       content: [
@@ -918,8 +986,8 @@ var WorkersAIChatLanguageModel = class {
   }
   async doStream(options) {
     const { args, warnings } = this.getArgs(options);
-    const { messages, images } = convertToWorkersAIChatMessages(options.prompt);
-    const inputs = this.buildRunInputs(args, messages, images, { stream: true });
+    const { messages } = convertToWorkersAIChatMessages(options.prompt);
+    const inputs = this.buildRunInputs(args, messages, { stream: true });
     const runOptions = this.getRunOptions();
     const response = await this.config.binding.run(
       args.model,
@@ -933,7 +1001,7 @@ var WorkersAIChatLanguageModel = class {
     }
     const outputRecord = response;
     const choices = outputRecord.choices;
-    const reasoningContent = choices?.[0]?.message?.reasoning_content;
+    const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
     let textId = null;
     let reasoningId = null;
     return {
@@ -1012,7 +1080,7 @@ var WorkersAIImageModel = class {
         seed,
         width
       });
-      return toUint8Array(output);
+      return toUint8Array2(output);
     };
     const images = await Promise.all(
       Array.from({ length: n }, () => generateImage())
@@ -1040,7 +1108,7 @@ function parseInteger(value) {
   const number = Number(value);
   return Number.isInteger(number) ? number : void 0;
 }
-async function toUint8Array(output) {
+async function toUint8Array2(output) {
   if (output instanceof Uint8Array) {
     return output;
   }
@@ -1121,7 +1189,7 @@ var WorkersAITranscriptionModel = class {
   // ---------------------------------------------------------------------------
   async runWhisper(audioBytes, abortSignal) {
     const modelStr = this.modelId;
-    const audio = modelStr === "@cf/openai/whisper-large-v3-turbo" ? uint8ArrayToBase64(audioBytes) : Array.from(audioBytes);
+    const audio = modelStr === "@cf/openai/whisper-large-v3-turbo" ? uint8ArrayToBase642(audioBytes) : Array.from(audioBytes);
     const inputs = { audio };
     if (this.settings.language) {
       inputs.language = this.settings.language;
@@ -1177,7 +1245,7 @@ var WorkersAITranscriptionModel = class {
       return this.config.binding.run(
         this.modelId,
         {
-          audio: { body: uint8ArrayToBase64(audioBytes), contentType: mediaType }
+          audio: { body: uint8ArrayToBase642(audioBytes), contentType: mediaType }
         },
         { gateway: this.config.gateway, signal: abortSignal }
       );
@@ -1224,7 +1292,7 @@ var WorkersAITranscriptionModel = class {
     };
   }
 };
-function uint8ArrayToBase64(bytes) {
+function uint8ArrayToBase642(bytes) {
   let binary = "";
   for (let i = 0; i < bytes.length; i++) {
     binary += String.fromCharCode(bytes[i]);
@@ -1276,7 +1344,7 @@ var WorkersAISpeechModel = class {
         returnRawResponse: true
       }
     );
-    const audio = await toUint8Array2(result);
+    const audio = await toUint8Array3(result);
     return {
       audio,
       warnings,
@@ -1288,7 +1356,7 @@ var WorkersAISpeechModel = class {
     };
   }
 };
-async function toUint8Array2(output) {
+async function toUint8Array3(output) {
   if (output instanceof Response) {
     return new Uint8Array(await output.arrayBuffer());
   }