workers-ai-provider 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -111,6 +111,29 @@ for await (const chunk of result.textStream) {
111
111
  }
112
112
  ```
113
113
 
114
+ ## Vision (Image Inputs)
115
+
116
+ Send images to vision-capable models like Llama 4 Scout and Kimi K2.5:
117
+
118
+ ```ts
119
+ import { generateText } from "ai";
120
+
121
+ const { text } = await generateText({
122
+ model: workersai("@cf/meta/llama-4-scout-17b-16e-instruct"),
123
+ messages: [
124
+ {
125
+ role: "user",
126
+ content: [
127
+ { type: "text", text: "What's in this image?" },
128
+ { type: "image", image: imageUint8Array },
129
+ ],
130
+ },
131
+ ],
132
+ });
133
+ ```
134
+
135
+ Images can be provided as `Uint8Array`, base64 strings, or data URLs. Multiple images per message are supported. Works with both the binding and REST API configurations.
136
+
114
137
  ## Tool Calling
115
138
 
116
139
  ```ts
@@ -287,7 +310,20 @@ Streaming works the same way — use `streamText` instead of `generateText`.
287
310
  | `apiKey` | `string` | Cloudflare API token. Required with `accountId`. |
288
311
  | `gateway` | `GatewayOptions` | Optional [AI Gateway](https://developers.cloudflare.com/ai-gateway/) config. |
289
312
 
290
- Returns a provider with model factories:
313
+ Returns a provider with model factories. Each factory accepts an optional second argument for per-model settings:
314
+
315
+ ```ts
316
+ workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast", {
317
+ sessionAffinity: "my-unique-session-id",
318
+ });
319
+ ```
320
+
321
+ | Setting | Type | Description |
322
+ | ----------------- | --------- | -------------------------------------------------------------------------------------------- |
323
+ | `safePrompt` | `boolean` | Inject a safety prompt before all conversations. |
324
+ | `sessionAffinity` | `string` | Routes requests with the same key to the same backend replica for prefix-cache optimization. |
325
+
326
+ Model factories:
291
327
 
292
328
  ```ts
293
329
  // Chat — for generateText / streamText
package/dist/index.d.ts CHANGED
@@ -111,6 +111,11 @@ type WorkersAIChatSettings = {
111
111
  * Optionally set Cloudflare AI Gateway options.
112
112
  */
113
113
  gateway?: GatewayOptions;
114
+ /**
115
+ * Session affinity key for prefix-cache optimization.
116
+ * Routes requests with the same key to the same backend replica.
117
+ */
118
+ sessionAffinity?: string;
114
119
  /**
115
120
  * Passthrough settings that are provided directly to the run function.
116
121
  * Use this for any provider-specific options not covered by the typed fields.
@@ -137,6 +142,12 @@ declare class WorkersAIChatLanguageModel implements LanguageModelV3 {
137
142
  private getArgs;
138
143
  /**
139
144
  * Build the inputs object for `binding.run()`, shared by doGenerate and doStream.
145
+ *
146
+ * Images are embedded inline in messages as OpenAI-compatible content
147
+ * arrays with `image_url` parts. Both the REST API and the binding
148
+ * accept this format at runtime.
149
+ *
150
+ * The binding path additionally normalises null content to empty strings.
140
151
  */
141
152
  private buildRunInputs;
142
153
  /**
package/dist/index.js CHANGED
@@ -3,9 +3,43 @@ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { en
3
3
  var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
4
4
 
5
5
  // src/convert-to-workersai-chat-messages.ts
6
+ function toUint8Array(data) {
7
+ if (data instanceof Uint8Array) {
8
+ return data;
9
+ }
10
+ if (typeof data === "string") {
11
+ let base64 = data;
12
+ if (base64.startsWith("data:")) {
13
+ const commaIndex = base64.indexOf(",");
14
+ if (commaIndex >= 0) {
15
+ base64 = base64.slice(commaIndex + 1);
16
+ }
17
+ }
18
+ const binaryString = atob(base64);
19
+ const bytes = new Uint8Array(binaryString.length);
20
+ for (let i = 0; i < binaryString.length; i++) {
21
+ bytes[i] = binaryString.charCodeAt(i);
22
+ }
23
+ return bytes;
24
+ }
25
+ if (data instanceof URL) {
26
+ throw new Error(
27
+ "URL image sources are not supported by Workers AI. Provide image data as a Uint8Array or base64 string instead."
28
+ );
29
+ }
30
+ return null;
31
+ }
32
+ function uint8ArrayToBase64(bytes) {
33
+ let binary = "";
34
+ const chunkSize = 8192;
35
+ for (let i = 0; i < bytes.length; i += chunkSize) {
36
+ const chunk = bytes.subarray(i, Math.min(i + chunkSize, bytes.length));
37
+ binary += String.fromCharCode(...chunk);
38
+ }
39
+ return btoa(binary);
40
+ }
6
41
  function convertToWorkersAIChatMessages(prompt) {
7
42
  const messages = [];
8
- const images = [];
9
43
  for (const { role, content } of prompt) {
10
44
  switch (role) {
11
45
  case "system": {
@@ -14,6 +48,7 @@ function convertToWorkersAIChatMessages(prompt) {
14
48
  }
15
49
  case "user": {
16
50
  const textParts = [];
51
+ const imageParts = [];
17
52
  for (const part of content) {
18
53
  switch (part.type) {
19
54
  case "text": {
@@ -21,21 +56,34 @@ function convertToWorkersAIChatMessages(prompt) {
21
56
  break;
22
57
  }
23
58
  case "file": {
24
- if (part.data instanceof Uint8Array) {
25
- images.push({
26
- image: part.data,
27
- mediaType: part.mediaType,
28
- providerOptions: part.providerOptions
59
+ const imageBytes = toUint8Array(part.data);
60
+ if (imageBytes) {
61
+ imageParts.push({
62
+ image: imageBytes,
63
+ mediaType: part.mediaType
29
64
  });
30
65
  }
31
66
  break;
32
67
  }
33
68
  }
34
69
  }
35
- messages.push({
36
- content: textParts.join("\n"),
37
- role: "user"
38
- });
70
+ if (imageParts.length > 0) {
71
+ const contentArray = [];
72
+ if (textParts.length > 0) {
73
+ contentArray.push({ type: "text", text: textParts.join("\n") });
74
+ }
75
+ for (const img of imageParts) {
76
+ const base64 = uint8ArrayToBase64(img.image);
77
+ const mediaType = img.mediaType || "image/png";
78
+ contentArray.push({
79
+ type: "image_url",
80
+ image_url: { url: `data:${mediaType};base64,${base64}` }
81
+ });
82
+ }
83
+ messages.push({ content: contentArray, role: "user" });
84
+ } else {
85
+ messages.push({ content: textParts.join("\n"), role: "user" });
86
+ }
39
87
  break;
40
88
  }
41
89
  case "assistant": {
@@ -106,7 +154,7 @@ function convertToWorkersAIChatMessages(prompt) {
106
154
  }
107
155
  }
108
156
  }
109
- return { images, messages };
157
+ return { messages };
110
158
  }
111
159
 
112
160
  // src/map-workersai-usage.ts
@@ -274,7 +322,7 @@ function getMappedStream(response) {
274
322
  }
275
323
  if (choices?.[0]?.delta) {
276
324
  const delta = choices[0].delta;
277
- const reasoningDelta = delta.reasoning_content;
325
+ const reasoningDelta = delta.reasoning_content ?? delta.reasoning;
278
326
  if (reasoningDelta && reasoningDelta.length > 0) {
279
327
  if (!reasoningId) {
280
328
  reasoningId = generateId();
@@ -411,25 +459,12 @@ var SSEDecoder = class extends TransformStream {
411
459
 
412
460
  // src/utils.ts
413
461
  import { generateId as generateId2 } from "ai";
414
- function sanitizeToolCallId(id) {
415
- const alphanumeric = id.replace(/[^a-zA-Z0-9]/g, "");
416
- return alphanumeric.slice(0, 9).padEnd(9, "0");
417
- }
418
462
  function normalizeMessagesForBinding(messages) {
419
463
  return messages.map((msg) => {
420
464
  const normalized = { ...msg };
421
465
  if (normalized.content === null || normalized.content === void 0) {
422
466
  normalized.content = "";
423
467
  }
424
- if ("tool_call_id" in normalized && typeof normalized.tool_call_id === "string") {
425
- normalized.tool_call_id = sanitizeToolCallId(normalized.tool_call_id);
426
- }
427
- if ("tool_calls" in normalized && Array.isArray(normalized.tool_calls)) {
428
- normalized.tool_calls = normalized.tool_calls.map((tc) => ({
429
- ...tc,
430
- id: sanitizeToolCallId(tc.id)
431
- }));
432
- }
433
468
  return normalized;
434
469
  });
435
470
  }
@@ -437,9 +472,9 @@ function createRun(config) {
437
472
  const { accountId, apiKey } = config;
438
473
  return async function run(model, inputs, options) {
439
474
  const {
440
- gateway: _gateway,
475
+ gateway,
441
476
  prefix: _prefix,
442
- extraHeaders: _extraHeaders,
477
+ extraHeaders,
443
478
  returnRawResponse,
444
479
  signal,
445
480
  // AbortSignal — not serializable as a query parameter
@@ -465,11 +500,27 @@ function createRun(config) {
465
500
  }
466
501
  }
467
502
  const queryString = urlParams.toString();
468
- const url = `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/run/${model}${queryString ? `?${queryString}` : ""}`;
503
+ const modelPath = String(model).startsWith("run/") ? model : `run/${model}`;
504
+ const url = gateway?.id ? `https://gateway.ai.cloudflare.com/v1/${accountId}/${gateway.id}/workers-ai/${modelPath}${queryString ? `?${queryString}` : ""}` : `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/${modelPath}${queryString ? `?${queryString}` : ""}`;
469
505
  const headers = {
470
506
  Authorization: `Bearer ${apiKey}`,
471
- "Content-Type": "application/json"
507
+ "Content-Type": "application/json",
508
+ ...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {}
472
509
  };
510
+ if (gateway) {
511
+ if (gateway.skipCache) {
512
+ headers["cf-aig-skip-cache"] = "true";
513
+ }
514
+ if (typeof gateway.cacheTtl === "number") {
515
+ headers["cf-aig-cache-ttl"] = String(gateway.cacheTtl);
516
+ }
517
+ if (gateway.cacheKey) {
518
+ headers["cf-aig-cache-key"] = gateway.cacheKey;
519
+ }
520
+ if (gateway.metadata) {
521
+ headers["cf-aig-metadata"] = JSON.stringify(gateway.metadata);
522
+ }
523
+ }
473
524
  const body = JSON.stringify(inputs);
474
525
  const response = await fetch(url, {
475
526
  body,
@@ -500,7 +551,10 @@ function createRun(config) {
500
551
  return response.body;
501
552
  }
502
553
  const retryResponse = await fetch(url, {
503
- body: JSON.stringify({ ...inputs, stream: false }),
554
+ body: JSON.stringify({
555
+ ...inputs,
556
+ stream: false
557
+ }),
504
558
  headers,
505
559
  method: "POST",
506
560
  signal
@@ -842,7 +896,8 @@ var WorkersAIChatLanguageModel = class {
842
896
  type: "json_schema",
843
897
  json_schema: responseFormat?.type === "json" ? responseFormat.schema : void 0
844
898
  },
845
- tools: void 0
899
+ tools: void 0,
900
+ tool_choice: void 0
846
901
  },
847
902
  warnings
848
903
  };
@@ -855,21 +910,21 @@ var WorkersAIChatLanguageModel = class {
855
910
  }
856
911
  /**
857
912
  * Build the inputs object for `binding.run()`, shared by doGenerate and doStream.
913
+ *
914
+ * Images are embedded inline in messages as OpenAI-compatible content
915
+ * arrays with `image_url` parts. Both the REST API and the binding
916
+ * accept this format at runtime.
917
+ *
918
+ * The binding path additionally normalises null content to empty strings.
858
919
  */
859
- buildRunInputs(args, messages, images, options) {
860
- if (images.length > 1) {
861
- throw new Error("Multiple images are not yet supported as input");
862
- }
863
- const imagePart = images[0];
864
- const finalMessages = this.config.isBinding ? normalizeMessagesForBinding(messages) : messages;
920
+ buildRunInputs(args, messages, options) {
865
921
  return {
866
922
  max_tokens: args.max_tokens,
867
- messages: finalMessages,
923
+ messages: this.config.isBinding ? normalizeMessagesForBinding(messages) : messages,
868
924
  temperature: args.temperature,
869
925
  tools: args.tools,
926
+ ...args.tool_choice ? { tool_choice: args.tool_choice } : {},
870
927
  top_p: args.top_p,
871
- ...imagePart ? { image: Array.from(imagePart.image) } : {},
872
- // Only include response_format when actually set
873
928
  ...args.response_format ? { response_format: args.response_format } : {},
874
929
  ...options?.stream ? { stream: true } : {}
875
930
  };
@@ -878,19 +933,32 @@ var WorkersAIChatLanguageModel = class {
878
933
  * Get passthrough options for binding.run() from settings.
879
934
  */
880
935
  getRunOptions() {
881
- const { gateway, safePrompt: _safePrompt, ...passthroughOptions } = this.settings;
936
+ const {
937
+ gateway,
938
+ safePrompt: _safePrompt,
939
+ sessionAffinity,
940
+ extraHeaders,
941
+ ...passthroughOptions
942
+ } = this.settings;
943
+ const mergedHeaders = {
944
+ ...extraHeaders && typeof extraHeaders === "object" ? extraHeaders : {},
945
+ ...sessionAffinity ? { "x-session-affinity": sessionAffinity } : {}
946
+ };
882
947
  return {
883
948
  gateway: this.config.gateway ?? gateway,
949
+ ...Object.keys(mergedHeaders).length > 0 ? { extraHeaders: mergedHeaders } : {},
884
950
  ...passthroughOptions
885
951
  };
886
952
  }
887
953
  async doGenerate(options) {
888
954
  const { args, warnings } = this.getArgs(options);
889
- const { messages, images } = convertToWorkersAIChatMessages(options.prompt);
890
- const inputs = this.buildRunInputs(args, messages, images);
955
+ const { messages } = convertToWorkersAIChatMessages(options.prompt);
956
+ const inputs = this.buildRunInputs(args, messages);
891
957
  const runOptions = this.getRunOptions();
892
958
  const output = await this.config.binding.run(
893
959
  args.model,
960
+ // Content arrays for vision are valid at runtime but not in the
961
+ // binding's strict TypeScript definitions (which expect string content).
894
962
  inputs,
895
963
  runOptions
896
964
  );
@@ -901,7 +969,7 @@ var WorkersAIChatLanguageModel = class {
901
969
  }
902
970
  const outputRecord = output;
903
971
  const choices = outputRecord.choices;
904
- const reasoningContent = choices?.[0]?.message?.reasoning_content;
972
+ const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
905
973
  return {
906
974
  finishReason: mapWorkersAIFinishReason(outputRecord),
907
975
  content: [
@@ -918,8 +986,8 @@ var WorkersAIChatLanguageModel = class {
918
986
  }
919
987
  async doStream(options) {
920
988
  const { args, warnings } = this.getArgs(options);
921
- const { messages, images } = convertToWorkersAIChatMessages(options.prompt);
922
- const inputs = this.buildRunInputs(args, messages, images, { stream: true });
989
+ const { messages } = convertToWorkersAIChatMessages(options.prompt);
990
+ const inputs = this.buildRunInputs(args, messages, { stream: true });
923
991
  const runOptions = this.getRunOptions();
924
992
  const response = await this.config.binding.run(
925
993
  args.model,
@@ -933,7 +1001,7 @@ var WorkersAIChatLanguageModel = class {
933
1001
  }
934
1002
  const outputRecord = response;
935
1003
  const choices = outputRecord.choices;
936
- const reasoningContent = choices?.[0]?.message?.reasoning_content;
1004
+ const reasoningContent = choices?.[0]?.message?.reasoning_content ?? choices?.[0]?.message?.reasoning;
937
1005
  let textId = null;
938
1006
  let reasoningId = null;
939
1007
  return {
@@ -1012,7 +1080,7 @@ var WorkersAIImageModel = class {
1012
1080
  seed,
1013
1081
  width
1014
1082
  });
1015
- return toUint8Array(output);
1083
+ return toUint8Array2(output);
1016
1084
  };
1017
1085
  const images = await Promise.all(
1018
1086
  Array.from({ length: n }, () => generateImage())
@@ -1040,7 +1108,7 @@ function parseInteger(value) {
1040
1108
  const number = Number(value);
1041
1109
  return Number.isInteger(number) ? number : void 0;
1042
1110
  }
1043
- async function toUint8Array(output) {
1111
+ async function toUint8Array2(output) {
1044
1112
  if (output instanceof Uint8Array) {
1045
1113
  return output;
1046
1114
  }
@@ -1121,7 +1189,7 @@ var WorkersAITranscriptionModel = class {
1121
1189
  // ---------------------------------------------------------------------------
1122
1190
  async runWhisper(audioBytes, abortSignal) {
1123
1191
  const modelStr = this.modelId;
1124
- const audio = modelStr === "@cf/openai/whisper-large-v3-turbo" ? uint8ArrayToBase64(audioBytes) : Array.from(audioBytes);
1192
+ const audio = modelStr === "@cf/openai/whisper-large-v3-turbo" ? uint8ArrayToBase642(audioBytes) : Array.from(audioBytes);
1125
1193
  const inputs = { audio };
1126
1194
  if (this.settings.language) {
1127
1195
  inputs.language = this.settings.language;
@@ -1177,7 +1245,7 @@ var WorkersAITranscriptionModel = class {
1177
1245
  return this.config.binding.run(
1178
1246
  this.modelId,
1179
1247
  {
1180
- audio: { body: uint8ArrayToBase64(audioBytes), contentType: mediaType }
1248
+ audio: { body: uint8ArrayToBase642(audioBytes), contentType: mediaType }
1181
1249
  },
1182
1250
  { gateway: this.config.gateway, signal: abortSignal }
1183
1251
  );
@@ -1224,7 +1292,7 @@ var WorkersAITranscriptionModel = class {
1224
1292
  };
1225
1293
  }
1226
1294
  };
1227
- function uint8ArrayToBase64(bytes) {
1295
+ function uint8ArrayToBase642(bytes) {
1228
1296
  let binary = "";
1229
1297
  for (let i = 0; i < bytes.length; i++) {
1230
1298
  binary += String.fromCharCode(bytes[i]);
@@ -1276,7 +1344,7 @@ var WorkersAISpeechModel = class {
1276
1344
  returnRawResponse: true
1277
1345
  }
1278
1346
  );
1279
- const audio = await toUint8Array2(result);
1347
+ const audio = await toUint8Array3(result);
1280
1348
  return {
1281
1349
  audio,
1282
1350
  warnings,
@@ -1288,7 +1356,7 @@ var WorkersAISpeechModel = class {
1288
1356
  };
1289
1357
  }
1290
1358
  };
1291
- async function toUint8Array2(output) {
1359
+ async function toUint8Array3(output) {
1292
1360
  if (output instanceof Response) {
1293
1361
  return new Uint8Array(await output.arrayBuffer());
1294
1362
  }