@ai-sdk/xai 3.0.76 → 3.0.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/01-xai.mdx CHANGED
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  title: xAI Grok
3
- description: Learn how to use xAI Grok.
3
+ description: Learn how to use xAI Grok and Imagine.
4
4
  ---
5
5
 
6
6
  # xAI Grok Provider
@@ -73,10 +73,10 @@ You can use the following optional settings to customize the xAI provider instan
73
73
  ## Language Models
74
74
 
75
75
  You can create [xAI models](https://console.x.ai) using a provider instance. The
76
- first argument is the model id, e.g. `grok-3`.
76
+ first argument is the model id, e.g. `grok-4.20-non-reasoning`.
77
77
 
78
78
  ```ts
79
- const model = xai('grok-3');
79
+ const model = xai('grok-4.20-non-reasoning');
80
80
  ```
81
81
 
82
82
  By default, `xai(modelId)` uses the Chat API. To use the Responses API with server-side agentic tools, explicitly use `xai.responses(modelId)`.
@@ -90,7 +90,7 @@ import { xai } from '@ai-sdk/xai';
90
90
  import { generateText } from 'ai';
91
91
 
92
92
  const { text } = await generateText({
93
- model: xai('grok-3'),
93
+ model: xai('grok-4.20-non-reasoning'),
94
94
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
95
95
  });
96
96
  ```
@@ -142,7 +142,7 @@ The following optional provider options are available for xAI chat models:
142
142
  You can use the xAI Responses API with the `xai.responses(modelId)` factory method for server-side agentic tool calling. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
143
143
 
144
144
  ```ts
145
- const model = xai.responses('grok-4-fast-non-reasoning');
145
+ const model = xai.responses('grok-4.20-non-reasoning');
146
146
  ```
147
147
 
148
148
  The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
@@ -186,7 +186,7 @@ import { xai } from '@ai-sdk/xai';
186
186
  import { generateText } from 'ai';
187
187
 
188
188
  const { text, sources } = await generateText({
189
- model: xai.responses('grok-4-fast-non-reasoning'),
189
+ model: xai.responses('grok-4.20-non-reasoning'),
190
190
  prompt: 'What are the latest developments in AI?',
191
191
  tools: {
192
192
  web_search: xai.tools.webSearch({
@@ -220,7 +220,7 @@ The X search tool enables searching X (Twitter) for posts, with filtering by han
220
220
 
221
221
  ```ts
222
222
  const { text, sources } = await generateText({
223
- model: xai.responses('grok-4-fast-non-reasoning'),
223
+ model: xai.responses('grok-4.20-non-reasoning'),
224
224
  prompt: 'What are people saying about AI on X this week?',
225
225
  tools: {
226
226
  x_search: xai.tools.xSearch({
@@ -266,7 +266,7 @@ The code execution tool enables the model to write and execute Python code for c
266
266
 
267
267
  ```ts
268
268
  const { text } = await generateText({
269
- model: xai.responses('grok-4-fast-non-reasoning'),
269
+ model: xai.responses('grok-4.20-non-reasoning'),
270
270
  prompt:
271
271
  'Calculate the compound interest for $10,000 at 5% annually for 10 years',
272
272
  tools: {
@@ -281,7 +281,7 @@ The view image tool enables the model to view and analyze images:
281
281
 
282
282
  ```ts
283
283
  const { text } = await generateText({
284
- model: xai.responses('grok-4-fast-non-reasoning'),
284
+ model: xai.responses('grok-4.20-non-reasoning'),
285
285
  prompt: 'Describe what you see in the image',
286
286
  tools: {
287
287
  view_image: xai.tools.viewImage(),
@@ -295,7 +295,7 @@ The view X video tool enables the model to view and analyze videos from X (Twitt
295
295
 
296
296
  ```ts
297
297
  const { text } = await generateText({
298
- model: xai.responses('grok-4-fast-non-reasoning'),
298
+ model: xai.responses('grok-4.20-non-reasoning'),
299
299
  prompt: 'Summarize the content of this X video',
300
300
  tools: {
301
301
  view_x_video: xai.tools.viewXVideo(),
@@ -309,7 +309,7 @@ The MCP server tool enables the model to connect to remote [Model Context Protoc
309
309
 
310
310
  ```ts
311
311
  const { text } = await generateText({
312
- model: xai.responses('grok-4-fast-non-reasoning'),
312
+ model: xai.responses('grok-4.20-non-reasoning'),
313
313
  prompt: 'Use the weather tool to check conditions in San Francisco',
314
314
  tools: {
315
315
  weather_server: xai.tools.mcpServer({
@@ -357,7 +357,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
357
357
  import { streamText } from 'ai';
358
358
 
359
359
  const result = streamText({
360
- model: xai.responses('grok-4-1-fast-reasoning'),
360
+ model: xai.responses('grok-4.20-reasoning'),
361
361
  prompt: 'What documents do you have access to?',
362
362
  tools: {
363
363
  file_search: xai.tools.fileSearch({
@@ -390,7 +390,7 @@ const result = streamText({
390
390
  Include file search results in the response. When set to `['file_search_call.results']`, the response will contain the actual search results with file content and scores.
391
391
 
392
392
  <Note>
393
- File search requires grok-4 family models and the Responses API. Vector stores
393
+ File search requires grok-4 family models (including grok-4.20) and the Responses API. Vector stores
394
394
  can be created using the [xAI
395
395
  API](https://docs.x.ai/docs/guides/using-collections/api).
396
396
  </Note>
@@ -404,7 +404,7 @@ import { xai } from '@ai-sdk/xai';
404
404
  import { streamText } from 'ai';
405
405
 
406
406
  const { fullStream } = streamText({
407
- model: xai.responses('grok-4-fast-non-reasoning'),
407
+ model: xai.responses('grok-4.20-non-reasoning'),
408
408
  prompt: 'Research AI safety developments and calculate risk metrics',
409
409
  tools: {
410
410
  web_search: xai.tools.webSearch(),
@@ -438,7 +438,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
438
438
  import { generateText } from 'ai';
439
439
 
440
440
  const result = await generateText({
441
- model: xai.responses('grok-4-fast-non-reasoning'),
441
+ model: xai.responses('grok-4.20-non-reasoning'),
442
442
  providerOptions: {
443
443
  xai: {
444
444
  reasoningEffort: 'high',
@@ -768,19 +768,16 @@ console.log('Sources:', await result.sources);
768
768
 
769
769
  | Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
770
770
  | ----------------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
771
- | `grok-4-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
771
+ | `grok-4.20-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
772
+ | `grok-4.20-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
772
773
  | `grok-4-1-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
773
774
  | `grok-4-1-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
774
- | `grok-4-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
775
+ | `grok-4-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
775
776
  | `grok-4-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
777
+ | `grok-4-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
776
778
  | `grok-code-fast-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
777
- | `grok-4` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
778
- | `grok-4-0709` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
779
- | `grok-4-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
780
779
  | `grok-3` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
781
- | `grok-3-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
782
780
  | `grok-3-mini` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
783
- | `grok-3-mini-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
784
781
 
785
782
  <Note>
786
783
  The table above lists popular models. Please see the [xAI
@@ -839,7 +836,7 @@ const { images } = await generateImage({
839
836
 
840
837
  #### Multi-Image Editing
841
838
 
842
- Combine or reference multiple input images (up to 3) in the prompt:
839
+ Combine or reference multiple input images in the prompt:
843
840
 
844
841
  ```ts
845
842
  import { xai } from '@ai-sdk/xai';
@@ -877,37 +874,53 @@ const { images } = await generateImage({
877
874
 
878
875
  <Note>
879
876
  Input images can be provided as `Buffer`, `ArrayBuffer`, `Uint8Array`, or
880
- base64-encoded strings. Up to 3 input images are supported per request.
877
+ base64-encoded strings.
881
878
  </Note>
882
879
 
883
- ### Model-specific options
880
+ ### Image Provider Options
884
881
 
885
- You can customize the image generation behavior with model-specific settings:
882
+ You can customize the image generation behavior with provider-specific settings via `providerOptions.xai`:
886
883
 
887
884
  ```ts
888
- import { xai } from '@ai-sdk/xai';
885
+ import { xai, type XaiImageModelOptions } from '@ai-sdk/xai';
889
886
  import { generateImage } from 'ai';
890
887
 
891
888
  const { images } = await generateImage({
892
- model: xai.image('grok-imagine-image'),
889
+ model: xai.image('grok-imagine-image-pro'),
893
890
  prompt: 'A futuristic cityscape at sunset',
894
891
  aspectRatio: '16:9',
895
- n: 2,
892
+ providerOptions: {
893
+ xai: {
894
+ resolution: '2k',
895
+ quality: 'high',
896
+ } satisfies XaiImageModelOptions,
897
+ },
896
898
  });
897
899
  ```
898
900
 
899
- ### Model Capabilities
901
+ - **resolution** _'1k' | '2k'_
900
902
 
901
- | Model | Aspect Ratios | Image Editing |
902
- | -------------------- | ----------------------------------------------------------------------------------------------------------- | ------------------- |
903
- | `grok-imagine-image` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
903
+ Output resolution. `1k` produces ~1024×1024 images, `2k` produces ~2048×2048
904
+ images (actual dimensions vary based on aspect ratio). Available for
905
+ `grok-imagine-image-pro`.
906
+
907
+ - **quality** _'low' | 'medium' | 'high'_
908
+
909
+ Image quality level. Higher quality may increase generation time.
910
+
911
+ ### Image Model Capabilities
912
+
913
+ | Model | Resolution | Aspect Ratios | Image Editing |
914
+ | ------------------------ | ------------ | ----------------------------------------------------------------------------------------------------------- | ------------------- |
915
+ | `grok-imagine-image-pro` | `1k`, `2k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
916
+ | `grok-imagine-image` | `1k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
904
917
 
905
918
  ## Video Models
906
919
 
907
920
  You can create xAI video models using the `.video()` factory method.
908
921
  For more on video generation with the AI SDK see [generateVideo()](/docs/reference/ai-sdk-core/generate-video).
909
922
 
910
- This provider supports three video generation modes: text-to-video, image-to-video, and video editing.
923
+ This provider supports standard video generation from text prompts or image input, plus explicit video editing, video extension, and reference-to-video (R2V) operations.
911
924
 
912
925
  ### Text-to-Video
913
926
 
@@ -917,7 +930,7 @@ Generate videos from text prompts:
917
930
  import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
918
931
  import { experimental_generateVideo as generateVideo } from 'ai';
919
932
 
920
- const { videos } = await generateVideo({
933
+ const { video } = await generateVideo({
921
934
  model: xai.video('grok-imagine-video'),
922
935
  prompt: 'A chicken flying into the sunset in the style of 90s anime.',
923
936
  aspectRatio: '16:9',
@@ -930,15 +943,15 @@ const { videos } = await generateVideo({
930
943
  });
931
944
  ```
932
945
 
933
- ### Image-to-Video
946
+ ### Generation with Image Input
934
947
 
935
- Generate videos using an image as the starting frame with an optional text prompt:
948
+ Generate videos using an image as the starting frame with an optional text prompt. This uses the standard generation path rather than a separate provider mode:
936
949
 
937
950
  ```ts
938
951
  import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
939
952
  import { experimental_generateVideo as generateVideo } from 'ai';
940
953
 
941
- const { videos } = await generateVideo({
954
+ const { video } = await generateVideo({
942
955
  model: xai.video('grok-imagine-video'),
943
956
  prompt: {
944
957
  image: 'https://example.com/start-frame.png',
@@ -961,11 +974,12 @@ Edit an existing video using a text prompt by providing a source video URL via p
961
974
  import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
962
975
  import { experimental_generateVideo as generateVideo } from 'ai';
963
976
 
964
- const { videos } = await generateVideo({
977
+ const { video } = await generateVideo({
965
978
  model: xai.video('grok-imagine-video'),
966
979
  prompt: 'Give the person sunglasses and a hat',
967
980
  providerOptions: {
968
981
  xai: {
982
+ mode: 'edit-video',
969
983
  videoUrl: 'https://example.com/source-video.mp4',
970
984
  pollTimeoutMs: 600000, // 10 minutes
971
985
  } satisfies XaiVideoModelOptions,
@@ -991,6 +1005,7 @@ import { experimental_generateVideo as generateVideo } from 'ai';
991
1005
 
992
1006
  const providerOptions = {
993
1007
  xai: {
1008
+ mode: 'edit-video',
994
1009
  videoUrl: 'https://example.com/source-video.mp4',
995
1010
  pollTimeoutMs: 600000,
996
1011
  } satisfies XaiVideoModelOptions,
@@ -1012,19 +1027,99 @@ const [withSunglasses, withScarf] = await Promise.all([
1012
1027
  model: xai.video('grok-imagine-video'),
1013
1028
  prompt: 'Add sunglasses',
1014
1029
  providerOptions: {
1015
- xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
1030
+ xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
1016
1031
  },
1017
1032
  }),
1018
1033
  generateVideo({
1019
1034
  model: xai.video('grok-imagine-video'),
1020
1035
  prompt: 'Add a scarf',
1021
1036
  providerOptions: {
1022
- xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
1037
+ xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
1023
1038
  },
1024
1039
  }),
1025
1040
  ]);
1026
1041
  ```
1027
1042
 
1043
+ ### Video Extension
1044
+
1045
+ Extend an existing video from its last frame. The `duration` controls the length of the extension only, not the total output. The output inherits `aspectRatio` and `resolution` from the source video.
1046
+
1047
+ ```ts
1048
+ import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
1049
+ import { experimental_generateVideo as generateVideo } from 'ai';
1050
+
1051
+ // Step 1: Generate a source video
1052
+ const source = await generateVideo({
1053
+ model: xai.video('grok-imagine-video'),
1054
+ prompt: 'A cat sitting on a sunlit windowsill, tail gently swishing.',
1055
+ duration: 5,
1056
+ aspectRatio: '16:9',
1057
+ providerOptions: {
1058
+ xai: {
1059
+ pollTimeoutMs: 600000,
1060
+ } satisfies XaiVideoModelOptions,
1061
+ },
1062
+ });
1063
+
1064
+ const sourceUrl = source.providerMetadata?.xai?.videoUrl as string;
1065
+
1066
+ // Step 2: Extend the video with a new scene
1067
+ const extended = await generateVideo({
1068
+ model: xai.video('grok-imagine-video'),
1069
+ prompt: 'The cat turns its head, notices a butterfly, and leaps off.',
1070
+ duration: 6,
1071
+ providerOptions: {
1072
+ xai: {
1073
+ mode: 'extend-video',
1074
+ videoUrl: sourceUrl,
1075
+ pollTimeoutMs: 600000,
1076
+ } satisfies XaiVideoModelOptions,
1077
+ },
1078
+ });
1079
+ ```
1080
+
1081
+ <Note>
1082
+ Video extension does not support custom `aspectRatio` or `resolution` — the
1083
+ output inherits those from the source video. `duration` is supported and
1084
+ controls how long the extension is (not the total video length).
1085
+ </Note>
1086
+
1087
+ ### Reference-to-Video (R2V)
1088
+
1089
+ Provide reference images to guide the video's style and content. Unlike image-to-video, reference images are not used as the first frame — the model incorporates their visual elements into the generated video. Each reference image can be a public HTTPS URL or a base64 data URI.
1090
+
1091
+ ```ts
1092
+ import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
1093
+ import { experimental_generateVideo as generateVideo } from 'ai';
1094
+
1095
+ const { video } = await generateVideo({
1096
+ model: xai.video('grok-imagine-video'),
1097
+ prompt:
1098
+ 'The comic cat from <IMAGE_1> and the comic dog from <IMAGE_2> ' +
1099
+ 'are having a playful chase through a sunlit park. ' +
1100
+ 'Cinematic slow-motion, warm afternoon light.',
1101
+ duration: 8,
1102
+ aspectRatio: '16:9',
1103
+ providerOptions: {
1104
+ xai: {
1105
+ mode: 'reference-to-video',
1106
+ referenceImageUrls: [
1107
+ 'https://example.com/comic-cat.png',
1108
+ 'https://example.com/comic-dog.png',
1109
+ ],
1110
+ pollTimeoutMs: 600000,
1111
+ } satisfies XaiVideoModelOptions,
1112
+ },
1113
+ });
1114
+ ```
1115
+
1116
+ Use `<IMAGE_1>`, `<IMAGE_2>`, etc. in your prompt to reference specific images. Up to 7 reference images are supported per request.
1117
+
1118
+ <Note>
1119
+ Reference-to-video supports `duration`, `aspectRatio`, and `resolution`. Use
1120
+ `mode` to select the operation — each mode is mutually exclusive.
1121
+ </Note>
1122
+
1028
1123
  ### Video Provider Options
1029
1124
 
1030
1125
  The following provider options are available via `providerOptions.xai`.
@@ -1044,10 +1139,27 @@ You can validate the provider options using the `XaiVideoModelOptions` type.
1044
1139
  `1280x720` maps to `720p` and `854x480` maps to `480p`.
1045
1140
  Use this provider option to pass the native format directly.
1046
1141
 
1142
+ - **mode** _'edit-video' | 'extend-video' | 'reference-to-video'_
1143
+
1144
+ Selects the explicit video operation. Each mode is mutually exclusive:
1145
+ - `'edit-video'` — edit an existing video (requires `videoUrl`)
1146
+ - `'extend-video'` — extend a video from its last frame (requires `videoUrl`)
1147
+ - `'reference-to-video'` — generate from reference images (requires `referenceImageUrls`)
1148
+
1149
+ When omitted, standard generation is used. Legacy inputs are still auto-detected from fields for backward compatibility.
1150
+
1047
1151
  - **videoUrl** _string_
1048
1152
 
1049
- URL of a source video for video editing. When provided, the prompt is used
1050
- to describe the desired edits to the video.
1153
+ URL of a source video. Used with `mode: 'edit-video'` for video editing
1154
+ and `mode: 'extend-video'` for video extension.
1155
+
1156
+ - **referenceImageUrls** _string[]_
1157
+
1158
+ Array of reference image URLs (1–7 images) or base64 data URIs for
1159
+ reference-to-video (R2V) generation. The model incorporates visual
1160
+ elements from these images without using them as the first frame. Use
1161
+ `<IMAGE_1>`, `<IMAGE_2>`, etc. in the prompt to reference specific
1162
+ images. Used with `mode: 'reference-to-video'`.
1051
1163
 
1052
1164
  <Note>
1053
1165
  Video generation is an asynchronous process that can take several minutes.
@@ -1067,14 +1179,21 @@ desired ratio.
1067
1179
 
1068
1180
  For **video editing**, the output matches the input video's aspect ratio and
1069
1181
  resolution. Custom `duration`, `aspectRatio`, and `resolution` are not
1070
- supported - the output resolution is capped at 720p (e.g., a 1080p input
1182
+ supported — the output resolution is capped at 720p (e.g., a 1080p input
1071
1183
  will be downsized to 720p).
1072
1184
 
1185
+ For **video extension**, the output inherits `aspectRatio` and `resolution`
1186
+ from the source video. `duration` is supported and controls only the
1187
+ extension length.
1188
+
1189
+ For **reference-to-video (R2V)**, you can specify `duration`, `aspectRatio`,
1190
+ and `resolution` just like text-to-video.
1191
+
1073
1192
  ### Video Model Capabilities
1074
1193
 
1075
- | Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Video Editing |
1076
- | -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- |
1077
- | `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> |
1194
+ | Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Editing | Extension | R2V |
1195
+ | -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- | ------------------- | ------------------- |
1196
+ | `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
1078
1197
 
1079
1198
  <Note>
1080
1199
  You can also pass any available provider model ID as a string if needed.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/xai",
3
- "version": "3.0.76",
3
+ "version": "3.0.78",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -29,9 +29,9 @@
29
29
  }
30
30
  },
31
31
  "dependencies": {
32
- "@ai-sdk/openai-compatible": "2.0.38",
32
+ "@ai-sdk/openai-compatible": "2.0.39",
33
33
  "@ai-sdk/provider": "3.0.8",
34
- "@ai-sdk/provider-utils": "4.0.22"
34
+ "@ai-sdk/provider-utils": "4.0.23"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/node": "20.17.24",
@@ -16,7 +16,7 @@ import {
16
16
  import { z } from 'zod/v4';
17
17
  import { xaiFailedResponseHandler } from './xai-error';
18
18
  import {
19
- type XaiVideoModelOptions,
19
+ type XaiParsedVideoModelOptions,
20
20
  xaiVideoModelOptionsSchema,
21
21
  } from './xai-video-options';
22
22
  import type { XaiVideoModelId } from './xai-video-settings';
@@ -37,6 +37,27 @@ const RESOLUTION_MAP: Record<string, string> = {
37
37
  '640x480': '480p',
38
38
  };
39
39
 
40
+ function resolveVideoMode(
41
+ options: XaiParsedVideoModelOptions | undefined,
42
+ ): XaiParsedVideoModelOptions['mode'] | undefined {
43
+ if (options?.mode != null) {
44
+ return options.mode;
45
+ }
46
+
47
+ if (options?.videoUrl != null) {
48
+ return 'edit-video';
49
+ }
50
+
51
+ if (
52
+ options?.referenceImageUrls != null &&
53
+ options.referenceImageUrls.length > 0
54
+ ) {
55
+ return 'reference-to-video';
56
+ }
57
+
58
+ return undefined;
59
+ }
60
+
40
61
  export class XaiVideoModel implements Experimental_VideoModelV3 {
41
62
  readonly specificationVersion = 'v3';
42
63
  readonly maxVideosPerCall = 1;
@@ -60,9 +81,13 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
60
81
  provider: 'xai',
61
82
  providerOptions: options.providerOptions,
62
83
  schema: xaiVideoModelOptionsSchema,
63
- })) as XaiVideoModelOptions | undefined;
84
+ })) as XaiParsedVideoModelOptions | undefined;
64
85
 
65
- const isEdit = xaiOptions?.videoUrl != null;
86
+ const effectiveMode = resolveVideoMode(xaiOptions);
87
+
88
+ const isEdit = effectiveMode === 'edit-video';
89
+ const isExtension = effectiveMode === 'extend-video';
90
+ const hasReferenceImages = effectiveMode === 'reference-to-video';
66
91
 
67
92
  if (options.fps != null) {
68
93
  warnings.push({
@@ -90,6 +115,7 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
90
115
  });
91
116
  }
92
117
 
118
+ // Edit mode: duration, aspectRatio, resolution not supported
93
119
  if (isEdit && options.duration != null) {
94
120
  warnings.push({
95
121
  type: 'unsupported',
@@ -117,22 +143,46 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
117
143
  });
118
144
  }
119
145
 
146
+ // Extension mode: aspectRatio and resolution not supported
147
+ if (isExtension && options.aspectRatio != null) {
148
+ warnings.push({
149
+ type: 'unsupported',
150
+ feature: 'aspectRatio',
151
+ details: 'xAI video extension does not support custom aspect ratio.',
152
+ });
153
+ }
154
+
155
+ if (
156
+ isExtension &&
157
+ (xaiOptions?.resolution != null || options.resolution != null)
158
+ ) {
159
+ warnings.push({
160
+ type: 'unsupported',
161
+ feature: 'resolution',
162
+ details: 'xAI video extension does not support custom resolution.',
163
+ });
164
+ }
165
+
120
166
  const body: Record<string, unknown> = {
121
167
  model: this.modelId,
122
168
  prompt: options.prompt,
123
169
  };
124
170
 
125
- if (!isEdit && options.duration != null) {
171
+ const allowDuration = !isEdit;
172
+ const allowAspectRatio = !isEdit && !isExtension;
173
+ const allowResolution = !isEdit && !isExtension;
174
+
175
+ if (allowDuration && options.duration != null) {
126
176
  body.duration = options.duration;
127
177
  }
128
178
 
129
- if (!isEdit && options.aspectRatio != null) {
179
+ if (allowAspectRatio && options.aspectRatio != null) {
130
180
  body.aspect_ratio = options.aspectRatio;
131
181
  }
132
182
 
133
- if (!isEdit && xaiOptions?.resolution != null) {
183
+ if (allowResolution && xaiOptions?.resolution != null) {
134
184
  body.resolution = xaiOptions.resolution;
135
- } else if (!isEdit && options.resolution != null) {
185
+ } else if (allowResolution && options.resolution != null) {
136
186
  const mapped = RESOLUTION_MAP[options.resolution];
137
187
  if (mapped != null) {
138
188
  body.resolution = mapped;
@@ -147,12 +197,17 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
147
197
  }
148
198
  }
149
199
 
150
- // Video editing: pass source video URL (nested object like image)
151
- if (xaiOptions?.videoUrl != null) {
152
- body.video = { url: xaiOptions.videoUrl };
200
+ // Video editing: pass source video URL (nested object)
201
+ if (isEdit) {
202
+ body.video = { url: xaiOptions!.videoUrl };
153
203
  }
154
204
 
155
- // Image-to-video: convert SDK image to nested image object
205
+ // Video extension: pass source video URL (nested object)
206
+ if (isExtension) {
207
+ body.video = { url: xaiOptions!.videoUrl };
208
+ }
209
+
210
+ // Convert SDK image input to the nested xAI request image object
156
211
  if (options.image != null) {
157
212
  if (options.image.type === 'url') {
158
213
  body.image = { url: options.image.url };
@@ -167,14 +222,23 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
167
222
  }
168
223
  }
169
224
 
225
+ // Reference images for R2V (reference-to-video) generation
226
+ if (hasReferenceImages) {
227
+ body.reference_images = xaiOptions!.referenceImageUrls!.map(url => ({
228
+ url,
229
+ }));
230
+ }
231
+
170
232
  if (xaiOptions != null) {
171
233
  for (const [key, value] of Object.entries(xaiOptions)) {
172
234
  if (
173
235
  ![
236
+ 'mode',
174
237
  'pollIntervalMs',
175
238
  'pollTimeoutMs',
176
239
  'resolution',
177
240
  'videoUrl',
241
+ 'referenceImageUrls',
178
242
  ].includes(key)
179
243
  ) {
180
244
  body[key] = value;
@@ -184,9 +248,19 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
184
248
 
185
249
  const baseURL = this.config.baseURL ?? 'https://api.x.ai/v1';
186
250
 
187
- // Step 1: Create video generation/edit request
251
+ // Determine endpoint based on mode
252
+ let endpoint: string;
253
+ if (isEdit) {
254
+ endpoint = `${baseURL}/videos/edits`;
255
+ } else if (isExtension) {
256
+ endpoint = `${baseURL}/videos/extensions`;
257
+ } else {
258
+ endpoint = `${baseURL}/videos/generations`;
259
+ }
260
+
261
+ // Step 1: Create video generation/edit/extension request
188
262
  const { value: createResponse } = await postJsonToApi({
189
- url: `${baseURL}/videos/${isEdit ? 'edits' : 'generations'}`,
263
+ url: endpoint,
190
264
  headers: combineHeaders(this.config.headers(), options.headers),
191
265
  body,
192
266
  failedResponseHandler: xaiFailedResponseHandler,
@@ -279,6 +353,9 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
279
353
  ...(statusResponse.usage?.cost_in_usd_ticks != null
280
354
  ? { costInUsdTicks: statusResponse.usage.cost_in_usd_ticks }
281
355
  : {}),
356
+ ...(statusResponse.progress != null
357
+ ? { progress: statusResponse.progress }
358
+ : {}),
282
359
  },
283
360
  },
284
361
  };
@@ -291,6 +368,13 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
291
368
  });
292
369
  }
293
370
 
371
+ if (statusResponse.status === 'failed') {
372
+ throw new AISDKError({
373
+ name: 'XAI_VIDEO_GENERATION_FAILED',
374
+ message: 'Video generation failed.',
375
+ });
376
+ }
377
+
294
378
  // 'pending' → continue polling
295
379
  }
296
380
  }
@@ -315,4 +399,11 @@ const xaiVideoStatusResponseSchema = z.object({
315
399
  cost_in_usd_ticks: z.number().nullish(),
316
400
  })
317
401
  .nullish(),
402
+ progress: z.number().nullish(),
403
+ error: z
404
+ .object({
405
+ code: z.string().nullish(),
406
+ message: z.string().nullish(),
407
+ })
408
+ .nullish(),
318
409
  });