@ai-sdk/xai 3.0.76 → 3.0.77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/index.d.mts +68 -5
- package/dist/index.d.ts +68 -5
- package/dist/index.js +117 -18
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +117 -18
- package/dist/index.mjs.map +1 -1
- package/docs/01-xai.mdx +166 -47
- package/package.json +1 -1
- package/src/xai-video-model.ts +104 -13
- package/src/xai-video-options.ts +136 -14
package/docs/01-xai.mdx
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
title: xAI Grok
|
|
3
|
-
description: Learn how to use xAI Grok.
|
|
3
|
+
description: Learn how to use xAI Grok and Imagine.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# xAI Grok Provider
|
|
@@ -73,10 +73,10 @@ You can use the following optional settings to customize the xAI provider instan
|
|
|
73
73
|
## Language Models
|
|
74
74
|
|
|
75
75
|
You can create [xAI models](https://console.x.ai) using a provider instance. The
|
|
76
|
-
first argument is the model id, e.g. `grok-
|
|
76
|
+
first argument is the model id, e.g. `grok-4.20-non-reasoning`.
|
|
77
77
|
|
|
78
78
|
```ts
|
|
79
|
-
const model = xai('grok-
|
|
79
|
+
const model = xai('grok-4.20-non-reasoning');
|
|
80
80
|
```
|
|
81
81
|
|
|
82
82
|
By default, `xai(modelId)` uses the Chat API. To use the Responses API with server-side agentic tools, explicitly use `xai.responses(modelId)`.
|
|
@@ -90,7 +90,7 @@ import { xai } from '@ai-sdk/xai';
|
|
|
90
90
|
import { generateText } from 'ai';
|
|
91
91
|
|
|
92
92
|
const { text } = await generateText({
|
|
93
|
-
model: xai('grok-
|
|
93
|
+
model: xai('grok-4.20-non-reasoning'),
|
|
94
94
|
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
95
95
|
});
|
|
96
96
|
```
|
|
@@ -142,7 +142,7 @@ The following optional provider options are available for xAI chat models:
|
|
|
142
142
|
You can use the xAI Responses API with the `xai.responses(modelId)` factory method for server-side agentic tool calling. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
|
|
143
143
|
|
|
144
144
|
```ts
|
|
145
|
-
const model = xai.responses('grok-4-
|
|
145
|
+
const model = xai.responses('grok-4.20-non-reasoning');
|
|
146
146
|
```
|
|
147
147
|
|
|
148
148
|
The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
|
|
@@ -186,7 +186,7 @@ import { xai } from '@ai-sdk/xai';
|
|
|
186
186
|
import { generateText } from 'ai';
|
|
187
187
|
|
|
188
188
|
const { text, sources } = await generateText({
|
|
189
|
-
model: xai.responses('grok-4-
|
|
189
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
190
190
|
prompt: 'What are the latest developments in AI?',
|
|
191
191
|
tools: {
|
|
192
192
|
web_search: xai.tools.webSearch({
|
|
@@ -220,7 +220,7 @@ The X search tool enables searching X (Twitter) for posts, with filtering by han
|
|
|
220
220
|
|
|
221
221
|
```ts
|
|
222
222
|
const { text, sources } = await generateText({
|
|
223
|
-
model: xai.responses('grok-4-
|
|
223
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
224
224
|
prompt: 'What are people saying about AI on X this week?',
|
|
225
225
|
tools: {
|
|
226
226
|
x_search: xai.tools.xSearch({
|
|
@@ -266,7 +266,7 @@ The code execution tool enables the model to write and execute Python code for c
|
|
|
266
266
|
|
|
267
267
|
```ts
|
|
268
268
|
const { text } = await generateText({
|
|
269
|
-
model: xai.responses('grok-4-
|
|
269
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
270
270
|
prompt:
|
|
271
271
|
'Calculate the compound interest for $10,000 at 5% annually for 10 years',
|
|
272
272
|
tools: {
|
|
@@ -281,7 +281,7 @@ The view image tool enables the model to view and analyze images:
|
|
|
281
281
|
|
|
282
282
|
```ts
|
|
283
283
|
const { text } = await generateText({
|
|
284
|
-
model: xai.responses('grok-4-
|
|
284
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
285
285
|
prompt: 'Describe what you see in the image',
|
|
286
286
|
tools: {
|
|
287
287
|
view_image: xai.tools.viewImage(),
|
|
@@ -295,7 +295,7 @@ The view X video tool enables the model to view and analyze videos from X (Twitt
|
|
|
295
295
|
|
|
296
296
|
```ts
|
|
297
297
|
const { text } = await generateText({
|
|
298
|
-
model: xai.responses('grok-4-
|
|
298
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
299
299
|
prompt: 'Summarize the content of this X video',
|
|
300
300
|
tools: {
|
|
301
301
|
view_x_video: xai.tools.viewXVideo(),
|
|
@@ -309,7 +309,7 @@ The MCP server tool enables the model to connect to remote [Model Context Protoc
|
|
|
309
309
|
|
|
310
310
|
```ts
|
|
311
311
|
const { text } = await generateText({
|
|
312
|
-
model: xai.responses('grok-4-
|
|
312
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
313
313
|
prompt: 'Use the weather tool to check conditions in San Francisco',
|
|
314
314
|
tools: {
|
|
315
315
|
weather_server: xai.tools.mcpServer({
|
|
@@ -357,7 +357,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
|
|
|
357
357
|
import { streamText } from 'ai';
|
|
358
358
|
|
|
359
359
|
const result = streamText({
|
|
360
|
-
model: xai.responses('grok-4-
|
|
360
|
+
model: xai.responses('grok-4.20-reasoning'),
|
|
361
361
|
prompt: 'What documents do you have access to?',
|
|
362
362
|
tools: {
|
|
363
363
|
file_search: xai.tools.fileSearch({
|
|
@@ -390,7 +390,7 @@ const result = streamText({
|
|
|
390
390
|
Include file search results in the response. When set to `['file_search_call.results']`, the response will contain the actual search results with file content and scores.
|
|
391
391
|
|
|
392
392
|
<Note>
|
|
393
|
-
File search requires grok-4 family models and the Responses API. Vector stores
|
|
393
|
+
File search requires grok-4 family models (including grok-4.20) and the Responses API. Vector stores
|
|
394
394
|
can be created using the [xAI
|
|
395
395
|
API](https://docs.x.ai/docs/guides/using-collections/api).
|
|
396
396
|
</Note>
|
|
@@ -404,7 +404,7 @@ import { xai } from '@ai-sdk/xai';
|
|
|
404
404
|
import { streamText } from 'ai';
|
|
405
405
|
|
|
406
406
|
const { fullStream } = streamText({
|
|
407
|
-
model: xai.responses('grok-4-
|
|
407
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
408
408
|
prompt: 'Research AI safety developments and calculate risk metrics',
|
|
409
409
|
tools: {
|
|
410
410
|
web_search: xai.tools.webSearch(),
|
|
@@ -438,7 +438,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
|
|
|
438
438
|
import { generateText } from 'ai';
|
|
439
439
|
|
|
440
440
|
const result = await generateText({
|
|
441
|
-
model: xai.responses('grok-4-
|
|
441
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
442
442
|
providerOptions: {
|
|
443
443
|
xai: {
|
|
444
444
|
reasoningEffort: 'high',
|
|
@@ -768,19 +768,16 @@ console.log('Sources:', await result.sources);
|
|
|
768
768
|
|
|
769
769
|
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
|
|
770
770
|
| ----------------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
|
|
771
|
-
| `grok-4-
|
|
771
|
+
| `grok-4.20-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
772
|
+
| `grok-4.20-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
772
773
|
| `grok-4-1-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
773
774
|
| `grok-4-1-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
774
|
-
| `grok-4-
|
|
775
|
+
| `grok-4-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
775
776
|
| `grok-4-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
777
|
+
| `grok-4-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
776
778
|
| `grok-code-fast-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
777
|
-
| `grok-4` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
778
|
-
| `grok-4-0709` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
779
|
-
| `grok-4-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
780
779
|
| `grok-3` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
781
|
-
| `grok-3-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
782
780
|
| `grok-3-mini` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
783
|
-
| `grok-3-mini-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
784
781
|
|
|
785
782
|
<Note>
|
|
786
783
|
The table above lists popular models. Please see the [xAI
|
|
@@ -839,7 +836,7 @@ const { images } = await generateImage({
|
|
|
839
836
|
|
|
840
837
|
#### Multi-Image Editing
|
|
841
838
|
|
|
842
|
-
Combine or reference multiple input images
|
|
839
|
+
Combine or reference multiple input images in the prompt:
|
|
843
840
|
|
|
844
841
|
```ts
|
|
845
842
|
import { xai } from '@ai-sdk/xai';
|
|
@@ -877,37 +874,53 @@ const { images } = await generateImage({
|
|
|
877
874
|
|
|
878
875
|
<Note>
|
|
879
876
|
Input images can be provided as `Buffer`, `ArrayBuffer`, `Uint8Array`, or
|
|
880
|
-
base64-encoded strings.
|
|
877
|
+
base64-encoded strings.
|
|
881
878
|
</Note>
|
|
882
879
|
|
|
883
|
-
###
|
|
880
|
+
### Image Provider Options
|
|
884
881
|
|
|
885
|
-
You can customize the image generation behavior with
|
|
882
|
+
You can customize the image generation behavior with provider-specific settings via `providerOptions.xai`:
|
|
886
883
|
|
|
887
884
|
```ts
|
|
888
|
-
import { xai } from '@ai-sdk/xai';
|
|
885
|
+
import { xai, type XaiImageModelOptions } from '@ai-sdk/xai';
|
|
889
886
|
import { generateImage } from 'ai';
|
|
890
887
|
|
|
891
888
|
const { images } = await generateImage({
|
|
892
|
-
model: xai.image('grok-imagine-image'),
|
|
889
|
+
model: xai.image('grok-imagine-image-pro'),
|
|
893
890
|
prompt: 'A futuristic cityscape at sunset',
|
|
894
891
|
aspectRatio: '16:9',
|
|
895
|
-
|
|
892
|
+
providerOptions: {
|
|
893
|
+
xai: {
|
|
894
|
+
resolution: '2k',
|
|
895
|
+
quality: 'high',
|
|
896
|
+
} satisfies XaiImageModelOptions,
|
|
897
|
+
},
|
|
896
898
|
});
|
|
897
899
|
```
|
|
898
900
|
|
|
899
|
-
|
|
901
|
+
- **resolution** _'1k' | '2k'_
|
|
900
902
|
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
903
|
+
Output resolution. `1k` produces ~1024×1024 images, `2k` produces ~2048×2048
|
|
904
|
+
images (actual dimensions vary based on aspect ratio). Available for
|
|
905
|
+
`grok-imagine-image-pro`.
|
|
906
|
+
|
|
907
|
+
- **quality** _'low' | 'medium' | 'high'_
|
|
908
|
+
|
|
909
|
+
Image quality level. Higher quality may increase generation time.
|
|
910
|
+
|
|
911
|
+
### Image Model Capabilities
|
|
912
|
+
|
|
913
|
+
| Model | Resolution | Aspect Ratios | Image Editing |
|
|
914
|
+
| ------------------------ | ------------ | ----------------------------------------------------------------------------------------------------------- | ------------------- |
|
|
915
|
+
| `grok-imagine-image-pro` | `1k`, `2k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
|
|
916
|
+
| `grok-imagine-image` | `1k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
|
|
904
917
|
|
|
905
918
|
## Video Models
|
|
906
919
|
|
|
907
920
|
You can create xAI video models using the `.video()` factory method.
|
|
908
921
|
For more on video generation with the AI SDK see [generateVideo()](/docs/reference/ai-sdk-core/generate-video).
|
|
909
922
|
|
|
910
|
-
This provider supports
|
|
923
|
+
This provider supports standard video generation from text prompts or image input, plus explicit video editing, video extension, and reference-to-video (R2V) operations.
|
|
911
924
|
|
|
912
925
|
### Text-to-Video
|
|
913
926
|
|
|
@@ -917,7 +930,7 @@ Generate videos from text prompts:
|
|
|
917
930
|
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
918
931
|
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
919
932
|
|
|
920
|
-
const {
|
|
933
|
+
const { video } = await generateVideo({
|
|
921
934
|
model: xai.video('grok-imagine-video'),
|
|
922
935
|
prompt: 'A chicken flying into the sunset in the style of 90s anime.',
|
|
923
936
|
aspectRatio: '16:9',
|
|
@@ -930,15 +943,15 @@ const { videos } = await generateVideo({
|
|
|
930
943
|
});
|
|
931
944
|
```
|
|
932
945
|
|
|
933
|
-
### Image
|
|
946
|
+
### Generation with Image Input
|
|
934
947
|
|
|
935
|
-
Generate videos using an image as the starting frame with an optional text prompt:
|
|
948
|
+
Generate videos using an image as the starting frame with an optional text prompt. This uses the standard generation path rather than a separate provider mode:
|
|
936
949
|
|
|
937
950
|
```ts
|
|
938
951
|
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
939
952
|
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
940
953
|
|
|
941
|
-
const {
|
|
954
|
+
const { video } = await generateVideo({
|
|
942
955
|
model: xai.video('grok-imagine-video'),
|
|
943
956
|
prompt: {
|
|
944
957
|
image: 'https://example.com/start-frame.png',
|
|
@@ -961,11 +974,12 @@ Edit an existing video using a text prompt by providing a source video URL via p
|
|
|
961
974
|
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
962
975
|
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
963
976
|
|
|
964
|
-
const {
|
|
977
|
+
const { video } = await generateVideo({
|
|
965
978
|
model: xai.video('grok-imagine-video'),
|
|
966
979
|
prompt: 'Give the person sunglasses and a hat',
|
|
967
980
|
providerOptions: {
|
|
968
981
|
xai: {
|
|
982
|
+
mode: 'edit-video',
|
|
969
983
|
videoUrl: 'https://example.com/source-video.mp4',
|
|
970
984
|
pollTimeoutMs: 600000, // 10 minutes
|
|
971
985
|
} satisfies XaiVideoModelOptions,
|
|
@@ -991,6 +1005,7 @@ import { experimental_generateVideo as generateVideo } from 'ai';
|
|
|
991
1005
|
|
|
992
1006
|
const providerOptions = {
|
|
993
1007
|
xai: {
|
|
1008
|
+
mode: 'edit-video',
|
|
994
1009
|
videoUrl: 'https://example.com/source-video.mp4',
|
|
995
1010
|
pollTimeoutMs: 600000,
|
|
996
1011
|
} satisfies XaiVideoModelOptions,
|
|
@@ -1012,19 +1027,99 @@ const [withSunglasses, withScarf] = await Promise.all([
|
|
|
1012
1027
|
model: xai.video('grok-imagine-video'),
|
|
1013
1028
|
prompt: 'Add sunglasses',
|
|
1014
1029
|
providerOptions: {
|
|
1015
|
-
xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
1030
|
+
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
1016
1031
|
},
|
|
1017
1032
|
}),
|
|
1018
1033
|
generateVideo({
|
|
1019
1034
|
model: xai.video('grok-imagine-video'),
|
|
1020
1035
|
prompt: 'Add a scarf',
|
|
1021
1036
|
providerOptions: {
|
|
1022
|
-
xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
1037
|
+
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
1023
1038
|
},
|
|
1024
1039
|
}),
|
|
1025
1040
|
]);
|
|
1026
1041
|
```
|
|
1027
1042
|
|
|
1043
|
+
### Video Extension
|
|
1044
|
+
|
|
1045
|
+
Extend an existing video from its last frame. `duration` controls the length of the extension only, not the total length of the output video. The output inherits `aspectRatio` and `resolution` from the source video.
|
|
1046
|
+
|
|
1047
|
+
```ts
|
|
1048
|
+
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
1049
|
+
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
1050
|
+
|
|
1051
|
+
// Step 1: Generate a source video
|
|
1052
|
+
const source = await generateVideo({
|
|
1053
|
+
model: xai.video('grok-imagine-video'),
|
|
1054
|
+
prompt: 'A cat sitting on a sunlit windowsill, tail gently swishing.',
|
|
1055
|
+
duration: 5,
|
|
1056
|
+
aspectRatio: '16:9',
|
|
1057
|
+
providerOptions: {
|
|
1058
|
+
xai: {
|
|
1059
|
+
pollTimeoutMs: 600000,
|
|
1060
|
+
} satisfies XaiVideoModelOptions,
|
|
1061
|
+
},
|
|
1062
|
+
});
|
|
1063
|
+
|
|
1064
|
+
const sourceUrl = source.providerMetadata?.xai?.videoUrl as string;
|
|
1065
|
+
|
|
1066
|
+
// Step 2: Extend the video with a new scene
|
|
1067
|
+
const extended = await generateVideo({
|
|
1068
|
+
model: xai.video('grok-imagine-video'),
|
|
1069
|
+
prompt: 'The cat turns its head, notices a butterfly, and leaps off.',
|
|
1070
|
+
duration: 6,
|
|
1071
|
+
providerOptions: {
|
|
1072
|
+
xai: {
|
|
1073
|
+
mode: 'extend-video',
|
|
1074
|
+
videoUrl: sourceUrl,
|
|
1075
|
+
pollTimeoutMs: 600000,
|
|
1076
|
+
} satisfies XaiVideoModelOptions,
|
|
1077
|
+
},
|
|
1078
|
+
});
|
|
1079
|
+
```
|
|
1080
|
+
|
|
1081
|
+
<Note>
|
|
1082
|
+
Video extension does not support custom `aspectRatio` or `resolution` — the
|
|
1083
|
+
output inherits those from the source video. `duration` is supported and
|
|
1084
|
+
controls how long the extension is (not the total video length).
|
|
1085
|
+
</Note>
|
|
1086
|
+
|
|
1087
|
+
### Reference-to-Video (R2V)
|
|
1088
|
+
|
|
1089
|
+
Provide reference images to guide the video's style and content. Unlike image-to-video, reference images are not used as the first frame — the model incorporates their visual elements into the generated video. Each reference image can be a public HTTPS URL or a base64 data URI.
|
|
1090
|
+
|
|
1091
|
+
```ts
|
|
1092
|
+
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
1093
|
+
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
1094
|
+
|
|
1095
|
+
const { video } = await generateVideo({
|
|
1096
|
+
model: xai.video('grok-imagine-video'),
|
|
1097
|
+
prompt:
|
|
1098
|
+
'The comic cat from <IMAGE_1> and the comic dog from <IMAGE_2> ' +
|
|
1099
|
+
'are having a playful chase through a sunlit park. ' +
|
|
1100
|
+
'Cinematic slow-motion, warm afternoon light.',
|
|
1101
|
+
duration: 8,
|
|
1102
|
+
aspectRatio: '16:9',
|
|
1103
|
+
providerOptions: {
|
|
1104
|
+
xai: {
|
|
1105
|
+
mode: 'reference-to-video',
|
|
1106
|
+
referenceImageUrls: [
|
|
1107
|
+
'https://example.com/comic-cat.png',
|
|
1108
|
+
'https://example.com/comic-dog.png',
|
|
1109
|
+
],
|
|
1110
|
+
pollTimeoutMs: 600000,
|
|
1111
|
+
} satisfies XaiVideoModelOptions,
|
|
1112
|
+
},
|
|
1113
|
+
});
|
|
1114
|
+
```
|
|
1115
|
+
|
|
1116
|
+
Use `<IMAGE_1>`, `<IMAGE_2>`, etc. in your prompt to reference specific images. Up to 7 reference images are supported per request.
|
|
1117
|
+
|
|
1118
|
+
<Note>
|
|
1119
|
+
Reference-to-video supports `duration`, `aspectRatio`, and `resolution`. Use
|
|
1120
|
+
`mode` to select the operation — the modes are mutually exclusive.
|
|
1121
|
+
</Note>
|
|
1122
|
+
|
|
1028
1123
|
### Video Provider Options
|
|
1029
1124
|
|
|
1030
1125
|
The following provider options are available via `providerOptions.xai`.
|
|
@@ -1044,10 +1139,27 @@ You can validate the provider options using the `XaiVideoModelOptions` type.
|
|
|
1044
1139
|
`1280x720` maps to `720p` and `854x480` maps to `480p`.
|
|
1045
1140
|
Use this provider option to pass the native format directly.
|
|
1046
1141
|
|
|
1142
|
+
- **mode** _'edit-video' | 'extend-video' | 'reference-to-video'_
|
|
1143
|
+
|
|
1144
|
+
Selects the explicit video operation. The modes are mutually exclusive:
|
|
1145
|
+
- `'edit-video'` — edit an existing video (requires `videoUrl`)
|
|
1146
|
+
- `'extend-video'` — extend a video from its last frame (requires `videoUrl`)
|
|
1147
|
+
- `'reference-to-video'` — generate from reference images (requires `referenceImageUrls`)
|
|
1148
|
+
|
|
1149
|
+
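When omitted, standard generation is used. For backward compatibility, legacy inputs are still auto-detected: a `videoUrl` without `mode` is treated as `'edit-video'`, and a non-empty `referenceImageUrls` without `mode` is treated as `'reference-to-video'`.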
|
|
1150
|
+
|
|
1047
1151
|
- **videoUrl** _string_
|
|
1048
1152
|
|
|
1049
|
-
URL of a source video
|
|
1050
|
-
|
|
1153
|
+
URL of a source video. Used with `mode: 'edit-video'` for video editing
|
|
1154
|
+
and `mode: 'extend-video'` for video extension.
|
|
1155
|
+
|
|
1156
|
+
- **referenceImageUrls** _string[]_
|
|
1157
|
+
|
|
1158
|
+
Array of 1–7 reference images, each a public HTTPS URL or a base64 data URI, for
|
|
1159
|
+
reference-to-video (R2V) generation. The model incorporates visual
|
|
1160
|
+
elements from these images without using them as the first frame. Use
|
|
1161
|
+
`<IMAGE_1>`, `<IMAGE_2>`, etc. in the prompt to reference specific
|
|
1162
|
+
images. Used with `mode: 'reference-to-video'`.
|
|
1051
1163
|
|
|
1052
1164
|
<Note>
|
|
1053
1165
|
Video generation is an asynchronous process that can take several minutes.
|
|
@@ -1067,14 +1179,21 @@ desired ratio.
|
|
|
1067
1179
|
|
|
1068
1180
|
For **video editing**, the output matches the input video's aspect ratio and
|
|
1069
1181
|
resolution. Custom `duration`, `aspectRatio`, and `resolution` are not
|
|
1070
|
-
supported
|
|
1182
|
+
supported — the output resolution is capped at 720p (e.g., a 1080p input
|
|
1071
1183
|
will be downsized to 720p).
|
|
1072
1184
|
|
|
1185
|
+
For **video extension**, the output inherits `aspectRatio` and `resolution`
|
|
1186
|
+
from the source video. `duration` is supported and controls only the
|
|
1187
|
+
extension length.
|
|
1188
|
+
|
|
1189
|
+
For **reference-to-video (R2V)**, you can specify `duration`, `aspectRatio`,
|
|
1190
|
+
and `resolution` just like text-to-video.
|
|
1191
|
+
|
|
1073
1192
|
### Video Model Capabilities
|
|
1074
1193
|
|
|
1075
|
-
| Model | Duration | Aspect Ratios | Resolution | Image-to-Video |
|
|
1076
|
-
| -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- |
|
|
1077
|
-
| `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> |
|
|
1194
|
+
| Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Editing | Extension | R2V |
|
|
1195
|
+
| -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- | ------------------- | ------------------- |
|
|
1196
|
+
| `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
1078
1197
|
|
|
1079
1198
|
<Note>
|
|
1080
1199
|
You can also pass any available provider model ID as a string if needed.
|
package/package.json
CHANGED
package/src/xai-video-model.ts
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
import { z } from 'zod/v4';
|
|
17
17
|
import { xaiFailedResponseHandler } from './xai-error';
|
|
18
18
|
import {
|
|
19
|
-
type
|
|
19
|
+
type XaiParsedVideoModelOptions,
|
|
20
20
|
xaiVideoModelOptionsSchema,
|
|
21
21
|
} from './xai-video-options';
|
|
22
22
|
import type { XaiVideoModelId } from './xai-video-settings';
|
|
@@ -37,6 +37,27 @@ const RESOLUTION_MAP: Record<string, string> = {
|
|
|
37
37
|
'640x480': '480p',
|
|
38
38
|
};
|
|
39
39
|
|
|
40
|
+
function resolveVideoMode(
|
|
41
|
+
options: XaiParsedVideoModelOptions | undefined,
|
|
42
|
+
): XaiParsedVideoModelOptions['mode'] | undefined {
|
|
43
|
+
if (options?.mode != null) {
|
|
44
|
+
return options.mode;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (options?.videoUrl != null) {
|
|
48
|
+
return 'edit-video';
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
if (
|
|
52
|
+
options?.referenceImageUrls != null &&
|
|
53
|
+
options.referenceImageUrls.length > 0
|
|
54
|
+
) {
|
|
55
|
+
return 'reference-to-video';
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return undefined;
|
|
59
|
+
}
|
|
60
|
+
|
|
40
61
|
export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
41
62
|
readonly specificationVersion = 'v3';
|
|
42
63
|
readonly maxVideosPerCall = 1;
|
|
@@ -60,9 +81,13 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
60
81
|
provider: 'xai',
|
|
61
82
|
providerOptions: options.providerOptions,
|
|
62
83
|
schema: xaiVideoModelOptionsSchema,
|
|
63
|
-
})) as
|
|
84
|
+
})) as XaiParsedVideoModelOptions | undefined;
|
|
64
85
|
|
|
65
|
-
const
|
|
86
|
+
const effectiveMode = resolveVideoMode(xaiOptions);
|
|
87
|
+
|
|
88
|
+
const isEdit = effectiveMode === 'edit-video';
|
|
89
|
+
const isExtension = effectiveMode === 'extend-video';
|
|
90
|
+
const hasReferenceImages = effectiveMode === 'reference-to-video';
|
|
66
91
|
|
|
67
92
|
if (options.fps != null) {
|
|
68
93
|
warnings.push({
|
|
@@ -90,6 +115,7 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
90
115
|
});
|
|
91
116
|
}
|
|
92
117
|
|
|
118
|
+
// Edit mode: duration, aspectRatio, resolution not supported
|
|
93
119
|
if (isEdit && options.duration != null) {
|
|
94
120
|
warnings.push({
|
|
95
121
|
type: 'unsupported',
|
|
@@ -117,22 +143,46 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
117
143
|
});
|
|
118
144
|
}
|
|
119
145
|
|
|
146
|
+
// Extension mode: aspectRatio and resolution not supported
|
|
147
|
+
if (isExtension && options.aspectRatio != null) {
|
|
148
|
+
warnings.push({
|
|
149
|
+
type: 'unsupported',
|
|
150
|
+
feature: 'aspectRatio',
|
|
151
|
+
details: 'xAI video extension does not support custom aspect ratio.',
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
if (
|
|
156
|
+
isExtension &&
|
|
157
|
+
(xaiOptions?.resolution != null || options.resolution != null)
|
|
158
|
+
) {
|
|
159
|
+
warnings.push({
|
|
160
|
+
type: 'unsupported',
|
|
161
|
+
feature: 'resolution',
|
|
162
|
+
details: 'xAI video extension does not support custom resolution.',
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
|
|
120
166
|
const body: Record<string, unknown> = {
|
|
121
167
|
model: this.modelId,
|
|
122
168
|
prompt: options.prompt,
|
|
123
169
|
};
|
|
124
170
|
|
|
125
|
-
|
|
171
|
+
const allowDuration = !isEdit;
|
|
172
|
+
const allowAspectRatio = !isEdit && !isExtension;
|
|
173
|
+
const allowResolution = !isEdit && !isExtension;
|
|
174
|
+
|
|
175
|
+
if (allowDuration && options.duration != null) {
|
|
126
176
|
body.duration = options.duration;
|
|
127
177
|
}
|
|
128
178
|
|
|
129
|
-
if (
|
|
179
|
+
if (allowAspectRatio && options.aspectRatio != null) {
|
|
130
180
|
body.aspect_ratio = options.aspectRatio;
|
|
131
181
|
}
|
|
132
182
|
|
|
133
|
-
if (
|
|
183
|
+
if (allowResolution && xaiOptions?.resolution != null) {
|
|
134
184
|
body.resolution = xaiOptions.resolution;
|
|
135
|
-
} else if (
|
|
185
|
+
} else if (allowResolution && options.resolution != null) {
|
|
136
186
|
const mapped = RESOLUTION_MAP[options.resolution];
|
|
137
187
|
if (mapped != null) {
|
|
138
188
|
body.resolution = mapped;
|
|
@@ -147,12 +197,17 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
147
197
|
}
|
|
148
198
|
}
|
|
149
199
|
|
|
150
|
-
// Video editing: pass source video URL (nested object
|
|
151
|
-
if (
|
|
152
|
-
body.video = { url: xaiOptions
|
|
200
|
+
// Video editing: pass source video URL (nested object)
|
|
201
|
+
if (isEdit) {
|
|
202
|
+
body.video = { url: xaiOptions!.videoUrl };
|
|
153
203
|
}
|
|
154
204
|
|
|
155
|
-
//
|
|
205
|
+
// Video extension: pass source video URL (nested object)
|
|
206
|
+
if (isExtension) {
|
|
207
|
+
body.video = { url: xaiOptions!.videoUrl };
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// Convert SDK image input to the nested xAI request image object
|
|
156
211
|
if (options.image != null) {
|
|
157
212
|
if (options.image.type === 'url') {
|
|
158
213
|
body.image = { url: options.image.url };
|
|
@@ -167,14 +222,23 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
167
222
|
}
|
|
168
223
|
}
|
|
169
224
|
|
|
225
|
+
// Reference images for R2V (reference-to-video) generation
|
|
226
|
+
if (hasReferenceImages) {
|
|
227
|
+
body.reference_images = xaiOptions!.referenceImageUrls!.map(url => ({
|
|
228
|
+
url,
|
|
229
|
+
}));
|
|
230
|
+
}
|
|
231
|
+
|
|
170
232
|
if (xaiOptions != null) {
|
|
171
233
|
for (const [key, value] of Object.entries(xaiOptions)) {
|
|
172
234
|
if (
|
|
173
235
|
![
|
|
236
|
+
'mode',
|
|
174
237
|
'pollIntervalMs',
|
|
175
238
|
'pollTimeoutMs',
|
|
176
239
|
'resolution',
|
|
177
240
|
'videoUrl',
|
|
241
|
+
'referenceImageUrls',
|
|
178
242
|
].includes(key)
|
|
179
243
|
) {
|
|
180
244
|
body[key] = value;
|
|
@@ -184,9 +248,19 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
184
248
|
|
|
185
249
|
const baseURL = this.config.baseURL ?? 'https://api.x.ai/v1';
|
|
186
250
|
|
|
187
|
-
//
|
|
251
|
+
// Determine endpoint based on mode
|
|
252
|
+
let endpoint: string;
|
|
253
|
+
if (isEdit) {
|
|
254
|
+
endpoint = `${baseURL}/videos/edits`;
|
|
255
|
+
} else if (isExtension) {
|
|
256
|
+
endpoint = `${baseURL}/videos/extensions`;
|
|
257
|
+
} else {
|
|
258
|
+
endpoint = `${baseURL}/videos/generations`;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// Step 1: Create video generation/edit/extension request
|
|
188
262
|
const { value: createResponse } = await postJsonToApi({
|
|
189
|
-
url:
|
|
263
|
+
url: endpoint,
|
|
190
264
|
headers: combineHeaders(this.config.headers(), options.headers),
|
|
191
265
|
body,
|
|
192
266
|
failedResponseHandler: xaiFailedResponseHandler,
|
|
@@ -279,6 +353,9 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
279
353
|
...(statusResponse.usage?.cost_in_usd_ticks != null
|
|
280
354
|
? { costInUsdTicks: statusResponse.usage.cost_in_usd_ticks }
|
|
281
355
|
: {}),
|
|
356
|
+
...(statusResponse.progress != null
|
|
357
|
+
? { progress: statusResponse.progress }
|
|
358
|
+
: {}),
|
|
282
359
|
},
|
|
283
360
|
},
|
|
284
361
|
};
|
|
@@ -291,6 +368,13 @@ export class XaiVideoModel implements Experimental_VideoModelV3 {
|
|
|
291
368
|
});
|
|
292
369
|
}
|
|
293
370
|
|
|
371
|
+
if (statusResponse.status === 'failed') {
|
|
372
|
+
throw new AISDKError({
|
|
373
|
+
name: 'XAI_VIDEO_GENERATION_FAILED',
|
|
374
|
+
message: 'Video generation failed.',
|
|
375
|
+
});
|
|
376
|
+
}
|
|
377
|
+
|
|
294
378
|
// 'pending' → continue polling
|
|
295
379
|
}
|
|
296
380
|
}
|
|
@@ -315,4 +399,11 @@ const xaiVideoStatusResponseSchema = z.object({
|
|
|
315
399
|
cost_in_usd_ticks: z.number().nullish(),
|
|
316
400
|
})
|
|
317
401
|
.nullish(),
|
|
402
|
+
progress: z.number().nullish(),
|
|
403
|
+
error: z
|
|
404
|
+
.object({
|
|
405
|
+
code: z.string().nullish(),
|
|
406
|
+
message: z.string().nullish(),
|
|
407
|
+
})
|
|
408
|
+
.nullish(),
|
|
318
409
|
});
|