@ai-sdk/xai 4.0.0-beta.21 → 4.0.0-beta.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/index.d.mts +82 -8
- package/dist/index.d.ts +82 -8
- package/dist/index.js +264 -47
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +275 -44
- package/dist/index.mjs.map +1 -1
- package/docs/01-xai.mdx +166 -47
- package/package.json +4 -4
- package/src/convert-to-xai-chat-messages.ts +17 -1
- package/src/files/xai-files-api.ts +16 -0
- package/src/files/xai-files-options.ts +15 -0
- package/src/files/xai-files.ts +93 -0
- package/src/index.ts +1 -0
- package/src/responses/convert-to-xai-responses-input.ts +14 -2
- package/src/responses/xai-responses-api.ts +2 -1
- package/src/xai-chat-prompt.ts +2 -1
- package/src/xai-provider.ts +16 -0
- package/src/xai-video-model.ts +104 -13
- package/src/xai-video-options.ts +136 -14
package/docs/01-xai.mdx
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
title: xAI Grok
|
|
3
|
-
description: Learn how to use xAI Grok.
|
|
3
|
+
description: Learn how to use xAI Grok and Imagine.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# xAI Grok Provider
|
|
@@ -73,10 +73,10 @@ You can use the following optional settings to customize the xAI provider instan
|
|
|
73
73
|
## Language Models
|
|
74
74
|
|
|
75
75
|
You can create [xAI models](https://console.x.ai) using a provider instance. The
|
|
76
|
-
first argument is the model id, e.g. `grok-
|
|
76
|
+
first argument is the model id, e.g. `grok-4.20-non-reasoning`.
|
|
77
77
|
|
|
78
78
|
```ts
|
|
79
|
-
const model = xai('grok-
|
|
79
|
+
const model = xai('grok-4.20-non-reasoning');
|
|
80
80
|
```
|
|
81
81
|
|
|
82
82
|
By default, `xai(modelId)` uses the Responses API. To use the [Chat Completions API](https://docs.x.ai/docs/api-reference#chat-completions) (legacy), use `xai.chat(modelId)`.
|
|
@@ -90,7 +90,7 @@ import { xai } from '@ai-sdk/xai';
|
|
|
90
90
|
import { generateText } from 'ai';
|
|
91
91
|
|
|
92
92
|
const { text } = await generateText({
|
|
93
|
-
model: xai('grok-
|
|
93
|
+
model: xai('grok-4.20-non-reasoning'),
|
|
94
94
|
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
95
95
|
});
|
|
96
96
|
```
|
|
@@ -104,7 +104,7 @@ and support structured data generation with [`Output`](/docs/reference/ai-sdk-co
|
|
|
104
104
|
The xAI Responses API is the default when using `xai(modelId)`. You can also use `xai.responses(modelId)` explicitly. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
|
|
105
105
|
|
|
106
106
|
```ts
|
|
107
|
-
const model = xai.responses('grok-4-
|
|
107
|
+
const model = xai.responses('grok-4.20-non-reasoning');
|
|
108
108
|
```
|
|
109
109
|
|
|
110
110
|
The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
|
|
@@ -148,7 +148,7 @@ import { xai } from '@ai-sdk/xai';
|
|
|
148
148
|
import { generateText } from 'ai';
|
|
149
149
|
|
|
150
150
|
const { text, sources } = await generateText({
|
|
151
|
-
model: xai.responses('grok-4-
|
|
151
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
152
152
|
prompt: 'What are the latest developments in AI?',
|
|
153
153
|
tools: {
|
|
154
154
|
web_search: xai.tools.webSearch({
|
|
@@ -182,7 +182,7 @@ The X search tool enables searching X (Twitter) for posts, with filtering by han
|
|
|
182
182
|
|
|
183
183
|
```ts
|
|
184
184
|
const { text, sources } = await generateText({
|
|
185
|
-
model: xai.responses('grok-4-
|
|
185
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
186
186
|
prompt: 'What are people saying about AI on X this week?',
|
|
187
187
|
tools: {
|
|
188
188
|
x_search: xai.tools.xSearch({
|
|
@@ -228,7 +228,7 @@ The code execution tool enables the model to write and execute Python code for c
|
|
|
228
228
|
|
|
229
229
|
```ts
|
|
230
230
|
const { text } = await generateText({
|
|
231
|
-
model: xai.responses('grok-4-
|
|
231
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
232
232
|
prompt:
|
|
233
233
|
'Calculate the compound interest for $10,000 at 5% annually for 10 years',
|
|
234
234
|
tools: {
|
|
@@ -243,7 +243,7 @@ The view image tool enables the model to view and analyze images:
|
|
|
243
243
|
|
|
244
244
|
```ts
|
|
245
245
|
const { text } = await generateText({
|
|
246
|
-
model: xai.responses('grok-4-
|
|
246
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
247
247
|
prompt: 'Describe what you see in the image',
|
|
248
248
|
tools: {
|
|
249
249
|
view_image: xai.tools.viewImage(),
|
|
@@ -257,7 +257,7 @@ The view X video tool enables the model to view and analyze videos from X (Twitt
|
|
|
257
257
|
|
|
258
258
|
```ts
|
|
259
259
|
const { text } = await generateText({
|
|
260
|
-
model: xai.responses('grok-4-
|
|
260
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
261
261
|
prompt: 'Summarize the content of this X video',
|
|
262
262
|
tools: {
|
|
263
263
|
view_x_video: xai.tools.viewXVideo(),
|
|
@@ -271,7 +271,7 @@ The MCP server tool enables the model to connect to remote [Model Context Protoc
|
|
|
271
271
|
|
|
272
272
|
```ts
|
|
273
273
|
const { text } = await generateText({
|
|
274
|
-
model: xai.responses('grok-4-
|
|
274
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
275
275
|
prompt: 'Use the weather tool to check conditions in San Francisco',
|
|
276
276
|
tools: {
|
|
277
277
|
weather_server: xai.tools.mcpServer({
|
|
@@ -319,7 +319,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
|
|
|
319
319
|
import { streamText } from 'ai';
|
|
320
320
|
|
|
321
321
|
const result = streamText({
|
|
322
|
-
model: xai.responses('grok-4-
|
|
322
|
+
model: xai.responses('grok-4.20-reasoning'),
|
|
323
323
|
prompt: 'What documents do you have access to?',
|
|
324
324
|
tools: {
|
|
325
325
|
file_search: xai.tools.fileSearch({
|
|
@@ -352,7 +352,7 @@ const result = streamText({
|
|
|
352
352
|
Include file search results in the response. When set to `['file_search_call.results']`, the response will contain the actual search results with file content and scores.
|
|
353
353
|
|
|
354
354
|
<Note>
|
|
355
|
-
File search requires grok-4 family models and the Responses API. Vector stores
|
|
355
|
+
File search requires grok-4 family models (including grok-4.20) and the Responses API. Vector stores
|
|
356
356
|
can be created using the [xAI
|
|
357
357
|
API](https://docs.x.ai/docs/guides/using-collections/api).
|
|
358
358
|
</Note>
|
|
@@ -366,7 +366,7 @@ import { xai } from '@ai-sdk/xai';
|
|
|
366
366
|
import { streamText } from 'ai';
|
|
367
367
|
|
|
368
368
|
const { fullStream } = streamText({
|
|
369
|
-
model: xai.responses('grok-4-
|
|
369
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
370
370
|
prompt: 'Research AI safety developments and calculate risk metrics',
|
|
371
371
|
tools: {
|
|
372
372
|
web_search: xai.tools.webSearch(),
|
|
@@ -400,7 +400,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
|
|
|
400
400
|
import { generateText } from 'ai';
|
|
401
401
|
|
|
402
402
|
const result = await generateText({
|
|
403
|
-
model: xai.responses('grok-4-
|
|
403
|
+
model: xai.responses('grok-4.20-non-reasoning'),
|
|
404
404
|
providerOptions: {
|
|
405
405
|
xai: {
|
|
406
406
|
reasoningEffort: 'high',
|
|
@@ -445,19 +445,16 @@ The following provider options are available:
|
|
|
445
445
|
|
|
446
446
|
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
|
|
447
447
|
| ----------------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
|
|
448
|
-
| `grok-4-
|
|
448
|
+
| `grok-4.20-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
449
|
+
| `grok-4.20-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
449
450
|
| `grok-4-1-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
450
451
|
| `grok-4-1-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
451
|
-
| `grok-4-
|
|
452
|
+
| `grok-4-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
452
453
|
| `grok-4-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
454
|
+
| `grok-4-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
453
455
|
| `grok-code-fast-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
454
|
-
| `grok-4` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
455
|
-
| `grok-4-0709` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
456
|
-
| `grok-4-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
457
456
|
| `grok-3` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
458
|
-
| `grok-3-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
|
|
459
457
|
| `grok-3-mini` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
460
|
-
| `grok-3-mini-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
461
458
|
|
|
462
459
|
<Note>
|
|
463
460
|
The table above lists popular models. Please see the [xAI
|
|
@@ -516,7 +513,7 @@ const { images } = await generateImage({
|
|
|
516
513
|
|
|
517
514
|
#### Multi-Image Editing
|
|
518
515
|
|
|
519
|
-
Combine or reference multiple input images
|
|
516
|
+
Combine or reference multiple input images in the prompt:
|
|
520
517
|
|
|
521
518
|
```ts
|
|
522
519
|
import { xai } from '@ai-sdk/xai';
|
|
@@ -554,37 +551,53 @@ const { images } = await generateImage({
|
|
|
554
551
|
|
|
555
552
|
<Note>
|
|
556
553
|
Input images can be provided as `Buffer`, `ArrayBuffer`, `Uint8Array`, or
|
|
557
|
-
base64-encoded strings.
|
|
554
|
+
base64-encoded strings.
|
|
558
555
|
</Note>
|
|
559
556
|
|
|
560
|
-
###
|
|
557
|
+
### Image Provider Options
|
|
561
558
|
|
|
562
|
-
You can customize the image generation behavior with
|
|
559
|
+
You can customize the image generation behavior with provider-specific settings via `providerOptions.xai`:
|
|
563
560
|
|
|
564
561
|
```ts
|
|
565
|
-
import { xai } from '@ai-sdk/xai';
|
|
562
|
+
import { xai, type XaiImageModelOptions } from '@ai-sdk/xai';
|
|
566
563
|
import { generateImage } from 'ai';
|
|
567
564
|
|
|
568
565
|
const { images } = await generateImage({
|
|
569
|
-
model: xai.image('grok-imagine-image'),
|
|
566
|
+
model: xai.image('grok-imagine-image-pro'),
|
|
570
567
|
prompt: 'A futuristic cityscape at sunset',
|
|
571
568
|
aspectRatio: '16:9',
|
|
572
|
-
|
|
569
|
+
providerOptions: {
|
|
570
|
+
xai: {
|
|
571
|
+
resolution: '2k',
|
|
572
|
+
quality: 'high',
|
|
573
|
+
} satisfies XaiImageModelOptions,
|
|
574
|
+
},
|
|
573
575
|
});
|
|
574
576
|
```
|
|
575
577
|
|
|
576
|
-
|
|
578
|
+
- **resolution** _'1k' | '2k'_
|
|
577
579
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
580
|
+
Output resolution. `1k` produces ~1024×1024 images, `2k` produces ~2048×2048
|
|
581
|
+
images (actual dimensions vary based on aspect ratio). Available for
|
|
582
|
+
`grok-imagine-image-pro`.
|
|
583
|
+
|
|
584
|
+
- **quality** _'low' | 'medium' | 'high'_
|
|
585
|
+
|
|
586
|
+
Image quality level. Higher quality may increase generation time.
|
|
587
|
+
|
|
588
|
+
### Image Model Capabilities
|
|
589
|
+
|
|
590
|
+
| Model | Resolution | Aspect Ratios | Image Editing |
|
|
591
|
+
| ------------------------ | ------------ | ----------------------------------------------------------------------------------------------------------- | ------------------- |
|
|
592
|
+
| `grok-imagine-image-pro` | `1k`, `2k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
|
|
593
|
+
| `grok-imagine-image` | `1k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
|
|
581
594
|
|
|
582
595
|
## Video Models
|
|
583
596
|
|
|
584
597
|
You can create xAI video models using the `.video()` factory method.
|
|
585
598
|
For more on video generation with the AI SDK see [generateVideo()](/docs/reference/ai-sdk-core/generate-video).
|
|
586
599
|
|
|
587
|
-
This provider supports
|
|
600
|
+
This provider supports standard video generation from text prompts or image input, plus explicit video editing, video extension, and reference-to-video (R2V) operations.
|
|
588
601
|
|
|
589
602
|
### Text-to-Video
|
|
590
603
|
|
|
@@ -594,7 +607,7 @@ Generate videos from text prompts:
|
|
|
594
607
|
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
595
608
|
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
596
609
|
|
|
597
|
-
const {
|
|
610
|
+
const { video } = await generateVideo({
|
|
598
611
|
model: xai.video('grok-imagine-video'),
|
|
599
612
|
prompt: 'A chicken flying into the sunset in the style of 90s anime.',
|
|
600
613
|
aspectRatio: '16:9',
|
|
@@ -607,15 +620,15 @@ const { videos } = await generateVideo({
|
|
|
607
620
|
});
|
|
608
621
|
```
|
|
609
622
|
|
|
610
|
-
### Image
|
|
623
|
+
### Generation with Image Input
|
|
611
624
|
|
|
612
|
-
Generate videos using an image as the starting frame with an optional text prompt:
|
|
625
|
+
Generate videos using an image as the starting frame with an optional text prompt. This uses the standard generation path rather than a separate provider mode:
|
|
613
626
|
|
|
614
627
|
```ts
|
|
615
628
|
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
616
629
|
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
617
630
|
|
|
618
|
-
const {
|
|
631
|
+
const { video } = await generateVideo({
|
|
619
632
|
model: xai.video('grok-imagine-video'),
|
|
620
633
|
prompt: {
|
|
621
634
|
image: 'https://example.com/start-frame.png',
|
|
@@ -638,11 +651,12 @@ Edit an existing video using a text prompt by providing a source video URL via p
|
|
|
638
651
|
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
639
652
|
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
640
653
|
|
|
641
|
-
const {
|
|
654
|
+
const { video } = await generateVideo({
|
|
642
655
|
model: xai.video('grok-imagine-video'),
|
|
643
656
|
prompt: 'Give the person sunglasses and a hat',
|
|
644
657
|
providerOptions: {
|
|
645
658
|
xai: {
|
|
659
|
+
mode: 'edit-video',
|
|
646
660
|
videoUrl: 'https://example.com/source-video.mp4',
|
|
647
661
|
pollTimeoutMs: 600000, // 10 minutes
|
|
648
662
|
} satisfies XaiVideoModelOptions,
|
|
@@ -668,6 +682,7 @@ import { experimental_generateVideo as generateVideo } from 'ai';
|
|
|
668
682
|
|
|
669
683
|
const providerOptions = {
|
|
670
684
|
xai: {
|
|
685
|
+
mode: 'edit-video',
|
|
671
686
|
videoUrl: 'https://example.com/source-video.mp4',
|
|
672
687
|
pollTimeoutMs: 600000,
|
|
673
688
|
} satisfies XaiVideoModelOptions,
|
|
@@ -689,19 +704,99 @@ const [withSunglasses, withScarf] = await Promise.all([
|
|
|
689
704
|
model: xai.video('grok-imagine-video'),
|
|
690
705
|
prompt: 'Add sunglasses',
|
|
691
706
|
providerOptions: {
|
|
692
|
-
xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
707
|
+
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
693
708
|
},
|
|
694
709
|
}),
|
|
695
710
|
generateVideo({
|
|
696
711
|
model: xai.video('grok-imagine-video'),
|
|
697
712
|
prompt: 'Add a scarf',
|
|
698
713
|
providerOptions: {
|
|
699
|
-
xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
714
|
+
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
|
|
700
715
|
},
|
|
701
716
|
}),
|
|
702
717
|
]);
|
|
703
718
|
```
|
|
704
719
|
|
|
720
|
+
### Video Extension
|
|
721
|
+
|
|
722
|
+
Extend an existing video from its last frame. `duration` controls the length of the extension only, not the total output length. The output inherits `aspectRatio` and `resolution` from the source video.
|
|
723
|
+
|
|
724
|
+
```ts
|
|
725
|
+
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
726
|
+
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
727
|
+
|
|
728
|
+
// Step 1: Generate a source video
|
|
729
|
+
const source = await generateVideo({
|
|
730
|
+
model: xai.video('grok-imagine-video'),
|
|
731
|
+
prompt: 'A cat sitting on a sunlit windowsill, tail gently swishing.',
|
|
732
|
+
duration: 5,
|
|
733
|
+
aspectRatio: '16:9',
|
|
734
|
+
providerOptions: {
|
|
735
|
+
xai: {
|
|
736
|
+
pollTimeoutMs: 600000,
|
|
737
|
+
} satisfies XaiVideoModelOptions,
|
|
738
|
+
},
|
|
739
|
+
});
|
|
740
|
+
|
|
741
|
+
const sourceUrl = source.providerMetadata?.xai?.videoUrl as string;
|
|
742
|
+
|
|
743
|
+
// Step 2: Extend the video with a new scene
|
|
744
|
+
const extended = await generateVideo({
|
|
745
|
+
model: xai.video('grok-imagine-video'),
|
|
746
|
+
prompt: 'The cat turns its head, notices a butterfly, and leaps off.',
|
|
747
|
+
duration: 6,
|
|
748
|
+
providerOptions: {
|
|
749
|
+
xai: {
|
|
750
|
+
mode: 'extend-video',
|
|
751
|
+
videoUrl: sourceUrl,
|
|
752
|
+
pollTimeoutMs: 600000,
|
|
753
|
+
} satisfies XaiVideoModelOptions,
|
|
754
|
+
},
|
|
755
|
+
});
|
|
756
|
+
```
|
|
757
|
+
|
|
758
|
+
<Note>
|
|
759
|
+
Video extension does not support custom `aspectRatio` or `resolution` — the
|
|
760
|
+
output inherits those from the source video. `duration` is supported and
|
|
761
|
+
controls how long the extension is (not the total video length).
|
|
762
|
+
</Note>
|
|
763
|
+
|
|
764
|
+
### Reference-to-Video (R2V)
|
|
765
|
+
|
|
766
|
+
Provide reference images to guide the video's style and content. Unlike image-to-video, reference images are not used as the first frame — the model incorporates their visual elements into the generated video. Each reference image can be a public HTTPS URL or a base64 data URI.
|
|
767
|
+
|
|
768
|
+
```ts
|
|
769
|
+
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
|
|
770
|
+
import { experimental_generateVideo as generateVideo } from 'ai';
|
|
771
|
+
|
|
772
|
+
const { video } = await generateVideo({
|
|
773
|
+
model: xai.video('grok-imagine-video'),
|
|
774
|
+
prompt:
|
|
775
|
+
'The comic cat from <IMAGE_1> and the comic dog from <IMAGE_2> ' +
|
|
776
|
+
'are having a playful chase through a sunlit park. ' +
|
|
777
|
+
'Cinematic slow-motion, warm afternoon light.',
|
|
778
|
+
duration: 8,
|
|
779
|
+
aspectRatio: '16:9',
|
|
780
|
+
providerOptions: {
|
|
781
|
+
xai: {
|
|
782
|
+
mode: 'reference-to-video',
|
|
783
|
+
referenceImageUrls: [
|
|
784
|
+
'https://example.com/comic-cat.png',
|
|
785
|
+
'https://example.com/comic-dog.png',
|
|
786
|
+
],
|
|
787
|
+
pollTimeoutMs: 600000,
|
|
788
|
+
} satisfies XaiVideoModelOptions,
|
|
789
|
+
},
|
|
790
|
+
});
|
|
791
|
+
```
|
|
792
|
+
|
|
793
|
+
Use `<IMAGE_1>`, `<IMAGE_2>`, etc. in your prompt to reference specific images. Up to 7 reference images are supported per request.
|
|
794
|
+
|
|
795
|
+
<Note>
|
|
796
|
+
Reference-to-video supports `duration`, `aspectRatio`, and `resolution`. Use
|
|
797
|
+
`mode` to select the operation — the modes are mutually exclusive.
|
|
798
|
+
</Note>
|
|
799
|
+
|
|
705
800
|
### Video Provider Options
|
|
706
801
|
|
|
707
802
|
The following provider options are available via `providerOptions.xai`.
|
|
@@ -721,10 +816,27 @@ You can validate the provider options using the `XaiVideoModelOptions` type.
|
|
|
721
816
|
`1280x720` maps to `720p` and `854x480` maps to `480p`.
|
|
722
817
|
Use this provider option to pass the native format directly.
|
|
723
818
|
|
|
819
|
+
- **mode** _'edit-video' | 'extend-video' | 'reference-to-video'_
|
|
820
|
+
|
|
821
|
+
Selects the explicit video operation. The modes are mutually exclusive:
|
|
822
|
+
- `'edit-video'` — edit an existing video (requires `videoUrl`)
|
|
823
|
+
- `'extend-video'` — extend a video from its last frame (requires `videoUrl`)
|
|
824
|
+
- `'reference-to-video'` — generate from reference images (requires `referenceImageUrls`)
|
|
825
|
+
|
|
826
|
+
When omitted, standard generation is used. Legacy inputs that omit `mode` are still auto-detected from the other fields provided, for backward compatibility.
|
|
827
|
+
|
|
724
828
|
- **videoUrl** _string_
|
|
725
829
|
|
|
726
|
-
URL of a source video
|
|
727
|
-
|
|
830
|
+
URL of a source video. Used with `mode: 'edit-video'` for video editing
|
|
831
|
+
and `mode: 'extend-video'` for video extension.
|
|
832
|
+
|
|
833
|
+
- **referenceImageUrls** _string[]_
|
|
834
|
+
|
|
835
|
+
Array of 1–7 reference images, each given as a public HTTPS URL or a base64 data URI, for
|
|
836
|
+
reference-to-video (R2V) generation. The model incorporates visual
|
|
837
|
+
elements from these images without using them as the first frame. Use
|
|
838
|
+
`<IMAGE_1>`, `<IMAGE_2>`, etc. in the prompt to reference specific
|
|
839
|
+
images. Used with `mode: 'reference-to-video'`.
|
|
728
840
|
|
|
729
841
|
<Note>
|
|
730
842
|
Video generation is an asynchronous process that can take several minutes.
|
|
@@ -744,14 +856,21 @@ desired ratio.
|
|
|
744
856
|
|
|
745
857
|
For **video editing**, the output matches the input video's aspect ratio and
|
|
746
858
|
resolution. Custom `duration`, `aspectRatio`, and `resolution` are not
|
|
747
|
-
supported
|
|
859
|
+
supported — the output resolution is capped at 720p (e.g., a 1080p input
|
|
748
860
|
will be downsized to 720p).
|
|
749
861
|
|
|
862
|
+
For **video extension**, the output inherits `aspectRatio` and `resolution`
|
|
863
|
+
from the source video. `duration` is supported and controls only the
|
|
864
|
+
extension length.
|
|
865
|
+
|
|
866
|
+
For **reference-to-video (R2V)**, you can specify `duration`, `aspectRatio`,
|
|
867
|
+
and `resolution` just like text-to-video.
|
|
868
|
+
|
|
750
869
|
### Video Model Capabilities
|
|
751
870
|
|
|
752
|
-
| Model | Duration | Aspect Ratios | Resolution | Image-to-Video |
|
|
753
|
-
| -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- |
|
|
754
|
-
| `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> |
|
|
871
|
+
| Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Editing | Extension | R2V |
|
|
872
|
+
| -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- | ------------------- | ------------------- |
|
|
873
|
+
| `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
|
|
755
874
|
|
|
756
875
|
<Note>
|
|
757
876
|
You can also pass any available provider model ID as a string if needed.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/xai",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.23",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -29,9 +29,9 @@
|
|
|
29
29
|
}
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"@ai-sdk/openai-compatible": "3.0.0-beta.
|
|
33
|
-
"@ai-sdk/provider": "4.0.0-beta.
|
|
34
|
-
"@ai-sdk/provider-utils": "5.0.0-beta.
|
|
32
|
+
"@ai-sdk/openai-compatible": "3.0.0-beta.14",
|
|
33
|
+
"@ai-sdk/provider": "4.0.0-beta.6",
|
|
34
|
+
"@ai-sdk/provider-utils": "5.0.0-beta.10"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/node": "20.17.24",
|
|
@@ -3,7 +3,11 @@ import {
|
|
|
3
3
|
LanguageModelV4Prompt,
|
|
4
4
|
UnsupportedFunctionalityError,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
convertToBase64,
|
|
8
|
+
isProviderReference,
|
|
9
|
+
resolveProviderReference,
|
|
10
|
+
} from '@ai-sdk/provider-utils';
|
|
7
11
|
import { XaiChatPrompt } from './xai-chat-prompt';
|
|
8
12
|
|
|
9
13
|
export function convertToXaiChatMessages(prompt: LanguageModelV4Prompt): {
|
|
@@ -34,6 +38,18 @@ export function convertToXaiChatMessages(prompt: LanguageModelV4Prompt): {
|
|
|
34
38
|
return { type: 'text', text: part.text };
|
|
35
39
|
}
|
|
36
40
|
case 'file': {
|
|
41
|
+
if (isProviderReference(part.data)) {
|
|
42
|
+
return {
|
|
43
|
+
type: 'file',
|
|
44
|
+
file: {
|
|
45
|
+
file_id: resolveProviderReference({
|
|
46
|
+
reference: part.data,
|
|
47
|
+
provider: 'xai',
|
|
48
|
+
}),
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
|
|
37
53
|
if (part.mediaType.startsWith('image/')) {
|
|
38
54
|
const mediaType =
|
|
39
55
|
part.mediaType === 'image/*'
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { lazySchema, zodSchema } from '@ai-sdk/provider-utils';
|
|
2
|
+
import { z } from 'zod/v4';
|
|
3
|
+
|
|
4
|
+
export const xaiFilesResponseSchema = lazySchema(() =>
|
|
5
|
+
zodSchema(
|
|
6
|
+
z.object({
|
|
7
|
+
id: z.string(),
|
|
8
|
+
object: z.string().nullish(),
|
|
9
|
+
bytes: z.number().nullish(),
|
|
10
|
+
created_at: z.number().nullish(),
|
|
11
|
+
filename: z.string().nullish(),
|
|
12
|
+
purpose: z.string().nullish(),
|
|
13
|
+
status: z.string().nullish(),
|
|
14
|
+
}),
|
|
15
|
+
),
|
|
16
|
+
);
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { InferSchema, lazySchema, zodSchema } from '@ai-sdk/provider-utils';
|
|
2
|
+
import { z } from 'zod/v4';
|
|
3
|
+
|
|
4
|
+
export const xaiFilesOptionsSchema = lazySchema(() =>
|
|
5
|
+
zodSchema(
|
|
6
|
+
z
|
|
7
|
+
.object({
|
|
8
|
+
teamId: z.string().optional(),
|
|
9
|
+
filePath: z.string().optional(),
|
|
10
|
+
})
|
|
11
|
+
.passthrough(),
|
|
12
|
+
),
|
|
13
|
+
);
|
|
14
|
+
|
|
15
|
+
export type XaiFilesOptions = InferSchema<typeof xaiFilesOptionsSchema>;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import {
|
|
2
|
+
FilesV4,
|
|
3
|
+
FilesV4UploadFileCallOptions,
|
|
4
|
+
FilesV4UploadFileResult,
|
|
5
|
+
} from '@ai-sdk/provider';
|
|
6
|
+
import {
|
|
7
|
+
combineHeaders,
|
|
8
|
+
convertBase64ToUint8Array,
|
|
9
|
+
createJsonResponseHandler,
|
|
10
|
+
FetchFunction,
|
|
11
|
+
parseProviderOptions,
|
|
12
|
+
postFormDataToApi,
|
|
13
|
+
} from '@ai-sdk/provider-utils';
|
|
14
|
+
import { xaiFailedResponseHandler } from '../xai-error';
|
|
15
|
+
import { xaiFilesResponseSchema } from './xai-files-api';
|
|
16
|
+
import { xaiFilesOptionsSchema, XaiFilesOptions } from './xai-files-options';
|
|
17
|
+
|
|
18
|
+
interface XaiFilesConfig {
|
|
19
|
+
provider: string;
|
|
20
|
+
baseURL: string | undefined;
|
|
21
|
+
headers: () => Record<string, string | undefined>;
|
|
22
|
+
fetch?: FetchFunction;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export class XaiFiles implements FilesV4 {
|
|
26
|
+
readonly specificationVersion = 'v4';
|
|
27
|
+
|
|
28
|
+
get provider(): string {
|
|
29
|
+
return this.config.provider;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
constructor(private readonly config: XaiFilesConfig) {}
|
|
33
|
+
|
|
34
|
+
async uploadFile({
|
|
35
|
+
data,
|
|
36
|
+
mediaType,
|
|
37
|
+
filename,
|
|
38
|
+
providerOptions,
|
|
39
|
+
}: FilesV4UploadFileCallOptions): Promise<FilesV4UploadFileResult> {
|
|
40
|
+
const xaiOptions = (await parseProviderOptions({
|
|
41
|
+
provider: 'xai',
|
|
42
|
+
providerOptions,
|
|
43
|
+
schema: xaiFilesOptionsSchema,
|
|
44
|
+
})) as XaiFilesOptions | undefined;
|
|
45
|
+
|
|
46
|
+
const fileBytes =
|
|
47
|
+
data instanceof Uint8Array ? data : convertBase64ToUint8Array(data);
|
|
48
|
+
|
|
49
|
+
const blob = new Blob([fileBytes], {
|
|
50
|
+
type: mediaType,
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
const formData = new FormData();
|
|
54
|
+
if (filename != null) {
|
|
55
|
+
formData.append('file', blob, filename);
|
|
56
|
+
} else {
|
|
57
|
+
formData.append('file', blob);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (xaiOptions?.teamId != null) {
|
|
61
|
+
formData.append('team_id', xaiOptions.teamId);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const { value: response } = await postFormDataToApi({
|
|
65
|
+
url: `${this.config.baseURL}/files`,
|
|
66
|
+
headers: combineHeaders(this.config.headers()),
|
|
67
|
+
formData,
|
|
68
|
+
failedResponseHandler: xaiFailedResponseHandler,
|
|
69
|
+
successfulResponseHandler: createJsonResponseHandler(
|
|
70
|
+
xaiFilesResponseSchema,
|
|
71
|
+
),
|
|
72
|
+
fetch: this.config.fetch,
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
return {
|
|
76
|
+
warnings: [],
|
|
77
|
+
providerReference: { xai: response.id },
|
|
78
|
+
...((response.filename ?? filename)
|
|
79
|
+
? { filename: response.filename ?? filename }
|
|
80
|
+
: {}),
|
|
81
|
+
...(mediaType != null ? { mediaType } : {}),
|
|
82
|
+
providerMetadata: {
|
|
83
|
+
xai: {
|
|
84
|
+
...(response.filename != null ? { filename: response.filename } : {}),
|
|
85
|
+
...(response.bytes != null ? { bytes: response.bytes } : {}),
|
|
86
|
+
...(response.created_at != null
|
|
87
|
+
? { createdAt: response.created_at }
|
|
88
|
+
: {}),
|
|
89
|
+
},
|
|
90
|
+
},
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -20,6 +20,7 @@ export type {
|
|
|
20
20
|
/** @deprecated Use `XaiVideoModelOptions` instead. */
|
|
21
21
|
XaiVideoModelOptions as XaiVideoProviderOptions,
|
|
22
22
|
} from './xai-video-options';
|
|
23
|
+
export type { XaiFilesOptions } from './files/xai-files-options';
|
|
23
24
|
export { createXai, xai } from './xai-provider';
|
|
24
25
|
export type { XaiProvider, XaiProviderSettings } from './xai-provider';
|
|
25
26
|
export {
|
|
@@ -3,7 +3,11 @@ import {
|
|
|
3
3
|
LanguageModelV4Message,
|
|
4
4
|
UnsupportedFunctionalityError,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
convertToBase64,
|
|
8
|
+
isProviderReference,
|
|
9
|
+
resolveProviderReference,
|
|
10
|
+
} from '@ai-sdk/provider-utils';
|
|
7
11
|
import {
|
|
8
12
|
XaiResponsesInput,
|
|
9
13
|
XaiResponsesUserMessageContentPart,
|
|
@@ -42,7 +46,15 @@ export async function convertToXaiResponsesInput({
|
|
|
42
46
|
}
|
|
43
47
|
|
|
44
48
|
case 'file': {
|
|
45
|
-
if (block.
|
|
49
|
+
if (isProviderReference(block.data)) {
|
|
50
|
+
contentParts.push({
|
|
51
|
+
type: 'input_file',
|
|
52
|
+
file_id: resolveProviderReference({
|
|
53
|
+
reference: block.data,
|
|
54
|
+
provider: 'xai',
|
|
55
|
+
}),
|
|
56
|
+
});
|
|
57
|
+
} else if (block.mediaType.startsWith('image/')) {
|
|
46
58
|
const mediaType =
|
|
47
59
|
block.mediaType === 'image/*'
|
|
48
60
|
? 'image/jpeg'
|
|
@@ -26,7 +26,8 @@ export type XaiResponsesSystemMessage = {
|
|
|
26
26
|
|
|
27
27
|
export type XaiResponsesUserMessageContentPart =
|
|
28
28
|
| { type: 'input_text'; text: string }
|
|
29
|
-
| { type: 'input_image'; image_url: string }
|
|
29
|
+
| { type: 'input_image'; image_url: string }
|
|
30
|
+
| { type: 'input_file'; file_id: string };
|
|
30
31
|
|
|
31
32
|
export type XaiResponsesUserMessage = {
|
|
32
33
|
role: 'user';
|
package/src/xai-chat-prompt.ts
CHANGED
|
@@ -18,7 +18,8 @@ export interface XaiUserMessage {
|
|
|
18
18
|
|
|
19
19
|
export type XaiUserMessageContent =
|
|
20
20
|
| { type: 'text'; text: string }
|
|
21
|
-
| { type: 'image_url'; image_url: { url: string } }
|
|
21
|
+
| { type: 'image_url'; image_url: { url: string } }
|
|
22
|
+
| { type: 'file'; file: { file_id: string } };
|
|
22
23
|
|
|
23
24
|
export interface XaiAssistantMessage {
|
|
24
25
|
role: 'assistant';
|