@ai-sdk/xai 4.0.0-beta.21 → 4.0.0-beta.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/01-xai.mdx CHANGED
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  title: xAI Grok
3
- description: Learn how to use xAI Grok.
3
+ description: Learn how to use xAI Grok and Imagine.
4
4
  ---
5
5
 
6
6
  # xAI Grok Provider
@@ -73,10 +73,10 @@ You can use the following optional settings to customize the xAI provider instan
73
73
  ## Language Models
74
74
 
75
75
  You can create [xAI models](https://console.x.ai) using a provider instance. The
76
- first argument is the model id, e.g. `grok-3`.
76
+ first argument is the model id, e.g. `grok-4.20-non-reasoning`.
77
77
 
78
78
  ```ts
79
- const model = xai('grok-3');
79
+ const model = xai('grok-4.20-non-reasoning');
80
80
  ```
81
81
 
82
82
  By default, `xai(modelId)` uses the Responses API. To use the [Chat Completions API](https://docs.x.ai/docs/api-reference#chat-completions) (legacy), use `xai.chat(modelId)`.
@@ -90,7 +90,7 @@ import { xai } from '@ai-sdk/xai';
90
90
  import { generateText } from 'ai';
91
91
 
92
92
  const { text } = await generateText({
93
- model: xai('grok-3'),
93
+ model: xai('grok-4.20-non-reasoning'),
94
94
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
95
95
  });
96
96
  ```
@@ -104,7 +104,7 @@ and support structured data generation with [`Output`](/docs/reference/ai-sdk-co
104
104
  The xAI Responses API is the default when using `xai(modelId)`. You can also use `xai.responses(modelId)` explicitly. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
105
105
 
106
106
  ```ts
107
- const model = xai.responses('grok-4-fast-non-reasoning');
107
+ const model = xai.responses('grok-4.20-non-reasoning');
108
108
  ```
109
109
 
110
110
  The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
@@ -148,7 +148,7 @@ import { xai } from '@ai-sdk/xai';
148
148
  import { generateText } from 'ai';
149
149
 
150
150
  const { text, sources } = await generateText({
151
- model: xai.responses('grok-4-fast-non-reasoning'),
151
+ model: xai.responses('grok-4.20-non-reasoning'),
152
152
  prompt: 'What are the latest developments in AI?',
153
153
  tools: {
154
154
  web_search: xai.tools.webSearch({
@@ -182,7 +182,7 @@ The X search tool enables searching X (Twitter) for posts, with filtering by han
182
182
 
183
183
  ```ts
184
184
  const { text, sources } = await generateText({
185
- model: xai.responses('grok-4-fast-non-reasoning'),
185
+ model: xai.responses('grok-4.20-non-reasoning'),
186
186
  prompt: 'What are people saying about AI on X this week?',
187
187
  tools: {
188
188
  x_search: xai.tools.xSearch({
@@ -228,7 +228,7 @@ The code execution tool enables the model to write and execute Python code for c
228
228
 
229
229
  ```ts
230
230
  const { text } = await generateText({
231
- model: xai.responses('grok-4-fast-non-reasoning'),
231
+ model: xai.responses('grok-4.20-non-reasoning'),
232
232
  prompt:
233
233
  'Calculate the compound interest for $10,000 at 5% annually for 10 years',
234
234
  tools: {
@@ -243,7 +243,7 @@ The view image tool enables the model to view and analyze images:
243
243
 
244
244
  ```ts
245
245
  const { text } = await generateText({
246
- model: xai.responses('grok-4-fast-non-reasoning'),
246
+ model: xai.responses('grok-4.20-non-reasoning'),
247
247
  prompt: 'Describe what you see in the image',
248
248
  tools: {
249
249
  view_image: xai.tools.viewImage(),
@@ -257,7 +257,7 @@ The view X video tool enables the model to view and analyze videos from X (Twitt
257
257
 
258
258
  ```ts
259
259
  const { text } = await generateText({
260
- model: xai.responses('grok-4-fast-non-reasoning'),
260
+ model: xai.responses('grok-4.20-non-reasoning'),
261
261
  prompt: 'Summarize the content of this X video',
262
262
  tools: {
263
263
  view_x_video: xai.tools.viewXVideo(),
@@ -271,7 +271,7 @@ The MCP server tool enables the model to connect to remote [Model Context Protoc
271
271
 
272
272
  ```ts
273
273
  const { text } = await generateText({
274
- model: xai.responses('grok-4-fast-non-reasoning'),
274
+ model: xai.responses('grok-4.20-non-reasoning'),
275
275
  prompt: 'Use the weather tool to check conditions in San Francisco',
276
276
  tools: {
277
277
  weather_server: xai.tools.mcpServer({
@@ -319,7 +319,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
319
319
  import { streamText } from 'ai';
320
320
 
321
321
  const result = streamText({
322
- model: xai.responses('grok-4-1-fast-reasoning'),
322
+ model: xai.responses('grok-4.20-reasoning'),
323
323
  prompt: 'What documents do you have access to?',
324
324
  tools: {
325
325
  file_search: xai.tools.fileSearch({
@@ -352,7 +352,7 @@ const result = streamText({
352
352
  Include file search results in the response. When set to `['file_search_call.results']`, the response will contain the actual search results with file content and scores.
353
353
 
354
354
  <Note>
355
- File search requires grok-4 family models and the Responses API. Vector stores
355
+ File search requires grok-4 family models (including grok-4.20) and the Responses API. Vector stores
356
356
  can be created using the [xAI
357
357
  API](https://docs.x.ai/docs/guides/using-collections/api).
358
358
  </Note>
@@ -366,7 +366,7 @@ import { xai } from '@ai-sdk/xai';
366
366
  import { streamText } from 'ai';
367
367
 
368
368
  const { fullStream } = streamText({
369
- model: xai.responses('grok-4-fast-non-reasoning'),
369
+ model: xai.responses('grok-4.20-non-reasoning'),
370
370
  prompt: 'Research AI safety developments and calculate risk metrics',
371
371
  tools: {
372
372
  web_search: xai.tools.webSearch(),
@@ -400,7 +400,7 @@ import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
400
400
  import { generateText } from 'ai';
401
401
 
402
402
  const result = await generateText({
403
- model: xai.responses('grok-4-fast-non-reasoning'),
403
+ model: xai.responses('grok-4.20-non-reasoning'),
404
404
  providerOptions: {
405
405
  xai: {
406
406
  reasoningEffort: 'high',
@@ -445,19 +445,16 @@ The following provider options are available:
445
445
 
446
446
  | Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
447
447
  | ----------------------------- | ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
448
- | `grok-4-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
448
+ | `grok-4.20-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
449
+ | `grok-4.20-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
449
450
  | `grok-4-1-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
450
451
  | `grok-4-1-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
451
- | `grok-4-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
452
+ | `grok-4-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
452
453
  | `grok-4-fast-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
454
+ | `grok-4-fast-non-reasoning` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
453
455
  | `grok-code-fast-1` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
454
- | `grok-4` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
455
- | `grok-4-0709` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
456
- | `grok-4-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
457
456
  | `grok-3` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
458
- | `grok-3-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
459
457
  | `grok-3-mini` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
460
- | `grok-3-mini-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
461
458
 
462
459
  <Note>
463
460
  The table above lists popular models. Please see the [xAI
@@ -516,7 +513,7 @@ const { images } = await generateImage({
516
513
 
517
514
  #### Multi-Image Editing
518
515
 
519
- Combine or reference multiple input images (up to 3) in the prompt:
516
+ Combine or reference multiple input images in the prompt:
520
517
 
521
518
  ```ts
522
519
  import { xai } from '@ai-sdk/xai';
@@ -554,37 +551,53 @@ const { images } = await generateImage({
554
551
 
555
552
  <Note>
556
553
  Input images can be provided as `Buffer`, `ArrayBuffer`, `Uint8Array`, or
557
- base64-encoded strings. Up to 3 input images are supported per request.
554
+ base64-encoded strings.
558
555
  </Note>
559
556
 
560
- ### Model-specific options
557
+ ### Image Provider Options
561
558
 
562
- You can customize the image generation behavior with model-specific settings:
559
+ You can customize the image generation behavior with provider-specific settings via `providerOptions.xai`:
563
560
 
564
561
  ```ts
565
- import { xai } from '@ai-sdk/xai';
562
+ import { xai, type XaiImageModelOptions } from '@ai-sdk/xai';
566
563
  import { generateImage } from 'ai';
567
564
 
568
565
  const { images } = await generateImage({
569
- model: xai.image('grok-imagine-image'),
566
+ model: xai.image('grok-imagine-image-pro'),
570
567
  prompt: 'A futuristic cityscape at sunset',
571
568
  aspectRatio: '16:9',
572
- n: 2,
569
+ providerOptions: {
570
+ xai: {
571
+ resolution: '2k',
572
+ quality: 'high',
573
+ } satisfies XaiImageModelOptions,
574
+ },
573
575
  });
574
576
  ```
575
577
 
576
- ### Model Capabilities
578
+ - **resolution** _'1k' | '2k'_
577
579
 
578
- | Model | Aspect Ratios | Image Editing |
579
- | -------------------- | ----------------------------------------------------------------------------------------------------------- | ------------------- |
580
- | `grok-imagine-image` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
580
+ Output resolution. `1k` produces ~1024×1024 images, `2k` produces ~2048×2048
581
+ images (actual dimensions vary based on aspect ratio). Available for
582
+ `grok-imagine-image-pro`.
583
+
584
+ - **quality** _'low' | 'medium' | 'high'_
585
+
586
+ Image quality level. Higher quality may increase generation time.
587
+
588
+ ### Image Model Capabilities
589
+
590
+ | Model | Resolution | Aspect Ratios | Image Editing |
591
+ | ------------------------ | ------------ | ----------------------------------------------------------------------------------------------------------- | ------------------- |
592
+ | `grok-imagine-image-pro` | `1k`, `2k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
593
+ | `grok-imagine-image` | `1k` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `2:1`, `1:2`, `19.5:9`, `9:19.5`, `20:9`, `9:20`, `auto` | <Check size={18} /> |
581
594
 
582
595
  ## Video Models
583
596
 
584
597
  You can create xAI video models using the `.video()` factory method.
585
598
  For more on video generation with the AI SDK see [generateVideo()](/docs/reference/ai-sdk-core/generate-video).
586
599
 
587
- This provider supports three video generation modes: text-to-video, image-to-video, and video editing.
600
+ This provider supports standard video generation from text prompts or image input, plus explicit video editing, video extension, and reference-to-video (R2V) operations.
588
601
 
589
602
  ### Text-to-Video
590
603
 
@@ -594,7 +607,7 @@ Generate videos from text prompts:
594
607
  import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
595
608
  import { experimental_generateVideo as generateVideo } from 'ai';
596
609
 
597
- const { videos } = await generateVideo({
610
+ const { video } = await generateVideo({
598
611
  model: xai.video('grok-imagine-video'),
599
612
  prompt: 'A chicken flying into the sunset in the style of 90s anime.',
600
613
  aspectRatio: '16:9',
@@ -607,15 +620,15 @@ const { videos } = await generateVideo({
607
620
  });
608
621
  ```
609
622
 
610
- ### Image-to-Video
623
+ ### Generation with Image Input
611
624
 
612
- Generate videos using an image as the starting frame with an optional text prompt:
625
+ Generate videos using an image as the starting frame with an optional text prompt. This uses the standard generation path rather than a separate provider mode:
613
626
 
614
627
  ```ts
615
628
  import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
616
629
  import { experimental_generateVideo as generateVideo } from 'ai';
617
630
 
618
- const { videos } = await generateVideo({
631
+ const { video } = await generateVideo({
619
632
  model: xai.video('grok-imagine-video'),
620
633
  prompt: {
621
634
  image: 'https://example.com/start-frame.png',
@@ -638,11 +651,12 @@ Edit an existing video using a text prompt by providing a source video URL via p
638
651
  import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
639
652
  import { experimental_generateVideo as generateVideo } from 'ai';
640
653
 
641
- const { videos } = await generateVideo({
654
+ const { video } = await generateVideo({
642
655
  model: xai.video('grok-imagine-video'),
643
656
  prompt: 'Give the person sunglasses and a hat',
644
657
  providerOptions: {
645
658
  xai: {
659
+ mode: 'edit-video',
646
660
  videoUrl: 'https://example.com/source-video.mp4',
647
661
  pollTimeoutMs: 600000, // 10 minutes
648
662
  } satisfies XaiVideoModelOptions,
@@ -668,6 +682,7 @@ import { experimental_generateVideo as generateVideo } from 'ai';
668
682
 
669
683
  const providerOptions = {
670
684
  xai: {
685
+ mode: 'edit-video',
671
686
  videoUrl: 'https://example.com/source-video.mp4',
672
687
  pollTimeoutMs: 600000,
673
688
  } satisfies XaiVideoModelOptions,
@@ -689,19 +704,99 @@ const [withSunglasses, withScarf] = await Promise.all([
689
704
  model: xai.video('grok-imagine-video'),
690
705
  prompt: 'Add sunglasses',
691
706
  providerOptions: {
692
- xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
707
+ xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
693
708
  },
694
709
  }),
695
710
  generateVideo({
696
711
  model: xai.video('grok-imagine-video'),
697
712
  prompt: 'Add a scarf',
698
713
  providerOptions: {
699
- xai: { videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
714
+ xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
700
715
  },
701
716
  }),
702
717
  ]);
703
718
  ```
704
719
 
720
+ ### Video Extension
721
+
722
+ Extend an existing video from its last frame. The `duration` controls the length of the extension only, not the total output. The output inherits `aspectRatio` and `resolution` from the source video.
723
+
724
+ ```ts
725
+ import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
726
+ import { experimental_generateVideo as generateVideo } from 'ai';
727
+
728
+ // Step 1: Generate a source video
729
+ const source = await generateVideo({
730
+ model: xai.video('grok-imagine-video'),
731
+ prompt: 'A cat sitting on a sunlit windowsill, tail gently swishing.',
732
+ duration: 5,
733
+ aspectRatio: '16:9',
734
+ providerOptions: {
735
+ xai: {
736
+ pollTimeoutMs: 600000,
737
+ } satisfies XaiVideoModelOptions,
738
+ },
739
+ });
740
+
741
+ const sourceUrl = source.providerMetadata?.xai?.videoUrl as string;
742
+
743
+ // Step 2: Extend the video with a new scene
744
+ const extended = await generateVideo({
745
+ model: xai.video('grok-imagine-video'),
746
+ prompt: 'The cat turns its head, notices a butterfly, and leaps off.',
747
+ duration: 6,
748
+ providerOptions: {
749
+ xai: {
750
+ mode: 'extend-video',
751
+ videoUrl: sourceUrl,
752
+ pollTimeoutMs: 600000,
753
+ } satisfies XaiVideoModelOptions,
754
+ },
755
+ });
756
+ ```
757
+
758
+ <Note>
759
+ Video extension does not support custom `aspectRatio` or `resolution` — the
760
+ output inherits those from the source video. `duration` is supported and
761
+ controls how long the extension is (not the total video length).
762
+ </Note>
763
+
764
+ ### Reference-to-Video (R2V)
765
+
766
+ Provide reference images to guide the video's style and content. Unlike image-to-video, reference images are not used as the first frame — the model incorporates their visual elements into the generated video. Each reference image can be a public HTTPS URL or a base64 data URI.
767
+
768
+ ```ts
769
+ import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
770
+ import { experimental_generateVideo as generateVideo } from 'ai';
771
+
772
+ const { video } = await generateVideo({
773
+ model: xai.video('grok-imagine-video'),
774
+ prompt:
775
+ 'The comic cat from <IMAGE_1> and the comic dog from <IMAGE_2> ' +
776
+ 'are having a playful chase through a sunlit park. ' +
777
+ 'Cinematic slow-motion, warm afternoon light.',
778
+ duration: 8,
779
+ aspectRatio: '16:9',
780
+ providerOptions: {
781
+ xai: {
782
+ mode: 'reference-to-video',
783
+ referenceImageUrls: [
784
+ 'https://example.com/comic-cat.png',
785
+ 'https://example.com/comic-dog.png',
786
+ ],
787
+ pollTimeoutMs: 600000,
788
+ } satisfies XaiVideoModelOptions,
789
+ },
790
+ });
791
+ ```
792
+
793
+ Use `<IMAGE_1>`, `<IMAGE_2>`, etc. in your prompt to reference specific images. Up to 7 reference images are supported per request.
794
+
795
+ <Note>
796
+ Reference-to-video supports `duration`, `aspectRatio`, and `resolution`. Use
797
+ `mode` to select the operation — each mode is mutually exclusive.
798
+ </Note>
799
+
705
800
  ### Video Provider Options
706
801
 
707
802
  The following provider options are available via `providerOptions.xai`.
@@ -721,10 +816,27 @@ You can validate the provider options using the `XaiVideoModelOptions` type.
721
816
  `1280x720` maps to `720p` and `854x480` maps to `480p`.
722
817
  Use this provider option to pass the native format directly.
723
818
 
819
+ - **mode** _'edit-video' | 'extend-video' | 'reference-to-video'_
820
+
821
+ Selects the explicit video operation. Each mode is mutually exclusive:
822
+ - `'edit-video'` — edit an existing video (requires `videoUrl`)
823
+ - `'extend-video'` — extend a video from its last frame (requires `videoUrl`)
824
+ - `'reference-to-video'` — generate from reference images (requires `referenceImageUrls`)
825
+
826
+ When omitted, standard generation is used. Legacy inputs are still auto-detected from fields for backward compatibility.
827
+
724
828
  - **videoUrl** _string_
725
829
 
726
- URL of a source video for video editing. When provided, the prompt is used
727
- to describe the desired edits to the video.
830
+ URL of a source video. Used with `mode: 'edit-video'` for video editing
831
+ and `mode: 'extend-video'` for video extension.
832
+
833
+ - **referenceImageUrls** _string[]_
834
+
835
+ Array of reference image URLs (1–7 images) or base64 data URIs for
836
+ reference-to-video (R2V) generation. The model incorporates visual
837
+ elements from these images without using them as the first frame. Use
838
+ `<IMAGE_1>`, `<IMAGE_2>`, etc. in the prompt to reference specific
839
+ images. Used with `mode: 'reference-to-video'`.
728
840
 
729
841
  <Note>
730
842
  Video generation is an asynchronous process that can take several minutes.
@@ -744,14 +856,21 @@ desired ratio.
744
856
 
745
857
  For **video editing**, the output matches the input video's aspect ratio and
746
858
  resolution. Custom `duration`, `aspectRatio`, and `resolution` are not
747
- supported - the output resolution is capped at 720p (e.g., a 1080p input
859
+ supported — the output resolution is capped at 720p (e.g., a 1080p input
748
860
  will be downsized to 720p).
749
861
 
862
+ For **video extension**, the output inherits `aspectRatio` and `resolution`
863
+ from the source video. `duration` is supported and controls only the
864
+ extension length.
865
+
866
+ For **reference-to-video (R2V)**, you can specify `duration`, `aspectRatio`,
867
+ and `resolution` just like text-to-video.
868
+
750
869
  ### Video Model Capabilities
751
870
 
752
- | Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Video Editing |
753
- | -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- |
754
- | `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> |
871
+ | Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Editing | Extension | R2V |
872
+ | -------------------- | -------- | ------------------------------------------------- | -------------- | ------------------- | ------------------- | ------------------- | ------------------- |
873
+ | `grok-imagine-video` | 1–15s | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | `480p`, `720p` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
755
874
 
756
875
  <Note>
757
876
  You can also pass any available provider model ID as a string if needed.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/xai",
3
- "version": "4.0.0-beta.21",
3
+ "version": "4.0.0-beta.24",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -29,9 +29,9 @@
29
29
  }
30
30
  },
31
31
  "dependencies": {
32
- "@ai-sdk/openai-compatible": "3.0.0-beta.13",
33
- "@ai-sdk/provider": "4.0.0-beta.5",
34
- "@ai-sdk/provider-utils": "5.0.0-beta.9"
32
+ "@ai-sdk/openai-compatible": "3.0.0-beta.15",
33
+ "@ai-sdk/provider": "4.0.0-beta.7",
34
+ "@ai-sdk/provider-utils": "5.0.0-beta.11"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/node": "20.17.24",
@@ -3,7 +3,11 @@ import {
3
3
  LanguageModelV4Prompt,
4
4
  UnsupportedFunctionalityError,
5
5
  } from '@ai-sdk/provider';
6
- import { convertToBase64 } from '@ai-sdk/provider-utils';
6
+ import {
7
+ convertToBase64,
8
+ isProviderReference,
9
+ resolveProviderReference,
10
+ } from '@ai-sdk/provider-utils';
7
11
  import { XaiChatPrompt } from './xai-chat-prompt';
8
12
 
9
13
  export function convertToXaiChatMessages(prompt: LanguageModelV4Prompt): {
@@ -34,6 +38,18 @@ export function convertToXaiChatMessages(prompt: LanguageModelV4Prompt): {
34
38
  return { type: 'text', text: part.text };
35
39
  }
36
40
  case 'file': {
41
+ if (isProviderReference(part.data)) {
42
+ return {
43
+ type: 'file',
44
+ file: {
45
+ file_id: resolveProviderReference({
46
+ reference: part.data,
47
+ provider: 'xai',
48
+ }),
49
+ },
50
+ };
51
+ }
52
+
37
53
  if (part.mediaType.startsWith('image/')) {
38
54
  const mediaType =
39
55
  part.mediaType === 'image/*'
@@ -0,0 +1,16 @@
1
+ import { lazySchema, zodSchema } from '@ai-sdk/provider-utils';
2
+ import { z } from 'zod/v4';
3
+
4
+ export const xaiFilesResponseSchema = lazySchema(() =>
5
+ zodSchema(
6
+ z.object({
7
+ id: z.string(),
8
+ object: z.string().nullish(),
9
+ bytes: z.number().nullish(),
10
+ created_at: z.number().nullish(),
11
+ filename: z.string().nullish(),
12
+ purpose: z.string().nullish(),
13
+ status: z.string().nullish(),
14
+ }),
15
+ ),
16
+ );
@@ -0,0 +1,15 @@
1
+ import { InferSchema, lazySchema, zodSchema } from '@ai-sdk/provider-utils';
2
+ import { z } from 'zod/v4';
3
+
4
+ export const xaiFilesOptionsSchema = lazySchema(() =>
5
+ zodSchema(
6
+ z
7
+ .object({
8
+ teamId: z.string().optional(),
9
+ filePath: z.string().optional(),
10
+ })
11
+ .passthrough(),
12
+ ),
13
+ );
14
+
15
+ export type XaiFilesOptions = InferSchema<typeof xaiFilesOptionsSchema>;
@@ -0,0 +1,93 @@
1
+ import {
2
+ FilesV4,
3
+ FilesV4UploadFileCallOptions,
4
+ FilesV4UploadFileResult,
5
+ } from '@ai-sdk/provider';
6
+ import {
7
+ combineHeaders,
8
+ convertBase64ToUint8Array,
9
+ createJsonResponseHandler,
10
+ FetchFunction,
11
+ parseProviderOptions,
12
+ postFormDataToApi,
13
+ } from '@ai-sdk/provider-utils';
14
+ import { xaiFailedResponseHandler } from '../xai-error';
15
+ import { xaiFilesResponseSchema } from './xai-files-api';
16
+ import { xaiFilesOptionsSchema, XaiFilesOptions } from './xai-files-options';
17
+
18
+ interface XaiFilesConfig {
19
+ provider: string;
20
+ baseURL: string | undefined;
21
+ headers: () => Record<string, string | undefined>;
22
+ fetch?: FetchFunction;
23
+ }
24
+
25
+ export class XaiFiles implements FilesV4 {
26
+ readonly specificationVersion = 'v4';
27
+
28
+ get provider(): string {
29
+ return this.config.provider;
30
+ }
31
+
32
+ constructor(private readonly config: XaiFilesConfig) {}
33
+
34
+ async uploadFile({
35
+ data,
36
+ mediaType,
37
+ filename,
38
+ providerOptions,
39
+ }: FilesV4UploadFileCallOptions): Promise<FilesV4UploadFileResult> {
40
+ const xaiOptions = (await parseProviderOptions({
41
+ provider: 'xai',
42
+ providerOptions,
43
+ schema: xaiFilesOptionsSchema,
44
+ })) as XaiFilesOptions | undefined;
45
+
46
+ const fileBytes =
47
+ data instanceof Uint8Array ? data : convertBase64ToUint8Array(data);
48
+
49
+ const blob = new Blob([fileBytes], {
50
+ type: mediaType,
51
+ });
52
+
53
+ const formData = new FormData();
54
+ if (filename != null) {
55
+ formData.append('file', blob, filename);
56
+ } else {
57
+ formData.append('file', blob);
58
+ }
59
+
60
+ if (xaiOptions?.teamId != null) {
61
+ formData.append('team_id', xaiOptions.teamId);
62
+ }
63
+
64
+ const { value: response } = await postFormDataToApi({
65
+ url: `${this.config.baseURL}/files`,
66
+ headers: combineHeaders(this.config.headers()),
67
+ formData,
68
+ failedResponseHandler: xaiFailedResponseHandler,
69
+ successfulResponseHandler: createJsonResponseHandler(
70
+ xaiFilesResponseSchema,
71
+ ),
72
+ fetch: this.config.fetch,
73
+ });
74
+
75
+ return {
76
+ warnings: [],
77
+ providerReference: { xai: response.id },
78
+ ...((response.filename ?? filename)
79
+ ? { filename: response.filename ?? filename }
80
+ : {}),
81
+ ...(mediaType != null ? { mediaType } : {}),
82
+ providerMetadata: {
83
+ xai: {
84
+ ...(response.filename != null ? { filename: response.filename } : {}),
85
+ ...(response.bytes != null ? { bytes: response.bytes } : {}),
86
+ ...(response.created_at != null
87
+ ? { createdAt: response.created_at }
88
+ : {}),
89
+ },
90
+ },
91
+ };
92
+ }
93
+ }
package/src/index.ts CHANGED
@@ -20,6 +20,7 @@ export type {
20
20
  /** @deprecated Use `XaiVideoModelOptions` instead. */
21
21
  XaiVideoModelOptions as XaiVideoProviderOptions,
22
22
  } from './xai-video-options';
23
+ export type { XaiFilesOptions } from './files/xai-files-options';
23
24
  export { createXai, xai } from './xai-provider';
24
25
  export type { XaiProvider, XaiProviderSettings } from './xai-provider';
25
26
  export {
@@ -3,7 +3,11 @@ import {
3
3
  LanguageModelV4Message,
4
4
  UnsupportedFunctionalityError,
5
5
  } from '@ai-sdk/provider';
6
- import { convertToBase64 } from '@ai-sdk/provider-utils';
6
+ import {
7
+ convertToBase64,
8
+ isProviderReference,
9
+ resolveProviderReference,
10
+ } from '@ai-sdk/provider-utils';
7
11
  import {
8
12
  XaiResponsesInput,
9
13
  XaiResponsesUserMessageContentPart,
@@ -42,7 +46,15 @@ export async function convertToXaiResponsesInput({
42
46
  }
43
47
 
44
48
  case 'file': {
45
- if (block.mediaType.startsWith('image/')) {
49
+ if (isProviderReference(block.data)) {
50
+ contentParts.push({
51
+ type: 'input_file',
52
+ file_id: resolveProviderReference({
53
+ reference: block.data,
54
+ provider: 'xai',
55
+ }),
56
+ });
57
+ } else if (block.mediaType.startsWith('image/')) {
46
58
  const mediaType =
47
59
  block.mediaType === 'image/*'
48
60
  ? 'image/jpeg'
@@ -26,7 +26,8 @@ export type XaiResponsesSystemMessage = {
26
26
 
27
27
  export type XaiResponsesUserMessageContentPart =
28
28
  | { type: 'input_text'; text: string }
29
- | { type: 'input_image'; image_url: string };
29
+ | { type: 'input_image'; image_url: string }
30
+ | { type: 'input_file'; file_id: string };
30
31
 
31
32
  export type XaiResponsesUserMessage = {
32
33
  role: 'user';
@@ -18,7 +18,8 @@ export interface XaiUserMessage {
18
18
 
19
19
  export type XaiUserMessageContent =
20
20
  | { type: 'text'; text: string }
21
- | { type: 'image_url'; image_url: { url: string } };
21
+ | { type: 'image_url'; image_url: { url: string } }
22
+ | { type: 'file'; file: { file_id: string } };
22
23
 
23
24
  export interface XaiAssistantMessage {
24
25
  role: 'assistant';