@runpod/ai-sdk-provider 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # @runpod/ai-sdk-provider
2
2
 
3
+ ## 1.4.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 2e6efbd: Accept aiApiId (endpoint ID) as a valid model ID for image models and add video generation support.
8
+
9
+ Image models now use the same fallback pattern as speech, transcription, and video models: any unrecognized model ID is used directly as `https://api.runpod.ai/v2/{modelId}` instead of incorrectly appending `/openai/v1`. This means aiApiIds like `wan-2-6-t2i` or `black-forest-labs-flux-1-schnell` work out of the box without needing explicit mappings. Console endpoint URLs are also now supported for image models.
10
+
11
+ Video generation support includes 15 models across multiple providers (Pruna, Vidu, Kling, Wan, Seedance, Sora) with async polling, provider options, and both text-to-video and image-to-video capabilities.
12
+
13
+ - 32f06c4: Add support for the google/nano-banana-2-edit image model with resolution options (1k/2k/4k), 14 aspect ratios, output format, and safety checker.
14
+
15
+ ## 1.3.0
16
+
17
+ ### Minor Changes
18
+
19
+ - 973fae6: Add support for the Tongyi-MAI Z-Image Turbo image model with validated sizes and aspect ratios.
20
+
3
21
  ## 1.2.0
4
22
 
5
23
  ### Minor Changes
package/README.md CHANGED
@@ -285,12 +285,14 @@ Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/get
285
285
  | `pruna/p-image-edit` | edit | up to 1440x1440 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 |
286
286
  | `google/nano-banana-edit` | edit | up to 4096x4096 | 1:1, 4:3, 3:4 |
287
287
  | `google/nano-banana-pro-edit` | edit | 1k, 2k, 4k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9 |
288
+ | `google/nano-banana-2-edit` | edit | 1k, 2k, 4k | 1:1, 3:2, 2:3, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, 1:4, 4:1, 1:8, 8:1 |
288
289
  | `bytedance/seedream-3.0` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
289
290
  | `bytedance/seedream-4.0` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
290
291
  | `bytedance/seedream-4.0-edit` | edit | up to 4096x4096 | uses size |
291
292
  | `qwen/qwen-image` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
292
293
  | `qwen/qwen-image-edit` | edit | up to 4096x4096 | 1:1, 4:3, 3:4 |
293
294
  | `qwen/qwen-image-edit-2511` | edit | up to 1536x1536 | 1:1, 4:3, 3:4 |
295
+ | `tongyi-mai/z-image-turbo` | t2i | up to 1536x1536 | 1:1, 4:3, 3:4, 3:2, 2:3, 16:9, 9:16 |
294
296
  | `black-forest-labs/flux-1-schnell` | t2i | up to 2048x2048 | 1:1, 4:3, 3:4 |
295
297
  | `black-forest-labs/flux-1-dev` | t2i | up to 2048x2048 | 1:1, 4:3, 3:4 |
296
298
  | `black-forest-labs/flux-1-kontext-dev` | edit | up to 2048x2048 | 1:1, 4:3, 3:4 |
@@ -435,6 +437,14 @@ const { image } = await generateImage({
435
437
  });
436
438
  ```
437
439
 
440
+ #### Tongyi-MAI (Z-Image Turbo)
441
+
442
+ Supported model: `tongyi-mai/z-image-turbo`
443
+
444
+ - Supported sizes (validated by provider): 512x512, 768x768, 1024x1024, 1280x1280, 1536x1536, 512x768, 768x512, 1024x768, 768x1024, 1328x1328, 1472x1140, 1140x1472, 768x432, 1024x576, 1280x720, 1536x864, 432x768, 576x1024, 720x1280, 864x1536
445
+ - Supported `aspectRatio` values: 1:1, 4:3, 3:4, 3:2, 2:3, 16:9, 9:16 (maps to sizes above; use `size` for exact dimensions)
446
+ - Additional parameters: `strength`, `output_format`, `enable_safety_checker`, `seed`
447
+
438
448
  ## Speech Models
439
449
 
440
450
  Generate speech using the AI SDK's `generateSpeech` and `runpod.speech(...)`:
@@ -626,16 +636,16 @@ Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/get
626
636
 
627
637
  Use `providerOptions.runpod` for model-specific parameters:
628
638
 
629
- | Option | Type | Default | Description |
630
- | ------------------- | --------- | ------- | ---------------------------------------------- |
631
- | `audio` | `string` | - | URL to audio file (alternative to binary data) |
632
- | `prompt` | `string` | - | Context prompt to guide transcription |
633
- | `language` | `string` | Auto | ISO-639-1 language code (e.g., 'en', 'es') |
634
- | `word_timestamps` | `boolean` | `false` | Include word-level timestamps |
635
- | `translate` | `boolean` | `false` | Translate audio to English |
636
- | `enable_vad` | `boolean` | `false` | Enable voice activity detection |
637
- | `maxPollAttempts` | `number` | `120` | Max polling attempts |
638
- | `pollIntervalMillis`| `number` | `2000` | Polling interval (ms) |
639
+ | Option | Type | Default | Description |
640
+ | -------------------- | --------- | ------- | ---------------------------------------------- |
641
+ | `audio` | `string` | - | URL to audio file (alternative to binary data) |
642
+ | `prompt` | `string` | - | Context prompt to guide transcription |
643
+ | `language` | `string` | Auto | ISO-639-1 language code (e.g., 'en', 'es') |
644
+ | `word_timestamps` | `boolean` | `false` | Include word-level timestamps |
645
+ | `translate` | `boolean` | `false` | Translate audio to English |
646
+ | `enable_vad` | `boolean` | `false` | Enable voice activity detection |
647
+ | `maxPollAttempts` | `number` | `120` | Max polling attempts |
648
+ | `pollIntervalMillis` | `number` | `2000` | Polling interval (ms) |
639
649
 
640
650
  **Example (providerOptions):**
641
651
 
@@ -653,6 +663,97 @@ const result = await transcribe({
653
663
  });
654
664
  ```
655
665
 
666
+ ## Video Models
667
+
668
+ Generate videos using the AI SDK's `experimental_generateVideo` and `runpod.video(...)`:
669
+
670
+ ```ts
671
+ import { runpod } from '@runpod/ai-sdk-provider';
672
+ import { experimental_generateVideo as generateVideo } from 'ai';
673
+
674
+ // Text-to-video
675
+ const result = await generateVideo({
676
+ model: runpod.video('alibaba/wan-2.6-t2v'),
677
+ prompt: 'A golden retriever running on a sunny beach, cinematic, 4k',
678
+ });
679
+
680
+ console.log(result.video.url);
681
+ ```
682
+
683
+ ```ts
684
+ // Image-to-video
685
+ const result = await generateVideo({
686
+ model: runpod.video('alibaba/wan-2.6-i2v'),
687
+ prompt: 'Animate this scene with gentle camera movement',
688
+ image: new URL('https://example.com/image.png'),
689
+ });
690
+
691
+ console.log(result.video.url);
692
+ ```
693
+
694
+ **Returns:**
695
+
696
+ - `result.video` - Generated video (`{ type: 'url', url, mediaType: 'video/mp4' }`)
697
+ - `result.warnings` - Array of any warnings
698
+ - `result.providerMetadata.runpod.jobId` - Runpod job ID
699
+
700
+ ### Examples
701
+
702
+ Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
703
+
704
+ ### Supported Models
705
+
706
+ | Model ID | Type | Company |
707
+ | --------------------------------------- | ----------- | ------------------- |
708
+ | `pruna/p-video` | t2v | Pruna AI |
709
+ | `vidu/q3-t2v` | t2v | Shengshu Technology |
710
+ | `vidu/q3-i2v` | i2v | Shengshu Technology |
711
+ | `kwaivgi/kling-v2.6-std-motion-control` | i2v + video | KwaiVGI (Kuaishou) |
712
+ | `kwaivgi/kling-video-o1-r2v` | i2v | KwaiVGI (Kuaishou) |
713
+ | `kwaivgi/kling-v2.1-i2v-pro` | i2v | KwaiVGI (Kuaishou) |
714
+ | `alibaba/wan-2.6-t2v` | t2v | Alibaba |
715
+ | `alibaba/wan-2.6-i2v` | i2v | Alibaba |
716
+ | `alibaba/wan-2.5` | i2v | Alibaba |
717
+ | `alibaba/wan-2.2-t2v-720-lora` | t2v | Alibaba |
718
+ | `alibaba/wan-2.2-i2v-720` | i2v | Alibaba |
719
+ | `alibaba/wan-2.1-i2v-720` | i2v | Alibaba |
720
+ | `bytedance/seedance-v1.5-pro-i2v` | i2v | ByteDance |
721
+ | `openai/sora-2-pro-i2v` | i2v | OpenAI |
722
+ | `openai/sora-2-i2v` | i2v | OpenAI |
723
+
724
+ ### Provider Options
725
+
726
+ Use `providerOptions.runpod` for model-specific parameters:
727
+
728
+ | Option | Type | Default | Description |
729
+ | --------------------- | -------- | ------- | ------------------------------------ |
730
+ | `negative_prompt` | `string` | - | What to avoid in the generated video |
731
+ | `guidance_scale` | `number` | - | Guidance scale for prompt adherence |
732
+ | `num_inference_steps` | `number` | - | Number of inference steps |
733
+ | `style` | `string` | - | Style preset (model-specific) |
734
+ | `maxPollAttempts` | `number` | `120` | Max polling attempts |
735
+ | `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
736
+
737
+ Any additional model-specific parameters can be passed through `providerOptions.runpod` and will be forwarded to the API.
738
+
739
+ **Example (providerOptions):**
740
+
741
+ ```ts
742
+ const result = await generateVideo({
743
+ model: runpod.video('alibaba/wan-2.6-t2v'),
744
+ prompt: 'A serene mountain landscape with flowing water',
745
+ duration: 5,
746
+ aspectRatio: '16:9',
747
+ seed: 42,
748
+ providerOptions: {
749
+ runpod: {
750
+ negative_prompt: 'blurry, low quality',
751
+ guidance_scale: 7.5,
752
+ },
753
+ },
754
+ });
755
+ ```
756
+
656
757
  ## About Runpod
657
758
 
658
759
  [Runpod](https://runpod.io) is the foundation for developers to build, deploy, and scale custom AI systems.
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3 } from '@ai-sdk/provider';
1
+ import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3, Experimental_VideoModelV3 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
4
4
  import { z } from 'zod';
@@ -64,6 +64,14 @@ interface RunpodProvider {
64
64
  Creates a transcription model for audio transcription.
65
65
  */
66
66
  transcription(modelId: string): TranscriptionModelV3;
67
+ /**
68
+ Creates a video model for video generation.
69
+ */
70
+ videoModel(modelId: string): Experimental_VideoModelV3;
71
+ /**
72
+ Creates a video model for video generation.
73
+ */
74
+ video(modelId: string): Experimental_VideoModelV3;
67
75
  }
68
76
  declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
69
77
  declare const runpod: RunpodProvider;
@@ -72,7 +80,7 @@ type RunpodChatModelId = 'qwen/qwen3-32b-awq' | (string & {});
72
80
 
73
81
  type RunpodCompletionModelId = 'qwen/qwen3-32b-awq' | (string & {});
74
82
 
75
- type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'google/nano-banana-edit' | 'nano-banana-edit';
83
+ type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'tongyi-mai/z-image-turbo' | 'google/nano-banana-edit' | 'nano-banana-edit' | 'google/nano-banana-2-edit';
76
84
 
77
85
  type RunpodTranscriptionModelId = 'pruna/whisper-v3-large' | (string & {});
78
86
  interface RunpodTranscriptionProviderOptions {
@@ -133,6 +141,41 @@ interface RunpodTranscriptionProviderOptions {
133
141
  pollIntervalMillis?: number;
134
142
  }
135
143
 
144
+ type RunpodVideoModelId = 'pruna/p-video' | 'vidu/q3-t2v' | 'vidu/q3-i2v' | 'kwaivgi/kling-v2.6-std-motion-control' | 'kwaivgi/kling-video-o1-r2v' | 'kwaivgi/kling-v2.1-i2v-pro' | 'alibaba/wan-2.6-t2v' | 'alibaba/wan-2.6-i2v' | 'alibaba/wan-2.5' | 'alibaba/wan-2.2-t2v-720-lora' | 'alibaba/wan-2.2-i2v-720' | 'alibaba/wan-2.1-i2v-720' | 'bytedance/seedance-v1.5-pro-i2v' | 'openai/sora-2-pro-i2v' | 'openai/sora-2-i2v' | (string & {});
145
+ interface RunpodVideoProviderOptions {
146
+ /**
147
+ * Negative prompt to guide what to avoid in the generated video.
148
+ */
149
+ negative_prompt?: string;
150
+ /**
151
+ * Style preset for video generation (model-specific).
152
+ */
153
+ style?: string;
154
+ /**
155
+ * Guidance scale for prompt adherence.
156
+ */
157
+ guidance_scale?: number;
158
+ /**
159
+ * Number of inference steps.
160
+ */
161
+ num_inference_steps?: number;
162
+ /**
163
+ * Maximum number of polling attempts before timing out.
164
+ * @default 120
165
+ */
166
+ maxPollAttempts?: number;
167
+ /**
168
+ * Interval between polling attempts in milliseconds.
169
+ * @default 5000
170
+ */
171
+ pollIntervalMillis?: number;
172
+ /**
173
+ * Additional model-specific parameters are passed through via
174
+ * index signature.
175
+ */
176
+ [key: string]: unknown;
177
+ }
178
+
136
179
  declare const runpodImageErrorSchema: z.ZodObject<{
137
180
  error: z.ZodOptional<z.ZodString>;
138
181
  message: z.ZodOptional<z.ZodString>;
@@ -145,4 +188,4 @@ declare const runpodImageErrorSchema: z.ZodObject<{
145
188
  }>;
146
189
  type RunpodImageErrorData = z.infer<typeof runpodImageErrorSchema>;
147
190
 
148
- export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, createRunpod, runpod };
191
+ export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, type RunpodVideoModelId, type RunpodVideoProviderOptions, createRunpod, runpod };
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3 } from '@ai-sdk/provider';
1
+ import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3, Experimental_VideoModelV3 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
4
4
  import { z } from 'zod';
@@ -64,6 +64,14 @@ interface RunpodProvider {
64
64
  Creates a transcription model for audio transcription.
65
65
  */
66
66
  transcription(modelId: string): TranscriptionModelV3;
67
+ /**
68
+ Creates a video model for video generation.
69
+ */
70
+ videoModel(modelId: string): Experimental_VideoModelV3;
71
+ /**
72
+ Creates a video model for video generation.
73
+ */
74
+ video(modelId: string): Experimental_VideoModelV3;
67
75
  }
68
76
  declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
69
77
  declare const runpod: RunpodProvider;
@@ -72,7 +80,7 @@ type RunpodChatModelId = 'qwen/qwen3-32b-awq' | (string & {});
72
80
 
73
81
  type RunpodCompletionModelId = 'qwen/qwen3-32b-awq' | (string & {});
74
82
 
75
- type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'google/nano-banana-edit' | 'nano-banana-edit';
83
+ type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'tongyi-mai/z-image-turbo' | 'google/nano-banana-edit' | 'nano-banana-edit' | 'google/nano-banana-2-edit';
76
84
 
77
85
  type RunpodTranscriptionModelId = 'pruna/whisper-v3-large' | (string & {});
78
86
  interface RunpodTranscriptionProviderOptions {
@@ -133,6 +141,41 @@ interface RunpodTranscriptionProviderOptions {
133
141
  pollIntervalMillis?: number;
134
142
  }
135
143
 
144
+ type RunpodVideoModelId = 'pruna/p-video' | 'vidu/q3-t2v' | 'vidu/q3-i2v' | 'kwaivgi/kling-v2.6-std-motion-control' | 'kwaivgi/kling-video-o1-r2v' | 'kwaivgi/kling-v2.1-i2v-pro' | 'alibaba/wan-2.6-t2v' | 'alibaba/wan-2.6-i2v' | 'alibaba/wan-2.5' | 'alibaba/wan-2.2-t2v-720-lora' | 'alibaba/wan-2.2-i2v-720' | 'alibaba/wan-2.1-i2v-720' | 'bytedance/seedance-v1.5-pro-i2v' | 'openai/sora-2-pro-i2v' | 'openai/sora-2-i2v' | (string & {});
145
+ interface RunpodVideoProviderOptions {
146
+ /**
147
+ * Negative prompt to guide what to avoid in the generated video.
148
+ */
149
+ negative_prompt?: string;
150
+ /**
151
+ * Style preset for video generation (model-specific).
152
+ */
153
+ style?: string;
154
+ /**
155
+ * Guidance scale for prompt adherence.
156
+ */
157
+ guidance_scale?: number;
158
+ /**
159
+ * Number of inference steps.
160
+ */
161
+ num_inference_steps?: number;
162
+ /**
163
+ * Maximum number of polling attempts before timing out.
164
+ * @default 120
165
+ */
166
+ maxPollAttempts?: number;
167
+ /**
168
+ * Interval between polling attempts in milliseconds.
169
+ * @default 5000
170
+ */
171
+ pollIntervalMillis?: number;
172
+ /**
173
+ * Additional model-specific parameters are passed through via
174
+ * index signature.
175
+ */
176
+ [key: string]: unknown;
177
+ }
178
+
136
179
  declare const runpodImageErrorSchema: z.ZodObject<{
137
180
  error: z.ZodOptional<z.ZodString>;
138
181
  message: z.ZodOptional<z.ZodString>;
@@ -145,4 +188,4 @@ declare const runpodImageErrorSchema: z.ZodObject<{
145
188
  }>;
146
189
  type RunpodImageErrorData = z.infer<typeof runpodImageErrorSchema>;
147
190
 
148
- export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, createRunpod, runpod };
191
+ export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, type RunpodVideoModelId, type RunpodVideoProviderOptions, createRunpod, runpod };