@runpod/ai-sdk-provider 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @runpod/ai-sdk-provider
2
2
 
3
+ ## 1.2.0
4
+
5
+ ### Minor Changes
6
+
7
+ - cf0c976: Add transcription model support with `pruna/whisper-v3-large`
8
+ - Add `transcriptionModel()` and `transcription()` methods to the provider
9
+ - Support audio transcription via RunPod's Whisper endpoint
10
+ - Accept audio as `Uint8Array`, base64 string, or URL via providerOptions
11
+ - Return transcription text, segments with timing, detected language, and duration
12
+
13
+ ## 1.1.0
14
+
15
+ ### Minor Changes
16
+
17
+ - 7ec59bc: add image models and improvements
18
+ - alibaba/wan-2.6: text-to-image model (max 1280x1280)
19
+ - qwen/qwen-image-edit-2511: edit model (max 1536x1536), supports 1-3 input images and loras
20
+ - google/nano-banana-edit: renamed from nano-banana-edit (backwards compatible), fixed payload format
21
+ - added resolution and aspect ratios columns to supported models table
22
+
3
23
  ## 1.0.1
4
24
 
5
25
  ### Patch Changes
package/README.md CHANGED
@@ -1,7 +1,5 @@
1
1
  # Runpod AI SDK Provider
2
2
 
3
- ![Runpod AI SDK Provider banner](https://image.runpod.ai/runpod/ai-sdk-provider/banner.jpg)
4
-
5
3
  The **Runpod provider** for the [AI SDK](https://ai-sdk.dev/docs) contains language model and image generation support for [Runpod's](https://runpod.io) public endpoints.
6
4
 
7
5
  ## Setup
@@ -280,20 +278,22 @@ Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/get
280
278
 
281
279
  ### Supported Models
282
280
 
283
- | Model ID | Type |
284
- | -------------------------------------- | ---- |
285
- | `pruna/p-image-t2i` | t2i |
286
- | `pruna/p-image-edit` | edit |
287
- | `google/nano-banana-pro-edit` | edit |
288
- | `bytedance/seedream-3.0` | t2i |
289
- | `bytedance/seedream-4.0` | t2i |
290
- | `bytedance/seedream-4.0-edit` | edit |
291
- | `qwen/qwen-image` | t2i |
292
- | `qwen/qwen-image-edit` | edit |
293
- | `nano-banana-edit` | edit |
294
- | `black-forest-labs/flux-1-schnell` | t2i |
295
- | `black-forest-labs/flux-1-dev` | t2i |
296
- | `black-forest-labs/flux-1-kontext-dev` | edit |
281
+ | Model ID | Type | Resolution | Aspect Ratios |
282
+ | -------------------------------------- | ---- | ----------------- | ----------------------------------------------- |
283
+ | `alibaba/wan-2.6` | t2i | 768x768–1280x1280 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9, 9:21 |
284
+ | `pruna/p-image-t2i` | t2i | up to 1440x1440 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 |
285
+ | `pruna/p-image-edit` | edit | up to 1440x1440 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 |
286
+ | `google/nano-banana-edit` | edit | up to 4096x4096 | 1:1, 4:3, 3:4 |
287
+ | `google/nano-banana-pro-edit` | edit | 1k, 2k, 4k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9 |
288
+ | `bytedance/seedream-3.0` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
289
+ | `bytedance/seedream-4.0` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
290
+ | `bytedance/seedream-4.0-edit` | edit | up to 4096x4096 | uses size |
291
+ | `qwen/qwen-image` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
292
+ | `qwen/qwen-image-edit` | edit | up to 4096x4096 | 1:1, 4:3, 3:4 |
293
+ | `qwen/qwen-image-edit-2511` | edit | up to 1536x1536 | 1:1, 4:3, 3:4 |
294
+ | `black-forest-labs/flux-1-schnell` | t2i | up to 2048x2048 | 1:1, 4:3, 3:4 |
295
+ | `black-forest-labs/flux-1-dev` | t2i | up to 2048x2048 | 1:1, 4:3, 3:4 |
296
+ | `black-forest-labs/flux-1-kontext-dev` | edit | up to 2048x2048 | 1:1, 4:3, 3:4 |
297
297
 
298
298
  For the full list of models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
299
299
 
@@ -301,21 +301,15 @@ For the full list of models, see the [Runpod Public Endpoint Reference](https://
301
301
 
302
302
  Additional options through `providerOptions.runpod` (supported options depend on the model):
303
303
 
304
- | Option | Type | Default | Description |
305
- | ------------------------ | ---------- | ------- | ----------------------------------------------------------- |
306
- | `negative_prompt` | `string` | `""` | What to avoid in the image (model-dependent) |
307
- | `enable_safety_checker` | `boolean` | `true` | Content safety filtering (model-dependent) |
308
- | `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna) |
309
- | `aspect_ratio` | `string` | - | Model-specific aspect ratio (Pruna: supports `custom`) |
310
- | `image` | `string` | - | Legacy: Single input image URL/base64 (use `prompt.images`) |
311
- | `images` | `string[]` | - | Legacy: Multiple input images (use `prompt.images`) |
312
- | `resolution` | `string` | `"1k"` | Output resolution: 1k, 2k, 4k (Nano Banana Pro) |
313
- | `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440; multiples of 16) |
314
- | `num_inference_steps` | `number` | Auto | Denoising steps (model-dependent) |
315
- | `guidance` | `number` | Auto | Prompt adherence strength (model-dependent) |
316
- | `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp (model-dependent) |
317
- | `maxPollAttempts` | `number` | `60` | Max polling attempts |
318
- | `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
304
+ | Option | Type | Default | Description |
305
+ | ----------------------- | --------- | ------- | -------------------------------------------- |
306
+ | `negative_prompt` | `string` | `""` | What to avoid in the image (model-dependent) |
307
+ | `enable_safety_checker` | `boolean` | `true` | Content safety filtering (model-dependent) |
308
+ | `num_inference_steps` | `number` | Auto | Denoising steps (model-dependent) |
309
+ | `guidance` | `number` | Auto | Prompt adherence strength (model-dependent) |
310
+ | `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp |
311
+ | `maxPollAttempts` | `number` | `60` | Max polling attempts |
312
+ | `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
319
313
 
320
314
  **Example (providerOptions):**
321
315
 
@@ -361,18 +355,85 @@ const { image } = await generateImage({
361
355
  });
362
356
  ```
363
357
 
358
+ #### Alibaba (WAN 2.6)
359
+
360
+ Text-to-image model with flexible resolution support.
361
+
362
+ **Resolution constraints:**
363
+
364
+ - Total pixels: 589,824 (768x768) to 1,638,400 (1280x1280)
365
+ - Aspect ratio: 1:4 to 4:1
366
+ - Default: 1280x1280
367
+
368
+ **Recommended resolutions for common aspect ratios:**
369
+
370
+ | Aspect Ratio | Resolution |
371
+ | :----------- | :--------- |
372
+ | 1:1 | 1280x1280 |
373
+ | 2:3 | 800x1200 |
374
+ | 3:2 | 1200x800 |
375
+ | 3:4 | 960x1280 |
376
+ | 4:3 | 1280x960 |
377
+ | 9:16 | 720x1280 |
378
+ | 16:9 | 1280x720 |
379
+ | 21:9 | 1344x576 |
380
+ | 9:21 | 576x1344 |
381
+
382
+ ```ts
383
+ const { image } = await generateImage({
384
+ model: runpod.image('alibaba/wan-2.6'),
385
+ prompt: 'A serene mountain landscape at dawn',
386
+ aspectRatio: '16:9',
387
+ });
388
+ ```
389
+
364
390
  #### Google (Nano Banana Pro)
365
391
 
366
- Supported model: `google/nano-banana-pro-edit`
392
+ | Option | Values |
393
+ | :---------------------------------- | :--------------- |
394
+ | `providerOptions.runpod.resolution` | `1k`, `2k`, `4k` |
395
+
396
+ ```ts
397
+ const { image } = await generateImage({
398
+ model: runpod.image('google/nano-banana-pro-edit'),
399
+ prompt: 'A futuristic cityscape at sunset',
400
+ aspectRatio: '16:9',
401
+ providerOptions: {
402
+ runpod: {
403
+ resolution: '4k',
404
+ },
405
+ },
406
+ });
407
+ ```
408
+
409
+ #### Qwen (Image Edit 2511)
410
+
411
+ | Option | Values |
412
+ | :----------------------------- | :--------------------- |
413
+ | `providerOptions.runpod.loras` | `[{path, scale}, ...]` |
367
414
 
368
- | Parameter | Supported Values | Notes |
369
- | :------------------------------ | :---------------------------------------------------------------- | :----------------------------------- |
370
- | `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `21:9`, `9:21` | Standard AI SDK parameter |
371
- | `resolution` | `1k`, `2k`, `4k` | Output resolution quality |
372
- | `output_format` | `jpeg`, `png`, `webp` | Output image format |
373
- | `prompt.images` | `string[]` | Recommended. Input image(s) to edit. |
374
- | `files` | `ImageModelV3File[]` | Alternative (lower-level). |
375
- | `providerOptions.runpod.images` | `string[]` | Legacy. Input image(s) to edit. |
415
+ Supports 1-3 input images.
416
+
417
+ ```ts
418
+ const { image } = await generateImage({
419
+ model: runpod.image('qwen/qwen-image-edit-2511'),
420
+ prompt: {
421
+ text: 'Transform into anime style',
422
+ images: ['https://image.runpod.ai/asset/qwen/qwen-image-edit-2511.png'],
423
+ },
424
+ size: '1024x1024',
425
+ providerOptions: {
426
+ runpod: {
427
+ loras: [
428
+ {
429
+ path: 'https://huggingface.co/flymy-ai/qwen-image-anime-irl-lora/resolve/main/flymy_anime_irl.safetensors',
430
+ scale: 1,
431
+ },
432
+ ],
433
+ },
434
+ },
435
+ });
436
+ ```
376
437
 
377
438
  ## Speech Models
378
439
 
@@ -474,7 +535,7 @@ const result = await generateSpeech({
474
535
  text: 'Hello!',
475
536
  providerOptions: {
476
537
  runpod: {
477
- voice_url: 'https://example.com/voice.wav',
538
+ voice_url: 'https://your-audio-host.com/your-voice-sample.wav', // 5-10s audio sample
478
539
  },
479
540
  },
480
541
  });
@@ -504,6 +565,94 @@ const result = await generateSpeech({
504
565
  });
505
566
  ```
506
567
 
568
+ ## Transcription Models
569
+
570
+ Transcribe audio using the AI SDK's `experimental_transcribe` and `runpod.transcription(...)`:
571
+
572
+ ```ts
573
+ import { runpod } from '@runpod/ai-sdk-provider';
574
+ import { experimental_transcribe as transcribe } from 'ai';
575
+
576
+ const result = await transcribe({
577
+ model: runpod.transcription('pruna/whisper-v3-large'),
578
+ audio: new URL('https://image.runpod.ai/demo/transcription-demo.wav'),
579
+ });
580
+
581
+ console.log(result.text);
582
+ ```
583
+
584
+ **Returns:**
585
+
586
+ - `result.text` - Full transcription text
587
+ - `result.segments` - Array of segments with timing info
588
+ - `segment.text` - Segment text
589
+ - `segment.startSecond` - Start time in seconds
590
+ - `segment.endSecond` - End time in seconds
591
+ - `result.language` - Detected language code
592
+ - `result.durationInSeconds` - Audio duration
593
+ - `result.warnings` - Array of any warnings
594
+ - `result.providerMetadata.runpod.jobId` - RunPod job ID
595
+
596
+ ### Audio Input
597
+
598
+ You can provide audio in several ways:
599
+
600
+ ```ts
601
+ // URL (recommended for large files)
602
+ const result = await transcribe({
603
+ model: runpod.transcription('pruna/whisper-v3-large'),
604
+ audio: new URL('https://image.runpod.ai/demo/transcription-demo.wav'),
605
+ });
606
+
607
+ // Local file as Uint8Array
608
+ import { readFileSync } from 'fs';
609
+ const audioData = readFileSync('./audio.wav');
610
+
611
+ const result = await transcribe({
612
+ model: runpod.transcription('pruna/whisper-v3-large'),
613
+ audio: audioData,
614
+ });
615
+ ```
616
+
617
+ ### Examples
618
+
619
+ Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
620
+
621
+ ### Supported Models
622
+
623
+ - `pruna/whisper-v3-large`
624
+
625
+ ### Provider Options
626
+
627
+ Use `providerOptions.runpod` for model-specific parameters:
628
+
629
+ | Option | Type | Default | Description |
630
+ | ------------------- | --------- | ------- | ---------------------------------------------- |
631
+ | `audio` | `string` | - | URL to audio file (alternative to binary data) |
632
+ | `prompt` | `string` | - | Context prompt to guide transcription |
633
+ | `language` | `string` | Auto | ISO-639-1 language code (e.g., 'en', 'es') |
634
+ | `word_timestamps` | `boolean` | `false` | Include word-level timestamps |
635
+ | `translate` | `boolean` | `false` | Translate audio to English |
636
+ | `enable_vad` | `boolean` | `false` | Enable voice activity detection |
637
+ | `maxPollAttempts` | `number` | `120` | Max polling attempts |
638
+ | `pollIntervalMillis` | `number` | `2000` | Polling interval (ms) |
639
+
640
+ **Example (providerOptions):**
641
+
642
+ ```ts
643
+ const result = await transcribe({
644
+ model: runpod.transcription('pruna/whisper-v3-large'),
645
+ audio: new URL('https://image.runpod.ai/demo/transcription-demo.wav'),
646
+ providerOptions: {
647
+ runpod: {
648
+ language: 'en',
649
+ prompt: 'This is a demo of audio transcription',
650
+ word_timestamps: true,
651
+ },
652
+ },
653
+ });
654
+ ```
655
+
507
656
  ## About Runpod
508
657
 
509
658
  [Runpod](https://runpod.io) is the foundation for developers to build, deploy, and scale custom AI systems.
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
1
+ import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
4
4
  import { z } from 'zod';
@@ -56,6 +56,14 @@ interface RunpodProvider {
56
56
  Creates a speech model for speech generation.
57
57
  */
58
58
  speech(modelId: string): SpeechModelV3;
59
+ /**
60
+ Creates a transcription model for audio transcription.
61
+ */
62
+ transcriptionModel(modelId: string): TranscriptionModelV3;
63
+ /**
64
+ Creates a transcription model for audio transcription.
65
+ */
66
+ transcription(modelId: string): TranscriptionModelV3;
59
67
  }
60
68
  declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
61
69
  declare const runpod: RunpodProvider;
@@ -64,7 +72,66 @@ type RunpodChatModelId = 'qwen/qwen3-32b-awq' | (string & {});
64
72
 
65
73
  type RunpodCompletionModelId = 'qwen/qwen3-32b-awq' | (string & {});
66
74
 
67
- type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'nano-banana-edit';
75
+ type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'google/nano-banana-edit' | 'nano-banana-edit';
76
+
77
+ type RunpodTranscriptionModelId = 'pruna/whisper-v3-large' | (string & {});
78
+ interface RunpodTranscriptionProviderOptions {
79
+ /**
80
+ * URL to audio file. Use this if you want to pass an audio URL directly
81
+ * instead of binary audio data.
82
+ */
83
+ audio?: string;
84
+ /**
85
+ * Optional context prompt to guide the transcription (initial_prompt in Whisper).
86
+ */
87
+ prompt?: string;
88
+ /**
89
+ * Alias for prompt - the initial prompt for the first window.
90
+ */
91
+ initial_prompt?: string;
92
+ /**
93
+ * Language of the audio in ISO-639-1 format (e.g., 'en', 'es', 'fr').
94
+ * If not specified, Whisper will auto-detect the language.
95
+ */
96
+ language?: string;
97
+ /**
98
+ * Whether to include word-level timestamps in the response.
99
+ * @default false
100
+ */
101
+ word_timestamps?: boolean;
102
+ /**
103
+ * Whisper model to use.
104
+ * Options: 'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large-v3', 'turbo'
105
+ * @default 'base'
106
+ */
107
+ model?: string;
108
+ /**
109
+ * Output format for transcription.
110
+ * Options: 'plain_text', 'formatted_text', 'srt', 'vtt'
111
+ * @default 'plain_text'
112
+ */
113
+ transcription?: string;
114
+ /**
115
+ * Whether to translate the audio to English.
116
+ * @default false
117
+ */
118
+ translate?: boolean;
119
+ /**
120
+ * Whether to enable voice activity detection.
121
+ * @default false
122
+ */
123
+ enable_vad?: boolean;
124
+ /**
125
+ * Maximum number of polling attempts before timing out.
126
+ * @default 120
127
+ */
128
+ maxPollAttempts?: number;
129
+ /**
130
+ * Interval between polling attempts in milliseconds.
131
+ * @default 2000
132
+ */
133
+ pollIntervalMillis?: number;
134
+ }
68
135
 
69
136
  declare const runpodImageErrorSchema: z.ZodObject<{
70
137
  error: z.ZodOptional<z.ZodString>;
@@ -78,4 +145,4 @@ declare const runpodImageErrorSchema: z.ZodObject<{
78
145
  }>;
79
146
  type RunpodImageErrorData = z.infer<typeof runpodImageErrorSchema>;
80
147
 
81
- export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, createRunpod, runpod };
148
+ export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, createRunpod, runpod };
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
1
+ import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
4
4
  import { z } from 'zod';
@@ -56,6 +56,14 @@ interface RunpodProvider {
56
56
  Creates a speech model for speech generation.
57
57
  */
58
58
  speech(modelId: string): SpeechModelV3;
59
+ /**
60
+ Creates a transcription model for audio transcription.
61
+ */
62
+ transcriptionModel(modelId: string): TranscriptionModelV3;
63
+ /**
64
+ Alias of `transcriptionModel` — creates a transcription model for audio transcription.
65
+ */
66
+ transcription(modelId: string): TranscriptionModelV3;
59
67
  }
60
68
  declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
61
69
  declare const runpod: RunpodProvider;
@@ -64,7 +72,66 @@ type RunpodChatModelId = 'qwen/qwen3-32b-awq' | (string & {});
64
72
 
65
73
  type RunpodCompletionModelId = 'qwen/qwen3-32b-awq' | (string & {});
66
74
 
67
- type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'nano-banana-edit';
75
+ type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'google/nano-banana-edit' | 'nano-banana-edit';
76
+
77
+ type RunpodTranscriptionModelId = 'pruna/whisper-v3-large' | (string & {});
78
+ interface RunpodTranscriptionProviderOptions {
79
+ /**
80
+ * URL to audio file. Use this if you want to pass an audio URL directly
81
+ * instead of binary audio data.
82
+ */
83
+ audio?: string;
84
+ /**
85
+ * Optional context prompt to guide the transcription (initial_prompt in Whisper).
86
+ */
87
+ prompt?: string;
88
+ /**
89
+ * Alias for prompt - the initial prompt for the first window.
90
+ */
91
+ initial_prompt?: string;
92
+ /**
93
+ * Language of the audio in ISO-639-1 format (e.g., 'en', 'es', 'fr').
94
+ * If not specified, Whisper will auto-detect the language.
95
+ */
96
+ language?: string;
97
+ /**
98
+ * Whether to include word-level timestamps in the response.
99
+ * @default false
100
+ */
101
+ word_timestamps?: boolean;
102
+ /**
103
+ * Whisper model to use.
104
+ * Options: 'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large-v3', 'turbo'
105
+ * @default 'base'
106
+ */
107
+ model?: string;
108
+ /**
109
+ * Output format for transcription.
110
+ * Options: 'plain_text', 'formatted_text', 'srt', 'vtt'
111
+ * @default 'plain_text'
112
+ */
113
+ transcription?: string;
114
+ /**
115
+ * Whether to translate the audio to English.
116
+ * @default false
117
+ */
118
+ translate?: boolean;
119
+ /**
120
+ * Whether to enable voice activity detection.
121
+ * @default false
122
+ */
123
+ enable_vad?: boolean;
124
+ /**
125
+ * Maximum number of polling attempts before timing out.
126
+ * @default 120
127
+ */
128
+ maxPollAttempts?: number;
129
+ /**
130
+ * Interval between polling attempts in milliseconds.
131
+ * @default 2000
132
+ */
133
+ pollIntervalMillis?: number;
134
+ }
68
135
 
69
136
  declare const runpodImageErrorSchema: z.ZodObject<{
70
137
  error: z.ZodOptional<z.ZodString>;
@@ -78,4 +145,4 @@ declare const runpodImageErrorSchema: z.ZodObject<{
78
145
  }>;
79
146
  type RunpodImageErrorData = z.infer<typeof runpodImageErrorSchema>;
80
147
 
81
- export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, createRunpod, runpod };
148
+ export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, createRunpod, runpod };