@runpod/ai-sdk-provider 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +190 -41
- package/dist/index.d.mts +70 -3
- package/dist/index.d.ts +70 -3
- package/dist/index.js +395 -26
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +399 -25
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @runpod/ai-sdk-provider
|
|
2
2
|
|
|
3
|
+
## 1.2.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- cf0c976: Add transcription model support with `pruna/whisper-v3-large`
|
|
8
|
+
- Add `transcriptionModel()` and `transcription()` methods to the provider
|
|
9
|
+
- Support audio transcription via RunPod's Whisper endpoint
|
|
10
|
+
- Accept audio as `Uint8Array`, base64 string, or URL via providerOptions
|
|
11
|
+
- Return transcription text, segments with timing, detected language, and duration
|
|
12
|
+
|
|
13
|
+
## 1.1.0
|
|
14
|
+
|
|
15
|
+
### Minor Changes
|
|
16
|
+
|
|
17
|
+
- 7ec59bc: add image models and improvements
|
|
18
|
+
- alibaba/wan-2.6: text-to-image model (768x768 to 1280x1280)
|
|
19
|
+
- qwen/qwen-image-edit-2511: edit model (max 1536x1536), supports 1-3 input images and loras
|
|
20
|
+
- google/nano-banana-edit: renamed from nano-banana-edit (backwards compatible), fixed payload format
|
|
21
|
+
- added resolution and aspect ratios columns to supported models table
|
|
22
|
+
|
|
3
23
|
## 1.0.1
|
|
4
24
|
|
|
5
25
|
### Patch Changes
|
package/README.md
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# Runpod AI SDK Provider
|
|
2
2
|
|
|
3
|
-

|
|
4
|
-
|
|
5
3
|
The **Runpod provider** for the [AI SDK](https://ai-sdk.dev/docs) contains language model, image generation, speech generation, and audio transcription support for [Runpod's](https://runpod.io) public endpoints.
|
|
6
4
|
|
|
7
5
|
## Setup
|
|
@@ -280,20 +278,22 @@ Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/get
|
|
|
280
278
|
|
|
281
279
|
### Supported Models
|
|
282
280
|
|
|
283
|
-
| Model ID | Type |
|
|
284
|
-
| -------------------------------------- | ---- |
|
|
285
|
-
| `
|
|
286
|
-
| `pruna/p-image-
|
|
287
|
-
| `
|
|
288
|
-
| `
|
|
289
|
-
| `
|
|
290
|
-
| `bytedance/seedream-
|
|
291
|
-
| `
|
|
292
|
-
| `
|
|
293
|
-
| `
|
|
294
|
-
| `
|
|
295
|
-
| `
|
|
296
|
-
| `black-forest-labs/flux-1-
|
|
281
|
+
| Model ID | Type | Resolution | Aspect Ratios |
|
|
282
|
+
| -------------------------------------- | ---- | ----------------- | ----------------------------------------------- |
|
|
283
|
+
| `alibaba/wan-2.6` | t2i | 768x768–1280x1280 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9, 9:21 |
|
|
284
|
+
| `pruna/p-image-t2i` | t2i | up to 1440x1440 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 |
|
|
285
|
+
| `pruna/p-image-edit` | edit | up to 1440x1440 | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 |
|
|
286
|
+
| `google/nano-banana-edit` | edit | up to 4096x4096 | 1:1, 4:3, 3:4 |
|
|
287
|
+
| `google/nano-banana-pro-edit` | edit | 1k, 2k, 4k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9 |
|
|
288
|
+
| `bytedance/seedream-3.0` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
|
|
289
|
+
| `bytedance/seedream-4.0` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
|
|
290
|
+
| `bytedance/seedream-4.0-edit` | edit | up to 4096x4096 | uses size |
|
|
291
|
+
| `qwen/qwen-image` | t2i | up to 4096x4096 | 1:1, 4:3, 3:4 |
|
|
292
|
+
| `qwen/qwen-image-edit` | edit | up to 4096x4096 | 1:1, 4:3, 3:4 |
|
|
293
|
+
| `qwen/qwen-image-edit-2511` | edit | up to 1536x1536 | 1:1, 4:3, 3:4 |
|
|
294
|
+
| `black-forest-labs/flux-1-schnell` | t2i | up to 2048x2048 | 1:1, 4:3, 3:4 |
|
|
295
|
+
| `black-forest-labs/flux-1-dev` | t2i | up to 2048x2048 | 1:1, 4:3, 3:4 |
|
|
296
|
+
| `black-forest-labs/flux-1-kontext-dev` | edit | up to 2048x2048 | 1:1, 4:3, 3:4 |
|
|
297
297
|
|
|
298
298
|
For the full list of models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
|
|
299
299
|
|
|
@@ -301,21 +301,15 @@ For the full list of models, see the [Runpod Public Endpoint Reference](https://
|
|
|
301
301
|
|
|
302
302
|
Additional options through `providerOptions.runpod` (supported options depend on the model):
|
|
303
303
|
|
|
304
|
-
| Option
|
|
305
|
-
|
|
|
306
|
-
| `negative_prompt`
|
|
307
|
-
| `enable_safety_checker`
|
|
308
|
-
| `
|
|
309
|
-
| `
|
|
310
|
-
| `
|
|
311
|
-
| `
|
|
312
|
-
| `
|
|
313
|
-
| `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440; multiples of 16) |
|
|
314
|
-
| `num_inference_steps` | `number` | Auto | Denoising steps (model-dependent) |
|
|
315
|
-
| `guidance` | `number` | Auto | Prompt adherence strength (model-dependent) |
|
|
316
|
-
| `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp (model-dependent) |
|
|
317
|
-
| `maxPollAttempts` | `number` | `60` | Max polling attempts |
|
|
318
|
-
| `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
|
|
304
|
+
| Option | Type | Default | Description |
|
|
305
|
+
| ----------------------- | --------- | ------- | -------------------------------------------- |
|
|
306
|
+
| `negative_prompt` | `string` | `""` | What to avoid in the image (model-dependent) |
|
|
307
|
+
| `enable_safety_checker` | `boolean` | `true` | Content safety filtering (model-dependent) |
|
|
308
|
+
| `num_inference_steps` | `number` | Auto | Denoising steps (model-dependent) |
|
|
309
|
+
| `guidance` | `number` | Auto | Prompt adherence strength (model-dependent) |
|
|
310
|
+
| `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp |
|
|
311
|
+
| `maxPollAttempts` | `number` | `60` | Max polling attempts |
|
|
312
|
+
| `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
|
|
319
313
|
|
|
320
314
|
**Example (providerOptions):**
|
|
321
315
|
|
|
@@ -361,18 +355,85 @@ const { image } = await generateImage({
|
|
|
361
355
|
});
|
|
362
356
|
```
|
|
363
357
|
|
|
358
|
+
#### Alibaba (WAN 2.6)
|
|
359
|
+
|
|
360
|
+
Text-to-image model with flexible resolution support.
|
|
361
|
+
|
|
362
|
+
**Resolution constraints:**
|
|
363
|
+
|
|
364
|
+
- Total pixels: 589,824 (768x768) to 1,638,400 (1280x1280)
|
|
365
|
+
- Aspect ratio: 1:4 to 4:1
|
|
366
|
+
- Default: 1280x1280
|
|
367
|
+
|
|
368
|
+
**Recommended resolutions for common aspect ratios:**
|
|
369
|
+
|
|
370
|
+
| Aspect Ratio | Resolution |
|
|
371
|
+
| :----------- | :--------- |
|
|
372
|
+
| 1:1 | 1280x1280 |
|
|
373
|
+
| 2:3 | 800x1200 |
|
|
374
|
+
| 3:2 | 1200x800 |
|
|
375
|
+
| 3:4 | 960x1280 |
|
|
376
|
+
| 4:3 | 1280x960 |
|
|
377
|
+
| 9:16 | 720x1280 |
|
|
378
|
+
| 16:9 | 1280x720 |
|
|
379
|
+
| 21:9 | 1344x576 |
|
|
380
|
+
| 9:21 | 576x1344 |
|
|
381
|
+
|
|
382
|
+
```ts
|
|
383
|
+
const { image } = await generateImage({
|
|
384
|
+
model: runpod.image('alibaba/wan-2.6'),
|
|
385
|
+
prompt: 'A serene mountain landscape at dawn',
|
|
386
|
+
aspectRatio: '16:9',
|
|
387
|
+
});
|
|
388
|
+
```
|
|
389
|
+
|
|
364
390
|
#### Google (Nano Banana Pro)
|
|
365
391
|
|
|
366
|
-
|
|
392
|
+
| Option | Values |
|
|
393
|
+
| :---------------------------------- | :--------------- |
|
|
394
|
+
| `providerOptions.runpod.resolution` | `1k`, `2k`, `4k` |
|
|
395
|
+
|
|
396
|
+
```ts
|
|
397
|
+
const { image } = await generateImage({
|
|
398
|
+
model: runpod.image('google/nano-banana-pro'),
|
|
399
|
+
prompt: 'A futuristic cityscape at sunset',
|
|
400
|
+
aspectRatio: '16:9',
|
|
401
|
+
providerOptions: {
|
|
402
|
+
runpod: {
|
|
403
|
+
resolution: '4k',
|
|
404
|
+
},
|
|
405
|
+
},
|
|
406
|
+
});
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
#### Qwen (Image Edit 2511)
|
|
410
|
+
|
|
411
|
+
| Option | Values |
|
|
412
|
+
| :----------------------------- | :--------------------- |
|
|
413
|
+
| `providerOptions.runpod.loras` | `[{path, scale}, ...]` |
|
|
367
414
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
415
|
+
Supports 1-3 input images.
|
|
416
|
+
|
|
417
|
+
```ts
|
|
418
|
+
const { image } = await generateImage({
|
|
419
|
+
model: runpod.image('qwen/qwen-image-edit-2511'),
|
|
420
|
+
prompt: {
|
|
421
|
+
text: 'Transform into anime style',
|
|
422
|
+
images: ['https://image.runpod.ai/asset/qwen/qwen-image-edit-2511.png'],
|
|
423
|
+
},
|
|
424
|
+
size: '1024x1024',
|
|
425
|
+
providerOptions: {
|
|
426
|
+
runpod: {
|
|
427
|
+
loras: [
|
|
428
|
+
{
|
|
429
|
+
path: 'https://huggingface.co/flymy-ai/qwen-image-anime-irl-lora/resolve/main/flymy_anime_irl.safetensors',
|
|
430
|
+
scale: 1,
|
|
431
|
+
},
|
|
432
|
+
],
|
|
433
|
+
},
|
|
434
|
+
},
|
|
435
|
+
});
|
|
436
|
+
```
|
|
376
437
|
|
|
377
438
|
## Speech Models
|
|
378
439
|
|
|
@@ -474,7 +535,7 @@ const result = await generateSpeech({
|
|
|
474
535
|
text: 'Hello!',
|
|
475
536
|
providerOptions: {
|
|
476
537
|
runpod: {
|
|
477
|
-
voice_url: 'https://
|
|
538
|
+
voice_url: 'https://your-audio-host.com/your-voice-sample.wav', // 5-10s audio sample
|
|
478
539
|
},
|
|
479
540
|
},
|
|
480
541
|
});
|
|
@@ -504,6 +565,94 @@ const result = await generateSpeech({
|
|
|
504
565
|
});
|
|
505
566
|
```
|
|
506
567
|
|
|
568
|
+
## Transcription Models
|
|
569
|
+
|
|
570
|
+
Transcribe audio using the AI SDK's `experimental_transcribe` and `runpod.transcription(...)`:
|
|
571
|
+
|
|
572
|
+
```ts
|
|
573
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
574
|
+
import { experimental_transcribe as transcribe } from 'ai';
|
|
575
|
+
|
|
576
|
+
const result = await transcribe({
|
|
577
|
+
model: runpod.transcription('pruna/whisper-v3-large'),
|
|
578
|
+
audio: new URL('https://image.runpod.ai/demo/transcription-demo.wav'),
|
|
579
|
+
});
|
|
580
|
+
|
|
581
|
+
console.log(result.text);
|
|
582
|
+
```
|
|
583
|
+
|
|
584
|
+
**Returns:**
|
|
585
|
+
|
|
586
|
+
- `result.text` - Full transcription text
|
|
587
|
+
- `result.segments` - Array of segments with timing info
|
|
588
|
+
- `segment.text` - Segment text
|
|
589
|
+
- `segment.startSecond` - Start time in seconds
|
|
590
|
+
- `segment.endSecond` - End time in seconds
|
|
591
|
+
- `result.language` - Detected language code
|
|
592
|
+
- `result.durationInSeconds` - Audio duration
|
|
593
|
+
- `result.warnings` - Array of any warnings
|
|
594
|
+
- `result.providerMetadata.runpod.jobId` - RunPod job ID
|
|
595
|
+
|
|
596
|
+
### Audio Input
|
|
597
|
+
|
|
598
|
+
You can provide audio in several ways:
|
|
599
|
+
|
|
600
|
+
```ts
|
|
601
|
+
// URL (recommended for large files)
|
|
602
|
+
const result = await transcribe({
|
|
603
|
+
model: runpod.transcription('pruna/whisper-v3-large'),
|
|
604
|
+
audio: new URL('https://image.runpod.ai/demo/transcription-demo.wav'),
|
|
605
|
+
});
|
|
606
|
+
|
|
607
|
+
// Local file as Uint8Array
|
|
608
|
+
import { readFileSync } from 'fs';
|
|
609
|
+
const audioData = readFileSync('./audio.wav');
|
|
610
|
+
|
|
611
|
+
const result = await transcribe({
|
|
612
|
+
model: runpod.transcription('pruna/whisper-v3-large'),
|
|
613
|
+
audio: audioData,
|
|
614
|
+
});
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
### Examples
|
|
618
|
+
|
|
619
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
620
|
+
|
|
621
|
+
### Supported Models
|
|
622
|
+
|
|
623
|
+
- `pruna/whisper-v3-large`
|
|
624
|
+
|
|
625
|
+
### Provider Options
|
|
626
|
+
|
|
627
|
+
Use `providerOptions.runpod` for model-specific parameters:
|
|
628
|
+
|
|
629
|
+
| Option | Type | Default | Description |
|
|
630
|
+
| ------------------- | --------- | ------- | ---------------------------------------------- |
|
|
631
|
+
| `audio` | `string` | - | URL to audio file (alternative to binary data) |
|
|
632
|
+
| `prompt` | `string` | - | Context prompt to guide transcription |
|
|
633
|
+
| `language` | `string` | Auto | ISO-639-1 language code (e.g., 'en', 'es') |
|
|
634
|
+
| `word_timestamps` | `boolean` | `false` | Include word-level timestamps |
|
|
635
|
+
| `translate` | `boolean` | `false` | Translate audio to English |
|
|
636
|
+
| `enable_vad` | `boolean` | `false` | Enable voice activity detection |
|
|
637
|
+
| `maxPollAttempts` | `number` | `120` | Max polling attempts |
|
|
638
|
+
| `pollIntervalMillis` | `number` | `2000` | Polling interval (ms) |
|
|
639
|
+
|
|
640
|
+
**Example (providerOptions):**
|
|
641
|
+
|
|
642
|
+
```ts
|
|
643
|
+
const result = await transcribe({
|
|
644
|
+
model: runpod.transcription('pruna/whisper-v3-large'),
|
|
645
|
+
audio: new URL('https://image.runpod.ai/demo/transcription-demo.wav'),
|
|
646
|
+
providerOptions: {
|
|
647
|
+
runpod: {
|
|
648
|
+
language: 'en',
|
|
649
|
+
prompt: 'This is a demo of audio transcription',
|
|
650
|
+
word_timestamps: true,
|
|
651
|
+
},
|
|
652
|
+
},
|
|
653
|
+
});
|
|
654
|
+
```
|
|
655
|
+
|
|
507
656
|
## About Runpod
|
|
508
657
|
|
|
509
658
|
[Runpod](https://runpod.io) is the foundation for developers to build, deploy, and scale custom AI systems.
|
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
1
|
+
import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -56,6 +56,14 @@ interface RunpodProvider {
|
|
|
56
56
|
Creates a speech model for speech generation.
|
|
57
57
|
*/
|
|
58
58
|
speech(modelId: string): SpeechModelV3;
|
|
59
|
+
/**
|
|
60
|
+
Creates a transcription model for audio transcription.
|
|
61
|
+
*/
|
|
62
|
+
transcriptionModel(modelId: string): TranscriptionModelV3;
|
|
63
|
+
/**
|
|
64
|
+
Creates a transcription model for audio transcription.
|
|
65
|
+
*/
|
|
66
|
+
transcription(modelId: string): TranscriptionModelV3;
|
|
59
67
|
}
|
|
60
68
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
61
69
|
declare const runpod: RunpodProvider;
|
|
@@ -64,7 +72,66 @@ type RunpodChatModelId = 'qwen/qwen3-32b-awq' | (string & {});
|
|
|
64
72
|
|
|
65
73
|
type RunpodCompletionModelId = 'qwen/qwen3-32b-awq' | (string & {});
|
|
66
74
|
|
|
67
|
-
type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'nano-banana-edit';
|
|
75
|
+
type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'google/nano-banana-edit' | 'nano-banana-edit';
|
|
76
|
+
|
|
77
|
+
type RunpodTranscriptionModelId = 'pruna/whisper-v3-large' | (string & {});
|
|
78
|
+
interface RunpodTranscriptionProviderOptions {
|
|
79
|
+
/**
|
|
80
|
+
* URL to audio file. Use this if you want to pass an audio URL directly
|
|
81
|
+
* instead of binary audio data.
|
|
82
|
+
*/
|
|
83
|
+
audio?: string;
|
|
84
|
+
/**
|
|
85
|
+
* Optional context prompt to guide the transcription (initial_prompt in Whisper).
|
|
86
|
+
*/
|
|
87
|
+
prompt?: string;
|
|
88
|
+
/**
|
|
89
|
+
* Alias for prompt - the initial prompt for the first window.
|
|
90
|
+
*/
|
|
91
|
+
initial_prompt?: string;
|
|
92
|
+
/**
|
|
93
|
+
* Language of the audio in ISO-639-1 format (e.g., 'en', 'es', 'fr').
|
|
94
|
+
* If not specified, Whisper will auto-detect the language.
|
|
95
|
+
*/
|
|
96
|
+
language?: string;
|
|
97
|
+
/**
|
|
98
|
+
* Whether to include word-level timestamps in the response.
|
|
99
|
+
* @default false
|
|
100
|
+
*/
|
|
101
|
+
word_timestamps?: boolean;
|
|
102
|
+
/**
|
|
103
|
+
* Whisper model to use.
|
|
104
|
+
* Options: 'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large-v3', 'turbo'
|
|
105
|
+
* @default 'base'
|
|
106
|
+
*/
|
|
107
|
+
model?: string;
|
|
108
|
+
/**
|
|
109
|
+
* Output format for transcription.
|
|
110
|
+
* Options: 'plain_text', 'formatted_text', 'srt', 'vtt'
|
|
111
|
+
* @default 'plain_text'
|
|
112
|
+
*/
|
|
113
|
+
transcription?: string;
|
|
114
|
+
/**
|
|
115
|
+
* Whether to translate the audio to English.
|
|
116
|
+
* @default false
|
|
117
|
+
*/
|
|
118
|
+
translate?: boolean;
|
|
119
|
+
/**
|
|
120
|
+
* Whether to enable voice activity detection.
|
|
121
|
+
* @default false
|
|
122
|
+
*/
|
|
123
|
+
enable_vad?: boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Maximum number of polling attempts before timing out.
|
|
126
|
+
* @default 120
|
|
127
|
+
*/
|
|
128
|
+
maxPollAttempts?: number;
|
|
129
|
+
/**
|
|
130
|
+
* Interval between polling attempts in milliseconds.
|
|
131
|
+
* @default 2000
|
|
132
|
+
*/
|
|
133
|
+
pollIntervalMillis?: number;
|
|
134
|
+
}
|
|
68
135
|
|
|
69
136
|
declare const runpodImageErrorSchema: z.ZodObject<{
|
|
70
137
|
error: z.ZodOptional<z.ZodString>;
|
|
@@ -78,4 +145,4 @@ declare const runpodImageErrorSchema: z.ZodObject<{
|
|
|
78
145
|
}>;
|
|
79
146
|
type RunpodImageErrorData = z.infer<typeof runpodImageErrorSchema>;
|
|
80
147
|
|
|
81
|
-
export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, createRunpod, runpod };
|
|
148
|
+
export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, createRunpod, runpod };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
1
|
+
import { LanguageModelV3, ImageModelV3, SpeechModelV3, TranscriptionModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -56,6 +56,14 @@ interface RunpodProvider {
|
|
|
56
56
|
Creates a speech model for speech generation.
|
|
57
57
|
*/
|
|
58
58
|
speech(modelId: string): SpeechModelV3;
|
|
59
|
+
/**
|
|
60
|
+
Creates a transcription model for audio transcription.
|
|
61
|
+
*/
|
|
62
|
+
transcriptionModel(modelId: string): TranscriptionModelV3;
|
|
63
|
+
/**
|
|
64
|
+
Creates a transcription model for audio transcription.
|
|
65
|
+
*/
|
|
66
|
+
transcription(modelId: string): TranscriptionModelV3;
|
|
59
67
|
}
|
|
60
68
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
61
69
|
declare const runpod: RunpodProvider;
|
|
@@ -64,7 +72,66 @@ type RunpodChatModelId = 'qwen/qwen3-32b-awq' | (string & {});
|
|
|
64
72
|
|
|
65
73
|
type RunpodCompletionModelId = 'qwen/qwen3-32b-awq' | (string & {});
|
|
66
74
|
|
|
67
|
-
type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'nano-banana-edit';
|
|
75
|
+
type RunpodImageModelId = 'qwen/qwen-image' | 'qwen/qwen-image-edit' | 'qwen/qwen-image-edit-2511' | 'bytedance/seedream-3.0' | 'bytedance/seedream-4.0' | 'bytedance/seedream-4.0-edit' | 'black-forest-labs/flux-1-kontext-dev' | 'black-forest-labs/flux-1-schnell' | 'black-forest-labs/flux-1-dev' | 'alibaba/wan-2.6' | 'google/nano-banana-edit' | 'nano-banana-edit';
|
|
76
|
+
|
|
77
|
+
type RunpodTranscriptionModelId = 'pruna/whisper-v3-large' | (string & {});
|
|
78
|
+
interface RunpodTranscriptionProviderOptions {
|
|
79
|
+
/**
|
|
80
|
+
* URL to audio file. Use this if you want to pass an audio URL directly
|
|
81
|
+
* instead of binary audio data.
|
|
82
|
+
*/
|
|
83
|
+
audio?: string;
|
|
84
|
+
/**
|
|
85
|
+
* Optional context prompt to guide the transcription (initial_prompt in Whisper).
|
|
86
|
+
*/
|
|
87
|
+
prompt?: string;
|
|
88
|
+
/**
|
|
89
|
+
* Alias for prompt - the initial prompt for the first window.
|
|
90
|
+
*/
|
|
91
|
+
initial_prompt?: string;
|
|
92
|
+
/**
|
|
93
|
+
* Language of the audio in ISO-639-1 format (e.g., 'en', 'es', 'fr').
|
|
94
|
+
* If not specified, Whisper will auto-detect the language.
|
|
95
|
+
*/
|
|
96
|
+
language?: string;
|
|
97
|
+
/**
|
|
98
|
+
* Whether to include word-level timestamps in the response.
|
|
99
|
+
* @default false
|
|
100
|
+
*/
|
|
101
|
+
word_timestamps?: boolean;
|
|
102
|
+
/**
|
|
103
|
+
* Whisper model to use.
|
|
104
|
+
* Options: 'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large-v3', 'turbo'
|
|
105
|
+
* @default 'base'
|
|
106
|
+
*/
|
|
107
|
+
model?: string;
|
|
108
|
+
/**
|
|
109
|
+
* Output format for transcription.
|
|
110
|
+
* Options: 'plain_text', 'formatted_text', 'srt', 'vtt'
|
|
111
|
+
* @default 'plain_text'
|
|
112
|
+
*/
|
|
113
|
+
transcription?: string;
|
|
114
|
+
/**
|
|
115
|
+
* Whether to translate the audio to English.
|
|
116
|
+
* @default false
|
|
117
|
+
*/
|
|
118
|
+
translate?: boolean;
|
|
119
|
+
/**
|
|
120
|
+
* Whether to enable voice activity detection.
|
|
121
|
+
* @default false
|
|
122
|
+
*/
|
|
123
|
+
enable_vad?: boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Maximum number of polling attempts before timing out.
|
|
126
|
+
* @default 120
|
|
127
|
+
*/
|
|
128
|
+
maxPollAttempts?: number;
|
|
129
|
+
/**
|
|
130
|
+
* Interval between polling attempts in milliseconds.
|
|
131
|
+
* @default 2000
|
|
132
|
+
*/
|
|
133
|
+
pollIntervalMillis?: number;
|
|
134
|
+
}
|
|
68
135
|
|
|
69
136
|
declare const runpodImageErrorSchema: z.ZodObject<{
|
|
70
137
|
error: z.ZodOptional<z.ZodString>;
|
|
@@ -78,4 +145,4 @@ declare const runpodImageErrorSchema: z.ZodObject<{
|
|
|
78
145
|
}>;
|
|
79
146
|
type RunpodImageErrorData = z.infer<typeof runpodImageErrorSchema>;
|
|
80
147
|
|
|
81
|
-
export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, createRunpod, runpod };
|
|
148
|
+
export { type RunpodChatModelId, type RunpodCompletionModelId, type RunpodImageErrorData, type RunpodImageModelId, type RunpodProvider, type RunpodProviderSettings, type RunpodTranscriptionModelId, type RunpodTranscriptionProviderOptions, createRunpod, runpod };
|