@runpod/ai-sdk-provider 0.11.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +242 -109
- package/dist/index.d.mts +18 -6
- package/dist/index.d.ts +18 -6
- package/dist/index.js +251 -16
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +250 -15
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,47 @@
|
|
|
1
1
|
# @runpod/ai-sdk-provider
|
|
2
2
|
|
|
3
|
+
## 1.0.0
|
|
4
|
+
|
|
5
|
+
### Major Changes
|
|
6
|
+
|
|
7
|
+
- 7fa7435: Release 1.0.0 - AI SDK v6 Compatibility
|
|
8
|
+
|
|
9
|
+
This release marks the first stable version of the Runpod AI SDK Provider, coinciding with the AI SDK v6 release.
|
|
10
|
+
|
|
11
|
+
### Breaking Changes
|
|
12
|
+
- Now requires AI SDK v6 (`ai@6.x`) - users on AI SDK v5 must upgrade
|
|
13
|
+
- Updated to V3 provider interfaces (`LanguageModelV3`, `ImageModelV3`, `SpeechModelV3`)
|
|
14
|
+
|
|
15
|
+
### New Features
|
|
16
|
+
- **Standardized `files` parameter support**: Image models now accept the AI SDK standard `files` parameter for image editing, supporting both URLs and base64 data
|
|
17
|
+
- Legacy `providerOptions.runpod.images` still works, but `files` is now the recommended approach
|
|
18
|
+
|
|
19
|
+
### Dependencies
|
|
20
|
+
- `@ai-sdk/provider`: ^2.0.0 → ^3.0.0
|
|
21
|
+
- `@ai-sdk/provider-utils`: ^3.0.5 → ^4.0.0
|
|
22
|
+
- `@ai-sdk/openai-compatible`: ^1.0.11 → ^2.0.0
|
|
23
|
+
|
|
24
|
+
### What's Unchanged
|
|
25
|
+
- Public API remains identical - no code changes required for users already on AI SDK v6
|
|
26
|
+
- All language models, image models, and speech models work the same way
|
|
27
|
+
|
|
28
|
+
### Patch Changes
|
|
29
|
+
|
|
30
|
+
- 7fa7435: docs: expand speech docs for chatterbox turbo
|
|
31
|
+
|
|
32
|
+
## 0.12.0
|
|
33
|
+
|
|
34
|
+
### Minor Changes
|
|
35
|
+
|
|
36
|
+
- dcc2cc5: Add support for speech generation with `resembleai/chatterbox-turbo` model:
|
|
37
|
+
- `speechModel()` and `speech()` methods for text-to-speech
|
|
38
|
+
- Voice cloning via URL (5-10 seconds of audio)
|
|
39
|
+
- 20 built-in voices
|
|
40
|
+
|
|
41
|
+
### Patch Changes
|
|
42
|
+
|
|
43
|
+
- ace58c2: Add comprehensive documentation for Pruna and Nano Banana Pro models, including all supported aspect ratios, resolutions, and output formats. Update examples to use standard AI SDK options where possible.
|
|
44
|
+
|
|
3
45
|
## 0.11.1
|
|
4
46
|
|
|
5
47
|
### Patch Changes
|
package/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Runpod AI SDK Provider
|
|
2
2
|
|
|
3
|
+

|
|
4
|
+
|
|
3
5
|
The **Runpod provider** for the [AI SDK](https://ai-sdk.dev/docs) contains language model, image generation, and speech generation support for [Runpod's](https://runpod.io) public endpoints.
|
|
4
6
|
|
|
5
7
|
## Setup
|
|
@@ -104,7 +106,11 @@ for await (const delta of textStream) {
|
|
|
104
106
|
}
|
|
105
107
|
```
|
|
106
108
|
|
|
107
|
-
###
|
|
109
|
+
### Examples
|
|
110
|
+
|
|
111
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
112
|
+
|
|
113
|
+
### Supported Models
|
|
108
114
|
|
|
109
115
|
| Model ID | Description | Streaming | Object Generation | Tool Usage | Reasoning Notes |
|
|
110
116
|
| --------------------------------- | ------------------------------------------------------------------- | --------- | ----------------- | ---------- | ------------------------- |
|
|
@@ -196,23 +202,27 @@ console.log(result.success ? result.data : parsed);
|
|
|
196
202
|
|
|
197
203
|
## Image Models
|
|
198
204
|
|
|
199
|
-
|
|
205
|
+
With image models you can:
|
|
200
206
|
|
|
201
|
-
|
|
207
|
+
- **Text-to-image**: generate a new image from a text prompt.
|
|
208
|
+
- **Edit image**: transform an existing image by providing reference image(s).
|
|
209
|
+
|
|
210
|
+
All examples use the AI SDK's `experimental_generateImage` and `runpod.image(modelId)`.
|
|
211
|
+
|
|
212
|
+
### Text-to-Image
|
|
202
213
|
|
|
203
214
|
```ts
|
|
204
215
|
import { runpod } from '@runpod/ai-sdk-provider';
|
|
205
216
|
import { experimental_generateImage as generateImage } from 'ai';
|
|
217
|
+
import { writeFileSync } from 'fs';
|
|
206
218
|
|
|
207
219
|
const { image } = await generateImage({
|
|
208
|
-
model: runpod.
|
|
220
|
+
model: runpod.image('pruna/p-image-t2i'),
|
|
209
221
|
prompt: 'A serene mountain landscape at sunset',
|
|
210
222
|
aspectRatio: '4:3',
|
|
211
223
|
});
|
|
212
224
|
|
|
213
|
-
|
|
214
|
-
import { writeFileSync } from 'fs';
|
|
215
|
-
writeFileSync('landscape.jpg', image.uint8Array);
|
|
225
|
+
writeFileSync('image.png', image.uint8Array);
|
|
216
226
|
```
|
|
217
227
|
|
|
218
228
|
**Returns:**
|
|
@@ -222,154 +232,277 @@ writeFileSync('landscape.jpg', image.uint8Array);
|
|
|
222
232
|
- `image.mediaType` - MIME type ('image/jpeg' or 'image/png')
|
|
223
233
|
- `warnings` - Array of any warnings about unsupported parameters
|
|
224
234
|
|
|
225
|
-
###
|
|
226
|
-
|
|
227
|
-
| Model ID | Description | Supported Aspect Ratios |
|
|
228
|
-
| -------------------------------------- | ------------------------------- | ------------------------------------- |
|
|
229
|
-
| `bytedance/seedream-3.0` | Advanced text-to-image model | 1:1, 4:3, 3:4 |
|
|
230
|
-
| `bytedance/seedream-4.0` | Text-to-image (v4) | 1:1 (supports 1024, 2048, 4096) |
|
|
231
|
-
| `bytedance/seedream-4.0-edit` | Image editing (v4, multi-image) | 1:1 (supports 1024, 1536, 2048, 4096) |
|
|
232
|
-
| `black-forest-labs/flux-1-schnell` | Fast image generation (4 steps) | 1:1, 4:3, 3:4 |
|
|
233
|
-
| `black-forest-labs/flux-1-dev` | High-quality image generation | 1:1, 4:3, 3:4 |
|
|
234
|
-
| `black-forest-labs/flux-1-kontext-dev` | Context-aware image generation | 1:1, 4:3, 3:4 |
|
|
235
|
-
| `qwen/qwen-image` | Text-to-image generation | 1:1, 4:3, 3:4 |
|
|
236
|
-
| `qwen/qwen-image-edit` | Image editing (prompt-guided) | 1:1, 4:3, 3:4 |
|
|
237
|
-
| `nano-banana-edit` | Image editing (multi-image) | 1:1, 4:3, 3:4 |
|
|
238
|
-
| `google/nano-banana-pro-edit` | Image editing (Gemini-powered) | Uses resolution param (1k, 2k) |
|
|
239
|
-
| `pruna/p-image-t2i` | Pruna text-to-image | 1:1, 16:9, 9:16, 4:3, 3:4, etc. |
|
|
240
|
-
| `pruna/p-image-edit` | Pruna image editing | match_input_image, 1:1, 16:9, etc. |
|
|
241
|
-
|
|
242
|
-
**Note**: The provider uses strict validation for image parameters. Unsupported aspect ratios (like `16:9`, `9:16`, `3:2`, `2:3`) will throw an `InvalidArgumentError` with a clear message about supported alternatives.
|
|
235
|
+
### Edit Image
|
|
243
236
|
|
|
244
|
-
|
|
237
|
+
For editing, pass reference images via `prompt.images` (recommended). The AI SDK normalizes `prompt.images` into `files` for the provider call.
|
|
245
238
|
|
|
246
|
-
|
|
239
|
+
#### Single reference image (1 input image)
|
|
247
240
|
|
|
248
241
|
```ts
|
|
242
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
243
|
+
import { experimental_generateImage as generateImage } from 'ai';
|
|
244
|
+
|
|
249
245
|
const { image } = await generateImage({
|
|
250
|
-
model: runpod.
|
|
251
|
-
prompt:
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
providerOptions: {
|
|
255
|
-
runpod: {
|
|
256
|
-
negative_prompt: 'blurry, low quality',
|
|
257
|
-
enable_safety_checker: true,
|
|
258
|
-
},
|
|
246
|
+
model: runpod.image('pruna/p-image-edit'),
|
|
247
|
+
prompt: {
|
|
248
|
+
text: 'Virtual staging: add modern Scandinavian furniture: a gray sofa, wooden coffee table, potted plants, and warm lighting',
|
|
249
|
+
images: ['https://image.runpod.ai/demo/empty-room.png'],
|
|
259
250
|
},
|
|
251
|
+
aspectRatio: '16:9',
|
|
260
252
|
});
|
|
261
253
|
```
|
|
262
254
|
|
|
263
|
-
####
|
|
255
|
+
#### Multiple reference images (4 input images)
|
|
264
256
|
|
|
265
|
-
|
|
257
|
+
Note: Prior to v1.0.0, images were passed via `providerOptions.runpod.image` / `providerOptions.runpod.images`. These legacy options still work, but `prompt.images` is now the recommended approach.
|
|
266
258
|
|
|
267
259
|
```ts
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
model: runpod.imageModel('black-forest-labs/flux-1-kontext-dev'),
|
|
271
|
-
prompt: 'Transform this into a cyberpunk style with neon lights',
|
|
272
|
-
aspectRatio: '1:1',
|
|
273
|
-
providerOptions: {
|
|
274
|
-
runpod: {
|
|
275
|
-
image: 'https://example.com/input-image.jpg',
|
|
276
|
-
},
|
|
277
|
-
},
|
|
278
|
-
});
|
|
260
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
261
|
+
import { experimental_generateImage as generateImage } from 'ai';
|
|
279
262
|
|
|
280
|
-
// Example: Using base64 encoded image
|
|
281
263
|
const { image } = await generateImage({
|
|
282
|
-
model: runpod.
|
|
283
|
-
prompt:
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
264
|
+
model: runpod.image('google/nano-banana-pro-edit'),
|
|
265
|
+
prompt: {
|
|
266
|
+
text: 'Combine these four robot musicians into an epic band photo on a concert stage with dramatic lighting',
|
|
267
|
+
images: [
|
|
268
|
+
'https://image.runpod.ai/demo/robot-drummer.png',
|
|
269
|
+
'https://image.runpod.ai/demo/robot-guitarist.png',
|
|
270
|
+
'https://image.runpod.ai/demo/robot-bassist.png',
|
|
271
|
+
'https://image.runpod.ai/demo/robot-singer.png',
|
|
272
|
+
],
|
|
288
273
|
},
|
|
289
274
|
});
|
|
290
275
|
```
|
|
291
276
|
|
|
277
|
+
### Examples
|
|
278
|
+
|
|
279
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
280
|
+
|
|
281
|
+
### Supported Models
|
|
282
|
+
|
|
283
|
+
| Model ID | Type |
|
|
284
|
+
| -------------------------------------- | ---- |
|
|
285
|
+
| `pruna/p-image-t2i` | t2i |
|
|
286
|
+
| `pruna/p-image-edit` | edit |
|
|
287
|
+
| `google/nano-banana-pro-edit` | edit |
|
|
288
|
+
| `bytedance/seedream-3.0` | t2i |
|
|
289
|
+
| `bytedance/seedream-4.0` | t2i |
|
|
290
|
+
| `bytedance/seedream-4.0-edit` | edit |
|
|
291
|
+
| `qwen/qwen-image` | t2i |
|
|
292
|
+
| `qwen/qwen-image-edit` | edit |
|
|
293
|
+
| `nano-banana-edit` | edit |
|
|
294
|
+
| `black-forest-labs/flux-1-schnell` | t2i |
|
|
295
|
+
| `black-forest-labs/flux-1-dev` | t2i |
|
|
296
|
+
| `black-forest-labs/flux-1-kontext-dev` | edit |
|
|
297
|
+
|
|
298
|
+
For the full list of models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
|
|
299
|
+
|
|
300
|
+
### Provider Options
|
|
301
|
+
|
|
302
|
+
Additional options can be passed through `providerOptions.runpod` (supported options depend on the model):
|
|
303
|
+
|
|
304
|
+
| Option | Type | Default | Description |
|
|
305
|
+
| ------------------------ | ---------- | ------- | ----------------------------------------------------------- |
|
|
306
|
+
| `negative_prompt` | `string` | `""` | What to avoid in the image (model-dependent) |
|
|
307
|
+
| `enable_safety_checker` | `boolean` | `true` | Content safety filtering (model-dependent) |
|
|
308
|
+
| `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna) |
|
|
309
|
+
| `aspect_ratio` | `string` | - | Model-specific aspect ratio (Pruna: supports `custom`) |
|
|
310
|
+
| `image` | `string` | - | Legacy: Single input image URL/base64 (use `prompt.images`) |
|
|
311
|
+
| `images` | `string[]` | - | Legacy: Multiple input images (use `prompt.images`) |
|
|
312
|
+
| `resolution` | `string` | `"1k"` | Output resolution: 1k, 2k, 4k (Nano Banana Pro) |
|
|
313
|
+
| `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440; multiples of 16) |
|
|
314
|
+
| `num_inference_steps` | `number` | Auto | Denoising steps (model-dependent) |
|
|
315
|
+
| `guidance` | `number` | Auto | Prompt adherence strength (model-dependent) |
|
|
316
|
+
| `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp (model-dependent) |
|
|
317
|
+
| `maxPollAttempts` | `number` | `60` | Max polling attempts |
|
|
318
|
+
| `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
|
|
319
|
+
|
|
320
|
+
**Example (providerOptions):**
|
|
321
|
+
|
|
292
322
|
```ts
|
|
293
|
-
// Example: Combine multiple images using Nano Banana edit
|
|
294
323
|
const { image } = await generateImage({
|
|
295
|
-
model: runpod.
|
|
296
|
-
prompt:
|
|
297
|
-
|
|
298
|
-
|
|
324
|
+
model: runpod.image('bytedance/seedream-3.0'),
|
|
325
|
+
prompt: 'A sunset over mountains',
|
|
326
|
+
size: '1328x1328',
|
|
327
|
+
seed: 42,
|
|
299
328
|
providerOptions: {
|
|
300
329
|
runpod: {
|
|
301
|
-
|
|
302
|
-
'https://image.runpod.ai/uploads/0bz_xzhuLq/a2166199-5bd5-496b-b9ab-a8bae3f73bdc.jpg',
|
|
303
|
-
'https://image.runpod.ai/uploads/Yw86rhY6xi/2ff8435f-f416-4096-9a4d-2f8c838b2d53.jpg',
|
|
304
|
-
'https://image.runpod.ai/uploads/bpCCX9zLY8/3bc27605-6f9a-40ad-83e9-c29bed45fed9.jpg',
|
|
305
|
-
'https://image.runpod.ai/uploads/LPHEY6pyHp/f950ceb8-fafa-4800-bdf1-fd3fd684d843.jpg',
|
|
306
|
-
],
|
|
330
|
+
negative_prompt: 'blurry, low quality',
|
|
307
331
|
enable_safety_checker: true,
|
|
332
|
+
maxPollAttempts: 30,
|
|
333
|
+
pollIntervalMillis: 4000,
|
|
308
334
|
},
|
|
309
335
|
},
|
|
310
336
|
});
|
|
311
337
|
```
|
|
312
338
|
|
|
313
|
-
|
|
339
|
+
### Model-specific Notes
|
|
340
|
+
|
|
341
|
+
#### Pruna (p-image)
|
|
342
|
+
|
|
343
|
+
Supported models: `pruna/p-image-t2i`, `pruna/p-image-edit`
|
|
344
|
+
|
|
345
|
+
- **Text-to-image**: supports standard `aspectRatio` values; for custom dimensions, set `providerOptions.runpod.aspect_ratio = 'custom'` and provide `width`/`height`.
|
|
346
|
+
- **Edit image**: supports 1–5 input images via `prompt.images` (recommended) or `files`.
|
|
314
347
|
|
|
315
|
-
|
|
348
|
+
**Example: Custom Dimensions (t2i)**
|
|
316
349
|
|
|
317
350
|
```ts
|
|
318
|
-
// Full control over generation parameters
|
|
319
351
|
const { image } = await generateImage({
|
|
320
|
-
model: runpod.
|
|
321
|
-
prompt: 'A
|
|
322
|
-
size: '1328x1328',
|
|
323
|
-
seed: 42, // For reproducible results
|
|
352
|
+
model: runpod.image('pruna/p-image-t2i'),
|
|
353
|
+
prompt: 'A robot',
|
|
324
354
|
providerOptions: {
|
|
325
355
|
runpod: {
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
guidance: 3.5, // Stronger prompt adherence (default: 2)
|
|
330
|
-
output_format: 'png', // High quality format
|
|
331
|
-
// Polling settings for long generations
|
|
332
|
-
maxPollAttempts: 30,
|
|
333
|
-
pollIntervalMillis: 4000,
|
|
356
|
+
aspect_ratio: 'custom',
|
|
357
|
+
width: 512,
|
|
358
|
+
height: 768,
|
|
334
359
|
},
|
|
335
360
|
},
|
|
336
361
|
});
|
|
362
|
+
```
|
|
337
363
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
364
|
+
#### Google (Nano Banana Pro)
|
|
365
|
+
|
|
366
|
+
Supported model: `google/nano-banana-pro-edit`
|
|
367
|
+
|
|
368
|
+
| Parameter | Supported Values | Notes |
|
|
369
|
+
| :------------------------------ | :---------------------------------------------------------------- | :----------------------------------- |
|
|
370
|
+
| `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `21:9`, `9:21` | Standard AI SDK parameter |
|
|
371
|
+
| `resolution` | `1k`, `2k`, `4k` | Output resolution quality |
|
|
372
|
+
| `output_format` | `jpeg`, `png`, `webp` | Output image format |
|
|
373
|
+
| `prompt.images` | `string[]` | Recommended. Input image(s) to edit. |
|
|
374
|
+
| `files` | `ImageModelV3File[]` | Alternative (lower-level). |
|
|
375
|
+
| `providerOptions.runpod.images` | `string[]` | Legacy. Input image(s) to edit. |
|
|
376
|
+
|
|
377
|
+
## Speech Models
|
|
378
|
+
|
|
379
|
+
Generate speech using the AI SDK's `experimental_generateSpeech` and `runpod.speech(...)`:
|
|
380
|
+
|
|
381
|
+
```ts
|
|
382
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
383
|
+
import { experimental_generateSpeech as generateSpeech } from 'ai';
|
|
384
|
+
|
|
385
|
+
const result = await generateSpeech({
|
|
386
|
+
model: runpod.speech('resembleai/chatterbox-turbo'),
|
|
387
|
+
text: 'Hello from Runpod.',
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
// Save to filesystem:
|
|
391
|
+
import { writeFileSync } from 'fs';
|
|
392
|
+
writeFileSync('speech.wav', result.audio.uint8Array);
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
**Returns:**
|
|
396
|
+
|
|
397
|
+
- `result.audio` (`GeneratedAudioFile`)
|
|
398
|
+
- `result.audio.uint8Array` (binary audio)
|
|
399
|
+
- `result.audio.base64` (base64-encoded audio)
|
|
400
|
+
- `result.audio.mediaType` (e.g. `audio/wav`)
|
|
401
|
+
- `result.audio.format` (e.g. `wav`)
|
|
402
|
+
- `result.warnings` (e.g. unsupported parameters)
|
|
403
|
+
- `result.responses` (telemetry/debug metadata)
|
|
404
|
+
- `result.providerMetadata.runpod`
|
|
405
|
+
- `audioUrl` (public URL to the generated audio)
|
|
406
|
+
- `cost` (if available)
|
|
407
|
+
|
|
408
|
+
### Examples
|
|
409
|
+
|
|
410
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
411
|
+
|
|
412
|
+
### Supported Models
|
|
413
|
+
|
|
414
|
+
- `resembleai/chatterbox-turbo`
|
|
415
|
+
|
|
416
|
+
### `resembleai/chatterbox-turbo`
|
|
417
|
+
|
|
418
|
+
#### Parameters
|
|
419
|
+
|
|
420
|
+
| Parameter | Type | Default | Description |
|
|
421
|
+
| --------- | -------- | -------- | ---------------------------------------- |
|
|
422
|
+
| `text` | `string` | - | Required. The text to convert to speech. |
|
|
423
|
+
| `voice` | `string` | `"lucy"` | Built-in voice name (see list below). |
|
|
424
|
+
|
|
425
|
+
#### Provider Options
|
|
426
|
+
|
|
427
|
+
Use `providerOptions.runpod` for model-specific parameters:
|
|
428
|
+
|
|
429
|
+
| Option | Type | Default | Description |
|
|
430
|
+
| ----------- | -------- | ------- | ------------------------------------------- |
|
|
431
|
+
| `voice_url` | `string` | - | URL to audio file (5–10s) for voice cloning |
|
|
432
|
+
| `voiceUrl` | `string` | - | Alias for `voice_url` |
|
|
433
|
+
|
|
434
|
+
> Note: If `voice_url` is provided, the built-in `voice` is ignored.
|
|
435
|
+
>
|
|
436
|
+
> Note: This speech endpoint currently returns WAV only; `outputFormat` is ignored.
|
|
437
|
+
|
|
438
|
+
#### Voices
|
|
439
|
+
|
|
440
|
+
`voice` selects one of the built-in voices (default: `lucy`):
|
|
441
|
+
|
|
442
|
+
```ts
|
|
443
|
+
[
|
|
444
|
+
'aaron',
|
|
445
|
+
'abigail',
|
|
446
|
+
'anaya',
|
|
447
|
+
'andy',
|
|
448
|
+
'archer',
|
|
449
|
+
'brian',
|
|
450
|
+
'chloe',
|
|
451
|
+
'dylan',
|
|
452
|
+
'emmanuel',
|
|
453
|
+
'ethan',
|
|
454
|
+
'evelyn',
|
|
455
|
+
'gavin',
|
|
456
|
+
'gordon',
|
|
457
|
+
'ivan',
|
|
458
|
+
'laura',
|
|
459
|
+
'lucy',
|
|
460
|
+
'madison',
|
|
461
|
+
'marisol',
|
|
462
|
+
'meera',
|
|
463
|
+
'walter',
|
|
464
|
+
];
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
#### Voice cloning (via URL)
|
|
468
|
+
|
|
469
|
+
Use `providerOptions.runpod.voice_url` (or `voiceUrl`) to clone a voice from a short reference audio (5–10s):
|
|
470
|
+
|
|
471
|
+
```ts
|
|
472
|
+
const result = await generateSpeech({
|
|
473
|
+
model: runpod.speech('resembleai/chatterbox-turbo'),
|
|
474
|
+
text: 'Hello!',
|
|
343
475
|
providerOptions: {
|
|
344
476
|
runpod: {
|
|
345
|
-
|
|
346
|
-
guidance: 10, // Higher guidance for simple prompts
|
|
347
|
-
output_format: 'jpg', // Smaller file size
|
|
477
|
+
voice_url: 'https://example.com/voice.wav',
|
|
348
478
|
},
|
|
349
479
|
},
|
|
350
480
|
});
|
|
351
481
|
```
|
|
352
482
|
|
|
353
|
-
|
|
483
|
+
#### Paralinguistic Tags
|
|
484
|
+
|
|
485
|
+
Include these tags inline with your text to trigger realistic vocal expressions:
|
|
486
|
+
|
|
487
|
+
| Tag | Effect |
|
|
488
|
+
| ---------------- | --------------- |
|
|
489
|
+
| `[clear throat]` | Throat clearing |
|
|
490
|
+
| `[sigh]` | Sighing |
|
|
491
|
+
| `[sush]` | Shushing |
|
|
492
|
+
| `[cough]` | Coughing |
|
|
493
|
+
| `[groan]` | Groaning |
|
|
494
|
+
| `[sniff]` | Sniffing |
|
|
495
|
+
| `[gasp]` | Gasping |
|
|
496
|
+
| `[chuckle]` | Chuckling |
|
|
497
|
+
| `[laugh]` | Laughing |
|
|
354
498
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
| `image` | `string` | - | Single input image: URL or base64 data URI (Flux Kontext) |
|
|
363
|
-
| `images` | `string[]` | - | Multiple input images (e.g., for `nano-banana-edit` multi-image editing) |
|
|
364
|
-
| `aspect_ratio` | `string` | `"1:1"` | Aspect ratio string (Pruna: "16:9", "match_input_image", etc.) |
|
|
365
|
-
| `resolution` | `string` | `"1k"` | Output resolution (Nano Banana Pro: "1k", "2k") |
|
|
366
|
-
| `num_inference_steps` | `number` | Auto | Number of denoising steps (Flux: 4 for schnell, 28 for others) |
|
|
367
|
-
| `guidance` | `number` | Auto | Guidance scale for prompt adherence (Flux: 7 for schnell, 2 for others) |
|
|
368
|
-
| `output_format` | `string` | `"png"` | Output image format ("png", "jpg", or "jpeg") |
|
|
369
|
-
| `enable_base64_output` | `boolean` | `false` | Return base64 instead of URL (Nano Banana Pro) |
|
|
370
|
-
| `enable_sync_mode` | `boolean` | `false` | Enable synchronous mode (some models) |
|
|
371
|
-
| `maxPollAttempts` | `number` | `60` | Maximum polling attempts for async generation |
|
|
372
|
-
| `pollIntervalMillis` | `number` | `5000` | Polling interval in milliseconds (5 seconds) |
|
|
499
|
+
```ts
|
|
500
|
+
const result = await generateSpeech({
|
|
501
|
+
model: runpod.speech('resembleai/chatterbox-turbo'),
|
|
502
|
+
text: `[sigh] I can't believe it worked! [laugh] This is amazing.`,
|
|
503
|
+
voice: 'lucy',
|
|
504
|
+
});
|
|
505
|
+
```
|
|
373
506
|
|
|
374
507
|
## About Runpod
|
|
375
508
|
|
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -27,23 +27,35 @@ interface RunpodProvider {
|
|
|
27
27
|
/**
|
|
28
28
|
Creates a model for text generation.
|
|
29
29
|
*/
|
|
30
|
-
(modelId: string):
|
|
30
|
+
(modelId: string): LanguageModelV3;
|
|
31
31
|
/**
|
|
32
32
|
Creates a chat model for text generation.
|
|
33
33
|
*/
|
|
34
|
-
chatModel(modelId: string):
|
|
34
|
+
chatModel(modelId: string): LanguageModelV3;
|
|
35
35
|
/**
|
|
36
36
|
Creates a chat model for text generation.
|
|
37
37
|
*/
|
|
38
|
-
languageModel(modelId: string):
|
|
38
|
+
languageModel(modelId: string): LanguageModelV3;
|
|
39
39
|
/**
|
|
40
40
|
Creates a completion model for text generation.
|
|
41
41
|
*/
|
|
42
|
-
completionModel(modelId: string):
|
|
42
|
+
completionModel(modelId: string): LanguageModelV3;
|
|
43
43
|
/**
|
|
44
44
|
Creates an image model for image generation.
|
|
45
45
|
*/
|
|
46
|
-
imageModel(modelId: string):
|
|
46
|
+
imageModel(modelId: string): ImageModelV3;
|
|
47
|
+
/**
|
|
48
|
+
Creates an image model for image generation.
|
|
49
|
+
*/
|
|
50
|
+
image(modelId: string): ImageModelV3;
|
|
51
|
+
/**
|
|
52
|
+
Creates a speech model for speech generation.
|
|
53
|
+
*/
|
|
54
|
+
speechModel(modelId: string): SpeechModelV3;
|
|
55
|
+
/**
|
|
56
|
+
Creates a speech model for speech generation.
|
|
57
|
+
*/
|
|
58
|
+
speech(modelId: string): SpeechModelV3;
|
|
47
59
|
}
|
|
48
60
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
49
61
|
declare const runpod: RunpodProvider;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -27,23 +27,35 @@ interface RunpodProvider {
|
|
|
27
27
|
/**
|
|
28
28
|
Creates a model for text generation.
|
|
29
29
|
*/
|
|
30
|
-
(modelId: string):
|
|
30
|
+
(modelId: string): LanguageModelV3;
|
|
31
31
|
/**
|
|
32
32
|
Creates a chat model for text generation.
|
|
33
33
|
*/
|
|
34
|
-
chatModel(modelId: string):
|
|
34
|
+
chatModel(modelId: string): LanguageModelV3;
|
|
35
35
|
/**
|
|
36
36
|
Creates a chat model for text generation.
|
|
37
37
|
*/
|
|
38
|
-
languageModel(modelId: string):
|
|
38
|
+
languageModel(modelId: string): LanguageModelV3;
|
|
39
39
|
/**
|
|
40
40
|
Creates a completion model for text generation.
|
|
41
41
|
*/
|
|
42
|
-
completionModel(modelId: string):
|
|
42
|
+
completionModel(modelId: string): LanguageModelV3;
|
|
43
43
|
/**
|
|
44
44
|
Creates an image model for image generation.
|
|
45
45
|
*/
|
|
46
|
-
imageModel(modelId: string):
|
|
46
|
+
imageModel(modelId: string): ImageModelV3;
|
|
47
|
+
/**
|
|
48
|
+
Creates an image model for image generation.
|
|
49
|
+
*/
|
|
50
|
+
image(modelId: string): ImageModelV3;
|
|
51
|
+
/**
|
|
52
|
+
Creates a speech model for speech generation.
|
|
53
|
+
*/
|
|
54
|
+
speechModel(modelId: string): SpeechModelV3;
|
|
55
|
+
/**
|
|
56
|
+
Creates a speech model for speech generation.
|
|
57
|
+
*/
|
|
58
|
+
speech(modelId: string): SpeechModelV3;
|
|
47
59
|
}
|
|
48
60
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
49
61
|
declare const runpod: RunpodProvider;
|