@runpod/ai-sdk-provider 0.12.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.md +158 -168
- package/dist/index.d.mts +12 -8
- package/dist/index.d.ts +12 -8
- package/dist/index.js +80 -21
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +80 -21
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,34 @@
|
|
|
1
1
|
# @runpod/ai-sdk-provider
|
|
2
2
|
|
|
3
|
+
## 1.0.0
|
|
4
|
+
|
|
5
|
+
### Major Changes
|
|
6
|
+
|
|
7
|
+
- 7fa7435: Release 1.0.0 - AI SDK v6 Compatibility
|
|
8
|
+
|
|
9
|
+
This release marks the first stable version of the Runpod AI SDK Provider, coinciding with the AI SDK v6 release.
|
|
10
|
+
|
|
11
|
+
### Breaking Changes
|
|
12
|
+
- Now requires AI SDK v6 (`ai@6.x`) - users on AI SDK v5 must upgrade
|
|
13
|
+
- Updated to V3 provider interfaces (`LanguageModelV3`, `ImageModelV3`, `SpeechModelV3`)
|
|
14
|
+
|
|
15
|
+
### New Features
|
|
16
|
+
- **Standardized `files` parameter support**: Image models now accept the AI SDK standard `files` parameter for image editing, supporting both URLs and base64 data
|
|
17
|
+
- Legacy `providerOptions.runpod.images` still works, but `files` is now the recommended approach
|
|
18
|
+
|
|
19
|
+
### Dependencies
|
|
20
|
+
- `@ai-sdk/provider`: ^2.0.0 → ^3.0.0
|
|
21
|
+
- `@ai-sdk/provider-utils`: ^3.0.5 → ^4.0.0
|
|
22
|
+
- `@ai-sdk/openai-compatible`: ^1.0.11 → ^2.0.0
|
|
23
|
+
|
|
24
|
+
### What's Unchanged
|
|
25
|
+
- Public API remains identical - no code changes required for users already on AI SDK v6
|
|
26
|
+
- All language models, image models, and speech models work the same way
|
|
27
|
+
|
|
28
|
+
### Patch Changes
|
|
29
|
+
|
|
30
|
+
- 7fa7435: docs: expand speech docs for chatterbox turbo
|
|
31
|
+
|
|
3
32
|
## 0.12.0
|
|
4
33
|
|
|
5
34
|
### Minor Changes
|
package/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Runpod AI SDK Provider
|
|
2
2
|
|
|
3
|
+

|
|
4
|
+
|
|
3
5
|
The **Runpod provider** for the [AI SDK](https://ai-sdk.dev/docs) contains language model, image generation, and speech generation support for [Runpod's](https://runpod.io) public endpoints.
|
|
4
6
|
|
|
5
7
|
## Setup
|
|
@@ -104,7 +106,11 @@ for await (const delta of textStream) {
|
|
|
104
106
|
}
|
|
105
107
|
```
|
|
106
108
|
|
|
107
|
-
###
|
|
109
|
+
### Examples
|
|
110
|
+
|
|
111
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
112
|
+
|
|
113
|
+
### Supported Models
|
|
108
114
|
|
|
109
115
|
| Model ID | Description | Streaming | Object Generation | Tool Usage | Reasoning Notes |
|
|
110
116
|
| --------------------------------- | ------------------------------------------------------------------- | --------- | ----------------- | ---------- | ------------------------- |
|
|
@@ -196,23 +202,27 @@ console.log(result.success ? result.data : parsed);
|
|
|
196
202
|
|
|
197
203
|
## Image Models
|
|
198
204
|
|
|
199
|
-
|
|
205
|
+
With image models you can:
|
|
206
|
+
|
|
207
|
+
- **Text-to-image**: generate a new image from a text prompt.
|
|
208
|
+
- **Edit image**: transform an existing image by providing reference image(s).
|
|
200
209
|
|
|
201
|
-
|
|
210
|
+
All examples use the AI SDK's `experimental_generateImage` and `runpod.image(modelId)`.
|
|
211
|
+
|
|
212
|
+
### Text-to-Image
|
|
202
213
|
|
|
203
214
|
```ts
|
|
204
215
|
import { runpod } from '@runpod/ai-sdk-provider';
|
|
205
216
|
import { experimental_generateImage as generateImage } from 'ai';
|
|
217
|
+
import { writeFileSync } from 'fs';
|
|
206
218
|
|
|
207
219
|
const { image } = await generateImage({
|
|
208
|
-
model: runpod.
|
|
220
|
+
model: runpod.image('pruna/p-image-t2i'),
|
|
209
221
|
prompt: 'A serene mountain landscape at sunset',
|
|
210
222
|
aspectRatio: '4:3',
|
|
211
223
|
});
|
|
212
224
|
|
|
213
|
-
|
|
214
|
-
import { writeFileSync } from 'fs';
|
|
215
|
-
writeFileSync('landscape.jpg', image.uint8Array);
|
|
225
|
+
writeFileSync('image.png', image.uint8Array);
|
|
216
226
|
```
|
|
217
227
|
|
|
218
228
|
**Returns:**
|
|
@@ -222,77 +232,96 @@ writeFileSync('landscape.jpg', image.uint8Array);
|
|
|
222
232
|
- `image.mediaType` - MIME type ('image/jpeg' or 'image/png')
|
|
223
233
|
- `warnings` - Array of any warnings about unsupported parameters
|
|
224
234
|
|
|
225
|
-
###
|
|
235
|
+
### Edit Image
|
|
226
236
|
|
|
227
|
-
|
|
228
|
-
| -------------------------------------- | ---- |
|
|
229
|
-
| `bytedance/seedream-3.0` | t2i |
|
|
230
|
-
| `bytedance/seedream-4.0` | t2i |
|
|
231
|
-
| `bytedance/seedream-4.0-edit` | edit |
|
|
232
|
-
| `black-forest-labs/flux-1-schnell` | t2i |
|
|
233
|
-
| `black-forest-labs/flux-1-dev` | t2i |
|
|
234
|
-
| `black-forest-labs/flux-1-kontext-dev` | edit |
|
|
235
|
-
| `qwen/qwen-image` | t2i |
|
|
236
|
-
| `qwen/qwen-image-edit` | edit |
|
|
237
|
-
| `nano-banana-edit` | edit |
|
|
238
|
-
| `google/nano-banana-pro-edit` | edit |
|
|
239
|
-
| `pruna/p-image-t2i` | t2i |
|
|
240
|
-
| `pruna/p-image-edit` | edit |
|
|
237
|
+
For editing, pass reference images via `prompt.images` (recommended). The AI SDK normalizes `prompt.images` into `files` for the provider call.
|
|
241
238
|
|
|
242
|
-
|
|
239
|
+
#### Single reference image (1 input image)
|
|
243
240
|
|
|
244
|
-
|
|
241
|
+
```ts
|
|
242
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
243
|
+
import { experimental_generateImage as generateImage } from 'ai';
|
|
245
244
|
|
|
246
|
-
|
|
245
|
+
const { image } = await generateImage({
|
|
246
|
+
model: runpod.image('pruna/p-image-edit'),
|
|
247
|
+
prompt: {
|
|
248
|
+
text: 'Virtual staging: add modern Scandinavian furniture: a gray sofa, wooden coffee table, potted plants, and warm lighting',
|
|
249
|
+
images: ['https://image.runpod.ai/demo/empty-room.png'],
|
|
250
|
+
},
|
|
251
|
+
aspectRatio: '16:9',
|
|
252
|
+
});
|
|
253
|
+
```
|
|
247
254
|
|
|
248
|
-
|
|
249
|
-
| :---------------------------------------- | :------------------------------------------------ | :---------------------------------------------------- |
|
|
250
|
-
| `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | Standard AI SDK parameter |
|
|
251
|
-
| `aspectRatio` (t2i only) | `custom` | Requires `width` & `height` in providerOptions |
|
|
252
|
-
| `providerOptions.runpod.width` / `height` | `256` - `1440` | Custom dimensions (t2i only). Must be multiple of 16. |
|
|
253
|
-
| `providerOptions.runpod.images` | `string[]` | Required for `p-image-edit`. Supports 1-5 images. |
|
|
255
|
+
#### Multiple reference images (4 input images)
|
|
254
256
|
|
|
255
|
-
|
|
257
|
+
Note: Prior to v1.0.0, images were passed via `providerOptions.runpod.image` / `providerOptions.runpod.images`. Both options still work, but `prompt.images` is now recommended.
|
|
256
258
|
|
|
257
259
|
```ts
|
|
260
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
261
|
+
import { experimental_generateImage as generateImage } from 'ai';
|
|
262
|
+
|
|
258
263
|
const { image } = await generateImage({
|
|
259
|
-
model: runpod.
|
|
260
|
-
prompt:
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
264
|
+
model: runpod.image('google/nano-banana-pro-edit'),
|
|
265
|
+
prompt: {
|
|
266
|
+
text: 'Combine these four robot musicians into an epic band photo on a concert stage with dramatic lighting',
|
|
267
|
+
images: [
|
|
268
|
+
'https://image.runpod.ai/demo/robot-drummer.png',
|
|
269
|
+
'https://image.runpod.ai/demo/robot-guitarist.png',
|
|
270
|
+
'https://image.runpod.ai/demo/robot-bassist.png',
|
|
271
|
+
'https://image.runpod.ai/demo/robot-singer.png',
|
|
272
|
+
],
|
|
267
273
|
},
|
|
268
274
|
});
|
|
269
275
|
```
|
|
270
276
|
|
|
271
|
-
###
|
|
277
|
+
### Examples
|
|
272
278
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
Supported model: `google/nano-banana-pro-edit`
|
|
279
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
276
280
|
|
|
277
|
-
|
|
278
|
-
| :------------------------------ | :---------------------------------------------------------------- | :-------------------------------- |
|
|
279
|
-
| `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `21:9`, `9:21` | Standard AI SDK parameter |
|
|
280
|
-
| `resolution` | `1k`, `2k`, `4k` | Output resolution quality |
|
|
281
|
-
| `output_format` | `jpeg`, `png`, `webp` | Output image format |
|
|
282
|
-
| `providerOptions.runpod.images` | `string[]` | Required. Input image(s) to edit. |
|
|
281
|
+
### Supported Models
|
|
283
282
|
|
|
284
|
-
|
|
283
|
+
| Model ID | Type |
|
|
284
|
+
| -------------------------------------- | ---- |
|
|
285
|
+
| `pruna/p-image-t2i` | t2i |
|
|
286
|
+
| `pruna/p-image-edit` | edit |
|
|
287
|
+
| `google/nano-banana-pro-edit` | edit |
|
|
288
|
+
| `bytedance/seedream-3.0` | t2i |
|
|
289
|
+
| `bytedance/seedream-4.0` | t2i |
|
|
290
|
+
| `bytedance/seedream-4.0-edit` | edit |
|
|
291
|
+
| `qwen/qwen-image` | t2i |
|
|
292
|
+
| `qwen/qwen-image-edit` | edit |
|
|
293
|
+
| `nano-banana-edit` | edit |
|
|
294
|
+
| `black-forest-labs/flux-1-schnell` | t2i |
|
|
295
|
+
| `black-forest-labs/flux-1-dev` | t2i |
|
|
296
|
+
| `black-forest-labs/flux-1-kontext-dev` | edit |
|
|
285
297
|
|
|
286
|
-
|
|
298
|
+
For the full list of models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
|
|
287
299
|
|
|
288
|
-
|
|
289
|
-
- **Edit models**: Require an input image via `providerOptions.runpod.image` (single) or `images` (multiple).
|
|
300
|
+
### Provider Options
|
|
290
301
|
|
|
291
|
-
|
|
302
|
+
Additional options through `providerOptions.runpod` (supported options depend on the model):
|
|
303
|
+
|
|
304
|
+
| Option | Type | Default | Description |
|
|
305
|
+
| ------------------------ | ---------- | ------- | ----------------------------------------------------------- |
|
|
306
|
+
| `negative_prompt` | `string` | `""` | What to avoid in the image (model-dependent) |
|
|
307
|
+
| `enable_safety_checker` | `boolean` | `true` | Content safety filtering (model-dependent) |
|
|
308
|
+
| `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna) |
|
|
309
|
+
| `aspect_ratio` | `string` | - | Model-specific aspect ratio (Pruna: supports `custom`) |
|
|
310
|
+
| `image` | `string` | - | Legacy: Single input image URL/base64 (use `prompt.images`) |
|
|
311
|
+
| `images` | `string[]` | - | Legacy: Multiple input images (use `prompt.images`) |
|
|
312
|
+
| `resolution` | `string` | `"1k"` | Output resolution: 1k, 2k, 4k (Nano Banana Pro) |
|
|
313
|
+
| `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440; multiples of 16) |
|
|
314
|
+
| `num_inference_steps` | `number` | Auto | Denoising steps (model-dependent) |
|
|
315
|
+
| `guidance` | `number` | Auto | Prompt adherence strength (model-dependent) |
|
|
316
|
+
| `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp (model-dependent) |
|
|
317
|
+
| `maxPollAttempts` | `number` | `60` | Max polling attempts |
|
|
318
|
+
| `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
|
|
319
|
+
|
|
320
|
+
**Example (providerOptions):**
|
|
292
321
|
|
|
293
322
|
```ts
|
|
294
323
|
const { image } = await generateImage({
|
|
295
|
-
model: runpod.
|
|
324
|
+
model: runpod.image('bytedance/seedream-3.0'),
|
|
296
325
|
prompt: 'A sunset over mountains',
|
|
297
326
|
size: '1328x1328',
|
|
298
327
|
seed: 42,
|
|
@@ -300,134 +329,62 @@ const { image } = await generateImage({
|
|
|
300
329
|
runpod: {
|
|
301
330
|
negative_prompt: 'blurry, low quality',
|
|
302
331
|
enable_safety_checker: true,
|
|
332
|
+
maxPollAttempts: 30,
|
|
333
|
+
pollIntervalMillis: 4000,
|
|
303
334
|
},
|
|
304
335
|
},
|
|
305
336
|
});
|
|
306
337
|
```
|
|
307
338
|
|
|
308
|
-
|
|
339
|
+
### Model-specific Notes
|
|
309
340
|
|
|
310
|
-
|
|
341
|
+
#### Pruna (p-image)
|
|
311
342
|
|
|
312
|
-
|
|
313
|
-
// Example: Transform existing image
|
|
314
|
-
const { image } = await generateImage({
|
|
315
|
-
model: runpod.imageModel('black-forest-labs/flux-1-kontext-dev'),
|
|
316
|
-
prompt: 'Transform this into a cyberpunk style with neon lights',
|
|
317
|
-
aspectRatio: '1:1',
|
|
318
|
-
providerOptions: {
|
|
319
|
-
runpod: {
|
|
320
|
-
image: 'https://example.com/input-image.jpg',
|
|
321
|
-
},
|
|
322
|
-
},
|
|
323
|
-
});
|
|
324
|
-
|
|
325
|
-
// Example: Using base64 encoded image
|
|
326
|
-
const { image } = await generateImage({
|
|
327
|
-
model: runpod.imageModel('black-forest-labs/flux-1-kontext-dev'),
|
|
328
|
-
prompt: 'Make this image look like a painting',
|
|
329
|
-
providerOptions: {
|
|
330
|
-
runpod: {
|
|
331
|
-
image: 'data:image/png;base64,iVBORw0KGgoAAAANS...',
|
|
332
|
-
},
|
|
333
|
-
},
|
|
334
|
-
});
|
|
335
|
-
```
|
|
336
|
-
|
|
337
|
-
```ts
|
|
338
|
-
// Example: Combine multiple images using Nano Banana edit
|
|
339
|
-
const { image } = await generateImage({
|
|
340
|
-
model: runpod.imageModel('nano-banana-edit'),
|
|
341
|
-
prompt:
|
|
342
|
-
'Combine these four images into a single realistic 3D character scene.',
|
|
343
|
-
// Defaults to 1:1; you can also set size: '1328x1328' or aspectRatio: '4:3'
|
|
344
|
-
providerOptions: {
|
|
345
|
-
runpod: {
|
|
346
|
-
images: [
|
|
347
|
-
'https://image.runpod.ai/uploads/0bz_xzhuLq/a2166199-5bd5-496b-b9ab-a8bae3f73bdc.jpg',
|
|
348
|
-
'https://image.runpod.ai/uploads/Yw86rhY6xi/2ff8435f-f416-4096-9a4d-2f8c838b2d53.jpg',
|
|
349
|
-
'https://image.runpod.ai/uploads/bpCCX9zLY8/3bc27605-6f9a-40ad-83e9-c29bed45fed9.jpg',
|
|
350
|
-
'https://image.runpod.ai/uploads/LPHEY6pyHp/f950ceb8-fafa-4800-bdf1-fd3fd684d843.jpg',
|
|
351
|
-
],
|
|
352
|
-
enable_safety_checker: true,
|
|
353
|
-
},
|
|
354
|
-
},
|
|
355
|
-
});
|
|
356
|
-
```
|
|
343
|
+
Supported models: `pruna/p-image-t2i`, `pruna/p-image-edit`
|
|
357
344
|
|
|
358
|
-
|
|
345
|
+
- **Text-to-image**: supports standard `aspectRatio` values; for custom dimensions, set `providerOptions.runpod.aspect_ratio = 'custom'` and provide `width`/`height`.
|
|
346
|
+
- **Edit image**: supports 1–5 input images via `prompt.images` (recommended) or `files`.
|
|
359
347
|
|
|
360
|
-
|
|
348
|
+
**Example: Custom Dimensions (t2i)**
|
|
361
349
|
|
|
362
350
|
```ts
|
|
363
|
-
// Full control over generation parameters
|
|
364
|
-
const { image } = await generateImage({
|
|
365
|
-
model: runpod.imageModel('black-forest-labs/flux-1-dev'),
|
|
366
|
-
prompt: 'A majestic dragon breathing fire in a medieval castle',
|
|
367
|
-
size: '1328x1328',
|
|
368
|
-
seed: 42, // For reproducible results
|
|
369
|
-
providerOptions: {
|
|
370
|
-
runpod: {
|
|
371
|
-
negative_prompt: 'blurry, low quality, distorted, ugly, bad anatomy',
|
|
372
|
-
enable_safety_checker: true,
|
|
373
|
-
num_inference_steps: 50, // Higher quality (default: 28)
|
|
374
|
-
guidance: 3.5, // Stronger prompt adherence (default: 2)
|
|
375
|
-
output_format: 'png', // High quality format
|
|
376
|
-
// Polling settings for long generations
|
|
377
|
-
maxPollAttempts: 30,
|
|
378
|
-
pollIntervalMillis: 4000,
|
|
379
|
-
},
|
|
380
|
-
},
|
|
381
|
-
});
|
|
382
|
-
|
|
383
|
-
// Fast generation with minimal steps
|
|
384
351
|
const { image } = await generateImage({
|
|
385
|
-
model: runpod.
|
|
386
|
-
prompt: 'A
|
|
387
|
-
aspectRatio: '1:1',
|
|
352
|
+
model: runpod.image('pruna/p-image-t2i'),
|
|
353
|
+
prompt: 'A robot',
|
|
388
354
|
providerOptions: {
|
|
389
355
|
runpod: {
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
356
|
+
aspect_ratio: 'custom',
|
|
357
|
+
width: 512,
|
|
358
|
+
height: 768,
|
|
393
359
|
},
|
|
394
360
|
},
|
|
395
361
|
});
|
|
396
362
|
```
|
|
397
363
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
Use `providerOptions.runpod` for model-specific parameters:
|
|
364
|
+
#### Google (Nano Banana Pro)
|
|
401
365
|
|
|
402
|
-
|
|
403
|
-
| ------------------------ | ---------- | ------- | ----------------------------------------------- |
|
|
404
|
-
| `negative_prompt` | `string` | `""` | What to avoid in the image |
|
|
405
|
-
| `enable_safety_checker` | `boolean` | `true` | Content safety filtering |
|
|
406
|
-
| `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna) |
|
|
407
|
-
| `image` | `string` | - | Input image URL or base64 (Flux Kontext) |
|
|
408
|
-
| `images` | `string[]` | - | Multiple input images (edit models) |
|
|
409
|
-
| `resolution` | `string` | `"1k"` | Output resolution: 1k, 2k, 4k (Nano Banana Pro) |
|
|
410
|
-
| `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440) |
|
|
411
|
-
| `num_inference_steps` | `number` | Auto | Denoising steps |
|
|
412
|
-
| `guidance` | `number` | Auto | Prompt adherence strength |
|
|
413
|
-
| `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp |
|
|
414
|
-
| `maxPollAttempts` | `number` | `60` | Max polling attempts |
|
|
415
|
-
| `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
|
|
366
|
+
Supported model: `google/nano-banana-pro-edit`
|
|
416
367
|
|
|
417
|
-
|
|
368
|
+
| Parameter | Supported Values | Notes |
|
|
369
|
+
| :------------------------------ | :---------------------------------------------------------------- | :----------------------------------- |
|
|
370
|
+
| `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `21:9`, `9:21` | Standard AI SDK parameter |
|
|
371
|
+
| `resolution` | `1k`, `2k`, `4k` | Output resolution quality |
|
|
372
|
+
| `output_format` | `jpeg`, `png`, `webp` | Output image format |
|
|
373
|
+
| `prompt.images` | `string[]` | Recommended. Input image(s) to edit. |
|
|
374
|
+
| `files` | `ImageModelV3File[]` | Alternative (lower-level). |
|
|
375
|
+
| `providerOptions.runpod.images` | `string[]` | Legacy. Input image(s) to edit. |
|
|
418
376
|
|
|
419
|
-
|
|
377
|
+
## Speech Models
|
|
420
378
|
|
|
421
|
-
|
|
379
|
+
Generate speech using the AI SDK's `experimental_generateSpeech` and `runpod.speech(...)`:
|
|
422
380
|
|
|
423
381
|
```ts
|
|
424
382
|
import { runpod } from '@runpod/ai-sdk-provider';
|
|
425
383
|
import { experimental_generateSpeech as generateSpeech } from 'ai';
|
|
426
384
|
|
|
427
385
|
const result = await generateSpeech({
|
|
428
|
-
model: runpod.
|
|
429
|
-
text: 'Hello
|
|
430
|
-
voice: 'lucy',
|
|
386
|
+
model: runpod.speech('resembleai/chatterbox-turbo'),
|
|
387
|
+
text: 'Hello from Runpod.',
|
|
431
388
|
});
|
|
432
389
|
|
|
433
390
|
// Save to filesystem:
|
|
@@ -437,26 +394,35 @@ writeFileSync('speech.wav', result.audio.uint8Array);
|
|
|
437
394
|
|
|
438
395
|
**Returns:**
|
|
439
396
|
|
|
440
|
-
- `result.audio
|
|
441
|
-
- `result.audio.
|
|
442
|
-
- `result.audio.
|
|
443
|
-
- `result.audio.
|
|
444
|
-
- `result.
|
|
445
|
-
- `result.
|
|
446
|
-
- `result.
|
|
397
|
+
- `result.audio` (`GeneratedAudioFile`)
|
|
398
|
+
- `result.audio.uint8Array` (binary audio)
|
|
399
|
+
- `result.audio.base64` (base64-encoded audio)
|
|
400
|
+
- `result.audio.mediaType` (e.g. `audio/wav`)
|
|
401
|
+
- `result.audio.format` (e.g. `wav`)
|
|
402
|
+
- `result.warnings` (e.g. unsupported parameters)
|
|
403
|
+
- `result.responses` (telemetry/debug metadata)
|
|
404
|
+
- `result.providerMetadata.runpod`
|
|
405
|
+
- `audioUrl` (public URL to the generated audio)
|
|
406
|
+
- `cost` (if available)
|
|
407
|
+
|
|
408
|
+
### Examples
|
|
409
|
+
|
|
410
|
+
Check out our [examples](https://github.com/runpod/examples/tree/main/ai-sdk/getting-started) for more code snippets on how to use all the different models.
|
|
447
411
|
|
|
448
412
|
### Supported Models
|
|
449
413
|
|
|
450
|
-
|
|
414
|
+
- `resembleai/chatterbox-turbo`
|
|
451
415
|
|
|
452
|
-
###
|
|
416
|
+
### `resembleai/chatterbox-turbo`
|
|
417
|
+
|
|
418
|
+
#### Parameters
|
|
453
419
|
|
|
454
420
|
| Parameter | Type | Default | Description |
|
|
455
421
|
| --------- | -------- | -------- | ---------------------------------------- |
|
|
456
422
|
| `text` | `string` | - | Required. The text to convert to speech. |
|
|
457
423
|
| `voice` | `string` | `"lucy"` | Built-in voice name (see list below). |
|
|
458
424
|
|
|
459
|
-
|
|
425
|
+
#### Provider Options
|
|
460
426
|
|
|
461
427
|
Use `providerOptions.runpod` for model-specific parameters:
|
|
462
428
|
|
|
@@ -469,7 +435,7 @@ Use `providerOptions.runpod` for model-specific parameters:
|
|
|
469
435
|
>
|
|
470
436
|
> Note: This speech endpoint currently returns WAV only; `outputFormat` is ignored.
|
|
471
437
|
|
|
472
|
-
|
|
438
|
+
#### Voices
|
|
473
439
|
|
|
474
440
|
`voice` selects one of the built-in voices (default: `lucy`):
|
|
475
441
|
|
|
@@ -498,9 +464,9 @@ Use `providerOptions.runpod` for model-specific parameters:
|
|
|
498
464
|
];
|
|
499
465
|
```
|
|
500
466
|
|
|
501
|
-
|
|
467
|
+
#### Voice cloning (via URL)
|
|
502
468
|
|
|
503
|
-
|
|
469
|
+
Use `providerOptions.runpod.voice_url` (or `voiceUrl`) to clone a voice from a short reference audio clip (5–10 seconds):
|
|
504
470
|
|
|
505
471
|
```ts
|
|
506
472
|
const result = await generateSpeech({
|
|
@@ -514,6 +480,30 @@ const result = await generateSpeech({
|
|
|
514
480
|
});
|
|
515
481
|
```
|
|
516
482
|
|
|
483
|
+
#### Paralinguistic Tags
|
|
484
|
+
|
|
485
|
+
Include these tags inline with your text to trigger realistic vocal expressions:
|
|
486
|
+
|
|
487
|
+
| Tag | Effect |
|
|
488
|
+
| ---------------- | --------------- |
|
|
489
|
+
| `[clear throat]` | Throat clearing |
|
|
490
|
+
| `[sigh]` | Sighing |
|
|
491
|
+
| `[sush]` | Shushing |
|
|
492
|
+
| `[cough]` | Coughing |
|
|
493
|
+
| `[groan]` | Groaning |
|
|
494
|
+
| `[sniff]` | Sniffing |
|
|
495
|
+
| `[gasp]` | Gasping |
|
|
496
|
+
| `[chuckle]` | Chuckling |
|
|
497
|
+
| `[laugh]` | Laughing |
|
|
498
|
+
|
|
499
|
+
```ts
|
|
500
|
+
const result = await generateSpeech({
|
|
501
|
+
model: runpod.speech('resembleai/chatterbox-turbo'),
|
|
502
|
+
text: `[sigh] I can't believe it worked! [laugh] This is amazing.`,
|
|
503
|
+
voice: 'lucy',
|
|
504
|
+
});
|
|
505
|
+
```
|
|
506
|
+
|
|
517
507
|
## About Runpod
|
|
518
508
|
|
|
519
509
|
[Runpod](https://runpod.io) is the foundation for developers to build, deploy, and scale custom AI systems.
|
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -27,31 +27,35 @@ interface RunpodProvider {
|
|
|
27
27
|
/**
|
|
28
28
|
Creates a model for text generation.
|
|
29
29
|
*/
|
|
30
|
-
(modelId: string):
|
|
30
|
+
(modelId: string): LanguageModelV3;
|
|
31
31
|
/**
|
|
32
32
|
Creates a chat model for text generation.
|
|
33
33
|
*/
|
|
34
|
-
chatModel(modelId: string):
|
|
34
|
+
chatModel(modelId: string): LanguageModelV3;
|
|
35
35
|
/**
|
|
36
36
|
Creates a chat model for text generation.
|
|
37
37
|
*/
|
|
38
|
-
languageModel(modelId: string):
|
|
38
|
+
languageModel(modelId: string): LanguageModelV3;
|
|
39
39
|
/**
|
|
40
40
|
Creates a completion model for text generation.
|
|
41
41
|
*/
|
|
42
|
-
completionModel(modelId: string):
|
|
42
|
+
completionModel(modelId: string): LanguageModelV3;
|
|
43
43
|
/**
|
|
44
44
|
Creates an image model for image generation.
|
|
45
45
|
*/
|
|
46
|
-
imageModel(modelId: string):
|
|
46
|
+
imageModel(modelId: string): ImageModelV3;
|
|
47
|
+
/**
|
|
48
|
+
Creates an image model for image generation.
|
|
49
|
+
*/
|
|
50
|
+
image(modelId: string): ImageModelV3;
|
|
47
51
|
/**
|
|
48
52
|
Creates a speech model for speech generation.
|
|
49
53
|
*/
|
|
50
|
-
speechModel(modelId: string):
|
|
54
|
+
speechModel(modelId: string): SpeechModelV3;
|
|
51
55
|
/**
|
|
52
56
|
Creates a speech model for speech generation.
|
|
53
57
|
*/
|
|
54
|
-
speech(modelId: string):
|
|
58
|
+
speech(modelId: string): SpeechModelV3;
|
|
55
59
|
}
|
|
56
60
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
57
61
|
declare const runpod: RunpodProvider;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { LanguageModelV3, ImageModelV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -27,31 +27,35 @@ interface RunpodProvider {
|
|
|
27
27
|
/**
|
|
28
28
|
Creates a model for text generation.
|
|
29
29
|
*/
|
|
30
|
-
(modelId: string):
|
|
30
|
+
(modelId: string): LanguageModelV3;
|
|
31
31
|
/**
|
|
32
32
|
Creates a chat model for text generation.
|
|
33
33
|
*/
|
|
34
|
-
chatModel(modelId: string):
|
|
34
|
+
chatModel(modelId: string): LanguageModelV3;
|
|
35
35
|
/**
|
|
36
36
|
Creates a chat model for text generation.
|
|
37
37
|
*/
|
|
38
|
-
languageModel(modelId: string):
|
|
38
|
+
languageModel(modelId: string): LanguageModelV3;
|
|
39
39
|
/**
|
|
40
40
|
Creates a completion model for text generation.
|
|
41
41
|
*/
|
|
42
|
-
completionModel(modelId: string):
|
|
42
|
+
completionModel(modelId: string): LanguageModelV3;
|
|
43
43
|
/**
|
|
44
44
|
Creates an image model for image generation.
|
|
45
45
|
*/
|
|
46
|
-
imageModel(modelId: string):
|
|
46
|
+
imageModel(modelId: string): ImageModelV3;
|
|
47
|
+
/**
|
|
48
|
+
Creates an image model for image generation.
|
|
49
|
+
*/
|
|
50
|
+
image(modelId: string): ImageModelV3;
|
|
47
51
|
/**
|
|
48
52
|
Creates a speech model for speech generation.
|
|
49
53
|
*/
|
|
50
|
-
speechModel(modelId: string):
|
|
54
|
+
speechModel(modelId: string): SpeechModelV3;
|
|
51
55
|
/**
|
|
52
56
|
Creates a speech model for speech generation.
|
|
53
57
|
*/
|
|
54
|
-
speech(modelId: string):
|
|
58
|
+
speech(modelId: string): SpeechModelV3;
|
|
55
59
|
}
|
|
56
60
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
57
61
|
declare const runpod: RunpodProvider;
|