@runpod/ai-sdk-provider 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +178 -35
- package/dist/index.d.mts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +223 -36
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +222 -35
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,37 @@
|
|
|
1
1
|
# @runpod/ai-sdk-provider
|
|
2
2
|
|
|
3
|
+
## 0.12.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- dcc2cc5: Add support for speech generation with `resembleai/chatterbox-turbo` model:
|
|
8
|
+
- `speechModel()` and `speech()` methods for text-to-speech
|
|
9
|
+
- Voice cloning via URL (5-10 seconds of audio)
|
|
10
|
+
- 20 built-in voices
|
|
11
|
+
|
|
12
|
+
### Patch Changes
|
|
13
|
+
|
|
14
|
+
- ace58c2: Add comprehensive documentation for Pruna and Nano Banana Pro models, including all supported aspect ratios, resolutions, and output formats. Update examples to use standard AI SDK options where possible.
|
|
15
|
+
|
|
16
|
+
## 0.11.1
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- f6115ac: Fix Pruna and Nano Banana Pro model support for all aspect ratios:
|
|
21
|
+
|
|
22
|
+
Pruna models:
|
|
23
|
+
- Skip standard size/aspectRatio validation
|
|
24
|
+
- Support all t2i aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, custom
|
|
25
|
+
- Support all edit aspect ratios: match_input_image, 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3
|
|
26
|
+
- Support custom width/height for t2i (256-1440, must be multiple of 16)
|
|
27
|
+
- Support 1-5 images for edit
|
|
28
|
+
|
|
29
|
+
Nano Banana Pro model:
|
|
30
|
+
- Skip standard size/aspectRatio validation
|
|
31
|
+
- Support all aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9, 9:21
|
|
32
|
+
- Support resolution: 1k, 2k, 4k
|
|
33
|
+
- Support output_format: jpeg, png, webp
|
|
34
|
+
|
|
3
35
|
## 0.11.0
|
|
4
36
|
|
|
5
37
|
### Minor Changes
|
package/README.md
CHANGED
|
@@ -224,24 +224,69 @@ writeFileSync('landscape.jpg', image.uint8Array);
|
|
|
224
224
|
|
|
225
225
|
### Model Capabilities
|
|
226
226
|
|
|
227
|
-
| Model ID |
|
|
228
|
-
| -------------------------------------- |
|
|
229
|
-
| `bytedance/seedream-3.0` |
|
|
230
|
-
| `bytedance/seedream-4.0` |
|
|
231
|
-
| `bytedance/seedream-4.0-edit` |
|
|
232
|
-
| `black-forest-labs/flux-1-schnell` |
|
|
233
|
-
| `black-forest-labs/flux-1-dev` |
|
|
234
|
-
| `black-forest-labs/flux-1-kontext-dev` |
|
|
235
|
-
| `qwen/qwen-image` |
|
|
236
|
-
| `qwen/qwen-image-edit` |
|
|
237
|
-
| `nano-banana-edit` |
|
|
238
|
-
| `google/nano-banana-pro-edit` |
|
|
239
|
-
| `pruna/p-image-t2i` |
|
|
240
|
-
| `pruna/p-image-edit` |
|
|
241
|
-
|
|
242
|
-
|
|
227
|
+
| Model ID | Type |
|
|
228
|
+
| -------------------------------------- | ---- |
|
|
229
|
+
| `bytedance/seedream-3.0` | t2i |
|
|
230
|
+
| `bytedance/seedream-4.0` | t2i |
|
|
231
|
+
| `bytedance/seedream-4.0-edit` | edit |
|
|
232
|
+
| `black-forest-labs/flux-1-schnell` | t2i |
|
|
233
|
+
| `black-forest-labs/flux-1-dev` | t2i |
|
|
234
|
+
| `black-forest-labs/flux-1-kontext-dev` | edit |
|
|
235
|
+
| `qwen/qwen-image` | t2i |
|
|
236
|
+
| `qwen/qwen-image-edit` | edit |
|
|
237
|
+
| `nano-banana-edit` | edit |
|
|
238
|
+
| `google/nano-banana-pro-edit` | edit |
|
|
239
|
+
| `pruna/p-image-t2i` | t2i |
|
|
240
|
+
| `pruna/p-image-edit` | edit |
|
|
241
|
+
|
|
242
|
+
For the full list of models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
|
|
243
|
+
|
|
244
|
+
### Pruna Models
|
|
245
|
+
|
|
246
|
+
Supported models: `pruna/p-image-t2i`, `pruna/p-image-edit`
|
|
247
|
+
|
|
248
|
+
| Parameter | Supported Values | Notes |
|
|
249
|
+
| :---------------------------------------- | :------------------------------------------------ | :---------------------------------------------------- |
|
|
250
|
+
| `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | Standard AI SDK parameter |
|
|
251
|
+
| `aspectRatio` (t2i only) | `custom` | Requires `width` & `height` in providerOptions |
|
|
252
|
+
| `providerOptions.runpod.width` / `height` | `256` - `1440` | Custom dimensions (t2i only). Must be multiple of 16. |
|
|
253
|
+
| `providerOptions.runpod.images` | `string[]` | Required for `p-image-edit`. Supports 1-5 images. |
|
|
254
|
+
|
|
255
|
+
**Example: Custom Resolution (t2i)**
|
|
243
256
|
|
|
244
|
-
|
|
257
|
+
```ts
|
|
258
|
+
const { image } = await generateImage({
|
|
259
|
+
model: runpod.imageModel('pruna/p-image-t2i'),
|
|
260
|
+
prompt: 'A robot',
|
|
261
|
+
providerOptions: {
|
|
262
|
+
runpod: {
|
|
263
|
+
aspect_ratio: 'custom',
|
|
264
|
+
width: 512,
|
|
265
|
+
height: 768,
|
|
266
|
+
},
|
|
267
|
+
},
|
|
268
|
+
});
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Google Models
|
|
272
|
+
|
|
273
|
+
#### Nano Banana Pro
|
|
274
|
+
|
|
275
|
+
Supported model: `google/nano-banana-pro-edit`
|
|
276
|
+
|
|
277
|
+
| Parameter | Supported Values | Notes |
|
|
278
|
+
| :------------------------------ | :---------------------------------------------------------------- | :-------------------------------- |
|
|
279
|
+
| `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `21:9`, `9:21` | Standard AI SDK parameter |
|
|
280
|
+
| `resolution` | `1k`, `2k`, `4k` | Output resolution quality |
|
|
281
|
+
| `output_format` | `jpeg`, `png`, `webp` | Output image format |
|
|
282
|
+
| `providerOptions.runpod.images` | `string[]` | Required. Input image(s) to edit. |
|
|
283
|
+
|
|
284
|
+
### Other Models
|
|
285
|
+
|
|
286
|
+
Most other models (Flux, Seedream, Qwen, etc.) support standard `1:1`, `4:3`, and `3:4` aspect ratios.
|
|
287
|
+
|
|
288
|
+
- **Flux models**: Support `num_inference_steps` and `guidance` settings.
|
|
289
|
+
- **Edit models**: Require an input image via `providerOptions.runpod.image` (single) or `images` (multiple).
|
|
245
290
|
|
|
246
291
|
### Advanced Parameters
|
|
247
292
|
|
|
@@ -352,24 +397,122 @@ const { image } = await generateImage({
|
|
|
352
397
|
|
|
353
398
|
### Provider Options
|
|
354
399
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
| Option | Type | Default | Description
|
|
358
|
-
| ------------------------ | ---------- | ------- |
|
|
359
|
-
| `negative_prompt` | `string` | `""` |
|
|
360
|
-
| `enable_safety_checker` | `boolean` | `true` |
|
|
361
|
-
| `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna
|
|
362
|
-
| `image` | `string` | - |
|
|
363
|
-
| `images` | `string[]` | - | Multiple input images (
|
|
364
|
-
| `
|
|
365
|
-
| `
|
|
366
|
-
| `num_inference_steps` | `number` | Auto |
|
|
367
|
-
| `guidance` | `number` | Auto |
|
|
368
|
-
| `output_format` | `string` | `"png"` | Output
|
|
369
|
-
| `
|
|
370
|
-
| `
|
|
371
|
-
|
|
372
|
-
|
|
400
|
+
Use `providerOptions.runpod` for model-specific parameters:
|
|
401
|
+
|
|
402
|
+
| Option | Type | Default | Description |
|
|
403
|
+
| ------------------------ | ---------- | ------- | ----------------------------------------------- |
|
|
404
|
+
| `negative_prompt` | `string` | `""` | What to avoid in the image |
|
|
405
|
+
| `enable_safety_checker` | `boolean` | `true` | Content safety filtering |
|
|
406
|
+
| `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna) |
|
|
407
|
+
| `image` | `string` | - | Input image URL or base64 (Flux Kontext) |
|
|
408
|
+
| `images` | `string[]` | - | Multiple input images (edit models) |
|
|
409
|
+
| `resolution` | `string` | `"1k"` | Output resolution: 1k, 2k, 4k (Nano Banana Pro) |
|
|
410
|
+
| `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440) |
|
|
411
|
+
| `num_inference_steps` | `number` | Auto | Denoising steps |
|
|
412
|
+
| `guidance` | `number` | Auto | Prompt adherence strength |
|
|
413
|
+
| `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp (Nano Banana Pro defaults to `jpeg`) |
|
|
414
|
+
| `maxPollAttempts` | `number` | `60` | Max polling attempts |
|
|
415
|
+
| `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
|
|
416
|
+
|
|
417
|
+
## Speech
|
|
418
|
+
|
|
419
|
+
You can generate speech using the AI SDK's `experimental_generateSpeech` and a Runpod speech model created via `runpod.speechModel()` (or the shorthand `runpod.speech()`).
|
|
420
|
+
|
|
421
|
+
### Basic Usage
|
|
422
|
+
|
|
423
|
+
```ts
|
|
424
|
+
import { runpod } from '@runpod/ai-sdk-provider';
|
|
425
|
+
import { experimental_generateSpeech as generateSpeech } from 'ai';
|
|
426
|
+
|
|
427
|
+
const result = await generateSpeech({
|
|
428
|
+
model: runpod.speechModel('resembleai/chatterbox-turbo'),
|
|
429
|
+
text: 'Hello, this is Chatterbox Turbo running on Runpod.',
|
|
430
|
+
voice: 'lucy',
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
// Save to filesystem:
|
|
434
|
+
import { writeFileSync } from 'fs';
|
|
435
|
+
writeFileSync('speech.wav', result.audio.uint8Array);
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
**Returns:**
|
|
439
|
+
|
|
440
|
+
- `result.audio.uint8Array` - Binary audio data (efficient for processing/saving)
|
|
441
|
+
- `result.audio.base64` - Base64 encoded audio (useful for web embedding)
|
|
442
|
+
- `result.audio.mediaType` - MIME type (e.g. `audio/wav`)
|
|
443
|
+
- `result.audio.format` - Format (e.g. `wav`)
|
|
444
|
+
- `result.warnings` - Array of any warnings about unsupported parameters
|
|
445
|
+
- `result.providerMetadata.runpod.audioUrl` - Public URL to the generated audio
|
|
446
|
+
- `result.providerMetadata.runpod.cost` - Cost information (if available)
|
|
447
|
+
|
|
448
|
+
### Supported Models
|
|
449
|
+
|
|
450
|
+
Supported model: `resembleai/chatterbox-turbo`
|
|
451
|
+
|
|
452
|
+
### Parameters
|
|
453
|
+
|
|
454
|
+
| Parameter | Type | Default | Description |
|
|
455
|
+
| --------- | -------- | -------- | ---------------------------------------- |
|
|
456
|
+
| `text` | `string` | - | Required. The text to convert to speech. |
|
|
457
|
+
| `voice` | `string` | `"lucy"` | Built-in voice name (see list below). |
|
|
458
|
+
|
|
459
|
+
### Provider Options
|
|
460
|
+
|
|
461
|
+
Use `providerOptions.runpod` for model-specific parameters:
|
|
462
|
+
|
|
463
|
+
| Option | Type | Default | Description |
|
|
464
|
+
| ----------- | -------- | ------- | ------------------------------------------- |
|
|
465
|
+
| `voice_url` | `string` | - | URL to audio file (5–10s) for voice cloning |
|
|
466
|
+
| `voiceUrl` | `string` | - | Alias for `voice_url` |
|
|
467
|
+
|
|
468
|
+
> Note: If `voice_url` is provided, the built-in `voice` is ignored.
|
|
469
|
+
>
|
|
470
|
+
> Note: This speech endpoint currently returns WAV only; `outputFormat` is ignored.
|
|
471
|
+
|
|
472
|
+
### Voices
|
|
473
|
+
|
|
474
|
+
`voice` selects one of the built-in voices (default: `lucy`):
|
|
475
|
+
|
|
476
|
+
```ts
|
|
477
|
+
[
|
|
478
|
+
'aaron',
|
|
479
|
+
'abigail',
|
|
480
|
+
'anaya',
|
|
481
|
+
'andy',
|
|
482
|
+
'archer',
|
|
483
|
+
'brian',
|
|
484
|
+
'chloe',
|
|
485
|
+
'dylan',
|
|
486
|
+
'emmanuel',
|
|
487
|
+
'ethan',
|
|
488
|
+
'evelyn',
|
|
489
|
+
'gavin',
|
|
490
|
+
'gordon',
|
|
491
|
+
'ivan',
|
|
492
|
+
'laura',
|
|
493
|
+
'lucy',
|
|
494
|
+
'madison',
|
|
495
|
+
'marisol',
|
|
496
|
+
'meera',
|
|
497
|
+
'walter',
|
|
498
|
+
];
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
### Voice cloning (via URL)
|
|
502
|
+
|
|
503
|
+
You can provide a `voice_url` (5–10s audio) through `providerOptions.runpod`:
|
|
504
|
+
|
|
505
|
+
```ts
|
|
506
|
+
const result = await generateSpeech({
|
|
507
|
+
model: runpod.speech('resembleai/chatterbox-turbo'),
|
|
508
|
+
text: 'Hello!',
|
|
509
|
+
providerOptions: {
|
|
510
|
+
runpod: {
|
|
511
|
+
voice_url: 'https://example.com/voice.wav',
|
|
512
|
+
},
|
|
513
|
+
},
|
|
514
|
+
});
|
|
515
|
+
```
|
|
373
516
|
|
|
374
517
|
## About Runpod
|
|
375
518
|
|
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LanguageModelV2, ImageModelV2 } from '@ai-sdk/provider';
|
|
1
|
+
import { LanguageModelV2, ImageModelV2, SpeechModelV2 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -44,6 +44,14 @@ interface RunpodProvider {
|
|
|
44
44
|
Creates an image model for image generation.
|
|
45
45
|
*/
|
|
46
46
|
imageModel(modelId: string): ImageModelV2;
|
|
47
|
+
/**
|
|
48
|
+
Creates a speech model for speech generation.
|
|
49
|
+
*/
|
|
50
|
+
speechModel(modelId: string): SpeechModelV2;
|
|
51
|
+
/**
|
|
52
|
+
Creates a speech model for speech generation (shorthand alias for `speechModel`).
|
|
53
|
+
*/
|
|
54
|
+
speech(modelId: string): SpeechModelV2;
|
|
47
55
|
}
|
|
48
56
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
49
57
|
declare const runpod: RunpodProvider;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LanguageModelV2, ImageModelV2 } from '@ai-sdk/provider';
|
|
1
|
+
import { LanguageModelV2, ImageModelV2, SpeechModelV2 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
3
|
export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
|
|
4
4
|
import { z } from 'zod';
|
|
@@ -44,6 +44,14 @@ interface RunpodProvider {
|
|
|
44
44
|
Creates an image model for image generation.
|
|
45
45
|
*/
|
|
46
46
|
imageModel(modelId: string): ImageModelV2;
|
|
47
|
+
/**
|
|
48
|
+
Creates a speech model for speech generation.
|
|
49
|
+
*/
|
|
50
|
+
speechModel(modelId: string): SpeechModelV2;
|
|
51
|
+
/**
|
|
52
|
+
Creates a speech model for speech generation (shorthand alias for `speechModel`).
|
|
53
|
+
*/
|
|
54
|
+
speech(modelId: string): SpeechModelV2;
|
|
47
55
|
}
|
|
48
56
|
declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
|
|
49
57
|
declare const runpod: RunpodProvider;
|
package/dist/index.js
CHANGED
|
@@ -27,7 +27,7 @@ module.exports = __toCommonJS(index_exports);
|
|
|
27
27
|
|
|
28
28
|
// src/runpod-provider.ts
|
|
29
29
|
var import_openai_compatible = require("@ai-sdk/openai-compatible");
|
|
30
|
-
var
|
|
30
|
+
var import_provider_utils4 = require("@ai-sdk/provider-utils");
|
|
31
31
|
|
|
32
32
|
// src/runpod-image-model.ts
|
|
33
33
|
var import_provider_utils2 = require("@ai-sdk/provider-utils");
|
|
@@ -115,8 +115,12 @@ var RunpodImageModel = class {
|
|
|
115
115
|
abortSignal
|
|
116
116
|
}) {
|
|
117
117
|
const warnings = [];
|
|
118
|
+
const isPrunaModel = this.modelId.includes("pruna") || this.modelId.includes("p-image");
|
|
119
|
+
const isNanoBananaProModel = this.modelId.includes("nano-banana-pro");
|
|
118
120
|
let runpodSize;
|
|
119
|
-
if (
|
|
121
|
+
if (isPrunaModel || isNanoBananaProModel) {
|
|
122
|
+
runpodSize = aspectRatio || "1:1";
|
|
123
|
+
} else if (size) {
|
|
120
124
|
const runpodSizeCandidate = size.replace("x", "*");
|
|
121
125
|
if (!SUPPORTED_SIZES.has(runpodSizeCandidate)) {
|
|
122
126
|
throw new import_provider.InvalidArgumentError({
|
|
@@ -150,7 +154,8 @@ var RunpodImageModel = class {
|
|
|
150
154
|
prompt,
|
|
151
155
|
runpodSize,
|
|
152
156
|
seed,
|
|
153
|
-
providerOptions.runpod
|
|
157
|
+
providerOptions.runpod,
|
|
158
|
+
aspectRatio
|
|
154
159
|
);
|
|
155
160
|
const { value: response, responseHeaders } = await (0, import_provider_utils2.postJsonToApi)({
|
|
156
161
|
url: `${this.config.baseURL}/runsync`,
|
|
@@ -264,7 +269,7 @@ var RunpodImageModel = class {
|
|
|
264
269
|
`Image generation timed out after ${maxAttempts} attempts (${maxAttempts * pollInterval / 1e3}s)`
|
|
265
270
|
);
|
|
266
271
|
}
|
|
267
|
-
buildInputPayload(prompt, runpodSize, seed, runpodOptions) {
|
|
272
|
+
buildInputPayload(prompt, runpodSize, seed, runpodOptions, aspectRatio) {
|
|
268
273
|
const isFluxModel = this.modelId.includes("flux") || this.modelId.includes("black-forest-labs");
|
|
269
274
|
if (isFluxModel) {
|
|
270
275
|
const isKontext = this.modelId.includes("kontext");
|
|
@@ -300,50 +305,56 @@ var RunpodImageModel = class {
|
|
|
300
305
|
if (isPrunaModel) {
|
|
301
306
|
const isPrunaEdit = this.modelId.includes("edit");
|
|
302
307
|
if (isPrunaEdit) {
|
|
303
|
-
|
|
308
|
+
const editPayload = {
|
|
304
309
|
prompt,
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
disable_safety_checker: runpodOptions?.disable_safety_checker ?? false,
|
|
308
|
-
enable_sync_mode: runpodOptions?.enable_sync_mode ?? false,
|
|
309
|
-
...runpodOptions
|
|
310
|
+
aspect_ratio: runpodOptions?.aspect_ratio ?? aspectRatio ?? "1:1",
|
|
311
|
+
disable_safety_checker: runpodOptions?.disable_safety_checker ?? false
|
|
310
312
|
};
|
|
313
|
+
if (seed !== void 0) {
|
|
314
|
+
editPayload.seed = seed;
|
|
315
|
+
} else if (runpodOptions?.seed !== void 0) {
|
|
316
|
+
editPayload.seed = runpodOptions.seed;
|
|
317
|
+
}
|
|
318
|
+
if (runpodOptions?.images) {
|
|
319
|
+
editPayload.images = runpodOptions.images;
|
|
320
|
+
}
|
|
321
|
+
return editPayload;
|
|
311
322
|
} else {
|
|
312
|
-
const
|
|
313
|
-
"1328*1328": "1:1",
|
|
314
|
-
"1472*1140": "4:3",
|
|
315
|
-
"1140*1472": "3:4",
|
|
316
|
-
"512*512": "1:1",
|
|
317
|
-
"768*768": "1:1",
|
|
318
|
-
"1024*1024": "1:1",
|
|
319
|
-
"1536*1536": "1:1",
|
|
320
|
-
"2048*2048": "1:1",
|
|
321
|
-
"4096*4096": "1:1",
|
|
322
|
-
"512*768": "2:3",
|
|
323
|
-
"768*512": "3:2",
|
|
324
|
-
"1024*768": "4:3",
|
|
325
|
-
"768*1024": "3:4"
|
|
326
|
-
};
|
|
327
|
-
const aspectRatio = runpodOptions?.aspect_ratio ?? aspectRatioMap[runpodSize] ?? "1:1";
|
|
328
|
-
return {
|
|
323
|
+
const t2iPayload = {
|
|
329
324
|
prompt,
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
enable_safety_checker: runpodOptions?.enable_safety_checker ?? true,
|
|
333
|
-
...runpodOptions
|
|
325
|
+
aspect_ratio: runpodOptions?.aspect_ratio ?? aspectRatio ?? "1:1",
|
|
326
|
+
disable_safety_checker: runpodOptions?.disable_safety_checker ?? false
|
|
334
327
|
};
|
|
328
|
+
if (seed !== void 0) {
|
|
329
|
+
t2iPayload.seed = seed;
|
|
330
|
+
} else if (runpodOptions?.seed !== void 0) {
|
|
331
|
+
t2iPayload.seed = runpodOptions.seed;
|
|
332
|
+
}
|
|
333
|
+
if (t2iPayload.aspect_ratio === "custom") {
|
|
334
|
+
if (runpodOptions?.width) {
|
|
335
|
+
t2iPayload.width = runpodOptions.width;
|
|
336
|
+
}
|
|
337
|
+
if (runpodOptions?.height) {
|
|
338
|
+
t2iPayload.height = runpodOptions.height;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
return t2iPayload;
|
|
335
342
|
}
|
|
336
343
|
}
|
|
337
344
|
const isNanaBananaProModel = this.modelId.includes("nano-banana-pro");
|
|
338
345
|
if (isNanaBananaProModel) {
|
|
339
|
-
|
|
346
|
+
const nanoBananaPayload = {
|
|
340
347
|
prompt,
|
|
348
|
+
aspect_ratio: runpodOptions?.aspect_ratio ?? aspectRatio ?? "1:1",
|
|
341
349
|
resolution: runpodOptions?.resolution ?? "1k",
|
|
342
350
|
output_format: runpodOptions?.output_format ?? "jpeg",
|
|
343
351
|
enable_base64_output: runpodOptions?.enable_base64_output ?? false,
|
|
344
|
-
enable_sync_mode: runpodOptions?.enable_sync_mode ?? false
|
|
345
|
-
...runpodOptions
|
|
352
|
+
enable_sync_mode: runpodOptions?.enable_sync_mode ?? false
|
|
346
353
|
};
|
|
354
|
+
if (runpodOptions?.images) {
|
|
355
|
+
nanoBananaPayload.images = runpodOptions.images;
|
|
356
|
+
}
|
|
357
|
+
return nanoBananaPayload;
|
|
347
358
|
}
|
|
348
359
|
return {
|
|
349
360
|
prompt,
|
|
@@ -381,6 +392,148 @@ var runpodImageStatusSchema = import_zod2.z.object({
|
|
|
381
392
|
// Error message if FAILED
|
|
382
393
|
});
|
|
383
394
|
|
|
395
|
+
// src/runpod-speech-model.ts
|
|
396
|
+
var import_provider_utils3 = require("@ai-sdk/provider-utils");
|
|
397
|
+
/**
 * Reports whether `value` is a non-null value whose `typeof` is "object".
 *
 * @param {unknown} value - Any value.
 * @returns {boolean} `true` for objects (arrays included), `false` for
 *   `null`, primitives, `undefined` and functions.
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
400
|
+
/**
 * Collapses every run of carriage-return / line-feed characters in `value`
 * into a single space.
 *
 * @param {string} value - Text that may contain line breaks.
 * @returns {string} The text with each run of line breaks replaced by one space.
 */
function replaceNewlinesWithSpaces(value) {
  const lineBreakRun = /[\r\n]+/;
  const pieces = value.split(lineBreakRun);
  return pieces.join(" ");
}
|
|
403
|
+
/**
 * Speech (text-to-speech) model that talks to a Runpod endpoint.
 *
 * `doGenerate` posts the text to the endpoint, expects the response to carry
 * an `audio_url`, and then downloads the audio bytes from that URL.
 */
var RunpodSpeechModel = class {
  /**
   * @param {string} modelId - Identifier echoed back in `response.modelId`.
   * @param {object} config - Provider wiring. Reads `provider`, `baseURL`,
   *   `headers()` and, optionally, `fetch` and `_internal.currentDate`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v2";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Resolves the URL the generation request is posted to.
   *
   * @returns {string} The base URL (passed through `withoutTrailingSlash`)
   *   when it already ends in `/run` or `/runsync`; otherwise the base URL
   *   with `/runsync` appended.
   */
  getRunpodRunSyncUrl() {
    const baseURL = (0, import_provider_utils3.withoutTrailingSlash)(this.config.baseURL) ?? this.config.baseURL;
    if (baseURL.endsWith("/run") || baseURL.endsWith("/runsync")) {
      return baseURL;
    }
    return `${baseURL}/runsync`;
  }

  /**
   * Generates speech for `options.text`.
   *
   * @param {object} options - Call options: `text`, `voice`, `outputFormat`,
   *   `instructions`, `speed`, `language`, `providerOptions`, `abortSignal`
   *   and `headers`.
   * @returns {Promise<object>} `{ audio, warnings, request, response, providerMetadata }`
   *   where `audio` is a `Uint8Array` of the downloaded file.
   * @throws {Error} When the endpoint responds with a non-OK status, when the
   *   response has no `audio_url`, or when the audio download fails.
   */
  async doGenerate(options) {
    const currentDate = this.config._internal?.currentDate?.() ?? /* @__PURE__ */ new Date();
    const warnings = [];
    const {
      text,
      voice,
      outputFormat,
      instructions,
      speed,
      language,
      providerOptions,
      abortSignal,
      headers
    } = options;

    // Settings the endpoint cannot honour are reported as warnings, not errors.
    if (outputFormat != null && outputFormat !== "wav") {
      warnings.push({
        type: "unsupported-setting",
        setting: "outputFormat",
        details: `Unsupported outputFormat: ${outputFormat}. This endpoint returns 'wav'.`
      });
    }
    if (instructions != null) {
      warnings.push({
        type: "unsupported-setting",
        setting: "instructions",
        details: `Instructions are not supported by this speech endpoint.`
      });
    }
    if (speed != null) {
      warnings.push({
        type: "unsupported-setting",
        setting: "speed",
        details: `Speed is not supported by this speech endpoint.`
      });
    }
    if (language != null) {
      warnings.push({
        type: "unsupported-setting",
        setting: "language",
        details: `Language selection is not supported by this speech endpoint.`
      });
    }

    const runpodProviderOptions = isRecord(providerOptions) ? providerOptions.runpod : void 0;
    // `voice_url` wins over its `voiceUrl` alias, but only a non-empty string
    // counts: an empty or non-string `voice_url` must not shadow a valid alias
    // nor be forwarded to the endpoint.
    const voiceUrl = isRecord(runpodProviderOptions)
      ? [runpodProviderOptions.voice_url, runpodProviderOptions.voiceUrl].find(
          (candidate) => typeof candidate === "string" && candidate.length > 0
        )
      : void 0;

    const input = {
      prompt: replaceNewlinesWithSpaces(text)
    };
    // A cloning URL takes precedence over the built-in voice name.
    if (voiceUrl) {
      input.voice_url = voiceUrl;
    } else if (voice) {
      input.voice = voice;
    }

    const requestBody = { input };
    const url = this.getRunpodRunSyncUrl();
    const fetchFn = this.config.fetch ?? fetch;
    const requestHeaders = {
      "Content-Type": "application/json",
      ...this.config.headers()
    };
    // Per-call headers override the provider headers; nullish values are skipped.
    if (headers) {
      for (const [key, value] of Object.entries(headers)) {
        if (value != null) {
          requestHeaders[key] = value;
        }
      }
    }

    const response = await fetchFn(url, {
      method: "POST",
      headers: requestHeaders,
      body: JSON.stringify(requestBody),
      signal: abortSignal
    });
    const responseHeaders = Object.fromEntries(response.headers.entries());
    const rawBodyText = await response.text();
    let parsed = void 0;
    try {
      parsed = rawBodyText ? JSON.parse(rawBodyText) : void 0;
    } catch {
      // Deliberately best-effort: a body that is not JSON leaves `parsed`
      // undefined and is surfaced through `rawBodyText` / the checks below.
    }
    if (!response.ok) {
      const message = parsed && typeof parsed.error === "string" && parsed.error || rawBodyText || `HTTP ${response.status}`;
      throw new Error(`Runpod speech request failed: ${message}`);
    }

    // The payload may be wrapped in `output` or returned at the top level.
    const output = parsed?.output ?? parsed;
    const audioUrl = output?.audio_url;
    if (typeof audioUrl !== "string" || audioUrl.length === 0) {
      throw new Error("Runpod speech response did not include an audio_url.");
    }

    const audioResponse = await fetchFn(audioUrl, { signal: abortSignal });
    if (!audioResponse.ok) {
      throw new Error(
        `Failed to download generated audio (${audioResponse.status}).`
      );
    }
    const audio = new Uint8Array(await audioResponse.arrayBuffer());

    const providerMetadata = {
      runpod: {
        audioUrl,
        // `cost` is only included when the endpoint reports a number.
        ...typeof output?.cost === "number" ? { cost: output.cost } : {}
      }
    };
    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody)
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawBodyText
      },
      providerMetadata
    };
  }
};
|
|
536
|
+
|
|
384
537
|
// src/runpod-provider.ts
|
|
385
538
|
var MODEL_ID_TO_ENDPOINT_URL = {
|
|
386
539
|
"qwen/qwen3-32b-awq": "https://api.runpod.ai/v2/qwen3-32b-awq/openai/v1",
|
|
@@ -408,6 +561,9 @@ var IMAGE_MODEL_ID_TO_ENDPOINT_URL = {
|
|
|
408
561
|
"pruna/p-image-t2i": "https://api.runpod.ai/v2/p-image-t2i",
|
|
409
562
|
"pruna/p-image-edit": "https://api.runpod.ai/v2/p-image-edit"
|
|
410
563
|
};
|
|
564
|
+
// Maps known speech model IDs to the base URL of their Runpod endpoint.
// createSpeechModel consults this map first, before treating the ID as a URL or a raw endpoint ID.
var SPEECH_MODEL_ID_TO_ENDPOINT_URL = {
|
|
565
|
+
"resembleai/chatterbox-turbo": "https://api.runpod.ai/v2/chatterbox-turbo/"
|
|
566
|
+
};
|
|
411
567
|
var MODEL_ID_TO_OPENAI_NAME = {
|
|
412
568
|
"qwen/qwen3-32b-awq": "Qwen/Qwen3-32B-AWQ",
|
|
413
569
|
"deepcogito/cogito-671b-v2.1-fp8": "deepcogito/cogito-671b-v2.1-FP8",
|
|
@@ -417,9 +573,26 @@ function deriveEndpointURL(modelId) {
|
|
|
417
573
|
const normalized = modelId.replace(/\//g, "-");
|
|
418
574
|
return `https://api.runpod.ai/v2/${normalized}/openai/v1`;
|
|
419
575
|
}
|
|
576
|
+
/**
 * Extracts an endpoint ID from a Runpod console URL.
 *
 * @param {string} modelIdOrUrl - A model ID or a URL.
 * @returns {string | null} The path segment that follows the last
 *   `endpoint` segment when the input is a URL on `console.runpod.io`;
 *   `null` for non-URLs, unparsable URLs, other hosts, or when no such
 *   segment exists.
 */
function parseRunpodConsoleEndpointId(modelIdOrUrl) {
  if (!modelIdOrUrl.startsWith("http")) {
    return null;
  }

  let consoleUrl;
  try {
    consoleUrl = new URL(modelIdOrUrl);
  } catch {
    // Starts with "http" but is not a valid URL.
    return null;
  }

  if (consoleUrl.hostname !== "console.runpod.io") {
    return null;
  }

  const segments = consoleUrl.pathname.split("/").filter((segment) => segment.length > 0);
  const markerIndex = segments.lastIndexOf("endpoint");
  if (markerIndex === -1) {
    return null;
  }
  return segments[markerIndex + 1] || null;
}
|
|
420
593
|
function createRunpod(options = {}) {
|
|
421
594
|
const getHeaders = () => ({
|
|
422
|
-
Authorization: `Bearer ${(0,
|
|
595
|
+
Authorization: `Bearer ${(0, import_provider_utils4.loadApiKey)({
|
|
423
596
|
apiKey: options.apiKey,
|
|
424
597
|
environmentVariableName: "RUNPOD_API_KEY",
|
|
425
598
|
description: "Runpod"
|
|
@@ -449,7 +622,7 @@ function createRunpod(options = {}) {
|
|
|
449
622
|
}
|
|
450
623
|
return {
|
|
451
624
|
provider: `runpod.${modelType}`,
|
|
452
|
-
url: ({ path }) => `${(0,
|
|
625
|
+
url: ({ path }) => `${(0, import_provider_utils4.withoutTrailingSlash)(baseURL)}${path}`,
|
|
453
626
|
headers: getHeaders,
|
|
454
627
|
fetch: runpodFetch
|
|
455
628
|
};
|
|
@@ -482,11 +655,25 @@ function createRunpod(options = {}) {
|
|
|
482
655
|
fetch: options.fetch
|
|
483
656
|
});
|
|
484
657
|
};
|
|
658
|
+
// Builds a RunpodSpeechModel from a known model ID, a Runpod console URL,
// a direct endpoint URL, or a raw endpoint ID.
const createSpeechModel = (modelId) => {
  // A console URL is reduced to its endpoint ID; anything else is used as given.
  const endpointIdFromConsole = parseRunpodConsoleEndpointId(modelId);
  const normalizedModelId = endpointIdFromConsole ?? modelId;

  // Own-property lookup only: a plain `map[id]` would return inherited members
  // (e.g. for "toString" or "constructor") and yield a non-string baseURL.
  const mappedBaseURL = Object.prototype.hasOwnProperty.call(
    SPEECH_MODEL_ID_TO_ENDPOINT_URL,
    normalizedModelId
  ) ? SPEECH_MODEL_ID_TO_ENDPOINT_URL[normalizedModelId] : void 0;

  // Fallbacks: a value starting with "http" is taken as the base URL itself,
  // otherwise it is treated as an endpoint ID under api.runpod.ai/v2.
  const baseURL = mappedBaseURL ?? (normalizedModelId.startsWith("http") ? normalizedModelId : `https://api.runpod.ai/v2/${normalizedModelId}`);

  return new RunpodSpeechModel(normalizedModelId, {
    provider: "runpod.speech",
    baseURL,
    headers: getHeaders,
    fetch: runpodFetch
  });
};
|
|
485
670
|
const provider = (modelId) => createChatModel(modelId);
|
|
486
671
|
provider.completionModel = createCompletionModel;
|
|
487
672
|
provider.languageModel = createChatModel;
|
|
488
673
|
provider.chatModel = createChatModel;
|
|
489
674
|
provider.imageModel = createImageModel;
|
|
675
|
+
provider.speechModel = createSpeechModel;
|
|
676
|
+
provider.speech = createSpeechModel;
|
|
490
677
|
return provider;
|
|
491
678
|
}
|
|
492
679
|
var runpod = createRunpod();
|