@runpod/ai-sdk-provider 0.11.0 → 0.12.0

package/CHANGELOG.md CHANGED
@@ -1,5 +1,37 @@
  # @runpod/ai-sdk-provider
 
+ ## 0.12.0
+
+ ### Minor Changes
+
+ - dcc2cc5: Add support for speech generation with the `resembleai/chatterbox-turbo` model:
+   - `speechModel()` and `speech()` methods for text-to-speech
+   - Voice cloning via URL (5-10 seconds of audio)
+   - 20 built-in voices
+
+ ### Patch Changes
+
+ - ace58c2: Add comprehensive documentation for Pruna and Nano Banana Pro models, including all supported aspect ratios, resolutions, and output formats. Update examples to use standard AI SDK options where possible.
+
+ ## 0.11.1
+
+ ### Patch Changes
+
+ - f6115ac: Fix Pruna and Nano Banana Pro model support for all aspect ratios:
+
+   Pruna models:
+   - Skip standard size/aspectRatio validation
+   - Support all t2i aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, custom
+   - Support all edit aspect ratios: match_input_image, 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3
+   - Support custom width/height for t2i (256-1440, must be a multiple of 16)
+   - Support 1-5 images for edit
+
+   Nano Banana Pro model:
+   - Skip standard size/aspectRatio validation
+   - Support all aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 21:9, 9:21
+   - Support resolution: 1k, 2k, 4k
+   - Support output_format: jpeg, png, webp
+
  ## 0.11.0
 
  ### Minor Changes
package/README.md CHANGED
@@ -224,24 +224,69 @@ writeFileSync('landscape.jpg', image.uint8Array);
 
  ### Model Capabilities
 
- | Model ID | Description | Supported Aspect Ratios |
- | -------------------------------------- | ------------------------------- | ------------------------------------- |
- | `bytedance/seedream-3.0` | Advanced text-to-image model | 1:1, 4:3, 3:4 |
- | `bytedance/seedream-4.0` | Text-to-image (v4) | 1:1 (supports 1024, 2048, 4096) |
- | `bytedance/seedream-4.0-edit` | Image editing (v4, multi-image) | 1:1 (supports 1024, 1536, 2048, 4096) |
- | `black-forest-labs/flux-1-schnell` | Fast image generation (4 steps) | 1:1, 4:3, 3:4 |
- | `black-forest-labs/flux-1-dev` | High-quality image generation | 1:1, 4:3, 3:4 |
- | `black-forest-labs/flux-1-kontext-dev` | Context-aware image generation | 1:1, 4:3, 3:4 |
- | `qwen/qwen-image` | Text-to-image generation | 1:1, 4:3, 3:4 |
- | `qwen/qwen-image-edit` | Image editing (prompt-guided) | 1:1, 4:3, 3:4 |
- | `nano-banana-edit` | Image editing (multi-image) | 1:1, 4:3, 3:4 |
- | `google/nano-banana-pro-edit` | Image editing (Gemini-powered) | Uses resolution param (1k, 2k) |
- | `pruna/p-image-t2i` | Pruna text-to-image | 1:1, 16:9, 9:16, 4:3, 3:4, etc. |
- | `pruna/p-image-edit` | Pruna image editing | match_input_image, 1:1, 16:9, etc. |
-
- **Note**: The provider uses strict validation for image parameters. Unsupported aspect ratios (like `16:9`, `9:16`, `3:2`, `2:3`) will throw an `InvalidArgumentError` with a clear message about supported alternatives.
+ | Model ID | Type |
+ | -------------------------------------- | ---- |
+ | `bytedance/seedream-3.0` | t2i |
+ | `bytedance/seedream-4.0` | t2i |
+ | `bytedance/seedream-4.0-edit` | edit |
+ | `black-forest-labs/flux-1-schnell` | t2i |
+ | `black-forest-labs/flux-1-dev` | t2i |
+ | `black-forest-labs/flux-1-kontext-dev` | edit |
+ | `qwen/qwen-image` | t2i |
+ | `qwen/qwen-image-edit` | edit |
+ | `nano-banana-edit` | edit |
+ | `google/nano-banana-pro-edit` | edit |
+ | `pruna/p-image-t2i` | t2i |
+ | `pruna/p-image-edit` | edit |
+
+ For the full list of models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
+
+ ### Pruna Models
+
+ Supported models: `pruna/p-image-t2i`, `pruna/p-image-edit`
+
+ | Parameter | Supported Values | Notes |
+ | :---------------------------------------- | :------------------------------------------------ | :------------------------------------------------------ |
+ | `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3` | Standard AI SDK parameter |
+ | `aspectRatio` (t2i only) | `custom` | Requires `width` & `height` in providerOptions |
+ | `providerOptions.runpod.width` / `height` | `256` - `1440` | Custom dimensions (t2i only). Must be a multiple of 16. |
+ | `providerOptions.runpod.images` | `string[]` | Required for `p-image-edit`. Supports 1-5 images. |
+
+ **Example: Custom Resolution (t2i)**
 
- **Note:** This list is not complete. For a full list of all available models, see the [Runpod Public Endpoint Reference](https://docs.runpod.io/hub/public-endpoint-reference).
+ ```ts
+ const { image } = await generateImage({
+   model: runpod.imageModel('pruna/p-image-t2i'),
+   prompt: 'A robot',
+   providerOptions: {
+     runpod: {
+       aspect_ratio: 'custom',
+       width: 512,
+       height: 768,
+     },
+   },
+ });
+ ```
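+
+ **Example: Editing (p-image-edit)**
+
+ A minimal edit sketch along the same lines (the input image URL below is a placeholder; pass 1-5 of your own images):
+
+ ```ts
+ const { image } = await generateImage({
+   model: runpod.imageModel('pruna/p-image-edit'),
+   prompt: 'Turn the sky into a dramatic sunset',
+   providerOptions: {
+     runpod: {
+       aspect_ratio: 'match_input_image',
+       // 1-5 input image URLs (placeholder shown)
+       images: ['https://example.com/input.jpg'],
+     },
+   },
+ });
+ ```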
+
+ ### Google Models
+
+ #### Nano Banana Pro
+
+ Supported model: `google/nano-banana-pro-edit`
+
+ | Parameter | Supported Values | Notes |
+ | :-------------------------------------- | :---------------------------------------------------------------- | :-------------------------------- |
+ | `aspectRatio` | `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `3:2`, `2:3`, `21:9`, `9:21` | Standard AI SDK parameter |
+ | `providerOptions.runpod.resolution` | `1k`, `2k`, `4k` | Output resolution quality |
+ | `providerOptions.runpod.output_format` | `jpeg`, `png`, `webp` | Output image format |
+ | `providerOptions.runpod.images` | `string[]` | Required. Input image(s) to edit. |
+
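+ For example, an edit call might look like this (the input image URL is a placeholder):
+
+ ```ts
+ const { image } = await generateImage({
+   model: runpod.imageModel('google/nano-banana-pro-edit'),
+   prompt: 'Replace the background with a beach at golden hour',
+   aspectRatio: '16:9',
+   providerOptions: {
+     runpod: {
+       images: ['https://example.com/photo.jpg'], // placeholder input image
+       resolution: '2k',
+       output_format: 'png',
+     },
+   },
+ });
+ ```
+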
+ ### Other Models
+
+ Most other models (Flux, Seedream, Qwen, etc.) support the standard `1:1`, `4:3`, and `3:4` aspect ratios.
+
+ - **Flux models**: Support `num_inference_steps` and `guidance` settings (see the sketch below).
+ - **Edit models**: Require an input image via `providerOptions.runpod.image` (single) or `images` (multiple).
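+
+ A minimal Flux sketch combining these settings (28 steps and guidance 2 are the documented defaults for non-schnell Flux models; the prompt is illustrative):
+
+ ```ts
+ const { image } = await generateImage({
+   model: runpod.imageModel('black-forest-labs/flux-1-dev'),
+   prompt: 'A lighthouse in a storm',
+   providerOptions: {
+     runpod: {
+       num_inference_steps: 28, // denoising steps
+       guidance: 2,             // prompt adherence strength
+     },
+   },
+ });
+ ```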
 
  ### Advanced Parameters
 
@@ -352,24 +397,122 @@ const { image } = await generateImage({
 
  ### Provider Options
 
- Runpod image models support flexible provider options through the `providerOptions.runpod` object:
-
- | Option | Type | Default | Description |
- | ------------------------ | ---------- | ------- | ------------------------------------------------------------------------ |
- | `negative_prompt` | `string` | `""` | Text describing what you don't want in the image |
- | `enable_safety_checker` | `boolean` | `true` | Enable content safety filtering |
- | `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna models) |
- | `image` | `string` | - | Single input image: URL or base64 data URI (Flux Kontext) |
- | `images` | `string[]` | - | Multiple input images (e.g., for `nano-banana-edit` multi-image editing) |
- | `aspect_ratio` | `string` | `"1:1"` | Aspect ratio string (Pruna: "16:9", "match_input_image", etc.) |
- | `resolution` | `string` | `"1k"` | Output resolution (Nano Banana Pro: "1k", "2k") |
- | `num_inference_steps` | `number` | Auto | Number of denoising steps (Flux: 4 for schnell, 28 for others) |
- | `guidance` | `number` | Auto | Guidance scale for prompt adherence (Flux: 7 for schnell, 2 for others) |
- | `output_format` | `string` | `"png"` | Output image format ("png", "jpg", or "jpeg") |
- | `enable_base64_output` | `boolean` | `false` | Return base64 instead of URL (Nano Banana Pro) |
- | `enable_sync_mode` | `boolean` | `false` | Enable synchronous mode (some models) |
- | `maxPollAttempts` | `number` | `60` | Maximum polling attempts for async generation |
- | `pollIntervalMillis` | `number` | `5000` | Polling interval in milliseconds (5 seconds) |
+ Use `providerOptions.runpod` for model-specific parameters:
+
+ | Option | Type | Default | Description |
+ | ------------------------ | ---------- | ------- | ----------------------------------------------- |
+ | `negative_prompt` | `string` | `""` | What to avoid in the image |
+ | `enable_safety_checker` | `boolean` | `true` | Content safety filtering |
+ | `disable_safety_checker` | `boolean` | `false` | Disable safety checker (Pruna) |
+ | `image` | `string` | - | Input image URL or base64 (Flux Kontext) |
+ | `images` | `string[]` | - | Multiple input images (edit models) |
+ | `resolution` | `string` | `"1k"` | Output resolution: 1k, 2k, 4k (Nano Banana Pro) |
+ | `width` / `height` | `number` | - | Custom dimensions (Pruna t2i, 256-1440) |
+ | `num_inference_steps` | `number` | Auto | Denoising steps |
+ | `guidance` | `number` | Auto | Prompt adherence strength |
+ | `output_format` | `string` | `"png"` | Output format: png, jpg, jpeg, webp |
+ | `maxPollAttempts` | `number` | `60` | Max polling attempts |
+ | `pollIntervalMillis` | `number` | `5000` | Polling interval (ms) |
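+
+ For example, to allow more time for a slow asynchronous job (the model and prompt are illustrative):
+
+ ```ts
+ const { image } = await generateImage({
+   model: runpod.imageModel('bytedance/seedream-4.0'),
+   prompt: 'A robot',
+   providerOptions: {
+     runpod: {
+       negative_prompt: 'blurry, low quality',
+       maxPollAttempts: 120,      // poll up to 120 times
+       pollIntervalMillis: 2000,  // every 2 seconds
+     },
+   },
+ });
+ ```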
+
+ ## Speech
+
+ You can generate speech using the AI SDK's `experimental_generateSpeech` and a Runpod speech model created via `runpod.speechModel()` (or the shorthand `runpod.speech()`).
+
+ ### Basic Usage
+
+ ```ts
+ import { writeFileSync } from 'fs';
+ import { runpod } from '@runpod/ai-sdk-provider';
+ import { experimental_generateSpeech as generateSpeech } from 'ai';
+
+ const result = await generateSpeech({
+   model: runpod.speechModel('resembleai/chatterbox-turbo'),
+   text: 'Hello, this is Chatterbox Turbo running on Runpod.',
+   voice: 'lucy',
+ });
+
+ // Save to filesystem:
+ writeFileSync('speech.wav', result.audio.uint8Array);
+ ```
+
+ **Returns:**
+
+ - `result.audio.uint8Array` - Binary audio data (efficient for processing/saving)
+ - `result.audio.base64` - Base64 encoded audio (useful for web embedding)
+ - `result.audio.mediaType` - MIME type (e.g. `audio/wav`)
+ - `result.audio.format` - Format (e.g. `wav`)
+ - `result.warnings` - Array of any warnings about unsupported parameters
+ - `result.providerMetadata.runpod.audioUrl` - Public URL to the generated audio
+ - `result.providerMetadata.runpod.cost` - Cost information (if available)
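+
+ For example, `base64` and `mediaType` can be combined into a data URI for browser playback (a minimal sketch):
+
+ ```ts
+ // Usable as the src of an HTML <audio> element:
+ const dataUri = `data:${result.audio.mediaType};base64,${result.audio.base64}`;
+ ```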
+
+ ### Supported Models
+
+ Supported model: `resembleai/chatterbox-turbo`
+
+ ### Parameters
+
+ | Parameter | Type | Default | Description |
+ | --------- | -------- | -------- | ---------------------------------------- |
+ | `text` | `string` | - | Required. The text to convert to speech. |
+ | `voice` | `string` | `"lucy"` | Built-in voice name (see list below). |
+
+ ### Provider Options
+
+ Use `providerOptions.runpod` for model-specific parameters:
+
+ | Option | Type | Default | Description |
+ | ----------- | -------- | ------- | ------------------------------------------- |
+ | `voice_url` | `string` | - | URL to audio file (5–10s) for voice cloning |
+ | `voiceUrl` | `string` | - | Alias for `voice_url` |
+
+ > Note: If `voice_url` is provided, the built-in `voice` is ignored.
+ >
+ > Note: This speech endpoint currently returns WAV only; `outputFormat` is ignored.
+
+ ### Voices
+
+ `voice` selects one of the built-in voices (default: `lucy`):
+
+ ```ts
+ [
+   'aaron',
+   'abigail',
+   'anaya',
+   'andy',
+   'archer',
+   'brian',
+   'chloe',
+   'dylan',
+   'emmanuel',
+   'ethan',
+   'evelyn',
+   'gavin',
+   'gordon',
+   'ivan',
+   'laura',
+   'lucy',
+   'madison',
+   'marisol',
+   'meera',
+   'walter',
+ ];
+ ```
+
+ ### Voice cloning (via URL)
+
+ You can provide a `voice_url` (5–10s audio) through `providerOptions.runpod`:
+
+ ```ts
+ const result = await generateSpeech({
+   model: runpod.speech('resembleai/chatterbox-turbo'),
+   text: 'Hello!',
+   providerOptions: {
+     runpod: {
+       voice_url: 'https://example.com/voice.wav',
+     },
+   },
+ });
+ ```
 
  ## About Runpod
 
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
- import { LanguageModelV2, ImageModelV2 } from '@ai-sdk/provider';
+ import { LanguageModelV2, ImageModelV2, SpeechModelV2 } from '@ai-sdk/provider';
  import { FetchFunction } from '@ai-sdk/provider-utils';
  export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
  import { z } from 'zod';
@@ -44,6 +44,14 @@ interface RunpodProvider {
    Creates an image model for image generation.
    */
    imageModel(modelId: string): ImageModelV2;
+   /**
+   Creates a speech model for speech generation.
+   */
+   speechModel(modelId: string): SpeechModelV2;
+   /**
+   Creates a speech model for speech generation.
+   */
+   speech(modelId: string): SpeechModelV2;
  }
  declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
  declare const runpod: RunpodProvider;
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
- import { LanguageModelV2, ImageModelV2 } from '@ai-sdk/provider';
+ import { LanguageModelV2, ImageModelV2, SpeechModelV2 } from '@ai-sdk/provider';
  import { FetchFunction } from '@ai-sdk/provider-utils';
  export { OpenAICompatibleErrorData as RunpodErrorData } from '@ai-sdk/openai-compatible';
  import { z } from 'zod';
@@ -44,6 +44,14 @@ interface RunpodProvider {
    Creates an image model for image generation.
    */
    imageModel(modelId: string): ImageModelV2;
+   /**
+   Creates a speech model for speech generation.
+   */
+   speechModel(modelId: string): SpeechModelV2;
+   /**
+   Creates a speech model for speech generation.
+   */
+   speech(modelId: string): SpeechModelV2;
  }
  declare function createRunpod(options?: RunpodProviderSettings): RunpodProvider;
  declare const runpod: RunpodProvider;
package/dist/index.js CHANGED
@@ -27,7 +27,7 @@ module.exports = __toCommonJS(index_exports);
 
  // src/runpod-provider.ts
  var import_openai_compatible = require("@ai-sdk/openai-compatible");
- var import_provider_utils3 = require("@ai-sdk/provider-utils");
+ var import_provider_utils4 = require("@ai-sdk/provider-utils");
 
  // src/runpod-image-model.ts
  var import_provider_utils2 = require("@ai-sdk/provider-utils");
@@ -115,8 +115,12 @@ var RunpodImageModel = class {
      abortSignal
    }) {
      const warnings = [];
+     const isPrunaModel = this.modelId.includes("pruna") || this.modelId.includes("p-image");
+     const isNanoBananaProModel = this.modelId.includes("nano-banana-pro");
      let runpodSize;
-     if (size) {
+     if (isPrunaModel || isNanoBananaProModel) {
+       runpodSize = aspectRatio || "1:1";
+     } else if (size) {
        const runpodSizeCandidate = size.replace("x", "*");
        if (!SUPPORTED_SIZES.has(runpodSizeCandidate)) {
          throw new import_provider.InvalidArgumentError({
@@ -150,7 +154,8 @@ var RunpodImageModel = class {
        prompt,
        runpodSize,
        seed,
-       providerOptions.runpod
+       providerOptions.runpod,
+       aspectRatio
      );
      const { value: response, responseHeaders } = await (0, import_provider_utils2.postJsonToApi)({
        url: `${this.config.baseURL}/runsync`,
@@ -264,7 +269,7 @@ var RunpodImageModel = class {
        `Image generation timed out after ${maxAttempts} attempts (${maxAttempts * pollInterval / 1e3}s)`
      );
    }
-   buildInputPayload(prompt, runpodSize, seed, runpodOptions) {
+   buildInputPayload(prompt, runpodSize, seed, runpodOptions, aspectRatio) {
      const isFluxModel = this.modelId.includes("flux") || this.modelId.includes("black-forest-labs");
      if (isFluxModel) {
        const isKontext = this.modelId.includes("kontext");
@@ -300,50 +305,56 @@ var RunpodImageModel = class {
      if (isPrunaModel) {
        const isPrunaEdit = this.modelId.includes("edit");
        if (isPrunaEdit) {
-         return {
+         const editPayload = {
            prompt,
-           seed: seed ?? -1,
-           aspect_ratio: runpodOptions?.aspect_ratio ?? "match_input_image",
-           disable_safety_checker: runpodOptions?.disable_safety_checker ?? false,
-           enable_sync_mode: runpodOptions?.enable_sync_mode ?? false,
-           ...runpodOptions
+           aspect_ratio: runpodOptions?.aspect_ratio ?? aspectRatio ?? "1:1",
+           disable_safety_checker: runpodOptions?.disable_safety_checker ?? false
          };
+         if (seed !== void 0) {
+           editPayload.seed = seed;
+         } else if (runpodOptions?.seed !== void 0) {
+           editPayload.seed = runpodOptions.seed;
+         }
+         if (runpodOptions?.images) {
+           editPayload.images = runpodOptions.images;
+         }
+         return editPayload;
        } else {
-         const aspectRatioMap = {
-           "1328*1328": "1:1",
-           "1472*1140": "4:3",
-           "1140*1472": "3:4",
-           "512*512": "1:1",
-           "768*768": "1:1",
-           "1024*1024": "1:1",
-           "1536*1536": "1:1",
-           "2048*2048": "1:1",
-           "4096*4096": "1:1",
-           "512*768": "2:3",
-           "768*512": "3:2",
-           "1024*768": "4:3",
-           "768*1024": "3:4"
-         };
-         const aspectRatio = runpodOptions?.aspect_ratio ?? aspectRatioMap[runpodSize] ?? "1:1";
-         return {
+         const t2iPayload = {
            prompt,
-           seed: seed ?? 0,
-           aspect_ratio: aspectRatio,
-           enable_safety_checker: runpodOptions?.enable_safety_checker ?? true,
-           ...runpodOptions
+           aspect_ratio: runpodOptions?.aspect_ratio ?? aspectRatio ?? "1:1",
+           disable_safety_checker: runpodOptions?.disable_safety_checker ?? false
          };
+         if (seed !== void 0) {
+           t2iPayload.seed = seed;
+         } else if (runpodOptions?.seed !== void 0) {
+           t2iPayload.seed = runpodOptions.seed;
+         }
+         if (t2iPayload.aspect_ratio === "custom") {
+           if (runpodOptions?.width) {
+             t2iPayload.width = runpodOptions.width;
+           }
+           if (runpodOptions?.height) {
+             t2iPayload.height = runpodOptions.height;
+           }
+         }
+         return t2iPayload;
        }
      }
      const isNanaBananaProModel = this.modelId.includes("nano-banana-pro");
      if (isNanaBananaProModel) {
-       return {
+       const nanoBananaPayload = {
          prompt,
+         aspect_ratio: runpodOptions?.aspect_ratio ?? aspectRatio ?? "1:1",
          resolution: runpodOptions?.resolution ?? "1k",
          output_format: runpodOptions?.output_format ?? "jpeg",
          enable_base64_output: runpodOptions?.enable_base64_output ?? false,
-         enable_sync_mode: runpodOptions?.enable_sync_mode ?? false,
-         ...runpodOptions
+         enable_sync_mode: runpodOptions?.enable_sync_mode ?? false
        };
+       if (runpodOptions?.images) {
+         nanoBananaPayload.images = runpodOptions.images;
+       }
+       return nanoBananaPayload;
      }
      return {
        prompt,
@@ -381,6 +392,148 @@ var runpodImageStatusSchema = import_zod2.z.object({
    // Error message if FAILED
  });
 
+ // src/runpod-speech-model.ts
+ var import_provider_utils3 = require("@ai-sdk/provider-utils");
+ function isRecord(value) {
+   return typeof value === "object" && value !== null;
+ }
+ function replaceNewlinesWithSpaces(value) {
+   return value.replace(/[\r\n]+/g, " ");
+ }
+ var RunpodSpeechModel = class {
+   constructor(modelId, config) {
+     this.modelId = modelId;
+     this.config = config;
+     this.specificationVersion = "v2";
+   }
+   get provider() {
+     return this.config.provider;
+   }
+   getRunpodRunSyncUrl() {
+     const baseURL = (0, import_provider_utils3.withoutTrailingSlash)(this.config.baseURL) ?? this.config.baseURL;
+     if (baseURL.endsWith("/run") || baseURL.endsWith("/runsync")) {
+       return baseURL;
+     }
+     return `${baseURL}/runsync`;
+   }
+   async doGenerate(options) {
+     const currentDate = this.config._internal?.currentDate?.() ?? /* @__PURE__ */ new Date();
+     const warnings = [];
+     const {
+       text,
+       voice,
+       outputFormat,
+       instructions,
+       speed,
+       language,
+       providerOptions,
+       abortSignal,
+       headers
+     } = options;
+     if (outputFormat != null && outputFormat !== "wav") {
+       warnings.push({
+         type: "unsupported-setting",
+         setting: "outputFormat",
+         details: `Unsupported outputFormat: ${outputFormat}. This endpoint returns 'wav'.`
+       });
+     }
+     if (instructions != null) {
+       warnings.push({
+         type: "unsupported-setting",
+         setting: "instructions",
+         details: `Instructions are not supported by this speech endpoint.`
+       });
+     }
+     if (speed != null) {
+       warnings.push({
+         type: "unsupported-setting",
+         setting: "speed",
+         details: `Speed is not supported by this speech endpoint.`
+       });
+     }
+     if (language != null) {
+       warnings.push({
+         type: "unsupported-setting",
+         setting: "language",
+         details: `Language selection is not supported by this speech endpoint.`
+       });
+     }
+     const runpodProviderOptions = isRecord(providerOptions) ? providerOptions.runpod : void 0;
+     const voiceUrl = isRecord(runpodProviderOptions) && (typeof runpodProviderOptions.voice_url === "string" || typeof runpodProviderOptions.voiceUrl === "string") ? runpodProviderOptions.voice_url ?? runpodProviderOptions.voiceUrl ?? void 0 : void 0;
+     const input = {
+       prompt: replaceNewlinesWithSpaces(text)
+     };
+     if (voiceUrl) {
+       input.voice_url = voiceUrl;
+     } else if (voice) {
+       input.voice = voice;
+     }
+     const requestBody = { input };
+     const url = this.getRunpodRunSyncUrl();
+     const fetchFn = this.config.fetch ?? fetch;
+     const requestHeaders = {
+       "Content-Type": "application/json",
+       ...this.config.headers()
+     };
+     if (headers) {
+       for (const [key, value] of Object.entries(headers)) {
+         if (value != null) {
+           requestHeaders[key] = value;
+         }
+       }
+     }
+     const response = await fetchFn(url, {
+       method: "POST",
+       headers: requestHeaders,
+       body: JSON.stringify(requestBody),
+       signal: abortSignal
+     });
+     const responseHeaders = Object.fromEntries(response.headers.entries());
+     const rawBodyText = await response.text();
+     let parsed = void 0;
+     try {
+       parsed = rawBodyText ? JSON.parse(rawBodyText) : void 0;
+     } catch {
+     }
+     if (!response.ok) {
+       const message = parsed && typeof parsed.error === "string" && parsed.error || rawBodyText || `HTTP ${response.status}`;
+       throw new Error(`Runpod speech request failed: ${message}`);
+     }
+     const output = parsed?.output ?? parsed;
+     const audioUrl = output?.audio_url;
+     if (typeof audioUrl !== "string" || audioUrl.length === 0) {
+       throw new Error("Runpod speech response did not include an audio_url.");
+     }
+     const audioResponse = await fetchFn(audioUrl, { signal: abortSignal });
+     if (!audioResponse.ok) {
+       throw new Error(
+         `Failed to download generated audio (${audioResponse.status}).`
+       );
+     }
+     const audio = new Uint8Array(await audioResponse.arrayBuffer());
+     const providerMetadata = {
+       runpod: {
+         audioUrl,
+         ...typeof output?.cost === "number" ? { cost: output.cost } : {}
+       }
+     };
+     return {
+       audio,
+       warnings,
+       request: {
+         body: JSON.stringify(requestBody)
+       },
+       response: {
+         timestamp: currentDate,
+         modelId: this.modelId,
+         headers: responseHeaders,
+         body: rawBodyText
+       },
+       providerMetadata
+     };
+   }
+ };
+
  // src/runpod-provider.ts
  var MODEL_ID_TO_ENDPOINT_URL = {
    "qwen/qwen3-32b-awq": "https://api.runpod.ai/v2/qwen3-32b-awq/openai/v1",
@@ -408,6 +561,9 @@ var IMAGE_MODEL_ID_TO_ENDPOINT_URL = {
    "pruna/p-image-t2i": "https://api.runpod.ai/v2/p-image-t2i",
    "pruna/p-image-edit": "https://api.runpod.ai/v2/p-image-edit"
  };
+ var SPEECH_MODEL_ID_TO_ENDPOINT_URL = {
+   "resembleai/chatterbox-turbo": "https://api.runpod.ai/v2/chatterbox-turbo/"
+ };
  var MODEL_ID_TO_OPENAI_NAME = {
    "qwen/qwen3-32b-awq": "Qwen/Qwen3-32B-AWQ",
    "deepcogito/cogito-671b-v2.1-fp8": "deepcogito/cogito-671b-v2.1-FP8",
@@ -417,9 +573,26 @@ function deriveEndpointURL(modelId) {
    const normalized = modelId.replace(/\//g, "-");
    return `https://api.runpod.ai/v2/${normalized}/openai/v1`;
  }
+ function parseRunpodConsoleEndpointId(modelIdOrUrl) {
+   if (!modelIdOrUrl.startsWith("http")) {
+     return null;
+   }
+   try {
+     const url = new URL(modelIdOrUrl);
+     if (url.hostname !== "console.runpod.io") {
+       return null;
+     }
+     const parts = url.pathname.split("/").filter(Boolean);
+     const idx = parts.lastIndexOf("endpoint");
+     const endpointId = idx !== -1 ? parts[idx + 1] : void 0;
+     return endpointId || null;
+   } catch {
+     return null;
+   }
+ }
  function createRunpod(options = {}) {
    const getHeaders = () => ({
-     Authorization: `Bearer ${(0, import_provider_utils3.loadApiKey)({
+     Authorization: `Bearer ${(0, import_provider_utils4.loadApiKey)({
        apiKey: options.apiKey,
        environmentVariableName: "RUNPOD_API_KEY",
        description: "Runpod"
@@ -449,7 +622,7 @@ function createRunpod(options = {}) {
    }
    return {
      provider: `runpod.${modelType}`,
-     url: ({ path }) => `${(0, import_provider_utils3.withoutTrailingSlash)(baseURL)}${path}`,
+     url: ({ path }) => `${(0, import_provider_utils4.withoutTrailingSlash)(baseURL)}${path}`,
      headers: getHeaders,
      fetch: runpodFetch
    };
@@ -482,11 +655,25 @@ function createRunpod(options = {}) {
      fetch: options.fetch
    });
  };
+ const createSpeechModel = (modelId) => {
+   const endpointIdFromConsole = parseRunpodConsoleEndpointId(modelId);
+   const normalizedModelId = endpointIdFromConsole ?? modelId;
+   const mappedBaseURL = SPEECH_MODEL_ID_TO_ENDPOINT_URL[normalizedModelId];
+   const baseURL = mappedBaseURL ?? (normalizedModelId.startsWith("http") ? normalizedModelId : `https://api.runpod.ai/v2/${normalizedModelId}`);
+   return new RunpodSpeechModel(normalizedModelId, {
+     provider: "runpod.speech",
+     baseURL,
+     headers: getHeaders,
+     fetch: runpodFetch
+   });
+ };
  const provider = (modelId) => createChatModel(modelId);
  provider.completionModel = createCompletionModel;
  provider.languageModel = createChatModel;
  provider.chatModel = createChatModel;
  provider.imageModel = createImageModel;
+ provider.speechModel = createSpeechModel;
+ provider.speech = createSpeechModel;
  return provider;
  }
  var runpod = createRunpod();