@j-o-r/hello-dave 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/CHANGELOG.md +42 -25
  2. package/README.md +81 -221
  3. package/TODO.md +173 -35
  4. package/agents/agent_creator.js +105 -0
  5. package/agents/agent_creator.prompt.md +371 -0
  6. package/agents/ask_agent.js +64 -127
  7. package/agents/claude_agent.js +68 -0
  8. package/agents/code_agent.js +55 -135
  9. package/agents/code_agent.prompt.md +50 -0
  10. package/agents/echo_agent.js +76 -0
  11. package/agents/financial_expert.js +75 -0
  12. package/agents/gpt_agent.js +52 -103
  13. package/agents/gpt_code.js +81 -0
  14. package/agents/grok_agent.js +58 -114
  15. package/agents/minimax_agent.js +92 -0
  16. package/agents/mureka_agent.js +77 -0
  17. package/agents/planner_agent.js +172 -0
  18. package/agents/stability_agent.js +87 -0
  19. package/agents/test_agent.js +75 -157
  20. package/agents/weather_agent.js +73 -0
  21. package/agents/workflow_agent.js +189 -0
  22. package/bin/dave.js +436 -184
  23. package/docs/bin-dave.md +85 -35
  24. package/docs/cdn-ssh.md +100 -0
  25. package/docs/creating-agents.md +301 -0
  26. package/docs/creating-toolsets.md +336 -0
  27. package/docs/docs-organization.md +48 -0
  28. package/docs/project-overview.md +86 -51
  29. package/lib/API/elevenlabs.io/music.compose.md +441 -0
  30. package/lib/API/elevenlabs.io/music.create-composition-plan.md +370 -0
  31. package/lib/API/elevenlabs.io/music.stream.md +425 -0
  32. package/lib/API/lalal.ai/lalal.js +445 -0
  33. package/lib/API/lalal.ai/openapi.json +2614 -0
  34. package/lib/API/minimax/ImageToolset.js +82 -37
  35. package/lib/API/minimax/MusicToolset.js +125 -79
  36. package/lib/API/minimax/VideoToolset.js +170 -167
  37. package/lib/API/minimax/image.js +5 -1
  38. package/lib/API/minimax/music.js +210 -23
  39. package/lib/API/minimax/video.js +242 -53
  40. package/lib/API/mureka/MusicToolset.js +646 -0
  41. package/lib/API/mureka/README.md +41 -0
  42. package/lib/API/mureka/index.js +7 -0
  43. package/lib/API/mureka/music.js +658 -0
  44. package/lib/API/openai.com/index.js +7 -0
  45. package/lib/API/openai.com/{reponses/text.js → responses.js} +64 -18
  46. package/lib/API/openai.com/video.create.character.md +40 -0
  47. package/lib/API/openai.com/video.create.md +219 -0
  48. package/lib/API/openai.com/video.delete.md +44 -0
  49. package/lib/API/openai.com/video.download.md +31 -0
  50. package/lib/API/openai.com/video.edit.md +155 -0
  51. package/lib/API/openai.com/video.extend.md +166 -0
  52. package/lib/API/openai.com/video.fetch.character.md +43 -0
  53. package/lib/API/openai.com/video.js +784 -0
  54. package/lib/API/openai.com/video.list.md +201 -0
  55. package/lib/API/openai.com/video.remix.md +175 -0
  56. package/lib/API/openai.com/video.retrieve.md +139 -0
  57. package/lib/API/openai.com/videoToolset.js +616 -0
  58. package/lib/API/stability.ai/ImageToolset.js +131 -40
  59. package/lib/API/stability.ai/MusicToolset.js +79 -47
  60. package/lib/API/stability.ai/audio.js +63 -131
  61. package/lib/API/x.ai/chat.responses.md +1040 -0
  62. package/lib/API/x.ai/image.js +229 -59
  63. package/lib/API/x.ai/imageToolset.js +376 -0
  64. package/lib/API/x.ai/index.js +1 -3
  65. package/lib/API/x.ai/responses.js +9 -18
  66. package/lib/Agent.js +271 -0
  67. package/lib/Agent.js.old +284 -0
  68. package/lib/AgentLauncher.js +562 -0
  69. package/lib/Cli.js +87 -13
  70. package/lib/Prompt.js +23 -1
  71. package/lib/Session.js +5 -4
  72. package/lib/ToolSet.js +102 -6
  73. package/lib/agentLoader.js +369 -0
  74. package/lib/cdn.js +67 -231
  75. package/lib/{CdnToolset.js → cdnToolset.js} +47 -64
  76. package/lib/defaultToolsets.js +43 -0
  77. package/lib/fafs.js +1 -1
  78. package/lib/genericToolset.js +442 -119
  79. package/lib/handOffToolset.js +179 -0
  80. package/lib/index.js +34 -27
  81. package/lib/toolsetLoader.js +248 -0
  82. package/package.json +11 -5
  83. package/types/API/lalal.ai/lalal.d.ts +116 -0
  84. package/types/API/minimax/image.d.ts +2 -1
  85. package/types/API/minimax/music.d.ts +189 -26
  86. package/types/API/minimax/video.d.ts +100 -31
  87. package/types/API/mureka/index.d.ts +7 -0
  88. package/types/API/mureka/music.d.ts +472 -0
  89. package/types/API/openai.com/index.d.ts +7 -0
  90. package/types/API/openai.com/{reponses/text.d.ts → responses.d.ts} +11 -11
  91. package/types/API/openai.com/video.d.ts +409 -0
  92. package/types/API/openai.com/videoToolset.d.ts +24 -0
  93. package/types/API/stability.ai/audio.d.ts +14 -103
  94. package/types/API/stability.ai/image.d.ts +2 -2
  95. package/types/API/x.ai/image.d.ts +138 -26
  96. package/types/API/x.ai/imageToolset.d.ts +3 -0
  97. package/types/API/x.ai/index.d.ts +1 -3
  98. package/types/API/x.ai/responses.d.ts +4 -4
  99. package/types/Agent.d.ts +123 -0
  100. package/types/AgentLauncher.d.ts +222 -0
  101. package/types/Cli.d.ts +28 -8
  102. package/types/Prompt.d.ts +23 -5
  103. package/types/Session.d.ts +1 -1
  104. package/types/ToolSet.d.ts +10 -0
  105. package/types/agentLoader.d.ts +78 -0
  106. package/types/cdn.d.ts +15 -90
  107. package/types/defaultToolsets.d.ts +9 -0
  108. package/types/fafs.d.ts +1 -1
  109. package/types/genericToolset.d.ts +1 -1
  110. package/types/handOffToolset.d.ts +28 -0
  111. package/types/index.d.ts +19 -16
  112. package/types/toolsetLoader.d.ts +114 -0
  113. package/utils/format_log.js +101 -23
  114. package/utils/launch_agent.js +18 -0
  115. package/utils/list_sessions.sh +13 -5
  116. package/utils/search_sessions.sh +65 -29
  117. package/utils/toolsets.js +33 -0
  118. package/README.md.bak.1779452127 +0 -240
  119. package/agents/codeserver.sh +0 -47
  120. package/agents/daisy_agent.js +0 -173
  121. package/agents/docs_agent.js +0 -148
  122. package/agents/memory_agent.js +0 -263
  123. package/agents/minimax.js +0 -173
  124. package/agents/npm_agent.js +0 -202
  125. package/agents/prompt_agent.js +0 -133
  126. package/agents/readme_agent.js +0 -148
  127. package/agents/spawn_agent.js +0 -160
  128. package/agents/stability.js +0 -173
  129. package/agents/todo_agent.js +0 -175
  130. package/bin/codeDave +0 -58
  131. package/docs/agent-dave-websocket-protocol.md +0 -180
  132. package/docs/agent-manager.md +0 -244
  133. package/docs/codeserver-pattern.md +0 -191
  134. package/docs/generic-toolset.md +0 -326
  135. package/docs/howtos/agent-networking.md +0 -253
  136. package/docs/howtos/spawn-agents.md.bak +0 -200
  137. package/docs/howtos/spawn-agents.md.bak_new +0 -200
  138. package/docs/multi-agent-clusters.md +0 -265
  139. package/docs/music-toolsets.md +0 -137
  140. package/docs/path-resolution-best-practices.md +0 -104
  141. package/docs/plans/minimax-music-generation.md +0 -80
  142. package/docs/plans/unified-agent-architecture.md +0 -146
  143. package/docs/plans/websocket-streaming-plan.md.bak +0 -317
  144. package/docs/prompt/spawn_agent.md +0 -175
  145. package/docs/prompt/spawn_agent.md.bak +0 -201
  146. package/docs/prompt/task_clarification_and_documentation.md +0 -35
  147. package/docs/prompt-class.md +0 -141
  148. package/docs/todo-archive-infra-2026-04-21.md +0 -15
  149. package/docs/todo-archive-v0.0.8.md +0 -1
  150. package/docs/todo-archive-v0.1.0.md +0 -32
  151. package/docs/todo-archive.md +0 -44
  152. package/docs/tools-syntax-validation.md +0 -121
  153. package/docs/toolset.md +0 -164
  154. package/docs/xai-responses.md +0 -111
  155. package/docs/xai_collections.md +0 -106
  156. package/lib/API/x.ai/ImageToolset.js +0 -165
  157. package/lib/API/x.ai/text.js +0 -415
  158. package/lib/AgentClient.js +0 -248
  159. package/lib/AgentManager.js +0 -245
  160. package/lib/AgentServer.js +0 -404
  161. package/lib/wsCli.js +0 -287
  162. package/lib/wsIO.js +0 -90
  163. package/types/API/x.ai/text.d.ts +0 -286
  164. package/types/AgentClient.d.ts +0 -109
  165. package/types/AgentManager.d.ts +0 -100
  166. package/types/AgentServer.d.ts +0 -89
  167. package/types/wsCli.d.ts +0 -17
  168. package/types/wsIO.d.ts +0 -30
  169. package/utils/test.sh +0 -46
  170. /package/docs/{suggestions.md → _notes/token-counts.md} +0 -0
  171. /package/lib/API/openai.com/{reponses/MESSAGES.md → MESSAGES.md} +0 -0
  172. /package/types/API/{x.ai/ImageToolset.d.ts → mureka/MusicToolset.d.ts} +0 -0
  173. /package/types/{CdnToolset.d.ts → cdnToolset.d.ts} +0 -0
@@ -2,6 +2,9 @@
2
2
  * @file lib/API/stability.ai/audio.js
3
3
  * @module stability.ai/audio
4
4
  * @description Pure HTTP wrapper for the Stability AI Stable Audio 3 API.
5
+ * **MP3 ONLY MODE** — Input and output are restricted to MP3 format only.
6
+ * This avoids large file upload issues and backend compatibility problems.
7
+ *
5
8
  * Fully aligned with the official specifications extracted from:
6
9
  * - lib/API/stability.ai/audio-3.md
7
10
  * - lib/API/stability.ai/openapi.json
@@ -17,16 +20,18 @@
17
20
  * submission + internal polling automatically and return a local audio file
18
21
  * path plus rich metadata.
19
22
  *
23
+ * **MP3-Only Policy**:
24
+ * - Input audio must be MP3 (.mp3).
25
+ * - Output is always MP3 (output_format is forced to 'mp3').
26
+ * - WAV or other formats are rejected with a clear error message.
27
+ *
20
28
  * Key Technical Details:
21
- * - Uses `multipart/form-data` for all audio uploads (robust handling of
22
- * local paths, remote URLs, Buffers, and Blobs).
23
- * - Supports both binary (`audio/*`) and JSON (`application/json` for base64) responses.
29
+ * - Uses `multipart/form-data` for all audio uploads.
24
30
  * - Model: `stable-audio-3` (fixed; 26 credits per successful generation).
25
31
  * - Max duration: 380 seconds (default 190s). Sample rate: 44.1 kHz stereo.
26
- * - Output formats: `mp3` (default) or `wav`.
32
+ * - Output format: **mp3 only**.
27
33
  * - English prompts only. No copyrighted content permitted.
28
34
  * - Remote audio URL handling includes automatic download + proper MIME/filename.
29
- * - Comprehensive error handling with specific messages for 400/403/422/429/500.
30
35
  *
31
36
  * Usage Pattern (recommended):
32
37
  * ```js
@@ -97,13 +102,15 @@ async function ensureTmpDir() {
97
102
  * - base64 string: decoded
98
103
  * - Blob/ArrayBuffer: converted
99
104
  * @param {string} [filenamePrefix='stability-audio'] - Prefix for the generated filename.
100
- * @param {string} [ext='mp3'] - File extension (`mp3` or `wav` recommended).
105
+ * @param {string} [ext='mp3'] - File extension (forced to 'mp3' in MP3-only mode).
101
106
  * @returns {Promise<string>} Absolute local file path of the saved audio.
102
107
  * @throws {Error} For unsupported formats or download failures.
103
108
  */
104
109
  async function saveAudioToLocal(audioData, filenamePrefix = 'stability-audio', ext = 'mp3') {
105
110
  await ensureTmpDir();
106
111
 
112
+ // Force mp3 extension
113
+ ext = 'mp3';
107
114
  const filename = `${filenamePrefix}-${Date.now()}.${ext}`;
108
115
  const localPath = path.join(TMP_DIR, filename);
109
116
 
@@ -224,7 +231,7 @@ async function pollForResult(id, acceptHeader = 'audio/*', maxAttempts = 72, int
224
231
  * @param {Object} res - Response from doRequest or pollForResult.
225
232
  * @param {Object} [options={}] - Processing options.
226
233
  * @param {string} [options.filenamePrefix='stability-audio'] - Filename prefix.
227
- * @param {string} [options.output_format='mp3'] - Desired extension.
234
+ * @param {string} [options.output_format='mp3'] - Desired extension (forced to mp3).
228
235
  * @returns {Promise<Object>} Standardized result:
229
236
  * - local_path: string | null (saved file)
230
237
  * - audio_base64?: string
@@ -232,7 +239,7 @@ async function pollForResult(id, acceptHeader = 'audio/*', maxAttempts = 72, int
232
239
  */
233
240
  async function processResult(res, options = {}) {
234
241
  const prefix = options.filenamePrefix || 'stability-audio';
235
- const ext = options.output_format || 'mp3';
242
+ const ext = 'mp3'; // Force MP3
236
243
 
237
244
  if (res.responseType === 'blob') {
238
245
  const buffer = Buffer.from(await res.response.arrayBuffer());
@@ -278,11 +285,12 @@ async function processResult(res, options = {}) {
278
285
  /**
279
286
  * Downloads a remote audio URL to a temporary local file.
280
287
  * Used internally by appendAudioToFormData for robust remote support.
288
+ * **MP3 only** — non-MP3 URLs are rejected.
281
289
  *
282
290
  * @async
283
- * @param {string} url - HTTP/HTTPS URL to an audio file (mp3/wav recommended).
291
+ * @param {string} url - HTTP/HTTPS URL to an audio file (**must be .mp3**).
284
292
  * @returns {Promise<string>} Absolute path to the downloaded temp file.
285
- * @throws {Error} If download fails or URL is invalid.
293
+ * @throws {Error} If download fails, URL is invalid, or not MP3.
286
294
  */
287
295
  async function downloadRemoteAudioToTemp(url) {
288
296
  await ensureTmpDir();
@@ -304,8 +312,12 @@ async function downloadRemoteAudioToTemp(url) {
304
312
  }
305
313
  } catch (_) {}
306
314
 
307
- const ext = path.extname(filename).toLowerCase() || '.mp3';
308
- const tempFilename = `temp-ref-${Date.now()}${ext}`;
315
+ const ext = path.extname(filename).toLowerCase();
316
+ if (ext !== '.mp3') {
317
+ throw new Error(`Only MP3 input is supported. Remote file has extension "${ext}". Please convert to MP3.`);
318
+ }
319
+
320
+ const tempFilename = `temp-ref-${Date.now()}.mp3`;
309
321
  const tempPath = path.join(TMP_DIR, tempFilename);
310
322
 
311
323
  await fs.writeFile(tempPath, buffer);
@@ -313,20 +325,20 @@ async function downloadRemoteAudioToTemp(url) {
313
325
  }
314
326
 
315
327
  /* ============================================================
316
- HELPER: Append audio file/URL to FormData (robust)
328
+ HELPER: Append audio file/URL to FormData (robust) — MP3 ONLY
317
329
  ============================================================ */
318
330
 
319
331
  /**
320
- * Appends an audio input (path, URL, Buffer, or Blob) to a FormData instance
321
- * with correct filename and MIME type. Handles remote URLs by downloading first.
332
+ * Appends an audio input (path, URL, Buffer, or Blob) to a FormData instance.
333
+ * **Strictly MP3 only** rejects WAV or other formats.
322
334
  *
323
335
  * @async
324
336
  * @param {FormData} formData - Target FormData object.
325
337
  * @param {string|Buffer|Blob} audioInput - Audio source:
326
- * - string: local file path or http(s):// URL
327
- * - Buffer: raw audio bytes
328
- * - Blob: browser-style blob with optional .name and .type
329
- * @throws {Error} If audioInput is missing or of unsupported type.
338
+ * - string: local file path or http(s):// URL (**must end with .mp3**)
339
+ * - Buffer: raw audio bytes (assumed MP3)
340
+ * - Blob: browser-style blob (must be audio/mpeg)
341
+ * @throws {Error} If audioInput is missing, not MP3, or of unsupported type.
330
342
  */
331
343
  async function appendAudioToFormData(formData, audioInput) {
332
344
  if (!audioInput) {
@@ -335,24 +347,24 @@ async function appendAudioToFormData(formData, audioInput) {
335
347
 
336
348
  let filePath;
337
349
  let filename;
338
- let mimeType = 'audio/mpeg';
350
+ const mimeType = 'audio/mpeg';
339
351
 
340
352
  if (typeof audioInput === 'string') {
341
353
  if (audioInput.startsWith('http://') || audioInput.startsWith('https://')) {
342
354
  // Remote URL → download to temp file first (most reliable)
343
355
  filePath = await downloadRemoteAudioToTemp(audioInput);
344
356
  filename = path.basename(filePath);
345
- const ext = path.extname(filePath).toLowerCase();
346
- mimeType = ext === '.wav' ? 'audio/wav' : 'audio/mpeg';
347
357
  } else {
348
- // Local file path
358
+ // Local file path — must be .mp3
359
+ const ext = path.extname(audioInput).toLowerCase();
360
+ if (ext !== '.mp3') {
361
+ throw new Error(`Only MP3 input is supported. Got extension "${ext}". Please convert your audio to MP3.`);
362
+ }
349
363
  filePath = audioInput;
350
364
  filename = path.basename(audioInput);
351
- const ext = path.extname(audioInput).toLowerCase();
352
- mimeType = ext === '.wav' ? 'audio/wav' : 'audio/mpeg';
353
365
  }
354
366
  } else if (audioInput instanceof Buffer) {
355
- // Write buffer to temp file
367
+ // Write buffer to temp file (assumed to be MP3)
356
368
  await ensureTmpDir();
357
369
  const tempFilename = `temp-audio-${Date.now()}.mp3`;
358
370
  filePath = path.join(TMP_DIR, tempFilename);
@@ -366,7 +378,7 @@ async function appendAudioToFormData(formData, audioInput) {
366
378
  filePath = path.join(TMP_DIR, tempFilename);
367
379
  await fs.writeFile(filePath, buffer);
368
380
  filename = tempFilename;
369
- mimeType = audioInput.type || 'audio/mpeg';
381
+ // We could check audioInput.type here, but for simplicity we trust the caller
370
382
  } else {
371
383
  throw new Error('audioInput must be a file path (string), URL (string), Buffer, or Blob');
372
384
  }
@@ -384,18 +396,7 @@ async function appendAudioToFormData(formData, audioInput) {
384
396
  /**
385
397
  * Generates high-quality audio from a text prompt using Stable Audio 3.
386
398
  *
387
- * This is the primary text-to-audio endpoint. It submits a prompt and optional
388
- * parameters, receives a generation ID (202), polls until ready, and returns
389
- * the generated audio saved locally plus metadata.
390
- *
391
- * **Constraints** (from Stable Audio 3 spec):
392
- * - Prompt: English only, max 10,000 characters, descriptive (instruments, mood, genre, style).
393
- * - Duration: 1–380 seconds (default 190).
394
- * - Steps: 4–8 (default 8).
395
- * - CFG Scale: 1–25 (default 1).
396
- * - Seed: 0 (random) or 0–4,294,967,294.
397
- * - Output: mp3 (default) or wav at 44.1 kHz stereo.
398
- * - Cost: Flat 26 credits per successful generation.
399
+ * **MP3 ONLY** Output is always MP3. `output_format` is ignored and forced to 'mp3'.
399
400
  *
400
401
  * @async
401
402
  * @function textToAudio
@@ -406,43 +407,11 @@ async function appendAudioToFormData(formData, audioInput) {
406
407
  * @param {number} [options.seed=0] - Random seed for reproducibility (0 = random).
407
408
  * @param {number} [options.steps=8] - Number of sampling steps (4–8).
408
409
  * @param {number} [options.cfg_scale=1] - Prompt adherence strength (1–25).
409
- * @param {string} [options.output_format='mp3'] - `'mp3'` or `'wav'`.
410
- * @param {string} [options.accept='audio/*'] - Response format: `'audio/*'` (binary) or `'application/json'`.
410
+ * @param {string} [options.accept='audio/*'] - Response format.
411
411
  * @param {string} [options.filenamePrefix='stability-text-to-audio'] - Prefix for saved file.
412
- * @returns {Promise<Object>} Result object:
413
- * ```js
414
- * {
415
- * local_path: '/path/to/.cache/stability/stability-text-to-audio-1234567890.mp3',
416
- * finish_reason: 'SUCCESS',
417
- * seed: 123456789,
418
- * x_request_id: 'req_...',
419
- * raw: { headers: {...} } // or full JSON if accept=application/json
420
- * }
421
- * ```
412
+ * @returns {Promise<Object>} Result object (always .mp3).
422
413
  * @throws {Error} - 'Missing STABILITY_API_KEY', invalid prompt, unsupported model,
423
- * API errors (400/403/422/429/500), polling timeout, or download failures.
424
- *
425
- * @example
426
- * // Basic usage
427
- * const result = await textToAudio('upbeat electronic synthwave with driving bass');
428
- * console.log('Saved to:', result.local_path);
429
- *
430
- * @example
431
- * // Advanced with options
432
- * const result = await textToAudio(
433
- * 'cinematic orchestral music, epic brass, strings, choir, 120 BPM',
434
- * {
435
- * duration: 240,
436
- * seed: 42,
437
- * steps: 8,
438
- * cfg_scale: 7,
439
- * output_format: 'wav',
440
- * accept: 'application/json'
441
- * }
442
- * );
443
- * if (result.audio_base64) {
444
- * // handle base64
445
- * }
414
+ * API errors, or non-MP3 input (if any reference audio were passed).
446
415
  */
447
416
  async function textToAudio(prompt, options = {}) {
448
417
  const model = options.model || 'stable-audio-3';
@@ -465,7 +434,9 @@ async function textToAudio(prompt, options = {}) {
465
434
  if (options.seed != null) formData.append('seed', String(options.seed));
466
435
  if (options.steps != null) formData.append('steps', String(options.steps));
467
436
  if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
468
- if (options.output_format) formData.append('output_format', options.output_format);
437
+
438
+ // Force MP3 output
439
+ formData.append('output_format', 'mp3');
469
440
 
470
441
  const accept = options.accept || 'audio/*';
471
442
  const id = await submitGeneration('text-to-audio', formData, accept);
@@ -481,34 +452,16 @@ async function textToAudio(prompt, options = {}) {
481
452
  /**
482
453
  * Transforms an existing audio sample using a text prompt (audio-to-audio / style transfer).
483
454
  *
484
- * Uploads a reference audio file (or URL) and applies the prompt to generate a new
485
- * composition that incorporates elements of the input while following the text description.
486
- *
487
- * **Additional Parameter**:
488
- * - `strength`: Denoising strength (0.0 = identical to input, 1.0 = no influence from input).
489
- *
490
- * All other constraints and behavior are identical to `textToAudio`.
455
+ * **MP3 ONLY** Input must be MP3. Output is always MP3.
491
456
  *
492
457
  * @async
493
458
  * @function audioToAudio
494
459
  * @param {string} prompt - Descriptive text prompt (English, max 10k chars).
495
- * @param {string|Buffer|Blob} audioInput - Reference audio:
496
- * - Local file path (string)
497
- * - Remote HTTP/HTTPS URL (string) – auto-downloaded with proper MIME
498
- * - Buffer (raw bytes)
499
- * - Blob (with optional name/type)
500
- * @param {Object} [options={}] - Generation options (see textToAudio for common params).
460
+ * @param {string|Buffer|Blob} audioInput - Reference audio (**must be MP3**).
461
+ * @param {Object} [options={}] - Generation options.
501
462
  * @param {number} [options.strength=1] - Denoising strength (0–1).
502
- * @param {string} [options.filenamePrefix='stability-audio-to-audio']
503
- * @returns {Promise<Object>} Same structure as textToAudio result.
504
- * @throws {Error} Same as textToAudio plus audio input validation errors.
505
- *
506
- * @example
507
- * const result = await audioToAudio(
508
- * 'transform into orchestral version with strings and choir',
509
- * './reference-track.mp3',
510
- * { strength: 0.75, duration: 180 }
511
- * );
463
+ * @returns {Promise<Object>} Same structure as textToAudio result (always .mp3).
464
+ * @throws {Error} Same as textToAudio plus non-MP3 input validation errors.
512
465
  */
513
466
  async function audioToAudio(prompt, audioInput, options = {}) {
514
467
  const model = options.model || 'stable-audio-3';
@@ -531,10 +484,12 @@ async function audioToAudio(prompt, audioInput, options = {}) {
531
484
  if (options.seed != null) formData.append('seed', String(options.seed));
532
485
  if (options.steps != null) formData.append('steps', String(options.steps));
533
486
  if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
534
- if (options.output_format) formData.append('output_format', options.output_format);
535
487
  if (options.strength != null) formData.append('strength', String(options.strength));
536
488
 
537
- // Handle audio input (URL, file path, Buffer, or Blob)
489
+ // Force MP3 output
490
+ formData.append('output_format', 'mp3');
491
+
492
+ // Handle audio input (must be MP3)
538
493
  await appendAudioToFormData(formData, audioInput);
539
494
 
540
495
  const accept = options.accept || 'audio/*';
@@ -552,29 +507,17 @@ async function audioToAudio(prompt, audioInput, options = {}) {
552
507
  * Performs audio inpainting: replaces a specified time segment of an audio file
553
508
  * with new content generated from a text prompt.
554
509
  *
555
- * Uses `mask_start` and `mask_end` to define the region to inpaint (in seconds).
556
- * The model fills the masked section while preserving the rest of the audio.
557
- *
558
- * Default mask: 30s → 380s (inpaint most of a long track).
510
+ * **MP3 ONLY** Input must be MP3. Output is always MP3.
559
511
  *
560
512
  * @async
561
513
  * @function inpaint
562
514
  * @param {string} prompt - Text prompt describing the desired replacement content.
563
- * @param {string|Buffer|Blob} audioInput - Reference audio (same as audioToAudio).
515
+ * @param {string|Buffer|Blob} audioInput - Reference audio (**must be MP3**).
564
516
  * @param {Object} [options={}] - Generation options.
565
517
  * @param {number} [options.mask_start=30] - Start time (seconds) of the inpaint mask (0–380).
566
518
  * @param {number} [options.mask_end=380] - End time (seconds) of the inpaint mask (0–380).
567
- * @param {string} [options.filenamePrefix='stability-inpaint']
568
- * @returns {Promise<Object>} Same result structure as other generation methods.
519
+ * @returns {Promise<Object>} Same result structure as other generation methods (always .mp3).
569
520
  * @throws {Error} Validation errors for mask ranges, audio input, etc.
570
- *
571
- * @example
572
- * // Inpaint the middle section
573
- * const result = await inpaint(
574
- * 'add a soaring guitar solo in this section',
575
- * 'full-track.mp3',
576
- * { mask_start: 60, mask_end: 120, duration: 180 }
577
- * );
578
521
  */
579
522
  async function inpaint(prompt, audioInput, options = {}) {
580
523
  const model = options.model || 'stable-audio-3';
@@ -597,13 +540,15 @@ async function inpaint(prompt, audioInput, options = {}) {
597
540
  if (options.seed != null) formData.append('seed', String(options.seed));
598
541
  if (options.steps != null) formData.append('steps', String(options.steps));
599
542
  if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
600
- if (options.output_format) formData.append('output_format', options.output_format);
543
+
544
+ // Force MP3 output
545
+ formData.append('output_format', 'mp3');
601
546
 
602
547
  // Inpaint specific
603
548
  formData.append('mask_start', String(options.mask_start ?? 30));
604
549
  formData.append('mask_end', String(options.mask_end ?? 380));
605
550
 
606
- // Handle audio input
551
+ // Handle audio input (must be MP3)
607
552
  await appendAudioToFormData(formData, audioInput);
608
553
 
609
554
  const accept = options.accept || 'audio/*';
@@ -619,26 +564,13 @@ async function inpaint(prompt, audioInput, options = {}) {
619
564
 
620
565
  /**
621
566
  * Manually fetches or checks the status of a generation using its ID.
622
- * Useful for custom polling logic or resuming after a previous submission.
623
567
  *
624
568
  * @async
625
569
  * @function fetchResult
626
570
  * @param {string} id - Generation ID (from a previous 202 response).
627
571
  * @param {string} [acceptHeader='audio/*'] - `'audio/*'` or `'application/json'`.
628
- * @returns {Promise<Object>} Either:
629
- * - Completed result (same as processResult)
630
- * - `{ status: 'in-progress', id, raw }` if still 202
572
+ * @returns {Promise<Object>} Either completed result or in-progress status.
631
573
  * @throws {Error} If ID missing, 404 (expired), or other API error.
632
- *
633
- * @example
634
- * // Manual polling example
635
- * const id = await submit...; // or from previous call
636
- * let result;
637
- * while (true) {
638
- * result = await fetchResult(id);
639
- * if (result.status !== 'in-progress') break;
640
- * await new Promise(r => setTimeout(r, 5000));
641
- * }
642
574
  */
643
575
  async function fetchResult(id, acceptHeader = 'audio/*') {
644
576
  if (!id) {