@j-o-r/hello-dave 0.0.10 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +2 -0
  2. package/README.md.bak.1779452127 +240 -0
  3. package/TODO.md +30 -8
  4. package/agents/code_agent.js +6 -6
  5. package/agents/daisy_agent.js +10 -7
  6. package/agents/minimax.js +173 -0
  7. package/agents/stability.js +173 -0
  8. package/bin/codeDave +1 -1
  9. package/bin/dave.js +1 -1
  10. package/docs/music-toolsets.md +137 -0
  11. package/docs/plans/minimax-music-generation.md +80 -0
  12. package/docs/plans/unified-agent-architecture.md +146 -0
  13. package/docs/plans/websocket-streaming-plan.md.bak +317 -0
  14. package/docs/prompt/task_clarification_and_documentation.md +35 -0
  15. package/lib/API/minimax/ImageToolset.js +169 -0
  16. package/lib/API/minimax/MusicToolset.js +290 -0
  17. package/lib/API/minimax/VideoToolset.js +296 -0
  18. package/lib/API/minimax/image.generation.md +239 -0
  19. package/lib/API/minimax/image.js +219 -0
  20. package/lib/API/minimax/image.to.image.md +257 -0
  21. package/lib/API/minimax/index.js +16 -0
  22. package/lib/API/minimax/music.cover.preprocess.md +206 -0
  23. package/lib/API/minimax/music.generation.md +346 -0
  24. package/lib/API/minimax/music.js +257 -0
  25. package/lib/API/minimax/music.lyrics.generation.md +205 -0
  26. package/lib/API/minimax/video.download.md +133 -0
  27. package/lib/API/minimax/video.first.last.image.md +186 -0
  28. package/lib/API/minimax/video.from.image.md +206 -0
  29. package/lib/API/minimax/video.from.subject.md +164 -0
  30. package/lib/API/minimax/video.generation.md +192 -0
  31. package/lib/API/minimax/video.js +339 -0
  32. package/lib/API/minimax/video.query.md +128 -0
  33. package/lib/API/stability.ai/ImageToolset.js +357 -0
  34. package/lib/API/stability.ai/MusicToolset.js +302 -0
  35. package/lib/API/stability.ai/audio-3.md +205 -0
  36. package/lib/API/stability.ai/audio.js +679 -0
  37. package/lib/API/stability.ai/image.js +911 -0
  38. package/lib/API/stability.ai/image.md +271 -0
  39. package/lib/API/stability.ai/index.js +11 -0
  40. package/lib/API/stability.ai/openapi.json +17118 -0
  41. package/lib/API/x.ai/ImageToolset.js +165 -0
  42. package/lib/API/x.ai/image.editing.md +86 -0
  43. package/lib/API/x.ai/image.js +393 -0
  44. package/lib/API/x.ai/image.md +213 -0
  45. package/lib/API/x.ai/image.to.generation.md +494 -0
  46. package/lib/API/x.ai/image.to.video.md +23 -0
  47. package/lib/API/x.ai/index.js +9 -0
  48. package/lib/AgentManager.js +1 -1
  49. package/lib/CdnToolset.js +191 -0
  50. package/lib/ToolSet.js +19 -1
  51. package/lib/cdn.js +373 -0
  52. package/lib/fafs.js +3 -1
  53. package/lib/genericToolset.js +43 -166
  54. package/lib/index.js +9 -1
  55. package/package.json +2 -2
  56. package/types/API/minimax/ImageToolset.d.ts +3 -0
  57. package/types/API/minimax/MusicToolset.d.ts +3 -0
  58. package/types/API/minimax/VideoToolset.d.ts +3 -0
  59. package/types/API/minimax/image.d.ts +109 -0
  60. package/types/API/minimax/index.d.ts +15 -0
  61. package/types/API/minimax/music.d.ts +46 -0
  62. package/types/API/minimax/video.d.ts +165 -0
  63. package/types/API/stability.ai/ImageToolset.d.ts +3 -0
  64. package/types/API/stability.ai/MusicToolset.d.ts +3 -0
  65. package/types/API/stability.ai/audio.d.ts +193 -0
  66. package/types/API/stability.ai/image.d.ts +274 -0
  67. package/types/API/stability.ai/index.d.ts +11 -0
  68. package/types/API/x.ai/ImageToolset.d.ts +3 -0
  69. package/types/API/x.ai/image.d.ts +82 -0
  70. package/types/API/x.ai/index.d.ts +9 -0
  71. package/types/AgentManager.d.ts +1 -1
  72. package/types/CdnToolset.d.ts +20 -0
  73. package/types/ToolSet.d.ts +8 -0
  74. package/types/cdn.d.ts +141 -0
  75. package/types/index.d.ts +8 -2
  76. package/docs/multi-agent-clusters.md.bak +0 -229
@@ -0,0 +1,679 @@
1
+ /**
2
+ * @file lib/API/stability.ai/audio.js
3
+ * @module stability.ai/audio
4
+ * @description Pure HTTP wrapper for the Stability AI Stable Audio 3 API.
5
+ * Fully aligned with the official specifications extracted from:
6
+ * - lib/API/stability.ai/audio-3.md
7
+ * - lib/API/stability.ai/openapi.json
8
+ *
9
+ * This is a clean, production-ready library providing four main workflows:
10
+ * 1. textToAudio() → Text-to-audio generation using the `stable-audio-3` model
11
+ * 2. audioToAudio() → Audio-to-audio transformation / style transfer
12
+ * 3. inpaint() → Audio inpainting with time-based mask (replace sections)
13
+ * 4. fetchResult() → Manual polling for async generation results
14
+ *
15
+ * All generation endpoints are asynchronous (HTTP 202 Accepted + polling).
16
+ * High-level methods (`textToAudio`, `audioToAudio`, `inpaint`) handle
17
+ * submission + internal polling automatically and return a local audio file
18
+ * path plus rich metadata.
19
+ *
20
+ * Key Technical Details:
21
+ * - Uses `multipart/form-data` for all audio uploads (robust handling of
22
+ * local paths, remote URLs, Buffers, and Blobs).
23
+ * - Supports both binary (`audio/*`) and JSON (`application/json` for base64) responses.
24
+ * - Model: `stable-audio-3` (fixed; 26 credits per successful generation).
25
+ * - Max duration: 380 seconds (default 190s). Sample rate: 44.1 kHz stereo.
26
+ * - Output formats: `mp3` (default) or `wav`.
27
+ * - English prompts only. No copyrighted content permitted.
28
+ * - Remote audio URL handling includes automatic download + proper MIME/filename.
29
+ * - API failures (any HTTP status >= 400) are thrown as Errors that include the status code and the JSON-encoded response body.
30
+ *
31
+ * Usage Pattern (recommended):
32
+ * ```js
33
+ * import { textToAudio, audioToAudio, inpaint } from './audio.js';
34
+ * const result = await textToAudio('cinematic orchestral score', { duration: 240 });
35
+ * ```
36
+ *
37
+ * @see {@link https://platform.stability.ai/docs} Official Stability AI docs
38
+ * @see {@link ./audio-3.md} Detailed API specification
39
+ */
40
+
41
/**
 * Origin of the Stability AI REST API. The request helpers append the
 * endpoint path (for example `/v2beta/audio/...`) to this value.
 *
 * @constant {string} BASE_URL
 */
const BASE_URL = 'https://api.stability.ai';

/**
 * Directory used for generated and reference audio files:
 * `<cwd>/.cache/stability`, computed once when the module is evaluated.
 *
 * @constant {string} TMP_DIR
 */
const CACHE_SEGMENTS = ['.cache', 'stability'];
const TMP_DIR = path.join(process.cwd(), ...CACHE_SEGMENTS);
53
+
54
+ import { request as doRequest } from '@j-o-r/apiserver';
55
+ import fs from 'fs/promises';
56
+ import path from 'path';
57
+
58
/**
 * Creates the header set sent with every Stability AI request.
 *
 * The API key is read from `process.env.STABILITY_API_KEY` on each call.
 *
 * @param {string} [acceptHeader='audio/*'] - Value for the `Accept` header:
 *   `'audio/*'` for a binary audio body, `'application/json'` for a JSON body.
 * @returns {Object} `{ Authorization: 'Bearer <key>', Accept: <acceptHeader> }`.
 * @throws {Error} When `STABILITY_API_KEY` is unset or empty.
 *
 * @example
 * const headers = getHeaders('application/json');
 */
const getHeaders = (acceptHeader = 'audio/*') => {
  const apiKey = process.env.STABILITY_API_KEY;
  if (apiKey) {
    return {
      'Authorization': `Bearer ${apiKey}`,
      'Accept': acceptHeader
    };
  }
  throw new Error('Missing STABILITY_API_KEY! Please export STABILITY_API_KEY=your_key');
};
78
+
79
/**
 * Creates the audio cache directory (`TMP_DIR`) if it is missing.
 * Parent directories are created as needed; an existing directory is not an error.
 *
 * @async
 * @returns {Promise<void>}
 */
async function ensureTmpDir() {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(TMP_DIR, mkdirOptions);
}
89
+
90
/**
 * Writes audio content to a new file inside `TMP_DIR` and returns its path.
 *
 * Accepted inputs:
 * - `http://` / `https://` URL string: downloaded with `fetch`
 * - base64 string (characters `A-Z a-z 0-9 + / =` only): decoded
 * - Buffer or any typed array / DataView: bytes written as-is
 * - ArrayBuffer or Blob: converted to a Buffer first
 *
 * The input is validated and converted before the directory is created, so an
 * unsupported value never touches the filesystem.
 *
 * @async
 * @param {Buffer|string|Blob|ArrayBuffer|ArrayBufferView} audioData - Audio content to save.
 * @param {string} [filenamePrefix='stability-audio'] - Prefix of the generated filename.
 * @param {string} [ext='mp3'] - File extension without the leading dot.
 * @returns {Promise<string>} Path of the written file: `<TMP_DIR>/<prefix>-<Date.now()>.<ext>`.
 * @throws {Error} For unsupported input, or when a URL download responds with a non-OK status.
 */
async function saveAudioToLocal(audioData, filenamePrefix = 'stability-audio', ext = 'mp3') {
  let buffer;

  if (typeof audioData === 'string') {
    // Require a full scheme: a bare "http" prefix is also valid base64 text
    // and must not be mistaken for a URL.
    if (/^https?:\/\//i.test(audioData)) {
      const response = await fetch(audioData);
      if (!response.ok) {
        throw new Error(`Failed to download audio: ${response.status} ${response.statusText}`);
      }
      buffer = Buffer.from(await response.arrayBuffer());
    } else if (/^[A-Za-z0-9+/=]+$/.test(audioData)) {
      buffer = Buffer.from(audioData, 'base64');
    } else {
      throw new Error('Unsupported audioData string format');
    }
  } else if (Buffer.isBuffer(audioData)) {
    buffer = audioData;
  } else if (ArrayBuffer.isView(audioData)) {
    // Typed arrays / DataView: wrap the exact byte window without copying.
    buffer = Buffer.from(audioData.buffer, audioData.byteOffset, audioData.byteLength);
  } else if (audioData instanceof ArrayBuffer) {
    buffer = Buffer.from(audioData);
  } else if (audioData instanceof Blob) {
    buffer = Buffer.from(await audioData.arrayBuffer());
  } else {
    throw new Error('Unsupported audioData type for saving');
  }

  await ensureTmpDir();

  const filename = `${filenamePrefix}-${Date.now()}.${ext}`;
  const localPath = path.join(TMP_DIR, filename);
  await fs.writeFile(localPath, buffer);
  return localPath;
}
135
+
136
+ /* ============================================================
137
+ INTERNAL: Submit generation request (returns generation id)
138
+ ============================================================ */
139
+
140
/**
 * POSTs a multipart generation request to a Stable Audio endpoint and returns
 * the generation id taken from the 202 response body.
 *
 * Shared by textToAudio, audioToAudio and inpaint.
 *
 * @async
 * @param {string} endpoint - Endpoint slug appended to `/v2beta/audio/stable-audio/`
 *   (e.g. 'text-to-audio', 'audio-to-audio', 'inpaint').
 * @param {FormData} formData - Populated multipart form data.
 * @param {string} [acceptHeader='audio/*'] - Accept header passed to getHeaders.
 * @returns {Promise<string>} Generation id to poll with.
 * @throws {Error} When the status is not 202, or a 202 body carries no `id`.
 */
async function submitGeneration(endpoint, formData, acceptHeader = 'audio/*') {
  const target = `${BASE_URL}/v2beta/audio/stable-audio/${endpoint}`;
  const res = await doRequest(target, 'POST', getHeaders(acceptHeader), formData);

  if (res.status !== 202) {
    if (res.status >= 400) {
      throw new Error(`Stability API error ${res.status}: ${JSON.stringify(res.response)}`);
    }
    throw new Error(`Unexpected status ${res.status} from ${endpoint}`);
  }

  const generationId = res.response?.id;
  if (generationId) {
    return generationId;
  }
  throw new Error(`No generation id in 202 response: ${JSON.stringify(res.response)}`);
}
171
+
172
+ /* ============================================================
173
+ INTERNAL: Poll for generation result
174
+ ============================================================ */
175
+
176
/**
 * Polls the results endpoint at a fixed interval until the generation is
 * ready, fails, or the attempt budget is used up.
 *
 * Status handling per attempt:
 * - 200: the response object is returned
 * - 202: still in progress; wait `intervalMs` and retry
 * - 404: generation unknown or expired
 * - anything else: error carrying the status and JSON body
 *
 * No delay is inserted after the final attempt, so a timeout is reported as
 * soon as the last poll answers.
 *
 * @async
 * @param {string} id - Generation id returned by submitGeneration (URL-encoded into the path).
 * @param {string} [acceptHeader='audio/*'] - Accept header passed to getHeaders.
 * @param {number} [maxAttempts=72] - Maximum number of polls (72 × 5 s ≈ 6 minutes with the defaults).
 * @param {number} [intervalMs=5000] - Fixed delay between polls in milliseconds.
 * @returns {Promise<Object>} The 200 response object from doRequest.
 * @throws {Error} On 404, any other non-200/202 status, or when attempts run out.
 */
async function pollForResult(id, acceptHeader = 'audio/*', maxAttempts = 72, intervalMs = 5000) {
  const url = `${BASE_URL}/v2beta/audio/results/${encodeURIComponent(id)}`;
  const headers = getHeaders(acceptHeader);

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const res = await doRequest(url, 'GET', headers);

    if (res.status === 200) {
      return res;
    }
    if (res.status === 404) {
      throw new Error(`Generation ${id} not found or expired`);
    }
    if (res.status !== 202) {
      throw new Error(`Poll error ${res.status}: ${JSON.stringify(res.response)}`);
    }

    // 202: still running. Only wait when another attempt will follow.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
  }

  throw new Error(`Timeout after ${maxAttempts} attempts polling generation ${id}`);
}
214
+
215
+ /* ============================================================
216
+ INTERNAL: Process result into usable output
217
+ ============================================================ */
218
+
219
/**
 * Converts a completed result response into the module's standard output object.
 *
 * - `responseType === 'blob'`: the body is saved to disk and metadata is read
 *   from the response headers (`finish-reason`, `seed`, `x-request-id`).
 * - JSON body (`responseType === 'js'` or an object body): `audio`, when it is a
 *   string, is saved to disk via saveAudioToLocal; `seed` and `finish_reason`
 *   are copied from the body.
 * - Anything else, including a null/undefined body: `{ raw, status }` is returned.
 *
 * @async
 * @param {Object} res - Response from doRequest / pollForResult.
 * @param {Object} [options={}] - Processing options.
 * @param {string} [options.filenamePrefix='stability-audio'] - Prefix of the saved filename.
 * @param {string} [options.output_format='mp3'] - Extension of the saved file.
 * @returns {Promise<Object>} One of:
 *   - blob: `{ local_path, finish_reason, seed, x_request_id, raw: { headers } }`
 *   - JSON: `{ local_path, audio_base64, seed, finish_reason, raw }`
 *   - fallback: `{ raw, status }`
 */
async function processResult(res, options = {}) {
  const prefix = options.filenamePrefix || 'stability-audio';
  const ext = options.output_format || 'mp3';

  if (res.responseType === 'blob') {
    const buffer = Buffer.from(await res.response.arrayBuffer());
    const localPath = await saveAudioToLocal(buffer, prefix, ext);

    return {
      local_path: localPath,
      finish_reason: res.headers.get('finish-reason') || 'SUCCESS',
      seed: res.headers.get('seed'),
      x_request_id: res.headers.get('x-request-id'),
      raw: { headers: Object.fromEntries(res.headers.entries()) }
    };
  }

  const data = res.response;
  // `typeof null` is 'object', so exclude null/undefined explicitly; otherwise
  // reading `data.audio` below would throw instead of reaching the fallback.
  const hasJsonBody = data != null && (res.responseType === 'js' || typeof data === 'object');

  if (hasJsonBody) {
    let localPath = null;
    if (data.audio && typeof data.audio === 'string') {
      localPath = await saveAudioToLocal(data.audio, prefix, ext);
    }

    return {
      local_path: localPath,
      audio_base64: data.audio || null,
      seed: data.seed,
      finish_reason: data.finish_reason || 'SUCCESS',
      raw: data
    };
  }

  // Unknown or empty body: hand back what was received.
  return {
    raw: res.response,
    status: res.status
  };
}
273
+
274
+ /* ============================================================
275
+ HELPER: Download remote audio to local temp file
276
+ ============================================================ */
277
+
278
/**
 * Fetches a remote audio file and stores it in `TMP_DIR` as
 * `temp-ref-<Date.now()><ext>`.
 *
 * The extension is taken (lower-cased) from the last path segment of the URL
 * when that segment contains a dot; otherwise `.mp3` is used.
 *
 * @async
 * @param {string} url - HTTP/HTTPS URL of the audio file.
 * @returns {Promise<string>} Path of the downloaded file.
 * @throws {Error} When the response status is not OK.
 */
async function downloadRemoteAudioToTemp(url) {
  await ensureTmpDir();

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download reference audio: ${response.status} ${response.statusText}`);
  }
  const bytes = Buffer.from(await response.arrayBuffer());

  // Derive a source name from the URL path; keep the default if it cannot be parsed.
  let sourceName = 'reference-audio.mp3';
  try {
    const candidate = path.basename(new URL(url).pathname);
    if (candidate && candidate.includes('.')) {
      sourceName = candidate;
    }
  } catch (_) {}

  const extension = path.extname(sourceName).toLowerCase() || '.mp3';
  const destination = path.join(TMP_DIR, `temp-ref-${Date.now()}${extension}`);

  await fs.writeFile(destination, bytes);
  return destination;
}
314
+
315
+ /* ============================================================
316
+ HELPER: Append audio file/URL to FormData (robust)
317
+ ============================================================ */
318
+
319
/**
 * Appends an audio input to `formData` under the field name `audio`, with a
 * filename and MIME type attached.
 *
 * Input handling:
 * - `http://` / `https://` string: downloaded via downloadRemoteAudioToTemp, then read
 * - any other string: treated as a local file path and read
 * - Buffer: appended directly as `temp-audio-<Date.now()>.mp3` (`audio/mpeg`)
 * - Blob: appended directly; the filename is the base name of its `name`
 *   (if any), the MIME type is its `type` or `audio/mpeg`
 *
 * For file paths the MIME type is `audio/wav` when the extension is `.wav`
 * (case-insensitive) and `audio/mpeg` otherwise.
 *
 * Buffers and Blobs are never written to disk: the bytes are already in
 * memory, so a temp-file round trip would only leave stray files behind.
 * A Blob's `name` is reduced to its base name so it cannot carry directory
 * components into the multipart filename.
 *
 * @async
 * @param {FormData} formData - Target FormData object (mutated).
 * @param {string|Buffer|Blob} audioInput - Audio source, see above.
 * @returns {Promise<void>}
 * @throws {Error} If audioInput is missing or of an unsupported type; file
 *   read and download errors propagate unchanged.
 */
async function appendAudioToFormData(formData, audioInput) {
  if (!audioInput) {
    throw new Error('Audio input is required (file path, URL, Buffer, or Blob)');
  }

  const mimeForName = (name) =>
    (path.extname(name).toLowerCase() === '.wav' ? 'audio/wav' : 'audio/mpeg');

  let payload;
  let filename;
  let mimeType;

  if (typeof audioInput === 'string') {
    const isRemote = audioInput.startsWith('http://') || audioInput.startsWith('https://');
    // Remote URL → download to a temp file first, then treat it like a local path.
    const filePath = isRemote ? await downloadRemoteAudioToTemp(audioInput) : audioInput;
    filename = path.basename(filePath);
    mimeType = mimeForName(filename);
    payload = await fs.readFile(filePath);
  } else if (Buffer.isBuffer(audioInput)) {
    filename = `temp-audio-${Date.now()}.mp3`;
    mimeType = 'audio/mpeg';
    payload = audioInput;
  } else if (audioInput instanceof Blob) {
    const baseName = typeof audioInput.name === 'string' ? path.basename(audioInput.name) : '';
    filename = baseName || `temp-audio-${Date.now()}.mp3`;
    mimeType = audioInput.type || 'audio/mpeg';
    payload = audioInput;
  } else {
    throw new Error('audioInput must be a file path (string), URL (string), Buffer, or Blob');
  }

  formData.append('audio', new Blob([payload], { type: mimeType }), filename);
}
379
+
380
+ /* ============================================================
381
+ WORKFLOW 1: TEXT-TO-AUDIO
382
+ ============================================================ */
383
+
384
/**
 * Generates audio from a text prompt with the `stable-audio-3` model.
 *
 * Flow: validate arguments → build the multipart form → submit to the
 * `text-to-audio` endpoint → poll until the result is ready → save the audio
 * locally and return the path plus metadata.
 *
 * Optional numeric parameters are sent only when provided (not null/undefined);
 * their valid ranges are not checked here and are left to the API.
 *
 * @async
 * @function textToAudio
 * @param {string} prompt - Required, non-blank text prompt.
 * @param {Object} [options={}] - Optional generation parameters.
 * @param {string} [options.model='stable-audio-3'] - Must be exactly `'stable-audio-3'`.
 * @param {number} [options.duration] - Target duration in seconds.
 * @param {number} [options.seed] - Random seed.
 * @param {number} [options.steps] - Number of sampling steps.
 * @param {number} [options.cfg_scale] - Prompt adherence strength.
 * @param {string} [options.output_format] - `'mp3'` or `'wav'`; also used as the saved file's extension (default `mp3`).
 * @param {string} [options.accept='audio/*'] - `'audio/*'` (binary) or `'application/json'`.
 * @param {string} [options.filenamePrefix='stability-text-to-audio'] - Prefix for the saved file.
 * @returns {Promise<Object>} Result object produced by processResult, e.g.
 * ```js
 * {
 *   local_path: '/path/to/.cache/stability/stability-text-to-audio-1234567890.mp3',
 *   finish_reason: 'SUCCESS',
 *   seed: '123456789',
 *   x_request_id: 'req_...',
 *   raw: { headers: {...} } // or the JSON body when accept=application/json
 * }
 * ```
 * @throws {Error} For an unsupported model, a blank prompt, a missing API key,
 *   API errors, polling timeout, or failures while saving the audio.
 *
 * @example
 * const result = await textToAudio('upbeat electronic synthwave with driving bass');
 * console.log('Saved to:', result.local_path);
 *
 * @example
 * const result = await textToAudio(
 *   'cinematic orchestral music, epic brass, strings, choir, 120 BPM',
 *   { duration: 240, seed: 42, steps: 8, cfg_scale: 7, output_format: 'wav', accept: 'application/json' }
 * );
 */
async function textToAudio(prompt, options = {}) {
  const model = options.model || 'stable-audio-3';

  if (model !== 'stable-audio-3') {
    throw new Error(
      `textToAudio() expects model 'stable-audio-3'. Got: ${model}`
    );
  }

  if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') {
    throw new Error('textToAudio() requires a non-empty prompt string');
  }

  const formData = new FormData();
  formData.append('prompt', prompt);
  formData.append('model', model);

  // `!= null` keeps legitimate zero values such as seed 0.
  for (const key of ['duration', 'seed', 'steps', 'cfg_scale']) {
    if (options[key] != null) {
      formData.append(key, String(options[key]));
    }
  }
  if (options.output_format) {
    formData.append('output_format', options.output_format);
  }

  const accept = options.accept || 'audio/*';
  const id = await submitGeneration('text-to-audio', formData, accept);
  const resultRes = await pollForResult(id, accept);

  // Honour a caller-supplied prefix; fall back to the workflow default.
  const filenamePrefix = options.filenamePrefix || 'stability-text-to-audio';
  return processResult(resultRes, { ...options, filenamePrefix });
}
476
+
477
+ /* ============================================================
478
+ WORKFLOW 2: AUDIO-TO-AUDIO
479
+ ============================================================ */
480
+
481
/**
 * Transforms an existing audio sample according to a text prompt
 * (audio-to-audio / style transfer) with the `stable-audio-3` model.
 *
 * Flow: validate arguments → build the multipart form (including the reference
 * audio) → submit to the `audio-to-audio` endpoint → poll until ready → save
 * the audio locally and return the path plus metadata.
 *
 * Optional numeric parameters are sent only when provided; their ranges are
 * not checked here and are left to the API.
 *
 * @async
 * @function audioToAudio
 * @param {string} prompt - Required, non-blank text prompt.
 * @param {string|Buffer|Blob} audioInput - Reference audio: local file path,
 *   `http(s)://` URL, Buffer, or Blob (handled by appendAudioToFormData).
 * @param {Object} [options={}] - Generation options.
 * @param {string} [options.model='stable-audio-3'] - Must be exactly `'stable-audio-3'`.
 * @param {number} [options.duration] - Target duration in seconds.
 * @param {number} [options.seed] - Random seed.
 * @param {number} [options.steps] - Number of sampling steps.
 * @param {number} [options.cfg_scale] - Prompt adherence strength.
 * @param {string} [options.output_format] - `'mp3'` or `'wav'`; also the saved file's extension (default `mp3`).
 * @param {number} [options.strength] - Denoising strength; sent only when provided.
 * @param {string} [options.accept='audio/*'] - `'audio/*'` or `'application/json'`.
 * @param {string} [options.filenamePrefix='stability-audio-to-audio'] - Prefix for the saved file.
 * @returns {Promise<Object>} Result object produced by processResult.
 * @throws {Error} For an unsupported model, a blank prompt, an invalid audio
 *   input, a missing API key, API errors, polling timeout, or save failures.
 *
 * @example
 * const result = await audioToAudio(
 *   'transform into orchestral version with strings and choir',
 *   './reference-track.mp3',
 *   { strength: 0.75, duration: 180 }
 * );
 */
async function audioToAudio(prompt, audioInput, options = {}) {
  const model = options.model || 'stable-audio-3';

  if (model !== 'stable-audio-3') {
    throw new Error(
      `audioToAudio() expects model 'stable-audio-3'. Got: ${model}`
    );
  }

  if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') {
    throw new Error('audioToAudio() requires a non-empty prompt string');
  }

  const formData = new FormData();
  formData.append('prompt', prompt);
  formData.append('model', model);

  // `!= null` keeps legitimate zero values such as seed 0.
  for (const key of ['duration', 'seed', 'steps', 'cfg_scale']) {
    if (options[key] != null) {
      formData.append(key, String(options[key]));
    }
  }
  if (options.output_format) {
    formData.append('output_format', options.output_format);
  }
  if (options.strength != null) {
    formData.append('strength', String(options.strength));
  }

  // Handle audio input (URL, file path, Buffer, or Blob)
  await appendAudioToFormData(formData, audioInput);

  const accept = options.accept || 'audio/*';
  const id = await submitGeneration('audio-to-audio', formData, accept);
  const resultRes = await pollForResult(id, accept);

  // Honour a caller-supplied prefix; fall back to the workflow default.
  const filenamePrefix = options.filenamePrefix || 'stability-audio-to-audio';
  return processResult(resultRes, { ...options, filenamePrefix });
}
546
+
547
+ /* ============================================================
548
+ WORKFLOW 3: INPAINT
549
+ ============================================================ */
550
+
551
/**
 * Audio inpainting: regenerates a time segment of an audio file from a text
 * prompt with the `stable-audio-3` model.
 *
 * The segment is described by `mask_start` and `mask_end` (seconds). Both are
 * always sent; when omitted they default to 30 and 380. The mask values and
 * other numeric parameters are not range-checked here and are left to the API.
 *
 * Flow: validate model and prompt → build the multipart form (including the
 * audio) → submit to the `inpaint` endpoint → poll until ready → save the audio
 * locally and return the path plus metadata.
 *
 * @async
 * @function inpaint
 * @param {string} prompt - Required, non-blank prompt describing the replacement content.
 * @param {string|Buffer|Blob} audioInput - Source audio: local file path,
 *   `http(s)://` URL, Buffer, or Blob (handled by appendAudioToFormData).
 * @param {Object} [options={}] - Generation options.
 * @param {string} [options.model='stable-audio-3'] - Must be exactly `'stable-audio-3'`.
 * @param {number} [options.duration] - Target duration in seconds.
 * @param {number} [options.seed] - Random seed.
 * @param {number} [options.steps] - Number of sampling steps.
 * @param {number} [options.cfg_scale] - Prompt adherence strength.
 * @param {string} [options.output_format] - `'mp3'` or `'wav'`; also the saved file's extension (default `mp3`).
 * @param {number} [options.mask_start=30] - Start of the inpaint mask in seconds.
 * @param {number} [options.mask_end=380] - End of the inpaint mask in seconds.
 * @param {string} [options.accept='audio/*'] - `'audio/*'` or `'application/json'`.
 * @param {string} [options.filenamePrefix='stability-inpaint'] - Prefix for the saved file.
 * @returns {Promise<Object>} Result object produced by processResult.
 * @throws {Error} For an unsupported model, a blank prompt, an invalid audio
 *   input, a missing API key, API errors, polling timeout, or save failures.
 *
 * @example
 * const result = await inpaint(
 *   'add a soaring guitar solo in this section',
 *   'full-track.mp3',
 *   { mask_start: 60, mask_end: 120, duration: 180 }
 * );
 */
async function inpaint(prompt, audioInput, options = {}) {
  const model = options.model || 'stable-audio-3';

  if (model !== 'stable-audio-3') {
    throw new Error(
      `inpaint() expects model 'stable-audio-3'. Got: ${model}`
    );
  }

  if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') {
    throw new Error('inpaint() requires a non-empty prompt string');
  }

  const formData = new FormData();
  formData.append('prompt', prompt);
  formData.append('model', model);

  // `!= null` keeps legitimate zero values such as seed 0.
  for (const key of ['duration', 'seed', 'steps', 'cfg_scale']) {
    if (options[key] != null) {
      formData.append(key, String(options[key]));
    }
  }
  if (options.output_format) {
    formData.append('output_format', options.output_format);
  }

  // Inpaint specific: `??` so that an explicit 0 is kept.
  formData.append('mask_start', String(options.mask_start ?? 30));
  formData.append('mask_end', String(options.mask_end ?? 380));

  // Handle audio input
  await appendAudioToFormData(formData, audioInput);

  const accept = options.accept || 'audio/*';
  const id = await submitGeneration('inpaint', formData, accept);
  const resultRes = await pollForResult(id, accept);

  // Honour a caller-supplied prefix; fall back to the workflow default.
  const filenamePrefix = options.filenamePrefix || 'stability-inpaint';
  return processResult(resultRes, { ...options, filenamePrefix });
}
615
+
616
+ /* ============================================================
617
+ WORKFLOW 4: FETCH RESULT (manual polling)
618
+ ============================================================ */
619
+
620
/**
 * Checks a generation once by id and returns either the finished result or an
 * in-progress marker. Intended for custom polling loops or for resuming after
 * an earlier submission.
 *
 * @async
 * @function fetchResult
 * @param {string} id - Generation id (URL-encoded into the request path).
 * @param {string} [acceptHeader='audio/*'] - `'audio/*'` or `'application/json'`.
 * @param {Object} [options={}] - Forwarded to processResult when the result is ready.
 * @param {string} [options.filenamePrefix='stability-audio'] - Prefix for the saved file.
 * @param {string} [options.output_format='mp3'] - Extension for the saved file; set
 *   this to `'wav'` when the generation was requested as wav.
 * @returns {Promise<Object>} Either:
 *   - the processResult output when the status is 200
 *   - `{ status: 'in-progress', id, raw }` when the status is 202
 * @throws {Error} If the id is missing, on 404 (unknown or expired), or on any other status.
 *
 * @example
 * let result;
 * while (true) {
 *   result = await fetchResult(id, 'audio/*', { output_format: 'wav' });
 *   if (result.status !== 'in-progress') break;
 *   await new Promise(r => setTimeout(r, 5000));
 * }
 */
async function fetchResult(id, acceptHeader = 'audio/*', options = {}) {
  if (!id) {
    throw new Error('fetchResult() requires a generation id');
  }

  const headers = getHeaders(acceptHeader);
  const url = `${BASE_URL}/v2beta/audio/results/${encodeURIComponent(id)}`;
  const res = await doRequest(url, 'GET', headers);

  switch (res.status) {
    case 200:
      return processResult(res, options);
    case 202:
      return {
        status: 'in-progress',
        id,
        raw: res.response
      };
    case 404:
      throw new Error(`Generation ${id} not found or expired`);
    default:
      throw new Error(`fetchResult error ${res.status}: ${JSON.stringify(res.response)}`);
  }
}
671
+
672
+ export {
673
+ getHeaders,
674
+ saveAudioToLocal,
675
+ textToAudio,
676
+ audioToAudio,
677
+ inpaint,
678
+ fetchResult
679
+ };