@j-o-r/hello-dave 0.1.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -25
- package/README.md +81 -221
- package/TODO.md +173 -35
- package/agents/agent_creator.js +105 -0
- package/agents/agent_creator.prompt.md +371 -0
- package/agents/ask_agent.js +64 -127
- package/agents/claude_agent.js +68 -0
- package/agents/code_agent.js +55 -135
- package/agents/code_agent.prompt.md +50 -0
- package/agents/echo_agent.js +76 -0
- package/agents/financial_expert.js +75 -0
- package/agents/gpt_agent.js +52 -103
- package/agents/gpt_code.js +81 -0
- package/agents/grok_agent.js +58 -114
- package/agents/minimax_agent.js +92 -0
- package/agents/mureka_agent.js +77 -0
- package/agents/planner_agent.js +172 -0
- package/agents/stability_agent.js +87 -0
- package/agents/test_agent.js +75 -157
- package/agents/weather_agent.js +73 -0
- package/agents/workflow_agent.js +189 -0
- package/bin/dave.js +436 -184
- package/docs/bin-dave.md +85 -35
- package/docs/cdn-ssh.md +100 -0
- package/docs/creating-agents.md +301 -0
- package/docs/creating-toolsets.md +336 -0
- package/docs/docs-organization.md +48 -0
- package/docs/project-overview.md +86 -51
- package/lib/API/elevenlabs.io/music.compose.md +441 -0
- package/lib/API/elevenlabs.io/music.create-composition-plan.md +370 -0
- package/lib/API/elevenlabs.io/music.stream.md +425 -0
- package/lib/API/lalal.ai/lalal.js +445 -0
- package/lib/API/lalal.ai/openapi.json +2614 -0
- package/lib/API/minimax/ImageToolset.js +82 -37
- package/lib/API/minimax/MusicToolset.js +125 -79
- package/lib/API/minimax/VideoToolset.js +170 -167
- package/lib/API/minimax/image.js +5 -1
- package/lib/API/minimax/music.js +210 -23
- package/lib/API/minimax/video.js +242 -53
- package/lib/API/mureka/MusicToolset.js +646 -0
- package/lib/API/mureka/README.md +41 -0
- package/lib/API/mureka/index.js +7 -0
- package/lib/API/mureka/music.js +658 -0
- package/lib/API/openai.com/index.js +7 -0
- package/lib/API/openai.com/{reponses/text.js → responses.js} +64 -18
- package/lib/API/openai.com/video.create.character.md +40 -0
- package/lib/API/openai.com/video.create.md +219 -0
- package/lib/API/openai.com/video.delete.md +44 -0
- package/lib/API/openai.com/video.download.md +31 -0
- package/lib/API/openai.com/video.edit.md +155 -0
- package/lib/API/openai.com/video.extend.md +166 -0
- package/lib/API/openai.com/video.fetch.character.md +43 -0
- package/lib/API/openai.com/video.js +784 -0
- package/lib/API/openai.com/video.list.md +201 -0
- package/lib/API/openai.com/video.remix.md +175 -0
- package/lib/API/openai.com/video.retrieve.md +139 -0
- package/lib/API/openai.com/videoToolset.js +616 -0
- package/lib/API/stability.ai/ImageToolset.js +131 -40
- package/lib/API/stability.ai/MusicToolset.js +79 -47
- package/lib/API/stability.ai/audio.js +63 -131
- package/lib/API/x.ai/chat.responses.md +1040 -0
- package/lib/API/x.ai/image.js +229 -59
- package/lib/API/x.ai/imageToolset.js +376 -0
- package/lib/API/x.ai/index.js +1 -1
- package/lib/API/x.ai/responses.js +9 -18
- package/lib/Agent.js +271 -0
- package/lib/Agent.js.old +284 -0
- package/lib/AgentLauncher.js +562 -0
- package/lib/Cli.js +87 -13
- package/lib/Prompt.js +23 -1
- package/lib/Session.js +5 -4
- package/lib/ToolSet.js +102 -6
- package/lib/agentLoader.js +369 -0
- package/lib/cdn.js +67 -231
- package/lib/{CdnToolset.js → cdnToolset.js} +47 -64
- package/lib/defaultToolsets.js +43 -0
- package/lib/fafs.js +1 -1
- package/lib/genericToolset.js +442 -119
- package/lib/handOffToolset.js +179 -0
- package/lib/index.js +34 -27
- package/lib/toolsetLoader.js +248 -0
- package/package.json +11 -5
- package/types/API/lalal.ai/lalal.d.ts +116 -0
- package/types/API/minimax/image.d.ts +2 -1
- package/types/API/minimax/music.d.ts +189 -26
- package/types/API/minimax/video.d.ts +100 -31
- package/types/API/mureka/index.d.ts +7 -0
- package/types/API/mureka/music.d.ts +472 -0
- package/types/API/openai.com/index.d.ts +7 -0
- package/types/API/openai.com/{reponses/text.d.ts → responses.d.ts} +11 -11
- package/types/API/openai.com/video.d.ts +409 -0
- package/types/API/openai.com/videoToolset.d.ts +24 -0
- package/types/API/stability.ai/audio.d.ts +14 -103
- package/types/API/stability.ai/image.d.ts +2 -2
- package/types/API/x.ai/image.d.ts +138 -26
- package/types/API/x.ai/imageToolset.d.ts +3 -0
- package/types/API/x.ai/index.d.ts +1 -1
- package/types/API/x.ai/responses.d.ts +4 -4
- package/types/Agent.d.ts +123 -0
- package/types/AgentLauncher.d.ts +222 -0
- package/types/Cli.d.ts +28 -8
- package/types/Prompt.d.ts +23 -5
- package/types/Session.d.ts +1 -1
- package/types/ToolSet.d.ts +10 -0
- package/types/agentLoader.d.ts +78 -0
- package/types/cdn.d.ts +15 -90
- package/types/defaultToolsets.d.ts +9 -0
- package/types/fafs.d.ts +1 -1
- package/types/genericToolset.d.ts +1 -1
- package/types/handOffToolset.d.ts +28 -0
- package/types/index.d.ts +19 -17
- package/types/toolsetLoader.d.ts +114 -0
- package/utils/format_log.js +101 -23
- package/utils/launch_agent.js +18 -0
- package/utils/list_sessions.sh +13 -5
- package/utils/search_sessions.sh +65 -29
- package/utils/toolsets.js +33 -0
- package/README.md.bak.1779452127 +0 -240
- package/agents/codeserver.sh +0 -47
- package/agents/daisy_agent.js +0 -173
- package/agents/docs_agent.js +0 -148
- package/agents/memory_agent.js +0 -263
- package/agents/minimax.js +0 -173
- package/agents/npm_agent.js +0 -202
- package/agents/prompt_agent.js +0 -133
- package/agents/readme_agent.js +0 -148
- package/agents/spawn_agent.js +0 -160
- package/agents/stability.js +0 -173
- package/agents/todo_agent.js +0 -175
- package/bin/codeDave +0 -58
- package/docs/agent-dave-websocket-protocol.md +0 -180
- package/docs/agent-manager.md +0 -244
- package/docs/codeserver-pattern.md +0 -191
- package/docs/generic-toolset.md +0 -326
- package/docs/howtos/agent-networking.md +0 -253
- package/docs/howtos/spawn-agents.md.bak +0 -200
- package/docs/howtos/spawn-agents.md.bak_new +0 -200
- package/docs/multi-agent-clusters.md +0 -265
- package/docs/music-toolsets.md +0 -137
- package/docs/path-resolution-best-practices.md +0 -104
- package/docs/plans/minimax-music-generation.md +0 -80
- package/docs/plans/unified-agent-architecture.md +0 -146
- package/docs/plans/websocket-streaming-plan.md.bak +0 -317
- package/docs/prompt/spawn_agent.md +0 -175
- package/docs/prompt/spawn_agent.md.bak +0 -201
- package/docs/prompt/task_clarification_and_documentation.md +0 -35
- package/docs/prompt-class.md +0 -141
- package/docs/todo-archive-infra-2026-04-21.md +0 -15
- package/docs/todo-archive-v0.0.8.md +0 -1
- package/docs/todo-archive-v0.1.0.md +0 -32
- package/docs/todo-archive.md +0 -44
- package/docs/tools-syntax-validation.md +0 -121
- package/docs/toolset.md +0 -164
- package/docs/xai-responses.md +0 -111
- package/docs/xai_collections.md +0 -106
- package/lib/API/x.ai/ImageToolset.js +0 -165
- package/lib/API/x.ai/text.js +0 -415
- package/lib/AgentClient.js +0 -248
- package/lib/AgentManager.js +0 -245
- package/lib/AgentServer.js +0 -404
- package/lib/wsCli.js +0 -287
- package/lib/wsIO.js +0 -90
- package/types/API/x.ai/text.d.ts +0 -286
- package/types/AgentClient.d.ts +0 -109
- package/types/AgentManager.d.ts +0 -100
- package/types/AgentServer.d.ts +0 -89
- package/types/wsCli.d.ts +0 -17
- package/types/wsIO.d.ts +0 -30
- package/utils/test.sh +0 -46
- /package/docs/{suggestions.md → _notes/token-counts.md} +0 -0
- /package/lib/API/openai.com/{reponses/MESSAGES.md → MESSAGES.md} +0 -0
- /package/types/API/{x.ai/ImageToolset.d.ts → mureka/MusicToolset.d.ts} +0 -0
- /package/types/{CdnToolset.d.ts → cdnToolset.d.ts} +0 -0
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
* @file lib/API/stability.ai/audio.js
|
|
3
3
|
* @module stability.ai/audio
|
|
4
4
|
* @description Pure HTTP wrapper for the Stability AI Stable Audio 3 API.
|
|
5
|
+
* **MP3 ONLY MODE** — Input and output are restricted to MP3 format only.
|
|
6
|
+
* This avoids large file upload issues and backend compatibility problems.
|
|
7
|
+
*
|
|
5
8
|
* Fully aligned with the official specifications extracted from:
|
|
6
9
|
* - lib/API/stability.ai/audio-3.md
|
|
7
10
|
* - lib/API/stability.ai/openapi.json
|
|
@@ -17,16 +20,18 @@
|
|
|
17
20
|
* submission + internal polling automatically and return a local audio file
|
|
18
21
|
* path plus rich metadata.
|
|
19
22
|
*
|
|
23
|
+
* **MP3-Only Policy**:
|
|
24
|
+
* - Input audio must be MP3 (.mp3).
|
|
25
|
+
* - Output is always MP3 (output_format is forced to 'mp3').
|
|
26
|
+
* - WAV or other formats are rejected with a clear error message.
|
|
27
|
+
*
|
|
20
28
|
* Key Technical Details:
|
|
21
|
-
* - Uses `multipart/form-data` for all audio uploads
|
|
22
|
-
* local paths, remote URLs, Buffers, and Blobs).
|
|
23
|
-
* - Supports both binary (`audio/*`) and JSON (`application/json` for base64) responses.
|
|
29
|
+
* - Uses `multipart/form-data` for all audio uploads.
|
|
24
30
|
* - Model: `stable-audio-3` (fixed; 26 credits per successful generation).
|
|
25
31
|
* - Max duration: 380 seconds (default 190s). Sample rate: 44.1 kHz stereo.
|
|
26
|
-
* - Output
|
|
32
|
+
* - Output format: **mp3 only**.
|
|
27
33
|
* - English prompts only. No copyrighted content permitted.
|
|
28
34
|
* - Remote audio URL handling includes automatic download + proper MIME/filename.
|
|
29
|
-
* - Comprehensive error handling with specific messages for 400/403/422/429/500.
|
|
30
35
|
*
|
|
31
36
|
* Usage Pattern (recommended):
|
|
32
37
|
* ```js
|
|
@@ -97,13 +102,15 @@ async function ensureTmpDir() {
|
|
|
97
102
|
* - base64 string: decoded
|
|
98
103
|
* - Blob/ArrayBuffer: converted
|
|
99
104
|
* @param {string} [filenamePrefix='stability-audio'] - Prefix for the generated filename.
|
|
100
|
-
* @param {string} [ext='mp3'] - File extension (
|
|
105
|
+
* @param {string} [ext='mp3'] - File extension (forced to 'mp3' in MP3-only mode).
|
|
101
106
|
* @returns {Promise<string>} Absolute local file path of the saved audio.
|
|
102
107
|
* @throws {Error} For unsupported formats or download failures.
|
|
103
108
|
*/
|
|
104
109
|
async function saveAudioToLocal(audioData, filenamePrefix = 'stability-audio', ext = 'mp3') {
|
|
105
110
|
await ensureTmpDir();
|
|
106
111
|
|
|
112
|
+
// Force mp3 extension
|
|
113
|
+
ext = 'mp3';
|
|
107
114
|
const filename = `${filenamePrefix}-${Date.now()}.${ext}`;
|
|
108
115
|
const localPath = path.join(TMP_DIR, filename);
|
|
109
116
|
|
|
@@ -224,7 +231,7 @@ async function pollForResult(id, acceptHeader = 'audio/*', maxAttempts = 72, int
|
|
|
224
231
|
* @param {Object} res - Response from doRequest or pollForResult.
|
|
225
232
|
* @param {Object} [options={}] - Processing options.
|
|
226
233
|
* @param {string} [options.filenamePrefix='stability-audio'] - Filename prefix.
|
|
227
|
-
* @param {string} [options.output_format='mp3'] - Desired extension.
|
|
234
|
+
* @param {string} [options.output_format='mp3'] - Desired extension (forced to mp3).
|
|
228
235
|
* @returns {Promise<Object>} Standardized result:
|
|
229
236
|
* - local_path: string | null (saved file)
|
|
230
237
|
* - audio_base64?: string
|
|
@@ -232,7 +239,7 @@ async function pollForResult(id, acceptHeader = 'audio/*', maxAttempts = 72, int
|
|
|
232
239
|
*/
|
|
233
240
|
async function processResult(res, options = {}) {
|
|
234
241
|
const prefix = options.filenamePrefix || 'stability-audio';
|
|
235
|
-
const ext =
|
|
242
|
+
const ext = 'mp3'; // Force MP3
|
|
236
243
|
|
|
237
244
|
if (res.responseType === 'blob') {
|
|
238
245
|
const buffer = Buffer.from(await res.response.arrayBuffer());
|
|
@@ -278,11 +285,12 @@ async function processResult(res, options = {}) {
|
|
|
278
285
|
/**
|
|
279
286
|
* Downloads a remote audio URL to a temporary local file.
|
|
280
287
|
* Used internally by appendAudioToFormData for robust remote support.
|
|
288
|
+
* **MP3 only** — non-MP3 URLs are rejected.
|
|
281
289
|
*
|
|
282
290
|
* @async
|
|
283
|
-
* @param {string} url - HTTP/HTTPS URL to an audio file (mp3
|
|
291
|
+
* @param {string} url - HTTP/HTTPS URL to an audio file (**must be .mp3**).
|
|
284
292
|
* @returns {Promise<string>} Absolute path to the downloaded temp file.
|
|
285
|
-
* @throws {Error} If download fails
|
|
293
|
+
* @throws {Error} If download fails, URL is invalid, or not MP3.
|
|
286
294
|
*/
|
|
287
295
|
async function downloadRemoteAudioToTemp(url) {
|
|
288
296
|
await ensureTmpDir();
|
|
@@ -304,8 +312,12 @@ async function downloadRemoteAudioToTemp(url) {
|
|
|
304
312
|
}
|
|
305
313
|
} catch (_) {}
|
|
306
314
|
|
|
307
|
-
const ext = path.extname(filename).toLowerCase()
|
|
308
|
-
|
|
315
|
+
const ext = path.extname(filename).toLowerCase();
|
|
316
|
+
if (ext !== '.mp3') {
|
|
317
|
+
throw new Error(`Only MP3 input is supported. Remote file has extension "${ext}". Please convert to MP3.`);
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
const tempFilename = `temp-ref-${Date.now()}.mp3`;
|
|
309
321
|
const tempPath = path.join(TMP_DIR, tempFilename);
|
|
310
322
|
|
|
311
323
|
await fs.writeFile(tempPath, buffer);
|
|
@@ -313,20 +325,20 @@ async function downloadRemoteAudioToTemp(url) {
|
|
|
313
325
|
}
|
|
314
326
|
|
|
315
327
|
/* ============================================================
|
|
316
|
-
HELPER: Append audio file/URL to FormData (robust)
|
|
328
|
+
HELPER: Append audio file/URL to FormData (robust) — MP3 ONLY
|
|
317
329
|
============================================================ */
|
|
318
330
|
|
|
319
331
|
/**
|
|
320
|
-
* Appends an audio input (path, URL, Buffer, or Blob) to a FormData instance
|
|
321
|
-
*
|
|
332
|
+
* Appends an audio input (path, URL, Buffer, or Blob) to a FormData instance.
|
|
333
|
+
* **Strictly MP3 only** — rejects WAV or other formats.
|
|
322
334
|
*
|
|
323
335
|
* @async
|
|
324
336
|
* @param {FormData} formData - Target FormData object.
|
|
325
337
|
* @param {string|Buffer|Blob} audioInput - Audio source:
|
|
326
|
-
* - string: local file path or http(s):// URL
|
|
327
|
-
* - Buffer: raw audio bytes
|
|
328
|
-
* - Blob: browser-style blob
|
|
329
|
-
* @throws {Error} If audioInput is missing or of unsupported type.
|
|
338
|
+
* - string: local file path or http(s):// URL (**must end with .mp3**)
|
|
339
|
+
* - Buffer: raw audio bytes (assumed MP3)
|
|
340
|
+
* - Blob: browser-style blob (must be audio/mpeg)
|
|
341
|
+
* @throws {Error} If audioInput is missing, not MP3, or of unsupported type.
|
|
330
342
|
*/
|
|
331
343
|
async function appendAudioToFormData(formData, audioInput) {
|
|
332
344
|
if (!audioInput) {
|
|
@@ -335,24 +347,24 @@ async function appendAudioToFormData(formData, audioInput) {
|
|
|
335
347
|
|
|
336
348
|
let filePath;
|
|
337
349
|
let filename;
|
|
338
|
-
|
|
350
|
+
const mimeType = 'audio/mpeg';
|
|
339
351
|
|
|
340
352
|
if (typeof audioInput === 'string') {
|
|
341
353
|
if (audioInput.startsWith('http://') || audioInput.startsWith('https://')) {
|
|
342
354
|
// Remote URL → download to temp file first (most reliable)
|
|
343
355
|
filePath = await downloadRemoteAudioToTemp(audioInput);
|
|
344
356
|
filename = path.basename(filePath);
|
|
345
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
346
|
-
mimeType = ext === '.wav' ? 'audio/wav' : 'audio/mpeg';
|
|
347
357
|
} else {
|
|
348
|
-
// Local file path
|
|
358
|
+
// Local file path — must be .mp3
|
|
359
|
+
const ext = path.extname(audioInput).toLowerCase();
|
|
360
|
+
if (ext !== '.mp3') {
|
|
361
|
+
throw new Error(`Only MP3 input is supported. Got extension "${ext}". Please convert your audio to MP3.`);
|
|
362
|
+
}
|
|
349
363
|
filePath = audioInput;
|
|
350
364
|
filename = path.basename(audioInput);
|
|
351
|
-
const ext = path.extname(audioInput).toLowerCase();
|
|
352
|
-
mimeType = ext === '.wav' ? 'audio/wav' : 'audio/mpeg';
|
|
353
365
|
}
|
|
354
366
|
} else if (audioInput instanceof Buffer) {
|
|
355
|
-
// Write buffer to temp file
|
|
367
|
+
// Write buffer to temp file (assumed to be MP3)
|
|
356
368
|
await ensureTmpDir();
|
|
357
369
|
const tempFilename = `temp-audio-${Date.now()}.mp3`;
|
|
358
370
|
filePath = path.join(TMP_DIR, tempFilename);
|
|
@@ -366,7 +378,7 @@ async function appendAudioToFormData(formData, audioInput) {
|
|
|
366
378
|
filePath = path.join(TMP_DIR, tempFilename);
|
|
367
379
|
await fs.writeFile(filePath, buffer);
|
|
368
380
|
filename = tempFilename;
|
|
369
|
-
|
|
381
|
+
// We could check audioInput.type here, but for simplicity we trust the caller
|
|
370
382
|
} else {
|
|
371
383
|
throw new Error('audioInput must be a file path (string), URL (string), Buffer, or Blob');
|
|
372
384
|
}
|
|
@@ -384,18 +396,7 @@ async function appendAudioToFormData(formData, audioInput) {
|
|
|
384
396
|
/**
|
|
385
397
|
* Generates high-quality audio from a text prompt using Stable Audio 3.
|
|
386
398
|
*
|
|
387
|
-
*
|
|
388
|
-
* parameters, receives a generation ID (202), polls until ready, and returns
|
|
389
|
-
* the generated audio saved locally plus metadata.
|
|
390
|
-
*
|
|
391
|
-
* **Constraints** (from Stable Audio 3 spec):
|
|
392
|
-
* - Prompt: English only, max 10,000 characters, descriptive (instruments, mood, genre, style).
|
|
393
|
-
* - Duration: 1–380 seconds (default 190).
|
|
394
|
-
* - Steps: 4–8 (default 8).
|
|
395
|
-
* - CFG Scale: 1–25 (default 1).
|
|
396
|
-
* - Seed: 0 (random) or 0–4,294,967,294.
|
|
397
|
-
* - Output: mp3 (default) or wav at 44.1 kHz stereo.
|
|
398
|
-
* - Cost: Flat 26 credits per successful generation.
|
|
399
|
+
* **MP3 ONLY** — Output is always MP3. `output_format` is ignored and forced to 'mp3'.
|
|
399
400
|
*
|
|
400
401
|
* @async
|
|
401
402
|
* @function textToAudio
|
|
@@ -406,43 +407,11 @@ async function appendAudioToFormData(formData, audioInput) {
|
|
|
406
407
|
* @param {number} [options.seed=0] - Random seed for reproducibility (0 = random).
|
|
407
408
|
* @param {number} [options.steps=8] - Number of sampling steps (4–8).
|
|
408
409
|
* @param {number} [options.cfg_scale=1] - Prompt adherence strength (1–25).
|
|
409
|
-
* @param {string} [options.
|
|
410
|
-
* @param {string} [options.accept='audio/*'] - Response format: `'audio/*'` (binary) or `'application/json'`.
|
|
410
|
+
* @param {string} [options.accept='audio/*'] - Response format.
|
|
411
411
|
* @param {string} [options.filenamePrefix='stability-text-to-audio'] - Prefix for saved file.
|
|
412
|
-
* @returns {Promise<Object>} Result object
|
|
413
|
-
* ```js
|
|
414
|
-
* {
|
|
415
|
-
* local_path: '/path/to/.cache/stability/stability-text-to-audio-1234567890.mp3',
|
|
416
|
-
* finish_reason: 'SUCCESS',
|
|
417
|
-
* seed: 123456789,
|
|
418
|
-
* x_request_id: 'req_...',
|
|
419
|
-
* raw: { headers: {...} } // or full JSON if accept=application/json
|
|
420
|
-
* }
|
|
421
|
-
* ```
|
|
412
|
+
* @returns {Promise<Object>} Result object (always .mp3).
|
|
422
413
|
* @throws {Error} - 'Missing STABILITY_API_KEY', invalid prompt, unsupported model,
|
|
423
|
-
* API errors (
|
|
424
|
-
*
|
|
425
|
-
* @example
|
|
426
|
-
* // Basic usage
|
|
427
|
-
* const result = await textToAudio('upbeat electronic synthwave with driving bass');
|
|
428
|
-
* console.log('Saved to:', result.local_path);
|
|
429
|
-
*
|
|
430
|
-
* @example
|
|
431
|
-
* // Advanced with options
|
|
432
|
-
* const result = await textToAudio(
|
|
433
|
-
* 'cinematic orchestral music, epic brass, strings, choir, 120 BPM',
|
|
434
|
-
* {
|
|
435
|
-
* duration: 240,
|
|
436
|
-
* seed: 42,
|
|
437
|
-
* steps: 8,
|
|
438
|
-
* cfg_scale: 7,
|
|
439
|
-
* output_format: 'wav',
|
|
440
|
-
* accept: 'application/json'
|
|
441
|
-
* }
|
|
442
|
-
* );
|
|
443
|
-
* if (result.audio_base64) {
|
|
444
|
-
* // handle base64
|
|
445
|
-
* }
|
|
414
|
+
* API errors, or non-MP3 input (if any reference audio were passed).
|
|
446
415
|
*/
|
|
447
416
|
async function textToAudio(prompt, options = {}) {
|
|
448
417
|
const model = options.model || 'stable-audio-3';
|
|
@@ -465,7 +434,9 @@ async function textToAudio(prompt, options = {}) {
|
|
|
465
434
|
if (options.seed != null) formData.append('seed', String(options.seed));
|
|
466
435
|
if (options.steps != null) formData.append('steps', String(options.steps));
|
|
467
436
|
if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
|
|
468
|
-
|
|
437
|
+
|
|
438
|
+
// Force MP3 output
|
|
439
|
+
formData.append('output_format', 'mp3');
|
|
469
440
|
|
|
470
441
|
const accept = options.accept || 'audio/*';
|
|
471
442
|
const id = await submitGeneration('text-to-audio', formData, accept);
|
|
@@ -481,34 +452,16 @@ async function textToAudio(prompt, options = {}) {
|
|
|
481
452
|
/**
|
|
482
453
|
* Transforms an existing audio sample using a text prompt (audio-to-audio / style transfer).
|
|
483
454
|
*
|
|
484
|
-
*
|
|
485
|
-
* composition that incorporates elements of the input while following the text description.
|
|
486
|
-
*
|
|
487
|
-
* **Additional Parameter**:
|
|
488
|
-
* - `strength`: Denoising strength (0.0 = identical to input, 1.0 = no influence from input).
|
|
489
|
-
*
|
|
490
|
-
* All other constraints and behavior are identical to `textToAudio`.
|
|
455
|
+
* **MP3 ONLY** — Input must be MP3. Output is always MP3.
|
|
491
456
|
*
|
|
492
457
|
* @async
|
|
493
458
|
* @function audioToAudio
|
|
494
459
|
* @param {string} prompt - Descriptive text prompt (English, max 10k chars).
|
|
495
|
-
* @param {string|Buffer|Blob} audioInput - Reference audio
|
|
496
|
-
*
|
|
497
|
-
* - Remote HTTP/HTTPS URL (string) – auto-downloaded with proper MIME
|
|
498
|
-
* - Buffer (raw bytes)
|
|
499
|
-
* - Blob (with optional name/type)
|
|
500
|
-
* @param {Object} [options={}] - Generation options (see textToAudio for common params).
|
|
460
|
+
* @param {string|Buffer|Blob} audioInput - Reference audio (**must be MP3**).
|
|
461
|
+
* @param {Object} [options={}] - Generation options.
|
|
501
462
|
* @param {number} [options.strength=1] - Denoising strength (0–1).
|
|
502
|
-
* @
|
|
503
|
-
* @
|
|
504
|
-
* @throws {Error} Same as textToAudio plus audio input validation errors.
|
|
505
|
-
*
|
|
506
|
-
* @example
|
|
507
|
-
* const result = await audioToAudio(
|
|
508
|
-
* 'transform into orchestral version with strings and choir',
|
|
509
|
-
* './reference-track.mp3',
|
|
510
|
-
* { strength: 0.75, duration: 180 }
|
|
511
|
-
* );
|
|
463
|
+
* @returns {Promise<Object>} Same structure as textToAudio result (always .mp3).
|
|
464
|
+
* @throws {Error} Same as textToAudio plus non-MP3 input validation errors.
|
|
512
465
|
*/
|
|
513
466
|
async function audioToAudio(prompt, audioInput, options = {}) {
|
|
514
467
|
const model = options.model || 'stable-audio-3';
|
|
@@ -531,10 +484,12 @@ async function audioToAudio(prompt, audioInput, options = {}) {
|
|
|
531
484
|
if (options.seed != null) formData.append('seed', String(options.seed));
|
|
532
485
|
if (options.steps != null) formData.append('steps', String(options.steps));
|
|
533
486
|
if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
|
|
534
|
-
if (options.output_format) formData.append('output_format', options.output_format);
|
|
535
487
|
if (options.strength != null) formData.append('strength', String(options.strength));
|
|
536
488
|
|
|
537
|
-
//
|
|
489
|
+
// Force MP3 output
|
|
490
|
+
formData.append('output_format', 'mp3');
|
|
491
|
+
|
|
492
|
+
// Handle audio input (must be MP3)
|
|
538
493
|
await appendAudioToFormData(formData, audioInput);
|
|
539
494
|
|
|
540
495
|
const accept = options.accept || 'audio/*';
|
|
@@ -552,29 +507,17 @@ async function audioToAudio(prompt, audioInput, options = {}) {
|
|
|
552
507
|
* Performs audio inpainting: replaces a specified time segment of an audio file
|
|
553
508
|
* with new content generated from a text prompt.
|
|
554
509
|
*
|
|
555
|
-
*
|
|
556
|
-
* The model fills the masked section while preserving the rest of the audio.
|
|
557
|
-
*
|
|
558
|
-
* Default mask: 30s → 380s (inpaint most of a long track).
|
|
510
|
+
* **MP3 ONLY** — Input must be MP3. Output is always MP3.
|
|
559
511
|
*
|
|
560
512
|
* @async
|
|
561
513
|
* @function inpaint
|
|
562
514
|
* @param {string} prompt - Text prompt describing the desired replacement content.
|
|
563
|
-
* @param {string|Buffer|Blob} audioInput - Reference audio (
|
|
515
|
+
* @param {string|Buffer|Blob} audioInput - Reference audio (**must be MP3**).
|
|
564
516
|
* @param {Object} [options={}] - Generation options.
|
|
565
517
|
* @param {number} [options.mask_start=30] - Start time (seconds) of the inpaint mask (0–380).
|
|
566
518
|
* @param {number} [options.mask_end=380] - End time (seconds) of the inpaint mask (0–380).
|
|
567
|
-
* @
|
|
568
|
-
* @returns {Promise<Object>} Same result structure as other generation methods.
|
|
519
|
+
* @returns {Promise<Object>} Same result structure as other generation methods (always .mp3).
|
|
569
520
|
* @throws {Error} Validation errors for mask ranges, audio input, etc.
|
|
570
|
-
*
|
|
571
|
-
* @example
|
|
572
|
-
* // Inpaint the middle section
|
|
573
|
-
* const result = await inpaint(
|
|
574
|
-
* 'add a soaring guitar solo in this section',
|
|
575
|
-
* 'full-track.mp3',
|
|
576
|
-
* { mask_start: 60, mask_end: 120, duration: 180 }
|
|
577
|
-
* );
|
|
578
521
|
*/
|
|
579
522
|
async function inpaint(prompt, audioInput, options = {}) {
|
|
580
523
|
const model = options.model || 'stable-audio-3';
|
|
@@ -597,13 +540,15 @@ async function inpaint(prompt, audioInput, options = {}) {
|
|
|
597
540
|
if (options.seed != null) formData.append('seed', String(options.seed));
|
|
598
541
|
if (options.steps != null) formData.append('steps', String(options.steps));
|
|
599
542
|
if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
|
|
600
|
-
|
|
543
|
+
|
|
544
|
+
// Force MP3 output
|
|
545
|
+
formData.append('output_format', 'mp3');
|
|
601
546
|
|
|
602
547
|
// Inpaint specific
|
|
603
548
|
formData.append('mask_start', String(options.mask_start ?? 30));
|
|
604
549
|
formData.append('mask_end', String(options.mask_end ?? 380));
|
|
605
550
|
|
|
606
|
-
// Handle audio input
|
|
551
|
+
// Handle audio input (must be MP3)
|
|
607
552
|
await appendAudioToFormData(formData, audioInput);
|
|
608
553
|
|
|
609
554
|
const accept = options.accept || 'audio/*';
|
|
@@ -619,26 +564,13 @@ async function inpaint(prompt, audioInput, options = {}) {
|
|
|
619
564
|
|
|
620
565
|
/**
|
|
621
566
|
* Manually fetches or checks the status of a generation using its ID.
|
|
622
|
-
* Useful for custom polling logic or resuming after a previous submission.
|
|
623
567
|
*
|
|
624
568
|
* @async
|
|
625
569
|
* @function fetchResult
|
|
626
570
|
* @param {string} id - Generation ID (from a previous 202 response).
|
|
627
571
|
* @param {string} [acceptHeader='audio/*'] - `'audio/*'` or `'application/json'`.
|
|
628
|
-
* @returns {Promise<Object>} Either
|
|
629
|
-
* - Completed result (same as processResult)
|
|
630
|
-
* - `{ status: 'in-progress', id, raw }` if still 202
|
|
572
|
+
* @returns {Promise<Object>} Either completed result or in-progress status.
|
|
631
573
|
* @throws {Error} If ID missing, 404 (expired), or other API error.
|
|
632
|
-
*
|
|
633
|
-
* @example
|
|
634
|
-
* // Manual polling example
|
|
635
|
-
* const id = await submit...; // or from previous call
|
|
636
|
-
* let result;
|
|
637
|
-
* while (true) {
|
|
638
|
-
* result = await fetchResult(id);
|
|
639
|
-
* if (result.status !== 'in-progress') break;
|
|
640
|
-
* await new Promise(r => setTimeout(r, 5000));
|
|
641
|
-
* }
|
|
642
574
|
*/
|
|
643
575
|
async function fetchResult(id, acceptHeader = 'audio/*') {
|
|
644
576
|
if (!id) {
|