@j-o-r/hello-dave 0.0.10 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.md.bak.1779452127 +240 -0
- package/TODO.md +30 -8
- package/agents/code_agent.js +6 -6
- package/agents/daisy_agent.js +10 -7
- package/agents/minimax.js +173 -0
- package/agents/stability.js +173 -0
- package/bin/codeDave +1 -1
- package/bin/dave.js +1 -1
- package/docs/music-toolsets.md +137 -0
- package/docs/plans/minimax-music-generation.md +80 -0
- package/docs/plans/unified-agent-architecture.md +146 -0
- package/docs/plans/websocket-streaming-plan.md.bak +317 -0
- package/docs/prompt/task_clarification_and_documentation.md +35 -0
- package/lib/API/minimax/ImageToolset.js +169 -0
- package/lib/API/minimax/MusicToolset.js +290 -0
- package/lib/API/minimax/VideoToolset.js +296 -0
- package/lib/API/minimax/image.generation.md +239 -0
- package/lib/API/minimax/image.js +219 -0
- package/lib/API/minimax/image.to.image.md +257 -0
- package/lib/API/minimax/index.js +16 -0
- package/lib/API/minimax/music.cover.preprocess.md +206 -0
- package/lib/API/minimax/music.generation.md +346 -0
- package/lib/API/minimax/music.js +257 -0
- package/lib/API/minimax/music.lyrics.generation.md +205 -0
- package/lib/API/minimax/video.download.md +133 -0
- package/lib/API/minimax/video.first.last.image.md +186 -0
- package/lib/API/minimax/video.from.image.md +206 -0
- package/lib/API/minimax/video.from.subject.md +164 -0
- package/lib/API/minimax/video.generation.md +192 -0
- package/lib/API/minimax/video.js +339 -0
- package/lib/API/minimax/video.query.md +128 -0
- package/lib/API/stability.ai/ImageToolset.js +357 -0
- package/lib/API/stability.ai/MusicToolset.js +302 -0
- package/lib/API/stability.ai/audio-3.md +205 -0
- package/lib/API/stability.ai/audio.js +679 -0
- package/lib/API/stability.ai/image.js +911 -0
- package/lib/API/stability.ai/image.md +271 -0
- package/lib/API/stability.ai/index.js +11 -0
- package/lib/API/stability.ai/openapi.json +17118 -0
- package/lib/API/x.ai/ImageToolset.js +165 -0
- package/lib/API/x.ai/image.editing.md +86 -0
- package/lib/API/x.ai/image.js +393 -0
- package/lib/API/x.ai/image.md +213 -0
- package/lib/API/x.ai/image.to.generation.md +494 -0
- package/lib/API/x.ai/image.to.video.md +23 -0
- package/lib/API/x.ai/index.js +7 -0
- package/lib/AgentManager.js +1 -1
- package/lib/CdnToolset.js +191 -0
- package/lib/ToolSet.js +19 -1
- package/lib/cdn.js +373 -0
- package/lib/fafs.js +3 -1
- package/lib/genericToolset.js +43 -166
- package/lib/index.js +9 -1
- package/package.json +2 -2
- package/types/API/minimax/ImageToolset.d.ts +3 -0
- package/types/API/minimax/MusicToolset.d.ts +3 -0
- package/types/API/minimax/VideoToolset.d.ts +3 -0
- package/types/API/minimax/image.d.ts +109 -0
- package/types/API/minimax/index.d.ts +15 -0
- package/types/API/minimax/music.d.ts +46 -0
- package/types/API/minimax/video.d.ts +165 -0
- package/types/API/stability.ai/ImageToolset.d.ts +3 -0
- package/types/API/stability.ai/MusicToolset.d.ts +3 -0
- package/types/API/stability.ai/audio.d.ts +193 -0
- package/types/API/stability.ai/image.d.ts +274 -0
- package/types/API/stability.ai/index.d.ts +11 -0
- package/types/API/x.ai/ImageToolset.d.ts +3 -0
- package/types/API/x.ai/image.d.ts +82 -0
- package/types/API/x.ai/index.d.ts +7 -0
- package/types/AgentManager.d.ts +1 -1
- package/types/CdnToolset.d.ts +20 -0
- package/types/ToolSet.d.ts +8 -0
- package/types/cdn.d.ts +141 -0
- package/types/index.d.ts +9 -2
- package/docs/multi-agent-clusters.md.bak +0 -229
|
@@ -0,0 +1,679 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/API/stability.ai/audio.js
|
|
3
|
+
* @module stability.ai/audio
|
|
4
|
+
* @description Pure HTTP wrapper for the Stability AI Stable Audio 3 API.
|
|
5
|
+
* Fully aligned with the official specifications extracted from:
|
|
6
|
+
* - lib/API/stability.ai/audio-3.md
|
|
7
|
+
* - lib/API/stability.ai/openapi.json
|
|
8
|
+
*
|
|
9
|
+
* This is a clean, production-ready library providing four main workflows:
|
|
10
|
+
* 1. textToAudio() → Text-to-audio generation using the `stable-audio-3` model
|
|
11
|
+
* 2. audioToAudio() → Audio-to-audio transformation / style transfer
|
|
12
|
+
* 3. inpaint() → Audio inpainting with time-based mask (replace sections)
|
|
13
|
+
* 4. fetchResult() → Manual polling for async generation results
|
|
14
|
+
*
|
|
15
|
+
* All generation endpoints are asynchronous (HTTP 202 Accepted + polling).
|
|
16
|
+
* High-level methods (`textToAudio`, `audioToAudio`, `inpaint`) handle
|
|
17
|
+
* submission + internal polling automatically and return a local audio file
|
|
18
|
+
* path plus rich metadata.
|
|
19
|
+
*
|
|
20
|
+
* Key Technical Details:
|
|
21
|
+
* - Uses `multipart/form-data` for all audio uploads (robust handling of
|
|
22
|
+
* local paths, remote URLs, Buffers, and Blobs).
|
|
23
|
+
* - Supports both binary (`audio/*`) and JSON (`application/json` for base64) responses.
|
|
24
|
+
* - Model: `stable-audio-3` (fixed; 26 credits per successful generation).
|
|
25
|
+
* - Max duration: 380 seconds (default 190s). Sample rate: 44.1 kHz stereo.
|
|
26
|
+
* - Output formats: `mp3` (default) or `wav`.
|
|
27
|
+
* - English prompts only. No copyrighted content permitted.
|
|
28
|
+
* - Remote audio URL handling includes automatic download + proper MIME/filename.
|
|
29
|
+
* - API errors (e.g. 400/403/422/429/500) are raised as `Error` objects carrying the HTTP status and the response body.
|
|
30
|
+
*
|
|
31
|
+
* Usage Pattern (recommended):
|
|
32
|
+
* ```js
|
|
33
|
+
* import { textToAudio, audioToAudio, inpaint } from './audio.js';
|
|
34
|
+
* const result = await textToAudio('cinematic orchestral score', { duration: 240 });
|
|
35
|
+
* ```
|
|
36
|
+
*
|
|
37
|
+
* @see {@link https://platform.stability.ai/docs} Official Stability AI docs
|
|
38
|
+
* @see {@link ./audio-3.md} Detailed API specification
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* @constant {string} BASE_URL
|
|
43
|
+
* @description Origin of the Stability AI API; request helpers append the `/v2beta/audio/...` path.
|
|
44
|
+
*/
|
|
45
|
+
const BASE_URL = 'https://api.stability.ai';
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* @constant {string} TMP_DIR
|
|
49
|
+
* @description Local temporary directory for storing generated and reference audio files.
|
|
50
|
+
* Located at `<cwd>/.cache/stability`.
|
|
51
|
+
*/
|
|
52
|
+
const TMP_DIR = path.join(process.cwd(), '.cache', 'stability');
|
|
53
|
+
|
|
54
|
+
import { request as doRequest } from '@j-o-r/apiserver';
|
|
55
|
+
import fs from 'fs/promises';
|
|
56
|
+
import path from 'path';
|
|
57
|
+
|
|
58
|
+
/**
 * Builds the authenticated headers sent with every Stability AI request.
 *
 * The API key is read from `process.env.STABILITY_API_KEY` on each call and
 * trimmed, so a blank or whitespace-padded value is never placed in the
 * `Authorization` header.
 *
 * @param {string} [acceptHeader='audio/*'] - Accept header value.
 *   Use `'audio/*'` for a binary audio response or `'application/json'` for base64 JSON.
 * @returns {Object} Headers object with `Authorization` (Bearer token) and `Accept`.
 * @throws {Error} If `STABILITY_API_KEY` is unset, empty, or only whitespace.
 *
 * @example
 * const headers = getHeaders('application/json');
 */
const getHeaders = (acceptHeader = 'audio/*') => {
  // A whitespace-only key is as unusable as a missing one; treat both the same.
  const apiKey = process.env.STABILITY_API_KEY?.trim();
  if (!apiKey) {
    throw new Error('Missing STABILITY_API_KEY! Please export STABILITY_API_KEY=your_key');
  }
  return {
    'Authorization': `Bearer ${apiKey}`,
    'Accept': acceptHeader
  };
};
|
|
78
|
+
|
|
79
|
+
/**
 * Makes sure the audio cache directory (`TMP_DIR`) is present on disk.
 * Missing parent directories are created as well; an existing directory is fine.
 *
 * @async
 * @returns {Promise<void>}
 */
async function ensureTmpDir() {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(TMP_DIR, mkdirOptions);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Saves audio data to a timestamped file inside `TMP_DIR`.
 *
 * The input is decoded/validated first; the cache directory is only created
 * once there is something to write.
 *
 * @async
 * @param {Buffer|string|Blob|ArrayBuffer} audioData - Audio content to save.
 *   - Buffer: raw bytes, written as-is
 *   - string starting with `http://` or `https://`: remote URL, downloaded via `fetch`
 *   - any other string made only of base64 characters: decoded as base64
 *   - Blob / ArrayBuffer: converted to a Buffer
 * @param {string} [filenamePrefix='stability-audio'] - Prefix for the generated filename.
 * @param {string} [ext='mp3'] - File extension (`mp3` or `wav` recommended).
 * @returns {Promise<string>} Path of the saved file: `<TMP_DIR>/<prefix>-<Date.now()>.<ext>`.
 * @throws {Error} For unsupported input types/strings or a non-OK download response.
 */
async function saveAudioToLocal(audioData, filenamePrefix = 'stability-audio', ext = 'mp3') {
  let buffer;

  if (typeof audioData === 'string') {
    // Require a full scheme: a bare `startsWith('http')` would also capture
    // base64 payloads that merely begin with those four characters.
    if (audioData.startsWith('http://') || audioData.startsWith('https://')) {
      const response = await fetch(audioData);
      if (!response.ok) {
        throw new Error(`Failed to download audio: ${response.status} ${response.statusText}`);
      }
      buffer = Buffer.from(await response.arrayBuffer());
    } else if (/^[A-Za-z0-9+/=]+$/.test(audioData)) {
      buffer = Buffer.from(audioData, 'base64');
    } else {
      throw new Error('Unsupported audioData string format');
    }
  } else if (audioData instanceof Buffer) {
    buffer = audioData;
  } else if (audioData instanceof Blob) {
    buffer = Buffer.from(await audioData.arrayBuffer());
  } else if (audioData instanceof ArrayBuffer) {
    buffer = Buffer.from(audioData);
  } else {
    throw new Error('Unsupported audioData type for saving');
  }

  await ensureTmpDir();

  const filename = `${filenamePrefix}-${Date.now()}.${ext}`;
  const localPath = path.join(TMP_DIR, filename);
  await fs.writeFile(localPath, buffer);
  return localPath;
}
|
|
135
|
+
|
|
136
|
+
/* ============================================================
|
|
137
|
+
INTERNAL: Submit generation request (returns generation id)
|
|
138
|
+
============================================================ */
|
|
139
|
+
|
|
140
|
+
/**
 * Posts a multipart generation request to a Stable Audio endpoint and returns
 * the generation id from the 202 response.
 * Shared by textToAudio, audioToAudio, and inpaint.
 *
 * @async
 * @param {string} endpoint - Endpoint slug: 'text-to-audio', 'audio-to-audio', or 'inpaint'.
 * @param {FormData} formData - Populated multipart form data.
 * @param {string} [acceptHeader='audio/*'] - Accept header for the response type.
 * @returns {Promise<string>} Generation id to pass to the polling helper.
 * @throws {Error} On any non-202 status, or a 202 response without an `id`.
 */
async function submitGeneration(endpoint, formData, acceptHeader = 'audio/*') {
  const { status, response: payload } = await doRequest(
    `${BASE_URL}/v2beta/audio/stable-audio/${endpoint}`,
    'POST',
    getHeaders(acceptHeader),
    formData
  );

  if (status !== 202) {
    // Error statuses carry the API's body; anything else is simply unexpected.
    const message = status >= 400
      ? `Stability API error ${status}: ${JSON.stringify(payload)}`
      : `Unexpected status ${status} from ${endpoint}`;
    throw new Error(message);
  }

  const generationId = payload?.id;
  if (generationId) {
    return generationId;
  }
  throw new Error(`No generation id in 202 response: ${JSON.stringify(payload)}`);
}
|
|
171
|
+
|
|
172
|
+
/* ============================================================
|
|
173
|
+
INTERNAL: Poll for generation result
|
|
174
|
+
============================================================ */
|
|
175
|
+
|
|
176
|
+
/**
 * Polls the results endpoint until the generation completes, fails, or the
 * attempt budget is exhausted.
 *
 * The delay between polls is fixed (`intervalMs`); there is no backoff. No
 * delay is inserted after the final attempt, so the timeout error is raised
 * as soon as the last poll comes back still pending.
 *
 * @async
 * @param {string} id - Generation id returned by submitGeneration.
 * @param {string} [acceptHeader='audio/*'] - Accept header.
 * @param {number} [maxAttempts=72] - Maximum number of polls (about 6 minutes of waiting at the default interval).
 * @param {number} [intervalMs=5000] - Delay between polls in milliseconds.
 * @returns {Promise<Object>} The 200 response object from doRequest.
 * @throws {Error} On a missing id, 404, any status other than 200/202, or timeout.
 */
async function pollForResult(id, acceptHeader = 'audio/*', maxAttempts = 72, intervalMs = 5000) {
  if (id == null || String(id).trim() === '') {
    throw new Error('pollForResult() requires a generation id');
  }

  // Encode the id so it can never alter the request path.
  const url = `${BASE_URL}/v2beta/audio/results/${encodeURIComponent(id)}`;
  const headers = getHeaders(acceptHeader);

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const res = await doRequest(url, 'GET', headers);

    if (res.status === 200) {
      return res;
    }

    if (res.status === 404) {
      throw new Error(`Generation ${id} not found or expired`);
    }

    if (res.status !== 202) {
      throw new Error(`Poll error ${res.status}: ${JSON.stringify(res.response)}`);
    }

    // 202: still in progress. Only wait when another poll will follow.
    if (attempt < maxAttempts - 1) {
      await new Promise((resolve) => setTimeout(resolve, intervalMs));
    }
  }

  throw new Error(`Timeout after ${maxAttempts} attempts polling generation ${id}`);
}
|
|
214
|
+
|
|
215
|
+
/* ============================================================
|
|
216
|
+
INTERNAL: Process result into usable output
|
|
217
|
+
============================================================ */
|
|
218
|
+
|
|
219
|
+
/**
 * Converts a successful result response into a standardized output object.
 *
 * Three shapes are handled, keyed on what doRequest reports:
 *   - `responseType === 'blob'`: binary audio; saved to disk, metadata read from headers.
 *   - `responseType === 'js'` or an object body: JSON; `audio` (base64 string) is saved if present.
 *   - anything else (including a null body): returned untouched as `{ raw, status }`.
 *
 * @async
 * @param {Object} res - Response from doRequest or pollForResult.
 * @param {Object} [options={}] - Processing options.
 * @param {string} [options.filenamePrefix='stability-audio'] - Filename prefix.
 * @param {string} [options.output_format='mp3'] - Extension used for the saved file.
 * @returns {Promise<Object>} One of:
 *   - binary: `{ local_path, finish_reason, seed, x_request_id, raw: { headers } }`
 *   - JSON:   `{ local_path|null, audio_base64|null, seed, finish_reason, raw }`
 *   - other:  `{ raw, status }`
 */
async function processResult(res, options = {}) {
  const prefix = options.filenamePrefix || 'stability-audio';
  const ext = options.output_format || 'mp3';

  if (res.responseType === 'blob') {
    const buffer = Buffer.from(await res.response.arrayBuffer());
    const localPath = await saveAudioToLocal(buffer, prefix, ext);

    return {
      local_path: localPath,
      finish_reason: res.headers.get('finish-reason') || 'SUCCESS',
      seed: res.headers.get('seed'),
      x_request_id: res.headers.get('x-request-id'),
      raw: { headers: Object.fromEntries(res.headers.entries()) }
    };
  }

  const data = res.response;
  // `typeof null` is 'object', so a missing body must be excluded explicitly;
  // otherwise reading `data.audio` below would throw a TypeError.
  if (data != null && (res.responseType === 'js' || typeof data === 'object')) {
    let localPath = null;

    if (data.audio && typeof data.audio === 'string') {
      localPath = await saveAudioToLocal(data.audio, prefix, ext);
    }

    return {
      local_path: localPath,
      audio_base64: data.audio || null,
      seed: data.seed,
      finish_reason: data.finish_reason || 'SUCCESS',
      raw: data
    };
  }

  // Fallback: nothing recognizable to save; hand the raw body back.
  return {
    raw: res.response,
    status: res.status
  };
}
|
|
273
|
+
|
|
274
|
+
/* ============================================================
|
|
275
|
+
HELPER: Download remote audio to local temp file
|
|
276
|
+
============================================================ */
|
|
277
|
+
|
|
278
|
+
/**
 * Fetches a remote audio file and stores it in `TMP_DIR` as
 * `temp-ref-<Date.now()><ext>`.
 *
 * The extension is taken (lower-cased) from the last path segment of the URL
 * when that segment contains a dot; otherwise `.mp3` is used.
 *
 * @async
 * @param {string} url - HTTP/HTTPS URL to an audio file (mp3/wav recommended).
 * @returns {Promise<string>} Path to the downloaded temp file.
 * @throws {Error} If the request fails or the response status is not OK.
 */
async function downloadRemoteAudioToTemp(url) {
  await ensureTmpDir();

  const res = await fetch(url);
  if (!res.ok) {
    throw new Error(`Failed to download reference audio: ${res.status} ${res.statusText}`);
  }
  const bytes = Buffer.from(await res.arrayBuffer());

  // Derive a source name from the URL; keep the default if it cannot be parsed.
  let sourceName = 'reference-audio.mp3';
  try {
    const lastSegment = path.basename(new URL(url).pathname);
    if (lastSegment && lastSegment.includes('.')) {
      sourceName = lastSegment;
    }
  } catch (_) {}

  const suffix = path.extname(sourceName).toLowerCase() || '.mp3';
  const target = path.join(TMP_DIR, `temp-ref-${Date.now()}${suffix}`);

  await fs.writeFile(target, bytes);
  return target;
}
|
|
314
|
+
|
|
315
|
+
/* ============================================================
|
|
316
|
+
HELPER: Append audio file/URL to FormData (robust)
|
|
317
|
+
============================================================ */
|
|
318
|
+
|
|
319
|
+
/**
 * Appends an audio input to `formData` under the `audio` field, with a
 * filename and MIME type.
 *
 * Buffers and Blobs are attached directly from memory (nothing is written to
 * disk). Remote URLs are downloaded via downloadRemoteAudioToTemp, read, and
 * the temporary download is removed again.
 *
 * MIME type: `audio/wav` for names ending in `.wav` (case-insensitive),
 * otherwise `audio/mpeg`; a Blob's own `type` wins when it is set.
 *
 * @async
 * @param {FormData} formData - Target FormData object.
 * @param {string|Buffer|Blob} audioInput - Audio source:
 *   - string: local file path or http(s):// URL
 *   - Buffer: raw audio bytes (sent as `temp-audio-<timestamp>.mp3`)
 *   - Blob: blob with optional `.name` and `.type`; only the base name of `.name` is used
 * @returns {Promise<void>}
 * @throws {Error} If audioInput is missing or of an unsupported type, or the file cannot be read.
 */
async function appendAudioToFormData(formData, audioInput) {
  if (!audioInput) {
    throw new Error('Audio input is required (file path, URL, Buffer, or Blob)');
  }

  const mimeFromName = (name) =>
    (path.extname(name).toLowerCase() === '.wav' ? 'audio/wav' : 'audio/mpeg');

  let payload;
  let filename;
  let mimeType = 'audio/mpeg';

  if (typeof audioInput === 'string') {
    if (audioInput.startsWith('http://') || audioInput.startsWith('https://')) {
      // Remote URL: download, load into memory, then drop the temp copy.
      const tempPath = await downloadRemoteAudioToTemp(audioInput);
      try {
        payload = await fs.readFile(tempPath);
      } finally {
        await fs.rm(tempPath, { force: true });
      }
      filename = path.basename(tempPath);
    } else {
      // Local file path
      payload = await fs.readFile(audioInput);
      filename = path.basename(audioInput);
    }
    mimeType = mimeFromName(filename);
  } else if (audioInput instanceof Buffer) {
    payload = audioInput;
    filename = `temp-audio-${Date.now()}.mp3`;
  } else if (audioInput instanceof Blob) {
    payload = audioInput;
    // Strip any directory components from a caller-supplied name.
    filename = path.basename(String(audioInput.name || '')) || `temp-audio-${Date.now()}.mp3`;
    mimeType = audioInput.type || 'audio/mpeg';
  } else {
    throw new Error('audioInput must be a file path (string), URL (string), Buffer, or Blob');
  }

  formData.append('audio', new Blob([payload], { type: mimeType }), filename);
}
|
|
379
|
+
|
|
380
|
+
/* ============================================================
|
|
381
|
+
WORKFLOW 1: TEXT-TO-AUDIO
|
|
382
|
+
============================================================ */
|
|
383
|
+
|
|
384
|
+
/**
 * Generates audio from a text prompt using Stable Audio 3.
 *
 * Submits the prompt and optional parameters, receives a generation id (202),
 * polls until the result is ready, and returns the audio saved locally plus
 * metadata.
 *
 * **Constraints** (per the Stable Audio 3 spec notes; enforced by the API, not locally):
 *   - Prompt: English only, max 10,000 characters, descriptive (instruments, mood, genre, style).
 *   - Duration: 1–380 seconds (default 190).
 *   - Steps: 4–8 (default 8).
 *   - CFG Scale: 1–25 (default 1).
 *   - Seed: 0 (random) or 0–4,294,967,294.
 *   - Output: mp3 (default) or wav at 44.1 kHz stereo.
 *   - Cost: flat 26 credits per successful generation.
 *
 * Optional parameters are only sent when provided, so the API defaults apply otherwise.
 *
 * @async
 * @function textToAudio
 * @param {string} prompt - Required descriptive text prompt. Must be a non-empty string.
 * @param {Object} [options={}] - Optional generation parameters.
 * @param {string} [options.model='stable-audio-3'] - Model identifier. Must be exactly `'stable-audio-3'`.
 * @param {number} [options.duration] - Target duration in seconds.
 * @param {number} [options.seed] - Random seed for reproducibility (0 = random).
 * @param {number} [options.steps] - Number of sampling steps.
 * @param {number} [options.cfg_scale] - Prompt adherence strength.
 * @param {string} [options.output_format] - `'mp3'` or `'wav'`; also used as the saved file's extension (default `mp3`).
 * @param {string} [options.accept='audio/*'] - Response format: `'audio/*'` (binary) or `'application/json'`.
 * @param {string} [options.filenamePrefix='stability-text-to-audio'] - Prefix for the saved file.
 * @returns {Promise<Object>} Result object, e.g.:
 *   ```js
 *   {
 *     local_path: '/path/to/.cache/stability/stability-text-to-audio-1234567890.mp3',
 *     finish_reason: 'SUCCESS',
 *     seed: 123456789,
 *     x_request_id: 'req_...',
 *     raw: { headers: {...} }  // or the full JSON body if accept=application/json
 *   }
 *   ```
 * @throws {Error} Missing STABILITY_API_KEY, invalid prompt, unsupported model,
 *   API errors, polling timeout, or download failures.
 *
 * @example
 * const result = await textToAudio('upbeat electronic synthwave with driving bass');
 * console.log('Saved to:', result.local_path);
 *
 * @example
 * const result = await textToAudio(
 *   'cinematic orchestral music, epic brass, strings, choir, 120 BPM',
 *   { duration: 240, seed: 42, steps: 8, cfg_scale: 7, output_format: 'wav', accept: 'application/json' }
 * );
 * if (result.audio_base64) {
 *   // handle base64
 * }
 */
async function textToAudio(prompt, options = {}) {
  const model = options.model || 'stable-audio-3';

  if (model !== 'stable-audio-3') {
    throw new Error(
      `textToAudio() expects model 'stable-audio-3'. Got: ${model}`
    );
  }

  if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') {
    throw new Error('textToAudio() requires a non-empty prompt string');
  }

  const formData = new FormData();
  formData.append('prompt', prompt);
  formData.append('model', model);

  // `!= null` keeps legitimate zero values (e.g. seed 0) while skipping unset ones.
  for (const field of ['duration', 'seed', 'steps', 'cfg_scale']) {
    if (options[field] != null) formData.append(field, String(options[field]));
  }
  if (options.output_format) formData.append('output_format', options.output_format);

  const accept = options.accept || 'audio/*';
  const id = await submitGeneration('text-to-audio', formData, accept);
  const resultRes = await pollForResult(id, accept);

  // Honour a caller-supplied prefix; fall back to the workflow default only when absent.
  return processResult(resultRes, {
    ...options,
    filenamePrefix: options.filenamePrefix ?? 'stability-text-to-audio'
  });
}
|
|
476
|
+
|
|
477
|
+
/* ============================================================
|
|
478
|
+
WORKFLOW 2: AUDIO-TO-AUDIO
|
|
479
|
+
============================================================ */
|
|
480
|
+
|
|
481
|
+
/**
 * Transforms an existing audio sample using a text prompt (audio-to-audio / style transfer).
 *
 * Uploads a reference audio (file, URL, Buffer, or Blob) together with the
 * prompt, then polls for the result and returns it saved locally plus metadata.
 *
 * **Additional parameter**:
 *   - `strength`: denoising strength (0.0 = identical to input, 1.0 = no influence from input).
 *
 * Other parameters and constraints match `textToAudio`; optional values are
 * only sent when provided.
 *
 * @async
 * @function audioToAudio
 * @param {string} prompt - Descriptive text prompt (English, max 10k chars). Must be non-empty.
 * @param {string|Buffer|Blob} audioInput - Reference audio:
 *   - Local file path (string)
 *   - Remote HTTP/HTTPS URL (string), downloaded automatically
 *   - Buffer (raw bytes)
 *   - Blob (with optional name/type)
 * @param {Object} [options={}] - Generation options (`model`, `duration`, `seed`, `steps`,
 *   `cfg_scale`, `output_format`, `accept` as for textToAudio).
 * @param {number} [options.strength] - Denoising strength (0–1).
 * @param {string} [options.filenamePrefix='stability-audio-to-audio'] - Prefix for the saved file.
 * @returns {Promise<Object>} Same structure as the textToAudio result.
 * @throws {Error} Same as textToAudio, plus audio input validation/read errors.
 *
 * @example
 * const result = await audioToAudio(
 *   'transform into orchestral version with strings and choir',
 *   './reference-track.mp3',
 *   { strength: 0.75, duration: 180 }
 * );
 */
async function audioToAudio(prompt, audioInput, options = {}) {
  const model = options.model || 'stable-audio-3';

  if (model !== 'stable-audio-3') {
    throw new Error(
      `audioToAudio() expects model 'stable-audio-3'. Got: ${model}`
    );
  }

  if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') {
    throw new Error('audioToAudio() requires a non-empty prompt string');
  }

  const formData = new FormData();
  formData.append('prompt', prompt);
  formData.append('model', model);

  // `!= null` keeps legitimate zero values (e.g. strength 0) while skipping unset ones.
  if (options.duration != null) formData.append('duration', String(options.duration));
  if (options.seed != null) formData.append('seed', String(options.seed));
  if (options.steps != null) formData.append('steps', String(options.steps));
  if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
  if (options.output_format) formData.append('output_format', options.output_format);
  if (options.strength != null) formData.append('strength', String(options.strength));

  // Handle audio input (URL, file path, Buffer, or Blob)
  await appendAudioToFormData(formData, audioInput);

  const accept = options.accept || 'audio/*';
  const id = await submitGeneration('audio-to-audio', formData, accept);
  const resultRes = await pollForResult(id, accept);

  // Honour a caller-supplied prefix; fall back to the workflow default only when absent.
  return processResult(resultRes, {
    ...options,
    filenamePrefix: options.filenamePrefix ?? 'stability-audio-to-audio'
  });
}
|
|
546
|
+
|
|
547
|
+
/* ============================================================
|
|
548
|
+
WORKFLOW 3: INPAINT
|
|
549
|
+
============================================================ */
|
|
550
|
+
|
|
551
|
+
/**
 * Performs audio inpainting: replaces a time segment of an audio file with new
 * content generated from a text prompt.
 *
 * `mask_start` and `mask_end` (seconds) define the region to regenerate; they
 * are always sent, defaulting to 30 and 380. The mask is checked locally
 * before any upload: both values must be finite numbers with
 * `0 <= mask_start < mask_end`.
 *
 * @async
 * @function inpaint
 * @param {string} prompt - Text prompt describing the desired replacement content. Must be non-empty.
 * @param {string|Buffer|Blob} audioInput - Reference audio (file path, URL, Buffer, or Blob).
 * @param {Object} [options={}] - Generation options (`model`, `duration`, `seed`, `steps`,
 *   `cfg_scale`, `output_format`, `accept` as for textToAudio).
 * @param {number} [options.mask_start=30] - Start time (seconds) of the inpaint mask (spec range 0–380).
 * @param {number} [options.mask_end=380] - End time (seconds) of the inpaint mask (spec range 0–380).
 * @param {string} [options.filenamePrefix='stability-inpaint'] - Prefix for the saved file.
 * @returns {Promise<Object>} Same result structure as the other generation methods.
 * @throws {Error} Unsupported model, empty prompt, invalid mask range, audio input
 *   errors, API errors, or polling timeout.
 *
 * @example
 * // Inpaint the middle section
 * const result = await inpaint(
 *   'add a soaring guitar solo in this section',
 *   'full-track.mp3',
 *   { mask_start: 60, mask_end: 120, duration: 180 }
 * );
 */
async function inpaint(prompt, audioInput, options = {}) {
  const model = options.model || 'stable-audio-3';

  if (model !== 'stable-audio-3') {
    throw new Error(
      `inpaint() expects model 'stable-audio-3'. Got: ${model}`
    );
  }

  if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') {
    throw new Error('inpaint() requires a non-empty prompt string');
  }

  // Reject an unusable mask up front rather than after uploading the audio.
  const maskStart = Number(options.mask_start ?? 30);
  const maskEnd = Number(options.mask_end ?? 380);
  if (
    !Number.isFinite(maskStart) ||
    !Number.isFinite(maskEnd) ||
    maskStart < 0 ||
    maskEnd <= maskStart
  ) {
    throw new Error(
      `inpaint() requires 0 <= mask_start < mask_end (seconds). Got: mask_start=${options.mask_start ?? 30}, mask_end=${options.mask_end ?? 380}`
    );
  }

  const formData = new FormData();
  formData.append('prompt', prompt);
  formData.append('model', model);

  // `!= null` keeps legitimate zero values while skipping unset ones.
  if (options.duration != null) formData.append('duration', String(options.duration));
  if (options.seed != null) formData.append('seed', String(options.seed));
  if (options.steps != null) formData.append('steps', String(options.steps));
  if (options.cfg_scale != null) formData.append('cfg_scale', String(options.cfg_scale));
  if (options.output_format) formData.append('output_format', options.output_format);

  // Inpaint specific
  formData.append('mask_start', String(maskStart));
  formData.append('mask_end', String(maskEnd));

  // Handle audio input
  await appendAudioToFormData(formData, audioInput);

  const accept = options.accept || 'audio/*';
  const id = await submitGeneration('inpaint', formData, accept);
  const resultRes = await pollForResult(id, accept);

  // Honour a caller-supplied prefix; fall back to the workflow default only when absent.
  return processResult(resultRes, {
    ...options,
    filenamePrefix: options.filenamePrefix ?? 'stability-inpaint'
  });
}
|
|
615
|
+
|
|
616
|
+
/* ============================================================
|
|
617
|
+
WORKFLOW 4: FETCH RESULT (manual polling)
|
|
618
|
+
============================================================ */
|
|
619
|
+
|
|
620
|
+
/**
 * Performs a single status/result lookup for a generation by its ID.
 * Intended for callers that run their own polling loop or resume a
 * previously submitted job.
 *
 * Issues one GET to `${BASE_URL}/v2beta/audio/results/${id}` and maps the
 * HTTP status of the reply:
 *   - 200 → the reply is passed to `processResult` and that value is returned
 *   - 202 → `{ status: 'in-progress', id, raw }` where `raw` is the reply body
 *   - 404 → throws (generation not found or expired)
 *   - anything else → throws with the status and JSON-encoded reply body
 *
 * @async
 * @function fetchResult
 * @param {string} id - Generation ID (from a previous 202 response).
 * @param {string} [acceptHeader='audio/*'] - Value handed to `getHeaders`,
 *   e.g. `'audio/*'` or `'application/json'`.
 * @returns {Promise<Object>} Processed result, or the in-progress marker object.
 * @throws {Error} If `id` is falsy, the API answers 404, or any other
 *   unexpected status is returned.
 *
 * @example
 * // Manual polling
 * let result;
 * while (true) {
 *   result = await fetchResult(id);
 *   if (result.status !== 'in-progress') break;
 *   await new Promise(r => setTimeout(r, 5000));
 * }
 */
async function fetchResult(id, acceptHeader = 'audio/*') {
  if (!id) {
    throw new Error('fetchResult() requires a generation id');
  }

  const requestHeaders = getHeaders(acceptHeader);
  const endpoint = `${BASE_URL}/v2beta/audio/results/${id}`;
  const reply = await doRequest(endpoint, 'GET', requestHeaders);

  switch (reply.status) {
    case 200:
      // Finished: delegate decoding/saving to the shared result handler.
      return processResult(reply);

    case 202:
      // Still generating: give the caller enough to poll again.
      return { status: 'in-progress', id, raw: reply.response };

    case 404:
      throw new Error(`Generation ${id} not found or expired`);

    default:
      throw new Error(`fetchResult error ${reply.status}: ${JSON.stringify(reply.response)}`);
  }
}
|
|
671
|
+
|
|
672
|
+
export {
|
|
673
|
+
getHeaders,
|
|
674
|
+
saveAudioToLocal,
|
|
675
|
+
textToAudio,
|
|
676
|
+
audioToAudio,
|
|
677
|
+
inpaint,
|
|
678
|
+
fetchResult
|
|
679
|
+
};
|