@j-o-r/hello-dave 0.0.10 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.md.bak.1779452127 +240 -0
- package/TODO.md +30 -8
- package/agents/code_agent.js +6 -6
- package/agents/daisy_agent.js +10 -7
- package/agents/minimax.js +173 -0
- package/agents/stability.js +173 -0
- package/bin/codeDave +1 -1
- package/bin/dave.js +1 -1
- package/docs/music-toolsets.md +137 -0
- package/docs/plans/minimax-music-generation.md +80 -0
- package/docs/plans/unified-agent-architecture.md +146 -0
- package/docs/plans/websocket-streaming-plan.md.bak +317 -0
- package/docs/prompt/task_clarification_and_documentation.md +35 -0
- package/lib/API/minimax/ImageToolset.js +169 -0
- package/lib/API/minimax/MusicToolset.js +290 -0
- package/lib/API/minimax/VideoToolset.js +296 -0
- package/lib/API/minimax/image.generation.md +239 -0
- package/lib/API/minimax/image.js +219 -0
- package/lib/API/minimax/image.to.image.md +257 -0
- package/lib/API/minimax/index.js +16 -0
- package/lib/API/minimax/music.cover.preprocess.md +206 -0
- package/lib/API/minimax/music.generation.md +346 -0
- package/lib/API/minimax/music.js +257 -0
- package/lib/API/minimax/music.lyrics.generation.md +205 -0
- package/lib/API/minimax/video.download.md +133 -0
- package/lib/API/minimax/video.first.last.image.md +186 -0
- package/lib/API/minimax/video.from.image.md +206 -0
- package/lib/API/minimax/video.from.subject.md +164 -0
- package/lib/API/minimax/video.generation.md +192 -0
- package/lib/API/minimax/video.js +339 -0
- package/lib/API/minimax/video.query.md +128 -0
- package/lib/API/stability.ai/ImageToolset.js +357 -0
- package/lib/API/stability.ai/MusicToolset.js +302 -0
- package/lib/API/stability.ai/audio-3.md +205 -0
- package/lib/API/stability.ai/audio.js +679 -0
- package/lib/API/stability.ai/image.js +911 -0
- package/lib/API/stability.ai/image.md +271 -0
- package/lib/API/stability.ai/index.js +11 -0
- package/lib/API/stability.ai/openapi.json +17118 -0
- package/lib/API/x.ai/ImageToolset.js +165 -0
- package/lib/API/x.ai/image.editing.md +86 -0
- package/lib/API/x.ai/image.js +393 -0
- package/lib/API/x.ai/image.md +213 -0
- package/lib/API/x.ai/image.to.generation.md +494 -0
- package/lib/API/x.ai/image.to.video.md +23 -0
- package/lib/API/x.ai/index.js +9 -0
- package/lib/AgentManager.js +1 -1
- package/lib/CdnToolset.js +191 -0
- package/lib/ToolSet.js +19 -1
- package/lib/cdn.js +373 -0
- package/lib/fafs.js +3 -1
- package/lib/genericToolset.js +43 -166
- package/lib/index.js +9 -1
- package/package.json +2 -2
- package/types/API/minimax/ImageToolset.d.ts +3 -0
- package/types/API/minimax/MusicToolset.d.ts +3 -0
- package/types/API/minimax/VideoToolset.d.ts +3 -0
- package/types/API/minimax/image.d.ts +109 -0
- package/types/API/minimax/index.d.ts +15 -0
- package/types/API/minimax/music.d.ts +46 -0
- package/types/API/minimax/video.d.ts +165 -0
- package/types/API/stability.ai/ImageToolset.d.ts +3 -0
- package/types/API/stability.ai/MusicToolset.d.ts +3 -0
- package/types/API/stability.ai/audio.d.ts +193 -0
- package/types/API/stability.ai/image.d.ts +274 -0
- package/types/API/stability.ai/index.d.ts +11 -0
- package/types/API/x.ai/ImageToolset.d.ts +3 -0
- package/types/API/x.ai/image.d.ts +82 -0
- package/types/API/x.ai/index.d.ts +9 -0
- package/types/AgentManager.d.ts +1 -1
- package/types/CdnToolset.d.ts +20 -0
- package/types/ToolSet.d.ts +8 -0
- package/types/cdn.d.ts +141 -0
- package/types/index.d.ts +8 -2
- package/docs/multi-agent-clusters.md.bak +0 -229
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/API/stability.ai/ImageToolset.js
|
|
3
|
+
* @module stability.ai/ImageToolset
|
|
4
|
+
* @description Comprehensive ToolSet for the Stability AI Stable Image API (v2beta).
|
|
5
|
+
*
|
|
6
|
+
* Exposes high-level tools that wrap the underlying `image.js` implementation:
|
|
7
|
+
* - generate_ultra
|
|
8
|
+
* - generate_core
|
|
9
|
+
* - generate_sd3
|
|
10
|
+
* - upscale_conservative
|
|
11
|
+
* - edit_erase
|
|
12
|
+
* - edit_inpaint
|
|
13
|
+
* - edit_outpaint
|
|
14
|
+
* - edit_search_and_replace
|
|
15
|
+
* - edit_search_and_recolor
|
|
16
|
+
* - edit_remove_background
|
|
17
|
+
*
|
|
18
|
+
* All tools handle asynchronous generation internally (submit → poll until ready).
|
|
19
|
+
* The result is only returned once the image file is successfully generated and saved locally.
|
|
20
|
+
*
|
|
21
|
+
* Designed specifically for AI agents, LLMs, and function-calling systems.
|
|
22
|
+
* Each tool includes rich, LLM-friendly descriptions, full JSON schemas with constraints,
|
|
23
|
+
* defaults, and practical guidance.
|
|
24
|
+
*
|
|
25
|
+
* Key Stable Image Facts (for LLMs):
|
|
26
|
+
* - Output formats: png (default), jpeg, webp
|
|
27
|
+
* - Max image size: ~9.4 million pixels total
|
|
28
|
+
* - Content moderation is strict (public figures, copyrighted material, etc.)
|
|
29
|
+
* - Many operations cost between 2–60 credits
|
|
30
|
+
*
|
|
31
|
+
* @see ./image.js for the underlying implementation
|
|
32
|
+
* @see ./image.md for the full API specification
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import ToolSet from '../../ToolSet.js';
|
|
36
|
+
import * as stability from './image.js';
|
|
37
|
+
|
|
38
|
+
// Registry that every `tools.add(...)` call in this module registers into.
// NOTE(review): the meaning of the 'auto' argument is defined by ../../ToolSet.js — confirm there.
const tools = new ToolSet('auto');
|
|
39
|
+
|
|
40
|
+
/* ============================================================
|
|
41
|
+
GENERATE: ULTRA
|
|
42
|
+
============================================================ */
|
|
43
|
+
|
|
44
|
+
/*
 * Tool: generate_ultra
 * Forwards the prompt plus the full argument object to `stability.generateUltra`
 * and answers with a pretty-printed JSON string holding `local_path`,
 * `finish_reason`, `seed` and a fixed note.
 */
tools.add(
  'generate_ultra',
  [
    "Generate the highest quality photorealistic image using Stability AI's Stable Image Ultra model. ",
    'Best for professional, highly detailed, realistic results. Supports text-to-image and image-to-image. ',
    'Automatically handles polling for async generations.'
  ].join(''),
  {
    type: 'object',
    properties: {
      prompt: { type: 'string', description: 'Detailed English prompt describing the desired image. Max 10,000 characters.' },
      negative_prompt: { type: 'string', description: 'What you do NOT want to see in the image.' },
      aspect_ratio: {
        type: 'string',
        enum: ['1:1', '16:9', '9:16', '21:9', '9:21', '2:3', '3:2', '4:5', '5:4'],
        default: '1:1',
        description: 'Aspect ratio of the output image.'
      },
      image_url: { type: 'string', description: 'Optional reference image URL for image-to-image generation.' },
      strength: { type: 'number', minimum: 0, maximum: 1, description: 'Denoising strength for image-to-image (required when using image_url).' },
      output_format: { type: 'string', enum: ['png', 'jpeg', 'webp'], default: 'png', description: 'Output image format.' },
      seed: { type: 'number', description: 'Optional seed for reproducibility.' }
    },
    required: ['prompt']
  },
  async (args) => {
    const { local_path, finish_reason, seed } = await stability.generateUltra(args.prompt, args);
    const summary = {
      local_path,
      finish_reason,
      seed,
      note: 'Image generated with generate_ultra. Saved to local_path.'
    };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
99
|
+
|
|
100
|
+
/* ============================================================
|
|
101
|
+
GENERATE: CORE
|
|
102
|
+
============================================================ */
|
|
103
|
+
|
|
104
|
+
/*
 * Tool: generate_core
 * Passes the prompt and the complete argument object to `stability.generateCore`,
 * then reports `local_path`, `finish_reason`, `seed` and a note as formatted JSON.
 */
tools.add(
  'generate_core',
  'Fast and affordable image generation using Stable Image Core. Good for quick ideation and iteration.',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Detailed English prompt.'
      },
      negative_prompt: {
        type: 'string'
      },
      aspect_ratio: {
        type: 'string',
        enum: ['1:1', '16:9', '9:16', '21:9', '9:21', '2:3', '3:2', '4:5', '5:4'],
        default: '1:1'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      },
      seed: {
        type: 'number'
      }
    },
    required: ['prompt']
  },
  async (args) => {
    const { local_path, finish_reason, seed } = await stability.generateCore(args.prompt, args);
    const summary = { local_path, finish_reason, seed, note: 'Image generated with generate_core.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
128
|
+
|
|
129
|
+
/* ============================================================
|
|
130
|
+
GENERATE: SD3
|
|
131
|
+
============================================================ */
|
|
132
|
+
|
|
133
|
+
/*
 * Tool: generate_sd3
 * Hands the prompt and the whole argument object (mode, model, optional
 * reference image, etc.) to `stability.generateSD3` and returns a formatted
 * JSON summary of the result.
 */
tools.add(
  'generate_sd3',
  'Generate images using Stable Diffusion 3.5 models (Large, Large Turbo, Medium, Flash).',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Detailed English prompt.'
      },
      mode: {
        type: 'string',
        enum: ['text-to-image', 'image-to-image'],
        default: 'text-to-image'
      },
      model: {
        type: 'string',
        enum: ['sd3.5-large', 'sd3.5-large-turbo', 'sd3.5-medium', 'sd3.5-flash'],
        default: 'sd3.5-large'
      },
      image_url: {
        type: 'string'
      },
      strength: {
        type: 'number',
        minimum: 0,
        maximum: 1
      },
      aspect_ratio: {
        type: 'string',
        enum: ['1:1', '16:9', '9:16', '21:9', '9:21', '2:3', '3:2', '4:5', '5:4'],
        default: '1:1'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      },
      cfg_scale: {
        type: 'number'
      },
      seed: {
        type: 'number'
      }
    },
    required: ['prompt']
  },
  async (args) => {
    const { local_path, finish_reason, seed } = await stability.generateSD3(args.prompt, args);
    const summary = { local_path, finish_reason, seed, note: 'Image generated with generate_sd3.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
161
|
+
|
|
162
|
+
/* ============================================================
|
|
163
|
+
UPSCALE: CONSERVATIVE
|
|
164
|
+
============================================================ */
|
|
165
|
+
|
|
166
|
+
/*
 * Tool: upscale_conservative
 * Calls `stability.upscaleConservative(image_url, prompt, args)` and returns
 * `local_path`, `finish_reason`, `seed` and a note as formatted JSON.
 */
tools.add(
  'upscale_conservative',
  'Upscale an image to 4MP while preserving original details as much as possible.',
  {
    type: 'object',
    properties: {
      image_url: {
        type: 'string',
        description: 'URL of the image to upscale.'
      },
      prompt: {
        type: 'string',
        description: 'Description of the desired output.'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      },
      creativity: {
        type: 'number',
        minimum: 0.2,
        maximum: 0.5,
        default: 0.35
      }
    },
    required: ['image_url', 'prompt']
  },
  async (args) => {
    const upscaled = await stability.upscaleConservative(args.image_url, args.prompt, args);
    const { local_path, finish_reason, seed } = upscaled;
    return JSON.stringify(
      { local_path, finish_reason, seed, note: 'Image upscaled with upscale_conservative.' },
      null,
      2
    );
  }
);
|
|
189
|
+
|
|
190
|
+
/* ============================================================
|
|
191
|
+
EDIT: ERASE
|
|
192
|
+
============================================================ */
|
|
193
|
+
|
|
194
|
+
/*
 * Tool: edit_erase
 * Delegates to `stability.editErase(image_url, args)`; the optional mask,
 * grow_mask, output format and seed travel inside the argument object.
 * Returns `local_path`, `finish_reason` and a note as formatted JSON.
 */
tools.add(
  'edit_erase',
  'Remove unwanted objects or areas from an image using a mask or alpha channel.',
  {
    type: 'object',
    properties: {
      image_url: {
        type: 'string'
      },
      mask_url: {
        type: 'string'
      },
      grow_mask: {
        type: 'number',
        default: 5
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      },
      seed: {
        type: 'number'
      }
    },
    required: ['image_url']
  },
  async (args) => {
    const { local_path, finish_reason } = await stability.editErase(args.image_url, args);
    const summary = { local_path, finish_reason, note: 'Objects erased with edit_erase.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
217
|
+
|
|
218
|
+
/* ============================================================
|
|
219
|
+
EDIT: INPAINT
|
|
220
|
+
============================================================ */
|
|
221
|
+
|
|
222
|
+
/*
 * Tool: edit_inpaint
 * Calls `stability.editInpaint(image_url, prompt, args)`; mode, search_prompt
 * and mask_url are forwarded untouched inside the argument object.
 * Returns `local_path`, `finish_reason` and a note as formatted JSON.
 */
tools.add(
  'edit_inpaint',
  'Fill or replace areas in an image using a prompt. Supports search mode (auto-segment) or explicit mask.',
  {
    type: 'object',
    properties: {
      image_url: {
        type: 'string'
      },
      prompt: {
        type: 'string',
        description: 'What the replaced area should become.'
      },
      mode: {
        type: 'string',
        enum: ['mask', 'search'],
        default: 'search'
      },
      search_prompt: {
        type: 'string',
        description: 'Object to find and replace (when mode=search).'
      },
      mask_url: {
        type: 'string'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      },
      seed: {
        type: 'number'
      }
    },
    required: ['image_url', 'prompt']
  },
  async (args) => {
    const { local_path, finish_reason } = await stability.editInpaint(args.image_url, args.prompt, args);
    const summary = { local_path, finish_reason, note: 'Inpainting completed with edit_inpaint.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
247
|
+
|
|
248
|
+
/* ============================================================
|
|
249
|
+
EDIT: OUTPAINT
|
|
250
|
+
============================================================ */
|
|
251
|
+
|
|
252
|
+
/*
 * Tool: edit_outpaint
 * Validates that at least one expansion direction is positive, then calls
 * `stability.editOutpaint(image_url, params)` and returns `local_path`,
 * `finish_reason` and a note as formatted JSON.
 *
 * Why the guard: every direction defaults to 0 and none is required by the
 * schema, so without it a call carrying only `image_url` would request an
 * expansion of zero pixels on every side.
 *
 * Throws Error when left, right, up and down are all missing or <= 0.
 */
tools.add(
  'edit_outpaint',
  'Expand an image by generating new content in any direction (left, right, up, down). ' +
  'At least one of left, right, up, down must be greater than 0.',
  {
    type: 'object',
    properties: {
      image_url: { type: 'string' },
      left: { type: 'number', minimum: 0, default: 0, description: 'Pixels to add on the left side.' },
      right: { type: 'number', minimum: 0, default: 0, description: 'Pixels to add on the right side.' },
      up: { type: 'number', minimum: 0, default: 0, description: 'Pixels to add on the top side.' },
      down: { type: 'number', minimum: 0, default: 0, description: 'Pixels to add on the bottom side.' },
      prompt: { type: 'string' },
      output_format: { type: 'string', enum: ['png', 'jpeg', 'webp'], default: 'png' }
    },
    required: ['image_url']
  },
  async (params) => {
    // Number(undefined) is NaN, and NaN > 0 is false, so omitted sides count as "no expansion".
    const expandsSomewhere = ['left', 'right', 'up', 'down'].some((side) => Number(params[side]) > 0);
    if (!expandsSomewhere) {
      throw new Error('edit_outpaint requires at least one of left, right, up, down to be greater than 0.');
    }

    const result = await stability.editOutpaint(params.image_url, params);
    return JSON.stringify({
      local_path: result.local_path,
      finish_reason: result.finish_reason,
      note: 'Image expanded with edit_outpaint.'
    }, null, 2);
  }
);
|
|
277
|
+
|
|
278
|
+
/* ============================================================
|
|
279
|
+
EDIT: SEARCH AND REPLACE
|
|
280
|
+
============================================================ */
|
|
281
|
+
|
|
282
|
+
/*
 * Tool: edit_search_and_replace
 * Calls `stability.editSearchAndReplace(image_url, prompt, search_prompt, args)`
 * and returns `local_path`, `finish_reason` and a note as formatted JSON.
 */
tools.add(
  'edit_search_and_replace',
  'Automatically find an object using search_prompt and replace it with new content from prompt.',
  {
    type: 'object',
    properties: {
      image_url: {
        type: 'string'
      },
      prompt: {
        type: 'string'
      },
      search_prompt: {
        type: 'string',
        description: 'Short description of the object to replace.'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      }
    },
    required: ['image_url', 'prompt', 'search_prompt']
  },
  async (args) => {
    const { local_path, finish_reason } = await stability.editSearchAndReplace(
      args.image_url,
      args.prompt,
      args.search_prompt,
      args
    );
    const summary = { local_path, finish_reason, note: 'Search and replace completed.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
304
|
+
|
|
305
|
+
/* ============================================================
|
|
306
|
+
EDIT: SEARCH AND RECOLOR
|
|
307
|
+
============================================================ */
|
|
308
|
+
|
|
309
|
+
/*
 * Tool: edit_search_and_recolor
 * Calls `stability.editSearchAndRecolor(image_url, prompt, select_prompt, args)`
 * and returns `local_path`, `finish_reason` and a note as formatted JSON.
 */
tools.add(
  'edit_search_and_recolor',
  'Automatically find an object and change its color according to the prompt.',
  {
    type: 'object',
    properties: {
      image_url: {
        type: 'string'
      },
      prompt: {
        type: 'string',
        description: 'Desired new color description.'
      },
      select_prompt: {
        type: 'string',
        description: 'Object to recolor.'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      }
    },
    required: ['image_url', 'prompt', 'select_prompt']
  },
  async (args) => {
    const { local_path, finish_reason } = await stability.editSearchAndRecolor(
      args.image_url,
      args.prompt,
      args.select_prompt,
      args
    );
    const summary = { local_path, finish_reason, note: 'Search and recolor completed.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
331
|
+
|
|
332
|
+
/* ============================================================
|
|
333
|
+
EDIT: REMOVE BACKGROUND
|
|
334
|
+
============================================================ */
|
|
335
|
+
|
|
336
|
+
/*
 * Tool: edit_remove_background
 * Calls `stability.editRemoveBackground(image_url, args)` and returns
 * `local_path`, `finish_reason` and a note as formatted JSON.
 * Only png and webp are offered as output formats here.
 */
tools.add(
  'edit_remove_background',
  'Accurately segment the foreground and remove the background from an image.',
  {
    type: 'object',
    properties: {
      image_url: {
        type: 'string'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'webp'],
        default: 'png'
      }
    },
    required: ['image_url']
  },
  async (args) => {
    const { local_path, finish_reason } = await stability.editRemoveBackground(args.image_url, args);
    const summary = { local_path, finish_reason, note: 'Background removed successfully.' };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
356
|
+
|
|
357
|
+
// The populated registry is this module's default export.
export default tools;
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/API/stability.ai/MusicToolset.js
|
|
3
|
+
* @module stability.ai/MusicToolset
|
|
4
|
+
* @description Comprehensive ToolSet for the Stability AI Stable Audio 3 API.
|
|
5
|
+
*
|
|
6
|
+
* Exposes three primary high-level tools that wrap the underlying `audio.js` implementation:
|
|
7
|
+
* - text_to_audio → Pure text-to-audio generation using the `stable-audio-3` model
|
|
8
|
+
* - audio_to_audio → Transform / style-transfer an existing audio clip using a text prompt
|
|
9
|
+
* - inpaint → Replace a specific timed section of an audio file using a text prompt + mask
|
|
10
|
+
*
|
|
11
|
+
* All tools handle asynchronous generation internally (submit → poll until ready).
|
|
12
|
+
* The result is only returned once the audio file is successfully generated and saved locally.
|
|
13
|
+
*
|
|
14
|
+
* Designed specifically for AI agents, LLMs, and function-calling systems.
|
|
15
|
+
* Each tool includes rich, LLM-friendly descriptions, full JSON schemas with constraints,
|
|
16
|
+
* defaults, and practical guidance so an LLM can reliably decide when and how to call them.
|
|
17
|
+
*
|
|
18
|
+
* Key Stable Audio 3 Facts (for LLMs):
|
|
19
|
+
* - Model: `stable-audio-3` (fixed)
|
|
20
|
+
* - Cost: Flat 26 credits per successful generation (not charged on failure)
|
|
21
|
+
* - Max duration: 380 seconds (default 190s)
|
|
22
|
+
* - Output: 44.1 kHz stereo, mp3 (default) or wav
|
|
23
|
+
* - Input audio must be 6–380 seconds long
|
|
24
|
+
* - Prompts must be in English; no copyrighted material
|
|
25
|
+
* - Polling is fully automatic — you do not need to call fetch_result manually
|
|
26
|
+
*
|
|
27
|
+
* @see ./audio.js for the underlying implementation
|
|
28
|
+
* @see ./audio-3.md for the full API specification
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import ToolSet from '../../ToolSet.js';
|
|
32
|
+
import * as stability from './audio.js';
|
|
33
|
+
|
|
34
|
+
// Registry that every `tools.add(...)` call in this module registers into.
// NOTE(review): the meaning of the 'auto' argument is defined by ../../ToolSet.js — confirm there.
const tools = new ToolSet('auto');
|
|
35
|
+
|
|
36
|
+
/* ============================================================
|
|
37
|
+
WORKFLOW 1: TEXT-TO-AUDIO
|
|
38
|
+
============================================================ */
|
|
39
|
+
|
|
40
|
+
/*
 * Tool: text_to_audio
 * Forwards the prompt and the complete argument object to
 * `stability.textToAudio` and returns `local_path`, `finish_reason`, `seed`,
 * `x_request_id` and a note as formatted JSON.
 *
 * The `seed` schema is declared as a bounded integer so that it enforces the
 * range its own description promises (0 to 4,294,967,294) instead of
 * accepting any number.
 */
tools.add(
  'text_to_audio',
  'Generate original high-quality music or audio directly from a descriptive text prompt using Stability AI\'s Stable Audio 3 model. ' +
  'This is the primary tool for creating new tracks, soundscapes, cinematic scores, or sound design from scratch. ' +
  'No reference audio is required. ' +
  'The function submits the request, automatically polls the server until the audio is ready (usually 30–90 seconds), ' +
  'and returns a JSON object containing the local file path to the generated audio. ' +
  'Supports up to 380 seconds of 44.1 kHz stereo audio. Flat cost of 26 credits per successful generation.',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Strong, detailed, and descriptive English-language prompt that defines the desired audio. ' +
          'Include instruments, mood, genre, tempo, structure, and style. ' +
          'Maximum 10,000 characters. ' +
          'Good example: "epic orchestral cinematic trailer music, powerful brass and strings, building tension, 120 BPM, dramatic and emotional". ' +
          'Avoid copyrighted material or non-English text.'
      },
      duration: {
        type: 'number',
        minimum: 1,
        maximum: 380,
        default: 190,
        description: 'Target duration of the generated audio in seconds. ' +
          'Must be between 1 and 380. Default is 190 seconds (about 3 minutes 10 seconds). ' +
          'Longer durations consume the same 26 credits but take slightly longer to generate.'
      },
      output_format: {
        type: 'string',
        enum: ['mp3', 'wav'],
        default: 'mp3',
        description: 'Desired output audio format. ' +
          '"mp3" (default) for smaller file size and broad compatibility. ' +
          '"wav" for lossless quality (larger files).'
      },
      seed: {
        // Bounded integer: mirrors the range stated in the description below.
        type: 'integer',
        minimum: 0,
        maximum: 4294967294,
        description: 'Optional random seed for reproducible results. ' +
          'Use 0 (or omit) for a random seed. ' +
          'Any integer between 0 and 4,294,967,294. ' +
          'Same seed + same prompt usually produces similar output.'
      },
      steps: {
        type: 'integer',
        minimum: 4,
        maximum: 8,
        default: 8,
        description: 'Number of diffusion/sampling steps. ' +
          'Higher values (up to 8) generally produce better quality at the cost of longer generation time. ' +
          'Default 8 is recommended for most cases.'
      },
      cfg_scale: {
        type: 'number',
        minimum: 1,
        maximum: 25,
        default: 1,
        description: 'Classifier-Free Guidance scale controlling how strictly the model follows the prompt. ' +
          'Higher values (e.g. 5–15) increase prompt adherence but can reduce creativity. ' +
          'Default 1 is a good starting point; increase for more literal interpretations.'
      }
    },
    required: ['prompt']
  },
  async (params) => {
    const result = await stability.textToAudio(params.prompt, params);

    return JSON.stringify({
      local_path: result.local_path,
      finish_reason: result.finish_reason,
      seed: result.seed,
      x_request_id: result.x_request_id,
      note: 'Audio generated successfully with text_to_audio using Stable Audio 3. Saved to local_path. 26 credits used.'
    }, null, 2);
  }
);
|
|
116
|
+
|
|
117
|
+
/* ============================================================
|
|
118
|
+
WORKFLOW 2: AUDIO-TO-AUDIO
|
|
119
|
+
============================================================ */
|
|
120
|
+
|
|
121
|
+
/*
 * Tool: audio_to_audio
 * Resolves the reference audio from `audio_path`, `audio_url` or the legacy
 * `audio` field (in that order of precedence), then calls
 * `stability.audioToAudio(prompt, audioInput, params)` and returns
 * `local_path`, `finish_reason`, `seed` and a note as formatted JSON.
 *
 * Why the guard: the schema can only mark `prompt` as required, so a call
 * without any audio reference would otherwise pass `undefined` downstream.
 *
 * Throws Error when no reference audio is supplied.
 */
tools.add(
  'audio_to_audio',
  'Transform or reinterpret an existing audio file according to a new text prompt using Stable Audio 3\'s audio-to-audio capability. ' +
  'The model takes the reference audio as a starting point and applies the prompt to change style, genre, instruments, mood, etc., ' +
  'while preserving some of the original structure and timing. ' +
  'Use the `strength` parameter to control how much the original audio influences the result. ' +
  'Supports remote URLs (auto-downloaded) or local file paths. ' +
  'Either audio_url or audio_path must be provided. ' +
  'Automatically handles polling and returns the transformed audio when ready. Flat 26 credits per successful generation.',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Description of the desired transformation or new style. ' +
          'Example: "turn this into a lush orchestral version with strings, choir, and cinematic atmosphere". ' +
          'Must be in English. Max 10,000 characters.'
      },
      audio_url: {
        type: 'string',
        description: 'Public HTTP/HTTPS URL to a reference audio file (mp3 or wav recommended). ' +
          'The file must be between 6 and 380 seconds long. ' +
          'The system will automatically download it and set the correct MIME type. ' +
          'Alternative to audio_path.'
      },
      audio_path: {
        type: 'string',
        description: 'Local filesystem path to the reference audio file. ' +
          'Alternative to audio_url. Must be a valid readable audio file (6–380 seconds).'
      },
      duration: {
        type: 'number',
        minimum: 1,
        maximum: 380,
        default: 190,
        description: 'Desired output duration in seconds (1–380). ' +
          'Can be shorter or longer than the input audio.'
      },
      strength: {
        type: 'number',
        minimum: 0,
        maximum: 1,
        default: 1,
        description: 'Denoising strength / influence of the original audio. ' +
          '0.0 = output is almost identical to input (minimal change). ' +
          '1.0 = output has almost no influence from the input (full transformation). ' +
          'Typical useful range: 0.5–0.9.'
      },
      output_format: {
        type: 'string',
        enum: ['mp3', 'wav'],
        default: 'mp3',
        description: 'Output format: "mp3" (default) or "wav".'
      },
      seed: {
        // Bounded integer: mirrors the range stated in the description below.
        type: 'integer',
        minimum: 0,
        maximum: 4294967294,
        description: 'Optional seed for reproducibility (0–4294967294).'
      },
      steps: {
        type: 'integer',
        minimum: 4,
        maximum: 8,
        default: 8,
        description: 'Sampling steps (4–8). Higher = better quality.'
      },
      cfg_scale: {
        type: 'number',
        minimum: 1,
        maximum: 25,
        default: 1,
        description: 'Prompt adherence scale (1–25).'
      }
    },
    required: ['prompt']
  },
  async (params) => {
    // Support both audio_url and audio_path for flexibility (and legacy 'audio' field).
    // `||` is intentional: an empty string is not a usable reference and should fall through.
    const audioInput = params.audio_path || params.audio_url || params.audio;
    if (!audioInput) {
      throw new Error('audio_to_audio requires a reference audio: provide audio_url or audio_path.');
    }

    const result = await stability.audioToAudio(params.prompt, audioInput, params);

    return JSON.stringify({
      local_path: result.local_path,
      finish_reason: result.finish_reason,
      seed: result.seed,
      note: 'Audio transformed successfully with audio_to_audio. Saved to local_path. 26 credits used.'
    }, null, 2);
  }
);
|
|
209
|
+
|
|
210
|
+
/* ============================================================
|
|
211
|
+
WORKFLOW 3: INPAINT
|
|
212
|
+
============================================================ */
|
|
213
|
+
|
|
214
|
+
/*
 * Tool: inpaint
 * Resolves the reference audio from `audio_path`, `audio_url` or the legacy
 * `audio` field (in that order of precedence), validates the mask window,
 * then calls `stability.inpaint(prompt, audioInput, params)` and returns
 * `local_path`, `finish_reason`, `seed` and a note as formatted JSON.
 *
 * Why the guards: the schema can only mark `prompt` as required and cannot
 * express "mask_start < mask_end", so both conditions are checked here before
 * any work is delegated. The mask bounds fall back to the schema defaults
 * (30 and 380) when omitted, the same values the result note reports.
 *
 * Throws Error when no reference audio is supplied, or when mask_start is not
 * strictly less than mask_end.
 */
tools.add(
  'inpaint',
  'Regenerate or replace a specific time-based section of an existing audio file using a text prompt and a mask. ' +
  'Perfect for fixing mistakes, changing a verse/chorus, adding a bridge, extending a section, or creative remixing. ' +
  'You define the start and end time (in seconds) of the region to inpaint. ' +
  'The model keeps the audio outside the mask and generates new content inside it that matches the prompt. ' +
  'Either audio_url or audio_path must be provided. ' +
  'Supports remote URLs or local paths. Automatic polling included. Flat 26 credits per successful generation.',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Description of the new content that should replace the masked section. ' +
          'Example: "add a soaring electric guitar solo with reverb and delay". ' +
          'English only, max 10,000 characters.'
      },
      audio_url: {
        type: 'string',
        description: 'Public URL of the reference audio file to edit. Must be 6–380 seconds long.'
      },
      audio_path: {
        type: 'string',
        description: 'Local path to the reference audio file to edit.'
      },
      mask_start: {
        type: 'number',
        minimum: 0,
        maximum: 380,
        default: 30,
        description: 'Start time in seconds of the section to replace/inpaint. ' +
          'Must be less than mask_end. Default: 30 seconds.'
      },
      mask_end: {
        type: 'number',
        minimum: 0,
        maximum: 380,
        default: 380,
        description: 'End time in seconds of the section to replace. ' +
          'Must be greater than mask_start. Default: 380 (end of track).'
      },
      duration: {
        type: 'number',
        minimum: 1,
        maximum: 380,
        default: 190,
        description: 'Total duration of the final output audio in seconds.'
      },
      output_format: {
        type: 'string',
        enum: ['mp3', 'wav'],
        default: 'mp3',
        description: 'Output format.'
      },
      seed: {
        // Same bounded-integer range the sibling audio tools document for seeds.
        type: 'integer',
        minimum: 0,
        maximum: 4294967294,
        description: 'Optional seed for reproducibility.'
      },
      steps: {
        type: 'integer',
        minimum: 4,
        maximum: 8,
        default: 8,
        description: 'Sampling steps (4–8).'
      },
      cfg_scale: {
        type: 'number',
        minimum: 1,
        maximum: 25,
        default: 1,
        description: 'Prompt adherence (1–25).'
      }
    },
    required: ['prompt']
  },
  async (params) => {
    // `||` is intentional: an empty string is not a usable reference and should fall through.
    const audioInput = params.audio_path || params.audio_url || params.audio;
    if (!audioInput) {
      throw new Error('inpaint requires a reference audio: provide audio_url or audio_path.');
    }

    // `??` keeps an explicit 0 (a valid start time) instead of replacing it with the default.
    const maskStart = params.mask_start ?? 30;
    const maskEnd = params.mask_end ?? 380;
    // Written as a negated `<` so that NaN bounds are rejected as well.
    if (!(maskStart < maskEnd)) {
      throw new Error(`inpaint requires mask_start (${maskStart}) to be less than mask_end (${maskEnd}).`);
    }

    const result = await stability.inpaint(params.prompt, audioInput, params);

    return JSON.stringify({
      local_path: result.local_path,
      finish_reason: result.finish_reason,
      seed: result.seed,
      note: `Inpaint completed successfully. Replaced section ${maskStart}s–${maskEnd}s. Saved to local_path. 26 credits used.`
    }, null, 2);
  }
);
|
|
301
|
+
|
|
302
|
+
// The populated registry is this module's default export.
export default tools;
|