@j-o-r/hello-dave 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.md.bak.1779452127 +240 -0
- package/TODO.md +31 -20
- package/agents/code_agent.js +6 -6
- package/agents/daisy_agent.js +10 -7
- package/agents/minimax.js +173 -0
- package/agents/spawn_agent.js +33 -10
- package/agents/stability.js +173 -0
- package/bin/codeDave +1 -1
- package/bin/dave.js +1 -1
- package/docs/dependencies.md +7 -0
- package/docs/music-toolsets.md +137 -0
- package/docs/plans/minimax-music-generation.md +80 -0
- package/docs/plans/unified-agent-architecture.md +146 -0
- package/docs/plans/websocket-streaming-plan.md.bak +317 -0
- package/docs/prompt/spawn_agent.md +46 -44
- package/docs/prompt/task_clarification_and_documentation.md +35 -0
- package/docs/todo-archive-infra-2026-04-21.md +15 -0
- package/docs/todo-archive-v0.1.0.md +32 -0
- package/lib/API/minimax/ImageToolset.js +169 -0
- package/lib/API/minimax/MusicToolset.js +290 -0
- package/lib/API/minimax/VideoToolset.js +296 -0
- package/lib/API/minimax/image.generation.md +239 -0
- package/lib/API/minimax/image.js +219 -0
- package/lib/API/minimax/image.to.image.md +257 -0
- package/lib/API/minimax/index.js +16 -0
- package/lib/API/minimax/music.cover.preprocess.md +206 -0
- package/lib/API/minimax/music.generation.md +346 -0
- package/lib/API/minimax/music.js +257 -0
- package/lib/API/minimax/music.lyrics.generation.md +205 -0
- package/lib/API/minimax/video.download.md +133 -0
- package/lib/API/minimax/video.first.last.image.md +186 -0
- package/lib/API/minimax/video.from.image.md +206 -0
- package/lib/API/minimax/video.from.subject.md +164 -0
- package/lib/API/minimax/video.generation.md +192 -0
- package/lib/API/minimax/video.js +339 -0
- package/lib/API/minimax/video.query.md +128 -0
- package/lib/API/stability.ai/ImageToolset.js +357 -0
- package/lib/API/stability.ai/MusicToolset.js +302 -0
- package/lib/API/stability.ai/audio-3.md +205 -0
- package/lib/API/stability.ai/audio.js +679 -0
- package/lib/API/stability.ai/image.js +911 -0
- package/lib/API/stability.ai/image.md +271 -0
- package/lib/API/stability.ai/index.js +11 -0
- package/lib/API/stability.ai/openapi.json +17118 -0
- package/lib/API/x.ai/ImageToolset.js +165 -0
- package/lib/API/x.ai/image.editing.md +86 -0
- package/lib/API/x.ai/image.js +393 -0
- package/lib/API/x.ai/image.md +213 -0
- package/lib/API/x.ai/image.to.generation.md +494 -0
- package/lib/API/x.ai/image.to.video.md +23 -0
- package/lib/API/x.ai/index.js +9 -0
- package/lib/AgentManager.js +1 -1
- package/lib/CdnToolset.js +191 -0
- package/lib/ToolSet.js +19 -1
- package/lib/cdn.js +373 -0
- package/lib/fafs.js +5 -3
- package/lib/genericToolset.js +75 -210
- package/lib/index.js +9 -1
- package/package.json +2 -2
- package/types/API/minimax/ImageToolset.d.ts +3 -0
- package/types/API/minimax/MusicToolset.d.ts +3 -0
- package/types/API/minimax/VideoToolset.d.ts +3 -0
- package/types/API/minimax/image.d.ts +109 -0
- package/types/API/minimax/index.d.ts +15 -0
- package/types/API/minimax/music.d.ts +46 -0
- package/types/API/minimax/video.d.ts +165 -0
- package/types/API/stability.ai/ImageToolset.d.ts +3 -0
- package/types/API/stability.ai/MusicToolset.d.ts +3 -0
- package/types/API/stability.ai/audio.d.ts +193 -0
- package/types/API/stability.ai/image.d.ts +274 -0
- package/types/API/stability.ai/index.d.ts +11 -0
- package/types/API/x.ai/ImageToolset.d.ts +3 -0
- package/types/API/x.ai/image.d.ts +82 -0
- package/types/API/x.ai/index.d.ts +9 -0
- package/types/AgentManager.d.ts +1 -1
- package/types/CdnToolset.d.ts +20 -0
- package/types/ToolSet.d.ts +8 -0
- package/types/cdn.d.ts +141 -0
- package/types/index.d.ts +8 -2
- package/utils/syntax_check.sh +59 -15
- package/docs/multi-agent-clusters.md.bak +0 -229
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { AgentManager, API, CdnToolset } from '@j-o-r/hello-dave';
|
|
3
|
+
import * as test from '@j-o-r/hello-dave';
|
|
4
|
+
import { parseArgs } from '@j-o-r/sh';
|
|
5
|
+
|
|
6
|
+
const name = 'stability';
|
|
7
|
+
const api = 'xai';
|
|
8
|
+
let secret = '';
|
|
9
|
+
|
|
10
|
+
const args = parseArgs();
|
|
11
|
+
|
|
12
|
+
let input;
|
|
13
|
+
if (args._.length === 1 && typeof args._[0] === 'string' && args._[0].trim() !== '') {
|
|
14
|
+
input = args._[0].trim();
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
const help = args['help'] || false;
|
|
18
|
+
const connect = args['connect'] ? args['connect'] : undefined;
|
|
19
|
+
const serve = args['serve'] ? parseInt(args['serve']) : undefined;
|
|
20
|
+
|
|
21
|
+
/** @type {import('lib/API/x.ai/responses.js').XAIOptions} */
|
|
22
|
+
const options = { tools: [] };
|
|
23
|
+
options.tools.push({
|
|
24
|
+
type: 'web_search'
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
if (args['secret']) {
|
|
28
|
+
secret = args['secret'];
|
|
29
|
+
}
|
|
30
|
+
if (args['model'] || true) {
|
|
31
|
+
options.model = args['model'] || 'grok-4-fast-reasoning';
|
|
32
|
+
}
|
|
33
|
+
if (args['temperature']) {
|
|
34
|
+
options.temperature = parseFloat(args['temperature']);
|
|
35
|
+
} else {
|
|
36
|
+
options.temperature = 0.8;
|
|
37
|
+
}
|
|
38
|
+
if (args['tokens']) {
|
|
39
|
+
options.max_output_tokens = parseInt(args['tokens']);
|
|
40
|
+
}
|
|
41
|
+
if (args['top_p']) {
|
|
42
|
+
options.top_p = parseFloat(args['top_p']);
|
|
43
|
+
}
|
|
44
|
+
const reasoning = true;
|
|
45
|
+
if (reasoning) {
|
|
46
|
+
options.reasoning = {
|
|
47
|
+
effort: 'medium',
|
|
48
|
+
summary: 'auto'
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
const toolsetMode = 'auto';
|
|
52
|
+
const contextWindow = args['context'] ? parseInt(args['context']) : 1900000;
|
|
53
|
+
|
|
54
|
+
function printHelp() {
|
|
55
|
+
console.log(`
|
|
56
|
+
'${name} --help' You are looking at it.
|
|
57
|
+
|
|
58
|
+
## USAGE MODES:
|
|
59
|
+
|
|
60
|
+
### 1. Direct Call (One-Shot, Positional ONLY):
|
|
61
|
+
./agents/${name}.js "Generate lyrics for pop song" [--options]
|
|
62
|
+
|
|
63
|
+
### 2. Interactive CLI (no positional arg):
|
|
64
|
+
./agents/${name}.js [--options]
|
|
65
|
+
|
|
66
|
+
### 3. WS Server (no positional arg):
|
|
67
|
+
./agents/${name}.js --serve 8080 [--secret mysecret] [--options]
|
|
68
|
+
|
|
69
|
+
### 4. WS Client (no positional arg):
|
|
70
|
+
./agents/${name}.js --connect ws://127.0.0.1:8080/ws --secret mysecret [--options]
|
|
71
|
+
|
|
72
|
+
### 5. Hybrid (Server + Client, no positional arg):
|
|
73
|
+
./agents/${name}.js --serve 8081 --connect ws://other:8080/ws [--secret ...] [--options]
|
|
74
|
+
|
|
75
|
+
## SERVER OPTIONS EXPLAINED:
|
|
76
|
+
--serve [port]: Starts WebSocket SERVER at ws://127.0.0.1:[port]/ws. Allows other agents (--connect) to connect and use this agent as a remote TOOL (e.g., 'daisy_agent'). Runs indefinitely until Ctrl+C.
|
|
77
|
+
|
|
78
|
+
--connect [ws_url]: Connects as CLIENT to remote WS server at [ws_url] (e.g., ws://127.0.0.1:8080/ws). Gains access to remote agent's tools. Interactive CLI available.
|
|
79
|
+
|
|
80
|
+
--secret [string]: SHARED AUTH TOKEN (min 3 chars). SERVER rejects clients without matching --secret. CLIENTS must provide server's secret to connect. Use same secret for chains.
|
|
81
|
+
|
|
82
|
+
Note: Server/Client/Hybrid IGNORES positional input arg (use CLI modes instead). Hybrid: This agent serves AND uses remote tools.
|
|
83
|
+
|
|
84
|
+
## OPTIONS:
|
|
85
|
+
--model [grok-4-fast-reasoning|...] (default: grok-4-fast-reasoning)
|
|
86
|
+
--temperature [float] (-2 to +2, default 0.8 for creativity)
|
|
87
|
+
--tokens [number] (max output tokens)
|
|
88
|
+
--top_p [float]
|
|
89
|
+
--context [number] (default: 1900000)
|
|
90
|
+
|
|
91
|
+
## SERVER TOOLS (when no input):
|
|
92
|
+
Exposes as 'daisy_agent' tool for chaining.
|
|
93
|
+
`);
|
|
94
|
+
process.exit();
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (help) {
|
|
98
|
+
printHelp();
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const tool_call_name = 'daisy_agent';
|
|
102
|
+
const tool_call_description = `
|
|
103
|
+
Daisy Music Assistant:
|
|
104
|
+
- "Lyrics for [theme]" → Generate lyrics.
|
|
105
|
+
- "Music minimax prompt: [style]" → Optimized prompt.
|
|
106
|
+
- "ffmpeg [task] on file.wav" → Bash script to run.
|
|
107
|
+
- web_search: Research chords/lyrics.
|
|
108
|
+
- execute_bash_script: Processes your local files safely.
|
|
109
|
+
- Create music
|
|
110
|
+
`.trim();
|
|
111
|
+
|
|
112
|
+
const prompt = `
|
|
113
|
+
You are ${name}, a helpful music creation and editing assistant for the user's computer.
|
|
114
|
+
|
|
115
|
+
Core expertise:
|
|
116
|
+
- Generate lyrics: Creative, structured (verses, chorus), themed, rhyming.
|
|
117
|
+
- niMAx 2.6 AI prompts: Detailed, vivid descriptions (genre, mood, instruments, structure, vocals).
|
|
118
|
+
- Local audio editing: Use execute_bash_script with ffmpeg/sox commands. Provide exact bash snippets first, confirm before running. Examples:
|
|
119
|
+
* Trim: ffmpeg -i input.mp3 -ss 00:00:30 -t 00:01:00 output.mp3
|
|
120
|
+
* Concat: echo "file 'a.mp3'" > list.txt; ffmpeg -f concat -i list.txt out.mp3
|
|
121
|
+
* Sox effects: sox input.wav output.wav fade 0 3 2 norm
|
|
122
|
+
* Convert: ffmpeg -i video.mp4 audio.aac
|
|
123
|
+
- Music theory: Chords, scales, BPM, EQ tips.
|
|
124
|
+
- Workflows: Step-by-step for mixing, mastering, layering tracks.
|
|
125
|
+
|
|
126
|
+
Behavior:
|
|
127
|
+
- Be creative & enthusiastic!
|
|
128
|
+
- Step-by-step: Explain, provide code, suggest files in current dir.
|
|
129
|
+
- Safety: Quote bash commands; ask confirmation for destructive ops (e.g., overwrite).
|
|
130
|
+
- Use web_search for inspiration/lyrics if needed.
|
|
131
|
+
- Output ready-to-copy bash for ffmpeg/sox.
|
|
132
|
+
- List files if unclear: Use ls *.wav *.mp3 etc. via bash.
|
|
133
|
+
|
|
134
|
+
Current env: Ubuntu, ffmpeg & sox installed
|
|
135
|
+
|
|
136
|
+
Respond concisely but completely. Use markdown for code/lyrics/prompts.
|
|
137
|
+
`.trim();
|
|
138
|
+
|
|
139
|
+
const agent = new AgentManager({ name, secret });
|
|
140
|
+
agent.setup({
|
|
141
|
+
prompt,
|
|
142
|
+
api,
|
|
143
|
+
options,
|
|
144
|
+
toolsetMode,
|
|
145
|
+
contextWindow
|
|
146
|
+
});
|
|
147
|
+
const toolset = agent.getToolset();
|
|
148
|
+
toolset?.borrow(API.stability.musicToolset);
|
|
149
|
+
toolset?.borrow(CdnToolset);
|
|
150
|
+
if (toolset) {
|
|
151
|
+
agent.addGenericToolcall('open_link');
|
|
152
|
+
agent.addGenericToolcall('execute_bash_script');
|
|
153
|
+
agent.addGenericToolcall('read_file');
|
|
154
|
+
agent.addGenericToolcall('write_file');
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const cliIntro = `
|
|
158
|
+
${name} ${options.model} ready! (temp: ${options.temperature}, context: ${contextWindow})
|
|
159
|
+
|
|
160
|
+
Ask me to:
|
|
161
|
+
- Write lyrics
|
|
162
|
+
- Craft Music prompts
|
|
163
|
+
- Edit audio: "fade out my track.mp3" → I'll give ffmpeg cmd
|
|
164
|
+
Type /help for more.
|
|
165
|
+
${tool_call_name}
|
|
166
|
+
`.trim();
|
|
167
|
+
|
|
168
|
+
if (input) {
|
|
169
|
+
const RES = await agent.directCall(input);
|
|
170
|
+
console.log(RES);
|
|
171
|
+
} else {
|
|
172
|
+
await agent.start(serve, connect, cliIntro, tool_call_name, tool_call_description);
|
|
173
|
+
}
|
package/bin/codeDave
CHANGED
|
@@ -52,7 +52,7 @@ pm2 start "${PROJECT_DIR}/agents/npm_agent.js" --name "${FOLDER}_npm_${PORT}"
|
|
|
52
52
|
pm2 start "${PROJECT_DIR}/agents/docs_agent.js" --name "${FOLDER}_docs_${PORT}" -- --connect "ws://127.0.0.1:${PORT}/ws" --secret "${SECRET}"
|
|
53
53
|
pm2 start "${PROJECT_DIR}/agents/test_agent.js" --name "${FOLDER}_test_${PORT}" -- --connect "ws://127.0.0.1:${PORT}/ws" --secret "${SECRET}"
|
|
54
54
|
pm2 start "${PROJECT_DIR}/agents/memory_agent.js" --name "${FOLDER}_memory_${PORT}" -- --connect "ws://127.0.0.1:${PORT}/ws" --secret "${SECRET}"
|
|
55
|
-
pm2 start "${PROJECT_DIR}/agents/spawn_agent.js" --name "${FOLDER}_spawn_${PORT}" -- --connect "ws://127.0.0.1:${PORT}/ws" --secret "${SECRET}"
|
|
55
|
+
# pm2 start "${PROJECT_DIR}/agents/spawn_agent.js" --name "${FOLDER}_spawn_${PORT}" -- --connect "ws://127.0.0.1:${PORT}/ws" --secret "${SECRET}"
|
|
56
56
|
|
|
57
57
|
echo "codeDave processes spawned with prefix '${FOLDER}_' and suffix _${PORT}. Check with: pm2 list | grep '${FOLDER}_${PORT}'"
|
|
58
58
|
echo "dave --connect ws://127.0.0.1:${PORT}/ws --secret '$SECRET'"
|
package/bin/dave.js
CHANGED
|
@@ -97,7 +97,7 @@ if (args.clear) {
|
|
|
97
97
|
|
|
98
98
|
try {
|
|
99
99
|
const response = await wsIO(args.connect, secret, action, input);
|
|
100
|
-
console.log(response.content);
|
|
100
|
+
console.log(JSON.stringify(response.content, null, ' '));
|
|
101
101
|
} catch (e) {
|
|
102
102
|
console.error(`Error: ${e.message}`);
|
|
103
103
|
process.exit(1);
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Music Toolsets
|
|
2
|
+
|
|
3
|
+
**Date:** May 22, 2026
|
|
4
|
+
**Status:** Implemented and Documented
|
|
5
|
+
|
|
6
|
+
The `hello-dave` toolkit now includes two specialized **MusicToolsets** for AI-powered music generation and editing:
|
|
7
|
+
|
|
8
|
+
- **Stability AI MusicToolset** (`API.stability.musicToolset`) — Powered by Stable Audio 3.
|
|
9
|
+
- **Minimax MusicToolset** (`API.minimax.musicToolset`) — Powered by Minimax Music 2.6 / Cover models.
|
|
10
|
+
|
|
11
|
+
These toolsets are designed for seamless integration into AI agents (via `AgentManager` and `ToolSet.borrow()`). They provide high-level, LLM-friendly tools with rich descriptions, JSON schemas, constraints, examples, and automatic async polling.
|
|
12
|
+
|
|
13
|
+
## Prerequisites
|
|
14
|
+
|
|
15
|
+
- **Stability AI**: Set `STABILITY_API_KEY` environment variable (obtain from [platform.stability.ai](https://platform.stability.ai)).
|
|
16
|
+
- **Minimax**: Set `MINIMAX_API_KEY` environment variable.
|
|
17
|
+
- Both toolsets automatically handle local file saving (to `.cache/stability` or equivalent) and support remote URLs where applicable.
|
|
18
|
+
- Optional: `CdnToolset` for publishing results.
|
|
19
|
+
|
|
20
|
+
## Stability AI MusicToolset
|
|
21
|
+
|
|
22
|
+
**Location**: `lib/API/stability.ai/MusicToolset.js` (with underlying implementation in `lib/API/stability.ai/audio.js` and specs in `audio-3.md`).
|
|
23
|
+
|
|
24
|
+
**Key Facts** (for LLMs/agents):
|
|
25
|
+
- Model: `stable-audio-3` (fixed).
|
|
26
|
+
- Cost: 26 credits per successful generation.
|
|
27
|
+
- Max duration: 380 seconds (default ~190s).
|
|
28
|
+
- Output: 44.1 kHz stereo, MP3 (default) or WAV.
|
|
29
|
+
- Prompts: English only; no copyrighted material.
|
|
30
|
+
- Polling: Fully automatic (no manual `fetch_result` needed).
|
|
31
|
+
|
|
32
|
+
### Available Tools
|
|
33
|
+
|
|
34
|
+
1. **`text_to_audio`**
|
|
35
|
+
Generate original music/sound from a descriptive text prompt.
|
|
36
|
+
Parameters: `prompt` (required), `duration`, `output_format`, `seed`, `steps`, `cfg_scale`.
|
|
37
|
+
Returns: JSON with `local_path`, `finish_reason`, etc.
|
|
38
|
+
|
|
39
|
+
2. **`audio_to_audio`**
|
|
40
|
+
Transform an existing audio file using a new prompt (style transfer, genre change, etc.).
|
|
41
|
+
Supports `audio_url` (remote) or `audio_path` (local).
|
|
42
|
+
Parameters: `prompt` (required), `audio_url`/`audio_path`, `strength`, `duration`, etc.
|
|
43
|
+
|
|
44
|
+
3. **`inpaint`**
|
|
45
|
+
Replace a specific timed section of audio using a prompt + mask.
|
|
46
|
+
Parameters: `prompt`, `audio_url`/`audio_path`, `mask_start`, `mask_end`, etc.
|
|
47
|
+
|
|
48
|
+
**Full Documentation**: See the comprehensive JSDoc in `lib/API/stability.ai/MusicToolset.js` and `audio.js`.
|
|
49
|
+
|
|
50
|
+
## Minimax MusicToolset
|
|
51
|
+
|
|
52
|
+
**Location**: `lib/API/minimax/MusicToolset.js` (with underlying `lib/API/minimax/music.js`).
|
|
53
|
+
|
|
54
|
+
**Key Features**:
|
|
55
|
+
- Supports text-to-music, cover generation (one-step and advanced two-step with lyrics modification), analysis/preprocessing, and lyrics generation/editing.
|
|
56
|
+
- Strict mutual-exclusivity guardrails for reference audio vs. `cover_feature_id`.
|
|
57
|
+
- Lyrics optional for instrumental tracks; structured tags supported (`[Verse]`, `[Chorus]`, etc.).
|
|
58
|
+
- Models: `music-2.6`, `music-cover`, etc.
|
|
59
|
+
|
|
60
|
+
### Available Tools
|
|
61
|
+
|
|
62
|
+
1. **`create_music`** — Pure text-to-music generation (no reference audio).
|
|
63
|
+
2. **`change_music`** — Cover generation (direct reference or two-step via `analyze_music`).
|
|
64
|
+
3. **`analyze_music`** — Preprocess reference audio to obtain `cover_feature_id` for advanced covers.
|
|
65
|
+
4. **`lyrics`** — Generate, edit, or continue song lyrics (full song or snippets).
|
|
66
|
+
|
|
67
|
+
**Full Documentation**: See JSDoc in `lib/API/minimax/MusicToolset.js`.
|
|
68
|
+
|
|
69
|
+
## Usage in Custom Agents
|
|
70
|
+
|
|
71
|
+
The dedicated example agents demonstrate best practices:
|
|
72
|
+
|
|
73
|
+
- **`agents/stability.js`** — Music assistant using Stability tools + local editing (ffmpeg/sox via `execute_bash_script`).
|
|
74
|
+
- **`agents/minimax.js`** — Similar assistant using Minimax tools (note: although the file is named `minimax.js`, the agent uses the name "minimaxi" internally for display).
|
|
75
|
+
|
|
76
|
+
### Example Integration Pattern (from the agents)
|
|
77
|
+
|
|
78
|
+
```javascript
|
|
79
|
+
import { AgentManager, API, CdnToolset } from '@j-o-r/hello-dave';
|
|
80
|
+
|
|
81
|
+
const agent = new AgentManager({ name: 'my-music-agent', secret: '...' });
|
|
82
|
+
agent.setup({
|
|
83
|
+
prompt: `You are a music creation assistant...`,
|
|
84
|
+
api: 'xai',
|
|
85
|
+
options: { model: 'grok-4-fast-reasoning', temperature: 0.8 /* , ...other options */ },
|
|
86
|
+
toolsetMode: 'auto',
|
|
87
|
+
contextWindow: 1900000
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
const toolset = agent.getToolset();
|
|
91
|
+
toolset?.borrow(API.stability.musicToolset); // or API.minimax.musicToolset
|
|
92
|
+
toolset?.borrow(CdnToolset);
|
|
93
|
+
|
|
94
|
+
if (toolset) {
|
|
95
|
+
agent.addGenericToolcall('execute_bash_script');
|
|
96
|
+
agent.addGenericToolcall('read_file');
|
|
97
|
+
agent.addGenericToolcall('write_file');
|
|
98
|
+
// ... other tools
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
await agent.start(/* serve/connect options */);
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Agent Prompts**: Both examples include rich system prompts covering lyrics generation, music theory, local audio editing with ffmpeg, safety confirmations, and step-by-step workflows.
|
|
105
|
+
|
|
106
|
+
**CLI Usage**:
|
|
107
|
+
```bash
|
|
108
|
+
node agents/stability.js "Create an epic orchestral track"
|
|
109
|
+
node agents/minimax.js "Generate a jazz cover of my song"
|
|
110
|
+
# Or run as server/client for chaining with other agents
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Best Practices & Notes
|
|
114
|
+
|
|
115
|
+
- **Local-First**: Generated audio is saved locally by default; it is never published to a CDN automatically — publishing happens only when `CdnToolset` is used explicitly.
|
|
116
|
+
- **Error Handling**: Toolsets include robust validation, polling, and error messages. Stability has a known caveat around remote audio URL handling (MIME type detection for FormData in `audioToAudio`).
|
|
117
|
+
- **Parity**: The Stability implementation was modeled directly after the Minimax one for consistency (same tool patterns, JSDoc style, demo structure).
|
|
118
|
+
- **Demos**: See `scenarios/demo-stability-audio.js` (modeled after Minimax demo).
|
|
119
|
+
- **Extensibility**: Borrow additional toolsets (e.g., `CdnToolset`) or generic tools as needed.
|
|
120
|
+
- **LLM-Friendly**: All tool descriptions are optimized for function-calling with examples, constraints, defaults, and usage guidance.
|
|
121
|
+
|
|
122
|
+
## Related Files
|
|
123
|
+
|
|
124
|
+
- `lib/API/stability.ai/MusicToolset.js`
|
|
125
|
+
- `lib/API/stability.ai/audio.js`
|
|
126
|
+
- `lib/API/stability.ai/audio-3.md`
|
|
127
|
+
- `lib/API/minimax/MusicToolset.js`
|
|
128
|
+
- `lib/API/minimax/music.js`
|
|
129
|
+
- `agents/stability.js`
|
|
130
|
+
- `agents/minimax.js`
|
|
131
|
+
- `docs/plans/minimax-music-generation.md` (historical plan)
|
|
132
|
+
|
|
133
|
+
For questions or contributions, see the main project README or open an issue.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
*This documentation ensures the new music capabilities are discoverable and easy to adopt.*
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Minimax Music Toolset & HTTP Wrapper Plan (Focused Version v1.4 – 2-Stage Approach)
|
|
2
|
+
|
|
3
|
+
**Date:** May 20, 2026
|
|
4
|
+
**Version:** 1.4 (2-Stage Implementation Approach)
|
|
5
|
+
**Status:** Planning Phase – Stage 1 Ready for Implementation
|
|
6
|
+
**Focus:** Strictly the HTTP wrapper (`lib/API/minimax/music.js`) and the special Music Toolset (`lib/API/minimax/MusicToolset.js`).
|
|
7
|
+
**No Music Agent creation** – This plan provides everything needed to later create a music agent (using patterns from `./agents/*`).
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## 1. 2-Stage Implementation Approach (Approved)
|
|
12
|
+
|
|
13
|
+
**Stage 1 – HTTP Wrapper (Current Focus)**
|
|
14
|
+
- Create `lib/API/minimax/music.js` first.
|
|
15
|
+
- This will be a complete, reusable, low-level HTTP client for all Minimax music endpoints.
|
|
16
|
+
- Includes support for:
|
|
17
|
+
- Basic music generation
|
|
18
|
+
- Two-Step Cover (Advanced Mode with Lyrics Modification)
|
|
19
|
+
- Local-only storage for generated results
|
|
20
|
+
- Organized CDN publishing for references only (when explicitly requested)
|
|
21
|
+
- User will create a Minimax account and obtain an API key (`MINIMAX_API_KEY`) before testing.
|
|
22
|
+
- Once the wrapper is ready and tested (with real key), we move to Stage 2.
|
|
23
|
+
|
|
24
|
+
**Stage 2 – Music Toolset (After Stage 1)**
|
|
25
|
+
- Create `lib/API/minimax/MusicToolset.js` using the wrapper from Stage 1.
|
|
26
|
+
- Expose high-level tools (including the high-priority `generate_cover_with_lyrics_modification`).
|
|
27
|
+
- Follows exact patterns from `lib/genericToolset.js`.
|
|
28
|
+
|
|
29
|
+
This staged approach ensures we have a solid, tested foundation before building the tool layer.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## 2. Current Rules for Audio Handling (v1.3 + v1.4)
|
|
34
|
+
|
|
35
|
+
- Generated audio → **Local storage only** (never auto-published to CDN).
|
|
36
|
+
- Reference audio for Two-Step Cover → Published to organized CDN **only when project_slug is provided**.
|
|
37
|
+
- Organized structure: `projects/<slug>/` with `meta.json`, `description.md`, `plan.md`.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 3. Stage 1 Deliverable: `lib/API/minimax/music.js`
|
|
42
|
+
|
|
43
|
+
**Purpose:** Pure HTTP wrapper (no ToolSet logic).
|
|
44
|
+
|
|
45
|
+
**Required Exports (to be implemented):**
|
|
46
|
+
- `getHeaders()`
|
|
47
|
+
- `requestMusic(prompt, options)`
|
|
48
|
+
- `preprocessCover(audioUrl, options)`
|
|
49
|
+
- `generateCoverWithLyricsModification(coverFeatureId, newLyrics, prompt, options)`
|
|
50
|
+
- `twoStepCoverWithLocalResult(referenceAudio, newLyrics, prompt, options)`
|
|
51
|
+
- `downloadToLocal(audioUrl, targetLocalPath)`
|
|
52
|
+
- `publishReferenceToOrganizedCDN(localPath, projectSlug, description, planContent)`
|
|
53
|
+
- `slugify(text)`
|
|
54
|
+
- Helper constants for endpoints and default audio settings.
|
|
55
|
+
|
|
56
|
+
**Implementation Style:**
|
|
57
|
+
Exact mirror of `lib/API/x.ai/responses.js`:
|
|
58
|
+
- Uses `import { request as doRequest } from '@j-o-r/apiserver';`
|
|
59
|
+
- Uses `import { SH, sleep } from '@j-o-r/sh';`
|
|
60
|
+
- Env check for `MINIMAX_API_KEY`
|
|
61
|
+
- Duration tracking + clean error handling
|
|
62
|
+
- All generated results downloaded locally via `downloadToLocal`
|
|
63
|
+
|
|
64
|
+
**Two-Step Cover Support:** Fully included as requested.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## 4. Next Steps
|
|
69
|
+
|
|
70
|
+
1. **Now (Stage 1):** Implement `lib/API/minimax/music.js`
|
|
71
|
+
2. User creates Minimax account + API key
|
|
72
|
+
3. Test the wrapper (with real key)
|
|
73
|
+
4. **Later (Stage 2):** Implement the Music Toolset
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
**Plan v1.4 is now the single source of truth for the 2-stage approach.**
|
|
78
|
+
|
|
79
|
+
Ready to begin **Stage 1** (create `lib/API/minimax/music.js`)?
|
|
80
|
+
Reply with **"Approved – start Stage 1: implement music.js"** and I will proceed immediately.
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# Unified Agent Architecture – Human-Readable Event-Driven Design
|
|
2
|
+
|
|
3
|
+
**Date:** May 09, 2026
|
|
4
|
+
**Version:** 2.5 (Complete & Self-Contained – Includes Key Implementation Patterns)
|
|
5
|
+
**Status:** Final – This document is the single source of truth
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## 1. Core Philosophy
|
|
10
|
+
|
|
11
|
+
- Everything that happens is an **event**.
|
|
12
|
+
- There is **no "final response"** concept.
|
|
13
|
+
- The **only** way to know that an agent (or user acting as agent) has finished its turn is when it emits a **`kind: "ready"`** event.
|
|
14
|
+
- **Users and agents are completely symmetric.**
|
|
15
|
+
- A `function_call` sent by an Agent to a connected peer is treated exactly as a **`query`** by the receiver.
|
|
16
|
+
- A `function_response` is the answer that comes back when the receiver has processed that query and emitted its own **`ready`** event.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## 2. What `lib/Agent.js` Must Do (Core Responsibilities)
|
|
21
|
+
|
|
22
|
+
`lib/Agent.js` is the **single source of truth** for all agent behavior. It must be a pure core class with **zero** terminal I/O, readline, or CLI code.
|
|
23
|
+
|
|
24
|
+
It must implement **all** of the following functionality in one class:
|
|
25
|
+
|
|
26
|
+
### 2.1 Hybrid Server + Client Mode
|
|
27
|
+
- When instantiated with `serve: port`, it acts as a WebSocket server.
|
|
28
|
+
- When instantiated with `connect: url`, it acts as a WebSocket client.
|
|
29
|
+
- It can do both at the same time (hybrid).
|
|
30
|
+
|
|
31
|
+
### 2.2 Dynamic Tool Registration (from AgentServer + AgentClient)
|
|
32
|
+
- Any peer (remote agent **or** user) that connects and sends a `kind: "introduction"` message is automatically registered as a tool in `prompt.toolset`.
|
|
33
|
+
- The tool name = the peer’s `name`.
|
|
34
|
+
- The tool description = the peer’s `description`.
|
|
35
|
+
- The tool implementation sends a `function_call` (or `query`) to the peer and waits for a `function_response`.
|
|
36
|
+
|
|
37
|
+
### 2.3 User as First-Class Expert (Two Modes)
|
|
38
|
+
- **Direct mode**: Normal user input is passed directly to `prompt.call(content)`.
|
|
39
|
+
- **Expert mode** (optional): If the user provides `name` + `description`, the Agent sends an `introduction` on connect and registers the user as a callable tool. The model can then issue `function_call` to the human user.
|
|
40
|
+
|
|
41
|
+
### 2.4 Unified Event Protocol (Mandatory)
|
|
42
|
+
All messages use this exact shape:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"type": "event",
|
|
47
|
+
"kind": "query" | "reasoning" | "log" | "function_call" | "function_response" |
|
|
48
|
+
"ready" | "info" | "session_list" | "load_session" | "reset" |
|
|
49
|
+
"proceed" | "tool_start" | "tool_end" | "error" | "introduction",
|
|
50
|
+
"from": "user:cli" | "agent:main" | "agent:memory" | "human",
|
|
51
|
+
"id": "correlation-id",
|
|
52
|
+
"content": any,
|
|
53
|
+
"metadata": object | null
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Key rules:
|
|
58
|
+
- `function_call` received → treated internally as a `query`.
|
|
59
|
+
- `ready` is the **only** completion signal.
|
|
60
|
+
- When the Prompt emits `ready`, the Agent emits a `kind: "ready"` event.
|
|
61
|
+
|
|
62
|
+
### 2.5 Internal Behaviors (Must Be Implemented)
|
|
63
|
+
- **Queue + Processing Flag**: Incoming queries are processed one at a time.
|
|
64
|
+
- **Epoch-based Reset**: On `reset`, increment an epoch so in-flight work can be safely discarded.
|
|
65
|
+
- **Auto-reconnect** (client side): If the WebSocket drops, automatically retry.
|
|
66
|
+
- **Pending Response Map**: Track pending `function_call` / `query` responses with timeouts.
|
|
67
|
+
- **Prompt Integration**: Wrap a `Prompt` instance and translate its events (`message`, `ready`, `tool_request`, `tool_response`, `error`) into the unified protocol.
|
|
68
|
+
|
|
69
|
+
### 2.6 Key Implementation Patterns from Existing Code (Mandatory)
|
|
70
|
+
|
|
71
|
+
The new `lib/Agent.js` **must preserve** the following proven patterns from `lib/AgentServer.js` and `lib/AgentClient.js`:
|
|
72
|
+
|
|
73
|
+
- **Tool registration pattern** (`addClient` in AgentServer):
|
|
74
|
+
- On `introduction`, create a tool in `prompt.toolset` whose implementation sends a message to the peer and returns a Promise that resolves on `function_response` / `agent_response`.
|
|
75
|
+
|
|
76
|
+
- **Pending response handling** (`pendingResponses` Map):
|
|
77
|
+
- Use a Map keyed by `conn_id:id` (or just `id` in the unified protocol) to store `{resolve, reject, timer}`.
|
|
78
|
+
- On receiving `function_response` / `agent_response`, resolve the matching promise and clear the timer.
|
|
79
|
+
|
|
80
|
+
- **Queue + epoch pattern** (from AgentClient):
|
|
81
|
+
- Maintain an internal message queue and a `processing` flag.
|
|
82
|
+
- On `reset`, increment `epoch` and discard the queue.
|
|
83
|
+
- Only process one message at a time.
|
|
84
|
+
|
|
85
|
+
- **Auto-reconnect** (from AgentClient):
|
|
86
|
+
- On client-side `onclose`, schedule a reconnect after 5 seconds.
|
|
87
|
+
|
|
88
|
+
- **Message sending with correlation**:
|
|
89
|
+
- Every outgoing `query` / `function_call` gets a unique `id`.
|
|
90
|
+
- Responses are matched by this `id`.
|
|
91
|
+
|
|
92
|
+
These patterns must be re-implemented inside the single `Agent` class using the new unified event kinds.
|
|
93
|
+
|
|
94
|
+
### 2.7 Public API (High-Level)
|
|
95
|
+
The class must expose at minimum:
|
|
96
|
+
|
|
97
|
+
```js
|
|
98
|
+
new Agent(prompt, {
|
|
99
|
+
name: 'agent:main',
|
|
100
|
+
serve: 8080, // server mode
|
|
101
|
+
connect: 'ws://...', // client mode
|
|
102
|
+
debug: true
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
agent.query(content);
|
|
106
|
+
agent.sendFunctionCall(name, args);
|
|
107
|
+
agent.on('event', fn);
|
|
108
|
+
agent.on('ready', fn);
|
|
109
|
+
agent.on('query', fn);
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### 2.8 No I/O in the Core
|
|
113
|
+
- `lib/Agent.js` must have **zero** dependencies on `@j-o-r/cli`, readline, stdin/stdout, or any terminal code.
|
|
114
|
+
- All terminal and web interfaces are provided by separate binding modules.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## 3. Architectural Principle: Core vs Bindings
|
|
119
|
+
|
|
120
|
+
- `lib/Agent.js` = pure core (event logic, registration, protocol, Prompt integration).
|
|
121
|
+
- All user interfaces = separate bindings (`lib/cli/terminal-binding.js`, future web binding, etc.).
|
|
122
|
+
- `wsIO.js` and `wsCli.js` remain **completely untouched**.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## 4. Requirements for the Terminal Binding (`lib/cli/terminal-binding.js`)
|
|
127
|
+
|
|
128
|
+
(See separate detailed requirements in sections above – must use `@j-o-r/cli` + `@j-o-r/sh`, keep 100% command parity with `wsCli.js`, support expert registration, and use the new event protocol.)
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## 5. Implementation Order (v2.5)
|
|
133
|
+
|
|
134
|
+
1. Approve this plan.
|
|
135
|
+
2. Implement / rewrite `lib/Agent.js` exactly according to section 2 (including the key patterns in 2.6).
|
|
136
|
+
3. Implement `lib/cli/terminal-binding.js` (using `@j-o-r/cli` + `@j-o-r/sh`).
|
|
137
|
+
4. Update `lib/index.js` and documentation.
|
|
138
|
+
5. Do **not** modify `wsIO.js` or `wsCli.js`.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
**This document is now complete, precise, and self-contained.**
|
|
143
|
+
|
|
144
|
+
It contains every piece of information needed to implement `lib/Agent.js` and the terminal binding in a new session with **no prior context**.
|
|
145
|
+
|
|
146
|
+
**Approved – proceed with implementation** (or requested changes): _______________________________
|