@j-o-r/hello-dave 0.0.10 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +2 -0
  2. package/README.md.bak.1779452127 +240 -0
  3. package/TODO.md +30 -8
  4. package/agents/code_agent.js +6 -6
  5. package/agents/daisy_agent.js +10 -7
  6. package/agents/minimax.js +173 -0
  7. package/agents/stability.js +173 -0
  8. package/bin/codeDave +1 -1
  9. package/bin/dave.js +1 -1
  10. package/docs/music-toolsets.md +137 -0
  11. package/docs/plans/minimax-music-generation.md +80 -0
  12. package/docs/plans/unified-agent-architecture.md +146 -0
  13. package/docs/plans/websocket-streaming-plan.md.bak +317 -0
  14. package/docs/prompt/task_clarification_and_documentation.md +35 -0
  15. package/lib/API/minimax/ImageToolset.js +169 -0
  16. package/lib/API/minimax/MusicToolset.js +290 -0
  17. package/lib/API/minimax/VideoToolset.js +296 -0
  18. package/lib/API/minimax/image.generation.md +239 -0
  19. package/lib/API/minimax/image.js +219 -0
  20. package/lib/API/minimax/image.to.image.md +257 -0
  21. package/lib/API/minimax/index.js +16 -0
  22. package/lib/API/minimax/music.cover.preprocess.md +206 -0
  23. package/lib/API/minimax/music.generation.md +346 -0
  24. package/lib/API/minimax/music.js +257 -0
  25. package/lib/API/minimax/music.lyrics.generation.md +205 -0
  26. package/lib/API/minimax/video.download.md +133 -0
  27. package/lib/API/minimax/video.first.last.image.md +186 -0
  28. package/lib/API/minimax/video.from.image.md +206 -0
  29. package/lib/API/minimax/video.from.subject.md +164 -0
  30. package/lib/API/minimax/video.generation.md +192 -0
  31. package/lib/API/minimax/video.js +339 -0
  32. package/lib/API/minimax/video.query.md +128 -0
  33. package/lib/API/stability.ai/ImageToolset.js +357 -0
  34. package/lib/API/stability.ai/MusicToolset.js +302 -0
  35. package/lib/API/stability.ai/audio-3.md +205 -0
  36. package/lib/API/stability.ai/audio.js +679 -0
  37. package/lib/API/stability.ai/image.js +911 -0
  38. package/lib/API/stability.ai/image.md +271 -0
  39. package/lib/API/stability.ai/index.js +11 -0
  40. package/lib/API/stability.ai/openapi.json +17118 -0
  41. package/lib/API/x.ai/ImageToolset.js +165 -0
  42. package/lib/API/x.ai/image.editing.md +86 -0
  43. package/lib/API/x.ai/image.js +393 -0
  44. package/lib/API/x.ai/image.md +213 -0
  45. package/lib/API/x.ai/image.to.generation.md +494 -0
  46. package/lib/API/x.ai/image.to.video.md +23 -0
  47. package/lib/API/x.ai/index.js +7 -0
  48. package/lib/AgentManager.js +1 -1
  49. package/lib/CdnToolset.js +191 -0
  50. package/lib/ToolSet.js +19 -1
  51. package/lib/cdn.js +373 -0
  52. package/lib/fafs.js +3 -1
  53. package/lib/genericToolset.js +43 -166
  54. package/lib/index.js +9 -1
  55. package/package.json +2 -2
  56. package/types/API/minimax/ImageToolset.d.ts +3 -0
  57. package/types/API/minimax/MusicToolset.d.ts +3 -0
  58. package/types/API/minimax/VideoToolset.d.ts +3 -0
  59. package/types/API/minimax/image.d.ts +109 -0
  60. package/types/API/minimax/index.d.ts +15 -0
  61. package/types/API/minimax/music.d.ts +46 -0
  62. package/types/API/minimax/video.d.ts +165 -0
  63. package/types/API/stability.ai/ImageToolset.d.ts +3 -0
  64. package/types/API/stability.ai/MusicToolset.d.ts +3 -0
  65. package/types/API/stability.ai/audio.d.ts +193 -0
  66. package/types/API/stability.ai/image.d.ts +274 -0
  67. package/types/API/stability.ai/index.d.ts +11 -0
  68. package/types/API/x.ai/ImageToolset.d.ts +3 -0
  69. package/types/API/x.ai/image.d.ts +82 -0
  70. package/types/API/x.ai/index.d.ts +7 -0
  71. package/types/AgentManager.d.ts +1 -1
  72. package/types/CdnToolset.d.ts +20 -0
  73. package/types/ToolSet.d.ts +8 -0
  74. package/types/cdn.d.ts +141 -0
  75. package/types/index.d.ts +9 -2
  76. package/docs/multi-agent-clusters.md.bak +0 -229
package/README.md CHANGED
@@ -30,6 +30,7 @@ The project emphasizes modular agent management, tool integration (e.g., web sea
30
30
  - **Agent Scripts**: Specialized agents for code, docs, npm, todo, readme, memory, and more.
31
31
  - **WebSocket Support**: Connect to remote agent servers for interactive or one-shot interactions.
32
32
  - **Toolsets**: Built-in tools for search, file I/O, email, and custom toolcalls.
33
+ - **Music Toolsets**: Dedicated Stability AI (Stable Audio 3) and Minimax (Music 2.6/Cover) toolsets for text-to-music, audio transformation, inpainting, covers, lyrics, and more. See [docs/music-toolsets.md](docs/music-toolsets.md) for full details, tools, and agent examples.
33
34
  - **Session Management**: Cache history, search sessions, reset, and inspect logs.
34
35
  - **ESM-First**: Modern Node.js modules with TypeScript definitions.
35
36
  - **Portable Agent Spawning**: `spawn_agent.js` enables creating and deploying agents anywhere (project dirs or fresh `/tmp`). Supports auto-deploy in existing projects (detects `./agents/*.js`), manual code+bash in isolated setups. Tools adapt to CWD for portability. Hybrid modes combine server/client for chaining. See [docs/prompt/spawn_agent.md](docs/prompt/spawn_agent.md) for blueprint and validation.
@@ -63,6 +64,7 @@ The project emphasizes modular agent management, tool integration (e.g., web sea
63
64
  - **types/**: TypeScript definitions.
64
65
  - **utils/**: Utility scripts (e.g., session management, testing).
65
66
  - **docs/**: Additional documentation, including agent blueprints (e.g., [prompt/spawn_agent.md](docs/prompt/spawn_agent.md)) and multi-agent guides (e.g., [multi-agent-clusters.md](docs/multi-agent-clusters.md)).
67
+ - New: [music-toolsets.md](docs/music-toolsets.md) — Documentation for Stability and Minimax MusicToolsets with usage examples from agents/stability.js and agents/minimax.js.
66
68
  - **release/**: Build artifacts.
67
69
 
68
70
  Other files: `package.json`, `CHANGELOG.md`, `TODO.md`, `LICENSE`.
@@ -0,0 +1,240 @@
1
+ # hello-dave
2
+
3
+ [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
4
+ [![Node.js >=20](https://img.shields.io/badge/Node.js-%3E=20-green.svg)](https://nodejs.org/)
5
+ [![Version: v0.0.8](https://img.shields.io/badge/Version-v0.0.8-green.svg)](https://codeberg.org/duin/hello-dave)
6
+ [![PM2 Compatible](https://img.shields.io/badge/PM2-Compatible-orange.svg)](https://pm2.keymetrics.io/)
7
+
8
+ ## Table of Contents
9
+
10
+ - [Description](#description)
11
+ - [Features](#features)
12
+ - [Folder Structure](#folder-structure)
13
+ - [Installation](#installation)
14
+ - [Usage](#usage)
15
+ - [Usage Examples](#usage-examples)
16
+ - [Development](#development)
17
+ - [Contributing](#contributing)
18
+ - [License](#license)
19
+
20
+ ## Description
21
+
22
+ **hello-dave** is an ESM (ECMAScript Modules) toolkit for building AI agents with unified access to endpoints from Grok (xAI), OpenAI, and Anthropic. It provides CLI tools for interacting with agents locally or via WebSocket servers, along with pre-built agent scripts for various tasks like code generation, documentation, testing, and more.
23
+
24
+ The project emphasizes modular agent management, tool integration (e.g., web search, file operations), and session handling with memory and caching. With v0.0.8, enhanced portability allows spawning agents in isolated environments like `/tmp` while maintaining tool functionality relative to the current working directory (CWD).
25
+
26
+ ## Features
27
+
28
+ - **Unified API Access**: Seamless integration with xAI (Grok), OpenAI, and Anthropic models.
29
+ - **CLI Tools**: `dave` for querying agents, `agentDave` for spawning servers, `codeDave` for code servers.
30
+ - **Agent Scripts**: Specialized agents for code, docs, npm, todo, readme, memory, and more.
31
+ - **WebSocket Support**: Connect to remote agent servers for interactive or one-shot interactions.
32
+ - **Toolsets**: Built-in tools for search, file I/O, email, and custom toolcalls.
33
+ - **Session Management**: Cache history, search sessions, reset, and inspect logs.
34
+ - **ESM-First**: Modern Node.js modules with TypeScript definitions.
35
+ - **Portable Agent Spawning**: `spawn_agent.js` enables creating and deploying agents anywhere (project dirs or fresh `/tmp`). Supports auto-deploy in existing projects (detects `./agents/*.js`), manual code+bash in isolated setups. Tools adapt to CWD for portability. Hybrid modes combine server/client for chaining. See [docs/prompt/spawn_agent.md](docs/prompt/spawn_agent.md) for blueprint and validation.
36
+ - **Multi-Agent Clusters**: PM2-powered CodeServer for scalable, clustered agent deployments (e.g., code_agent + todo_agent). Defaults to port 9000/secret '123'. See [docs/multi-agent-clusters.md](docs/multi-agent-clusters.md) for configuration.
37
+
38
+ ## Folder Structure
39
+
40
+ - **bin/**: Executables for CLI tools.
41
+ - `dave.js`: Main CLI for asking agents or connecting to WebSocket servers.
42
+ - `spawn_agent.js`: Spawns agent instances (binary: `agentDave`).
43
+ - `codeDave`: Launches code servers (via PM2 clusters). References [docs/multi-agent-clusters.md](docs/multi-agent-clusters.md) for setup and scaling.
44
+
45
+ - **agents/**: Agent scripts (`*_agent.js`) for specific tasks. These can be run directly with `node agents/<script>.js`.
46
+ - `ask_agent.js`: General query agent.
47
+ - `code_agent.js`: Code generation and execution agent (serves as main server in code clusters).
48
+ - `daisy_agent.js`: Music creation and editing assistant (lyrics, music-generation prompts, ffmpeg/sox audio editing).
49
+ - `docs_agent.js`: Documentation-focused agent.
50
+ - `gpt_agent.js`: OpenAI GPT integration agent.
51
+ - `grok_agent.js`: xAI Grok-specific agent.
52
+ - `memory_agent.js`: Agent with enhanced memory and context handling.
53
+ - `npm_agent.js`: NPM package management agent.
54
+ - `prompt_agent.js`: Prompt engineering and testing agent.
55
+ - `readme_agent.js`: README.md management agent.
56
+ - `spawn_agent.js`: Agent spawner (also in bin/). Creates portable CLI/WS agents + PM2 launchers. Validates/tests/deploys to `./agents/<name>.js`. Supports hybrid server/client modes. Portable to `/tmp` (tools follow CWD). Blueprint: [docs/prompt/spawn_agent.md](docs/prompt/spawn_agent.md).
57
+ - `test_agent.js`: Testing and validation agent.
58
+ - `todo_agent.js`: TODO list and task management agent.
59
+ - `codeserver.sh`: Shell script for launching PM2-based code server clusters (uses agents above). See [docs/multi-agent-clusters.md](docs/multi-agent-clusters.md).
60
+
61
+ - **lib/**: Core library modules (e.g., `index.js`, `wsCli.js`, `wsIO.js`, API integrations).
62
+ - **scenarios/**: Test and example scenarios (e.g., toolset tests, integration scripts).
63
+ - **types/**: TypeScript definitions.
64
+ - **utils/**: Utility scripts (e.g., session management, testing).
65
+ - **docs/**: Additional documentation, including agent blueprints (e.g., [prompt/spawn_agent.md](docs/prompt/spawn_agent.md)) and multi-agent guides (e.g., [multi-agent-clusters.md](docs/multi-agent-clusters.md)).
66
+ - **release/**: Build artifacts.
67
+
68
+ Other files: `package.json`, `CHANGELOG.md`, `TODO.md`, `LICENSE`.
69
+
70
+ ## Installation
71
+
72
+ 1. **Prerequisites**: Node.js >= 20. PM2 for multi-agent clusters: `npm install -g pm2`.
73
+
74
+ 2. **Local Development**:
75
+ ```bash
76
+ git clone https://codeberg.org/duin/hello-dave.git
77
+ cd hello-dave
78
+ npm install
79
+ ```
80
+
81
+ 3. **Global CLI Installation** (for `dave`, `agentDave`, `codeDave`):
82
+ ```bash
83
+ npm install -g .
84
+ # Or link for dev: npm run link-self
85
+ ```
86
+
87
+ 4. **API Keys**: Set environment variables (e.g., `XAIKEY` for xAI, `OPENAI_API_KEY` for OpenAI).
88
+
89
+ ## Usage
90
+
91
+ ### CLI Tools
92
+
93
+ - **dave**: Interact with agents locally or remotely.
94
+ - Local query: `dave --ask "Predict the weather"`
95
+ - One-shot remote: `echo "Hello" | dave --connect 'ws://127.0.0.1:8080' --secret '123'`
96
+ - Interactive remote: `dave --connect 'ws://127.0.0.1:8080' --secret '123'`
97
+ - Other: `dave --list` (sessions), `dave --clear` (cache), `dave --help`.
98
+
99
+ - **agentDave**: Spawn an agent server (powered by `spawn_agent.js`).
100
+ ```bash
101
+ agentDave --serve 8080 --secret '123' # Starts WebSocket server
102
+ ```
103
+
104
+ - **codeDave**: Launch a PM2 code server cluster.
105
+ ```bash
106
+ codeDave 8080 --secret '123' # Or: dave --code 8080 --secret '123'
107
+ ```
108
+
109
+ ### Running Agents
110
+
111
+ Run agent scripts directly:
112
+ ```bash
113
+ node agents/spawn_agent.js --serve 8080 --secret '123' # Spawn server
114
+ node agents/code_agent.js --connect 'ws://127.0.0.1:8080' --secret '123' # Client mode
115
+ ```
116
+
117
+ For full options, see script headers or run with `--help`.
118
+
119
+ ### Programmatic Usage
120
+
121
+ Import and use in your Node.js app:
122
+ ```javascript
123
+ import { AgentManager } from '@j-o-r/hello-dave';
124
+ // Setup and run agent...
125
+ ```
126
+
127
+ ### spawn_agent.js Specifics (v0.0.8 Portability Ready)
128
+
129
+ `spawn_agent.js` (executable as `agentDave`) creates and deploys portable agents in CLI, WebSocket server, client, or **hybrid** modes (server + client for tool chaining). It fetches live prompts from [docs/prompt/spawn_agent.md](docs/prompt/spawn_agent.md) and validates new agents via blueprint (temp files, syntax checks, grep for modes/tools).
130
+
131
+ - **Modes**:
132
+ - **Direct (One-Shot)**: Positional input, e.g., `node agents/spawn_agent.js "Create a tester"`.
133
+ - **Interactive CLI**: No input; runs REPL-like session.
134
+ - **Server**: `--serve <port>` – Exposes as remote tool (e.g., for other agents to call `spawn_agent`).
135
+ - **Client**: `--connect <ws_url>` – Connects to remote server for tool access.
136
+ - **Hybrid**: `--serve <port> --connect <ws_url>` – Serves locally while using remote tools.
137
+
138
+ - **Portability**:
139
+ - **In Project**: Auto-deploys to `./agents/<name>.js` if `./agents/*.js` exist (uses existing structure).
140
+ - **Fresh /tmp**: Manual code generation + bash setup (e.g., `mkdir /tmp/myproj/agents; cd /tmp/myproj; node spawn_agent.js ...`). Tools (e.g., `read_file`) follow CWD, ensuring isolation without fixed paths.
141
+ - Works in any dir; no repo deps beyond Node.js + npm installs.
142
+
143
+ - **Custom Tools**: Specify in prompt, e.g., "name=coderev, tools=read_file,web_search,execute_bash_script".
144
+
145
+ For validation/testing: See blueprint in [docs/prompt/spawn_agent.md](docs/prompt/spawn_agent.md) (includes bash examples like `node agents/NEW_AGENT.js --help` or server connect tests).
146
+
147
+ ### CodeServer & PM2 Multi-Agent (v0.0.8)
148
+
149
+ CodeServer (via `bin/codeDave` or `agents/codeserver.sh`) launches scalable, PM2-clustered multi-agent environments for collaborative tasks (e.g., code generation + task management). It deploys agents like `code_agent`, `todo_agent`, and `readme_agent` on a WebSocket server, enabling chaining and load balancing.
150
+
151
+ - **Defaults**: Port 9000, secret '123' (customizable via args).
152
+ - **Prerequisites**: Install PM2 globally (`npm install -g pm2`).
153
+ - **Quickstart**:
154
+ ```bash
155
+ codeDave 9000 --secret 123 # Launches PM2 cluster on ws://127.0.0.1:9000/ws
156
+ pm2 list # Verify processes (e.g., code_agent, todo_agent instances)
157
+ echo "task" | bin/dave.js --connect ws://127.0.0.1:9000/ws --secret 123 # One-shot query to cluster
158
+ ```
159
+ This starts the cluster, lists running agents, and sends a test task (response routed to available agents).
160
+
161
+ - **Scaling & Management**: Use PM2 commands (e.g., `pm2 scale <app> 4`, `pm2 stop all`). For ecosystem config, PM2 JSON files, or agent orchestration, see the [full guide in docs/multi-agent-clusters.md](docs/multi-agent-clusters.md).
162
+
163
+ - **Integration**: Combine with `spawn_agent.js` for dynamic agent addition to clusters (e.g., spawn new agents and connect via `--connect ws://127.0.0.1:9000/ws`).
164
+
165
+ ## Usage Examples
166
+
167
+ - **Direct Spawn (Project)**:
168
+ ```bash
169
+ cd hello-dave # Or any project with ./agents/
170
+ node agents/spawn_agent.js "Create code-review agent: name=coderev, desc=Git diff analyzer, tools=read_file,execute_bash_script,web_search"
171
+ # Deploys to ./agents/coderev_agent.js; test: node agents/coderev_agent.js "Review this diff"
172
+ ```
173
+
174
+ - **/tmp Workflow (Portable, Fresh Setup)**:
175
+ ```bash
176
+ mkdir -p /tmp/myportable/agents
177
+ cd /tmp/myportable
178
+ # Copy or fetch spawn_agent.js (e.g., curl from repo or npm install @j-o-r/hello-dave)
179
+ npm init -y && npm i @j-o-r/hello-dave @j-o-r/sh
180
+ node spawn_agent.js "Create todo agent: name=todo, desc=Task manager, tools=read_file,write_file"
181
+ # Deploys to ./agents/todo_agent.js; tools use /tmp/myportable as CWD
182
+ node agents/todo_agent.js --serve 8081 --secret abc # Test server mode
183
+ ```
184
+
185
+ - **Hybrid Mode with Custom Tools**:
186
+ ```bash
187
+ node agents/spawn_agent.js --serve 8081 --connect ws://127.0.0.1:8080/ws --secret abc "Spawn a docs agent with custom email tool"
188
+ # Serves on 8081 (exposes new agent), connects to 8080 (gains remote tools like email)
189
+ # Test: In another term, node agents/spawn_agent.js --connect ws://127.0.0.1:8081/ws --secret abc "Use the new docs agent"
190
+ ```
191
+
192
+ - **Spawn a Server**:
193
+ ```bash
194
+ node agents/spawn_agent.js --serve 8080 --secret '123'
195
+ # Then connect: echo "Task" | dave --connect 'ws://127.0.0.1:8080' --secret '123'
196
+ ```
197
+
198
+ - **Launch Code Cluster** (includes multiple agents):
199
+ ```bash
200
+ node agents/codeserver.sh 9000 '123' # Or use codeDave 9000 --secret 123
201
+ # Connects code_agent, todo_agent, readme_agent, etc., to the server on ws://127.0.0.1:9000/ws.
202
+ # Test: pm2 list; echo "Optimize this code" | dave --connect ws://127.0.0.1:9000/ws --secret 123
203
+ # For advanced config: See [docs/multi-agent-clusters.md](docs/multi-agent-clusters.md).
204
+ ```
205
+
206
+ - **Local Agent Query** (no server):
207
+ ```bash
208
+ dave --ask --model 'grok-4-1-fast-reasoning' "Write a function"
209
+ ```
210
+
211
+ - **Custom Agent**:
212
+ ```bash
213
+ node agents/docs_agent.js --connect 'ws://127.0.0.1:8080' --secret '123'
214
+ ```
215
+
216
+ See `scenarios/` for more test scenarios.
217
+
218
+ ## Development
219
+
220
+ - **Build Types**: `npm run types`
221
+ - **Test**: `npm run tests` (runs `utils/test.sh`)
222
+ - **Release**: `npm run release` then `npm run publish`
223
+ - **Local Linking**: `npm run link-self` / `npm run unlink-self`
224
+
225
+ Use `git status` and `ls` to inspect changes. Ensure ESM compatibility.
226
+
227
+ ## Contributing
228
+
229
+ Contributions welcome! Fork the repo, create a branch, and submit a pull request to https://codeberg.org/duin/hello-dave.
230
+
231
+ - Report bugs: https://codeberg.org/duin/hello-dave/issues
232
+ - Follow Apache-2.0 license.
233
+
234
+ ## License
235
+
236
+ This project is licensed under the Apache-2.0 License - see the [LICENSE](LICENSE) file for details.
237
+
238
+ ---
239
+ *Repository: https://codeberg.org/duin/hello-dave*
240
+ ---
package/TODO.md CHANGED
@@ -1,13 +1,35 @@
1
- ## TODO (high priority pending)
2
- (none)
1
+ ## High Priority Pending
2
+ - [ ] 2026-05-04: Create simple event-based unified WebSocket architecture. Remove complicated streaming plan. Focus on live reasoning in CLI/WS, unify user/agent client handling in AgentServer.js, make Prompt events broadcast consistently to user WS connections. Make code more readable and logical. Reference: docs/plans/simple-event-driven-websocket.md.
3
+ - [ ] Review current WebSocket protocol and Prompt event emitters.
4
+ - [ ] In AgentServer.js: Centralize event listeners for Prompt, add unified broadcast method for user WS, unify user/agent client handling.
5
+ - [ ] Update lib/wsIO.js for CLI to handle events similarly to WS.
6
+ - [ ] Refactor code for readability and logical flow.
7
+ - [ ] Test end-to-end: Event flow from Prompt to user display in CLI and WS.
8
+ - [ ] Update docs/websocket-protocol.md with new architecture.
3
9
 
4
- ## In Progress
5
- (none)
10
+ - [ ] 2026-05-04: Implement unified lib/Agent.js that treats users and agents symmetrically. Key rules: No 'final response' concept. Use Prompt 'ready' event as completion signal. Agents can send 'function_call' requests to user connections (user acts as expert/tool). Update message protocol to be fully event-driven with 'ready' as end marker. Reference: docs/plans/unified-agent-architecture.md.
11
+ - [ ] Review current Agent.js, AgentServer.js, and Prompt event system.
12
+ - [ ] Refactor lib/Agent.js to unify user and agent handling: shared methods for event emission, listening, and protocol adherence.
13
+ - [ ] Integrate Prompt 'ready' event as the primary completion hook across agents and user interactions.
14
+ - [ ] Add support for agents to emit 'function_call' events to user WS connections.
15
+ - [ ] Update message protocol: Define event types, ensure 'ready' is broadcast consistently.
16
+ - [ ] Test symmetry: Verify bidirectional flows (agent-to-user function calls, user-to-agent events) in CLI/WS.
17
+ - [ ] Document in docs/websocket-protocol.md and update any related docs.
6
18
 
7
- ## Later (future tasks)
8
- (none)
19
+ ## Prompt & Agent Quality (High Priority)
20
+ - [ ] 2026-04-25: Review all prompts (in agents, spawn_agent, memory protocol, etc.) to prevent over-complicating implementations by adding unrequested requirements (e.g. backward compatibility when user explicitly says the project is BETA v0.0.x and does not need it).
21
+
22
+ ## Multi-modal
23
+ - [ ] 2026-04-25: Add multi-modal (audio, images, video) capabilities to the CLI.
24
+
25
+ ## Agent Improvements
26
+ - [ ] 2026-04-25: Teach/instruct the spawn_agent to test and improve agents in a 'server' setting.
27
+
28
+ ## Input Handling
29
+ - [ ] 2026-04-25: Create a 'smart' pre-evaluation agent that assesses user input for clarity before the main agent starts work. It should check: Is the task clear? Are the necessary tools present? Guess context from previous messages if input seems incomplete (e.g. accidental early enter).
30
+
31
+ ## API Capabilities
32
+ - [ ] 2026-04-25: Ensure 'hello-dave' is fully capable of implementing/improving other APIs following the exact structure and patterns of this project (tools, agents, memory protocol, etc.).
9
33
 
10
34
  ## Done
11
35
  (none - all archived to docs/todo-archive-v0.1.0.md on 2026-04-24)
12
-
13
- Archive notes: All tasks completed and archived as of 2026-04-24. Previous archives: docs/todo-archive-v0.0.9.md (2026-04-20), docs/todo-archive-v0.0.8.md (2026-04-15), docs/todo-archive.md (2026-04-13), docs/todo-archive-infra-2026-04-21.md (2026-04-21).
@@ -24,7 +24,7 @@ if (args['secret']) {
24
24
  // Set properties only if provided via command line (except model which has default)
25
25
  if (args['model'] || true) { // model gets default value
26
26
  // @ts-ignore
27
- options.model = args['model'] || 'grok-4.20-reasoning';
27
+ options.model = args['model'] || 'grok-4.3';
28
28
  }
29
29
  // if (args['temperature']) {
30
30
  options.temperature = 0.2;
@@ -37,11 +37,11 @@ if (args['top_p']) {
37
37
  }
38
38
  // const reasoning = args['reasoning'] ? args['reasoning'] : 'medium';
39
39
  // if (reasoning) {
40
- // options.reasoning = {
41
- // // @ts-ignore
42
- // effort:reasoning,
43
- // summary: 'auto'
44
- // }
40
+ options.reasoning = {
41
+ // @ts-ignore
42
+ effort:'high',
43
+ summary: 'auto'
44
+ }
45
45
  // }
46
46
  const toolsetMode = 'auto';
47
47
  const contextWindow = args['context'] ? parseInt(args['context']) : 2565000;
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env node
2
- import { AgentManager } from '@j-o-r/hello-dave';
2
+ import { AgentManager, API, CdnToolset } from '@j-o-r/hello-dave';
3
+ import * as test from '@j-o-r/hello-dave';
3
4
  import { parseArgs } from '@j-o-r/sh';
4
5
 
5
6
  const name = 'daisy_agent';
@@ -101,10 +102,11 @@ const tool_call_name = 'daisy_agent';
101
102
  const tool_call_description = `
102
103
  Daisy Music Assistant:
103
104
  - "Lyrics for [theme]" → Generate lyrics.
104
- - "Suno prompt: [style]" → Optimized Suno prompt.
105
+ - "Music minimax prompt: [style]" → Optimized prompt.
105
106
  - "ffmpeg [task] on file.wav" → Bash script to run.
106
107
  - web_search: Research chords/lyrics.
107
108
  - execute_bash_script: Processes your local files safely.
109
+ - Create music
108
110
  `.trim();
109
111
 
110
112
  const prompt = `
@@ -112,7 +114,7 @@ You are ${name}, a helpful music creation and editing assistant for the user's c
112
114
 
113
115
  Core expertise:
114
116
  - Generate lyrics: Creative, structured (verses, chorus), themed, rhyming.
115
- - Suno AI prompts: Detailed, vivid descriptions (genre, mood, instruments, structure, vocals).
117
+ - niMAx 2.6 AI prompts: Detailed, vivid descriptions (genre, mood, instruments, structure, vocals).
116
118
  - Local audio editing: Use execute_bash_script with ffmpeg/sox commands. Provide exact bash snippets first, confirm before running. Examples:
117
119
  * Trim: ffmpeg -i input.mp3 -ss 00:00:30 -t 00:01:00 output.mp3
118
120
  * Concat: echo "file 'a.mp3'" > list.txt; ffmpeg -f concat -i list.txt out.mp3
@@ -129,7 +131,7 @@ Behavior:
129
131
  - Output ready-to-copy bash for ffmpeg/sox.
130
132
  - List files if unclear: Use ls *.wav *.mp3 etc. via bash.
131
133
 
132
- Current env: Ubuntu, ffmpeg & sox installed, cwd: /home/jd/devpri/js/hello-dave
134
+ Current env: Ubuntu, ffmpeg & sox installed
133
135
 
134
136
  Respond concisely but completely. Use markdown for code/lyrics/prompts.
135
137
  `.trim();
@@ -143,9 +145,10 @@ agent.setup({
143
145
  contextWindow
144
146
  });
145
147
  const toolset = agent.getToolset();
148
+ toolset?.borrow(API.minimax.musicToolset);
149
+ toolset?.borrow(CdnToolset);
146
150
  if (toolset) {
147
151
  agent.addGenericToolcall('open_link');
148
- agent.addGenericToolcall('send_email');
149
152
  agent.addGenericToolcall('execute_bash_script');
150
153
  agent.addGenericToolcall('read_file');
151
154
  agent.addGenericToolcall('write_file');
@@ -156,7 +159,7 @@ ${name} ${options.model} ready! (temp: ${options.temperature}, context: ${contex
156
159
 
157
160
  Ask me to:
158
161
  - Write lyrics
159
- - Craft Suno prompts
162
+ - Craft Music prompts
160
163
  - Edit audio: "fade out my track.mp3" → I'll give ffmpeg cmd
161
164
  Type /help for more.
162
165
  ${tool_call_name}
@@ -167,4 +170,4 @@ if (input) {
167
170
  console.log(RES);
168
171
  } else {
169
172
  await agent.start(serve, connect, cliIntro, tool_call_name, tool_call_description);
170
- }
173
+ }
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/env node
2
+ import { AgentManager, API, CdnToolset } from '@j-o-r/hello-dave';
3
+ import * as test from '@j-o-r/hello-dave';
4
+ import { parseArgs } from '@j-o-r/sh';
5
+
6
+ const name = 'minimaxi';
7
+ const api = 'xai';
8
+ let secret = '';
9
+
10
+ const args = parseArgs();
11
+
12
+ let input;
13
+ if (args._.length === 1 && typeof args._[0] === 'string' && args._[0].trim() !== '') {
14
+ input = args._[0].trim();
15
+ }
16
+
17
+ const help = args['help'] || false;
18
+ const connect = args['connect'] ? args['connect'] : undefined;
19
+ const serve = args['serve'] ? parseInt(args['serve']) : undefined;
20
+
21
+ /** @type {import('lib/API/x.ai/responses.js').XAIOptions} */
22
+ const options = { tools: [] };
23
+ options.tools.push({
24
+ type: 'web_search'
25
+ });
26
+
27
+ if (args['secret']) {
28
+ secret = args['secret'];
29
+ }
30
+ if (args['model'] || true) {
31
+ options.model = args['model'] || 'grok-4-fast-reasoning';
32
+ }
33
+ if (args['temperature']) {
34
+ options.temperature = parseFloat(args['temperature']);
35
+ } else {
36
+ options.temperature = 0.8;
37
+ }
38
+ if (args['tokens']) {
39
+ options.max_output_tokens = parseInt(args['tokens']);
40
+ }
41
+ if (args['top_p']) {
42
+ options.top_p = parseFloat(args['top_p']);
43
+ }
44
+ const reasoning = true;
45
+ if (reasoning) {
46
+ options.reasoning = {
47
+ effort: 'medium',
48
+ summary: 'auto'
49
+ }
50
+ }
51
+ const toolsetMode = 'auto';
52
+ const contextWindow = args['context'] ? parseInt(args['context']) : 1900000;
53
+
54
+ function printHelp() {
55
+ console.log(`
56
+ '${name} --help' You are looking at it.
57
+
58
+ ## USAGE MODES:
59
+
60
+ ### 1. Direct Call (One-Shot, Positional ONLY):
61
+ ./agents/${name}.js "Generate lyrics for pop song" [--options]
62
+
63
+ ### 2. Interactive CLI (no positional arg):
64
+ ./agents/${name}.js [--options]
65
+
66
+ ### 3. WS Server (no positional arg):
67
+ ./agents/${name}.js --serve 8080 [--secret mysecret] [--options]
68
+
69
+ ### 4. WS Client (no positional arg):
70
+ ./agents/${name}.js --connect ws://127.0.0.1:8080/ws --secret mysecret [--options]
71
+
72
+ ### 5. Hybrid (Server + Client, no positional arg):
73
+ ./agents/${name}.js --serve 8081 --connect ws://other:8080/ws [--secret ...] [--options]
74
+
75
+ ## SERVER OPTIONS EXPLAINED:
76
+ --serve [port]: Starts WebSocket SERVER at ws://127.0.0.1:[port]/ws. Allows other agents (--connect) to connect and use this agent as a remote TOOL (e.g., 'daisy_agent'). Runs indefinitely until Ctrl+C.
77
+
78
+ --connect [ws_url]: Connects as CLIENT to remote WS server at [ws_url] (e.g., ws://127.0.0.1:8080/ws). Gains access to remote agent's tools. Interactive CLI available.
79
+
80
+ --secret [string]: SHARED AUTH TOKEN (min 3 chars). SERVER rejects clients without matching --secret. CLIENTS must provide server's secret to connect. Use same secret for chains.
81
+
82
+ Note: Server/Client/Hybrid IGNORES positional input arg (use CLI modes instead). Hybrid: This agent serves AND uses remote tools.
83
+
84
+ ## OPTIONS:
85
+ --model [grok-4-fast-reasoning|...] (default: grok-4-fast-reasoning)
86
+ --temperature [float] (-2 to +2, default 0.8 for creativity)
87
+ --tokens [number] (max output tokens)
88
+ --top_p [float]
89
+ --context [number] (default: 1900000)
90
+
91
+ ## SERVER TOOLS (when no input):
92
+ Exposes as 'daisy_agent' tool for chaining.
93
+ `);
94
+ process.exit();
95
+ }
96
+
97
+ if (help) {
98
+ printHelp();
99
+ }
100
+
101
+ const tool_call_name = 'daisy_agent';
102
+ const tool_call_description = `
103
+ Daisy Music Assistant:
104
+ - "Lyrics for [theme]" → Generate lyrics.
105
+ - "Music minimax prompt: [style]" → Optimized prompt.
106
+ - "ffmpeg [task] on file.wav" → Bash script to run.
107
+ - web_search: Research chords/lyrics.
108
+ - execute_bash_script: Processes your local files safely.
109
+ - Create music
110
+ `.trim();
111
+
112
+ const prompt = `
113
+ You are ${name}, a helpful music creation and editing assistant for the user's computer.
114
+
115
+ Core expertise:
116
+ - Generate lyrics: Creative, structured (verses, chorus), themed, rhyming.
117
+ - niMAx 2.6 AI prompts: Detailed, vivid descriptions (genre, mood, instruments, structure, vocals).
118
+ - Local audio editing: Use execute_bash_script with ffmpeg/sox commands. Provide exact bash snippets first, confirm before running. Examples:
119
+ * Trim: ffmpeg -i input.mp3 -ss 00:00:30 -t 00:01:00 output.mp3
120
+ * Concat: echo "file 'a.mp3'" > list.txt; ffmpeg -f concat -i list.txt out.mp3
121
+ * Sox effects: sox input.wav output.wav fade 0 3 2 norm
122
+ * Convert: ffmpeg -i video.mp4 audio.aac
123
+ - Music theory: Chords, scales, BPM, EQ tips.
124
+ - Workflows: Step-by-step for mixing, mastering, layering tracks.
125
+
126
+ Behavior:
127
+ - Be creative & enthusiastic!
128
+ - Step-by-step: Explain, provide code, suggest files in current dir.
129
+ - Safety: Quote bash commands; ask confirmation for destructive ops (e.g., overwrite).
130
+ - Use web_search for inspiration/lyrics if needed.
131
+ - Output ready-to-copy bash for ffmpeg/sox.
132
+ - List files if unclear: Use ls *.wav *.mp3 etc. via bash.
133
+
134
+ Current env: Ubuntu, ffmpeg & sox installed
135
+
136
+ Respond concisely but completely. Use markdown for code/lyrics/prompts.
137
+ `.trim();
138
+
139
+ const agent = new AgentManager({ name, secret });
140
+ agent.setup({
141
+ prompt,
142
+ api,
143
+ options,
144
+ toolsetMode,
145
+ contextWindow
146
+ });
147
+ const toolset = agent.getToolset();
148
+ toolset?.borrow(API.minimax.musicToolset);
149
+ toolset?.borrow(CdnToolset);
150
+ if (toolset) {
151
+ agent.addGenericToolcall('open_link');
152
+ agent.addGenericToolcall('execute_bash_script');
153
+ agent.addGenericToolcall('read_file');
154
+ agent.addGenericToolcall('write_file');
155
+ }
156
+
157
+ const cliIntro = `
158
+ ${name} ${options.model} ready! (temp: ${options.temperature}, context: ${contextWindow})
159
+
160
+ Ask me to:
161
+ - Write lyrics
162
+ - Craft Music prompts
163
+ - Edit audio: "fade out my track.mp3" → I'll give ffmpeg cmd
164
+ Type /help for more.
165
+ ${tool_call_name}
166
+ `.trim();
167
+
168
+ if (input) {
169
+ const RES = await agent.directCall(input);
170
+ console.log(RES);
171
+ } else {
172
+ await agent.start(serve, connect, cliIntro, tool_call_name, tool_call_description);
173
+ }