@j-o-r/hello-dave 0.0.10 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.md.bak.1779452127 +240 -0
- package/TODO.md +30 -8
- package/agents/code_agent.js +6 -6
- package/agents/daisy_agent.js +10 -7
- package/agents/minimax.js +173 -0
- package/agents/stability.js +173 -0
- package/bin/codeDave +1 -1
- package/bin/dave.js +1 -1
- package/docs/music-toolsets.md +137 -0
- package/docs/plans/minimax-music-generation.md +80 -0
- package/docs/plans/unified-agent-architecture.md +146 -0
- package/docs/plans/websocket-streaming-plan.md.bak +317 -0
- package/docs/prompt/task_clarification_and_documentation.md +35 -0
- package/lib/API/minimax/ImageToolset.js +169 -0
- package/lib/API/minimax/MusicToolset.js +290 -0
- package/lib/API/minimax/VideoToolset.js +296 -0
- package/lib/API/minimax/image.generation.md +239 -0
- package/lib/API/minimax/image.js +219 -0
- package/lib/API/minimax/image.to.image.md +257 -0
- package/lib/API/minimax/index.js +16 -0
- package/lib/API/minimax/music.cover.preprocess.md +206 -0
- package/lib/API/minimax/music.generation.md +346 -0
- package/lib/API/minimax/music.js +257 -0
- package/lib/API/minimax/music.lyrics.generation.md +205 -0
- package/lib/API/minimax/video.download.md +133 -0
- package/lib/API/minimax/video.first.last.image.md +186 -0
- package/lib/API/minimax/video.from.image.md +206 -0
- package/lib/API/minimax/video.from.subject.md +164 -0
- package/lib/API/minimax/video.generation.md +192 -0
- package/lib/API/minimax/video.js +339 -0
- package/lib/API/minimax/video.query.md +128 -0
- package/lib/API/stability.ai/ImageToolset.js +357 -0
- package/lib/API/stability.ai/MusicToolset.js +302 -0
- package/lib/API/stability.ai/audio-3.md +205 -0
- package/lib/API/stability.ai/audio.js +679 -0
- package/lib/API/stability.ai/image.js +911 -0
- package/lib/API/stability.ai/image.md +271 -0
- package/lib/API/stability.ai/index.js +11 -0
- package/lib/API/stability.ai/openapi.json +17118 -0
- package/lib/API/x.ai/ImageToolset.js +165 -0
- package/lib/API/x.ai/image.editing.md +86 -0
- package/lib/API/x.ai/image.js +393 -0
- package/lib/API/x.ai/image.md +213 -0
- package/lib/API/x.ai/image.to.generation.md +494 -0
- package/lib/API/x.ai/image.to.video.md +23 -0
- package/lib/API/x.ai/index.js +7 -0
- package/lib/AgentManager.js +1 -1
- package/lib/CdnToolset.js +191 -0
- package/lib/ToolSet.js +19 -1
- package/lib/cdn.js +373 -0
- package/lib/fafs.js +3 -1
- package/lib/genericToolset.js +43 -166
- package/lib/index.js +9 -1
- package/package.json +2 -2
- package/types/API/minimax/ImageToolset.d.ts +3 -0
- package/types/API/minimax/MusicToolset.d.ts +3 -0
- package/types/API/minimax/VideoToolset.d.ts +3 -0
- package/types/API/minimax/image.d.ts +109 -0
- package/types/API/minimax/index.d.ts +15 -0
- package/types/API/minimax/music.d.ts +46 -0
- package/types/API/minimax/video.d.ts +165 -0
- package/types/API/stability.ai/ImageToolset.d.ts +3 -0
- package/types/API/stability.ai/MusicToolset.d.ts +3 -0
- package/types/API/stability.ai/audio.d.ts +193 -0
- package/types/API/stability.ai/image.d.ts +274 -0
- package/types/API/stability.ai/index.d.ts +11 -0
- package/types/API/x.ai/ImageToolset.d.ts +3 -0
- package/types/API/x.ai/image.d.ts +82 -0
- package/types/API/x.ai/index.d.ts +7 -0
- package/types/AgentManager.d.ts +1 -1
- package/types/CdnToolset.d.ts +20 -0
- package/types/ToolSet.d.ts +8 -0
- package/types/cdn.d.ts +141 -0
- package/types/index.d.ts +9 -2
- package/docs/multi-agent-clusters.md.bak +0 -229
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
# WebSocket Streaming Implementation Plan
|
|
2
|
+
|
|
3
|
+
**VERY IMPORTANT CONSTRAINT**: Only the files `lib/AgentServer.js`, `lib/AgentClient.js`, and `lib/wsIO.js` may be modified. No changes allowed to `Prompt.js`, `Session.js`, `ToolSet.js`, or any other files. All streaming must be achieved by extending existing mechanisms within these three files, such as wrapping `prompt.call()` in `AgentServer.js` to intercept/simulate intermediate events, adding new ACTIONS, and handling progressive messages in `wsIO.js`. Assume `Prompt.js` already emits or logs intermediate events (e.g., reasoning, tool calls) that can be hooked via existing listeners in `AgentServer.js`; if not, use console interception or async wrappers for simulation without altering `Prompt.js`.
|
|
4
|
+
|
|
5
|
+
**Version**: Draft v0.2 (April 25, 2026)
|
|
6
|
+
**Repository**: https://codeberg.org/duin/hello-dave
|
|
7
|
+
**Status**: Planning / Revised for Constraints
|
|
8
|
+
**Related Files (Modifiable Only)**:
|
|
9
|
+
- `lib/AgentServer.js` (Extend event listeners, wrap `prompt.call()`, add ACTIONS and WS emissions)
|
|
10
|
+
- `lib/AgentClient.js` (Handle incoming server_* streams if needed for agents)
|
|
11
|
+
- `lib/wsIO.js` (Progressive message handling and display)
|
|
12
|
+
- Reference (Read-Only): `lib/Prompt.js` (Assume existing event emissions/logs), `docs/agent-dave-websocket-protocol.md`
|
|
13
|
+
|
|
14
|
+
This document outlines the current WebSocket architecture, identifies the streaming limitation, defines the implementation goal, and provides a revised detailed step-by-step plan based on the TOP priority TODO from `TODO.md`. Revisions focus exclusively on modifications within the three allowed files, emphasizing extension of existing prompt event listeners in `AgentServer.js` for real-time `server_*` emissions during `prompt.call()`. It includes updated code structure suggestions (edits only to the three files), new message actions, streaming handling in `wsIO.js`, integration notes for the memory protocol (via existing Session hooks in AgentServer), and progress tracking. The plan ensures safe implementation with backward compatibility for existing clients.
|
|
15
|
+
|
|
16
|
+
## Current WebSocket Architecture
|
|
17
|
+
|
|
18
|
+
The WebSocket protocol in hello-dave enables multi-agent collaboration and CLI interactions via a central `AgentServer.js`. Key components (modifiable only as constrained):
|
|
19
|
+
|
|
20
|
+
### 1. **AgentServer.js** (Server-Side Hub - Primary Focus for Emissions)
|
|
21
|
+
- Listens on `ws://<host>:<port>/ws` (default port 8080).
|
|
22
|
+
- Manages connections: Authenticates via `?wssrc_id=<secret>` query param.
|
|
23
|
+
- Validates incoming JSON messages against the `ACTIONS` array (e.g., `user_request`, `agent_query`, `server_response`).
|
|
24
|
+
- Integrates with `Prompt.js` for LLM processing: On `user_request`, calls `prompt.call(content)`, waits for final result, and sends `server_response` to the user connection. Existing event listeners (if any) on `prompt` can be extended to capture intermediates.
|
|
25
|
+
- Handles agent tools: `AgentClient.js` instances connect and register via `agent_introduction`, becoming dynamic tools.
|
|
26
|
+
- Broadcasts: e.g., `reset` on session reset.
|
|
27
|
+
- Current limitation: Only forwards the **final** `server_response`. Intermediate events (reasoning, tool calls, logs) from `Prompt.js` are internal; `AgentServer.js` can wrap/extend listeners to stream them without changing `Prompt.js`.
|
|
28
|
+
- Pending responses: Uses a `Map` for ID-based matching with timeouts (10min max).
|
|
29
|
+
|
|
30
|
+
### 2. **AgentClient.js** (Agent-Side Wrapper - Minimal Streaming Support)
|
|
31
|
+
- Agents (e.g., `code_agent.js`) connect to the server as tools.
|
|
32
|
+
- Sequential queue processing: Pushes `agent_query` messages, processes one-at-a-time with polling (2s interval).
|
|
33
|
+
- Handles `agent_response` / `agent_error` back to server.
|
|
34
|
+
- Auto-reconnects on close (5s retry).
|
|
35
|
+
- Epoch counter for reset invalidation.
|
|
36
|
+
- Streaming extension: Can receive and forward `server_*` messages internally without altering other files.
|
|
37
|
+
|
|
38
|
+
### 3. **wsIO.js** (CLI One-Shot Client - Progressive Display)
|
|
39
|
+
- Simple WS client for `dave --connect` or programmatic use.
|
|
40
|
+
- Sends `user_introduction` + `user_request`, waits for matching `server_response` by ID, then closes.
|
|
41
|
+
- Base64-encodes secret for query param.
|
|
42
|
+
- Displays only the final response; extend to handle multiple `server_*` messages progressively via `ws.on('message')`.
|
|
43
|
+
- Used in REPL-like interactive mode via `wsCli.js` (extend similarly if needed, but focus on `wsIO.js`).
|
|
44
|
+
|
|
45
|
+
### Protocol Basics (Read-Only Reference from `docs/agent-dave-websocket-protocol.md`)
|
|
46
|
+
- JSON messages: `{ action: string, content: any, id: string, name?: string }`.
|
|
47
|
+
- Fixed `ACTIONS` array ensures security (unknown actions disconnect client).
|
|
48
|
+
- Backward compatibility: Server broadcasts to specific connections (user vs. agents); new actions ignored by old clients.
|
|
49
|
+
|
|
50
|
+
## The Problem
|
|
51
|
+
|
|
52
|
+
Currently, the WebSocket protocol is **one-way final-response oriented**:
|
|
53
|
+
- Users/CLI receive only the `server_response` after full LLM processing (reasoning + all tool calls).
|
|
54
|
+
- Intermediate steps (e.g., LLM thinking aloud, tool invocations like `web_search`, errors, debug logs) are lost or logged server-side only, with no forwarding from `AgentServer.js`.
|
|
55
|
+
- This limits real-time visibility: No live updates in CLI (e.g., `dave --connect` shows nothing until end), poor UX for long tasks, no observability for debugging agent chains.
|
|
56
|
+
- Affects: Interactive sessions, one-shot queries, multi-agent tool calls (e.g., user sees final code but not the `todo_agent` intermediate planning).
|
|
57
|
+
|
|
58
|
+
## The Goal
|
|
59
|
+
|
|
60
|
+
Enable **real-time streaming** of internal events via extensions in the three allowed files:
|
|
61
|
+
- Stream: Reasoning steps (LLM thoughts), tool requests/responses/errors, logs (debug/info), memory updates – captured and emitted from `AgentServer.js` during `prompt.call()`.
|
|
62
|
+
- Real-time display in CLI via progressive handling in `wsIO.js`.
|
|
63
|
+
- Maintain backward compatibility: Old clients ignore new `server_*` actions; server still sends final `server_response`.
|
|
64
|
+
- Safe: No breaking changes to `ACTIONS` validation (add new ones); optional streaming per connection (e.g., flag in `user_introduction`).
|
|
65
|
+
- Integration: Use existing memory protocol hooks in `AgentServer.js` (e.g., session appends during emissions); no direct changes to `Session.js`.
|
|
66
|
+
|
|
67
|
+
Suggested new event names (add to `ACTIONS` in `AgentServer.js`):
|
|
68
|
+
- `server_reasoning`: Streams LLM intermediate thoughts (e.g., `{ content: "Considering options..." }`).
|
|
69
|
+
- `server_tool_call`: Tool invocation (e.g., `{ content: { tool: "web_search", args: { query: "..." } } }`).
|
|
70
|
+
- `server_tool_response`: Tool result (e.g., `{ content: { tool: "web_search", result: "..." } }`).
|
|
71
|
+
- `server_tool_error`: Tool failure (e.g., `{ content: { tool: "read_file", error: "File not found" } }`).
|
|
72
|
+
- `server_log`: General logs (e.g., `{ content: { level: "info", message: "Session loaded" } }`).
|
|
73
|
+
- `server_memory_update`: Memory/session changes (e.g., `{ content: { type: "append", data: "New reasoning added" } }`).
|
|
74
|
+
|
|
75
|
+
These are prefixed with "server_" to distinguish them from user/agent actions and to ensure old clients skip them.
|
|
76
|
+
|
|
77
|
+
## Revised Implementation Strategy
|
|
78
|
+
|
|
79
|
+
Given the file constraints, the strategy focuses on:
|
|
80
|
+
- **AgentServer.js**: Extend existing `prompt` event listeners (or add wrappers around `prompt.call()`) to intercept/simulate intermediate events (e.g., via async progress hooks, console spying, or assuming `Prompt.js` emits events that can be listened to). Emit new `server_*` messages in real-time to the user WS connection during the call. Add to `ACTIONS` and handle user streaming flags.
|
|
81
|
+
- **AgentClient.js**: Minimal changes – add handling for incoming `server_*` messages (e.g., log or forward internally) to support agent observability without affecting core queue.
|
|
82
|
+
- **wsIO.js**: Replace single-response wait with `ws.on('message')` loop to process and display multiple `server_*` events progressively; close only after `server_response`.
|
|
83
|
+
- **Backward Compatibility**: New actions added to `ACTIONS` but not required; old `wsIO.js` will ignore them and wait for `server_response`. Streaming opt-in via flag.
|
|
84
|
+
- **Memory Integration**: In `AgentServer.js`, during emissions, call existing session methods (e.g., `this.session.append(...)`) if already hooked; no new changes to `Session.js`.
|
|
85
|
+
- **Assumptions**: `Prompt.js` has or can be assumed to have interceptable intermediates (e.g., via logs or events); if not, use a wrapper function in `AgentServer.js` that simulates streams based on known patterns (e.g., tool calls via ToolSet intercepts already in AgentServer).
|
|
86
|
+
|
|
87
|
+
Total estimated effort: 3-5 hours, focused on the three files.
|
|
88
|
+
|
|
89
|
+
## Detailed Step-by-Step Implementation Plan
|
|
90
|
+
|
|
91
|
+
This follows the **exact 6 steps** from the TOP priority TODO in `TODO.md` (dated 2026-04-25), revised for constraints. Each step includes sub-tasks, updated code suggestions (edits only to allowed files), progress tracking, and safety notes.
|
|
92
|
+
|
|
93
|
+
### Step 1: Review Existing WebSocket Protocol Documentation and Reference Files
|
|
94
|
+
- [x] Read `docs/agent-dave-websocket-protocol.md` (current protocol, sequences, ACTIONS).
|
|
95
|
+
- [x] Inspect files (read-only where constrained):
|
|
96
|
+
- `lib/AgentServer.js`: WS handling, existing `prompt.call()` and any event listeners (e.g., on tool responses); identify wrap points for intermediates.
|
|
97
|
+
- `lib/AgentClient.js`: Queue processing; check for message handlers extensible to `server_*`.
|
|
98
|
+
- `lib/wsIO.js`: Response waiting logic; note `ws.on('message')` for progressive extension.
|
|
99
|
+
- `lib/Prompt.js` (read-only): Confirm existing event emissions/logs (e.g., reasoning/tool events) that `AgentServer.js` can listen to/wrap.
|
|
100
|
+
- [x] Identify hooks: Extend existing listeners in `AgentServer.js` (e.g., on `prompt` events or during tool handling); no new emits in `Prompt.js`.
|
|
101
|
+
- **Code Suggestion**: None (review only); note potential wrapper: In `AgentServer.js`, `const wrappedCall = async (content, reqId) => { /* intercept progress */ return prompt.call(content); }`.
|
|
102
|
+
- **Safety/Compat**: No changes; pure review.
|
|
103
|
+
- **Progress**: [x] Completed (Review confirms: `AgentServer.js` can wrap `prompt.call()` and extend tool handlers for streams; `wsIO.js` ready for multi-message; `AgentClient.js` minimal.)
|
|
104
|
+
|
|
105
|
+
### Step 2: Modify AgentServer.js to Emit Intermediate Events over WebSocket
|
|
106
|
+
- [ ] Extend `ACTIONS` array: Add new actions (`server_reasoning`, `server_tool_call`, etc.) at the end (no validation change – old clients ignore).
|
|
107
|
+
- [ ] In `AgentServer.js`:
|
|
108
|
+
- On `user_introduction`, store the connection as "streaming" if the flag is set (e.g., `{ action: "user_introduction", streaming: true }`); track streaming state per reqId.
|
|
109
|
+
- Extend existing prompt event listeners: Wrap `prompt.call()` to simulate/capture intermediates (e.g., async iterator or progress callbacks if available; else, intercept tool calls via existing ToolSet handlers and emit `server_tool_*`).
|
|
110
|
+
- For reasoning/logs: If `Prompt.js` logs to console, add a temporary spy (e.g., override console.log temporarily during call); or assume/emit based on known phases (e.g., before/after tool calls).
|
|
111
|
+
- For tool calls: In existing tool handler (e.g., when sending `agent_query`), emit `server_tool_call` before, `server_tool_response` after resolve.
|
|
112
|
+
- Memory: During emissions, use existing session hooks (e.g., `this.session.append({ type: 'reasoning', content })` if already present).
|
|
113
|
+
- [ ] Structure: Use reqId from `user_request` to target user conn; emit in real-time while `prompt.call()` runs (non-blocking).
|
|
114
|
+
- **Updated Code Suggestion** (Edits only to AgentServer.js):
|
|
115
|
+
```javascript
|
|
116
|
+
// At top: Extend ACTIONS
|
|
117
|
+
const ACTIONS = [
|
|
118
|
+
// ... existing
|
|
119
|
+
'server_reasoning',
|
|
120
|
+
'server_tool_call',
|
|
121
|
+
'server_tool_response',
|
|
122
|
+
'server_tool_error',
|
|
123
|
+
'server_log',
|
|
124
|
+
'server_memory_update'
|
|
125
|
+
];
|
|
126
|
+
|
|
127
|
+
// In handleUserRequest (around prompt.call)
|
|
128
|
+
async handleUserRequest(msg, conn) {
|
|
129
|
+
const reqId = msg.id;
|
|
130
|
+
if (msg.streaming) this.streamingConns.set(reqId, conn); // Track streaming
|
|
131
|
+
|
|
132
|
+
// Wrapper for prompt.call to emit intermediates (simulate if no events)
|
|
133
|
+
const emitStream = (action, content) => {
|
|
134
|
+
if (this.streamingConns.has(reqId)) {
|
|
135
|
+
const streamMsg = { action, content, id: reqId };
|
|
136
|
+
this.streamingConns.get(reqId).ws.send(JSON.stringify(streamMsg));
|
|
137
|
+
// Existing memory append if hooked: this.session.append({ type: action, content });
|
|
138
|
+
}
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
try {
|
|
142
|
+
// Simulate reasoning start
|
|
143
|
+
emitStream('server_reasoning', 'Starting LLM processing...');
|
|
144
|
+
|
|
145
|
+
// Existing tool intercept: Before agent/tool send
|
|
146
|
+
// (In existing agent_query handler)
|
|
147
|
+
emitStream('server_tool_call', { tool: 'agent_query', args: { content: msg.content } });
|
|
148
|
+
|
|
149
|
+
const result = await this.prompt.call(msg.content); // Unchanged
|
|
150
|
+
|
|
151
|
+
// Simulate tool response (extend existing resolve)
|
|
152
|
+
emitStream('server_tool_response', { tool: 'prompt', result });
|
|
153
|
+
|
|
154
|
+
// Final response (existing)
|
|
155
|
+
const response = { action: 'server_response', content: result, id: reqId };
|
|
156
|
+
conn.ws.send(JSON.stringify(response));
|
|
157
|
+
|
|
158
|
+
emitStream('server_log', { level: 'info', message: 'Processing complete' });
|
|
159
|
+
} catch (error) {
|
|
160
|
+
emitStream('server_tool_error', { tool: 'prompt', error: error.message });
|
|
161
|
+
// Existing error handling
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Extend existing message handler for validation (no change to logic)
|
|
166
|
+
if (!ACTIONS.includes(msg.action)) { /* existing disconnect */ }
|
|
167
|
+
```
|
|
168
|
+
- **Safety/Compat**: Emissions non-blocking; old clients skip new actions; fallback to final only.
|
|
169
|
+
- **Progress**: [ ]
|
|
170
|
+
|
|
171
|
+
### Step 3: Update AgentClient.js to Subscribe to and Handle Streaming Events
|
|
172
|
+
- [ ] Minimal: In message handler, if `msg.action.startsWith('server_')`, log or forward to agent's internal state (e.g., console.log for observability).
|
|
173
|
+
- [ ] No queue extension needed; optional for agent "observe" mode (e.g., if agent wants shared streams).
|
|
174
|
+
- [ ] Reconnect: Existing auto-reconnect behavior is preserved; there is no stream state to restore (agents don't stream out).
|
|
175
|
+
- **Updated Code Suggestion** (Edits only to AgentClient.js):
|
|
176
|
+
```javascript
|
|
177
|
+
// In #onMessage or message handler
|
|
178
|
+
onMessage(msg) {
|
|
179
|
+
if (msg.action.startsWith('server_')) {
|
|
180
|
+
// Optional: Forward/log for agent observability
|
|
181
|
+
console.log(`[Stream from Server] ${msg.action}: ${JSON.stringify(msg.content)}`);
|
|
182
|
+
// Or emit to agent's prompt if listener exists (no change to Prompt)
|
|
183
|
+
return; // Don't process in queue
|
|
184
|
+
}
|
|
185
|
+
// Existing queue push for agent_query etc.
|
|
186
|
+
this.#queue.push(msg);
|
|
187
|
+
this.#processOne();
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
- **Safety/Compat**: Ignores streams if not handled; no impact on existing agent flow.
|
|
191
|
+
- **Progress**: [ ]
|
|
192
|
+
|
|
193
|
+
### Step 4: Integrate Changes in lib/wsIO.js to Receive and Display Live Streams in the CLI
|
|
194
|
+
- [ ] Replace promise wait with `ws.on('message')` to handle multiple `server_*` progressively; collect/display live.
|
|
195
|
+
- [ ] Display: Use `console.log` with prefixes (e.g., emojis for UX); close after `server_response` or timeout.
|
|
196
|
+
- [ ] One-shot: Send the streaming flag in `user_introduction`; fall back to waiting for the final `server_response` if no stream messages arrive.
|
|
197
|
+
- [ ] Interactive: Similar extension (focus on one-shot).
|
|
198
|
+
- **Updated Code Suggestion** (Edits only to wsIO.js):
|
|
199
|
+
```javascript
|
|
200
|
+
// In wsIO function (exported)
|
|
201
|
+
async function wsIO(url, secret, content, options = {}) {
|
|
202
|
+
return new Promise((resolve, reject) => {
|
|
203
|
+
const ws = new WebSocket(url + '?wssrc_id=' + Buffer.from(secret).toString('base64'));
|
|
204
|
+
let reqId, streams = [], timeout;
|
|
205
|
+
|
|
206
|
+
ws.on('open', () => {
|
|
207
|
+
// Send introduction with streaming flag
|
|
208
|
+
const intro = { action: 'user_introduction', id: Date.now().toString(), streaming: options.streaming !== false };
|
|
209
|
+
ws.send(JSON.stringify(intro));
|
|
210
|
+
|
|
211
|
+
reqId = Date.now().toString();
|
|
212
|
+
const request = { action: 'user_request', content, id: reqId };
|
|
213
|
+
ws.send(JSON.stringify(request));
|
|
214
|
+
|
|
215
|
+
// Fallback timeout for old servers
|
|
216
|
+
timeout = setTimeout(() => {
|
|
217
|
+
ws.close();
|
|
218
|
+
reject(new Error('Timeout waiting for response'));
|
|
219
|
+
}, 30000);
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
ws.on('message', (data) => {
|
|
223
|
+
const msg = JSON.parse(data.toString());
|
|
224
|
+
clearTimeout(timeout);
|
|
225
|
+
|
|
226
|
+
if (msg.action.startsWith('server_') && msg.id === reqId) {
|
|
227
|
+
streams.push(msg);
|
|
228
|
+
// Progressive display
|
|
229
|
+
switch (msg.action) {
|
|
230
|
+
case 'server_reasoning':
|
|
231
|
+
process.stdout.write(`\n🤔 ${msg.content}\n`);
|
|
232
|
+
break;
|
|
233
|
+
case 'server_tool_call':
|
|
234
|
+
console.log(`\n🔧 Tool Call: ${JSON.stringify(msg.content)}\n`);
|
|
235
|
+
break;
|
|
236
|
+
case 'server_tool_response':
|
|
237
|
+
console.log(`\n📥 Tool Response: ${JSON.stringify(msg.content)}\n`);
|
|
238
|
+
break;
|
|
239
|
+
case 'server_tool_error':
|
|
240
|
+
console.log(`\n❌ Tool Error: ${JSON.stringify(msg.content)}\n`);
|
|
241
|
+
break;
|
|
242
|
+
case 'server_log':
|
|
243
|
+
console.log(`\n📝 Log [${msg.content.level}]: ${msg.content.message}\n`);
|
|
244
|
+
break;
|
|
245
|
+
case 'server_memory_update':
|
|
246
|
+
console.log(`\n💾 Memory Update: ${JSON.stringify(msg.content)}\n`);
|
|
247
|
+
break;
|
|
248
|
+
}
|
|
249
|
+
} else if (msg.action === 'server_response' && msg.id === reqId) {
|
|
250
|
+
console.log(`\n✅ Final Response: ${msg.content}\n`);
|
|
251
|
+
streams.push(msg);
|
|
252
|
+
ws.close();
|
|
253
|
+
clearTimeout(timeout);
|
|
254
|
+
resolve({ final: msg.content, streams });
|
|
255
|
+
}
|
|
256
|
+
// Ignore other messages
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
ws.on('error', reject);
|
|
260
|
+
ws.on('close', () => clearTimeout(timeout));
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Export updated function; backward compat: If no streaming, behaves as before (waits for server_response)
|
|
265
|
+
```
|
|
266
|
+
- **Safety/Compat**: Handles old servers (no `server_*` → just final); progressive only if flag sent.
|
|
267
|
+
- **Progress**: [ ]
|
|
268
|
+
|
|
269
|
+
### Step 5: Test End-to-End: Verify Streaming Works for Reasoning, Tools, and Final Response; Ensure Backward Compatibility
|
|
270
|
+
- [ ] Unit: Test edits in isolation (e.g., mock WS in AgentServer.js wrapper; simulate messages in wsIO.js).
|
|
271
|
+
- [ ] Integration:
|
|
272
|
+
- Start server (with edits): `agentDave --serve 8080`.
|
|
273
|
+
- One-shot new: `echo "Search weather" | dave --connect ws://127.0.0.1:8080 --secret 123 --streaming` → See reasoning + tool_call + response.
|
|
274
|
+
- Compat old: Run without `--streaming` or old wsIO.js → Final only, no errors.
|
|
275
|
+
- Agent: Connect `code_agent` → Verify `server_tool_call` for agent_query; agent logs streams if handled.
|
|
276
|
+
- [ ] Edge: Errors, resets (use existing), multi-req (per-ID).
|
|
277
|
+
- [ ] Memory: Confirm existing appends in AgentServer emissions.
|
|
278
|
+
- **Safety/Compat**: Run old and new clients in parallel; the old CLI must behave unchanged.
|
|
279
|
+
- **Progress**: [ ]
|
|
280
|
+
|
|
281
|
+
### Step 6: Document the Updated Protocol and Add Examples
|
|
282
|
+
- [ ] Update `docs/agent-dave-websocket-protocol.md` (add new ACTIONS, streaming sequences).
|
|
283
|
+
- [ ] Examples: CLI streamed output; code snippets from above (label as edits to three files).
|
|
284
|
+
- [ ] Scenarios: Add `scenarios/streaming-test.js` (using edited wsIO).
|
|
285
|
+
- [ ] Changelog: v0.1.1 note.
|
|
286
|
+
- [ ] TODO: Mark complete.
|
|
287
|
+
- **Safety/Compat**: Docs note constraints/opt-in.
|
|
288
|
+
- **Progress**: [ ]
|
|
289
|
+
|
|
290
|
+
## Integration with Mandatory Memory Protocol
|
|
291
|
+
|
|
292
|
+
- Leverage existing hooks in `AgentServer.js`: During `server_*` emissions, append to session (e.g., `this.session.append(...)` if already implemented; no Session.js changes).
|
|
293
|
+
- Streams as "system" messages in context; summarize for limits via existing logic.
|
|
294
|
+
- Reset: Existing `user_reset` handling clears the session; emit a `server_log` from AgentServer when it does.
|
|
295
|
+
- Persistence: Existing session files include streams via appends.
|
|
296
|
+
|
|
297
|
+
## Updated Code Structure Suggestions
|
|
298
|
+
|
|
299
|
+
- **Event Flow**: Existing Prompt → (Wrapped in AgentServer.js) → Real-time WS emissions → wsIO.js display.
|
|
300
|
+
- **No New Modules**: All in three files.
|
|
301
|
+
- **Types**: If `types/` updated, add interfaces (but no constraint violation).
|
|
302
|
+
- **Config**: Add `--streaming` to CLI parsing in bin/dave.js (but focus on wsIO).
|
|
303
|
+
- **Error Handling**: Try-catch in wrappers/emissions.
|
|
304
|
+
|
|
305
|
+
## Risks & Mitigations
|
|
306
|
+
|
|
307
|
+
- **Interception Limits**: If Prompt lacks events, simulation in wrapper may be approximate → Mitigate: Focus on tool streams (reliable via existing handlers).
|
|
308
|
+
- **Performance**: Non-blocking emits.
|
|
309
|
+
- **Security/Compat**: As before; test thoroughly.
|
|
310
|
+
|
|
311
|
+
## Progress Tracking
|
|
312
|
+
|
|
313
|
+
- Overall: [x] 1/6 steps complete.
|
|
314
|
+
- Blocker: None.
|
|
315
|
+
- Next: Step 2 implementation in AgentServer.js.
|
|
316
|
+
|
|
317
|
+
*Updated by todo_agent.js on 2026-04-25. For updates, reference TODO.md.*
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Task Clarification and Documentation First
|
|
2
|
+
|
|
3
|
+
**Core Principle**: For ANY task or problem (programming or otherwise), NEVER jump directly into execution. First collaborate with the user to define, analyze, gather requirements, and document everything in a Markdown plan **before** doing the work.
|
|
4
|
+
|
|
5
|
+
## Instructions for the Assistant / LLM / Agent
|
|
6
|
+
|
|
7
|
+
1. **Clarify & Define**
|
|
8
|
+
Work with the user to create a clear, unambiguous task definition. Ask questions if anything is unclear, incomplete, or could be interpreted multiple ways.
|
|
9
|
+
|
|
10
|
+
2. **Gather & Analyze**
|
|
11
|
+
Collect all requirements, constraints, wishes, context, and edge cases. Analyze the problem thoroughly. Identify risks, dependencies, and success criteria.
|
|
12
|
+
|
|
13
|
+
3. **Document First**
|
|
14
|
+
Create or update a dedicated Markdown document in `docs/plans/` that captures:
|
|
15
|
+
- Task description
|
|
16
|
+
- Requirements & constraints
|
|
17
|
+
- Analysis
|
|
18
|
+
- Proposed approach / plan
|
|
19
|
+
- Breakdown into smaller steps (if the task is large)
|
|
20
|
+
|
|
21
|
+
Only after the user reviews and approves this document do you proceed to implementation or execution.
|
|
22
|
+
|
|
23
|
+
4. **Break Down Large Tasks**
|
|
24
|
+
If a task is very large or complex, divide it into multiple smaller, independent subtasks. Handle and document one at a time. Update the main plan as you go.
|
|
25
|
+
|
|
26
|
+
5. **General Rules**
|
|
27
|
+
- Be token-efficient in responses.
|
|
28
|
+
- Stay strictly within any file or scope constraints the user specifies.
|
|
29
|
+
- Use tools (memory_agent, todo_agent, readme_agent, etc.) to track tasks and documentation.
|
|
30
|
+
- Always recall relevant memories first and store updates at the end of each interaction.
|
|
31
|
+
- Prioritize clarity and traceability over speed.
|
|
32
|
+
|
|
33
|
+
This method ensures high-quality outcomes, reduces errors from unclear requirements, and creates reusable documentation.
|
|
34
|
+
|
|
35
|
+
**Prompt Version**: 1.0 (2026-04-25)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/API/minimax/ImageToolset.js
|
|
3
|
+
* @module minimax/ImageToolset
|
|
4
|
+
* @description Comprehensive ToolSet for the Minimax Image Generation API.
|
|
5
|
+
*
|
|
6
|
+
* This ToolSet exposes the full Minimax Image API with every available option
|
|
7
|
+
* and detailed return values, following the same pattern as lib/genericToolset.js
|
|
8
|
+
* and MusicToolset.js.
|
|
9
|
+
*
|
|
10
|
+
* It is designed to be used directly by AI agents or merged into larger toolsets.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* import imageToolset from './lib/API/minimax/ImageToolset.js';
|
|
14
|
+
*
|
|
15
|
+
* // Use in an agent:
|
|
16
|
+
* const toolset = agent.getToolset();
|
|
17
|
+
* toolset.merge(imageToolset);
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import ToolSet from '../../ToolSet.js';
|
|
21
|
+
import * as minimax from './image.js';
|
|
22
|
+
|
|
23
|
+
const tools = new ToolSet('auto');
|
|
24
|
+
|
|
25
|
+
/* ============================================================
|
|
26
|
+
CORE IMAGE GENERATION (Text-to-Image + Image-to-Image)
|
|
27
|
+
============================================================ */
|
|
28
|
+
|
|
29
|
+
tools.add(
|
|
30
|
+
'generate_image',
|
|
31
|
+
'Generate images using the Minimax Image Generation API. ' +
|
|
32
|
+
'Supports both Text-to-Image and Image-to-Image (via subject_reference). ' +
|
|
33
|
+
'All generated images are automatically saved locally in .cache/minimax/.',
|
|
34
|
+
{
|
|
35
|
+
type: 'object',
|
|
36
|
+
properties: {
|
|
37
|
+
prompt: {
|
|
38
|
+
type: 'string',
|
|
39
|
+
description: 'Text description of the image, max 1500 characters. ' +
|
|
40
|
+
'Example: "A serene mountain landscape at sunset, photorealistic, cinematic lighting"'
|
|
41
|
+
},
|
|
42
|
+
model: {
|
|
43
|
+
type: 'string',
|
|
44
|
+
enum: ['image-01', 'image-01-live'],
|
|
45
|
+
default: 'image-01',
|
|
46
|
+
description: 'Model to use. "image-01" = standard (text-to-image and img2img). ' +
|
|
47
|
+
'"image-01-live" = optimized for image-to-image.'
|
|
48
|
+
},
|
|
49
|
+
aspect_ratio: {
|
|
50
|
+
type: 'string',
|
|
51
|
+
enum: ['1:1', '16:9', '4:3', '3:2', '2:3', '3:4', '9:16', '21:9'],
|
|
52
|
+
default: '1:1',
|
|
53
|
+
description: 'Image aspect ratio. Default "1:1" (1024x1024).'
|
|
54
|
+
},
|
|
55
|
+
width: {
|
|
56
|
+
type: 'integer',
|
|
57
|
+
description: 'Image width in pixels (512-2048, must be divisible by 8). ' +
|
|
58
|
+
'Only effective for model "image-01". aspect_ratio takes priority if both are provided.'
|
|
59
|
+
},
|
|
60
|
+
height: {
|
|
61
|
+
type: 'integer',
|
|
62
|
+
description: 'Image height in pixels (same rules as width).'
|
|
63
|
+
},
|
|
64
|
+
response_format: {
|
|
65
|
+
type: 'string',
|
|
66
|
+
enum: ['url', 'base64'],
|
|
67
|
+
default: 'url',
|
|
68
|
+
description: 'Output format. "url" returns temporary signed links (expire after 24h). ' +
|
|
69
|
+
'"base64" returns raw base64 data. Default is "url" (user preference).'
|
|
70
|
+
},
|
|
71
|
+
seed: {
|
|
72
|
+
type: 'integer',
|
|
73
|
+
description: 'Random seed for reproducible results. Omit for random seed per image.'
|
|
74
|
+
},
|
|
75
|
+
n: {
|
|
76
|
+
type: 'integer',
|
|
77
|
+
minimum: 1,
|
|
78
|
+
maximum: 9,
|
|
79
|
+
default: 1,
|
|
80
|
+
description: 'Number of images to generate per request (1-9).'
|
|
81
|
+
},
|
|
82
|
+
prompt_optimizer: {
|
|
83
|
+
type: 'boolean',
|
|
84
|
+
default: false,
|
|
85
|
+
description: 'Enable automatic prompt optimization.'
|
|
86
|
+
},
|
|
87
|
+
subject_reference: {
|
|
88
|
+
type: 'array',
|
|
89
|
+
items: {
|
|
90
|
+
type: 'object',
|
|
91
|
+
properties: {
|
|
92
|
+
type: { type: 'string', enum: ['character'], description: 'Subject type. Currently only "character" supported.' },
|
|
93
|
+
image_file: {
|
|
94
|
+
type: 'string',
|
|
95
|
+
description: 'Reference image URL or Base64 data URL (data:image/jpeg;base64,...). ' +
|
|
96
|
+
'Best results with front-facing portrait photos (JPG/PNG < 10MB).'
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
required: ['type', 'image_file']
|
|
100
|
+
},
|
|
101
|
+
description: 'For Image-to-Image generation. Array of subject references (currently supports character portraits).'
|
|
102
|
+
},
|
|
103
|
+
extra: {
|
|
104
|
+
type: 'object',
|
|
105
|
+
description: 'Additional parameters not yet documented.'
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
required: ['prompt']
|
|
109
|
+
},
|
|
110
|
+
async (params) => {
|
|
111
|
+
const result = await minimax.requestImage(params.prompt, params);
|
|
112
|
+
|
|
113
|
+
return JSON.stringify({
|
|
114
|
+
image_urls: result.image_urls,
|
|
115
|
+
image_base64: result.image_base64,
|
|
116
|
+
local_paths: result.local_paths,
|
|
117
|
+
metadata: result.metadata,
|
|
118
|
+
trace_id: result.id,
|
|
119
|
+
duration_ms: result.duration,
|
|
120
|
+
raw_response: result.raw,
|
|
121
|
+
note: 'Images have been automatically saved to local_paths. ' +
|
|
122
|
+
'Use response_format="base64" if you need the raw data instead of URLs.'
|
|
123
|
+
}, null, 2);
|
|
124
|
+
}
|
|
125
|
+
);
|
|
126
|
+
|
|
127
|
+
/* ============================================================
|
|
128
|
+
HELPER: DIRECT LOCAL SAVE
|
|
129
|
+
============================================================ */
|
|
130
|
+
|
|
131
|
+
tools.add(
|
|
132
|
+
'save_image_to_local',
|
|
133
|
+
'Save image data (URL or base64) to a local file in .cache/minimax/. ' +
|
|
134
|
+
'Useful when you already have image data from another source or previous generation.',
|
|
135
|
+
{
|
|
136
|
+
type: 'object',
|
|
137
|
+
properties: {
|
|
138
|
+
image_data: {
|
|
139
|
+
type: 'string',
|
|
140
|
+
description: 'Either a URL or a base64-encoded image string (with or without data: prefix).'
|
|
141
|
+
},
|
|
142
|
+
filename_prefix: {
|
|
143
|
+
type: 'string',
|
|
144
|
+
default: 'minimax-image',
|
|
145
|
+
description: 'Prefix for the generated filename.'
|
|
146
|
+
},
|
|
147
|
+
index: {
|
|
148
|
+
type: 'integer',
|
|
149
|
+
default: 0,
|
|
150
|
+
description: 'Index suffix for multiple images (avoids filename collisions).'
|
|
151
|
+
}
|
|
152
|
+
},
|
|
153
|
+
required: ['image_data']
|
|
154
|
+
},
|
|
155
|
+
async (params) => {
|
|
156
|
+
const localPath = await minimax.saveImageToLocal(
|
|
157
|
+
params.image_data,
|
|
158
|
+
params.filename_prefix,
|
|
159
|
+
params.index ?? 0
|
|
160
|
+
);
|
|
161
|
+
|
|
162
|
+
return JSON.stringify({
|
|
163
|
+
local_path: localPath,
|
|
164
|
+
note: 'File saved successfully.'
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
export default tools;
|