@sesamespace/hivemind 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/{chunk-YDD5EZ46.js → chunk-ELFD4Y4W.js} +2 -2
  2. package/dist/{chunk-GOW62FNS.js → chunk-ERR5JR42.js} +2 -2
  3. package/dist/{chunk-R6XIZH3I.js → chunk-TL4GV2TJ.js} +3 -3
  4. package/dist/{chunk-LDTBAMQY.js → chunk-WAX2THXK.js} +2 -2
  5. package/dist/{chunk-A7X4FKQZ.js → chunk-WSLVHVNP.js} +3 -2
  6. package/dist/chunk-WSLVHVNP.js.map +1 -0
  7. package/dist/commands/fleet.js +3 -3
  8. package/dist/commands/start.js +3 -3
  9. package/dist/commands/watchdog.js +3 -3
  10. package/dist/index.js +2 -2
  11. package/dist/main.js +5 -5
  12. package/dist/start.js +1 -1
  13. package/install.sh +120 -0
  14. package/package.json +25 -22
  15. package/packages/memory/Cargo.lock +6480 -0
  16. package/packages/memory/Cargo.toml +21 -0
  17. package/packages/memory/src/src/context.rs +179 -0
  18. package/packages/memory/src/src/embeddings.rs +51 -0
  19. package/packages/memory/src/src/main.rs +626 -0
  20. package/packages/memory/src/src/promotion.rs +637 -0
  21. package/packages/memory/src/src/scoring.rs +131 -0
  22. package/packages/memory/src/src/store.rs +460 -0
  23. package/packages/memory/src/src/tasks.rs +321 -0
  24. package/.pnpmrc.json +0 -1
  25. package/DASHBOARD-PLAN.md +0 -206
  26. package/TOOL-USE-DESIGN.md +0 -173
  27. package/dist/chunk-A7X4FKQZ.js.map +0 -1
  28. package/docs/TOOL-PARITY-PLAN.md +0 -191
  29. /package/dist/{chunk-YDD5EZ46.js.map → chunk-ELFD4Y4W.js.map} +0 -0
  30. /package/dist/{chunk-GOW62FNS.js.map → chunk-ERR5JR42.js.map} +0 -0
  31. /package/dist/{chunk-R6XIZH3I.js.map → chunk-TL4GV2TJ.js.map} +0 -0
  32. /package/dist/{chunk-LDTBAMQY.js.map → chunk-WAX2THXK.js.map} +0 -0
@@ -0,0 +1,321 @@
1
+ use anyhow::Result;
2
+ use arrow_array::{RecordBatch, RecordBatchIterator, StringArray};
3
+ use arrow_schema::{DataType, Field, Schema};
4
+ use chrono::Utc;
5
+ use futures::stream::TryStreamExt;
6
+ use lancedb::{connection::Connection, query::ExecutableQuery, query::QueryBase, Table};
7
+ use serde::{Deserialize, Serialize};
8
+ use std::sync::Arc;
9
+
10
/// Name of the database table that holds task rows.
const TASKS_TABLE: &str = "tasks";
11
+
12
+ #[derive(Debug, Serialize, Deserialize, Clone)]
13
+ pub struct TaskRecord {
14
+ pub id: String,
15
+ pub context_name: String,
16
+ pub title: String,
17
+ pub description: String,
18
+ pub status: String,
19
+ pub blocked_by: String, // JSON array stored as string
20
+ pub created_at: String,
21
+ pub updated_at: String,
22
+ }
23
+
24
+ #[derive(Debug, Deserialize)]
25
+ pub struct TaskInput {
26
+ pub context_name: String,
27
+ pub title: String,
28
+ pub description: String,
29
+ #[serde(default = "default_status")]
30
+ pub status: String,
31
+ #[serde(default)]
32
+ pub blocked_by: Vec<String>,
33
+ }
34
+
35
/// Serde default for `TaskInput::status`: a task starts out as `"planned"`.
fn default_status() -> String {
    String::from("planned")
}
38
+
39
+ #[derive(Debug, Deserialize)]
40
+ pub struct TaskUpdate {
41
+ pub status: Option<String>,
42
+ pub title: Option<String>,
43
+ pub description: Option<String>,
44
+ pub blocked_by: Option<Vec<String>>,
45
+ }
46
+
47
+ pub struct TaskStore {
48
+ db: Connection,
49
+ }
50
+
51
+ impl TaskStore {
52
+ pub async fn new(db: Connection) -> Result<Self> {
53
+ let store = Self { db };
54
+ store.ensure_table().await?;
55
+ Ok(store)
56
+ }
57
+
58
+ fn schema() -> Arc<Schema> {
59
+ Arc::new(Schema::new(vec![
60
+ Field::new("id", DataType::Utf8, false),
61
+ Field::new("context_name", DataType::Utf8, false),
62
+ Field::new("title", DataType::Utf8, false),
63
+ Field::new("description", DataType::Utf8, false),
64
+ Field::new("status", DataType::Utf8, false),
65
+ Field::new("blocked_by", DataType::Utf8, false),
66
+ Field::new("created_at", DataType::Utf8, false),
67
+ Field::new("updated_at", DataType::Utf8, false),
68
+ ]))
69
+ }
70
+
71
+ async fn ensure_table(&self) -> Result<()> {
72
+ let names = self.db.table_names().execute().await?;
73
+ if !names.contains(&TASKS_TABLE.to_string()) {
74
+ let schema = Self::schema();
75
+ let batch = RecordBatch::new_empty(schema.clone());
76
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
77
+ self.db
78
+ .create_table(TASKS_TABLE, Box::new(batches))
79
+ .execute()
80
+ .await?;
81
+ tracing::info!("Created tasks table");
82
+ }
83
+ Ok(())
84
+ }
85
+
86
+ pub async fn create_task(&self, input: TaskInput) -> Result<TaskRecord> {
87
+ let id = uuid::Uuid::new_v4().to_string();
88
+ let now = Utc::now().to_rfc3339();
89
+ let blocked_by_json = serde_json::to_string(&input.blocked_by)?;
90
+
91
+ let task = TaskRecord {
92
+ id: id.clone(),
93
+ context_name: input.context_name.clone(),
94
+ title: input.title.clone(),
95
+ description: input.description.clone(),
96
+ status: input.status,
97
+ blocked_by: blocked_by_json.clone(),
98
+ created_at: now.clone(),
99
+ updated_at: now.clone(),
100
+ };
101
+
102
+ let schema = Self::schema();
103
+ let batch = RecordBatch::try_new(
104
+ schema.clone(),
105
+ vec![
106
+ Arc::new(StringArray::from(vec![task.id.as_str()])),
107
+ Arc::new(StringArray::from(vec![task.context_name.as_str()])),
108
+ Arc::new(StringArray::from(vec![task.title.as_str()])),
109
+ Arc::new(StringArray::from(vec![task.description.as_str()])),
110
+ Arc::new(StringArray::from(vec![task.status.as_str()])),
111
+ Arc::new(StringArray::from(vec![blocked_by_json.as_str()])),
112
+ Arc::new(StringArray::from(vec![task.created_at.as_str()])),
113
+ Arc::new(StringArray::from(vec![task.updated_at.as_str()])),
114
+ ],
115
+ )?;
116
+
117
+ let table = self.db.open_table(TASKS_TABLE).execute().await?;
118
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
119
+ table.add(Box::new(batches)).execute().await?;
120
+
121
+ tracing::debug!("Created task {} in context {}", task.id, task.context_name);
122
+ Ok(task)
123
+ }
124
+
125
+ pub async fn list_tasks(
126
+ &self,
127
+ context: &str,
128
+ status_filter: Option<&str>,
129
+ ) -> Result<Vec<TaskRecord>> {
130
+ let table = self.db.open_table(TASKS_TABLE).execute().await?;
131
+
132
+ let filter = match status_filter {
133
+ Some(status) => format!("context_name = '{}' AND status = '{}'", context, status),
134
+ None => format!("context_name = '{}'", context),
135
+ };
136
+
137
+ let results = table.query().only_if(filter).execute().await?;
138
+
139
+ let mut tasks = Vec::new();
140
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
141
+
142
+ for batch in &batches {
143
+ let ids = batch
144
+ .column_by_name("id")
145
+ .unwrap()
146
+ .as_any()
147
+ .downcast_ref::<StringArray>()
148
+ .unwrap();
149
+ let ctx_names = batch
150
+ .column_by_name("context_name")
151
+ .unwrap()
152
+ .as_any()
153
+ .downcast_ref::<StringArray>()
154
+ .unwrap();
155
+ let titles = batch
156
+ .column_by_name("title")
157
+ .unwrap()
158
+ .as_any()
159
+ .downcast_ref::<StringArray>()
160
+ .unwrap();
161
+ let descriptions = batch
162
+ .column_by_name("description")
163
+ .unwrap()
164
+ .as_any()
165
+ .downcast_ref::<StringArray>()
166
+ .unwrap();
167
+ let statuses = batch
168
+ .column_by_name("status")
169
+ .unwrap()
170
+ .as_any()
171
+ .downcast_ref::<StringArray>()
172
+ .unwrap();
173
+ let blocked_bys = batch
174
+ .column_by_name("blocked_by")
175
+ .unwrap()
176
+ .as_any()
177
+ .downcast_ref::<StringArray>()
178
+ .unwrap();
179
+ let created_ats = batch
180
+ .column_by_name("created_at")
181
+ .unwrap()
182
+ .as_any()
183
+ .downcast_ref::<StringArray>()
184
+ .unwrap();
185
+ let updated_ats = batch
186
+ .column_by_name("updated_at")
187
+ .unwrap()
188
+ .as_any()
189
+ .downcast_ref::<StringArray>()
190
+ .unwrap();
191
+
192
+ for i in 0..batch.num_rows() {
193
+ tasks.push(TaskRecord {
194
+ id: ids.value(i).to_string(),
195
+ context_name: ctx_names.value(i).to_string(),
196
+ title: titles.value(i).to_string(),
197
+ description: descriptions.value(i).to_string(),
198
+ status: statuses.value(i).to_string(),
199
+ blocked_by: blocked_bys.value(i).to_string(),
200
+ created_at: created_ats.value(i).to_string(),
201
+ updated_at: updated_ats.value(i).to_string(),
202
+ });
203
+ }
204
+ }
205
+
206
+ // Sort by created_at
207
+ tasks.sort_by(|a, b| a.created_at.cmp(&b.created_at));
208
+
209
+ Ok(tasks)
210
+ }
211
+
212
+ pub async fn get_task(&self, id: &str) -> Result<Option<TaskRecord>> {
213
+ let table = self.db.open_table(TASKS_TABLE).execute().await?;
214
+ let results = table
215
+ .query()
216
+ .only_if(format!("id = '{}'", id))
217
+ .execute()
218
+ .await?;
219
+
220
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
221
+ for batch in &batches {
222
+ if batch.num_rows() > 0 {
223
+ let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
224
+ let ctx_names = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
225
+ let titles = batch.column_by_name("title").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
226
+ let descriptions = batch.column_by_name("description").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
227
+ let statuses = batch.column_by_name("status").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
228
+ let blocked_bys = batch.column_by_name("blocked_by").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
229
+ let created_ats = batch.column_by_name("created_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
230
+ let updated_ats = batch.column_by_name("updated_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
231
+
232
+ return Ok(Some(TaskRecord {
233
+ id: ids.value(0).to_string(),
234
+ context_name: ctx_names.value(0).to_string(),
235
+ title: titles.value(0).to_string(),
236
+ description: descriptions.value(0).to_string(),
237
+ status: statuses.value(0).to_string(),
238
+ blocked_by: blocked_bys.value(0).to_string(),
239
+ created_at: created_ats.value(0).to_string(),
240
+ updated_at: updated_ats.value(0).to_string(),
241
+ }));
242
+ }
243
+ }
244
+
245
+ Ok(None)
246
+ }
247
+
248
+ pub async fn update_task(&self, id: &str, update: TaskUpdate) -> Result<Option<TaskRecord>> {
249
+ let existing = self.get_task(id).await?;
250
+ let Some(mut task) = existing else {
251
+ return Ok(None);
252
+ };
253
+
254
+ // Apply updates
255
+ if let Some(status) = update.status {
256
+ task.status = status;
257
+ }
258
+ if let Some(title) = update.title {
259
+ task.title = title;
260
+ }
261
+ if let Some(description) = update.description {
262
+ task.description = description;
263
+ }
264
+ if let Some(blocked_by) = update.blocked_by {
265
+ task.blocked_by = serde_json::to_string(&blocked_by)?;
266
+ }
267
+ task.updated_at = Utc::now().to_rfc3339();
268
+
269
+ // Delete and reinsert
270
+ let table = self.db.open_table(TASKS_TABLE).execute().await?;
271
+ table.delete(&format!("id = '{}'", id)).await?;
272
+
273
+ let schema = Self::schema();
274
+ let batch = RecordBatch::try_new(
275
+ schema.clone(),
276
+ vec![
277
+ Arc::new(StringArray::from(vec![task.id.as_str()])),
278
+ Arc::new(StringArray::from(vec![task.context_name.as_str()])),
279
+ Arc::new(StringArray::from(vec![task.title.as_str()])),
280
+ Arc::new(StringArray::from(vec![task.description.as_str()])),
281
+ Arc::new(StringArray::from(vec![task.status.as_str()])),
282
+ Arc::new(StringArray::from(vec![task.blocked_by.as_str()])),
283
+ Arc::new(StringArray::from(vec![task.created_at.as_str()])),
284
+ Arc::new(StringArray::from(vec![task.updated_at.as_str()])),
285
+ ],
286
+ )?;
287
+
288
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
289
+ table.add(Box::new(batches)).execute().await?;
290
+
291
+ Ok(Some(task))
292
+ }
293
+
294
+ /// Get the next available task for a context:
295
+ /// - Status is "planned" (not active/complete/archived)
296
+ /// - Not blocked by any incomplete tasks
297
+ pub async fn get_next_task(&self, context: &str) -> Result<Option<TaskRecord>> {
298
+ let planned = self.list_tasks(context, Some("planned")).await?;
299
+ let all_tasks = self.list_tasks(context, None).await?;
300
+
301
+ // Build a set of complete task IDs
302
+ let complete_ids: std::collections::HashSet<String> = all_tasks
303
+ .iter()
304
+ .filter(|t| t.status == "complete" || t.status == "archived")
305
+ .map(|t| t.id.clone())
306
+ .collect();
307
+
308
+ for task in planned {
309
+ let blocked_by: Vec<String> =
310
+ serde_json::from_str(&task.blocked_by).unwrap_or_default();
311
+
312
+ // Task is available if all blockers are complete
313
+ let is_blocked = blocked_by.iter().any(|b| !complete_ids.contains(b));
314
+ if !is_blocked {
315
+ return Ok(Some(task));
316
+ }
317
+ }
318
+
319
+ Ok(None)
320
+ }
321
+ }
package/.pnpmrc.json DELETED
@@ -1 +0,0 @@
1
- {"onlyBuiltDependencies":["better-sqlite3"]}
package/DASHBOARD-PLAN.md DELETED
@@ -1,206 +0,0 @@
1
- # Hivemind Dashboard — Implementation Plan
2
-
3
- **Goal:** Local web dashboard for debugging memory, context routing, and LLM request formation.
4
- **Access:** `http://localhost:9485` on the Mac mini (local access only for now).
5
- **Priority:** LLM Request Inspector first, then Memory Browser, then Context Overview.
6
-
7
- ---
8
-
9
- ## Phase 1: LLM Request Logger + Inspector UI
10
-
11
- ### Backend: Request Logging
12
-
13
- **Where:** Instrument `buildMessages()` in `prompt.ts` and `processMessage()` in `agent.ts`.
14
-
15
- Each logged request captures:
16
- ```typescript
17
- interface RequestLog {
18
- id: string; // uuid
19
- timestamp: string; // ISO-8601
20
- // Routing
21
- context: string; // which context was used
22
- contextSwitched: boolean; // explicit switch?
23
- routingReason: string; // "pattern_match:X" | "inferred:X" | "active:X"
24
- // Sender
25
- channelId: string;
26
- channelKind: "dm" | "group";
27
- senderHandle: string;
28
- rawMessage: string; // as received (with prefix)
29
- // Prompt components (broken out for UI)
30
- systemPrompt: {
31
- identity: string; // workspace files section
32
- l3Knowledge: string[]; // individual L3 entries
33
- l2Episodes: Array<{
34
- id: string;
35
- content: string;
36
- score: number;
37
- timestamp: string;
38
- context_name: string;
39
- role: string;
40
- }>;
41
- contextInfo: string; // active context section
42
- fullText: string; // complete system prompt as sent
43
- };
44
- conversationHistory: Array<{ role: string; content: string }>; // L1 turns included
45
- userMessage: string; // final user message
46
- // Response
47
- response: {
48
- content: string;
49
- model: string;
50
- latencyMs: number;
51
- skipped: boolean; // was it __SKIP__?
52
- };
53
- // Config snapshot
54
- config: {
55
- topK: number;
56
- model: string;
57
- maxTokens: number;
58
- temperature: number;
59
- };
60
- // Approximate token counts (char-based estimate: chars/4)
61
- tokenEstimates: {
62
- systemPrompt: number;
63
- conversationHistory: number;
64
- userMessage: number;
65
- total: number;
66
- };
67
- }
68
- ```
69
-
70
- **Storage:** SQLite database at `data/dashboard.db`.
71
- - Single `request_logs` table with JSON columns for complex fields.
72
- - Auto-prune: keep last 7 days or 10,000 entries (whichever is smaller).
73
- - Why SQLite over ring buffer: survives restarts, queryable, minimal overhead.
74
-
75
- **Token estimation:** Use chars/4 approximation. Good enough for relative sizing. Avoid tokenizer dependency.
76
-
77
- **Logging approach:** Eager logging. Serialize at request time. The overhead is minimal (~1ms for JSON.stringify) compared to LLM latency (~1-10s). Capturing the exact state at request time is more valuable than lazy reconstruction.
78
-
79
- ### Backend: Dashboard HTTP Server
80
-
81
- **Where:** New file `packages/runtime/src/dashboard.ts`.
82
-
83
- Extend the existing health server (or create a sibling on port 9485):
84
- - `GET /` — serve the SPA (single HTML file)
85
- - `GET /api/requests` — list recent requests (paginated, filterable)
86
- - `GET /api/requests/:id` — single request detail
87
- - `GET /api/contexts` — proxy to memory daemon's context list
88
- - `GET /api/contexts/:name/episodes` — proxy L2 episodes
89
- - `GET /api/contexts/:name/l3` — proxy L3 knowledge
90
- - `GET /api/stats` — memory stats (episode counts, last promotion, etc.)
91
- - `DELETE /api/l3/:id` — delete a bad L3 entry (write op from day 1)
92
- - `POST /api/l3/:id/edit` — edit L3 entry content
93
-
94
- Bind to `127.0.0.1:9485` only.
95
-
96
- ### Frontend: Single-File SPA
97
-
98
- **Why single file:** No build step, no React, no dependencies. Ship as one HTML file with embedded CSS/JS. Can always upgrade later.
99
-
100
- **Layout:**
101
- - Left sidebar: navigation (Requests, Memory, Contexts)
102
- - Main area: content
103
-
104
- **Request Inspector view:**
105
- - Reverse-chronological list of requests
106
- - Each row: timestamp, sender, context, model, latency, token estimate
107
- - Click to expand → shows all sections:
108
- - **Identity files** (collapsible, usually not interesting)
109
- - **L3 Knowledge** (list of entries with metadata)
110
- - **L2 Episodes** (with similarity scores, timestamps, source context)
111
- - **L1 History** (conversation turns)
112
- - **User Message** (raw with prefix)
113
- - **Response** (with model, latency)
114
- - **Config** (top_k, model, temperature)
115
- - **Token breakdown** (bar chart showing proportion per section)
116
- - Filters: by context, by sender, by time range
117
- - Search: full-text search across messages
118
-
119
- **Memory Browser view (Phase 2):**
120
- - L2: searchable episode list, filterable by context/role/time
121
- - L3: per-context knowledge entries with edit/delete buttons
122
- - Promotion log (if we add logging for it)
123
-
124
- **Context Overview (Phase 2):**
125
- - List of contexts with episode counts, last active
126
- - Active context highlighted
127
- - Click to drill into episodes/L3
128
-
129
- ---
130
-
131
- ## Phase 2: Memory Browser + Context Overview
132
-
133
- After Phase 1 is working and useful, add:
134
- - Full L2 browsing with semantic search UI
135
- - L3 management (view, edit, delete)
136
- - Context explorer with stats
137
- - Promotion history logging
138
-
139
- ---
140
-
141
- ## Implementation Steps (Phase 1)
142
-
143
- ### Step 1: Request logging infrastructure
144
- - [ ] Create `packages/runtime/src/request-logger.ts`
145
- - SQLite setup (using better-sqlite3)
146
- - `logRequest()` method
147
- - `getRequests()` with pagination/filters
148
- - `getRequest(id)` for detail view
149
- - Auto-pruning on startup
150
- - [ ] Add better-sqlite3 dependency
151
-
152
- ### Step 2: Instrument the pipeline
153
- - [ ] Modify `agent.ts` `processMessage()` to capture routing decision + timing
154
- - [ ] Modify `prompt.ts` `buildSystemPrompt()` to return structured components (not just string)
155
- - [ ] Log each request after LLM response arrives
156
- - [ ] Capture config snapshot with each log entry
157
-
158
- ### Step 3: Dashboard HTTP server
159
- - [ ] Create `packages/runtime/src/dashboard.ts`
160
- - Express-free: use Node's built-in `http` module (like health server)
161
- - Serve SPA at `/`
162
- - JSON APIs for request logs and memory proxy
163
- - [ ] Wire into `pipeline.ts` startup
164
-
165
- ### Step 4: Frontend SPA
166
- - [ ] Single HTML file at `packages/runtime/src/dashboard.html`
167
- - Vanilla JS, no framework
168
- - CSS grid layout
169
- - Fetch-based API calls
170
- - Expandable request cards
171
- - Token breakdown visualization
172
- - Basic filtering
173
-
174
- ### Step 5: Memory proxy + write ops
175
- - [ ] Proxy endpoints to memory daemon for L2/L3 browsing
176
- - [ ] DELETE/POST endpoints for L3 management (`DELETE /api/l3/:id`, `POST /api/l3/:id/edit`)
177
-
178
- ---
179
-
180
- ## Design Decisions
181
-
182
- | Question | Decision | Rationale |
183
- |----------|----------|-----------|
184
- | Storage | SQLite | Survives restarts, queryable, lightweight |
185
- | Token counting | chars/4 estimate | Good enough, no tokenizer dep |
186
- | Logging | Eager | Captures exact state, overhead negligible vs LLM latency |
187
- | Bind address | 127.0.0.1 only | Local access, no auth needed |
188
- | Framework | None (vanilla) | Single HTML file, no build step |
189
- | Read-only or read-write? | Read-write from start | Ryan will want to delete bad L3 entries immediately |
190
- | Persist request logs? | Yes, 7 days | Need to compare across memory config changes |
191
- | Multi-agent? | Single agent for now | Don't over-engineer, but use agent name in logs |
192
- | Port | 9485 | Next to health port (9484), easy to remember |
193
-
194
- ---
195
-
196
- ## Sesame Command Fix (Bonus)
197
-
198
- While we're in the code, fix the sender prefix issue:
199
- - In `pipeline.ts` `startSesameLoop()`, before calling `agent.processMessage()`, strip the sender prefix for command parsing
200
- - Or better: in `agent.ts` `handleSpecialCommand()`, strip known prefix patterns before regex matching
201
- - This unblocks context switching, task commands, and cross-context search over Sesame
202
-
203
- ---
204
-
205
- *Created: 2026-02-28*
206
- *Status: Ready to implement*
package/TOOL-USE-DESIGN.md DELETED
@@ -1,173 +0,0 @@
1
- # Hivemind Tool Use — Architecture Design
2
-
3
- ## Current State
4
-
5
- The LLM client does simple chat completions: `messages[] → response.content`. No tool/function calling.
6
-
7
- ## Goal
8
-
9
- Full agentic tool-use loop matching OpenClaw capabilities, with Hivemind's memory system as a differentiator.
10
-
11
- ## Architecture
12
-
13
- ### 1. Tool Calling Protocol (OpenAI-compatible, works with OpenRouter)
14
-
15
- The OpenAI chat completions API supports `tools` (function definitions) and `tool_choice`. When the model wants to use a tool, it returns a `tool_calls` array instead of (or alongside) content. We then execute the tool, append the result as a `tool` role message, and call the model again.
16
-
17
- ```
18
- User message
19
-
20
- LLM (with tools defined)
21
-
22
- If tool_calls → execute tools → append results → call LLM again (loop)
23
- If content only → return response
24
- ```
25
-
26
- This is a **while loop**, not a single call. The model may chain multiple tool calls before producing a final text response.
27
-
28
- ### 2. Key Data Structures
29
-
30
- ```typescript
31
- interface ToolDefinition {
32
- name: string;
33
- description: string;
34
- parameters: JSONSchema; // JSON Schema for function params
35
- }
36
-
37
- interface ToolCall {
38
- id: string;
39
- type: "function";
40
- function: { name: string; arguments: string }; // arguments is JSON string
41
- }
42
-
43
- interface ToolResult {
44
- tool_call_id: string;
45
- role: "tool";
46
- content: string; // result as string
47
- }
48
-
49
- // Extended message types
50
- interface AssistantMessage {
51
- role: "assistant";
52
- content: string | null;
53
- tool_calls?: ToolCall[];
54
- }
55
-
56
- interface ToolMessage {
57
- role: "tool";
58
- tool_call_id: string;
59
- content: string;
60
- }
61
- ```
62
-
63
- ### 3. Tool Registry
64
-
65
- A simple registry where tools are registered with:
66
- - Name
67
- - Description (for the LLM)
68
- - JSON Schema for parameters
69
- - Executor function: `(params: any) => Promise<string>`
70
-
71
- ```typescript
72
- class ToolRegistry {
73
- private tools: Map<string, { def: ToolDefinition; exec: (params: any) => Promise<string> }>;
74
-
75
- register(name, description, schema, executor): void;
76
- getDefinitions(): ToolDefinition[]; // For LLM API call
77
- execute(name: string, params: any): Promise<string>; // Run a tool
78
- }
79
- ```
80
-
81
- ### 4. The Agentic Loop (in Agent.processMessage)
82
-
83
- ```
84
- 1. Build messages (system + history + user)
85
- 2. Call LLM with tools
86
- 3. While response has tool_calls:
87
- a. For each tool_call: execute, collect result
88
- b. Append assistant message (with tool_calls) to messages
89
- c. Append tool result messages
90
- d. Call LLM again with updated messages
91
- 4. Return final text content
92
- 5. Store in memory (include tool usage summary)
93
- ```
94
-
95
- **Safety limits:**
96
- - Max iterations per turn (e.g., 25)
97
- - Max total tokens per turn
98
- - Tool execution timeout (per tool)
99
- - Dangerous command confirmation (optional)
100
-
101
- ### 5. Phase 1 Tools
102
-
103
- #### `shell` (exec)
104
- - Run a shell command, return stdout/stderr
105
- - Working directory: `~/hivemind/workspace`
106
- - Timeout: 30s default, configurable
107
- - Safety: refuse obviously destructive commands (e.g. `rm -rf /`)
108
-
109
- #### `read_file`
110
- - Read file contents (with optional offset/limit for large files)
111
- - Returns text content or error
112
-
113
- #### `write_file`
114
- - Write content to a file (creates dirs if needed)
115
- - Returns success/failure
116
-
117
- #### `edit_file`
118
- - Find and replace exact text in a file
119
- - oldText → newText pattern (surgical edits)
120
-
121
- #### `web_search`
122
- - Search via Brave API
123
- - Returns titles, URLs, snippets
124
-
125
- #### `web_fetch`
126
- - Fetch URL, extract markdown
127
- - Returns readable content
128
-
129
- ### 6. Memory Integration
130
-
131
- Tool calls and results should be stored in memory, but summarized:
132
- - Don't store full file contents in L2 episodes
133
- - Store: "Used shell to run `git status`, found 3 modified files"
134
- - L3 promotion can learn patterns: "For git operations, agent uses shell tool"
135
-
136
- ### 7. Config
137
-
138
- ```toml
139
- [tools]
140
- enabled = true
141
- max_iterations = 25
142
- shell_timeout_s = 30
143
- workspace = "workspace"
144
-
145
- [tools.web_search]
146
- api_key = "" # or from vault
147
- ```
148
-
149
- ### 8. Implementation Order
150
-
151
- 1. **ToolRegistry class** — registration, definitions, execution
152
- 2. **LLMClient.chatWithTools()** — extended chat that handles tool_calls
153
- 3. **Agentic loop in Agent** — the while loop with safety limits
154
- 4. **shell tool** — most impactful, enables everything
155
- 5. **File tools** — read/write/edit
156
- 6. **Web tools** — search/fetch
157
- 7. **Memory integration** — summarize tool usage in episodes
158
-
159
- ### 9. OpenRouter Compatibility
160
-
161
- OpenRouter passes through tool definitions to the underlying model. Most models support tools:
162
- - Claude: Native tool_use
163
- - GPT-4: Native function_calling
164
- - Gemini: Native function declarations
165
-
166
- The OpenAI-compatible format works for all of them through OpenRouter.
167
-
168
- ### 10. Safety Considerations
169
-
170
- - **Sandbox**: Tools run on the agent's machine. File access should be scoped to workspace.
171
- - **Confirmation**: Optionally require human approval for destructive operations.
172
- - **Logging**: All tool calls logged to request logger for debugging.
173
- - **Rate limiting**: Prevent runaway tool loops.