@sesamespace/hivemind 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-A7X4FKQZ.js → chunk-SLEK4XYQ.js} +6 -3
- package/dist/chunk-SLEK4XYQ.js.map +1 -0
- package/dist/{chunk-YDD5EZ46.js → chunk-TUIYJ5KQ.js} +2 -2
- package/dist/{chunk-LDTBAMQY.js → chunk-UAWCLBR5.js} +2 -2
- package/dist/{chunk-R6XIZH3I.js → chunk-VZKHZLRD.js} +3 -3
- package/dist/{chunk-GOW62FNS.js → chunk-WPJ2KZXP.js} +2 -2
- package/dist/commands/fleet.js +3 -3
- package/dist/commands/start.js +3 -3
- package/dist/commands/watchdog.js +3 -3
- package/dist/index.js +2 -2
- package/dist/main.js +5 -5
- package/dist/start.js +1 -1
- package/install.sh +131 -0
- package/package.json +25 -22
- package/packages/memory/Cargo.lock +6480 -0
- package/packages/memory/Cargo.toml +21 -0
- package/packages/memory/src/src/context.rs +179 -0
- package/packages/memory/src/src/embeddings.rs +51 -0
- package/packages/memory/src/src/main.rs +626 -0
- package/packages/memory/src/src/promotion.rs +637 -0
- package/packages/memory/src/src/scoring.rs +131 -0
- package/packages/memory/src/src/store.rs +460 -0
- package/packages/memory/src/src/tasks.rs +321 -0
- package/.pnpmrc.json +0 -1
- package/DASHBOARD-PLAN.md +0 -206
- package/TOOL-USE-DESIGN.md +0 -173
- package/dist/chunk-A7X4FKQZ.js.map +0 -1
- package/docs/TOOL-PARITY-PLAN.md +0 -191
- /package/dist/{chunk-YDD5EZ46.js.map → chunk-TUIYJ5KQ.js.map} +0 -0
- /package/dist/{chunk-LDTBAMQY.js.map → chunk-UAWCLBR5.js.map} +0 -0
- /package/dist/{chunk-R6XIZH3I.js.map → chunk-VZKHZLRD.js.map} +0 -0
- /package/dist/{chunk-GOW62FNS.js.map → chunk-WPJ2KZXP.js.map} +0 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use arrow_array::{RecordBatch, RecordBatchIterator, StringArray};
|
|
3
|
+
use arrow_schema::{DataType, Field, Schema};
|
|
4
|
+
use chrono::Utc;
|
|
5
|
+
use futures::stream::TryStreamExt;
|
|
6
|
+
use lancedb::{connection::Connection, query::ExecutableQuery, query::QueryBase, Table};
|
|
7
|
+
use serde::{Deserialize, Serialize};
|
|
8
|
+
use std::sync::Arc;
|
|
9
|
+
|
|
10
|
+
/// Name of the LanceDB table that holds task records.
const TASKS_TABLE: &str = "tasks";
|
|
11
|
+
|
|
12
|
+
/// A task row as persisted in the `tasks` LanceDB table.
///
/// Every field is stored as a UTF-8 string column (see `TaskStore::schema()`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TaskRecord {
    /// Unique task id (UUID v4, assigned at creation).
    pub id: String,
    /// Context (workstream) the task belongs to.
    pub context_name: String,
    pub title: String,
    pub description: String,
    /// Lifecycle state; observed values include "planned", "complete",
    /// "archived" (and presumably "active" — see `get_next_task` docs).
    pub status: String,
    pub blocked_by: String, // JSON array stored as string
    /// RFC 3339 timestamp (UTC) set at creation.
    pub created_at: String,
    /// RFC 3339 timestamp (UTC) bumped on every update.
    pub updated_at: String,
}
|
|
23
|
+
|
|
24
|
+
/// Caller-supplied payload for creating a new task.
#[derive(Debug, Deserialize)]
pub struct TaskInput {
    pub context_name: String,
    pub title: String,
    pub description: String,
    /// Defaults to "planned" when omitted from the input.
    #[serde(default = "default_status")]
    pub status: String,
    /// Ids of tasks that must complete before this one; defaults to empty.
    #[serde(default)]
    pub blocked_by: Vec<String>,
}
|
|
34
|
+
|
|
35
|
+
/// Default lifecycle state for newly created tasks; used by the
/// `#[serde(default = "default_status")]` attribute on `TaskInput::status`.
fn default_status() -> String {
    String::from("planned")
}
|
|
38
|
+
|
|
39
|
+
/// Partial update for an existing task.
///
/// `None` fields are left unchanged by `TaskStore::update_task`.
#[derive(Debug, Deserialize)]
pub struct TaskUpdate {
    pub status: Option<String>,
    pub title: Option<String>,
    pub description: Option<String>,
    /// Replaces the full blocker list when present (not merged).
    pub blocked_by: Option<Vec<String>>,
}
|
|
46
|
+
|
|
47
|
+
/// Task persistence layer backed by a LanceDB connection.
///
/// Construct via `TaskStore::new`, which also creates the table on first use.
pub struct TaskStore {
    db: Connection,
}
|
|
50
|
+
|
|
51
|
+
impl TaskStore {
|
|
52
|
+
pub async fn new(db: Connection) -> Result<Self> {
|
|
53
|
+
let store = Self { db };
|
|
54
|
+
store.ensure_table().await?;
|
|
55
|
+
Ok(store)
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
fn schema() -> Arc<Schema> {
|
|
59
|
+
Arc::new(Schema::new(vec![
|
|
60
|
+
Field::new("id", DataType::Utf8, false),
|
|
61
|
+
Field::new("context_name", DataType::Utf8, false),
|
|
62
|
+
Field::new("title", DataType::Utf8, false),
|
|
63
|
+
Field::new("description", DataType::Utf8, false),
|
|
64
|
+
Field::new("status", DataType::Utf8, false),
|
|
65
|
+
Field::new("blocked_by", DataType::Utf8, false),
|
|
66
|
+
Field::new("created_at", DataType::Utf8, false),
|
|
67
|
+
Field::new("updated_at", DataType::Utf8, false),
|
|
68
|
+
]))
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
async fn ensure_table(&self) -> Result<()> {
|
|
72
|
+
let names = self.db.table_names().execute().await?;
|
|
73
|
+
if !names.contains(&TASKS_TABLE.to_string()) {
|
|
74
|
+
let schema = Self::schema();
|
|
75
|
+
let batch = RecordBatch::new_empty(schema.clone());
|
|
76
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
77
|
+
self.db
|
|
78
|
+
.create_table(TASKS_TABLE, Box::new(batches))
|
|
79
|
+
.execute()
|
|
80
|
+
.await?;
|
|
81
|
+
tracing::info!("Created tasks table");
|
|
82
|
+
}
|
|
83
|
+
Ok(())
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
pub async fn create_task(&self, input: TaskInput) -> Result<TaskRecord> {
|
|
87
|
+
let id = uuid::Uuid::new_v4().to_string();
|
|
88
|
+
let now = Utc::now().to_rfc3339();
|
|
89
|
+
let blocked_by_json = serde_json::to_string(&input.blocked_by)?;
|
|
90
|
+
|
|
91
|
+
let task = TaskRecord {
|
|
92
|
+
id: id.clone(),
|
|
93
|
+
context_name: input.context_name.clone(),
|
|
94
|
+
title: input.title.clone(),
|
|
95
|
+
description: input.description.clone(),
|
|
96
|
+
status: input.status,
|
|
97
|
+
blocked_by: blocked_by_json.clone(),
|
|
98
|
+
created_at: now.clone(),
|
|
99
|
+
updated_at: now.clone(),
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
let schema = Self::schema();
|
|
103
|
+
let batch = RecordBatch::try_new(
|
|
104
|
+
schema.clone(),
|
|
105
|
+
vec![
|
|
106
|
+
Arc::new(StringArray::from(vec![task.id.as_str()])),
|
|
107
|
+
Arc::new(StringArray::from(vec![task.context_name.as_str()])),
|
|
108
|
+
Arc::new(StringArray::from(vec![task.title.as_str()])),
|
|
109
|
+
Arc::new(StringArray::from(vec![task.description.as_str()])),
|
|
110
|
+
Arc::new(StringArray::from(vec![task.status.as_str()])),
|
|
111
|
+
Arc::new(StringArray::from(vec![blocked_by_json.as_str()])),
|
|
112
|
+
Arc::new(StringArray::from(vec![task.created_at.as_str()])),
|
|
113
|
+
Arc::new(StringArray::from(vec![task.updated_at.as_str()])),
|
|
114
|
+
],
|
|
115
|
+
)?;
|
|
116
|
+
|
|
117
|
+
let table = self.db.open_table(TASKS_TABLE).execute().await?;
|
|
118
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
119
|
+
table.add(Box::new(batches)).execute().await?;
|
|
120
|
+
|
|
121
|
+
tracing::debug!("Created task {} in context {}", task.id, task.context_name);
|
|
122
|
+
Ok(task)
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
pub async fn list_tasks(
|
|
126
|
+
&self,
|
|
127
|
+
context: &str,
|
|
128
|
+
status_filter: Option<&str>,
|
|
129
|
+
) -> Result<Vec<TaskRecord>> {
|
|
130
|
+
let table = self.db.open_table(TASKS_TABLE).execute().await?;
|
|
131
|
+
|
|
132
|
+
let filter = match status_filter {
|
|
133
|
+
Some(status) => format!("context_name = '{}' AND status = '{}'", context, status),
|
|
134
|
+
None => format!("context_name = '{}'", context),
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
let results = table.query().only_if(filter).execute().await?;
|
|
138
|
+
|
|
139
|
+
let mut tasks = Vec::new();
|
|
140
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
141
|
+
|
|
142
|
+
for batch in &batches {
|
|
143
|
+
let ids = batch
|
|
144
|
+
.column_by_name("id")
|
|
145
|
+
.unwrap()
|
|
146
|
+
.as_any()
|
|
147
|
+
.downcast_ref::<StringArray>()
|
|
148
|
+
.unwrap();
|
|
149
|
+
let ctx_names = batch
|
|
150
|
+
.column_by_name("context_name")
|
|
151
|
+
.unwrap()
|
|
152
|
+
.as_any()
|
|
153
|
+
.downcast_ref::<StringArray>()
|
|
154
|
+
.unwrap();
|
|
155
|
+
let titles = batch
|
|
156
|
+
.column_by_name("title")
|
|
157
|
+
.unwrap()
|
|
158
|
+
.as_any()
|
|
159
|
+
.downcast_ref::<StringArray>()
|
|
160
|
+
.unwrap();
|
|
161
|
+
let descriptions = batch
|
|
162
|
+
.column_by_name("description")
|
|
163
|
+
.unwrap()
|
|
164
|
+
.as_any()
|
|
165
|
+
.downcast_ref::<StringArray>()
|
|
166
|
+
.unwrap();
|
|
167
|
+
let statuses = batch
|
|
168
|
+
.column_by_name("status")
|
|
169
|
+
.unwrap()
|
|
170
|
+
.as_any()
|
|
171
|
+
.downcast_ref::<StringArray>()
|
|
172
|
+
.unwrap();
|
|
173
|
+
let blocked_bys = batch
|
|
174
|
+
.column_by_name("blocked_by")
|
|
175
|
+
.unwrap()
|
|
176
|
+
.as_any()
|
|
177
|
+
.downcast_ref::<StringArray>()
|
|
178
|
+
.unwrap();
|
|
179
|
+
let created_ats = batch
|
|
180
|
+
.column_by_name("created_at")
|
|
181
|
+
.unwrap()
|
|
182
|
+
.as_any()
|
|
183
|
+
.downcast_ref::<StringArray>()
|
|
184
|
+
.unwrap();
|
|
185
|
+
let updated_ats = batch
|
|
186
|
+
.column_by_name("updated_at")
|
|
187
|
+
.unwrap()
|
|
188
|
+
.as_any()
|
|
189
|
+
.downcast_ref::<StringArray>()
|
|
190
|
+
.unwrap();
|
|
191
|
+
|
|
192
|
+
for i in 0..batch.num_rows() {
|
|
193
|
+
tasks.push(TaskRecord {
|
|
194
|
+
id: ids.value(i).to_string(),
|
|
195
|
+
context_name: ctx_names.value(i).to_string(),
|
|
196
|
+
title: titles.value(i).to_string(),
|
|
197
|
+
description: descriptions.value(i).to_string(),
|
|
198
|
+
status: statuses.value(i).to_string(),
|
|
199
|
+
blocked_by: blocked_bys.value(i).to_string(),
|
|
200
|
+
created_at: created_ats.value(i).to_string(),
|
|
201
|
+
updated_at: updated_ats.value(i).to_string(),
|
|
202
|
+
});
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Sort by created_at
|
|
207
|
+
tasks.sort_by(|a, b| a.created_at.cmp(&b.created_at));
|
|
208
|
+
|
|
209
|
+
Ok(tasks)
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
pub async fn get_task(&self, id: &str) -> Result<Option<TaskRecord>> {
|
|
213
|
+
let table = self.db.open_table(TASKS_TABLE).execute().await?;
|
|
214
|
+
let results = table
|
|
215
|
+
.query()
|
|
216
|
+
.only_if(format!("id = '{}'", id))
|
|
217
|
+
.execute()
|
|
218
|
+
.await?;
|
|
219
|
+
|
|
220
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
221
|
+
for batch in &batches {
|
|
222
|
+
if batch.num_rows() > 0 {
|
|
223
|
+
let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
224
|
+
let ctx_names = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
225
|
+
let titles = batch.column_by_name("title").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
226
|
+
let descriptions = batch.column_by_name("description").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
227
|
+
let statuses = batch.column_by_name("status").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
228
|
+
let blocked_bys = batch.column_by_name("blocked_by").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
229
|
+
let created_ats = batch.column_by_name("created_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
230
|
+
let updated_ats = batch.column_by_name("updated_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
|
|
231
|
+
|
|
232
|
+
return Ok(Some(TaskRecord {
|
|
233
|
+
id: ids.value(0).to_string(),
|
|
234
|
+
context_name: ctx_names.value(0).to_string(),
|
|
235
|
+
title: titles.value(0).to_string(),
|
|
236
|
+
description: descriptions.value(0).to_string(),
|
|
237
|
+
status: statuses.value(0).to_string(),
|
|
238
|
+
blocked_by: blocked_bys.value(0).to_string(),
|
|
239
|
+
created_at: created_ats.value(0).to_string(),
|
|
240
|
+
updated_at: updated_ats.value(0).to_string(),
|
|
241
|
+
}));
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
Ok(None)
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
pub async fn update_task(&self, id: &str, update: TaskUpdate) -> Result<Option<TaskRecord>> {
|
|
249
|
+
let existing = self.get_task(id).await?;
|
|
250
|
+
let Some(mut task) = existing else {
|
|
251
|
+
return Ok(None);
|
|
252
|
+
};
|
|
253
|
+
|
|
254
|
+
// Apply updates
|
|
255
|
+
if let Some(status) = update.status {
|
|
256
|
+
task.status = status;
|
|
257
|
+
}
|
|
258
|
+
if let Some(title) = update.title {
|
|
259
|
+
task.title = title;
|
|
260
|
+
}
|
|
261
|
+
if let Some(description) = update.description {
|
|
262
|
+
task.description = description;
|
|
263
|
+
}
|
|
264
|
+
if let Some(blocked_by) = update.blocked_by {
|
|
265
|
+
task.blocked_by = serde_json::to_string(&blocked_by)?;
|
|
266
|
+
}
|
|
267
|
+
task.updated_at = Utc::now().to_rfc3339();
|
|
268
|
+
|
|
269
|
+
// Delete and reinsert
|
|
270
|
+
let table = self.db.open_table(TASKS_TABLE).execute().await?;
|
|
271
|
+
table.delete(&format!("id = '{}'", id)).await?;
|
|
272
|
+
|
|
273
|
+
let schema = Self::schema();
|
|
274
|
+
let batch = RecordBatch::try_new(
|
|
275
|
+
schema.clone(),
|
|
276
|
+
vec![
|
|
277
|
+
Arc::new(StringArray::from(vec![task.id.as_str()])),
|
|
278
|
+
Arc::new(StringArray::from(vec![task.context_name.as_str()])),
|
|
279
|
+
Arc::new(StringArray::from(vec![task.title.as_str()])),
|
|
280
|
+
Arc::new(StringArray::from(vec![task.description.as_str()])),
|
|
281
|
+
Arc::new(StringArray::from(vec![task.status.as_str()])),
|
|
282
|
+
Arc::new(StringArray::from(vec![task.blocked_by.as_str()])),
|
|
283
|
+
Arc::new(StringArray::from(vec![task.created_at.as_str()])),
|
|
284
|
+
Arc::new(StringArray::from(vec![task.updated_at.as_str()])),
|
|
285
|
+
],
|
|
286
|
+
)?;
|
|
287
|
+
|
|
288
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
289
|
+
table.add(Box::new(batches)).execute().await?;
|
|
290
|
+
|
|
291
|
+
Ok(Some(task))
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/// Get the next available task for a context:
|
|
295
|
+
/// - Status is "planned" (not active/complete/archived)
|
|
296
|
+
/// - Not blocked by any incomplete tasks
|
|
297
|
+
pub async fn get_next_task(&self, context: &str) -> Result<Option<TaskRecord>> {
|
|
298
|
+
let planned = self.list_tasks(context, Some("planned")).await?;
|
|
299
|
+
let all_tasks = self.list_tasks(context, None).await?;
|
|
300
|
+
|
|
301
|
+
// Build a set of complete task IDs
|
|
302
|
+
let complete_ids: std::collections::HashSet<String> = all_tasks
|
|
303
|
+
.iter()
|
|
304
|
+
.filter(|t| t.status == "complete" || t.status == "archived")
|
|
305
|
+
.map(|t| t.id.clone())
|
|
306
|
+
.collect();
|
|
307
|
+
|
|
308
|
+
for task in planned {
|
|
309
|
+
let blocked_by: Vec<String> =
|
|
310
|
+
serde_json::from_str(&task.blocked_by).unwrap_or_default();
|
|
311
|
+
|
|
312
|
+
// Task is available if all blockers are complete
|
|
313
|
+
let is_blocked = blocked_by.iter().any(|b| !complete_ids.contains(b));
|
|
314
|
+
if !is_blocked {
|
|
315
|
+
return Ok(Some(task));
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
Ok(None)
|
|
320
|
+
}
|
|
321
|
+
}
|
package/.pnpmrc.json
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"onlyBuiltDependencies":["better-sqlite3"]}
|
package/DASHBOARD-PLAN.md
DELETED
|
@@ -1,206 +0,0 @@
|
|
|
1
|
-
# Hivemind Dashboard — Implementation Plan
|
|
2
|
-
|
|
3
|
-
**Goal:** Local web dashboard for debugging memory, context routing, and LLM request formation.
|
|
4
|
-
**Access:** `http://localhost:9485` on the Mac mini (local access only for now).
|
|
5
|
-
**Priority:** LLM Request Inspector first, then Memory Browser, then Context Overview.
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## Phase 1: LLM Request Logger + Inspector UI
|
|
10
|
-
|
|
11
|
-
### Backend: Request Logging
|
|
12
|
-
|
|
13
|
-
**Where:** Instrument `buildMessages()` in `prompt.ts` and `processMessage()` in `agent.ts`.
|
|
14
|
-
|
|
15
|
-
Each logged request captures:
|
|
16
|
-
```typescript
|
|
17
|
-
interface RequestLog {
|
|
18
|
-
id: string; // uuid
|
|
19
|
-
timestamp: string; // ISO-8601
|
|
20
|
-
// Routing
|
|
21
|
-
context: string; // which context was used
|
|
22
|
-
contextSwitched: boolean; // explicit switch?
|
|
23
|
-
routingReason: string; // "pattern_match:X" | "inferred:X" | "active:X"
|
|
24
|
-
// Sender
|
|
25
|
-
channelId: string;
|
|
26
|
-
channelKind: "dm" | "group";
|
|
27
|
-
senderHandle: string;
|
|
28
|
-
rawMessage: string; // as received (with prefix)
|
|
29
|
-
// Prompt components (broken out for UI)
|
|
30
|
-
systemPrompt: {
|
|
31
|
-
identity: string; // workspace files section
|
|
32
|
-
l3Knowledge: string[]; // individual L3 entries
|
|
33
|
-
l2Episodes: Array<{
|
|
34
|
-
id: string;
|
|
35
|
-
content: string;
|
|
36
|
-
score: number;
|
|
37
|
-
timestamp: string;
|
|
38
|
-
context_name: string;
|
|
39
|
-
role: string;
|
|
40
|
-
}>;
|
|
41
|
-
contextInfo: string; // active context section
|
|
42
|
-
fullText: string; // complete system prompt as sent
|
|
43
|
-
};
|
|
44
|
-
conversationHistory: Array<{ role: string; content: string }>; // L1 turns included
|
|
45
|
-
userMessage: string; // final user message
|
|
46
|
-
// Response
|
|
47
|
-
response: {
|
|
48
|
-
content: string;
|
|
49
|
-
model: string;
|
|
50
|
-
latencyMs: number;
|
|
51
|
-
skipped: boolean; // was it __SKIP__?
|
|
52
|
-
};
|
|
53
|
-
// Config snapshot
|
|
54
|
-
config: {
|
|
55
|
-
topK: number;
|
|
56
|
-
model: string;
|
|
57
|
-
maxTokens: number;
|
|
58
|
-
temperature: number;
|
|
59
|
-
};
|
|
60
|
-
// Approximate token counts (char-based estimate: chars/4)
|
|
61
|
-
tokenEstimates: {
|
|
62
|
-
systemPrompt: number;
|
|
63
|
-
conversationHistory: number;
|
|
64
|
-
userMessage: number;
|
|
65
|
-
total: number;
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
**Storage:** SQLite database at `data/dashboard.db`.
|
|
71
|
-
- Single `request_logs` table with JSON columns for complex fields.
|
|
72
|
-
- Auto-prune: keep last 7 days or 10,000 entries (whichever is smaller).
|
|
73
|
-
- Why SQLite over ring buffer: survives restarts, queryable, minimal overhead.
|
|
74
|
-
|
|
75
|
-
**Token estimation:** Use chars/4 approximation. Good enough for relative sizing. Avoid tokenizer dependency.
|
|
76
|
-
|
|
77
|
-
**Logging approach:** Eager logging. Serialize at request time. The overhead is minimal (~1ms for JSON.stringify) compared to LLM latency (~1-10s). Capturing the exact state at request time is more valuable than lazy reconstruction.
|
|
78
|
-
|
|
79
|
-
### Backend: Dashboard HTTP Server
|
|
80
|
-
|
|
81
|
-
**Where:** New file `packages/runtime/src/dashboard.ts`.
|
|
82
|
-
|
|
83
|
-
Extend the existing health server (or create a sibling on port 9485):
|
|
84
|
-
- `GET /` — serve the SPA (single HTML file)
|
|
85
|
-
- `GET /api/requests` — list recent requests (paginated, filterable)
|
|
86
|
-
- `GET /api/requests/:id` — single request detail
|
|
87
|
-
- `GET /api/contexts` — proxy to memory daemon's context list
|
|
88
|
-
- `GET /api/contexts/:name/episodes` — proxy L2 episodes
|
|
89
|
-
- `GET /api/contexts/:name/l3` — proxy L3 knowledge
|
|
90
|
-
- `GET /api/stats` — memory stats (episode counts, last promotion, etc.)
|
|
91
|
-
- `DELETE /api/l3/:id` — delete a bad L3 entry (write op from day 1)
|
|
92
|
-
- `POST /api/l3/:id/edit` — edit L3 entry content
|
|
93
|
-
|
|
94
|
-
Bind to `127.0.0.1:9485` only.
|
|
95
|
-
|
|
96
|
-
### Frontend: Single-File SPA
|
|
97
|
-
|
|
98
|
-
**Why single file:** No build step, no React, no dependencies. Ship as one HTML file with embedded CSS/JS. Can always upgrade later.
|
|
99
|
-
|
|
100
|
-
**Layout:**
|
|
101
|
-
- Left sidebar: navigation (Requests, Memory, Contexts)
|
|
102
|
-
- Main area: content
|
|
103
|
-
|
|
104
|
-
**Request Inspector view:**
|
|
105
|
-
- Reverse-chronological list of requests
|
|
106
|
-
- Each row: timestamp, sender, context, model, latency, token estimate
|
|
107
|
-
- Click to expand → shows all sections:
|
|
108
|
-
- **Identity files** (collapsible, usually not interesting)
|
|
109
|
-
- **L3 Knowledge** (list of entries with metadata)
|
|
110
|
-
- **L2 Episodes** (with similarity scores, timestamps, source context)
|
|
111
|
-
- **L1 History** (conversation turns)
|
|
112
|
-
- **User Message** (raw with prefix)
|
|
113
|
-
- **Response** (with model, latency)
|
|
114
|
-
- **Config** (top_k, model, temperature)
|
|
115
|
-
- **Token breakdown** (bar chart showing proportion per section)
|
|
116
|
-
- Filters: by context, by sender, by time range
|
|
117
|
-
- Search: full-text search across messages
|
|
118
|
-
|
|
119
|
-
**Memory Browser view (Phase 2):**
|
|
120
|
-
- L2: searchable episode list, filterable by context/role/time
|
|
121
|
-
- L3: per-context knowledge entries with edit/delete buttons
|
|
122
|
-
- Promotion log (if we add logging for it)
|
|
123
|
-
|
|
124
|
-
**Context Overview (Phase 2):**
|
|
125
|
-
- List of contexts with episode counts, last active
|
|
126
|
-
- Active context highlighted
|
|
127
|
-
- Click to drill into episodes/L3
|
|
128
|
-
|
|
129
|
-
---
|
|
130
|
-
|
|
131
|
-
## Phase 2: Memory Browser + Context Overview
|
|
132
|
-
|
|
133
|
-
After Phase 1 is working and useful, add:
|
|
134
|
-
- Full L2 browsing with semantic search UI
|
|
135
|
-
- L3 management (view, edit, delete)
|
|
136
|
-
- Context explorer with stats
|
|
137
|
-
- Promotion history logging
|
|
138
|
-
|
|
139
|
-
---
|
|
140
|
-
|
|
141
|
-
## Implementation Steps (Phase 1)
|
|
142
|
-
|
|
143
|
-
### Step 1: Request logging infrastructure
|
|
144
|
-
- [ ] Create `packages/runtime/src/request-logger.ts`
|
|
145
|
-
- SQLite setup (using better-sqlite3)
|
|
146
|
-
- `logRequest()` method
|
|
147
|
-
- `getRequests()` with pagination/filters
|
|
148
|
-
- `getRequest(id)` for detail view
|
|
149
|
-
- Auto-pruning on startup
|
|
150
|
-
- [ ] Add better-sqlite3 dependency
|
|
151
|
-
|
|
152
|
-
### Step 2: Instrument the pipeline
|
|
153
|
-
- [ ] Modify `agent.ts` `processMessage()` to capture routing decision + timing
|
|
154
|
-
- [ ] Modify `prompt.ts` `buildSystemPrompt()` to return structured components (not just string)
|
|
155
|
-
- [ ] Log each request after LLM response arrives
|
|
156
|
-
- [ ] Capture config snapshot with each log entry
|
|
157
|
-
|
|
158
|
-
### Step 3: Dashboard HTTP server
|
|
159
|
-
- [ ] Create `packages/runtime/src/dashboard.ts`
|
|
160
|
-
- Express-free: use Node's built-in `http` module (like health server)
|
|
161
|
-
- Serve SPA at `/`
|
|
162
|
-
- JSON APIs for request logs and memory proxy
|
|
163
|
-
- [ ] Wire into `pipeline.ts` startup
|
|
164
|
-
|
|
165
|
-
### Step 4: Frontend SPA
|
|
166
|
-
- [ ] Single HTML file at `packages/runtime/src/dashboard.html`
|
|
167
|
-
- Vanilla JS, no framework
|
|
168
|
-
- CSS grid layout
|
|
169
|
-
- Fetch-based API calls
|
|
170
|
-
- Expandable request cards
|
|
171
|
-
- Token breakdown visualization
|
|
172
|
-
- Basic filtering
|
|
173
|
-
|
|
174
|
-
### Step 5: Memory proxy + write ops
|
|
175
|
-
- [ ] Proxy endpoints to memory daemon for L2/L3 browsing
|
|
176
|
-
- [ ] DELETE/PATCH endpoints for L3 management
|
|
177
|
-
|
|
178
|
-
---
|
|
179
|
-
|
|
180
|
-
## Design Decisions
|
|
181
|
-
|
|
182
|
-
| Question | Decision | Rationale |
|
|
183
|
-
|----------|----------|-----------|
|
|
184
|
-
| Storage | SQLite | Survives restarts, queryable, lightweight |
|
|
185
|
-
| Token counting | chars/4 estimate | Good enough, no tokenizer dep |
|
|
186
|
-
| Logging | Eager | Captures exact state, overhead negligible vs LLM latency |
|
|
187
|
-
| Bind address | 127.0.0.1 only | Local access, no auth needed |
|
|
188
|
-
| Framework | None (vanilla) | Single HTML file, no build step |
|
|
189
|
-
| Read-only or read-write? | Read-write from start | Ryan will want to delete bad L3 entries immediately |
|
|
190
|
-
| Persist request logs? | Yes, 7 days | Need to compare across memory config changes |
|
|
191
|
-
| Multi-agent? | Single agent for now | Don't over-engineer, but use agent name in logs |
|
|
192
|
-
| Port | 9485 | Next to health port (9484), easy to remember |
|
|
193
|
-
|
|
194
|
-
---
|
|
195
|
-
|
|
196
|
-
## Sesame Command Fix (Bonus)
|
|
197
|
-
|
|
198
|
-
While we're in the code, fix the sender prefix issue:
|
|
199
|
-
- In `pipeline.ts` `startSesameLoop()`, before calling `agent.processMessage()`, strip the sender prefix for command parsing
|
|
200
|
-
- Or better: in `agent.ts` `handleSpecialCommand()`, strip known prefix patterns before regex matching
|
|
201
|
-
- This unblocks context switching, task commands, and cross-context search over Sesame
|
|
202
|
-
|
|
203
|
-
---
|
|
204
|
-
|
|
205
|
-
*Created: 2026-02-28*
|
|
206
|
-
*Status: Ready to implement*
|
package/TOOL-USE-DESIGN.md
DELETED
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
# Hivemind Tool Use — Architecture Design
|
|
2
|
-
|
|
3
|
-
## Current State
|
|
4
|
-
|
|
5
|
-
The LLM client does simple chat completions: `messages[] → response.content`. No tool/function calling.
|
|
6
|
-
|
|
7
|
-
## Goal
|
|
8
|
-
|
|
9
|
-
Full agentic tool-use loop matching OpenClaw capabilities, with Hivemind's memory system as a differentiator.
|
|
10
|
-
|
|
11
|
-
## Architecture
|
|
12
|
-
|
|
13
|
-
### 1. Tool Calling Protocol (OpenAI-compatible, works with OpenRouter)
|
|
14
|
-
|
|
15
|
-
The OpenAI chat completions API supports `tools` (function definitions) and `tool_choice`. When the model wants to use a tool, it returns a `tool_calls` array instead of (or alongside) content. We then execute the tool, append the result as a `tool` role message, and call the model again.
|
|
16
|
-
|
|
17
|
-
```
|
|
18
|
-
User message
|
|
19
|
-
↓
|
|
20
|
-
LLM (with tools defined)
|
|
21
|
-
↓
|
|
22
|
-
If tool_calls → execute tools → append results → call LLM again (loop)
|
|
23
|
-
If content only → return response
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
This is a **while loop**, not a single call. The model may chain multiple tool calls before producing a final text response.
|
|
27
|
-
|
|
28
|
-
### 2. Key Data Structures
|
|
29
|
-
|
|
30
|
-
```typescript
|
|
31
|
-
interface ToolDefinition {
|
|
32
|
-
name: string;
|
|
33
|
-
description: string;
|
|
34
|
-
parameters: JSONSchema; // JSON Schema for function params
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
interface ToolCall {
|
|
38
|
-
id: string;
|
|
39
|
-
type: "function";
|
|
40
|
-
function: { name: string; arguments: string }; // arguments is JSON string
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
interface ToolResult {
|
|
44
|
-
tool_call_id: string;
|
|
45
|
-
role: "tool";
|
|
46
|
-
content: string; // result as string
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
// Extended message types
|
|
50
|
-
interface AssistantMessage {
|
|
51
|
-
role: "assistant";
|
|
52
|
-
content: string | null;
|
|
53
|
-
tool_calls?: ToolCall[];
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
interface ToolMessage {
|
|
57
|
-
role: "tool";
|
|
58
|
-
tool_call_id: string;
|
|
59
|
-
content: string;
|
|
60
|
-
}
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
### 3. Tool Registry
|
|
64
|
-
|
|
65
|
-
A simple registry where tools are registered with:
|
|
66
|
-
- Name
|
|
67
|
-
- Description (for the LLM)
|
|
68
|
-
- JSON Schema for parameters
|
|
69
|
-
- Executor function: `(params: any) => Promise<string>`
|
|
70
|
-
|
|
71
|
-
```typescript
|
|
72
|
-
class ToolRegistry {
|
|
73
|
-
private tools: Map<string, { def: ToolDefinition; exec: (params: any) => Promise<string> }>;
|
|
74
|
-
|
|
75
|
-
register(name, description, schema, executor): void;
|
|
76
|
-
getDefinitions(): ToolDefinition[]; // For LLM API call
|
|
77
|
-
execute(name: string, params: any): Promise<string>; // Run a tool
|
|
78
|
-
}
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
### 4. The Agentic Loop (in Agent.processMessage)
|
|
82
|
-
|
|
83
|
-
```
|
|
84
|
-
1. Build messages (system + history + user)
|
|
85
|
-
2. Call LLM with tools
|
|
86
|
-
3. While response has tool_calls:
|
|
87
|
-
a. For each tool_call: execute, collect result
|
|
88
|
-
b. Append assistant message (with tool_calls) to messages
|
|
89
|
-
c. Append tool result messages
|
|
90
|
-
d. Call LLM again with updated messages
|
|
91
|
-
4. Return final text content
|
|
92
|
-
5. Store in memory (include tool usage summary)
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
**Safety limits:**
|
|
96
|
-
- Max iterations per turn (e.g., 25)
|
|
97
|
-
- Max total tokens per turn
|
|
98
|
-
- Tool execution timeout (per tool)
|
|
99
|
-
- Dangerous command confirmation (optional)
|
|
100
|
-
|
|
101
|
-
### 5. Phase 1 Tools
|
|
102
|
-
|
|
103
|
-
#### `shell` (exec)
|
|
104
|
-
- Run a shell command, return stdout/stderr
|
|
105
|
-
- Working directory: `~/hivemind/workspace`
|
|
106
|
-
- Timeout: 30s default, configurable
|
|
107
|
-
- Safety: no `rm -rf /` etc.
|
|
108
|
-
|
|
109
|
-
#### `read_file`
|
|
110
|
-
- Read file contents (with optional offset/limit for large files)
|
|
111
|
-
- Returns text content or error
|
|
112
|
-
|
|
113
|
-
#### `write_file`
|
|
114
|
-
- Write content to a file (creates dirs if needed)
|
|
115
|
-
- Returns success/failure
|
|
116
|
-
|
|
117
|
-
#### `edit_file`
|
|
118
|
-
- Find and replace exact text in a file
|
|
119
|
-
- oldText → newText pattern (surgical edits)
|
|
120
|
-
|
|
121
|
-
#### `web_search`
|
|
122
|
-
- Search via Brave API
|
|
123
|
-
- Returns titles, URLs, snippets
|
|
124
|
-
|
|
125
|
-
#### `web_fetch`
|
|
126
|
-
- Fetch URL, extract markdown
|
|
127
|
-
- Returns readable content
|
|
128
|
-
|
|
129
|
-
### 6. Memory Integration
|
|
130
|
-
|
|
131
|
-
Tool calls and results should be stored in memory, but summarized:
|
|
132
|
-
- Don't store full file contents in L2 episodes
|
|
133
|
-
- Store: "Used shell to run `git status`, found 3 modified files"
|
|
134
|
-
- L3 promotion can learn patterns: "For git operations, agent uses shell tool"
|
|
135
|
-
|
|
136
|
-
### 7. Config
|
|
137
|
-
|
|
138
|
-
```toml
|
|
139
|
-
[tools]
|
|
140
|
-
enabled = true
|
|
141
|
-
max_iterations = 25
|
|
142
|
-
shell_timeout_s = 30
|
|
143
|
-
workspace = "workspace"
|
|
144
|
-
|
|
145
|
-
[tools.web_search]
|
|
146
|
-
api_key = "" # or from vault
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
### 8. Implementation Order
|
|
150
|
-
|
|
151
|
-
1. **ToolRegistry class** — registration, definitions, execution
|
|
152
|
-
2. **LLMClient.chatWithTools()** — extended chat that handles tool_calls
|
|
153
|
-
3. **Agentic loop in Agent** — the while loop with safety limits
|
|
154
|
-
4. **shell tool** — most impactful, enables everything
|
|
155
|
-
5. **File tools** — read/write/edit
|
|
156
|
-
6. **Web tools** — search/fetch
|
|
157
|
-
7. **Memory integration** — summarize tool usage in episodes
|
|
158
|
-
|
|
159
|
-
### 9. OpenRouter Compatibility
|
|
160
|
-
|
|
161
|
-
OpenRouter passes through tool definitions to the underlying model. Most models support tools:
|
|
162
|
-
- Claude: Native tool_use
|
|
163
|
-
- GPT-4: Native function_calling
|
|
164
|
-
- Gemini: Native function declarations
|
|
165
|
-
|
|
166
|
-
The OpenAI-compatible format works for all of them through OpenRouter.
|
|
167
|
-
|
|
168
|
-
### 10. Safety Considerations
|
|
169
|
-
|
|
170
|
-
- **Sandbox**: Tools run on the agent's machine. File access should be scoped to workspace.
|
|
171
|
-
- **Confirmation**: Optionally require human approval for destructive operations.
|
|
172
|
-
- **Logging**: All tool calls logged to request logger for debugging.
|
|
173
|
-
- **Rate limiting**: Prevent runaway tool loops.
|