@deeplake/hivemind 0.6.47 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +158 -51
  4. package/bundle/cli.js +4103 -282
  5. package/codex/bundle/capture.js +510 -90
  6. package/codex/bundle/commands/auth-login.js +219 -72
  7. package/codex/bundle/embeddings/embed-daemon.js +243 -0
  8. package/codex/bundle/pre-tool-use.js +713 -108
  9. package/codex/bundle/session-start-setup.js +209 -58
  10. package/codex/bundle/session-start.js +40 -11
  11. package/codex/bundle/shell/deeplake-shell.js +679 -112
  12. package/codex/bundle/stop.js +477 -59
  13. package/codex/bundle/wiki-worker.js +312 -11
  14. package/cursor/bundle/capture.js +768 -57
  15. package/cursor/bundle/commands/auth-login.js +219 -72
  16. package/cursor/bundle/embeddings/embed-daemon.js +243 -0
  17. package/cursor/bundle/pre-tool-use.js +1684 -0
  18. package/cursor/bundle/session-end.js +223 -2
  19. package/cursor/bundle/session-start.js +209 -57
  20. package/cursor/bundle/shell/deeplake-shell.js +679 -112
  21. package/cursor/bundle/wiki-worker.js +571 -0
  22. package/hermes/bundle/capture.js +1194 -0
  23. package/hermes/bundle/commands/auth-login.js +1009 -0
  24. package/hermes/bundle/embeddings/embed-daemon.js +243 -0
  25. package/hermes/bundle/package.json +1 -0
  26. package/hermes/bundle/pre-tool-use.js +1681 -0
  27. package/hermes/bundle/session-end.js +265 -0
  28. package/hermes/bundle/session-start.js +655 -0
  29. package/hermes/bundle/shell/deeplake-shell.js +69905 -0
  30. package/hermes/bundle/wiki-worker.js +572 -0
  31. package/mcp/bundle/server.js +289 -69
  32. package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
  33. package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
  34. package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
  35. package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
  36. package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
  37. package/openclaw/dist/index.js +752 -702
  38. package/openclaw/openclaw.plugin.json +1 -1
  39. package/openclaw/package.json +1 -1
  40. package/package.json +7 -3
  41. package/pi/extension-source/hivemind.ts +807 -0
@@ -0,0 +1,807 @@
1
+ // @ts-nocheck — distributed as raw .ts; pi's runtime loads + compiles it.
2
+ // We ship this file verbatim into ~/.pi/agent/extensions/hivemind.ts.
3
+ //
4
+ // Hivemind extension for pi (badlogic/pi-mono coding-agent).
5
+ //
6
+ // Subscribes to the agent lifecycle events documented in
7
+ // `pi-mono/packages/coding-agent/src/core/extensions/types.ts` to:
8
+ // - inject deeplake memory context at session_start
9
+ // - capture user prompts (input event)
10
+ // - capture tool call results (tool_result event)
11
+ // - capture assistant messages (message_end event)
12
+ // - finalize on session_shutdown
13
+ //
14
+ // Plus registers three first-class pi tools (since pi has no MCP):
15
+ // - hivemind_search
16
+ // - hivemind_read
17
+ // - hivemind_index
18
+ //
19
+ // All deeplake interactions are inline `fetch` calls so this file has
20
+ // zero non-builtin runtime dependencies — it only needs Node 22+ globals.
21
+ //
22
+ // Type imports are erased at runtime so they don't need to be installed
23
+ // at our build time. pi's `@mariozechner/pi-coding-agent` types are
24
+ // available to pi's compiler when this is loaded.
25
+
26
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import {
  readFileSync, existsSync, appendFileSync, mkdirSync, writeFileSync,
  openSync, closeSync, unlinkSync, constants as fsConstants,
} from "node:fs";
import { homedir, tmpdir } from "node:os";
import { join, dirname } from "node:path";
import { connect } from "node:net";
import { spawn, execSync } from "node:child_process";
35
+
36
+ // ---------- diagnostic logging --------------------------------------------------
37
+ //
38
+ // The capture path is fully async + swallows errors (writeSessionRow's catch
39
+ // is intentionally non-fatal, so a transient deeplake outage never breaks pi).
40
+ // That means a buggy extension is silent: rows just don't appear, with no
41
+ // indication where things went wrong. When HIVEMIND_DEBUG=1 we dump a
42
+ // breadcrumb to ~/.deeplake/hivemind-pi.log at every meaningful step so the
43
+ // failure mode is observable. Off by default to keep `pi` quiet for normal
44
+ // users.
45
+
46
+ const LOG_PATH = join(homedir(), ".deeplake", "hivemind-pi.log");
47
+
48
/**
 * Appends one timestamped breadcrumb line to the debug log at LOG_PATH.
 *
 * Does nothing unless the HIVEMIND_DEBUG environment variable is exactly "1".
 * Filesystem errors are swallowed: logging must never break the agent.
 *
 * @param msg Text written after the ISO timestamp and the "[pi]" tag.
 */
function logHm(msg: string): void {
  const debugEnabled = process.env.HIVEMIND_DEBUG === "1";
  if (!debugEnabled) return;
  try {
    // Create the log directory on demand so the first write cannot fail on a missing parent.
    mkdirSync(dirname(LOG_PATH), { recursive: true });
    const stamp = new Date().toISOString();
    appendFileSync(LOG_PATH, `${stamp} [pi] ${msg}\n`);
  } catch {
    /* logging must never break the agent */
  }
}
55
+
56
+ // ---------- credentials / config -----------------------------------------------
57
+
58
/** Credentials and routing details loaded from ~/.deeplake/credentials.json. */
interface Creds {
  /** Sent as the Bearer token on every query request. */
  token: string;
  /** Base URL of the API; the query endpoint path is appended to it. */
  apiUrl: string;
  /** Sent in the X-Activeloop-Org-Id request header. */
  orgId: string;
  /** Optional org name; preferred over orgId when building session filenames. */
  orgName?: string;
  /** Workspace segment of the query endpoint URL. */
  workspaceId: string;
  /** Used in session paths and written to the author column. */
  userName: string;
}
66
+
67
/**
 * Reads ~/.deeplake/credentials.json and returns the credentials it holds.
 *
 * @returns The parsed credentials with defaults applied for apiUrl,
 *          workspaceId and userName, or null when the file is missing,
 *          unreadable, not valid JSON, or has no truthy `token`.
 */
function loadCreds(): Creds | null {
  const credsFile = join(homedir(), ".deeplake", "credentials.json");
  if (!existsSync(credsFile)) return null;
  try {
    const data = JSON.parse(readFileSync(credsFile, "utf-8"));
    if (!data?.token) return null;
    const apiUrl = data.apiUrl ?? "https://api.deeplake.ai";
    const workspaceId = data.workspaceId ?? "default";
    const userName = data.userName ?? "unknown";
    return {
      token: data.token,
      apiUrl,
      orgId: data.orgId,
      orgName: data.orgName,
      workspaceId,
      userName,
    };
  } catch {
    // Unreadable or malformed file is treated the same as "not logged in".
    return null;
  }
}
86
+
87
// Table names used in every SQL statement below. Each can be overridden
// through its environment variable; otherwise the default name is used.
const MEMORY_TABLE = process.env.HIVEMIND_TABLE ?? "memory";
const SESSIONS_TABLE = process.env.HIVEMIND_SESSIONS_TABLE ?? "sessions";
89
+
90
+ // ---------- SQL escape (matches src/utils/sql.ts) ------------------------------
91
+
92
/**
 * Escapes a value for embedding between single quotes in a SQL statement.
 *
 * Backslashes and single quotes are doubled; NUL, DEL and all other control
 * characters except tab, line feed and carriage return are removed.
 *
 * @param value Raw text.
 * @returns The escaped text, without the surrounding quotes.
 */
function sqlStr(value: string): string {
  const backslashesDoubled = value.replace(/\\/g, "\\\\");
  const quotesDoubled = backslashesDoubled.replace(/'/g, "''");
  // One character class covers NUL plus the remaining stripped control characters.
  return quotesDoubled.replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
}
99
+
100
/**
 * Escapes a value for use inside a quoted LIKE / ILIKE pattern.
 *
 * sqlStr only handles string quoting, not LIKE metacharacters. Tool arguments
 * are chosen by the LLM, so an unescaped `%` or `_` would widen the intended
 * filter (a prefix of `%` would match every row). After sqlStr runs, each
 * backslash, `%` and `_` is prefixed with a backslash. The LIKE clause using
 * the result must carry `ESCAPE '\'` so the engine honours that escaping.
 *
 * NOTE(review): sqlStr has already doubled every backslash before the prefixing
 * below, so one input backslash ends up as four characters. Confirm this
 * matches how the engine unescapes string literals and LIKE patterns.
 *
 * @param value Raw text to match literally.
 * @returns The escaped pattern fragment, without quotes or added wildcards.
 */
function sqlLike(value: string): string {
  const quoted = sqlStr(value);
  // Single pass: each metacharacter gets exactly one backslash in front of it.
  return quoted.replace(/[\\%_]/g, (meta) => `\\${meta}`);
}
112
+
113
/**
 * Escapes serialized JSON for embedding in a quoted SQL literal cast to JSONB.
 * Only single quotes are doubled, so JSON's own backslash escapes are preserved.
 *
 * @param json Already-serialized JSON text.
 * @returns The text with every `'` replaced by `''`.
 */
function sqlJsonb(json: string): string {
  return json.split("'").join("''");
}
117
+
118
+ // ---------- deeplake api -------------------------------------------------------
119
+
120
/**
 * Runs one SQL statement against the workspace query endpoint.
 *
 * @param creds Supplies the API base URL, workspace id, bearer token and org id.
 * @param sql   Statement sent as `{ query: sql }` in the POST body.
 * @returns One object per result row, keyed by column name; an empty array
 *          when the response has no `rows` or no `columns`.
 * @throws Error when the HTTP status is not OK; the message carries the status
 *         code and up to the first 200 characters of the response body.
 */
async function dlQuery(creds: Creds, sql: string): Promise<unknown[]> {
  const endpoint = `${creds.apiUrl}/workspaces/${creds.workspaceId}/tables/query`;
  const headers = {
    "Authorization": `Bearer ${creds.token}`,
    "Content-Type": "application/json",
    "X-Activeloop-Org-Id": creds.orgId,
  };
  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify({ query: sql }),
  });
  if (!resp.ok) {
    // Reading the body is best-effort; an unreadable body yields an empty detail.
    const detail = await resp.text().catch(() => "");
    throw new Error(`deeplake query failed: ${resp.status} ${detail.slice(0, 200)}`);
  }
  const { columns, rows } = (await resp.json()) as { columns?: string[]; rows?: unknown[][] };
  if (!rows || !columns) return [];
  // Zip each positional row with the column names.
  return rows.map((row) => Object.fromEntries(columns.map((name, idx) => [name, row[idx]])));
}
138
+
139
+ // ---------- embedding client (inline; reuses the shared daemon) ----------------
140
+ //
141
+ // Pi avoids importing EmbedClient (which is bundled into other agents but
142
+ // here would break the "raw .ts, zero deps" promise of pi extensions).
143
+ // Instead we open a Unix socket directly to the daemon at the same well-known
144
+ // path EmbedClient uses. If the socket isn't there yet, we spawn the
145
+ // canonical daemon at ~/.hivemind/embed-deps/embed-daemon.js (deposited by
146
+ // `hivemind embeddings install`) and wait for it to listen, mirroring the
147
+ // auto-spawn-on-miss logic in src/embeddings/client.ts. Subsequent agents
148
+ // (codex, CC, cursor, hermes, …) connect to the SAME daemon — pi pays the
149
+ // cold-start cost only when it's the first user on the box.
150
+ //
151
+ // Graceful fallback: any failure → return null → caller writes NULL into
152
+ // message_embedding. Embedding is never on the critical path.
153
+
154
// Script that embed() spawns when no daemon answers on the socket.
const EMBED_DAEMON_ENTRY = join(homedir(), ".hivemind", "embed-deps", "embed-daemon.js");
// Per-user socket path: keyed by numeric uid where process.getuid exists,
// otherwise by $USER, otherwise the literal "default".
const EMBED_SOCKET_PATH = `/tmp/hivemind-embed-${
  typeof process.getuid === "function" ? String(process.getuid()) : (process.env.USER ?? "default")
}.sock`;
159
+
160
/**
 * Makes one embedding request over the daemon socket at EMBED_SOCKET_PATH.
 *
 * Sends a single newline-terminated JSON request and reads the reply up to the
 * first newline. The promise never rejects: a socket error, an early close, a
 * 5-second timeout, an unparsable reply, or a reply whose `embedding` is not an
 * array all resolve to null.
 *
 * @param text Text to embed.
 * @param kind Passed through in the request as "document" or "query".
 * @returns The `embedding` array from the reply, or null on any failure.
 */
function tryEmbedOverSocket(text: string, kind: "document" | "query"): Promise<number[] | null> {
  return new Promise((resolve) => {
    let done = false;
    // Several events can fire for one request; only the first outcome counts.
    const finish = (result: number[] | null): void => {
      if (done) return;
      done = true;
      resolve(result);
    };

    const socket = connect(EMBED_SOCKET_PATH);
    let received = "";
    const deadline = setTimeout(() => {
      socket.destroy();
      finish(null);
    }, 5000);
    const giveUp = (): void => {
      clearTimeout(deadline);
      finish(null);
    };

    socket.on("connect", () => {
      // Protocol shape comes from src/embeddings/protocol.ts: {op, id, kind, text}.
      // id is a string ("1"), not a number, and the verb field is "op" not "type".
      const request = { op: "embed", id: "1", kind, text };
      socket.write(JSON.stringify(request) + "\n");
    });

    socket.on("data", (chunk: Buffer) => {
      received += chunk.toString("utf-8");
      const newlineAt = received.indexOf("\n");
      if (newlineAt === -1) return; // reply line not complete yet
      clearTimeout(deadline);
      try {
        const reply = JSON.parse(received.slice(0, newlineAt));
        finish(Array.isArray(reply.embedding) ? reply.embedding : null);
      } catch {
        finish(null);
      }
      socket.destroy();
    });

    socket.on("error", giveUp);
    socket.on("close", giveUp);
  });
}
188
+
189
+ // ---------- summary state + wiki-worker spawn ---------------------------------
190
+ //
191
+ // Mirror of src/hooks/summary-state.ts (same dir, same JSON shape, shared
192
+ // across CC/codex/cursor/hermes — session ids are UUIDs so collisions are
193
+ // impossible). The pi extension increments totalCount on every captured
194
+ // event and spawns the bundled wiki-worker (see pi/bundle/wiki-worker.js)
195
+ // when the threshold is hit. The worker, after generating the summary,
196
+ // calls finalizeSummary() / releaseLock() against this same dir. So the
197
+ // extension and the worker share state.
198
+
199
// Directory holding the per-session summary state (<id>.json) and lock (<id>.lock) files.
const SUMMARY_STATE_DIR = join(homedir(), ".claude", "hooks", "summary-state");
// Where spawnWikiWorker expects the wiki-worker script; it logs and skips when the file is absent.
const PI_WIKI_WORKER_PATH = join(homedir(), ".pi", "agent", "hivemind", "wiki-worker.js");
201
+
202
/** Per-session counters persisted as JSON in SUMMARY_STATE_DIR. */
interface SummaryState {
  /** Compared against Date.now() in milliseconds; 0 is treated as "no summary yet". */
  lastSummaryAt: number;
  /** Subtracted from totalCount to get the events seen since the last summary. */
  lastSummaryCount: number;
  /** Incremented by one on every bumpCounter call. */
  totalCount: number;
}
207
/** Thresholds that decide when a periodic summary is due. */
interface SummaryConfig {
  /** A summary is due once this many events have accumulated since the last one. */
  everyNMessages: number;
  /** A summary is due once this many hours have passed since the last one and new events exist. */
  everyHours: number;
}
211
+
212
/** Returns the path of the JSON state file for `sessionId` inside SUMMARY_STATE_DIR. */
function summaryStatePath(sessionId: string): string {
  const fileName = `${sessionId}.json`;
  return join(SUMMARY_STATE_DIR, fileName);
}
215
/** Returns the path of the lock file for `sessionId` inside SUMMARY_STATE_DIR. */
function summaryLockPath(sessionId: string): string {
  const fileName = `${sessionId}.lock`;
  return join(SUMMARY_STATE_DIR, fileName);
}
218
+
219
/**
 * Reads the periodic-summary thresholds from the environment.
 *
 * HIVEMIND_SUMMARY_EVERY_N_MSGS must parse to a positive integer, otherwise 50
 * is used. HIVEMIND_SUMMARY_EVERY_HOURS must parse to a positive finite number,
 * otherwise 2 is used.
 */
function loadSummaryConfig(): SummaryConfig {
  const msgs = Number(process.env.HIVEMIND_SUMMARY_EVERY_N_MSGS ?? "");
  const hours = Number(process.env.HIVEMIND_SUMMARY_EVERY_HOURS ?? "");

  let everyNMessages = 50;
  if (Number.isInteger(msgs) && msgs > 0) everyNMessages = msgs;

  let everyHours = 2;
  if (Number.isFinite(hours) && hours > 0) everyHours = hours;

  return { everyNMessages, everyHours };
}
227
+
228
// Mirrors src/hooks/summary-state.ts. While a session has no recorded summary,
// the first one is triggered as soon as totalCount reaches this value rather
// than waiting for the steady-state everyNMessages threshold (default 50), so
// a fresh chat gets indexed quickly.
const FIRST_SUMMARY_AT = 10;
232
+
233
/**
 * Loads the persisted summary counters for a session.
 *
 * @returns The stored state with every field coerced to a number (anything
 *          non-numeric or falsy becomes 0), or null when the state file is
 *          missing, unreadable, or not valid JSON.
 */
function readSummaryState(sessionId: string): SummaryState | null {
  const asCount = (raw: unknown): number => Number(raw) || 0;
  try {
    const file = summaryStatePath(sessionId);
    if (!existsSync(file)) return null;
    const stored = JSON.parse(readFileSync(file, "utf-8"));
    return {
      lastSummaryAt: asCount(stored.lastSummaryAt),
      lastSummaryCount: asCount(stored.lastSummaryCount),
      totalCount: asCount(stored.totalCount),
    };
  } catch {
    return null;
  }
}
245
+
246
/**
 * Persists the summary counters for a session, creating SUMMARY_STATE_DIR if
 * needed. Best-effort: any error is swallowed.
 */
function writeSummaryState(sessionId: string, state: SummaryState): void {
  try {
    mkdirSync(SUMMARY_STATE_DIR, { recursive: true });
    const target = summaryStatePath(sessionId);
    writeFileSync(target, JSON.stringify(state));
  } catch {
    /* non-fatal */
  }
}
252
+
253
/**
 * Increments the session's totalCount by one, writes the state back, and
 * returns it. A session with no readable state starts from all-zero counters.
 */
function bumpCounter(sessionId: string): SummaryState {
  const state: SummaryState =
    readSummaryState(sessionId) ?? { lastSummaryAt: 0, lastSummaryCount: 0, totalCount: 0 };
  state.totalCount += 1;
  writeSummaryState(sessionId, state);
  return state;
}
259
+
260
/**
 * Decides whether a periodic summary is due for the given counters.
 *
 * Returns true when any of these holds:
 *  - no summary has been counted yet and totalCount has reached FIRST_SUMMARY_AT;
 *  - at least cfg.everyNMessages events have arrived since the last summary;
 *  - there are new events, a previous summary time is recorded, and at least
 *    cfg.everyHours hours have passed since it.
 */
function shouldTriggerNow(state: SummaryState, cfg: SummaryConfig): boolean {
  const { totalCount, lastSummaryCount, lastSummaryAt } = state;
  const sinceLast = totalCount - lastSummaryCount;

  const firstSummaryDue = lastSummaryCount === 0 && totalCount >= FIRST_SUMMARY_AT;
  if (firstSummaryDue) return true;

  const countDue = sinceLast >= cfg.everyNMessages;
  if (countDue) return true;

  // The time-based rule only applies with new events and a recorded previous summary.
  if (!(sinceLast > 0 && lastSummaryAt > 0)) return false;
  const intervalMs = cfg.everyHours * 3600 * 1000;
  return Date.now() - lastSummaryAt >= intervalMs;
}
270
+
271
/**
 * Tries to take the per-session summary lock by exclusively creating its lock
 * file.
 *
 * @returns true when this call created the lock file; false when it already
 *          exists or any filesystem error occurs.
 */
function tryAcquireSummaryLock(sessionId: string): boolean {
  // O_EXCL makes creation fail if the file exists, so the create doubles as the lock test.
  const exclusiveCreate = fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY;
  try {
    mkdirSync(SUMMARY_STATE_DIR, { recursive: true });
    closeSync(openSync(summaryLockPath(sessionId), exclusiveCreate));
  } catch {
    return false;
  }
  return true;
}
280
+
281
/**
 * Resolves the `pi` executable by running `which pi`.
 *
 * @returns The trimmed output of `which pi`, or the bare name "pi" when the
 *          command fails or prints nothing.
 */
function findPiBin(): string {
  let located = "";
  try {
    located = execSync("which pi 2>/dev/null", { encoding: "utf-8" }).trim();
  } catch {
    /* fall through */
  }
  return located || "pi";
}
288
+
289
+ // Same template the CC/codex spawn-wiki-worker.ts ships. Inlined here
290
+ // because the pi extension is raw .ts and can't import it.
291
+ const WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge — entities, decisions, relationships, and facts — into a structured, searchable wiki entry.
292
+
293
+ SESSION JSONL path: __JSONL__
294
+ SUMMARY FILE to write: __SUMMARY__
295
+ SESSION ID: __SESSION_ID__
296
+ PROJECT: __PROJECT__
297
+ PREVIOUS JSONL OFFSET (lines already processed): __PREV_OFFSET__
298
+ CURRENT JSONL LINES: __JSONL_LINES__
299
+
300
+ Steps:
301
+ 1. Read the session JSONL at the path above.
302
+ - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first,
303
+ then focus on lines AFTER the offset for new content. Merge new facts into the existing summary.
304
+ - If offset is 0, generate from scratch.
305
+
306
+ 2. Write the summary file at the path above with this EXACT format:
307
+
308
+ # Session __SESSION_ID__
309
+ - **Source**: __JSONL_SERVER_PATH__
310
+ - **Started**: <extract from JSONL>
311
+ - **Ended**: <now>
312
+ - **Project**: __PROJECT__
313
+ - **JSONL offset**: __JSONL_LINES__
314
+
315
+ ## What Happened
316
+ <2-3 dense sentences. What was the goal, what was accomplished, what's left.>
317
+
318
+ ## People
319
+ <For each person mentioned: name, role, what they did/said. Format: **Name** — role — action>
320
+
321
+ ## Entities
322
+ <Every named thing: repos, branches, files, APIs, tools, services, tables, features, bugs.
323
+ Format: **entity** (type) — what was done with it, its current state>
324
+
325
+ ## Decisions & Reasoning
326
+ <Every decision made and WHY.>
327
+
328
+ ## Key Facts
329
+ <Bullet list of atomic facts that could answer future questions.>
330
+
331
+ ## Files Modified
332
+ <bullet list: path (new/modified/deleted) — what changed>
333
+
334
+ ## Open Questions / TODO
335
+ <Anything unresolved, blocked, or explicitly deferred>
336
+
337
+ IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact.
338
+ PRIVACY: Never include absolute filesystem paths in the summary.
339
+ LENGTH LIMIT: Keep the total summary under 4000 characters.`;
340
+
341
/**
 * Launches the detached wiki-worker process that summarises a session.
 *
 * Steps: check that the worker script exists, take the per-session summary
 * lock, write a config.json into a fresh temp directory, then spawn the worker
 * with that config path as its only argument.
 *
 * Failure handling: every failure is logged through logHm and swallowed; this
 * function never throws. When the config cannot be written or the worker cannot
 * be started, the lock taken here is removed again. Otherwise a failed launch
 * would leave the lock behind and block every later summary for the session.
 *
 * Security: config.json contains the API token and lives under the shared temp
 * directory, so the directory is created with mode 0700 and the file with 0600.
 *
 * @param creds     Credentials and endpoint details copied into the worker config.
 * @param sessionId Session to summarise; also keys the lock and the temp directory.
 * @param cwd       Working directory; its last "/"-separated segment becomes the project name.
 * @param reason    "periodic" or "final"; used only in log messages here.
 */
function spawnWikiWorker(
  creds: Creds,
  sessionId: string,
  cwd: string,
  reason: "periodic" | "final",
): void {
  if (!existsSync(PI_WIKI_WORKER_PATH)) {
    logHm(`spawnWikiWorker(${reason}): no worker at ${PI_WIKI_WORKER_PATH} — install via 'hivemind pi install' or rebuild`);
    return;
  }
  // Periodic: only one in-flight; lock prevents races between events.
  // Final: also takes the lock — if a periodic was mid-flight at session_shutdown,
  // skip the final to avoid two concurrent workers writing back to the same row.
  if (!tryAcquireSummaryLock(sessionId)) {
    logHm(`spawnWikiWorker(${reason}): lock held — skipping (a worker is already running)`);
    return;
  }

  // Removes the lock acquired above; used only on paths where no worker took over.
  const releaseLock = (): void => {
    try { unlinkSync(summaryLockPath(sessionId)); } catch { /* already gone */ }
  };

  // tmp dir owned by the worker; it removes it on completion.
  const tmpDir = join(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`);
  try {
    mkdirSync(tmpDir, { recursive: true, mode: 0o700 });
  } catch {
    /* ignored here: the config write below fails and reports it */
  }
  const configPath = join(tmpDir, "config.json");
  const project = (cwd ?? "").split("/").pop() || "unknown";
  const config = {
    apiUrl: creds.apiUrl,
    token: creds.token,
    orgId: creds.orgId,
    workspaceId: creds.workspaceId,
    memoryTable: MEMORY_TABLE,
    sessionsTable: SESSIONS_TABLE,
    sessionId,
    userName: creds.userName,
    project,
    tmpDir,
    piBin: findPiBin(),
    piProvider: process.env.HIVEMIND_PI_PROVIDER ?? "google",
    piModel: process.env.HIVEMIND_PI_MODEL ?? "gemini-2.5-flash",
    wikiLog: LOG_PATH,
    hooksDir: join(homedir(), ".pi", "agent", "hivemind"),
    promptTemplate: WIKI_PROMPT_TEMPLATE,
  };

  try {
    // Owner-only permissions: the file carries the bearer token.
    writeFileSync(configPath, JSON.stringify(config), { mode: 0o600 });
  } catch (e: any) {
    logHm(`spawnWikiWorker(${reason}): writeFileSync failed: ${e?.message ?? e}`);
    releaseLock();
    return;
  }

  logHm(`spawnWikiWorker(${reason}): spawning ${PI_WIKI_WORKER_PATH} session=${sessionId} provider=${config.piProvider} model=${config.piModel}`);
  try {
    const child = spawn(process.execPath, [PI_WIKI_WORKER_PATH, configPath], {
      detached: true,
      stdio: "ignore",
      env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" },
    });
    // Spawn failures can surface asynchronously as an 'error' event; without a
    // listener that event would be thrown as an uncaught exception.
    child.on("error", (e: any) => {
      logHm(`spawnWikiWorker(${reason}): spawn failed: ${e?.message ?? e}`);
      releaseLock();
    });
    child.unref();
  } catch (e: any) {
    logHm(`spawnWikiWorker(${reason}): spawn failed: ${e?.message ?? e}`);
    releaseLock();
  }
}
394
+
395
/**
 * Counts one captured event for the session and starts a periodic wiki-worker
 * run when the summary thresholds say one is due.
 *
 * Does nothing, and does not count the event, when HIVEMIND_CAPTURE is "false".
 */
function maybeTriggerPeriodicSummary(creds: Creds, sessionId: string, cwd: string): void {
  const captureOff = process.env.HIVEMIND_CAPTURE === "false";
  if (captureOff) return;

  const state = bumpCounter(sessionId);
  const cfg = loadSummaryConfig();
  if (shouldTriggerNow(state, cfg)) {
    const sinceLast = state.totalCount - state.lastSummaryCount;
    logHm(`periodic threshold hit (total=${state.totalCount}, since=${sinceLast}, N=${cfg.everyNMessages}, hours=${cfg.everyHours})`);
    spawnWikiWorker(creds, sessionId, cwd, "periodic");
  }
}
403
+
404
/**
 * Produces a "document" embedding for `text` through the embed daemon.
 *
 * Order of attempts:
 *  1. Return null at once when HIVEMIND_EMBEDDINGS is "false" or the text is empty.
 *  2. Try the daemon socket directly, in case a daemon is already running.
 *  3. If that fails and the daemon script exists, spawn it detached, then poll
 *     up to 25 times at 200 ms intervals, retrying whenever the socket file exists.
 *
 * Never throws; every failure path is logged through logHm and yields null.
 *
 * @returns The embedding vector, or null when embedding is disabled or unavailable.
 */
async function embed(text: string): Promise<number[] | null> {
  if (process.env.HIVEMIND_EMBEDDINGS === "false") {
    logHm(`embed: skipped (HIVEMIND_EMBEDDINGS=false)`);
    return null;
  }
  if (!text || text.length === 0) {
    logHm(`embed: skipped (empty text)`);
    return null;
  }

  // 1) socket already up (another agent or us in a previous turn) → fast path
  const immediate = await tryEmbedOverSocket(text, "document");
  if (immediate !== null) {
    logHm(`embed: ok via existing socket (dims=${immediate.length})`);
    return immediate;
  }

  // 2) no daemon binary deposited → fallback NULL
  if (!existsSync(EMBED_DAEMON_ENTRY)) {
    logHm(`embed: no daemon at ${EMBED_DAEMON_ENTRY} — run 'hivemind embeddings install'`);
    return null;
  }

  // 3) spawn the canonical daemon detached; daemon's own pidfile lock guards
  //    against double-spawn if multiple pi turns race.
  logHm(`embed: spawning daemon at ${EMBED_DAEMON_ENTRY}`);
  try {
    const daemon = spawn(process.execPath, [EMBED_DAEMON_ENTRY], { detached: true, stdio: "ignore" });
    daemon.unref();
  } catch (e: any) {
    logHm(`embed: spawn failed: ${e?.message ?? e}`);
    return null;
  }

  // 4) poll for the socket, retrying the embed each time the socket file is present
  const pause = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
  for (let poll = 1; poll <= 25; poll++) {
    await pause(200);
    if (!existsSync(EMBED_SOCKET_PATH)) continue;
    const late = await tryEmbedOverSocket(text, "document");
    if (late === null) continue;
    logHm(`embed: ok after spawn (dims=${late.length}, polls=${poll})`);
    return late;
  }

  logHm(`embed: timed out after spawn (5s)`);
  return null;
}
447
+
448
/**
 * Renders an embedding as a SQL FLOAT4[] array literal.
 *
 * The vector comes from a JSON reply read off a local socket and is spliced
 * into the statement unquoted, so every element is checked first. Anything
 * that is not a finite number (a string, null, NaN, Infinity, an array hole)
 * makes the whole value fall back to NULL, rather than producing invalid SQL
 * or letting foreign text into the statement.
 *
 * @param emb Embedding vector, or null when none is available.
 * @returns `ARRAY[...]::FLOAT4[]` for a non-empty all-finite vector, else `NULL`.
 */
function embedSqlLiteral(emb: number[] | null): string {
  if (!Array.isArray(emb) || emb.length === 0) return "NULL";
  // Index loop rather than every(): every() skips holes in sparse arrays.
  for (let i = 0; i < emb.length; i++) {
    const component: unknown = emb[i];
    if (typeof component !== "number" || !Number.isFinite(component)) return "NULL";
  }
  return `ARRAY[${emb.join(",")}]::FLOAT4[]`;
}
454
+
455
+ // ---------- session-row writer -------------------------------------------------
456
+
457
/**
 * Builds the memory path under which a session's rows are stored:
 * `/sessions/<user>/<user>_<org>_<workspace>_<session>.jsonl`, where `<org>`
 * is the org name when present and the org id otherwise.
 */
function buildSessionPath(creds: Creds, sessionId: string): string {
  const user = creds.userName;
  const orgLabel = creds.orgName ?? creds.orgId;
  const filename = `${user}_${orgLabel}_${creds.workspaceId}_${sessionId}.jsonl`;
  return `/sessions/${user}/${filename}`;
}
461
+
462
// Deeplake quirk: CREATE TABLE IF NOT EXISTS returns 200 before the table
// is queryable for INSERTs (the propagation can take 30+ seconds on a fresh
// table). Other agents don't hit this in steady state because they reuse
// existing tables; pi's e2e tests use fresh timestamped tables every run.
// Fix: tolerate "Table does not exist" specifically and retry with backoff.
//
// Each entry is the wait in milliseconds before the next attempt, so
// writeSessionRow makes at most length + 1 INSERT attempts in total.
const INSERT_RETRY_BACKOFFS_MS = [1000, 3000, 8000, 15000];
468
+
469
/**
 * Inserts one captured event as a row in the sessions table.
 *
 * The entry is serialized to JSON, embedded (the embedding may be null), and
 * written with a single INSERT. Only a "table does not exist" style failure is
 * retried, following INSERT_RETRY_BACKOFFS_MS; any other failure, or running
 * out of retries, rethrows the error from the last attempt.
 *
 * Every string value interpolated into the statement goes through sqlStr or
 * sqlJsonb, including `agent`.
 *
 * @param creds     Credentials; userName is also written to the author column.
 * @param sessionId Session id used to derive the row's path and filename.
 * @param agent     Agent label stored in the agent column.
 * @param event     Event name stored in the description column.
 * @param cwd       Working directory; its last "/"-separated segment becomes the project.
 * @param entry     Event payload stored as JSONB in the message column.
 * @throws The error from dlQuery when the INSERT ultimately fails.
 */
async function writeSessionRow(
  creds: Creds,
  sessionId: string,
  agent: string,
  event: string,
  cwd: string,
  entry: Record<string, unknown>,
): Promise<void> {
  const ts = new Date().toISOString();
  const sessionPath = buildSessionPath(creds, sessionId);
  const filename = sessionPath.split("/").pop() ?? "";
  const projectName = (cwd ?? "").split("/").pop() || "unknown";
  const line = JSON.stringify(entry);
  const jsonForSql = sqlJsonb(line);
  logHm(`writeSessionRow: event=${event} session=${sessionId} bytes=${line.length} table=${SESSIONS_TABLE}`);
  const emb = await embed(line);
  logHm(`writeSessionRow: embed=${emb ? `dims=${emb.length}` : "null"}`);
  const insertSql =
    `INSERT INTO "${SESSIONS_TABLE}" (id, path, filename, message, message_embedding, author, size_bytes, project, description, agent, creation_date, last_update_date) ` +
    `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, ${embedSqlLiteral(emb)}, '${sqlStr(creds.userName)}', ` +
    `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(event)}', '${sqlStr(String(agent))}', '${ts}', '${ts}')`;

  const maxAttempts = INSERT_RETRY_BACKOFFS_MS.length + 1;
  // The loop exits only by returning on success or rethrowing in the catch.
  for (let attempt = 0; ; attempt++) {
    try {
      await dlQuery(creds, insertSql);
      logHm(`writeSessionRow: INSERT ok (event=${event}, attempt=${attempt + 1})`);
      return;
    } catch (e: any) {
      const msg = e?.message ?? String(e);
      const isPropagationDelay = /table does not exist|relation .* does not exist/i.test(msg);
      // undefined once the schedule is exhausted, i.e. this was the final attempt.
      const delay = INSERT_RETRY_BACKOFFS_MS[attempt];
      if (!isPropagationDelay || delay === undefined) {
        logHm(`writeSessionRow: INSERT FAILED (event=${event}, attempt=${attempt + 1}): ${msg}`);
        throw e;
      }
      logHm(`writeSessionRow: table not yet visible, retrying in ${delay}ms (attempt=${attempt + 1}/${maxAttempts})`);
      await new Promise((wake) => setTimeout(wake, delay));
    }
  }
}
511
+
512
+ // ---------- search primitive (used by hivemind_search) -------------------------
513
+
514
/**
 * Keyword search across the memory (summaries) and sessions tables.
 *
 * Runs one UNION ALL query with a case-insensitive substring match on each
 * table's text content and formats the hits for display.
 *
 * `limit` comes from a tool call and is interpolated into the SQL text, so it
 * is first coerced to a whole number in the range 0..1000. A value that is not
 * a finite non-negative number falls back to 10.
 *
 * @param creds Credentials for the query endpoint.
 * @param query Substring to look for; quotes and LIKE wildcards are escaped.
 * @param limit Maximum number of hits to return.
 * @returns `No matches for "<query>".` when nothing is found; otherwise one
 *          `[path]` header plus up to 600 characters of content per hit,
 *          separated by `---` lines.
 * @throws Whatever dlQuery throws when the request fails.
 */
async function searchTables(creds: Creds, query: string, limit: number): Promise<string> {
  const MAX_SEARCH_LIMIT = 1000;
  const requested = Math.trunc(Number(limit));
  const safeLimit = Number.isFinite(requested) && requested >= 0
    ? Math.min(requested, MAX_SEARCH_LIMIT)
    : 10;

  // ILIKE pattern: escape both SQL quotes AND LIKE wildcards. ESCAPE '\\'
  // tells the engine to treat backslash as the escape character so our
  // \% / \_ are matched literally instead of as wildcards.
  const pattern = sqlLike(query);
  const memQuery = `SELECT path, summary::text AS content, 0 AS source_order FROM "${MEMORY_TABLE}" WHERE summary::text ILIKE '%${pattern}%' ESCAPE '\\' LIMIT ${safeLimit}`;
  const sessQuery = `SELECT path, message::text AS content, 1 AS source_order FROM "${SESSIONS_TABLE}" WHERE message::text ILIKE '%${pattern}%' ESCAPE '\\' LIMIT ${safeLimit}`;
  const sql = `SELECT path, content, source_order FROM ((${memQuery}) UNION ALL (${sessQuery})) AS combined ORDER BY path, source_order LIMIT ${safeLimit}`;
  const rows = await dlQuery(creds, sql);
  if (rows.length === 0) return `No matches for "${query}".`;
  return rows
    .map((r: any) => `[${r.path}]\n${String(r.content ?? "").slice(0, 600)}`)
    .join("\n\n---\n\n");
}
528
+
529
/**
 * Wraps plain text in the result shape pi tools must return:
 * `{ content: [{ type: "text", text }], details }`.
 * Returning a raw string crashes pi's renderer (render-utils.js: result.content.filter).
 */
function textResult(text: string) {
  const block = { type: "text" as const, text };
  return { content: [block], details: {} };
}
534
+
535
+ // ---------- main extension -----------------------------------------------------
536
+
537
+ const CONTEXT_PREAMBLE = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents in your org.
538
+
539
+ Three hivemind tools are registered:
540
+ hivemind_search { query, limit? } keyword search across summaries + sessions
541
+ hivemind_read { path } read full content at a memory path
542
+ hivemind_index { prefix?, limit? } list summary entries
543
+
544
+ Prefer these tools — one call returns ranked hits across all summaries and sessions in a single SQL query. Different paths under /summaries/<username>/ are different users; do NOT merge or alias them. Fall back to grep on ~/.deeplake/memory/ only if tools are unavailable.`;
545
+
546
+ export default function hivemindExtension(pi: ExtensionAPI): void {
547
+ const captureEnabled = process.env.HIVEMIND_CAPTURE !== "false";
548
+
549
+ // --- Tools (read path) -------------------------------------------------------
550
+
551
+ pi.registerTool({
552
+ name: "hivemind_search",
553
+ description: "Search Hivemind shared memory (summaries + raw sessions) by keyword. Use this first when the user asks about prior work or context that may exist in Hivemind. Different paths under /summaries/<username>/ are different users — do NOT merge them.",
554
+ parameters: {
555
+ type: "object",
556
+ properties: {
557
+ query: { type: "string", description: "Keyword or substring to search for." },
558
+ limit: { type: "number", description: "Max hits (default 10)." },
559
+ },
560
+ required: ["query"],
561
+ },
562
+ async execute(_toolCallId: string, params: { query: string; limit?: number }) {
563
+ const creds = loadCreds();
564
+ if (!creds) return textResult("Hivemind: not authenticated. Run `hivemind login` in a terminal.");
565
+ try {
566
+ return textResult(await searchTables(creds, params.query, params.limit ?? 10));
567
+ } catch (err: any) {
568
+ return textResult(`Hivemind search failed: ${err.message}`);
569
+ }
570
+ },
571
+ });
572
+
573
+ pi.registerTool({
574
+ name: "hivemind_read",
575
+ description: "Read the full content at a Hivemind memory path (e.g. /summaries/alice/abc.md or /sessions/alice/...jsonl). Use after hivemind_search to drill into a hit.",
576
+ parameters: {
577
+ type: "object",
578
+ properties: { path: { type: "string", description: "Absolute Hivemind memory path." } },
579
+ required: ["path"],
580
+ },
581
+ async execute(_toolCallId: string, params: { path: string }) {
582
+ const creds = loadCreds();
583
+ if (!creds) return textResult("Hivemind: not authenticated.");
584
+ const path = params.path;
585
+ const isSession = path.startsWith("/sessions/");
586
+ const table = isSession ? SESSIONS_TABLE : MEMORY_TABLE;
587
+ const col = isSession ? "message::text" : "summary::text";
588
+ const sql = `SELECT path, ${col} AS content FROM "${table}" WHERE path = '${sqlStr(path)}' LIMIT 200`;
589
+ try {
590
+ const rows = await dlQuery(creds, sql);
591
+ if (rows.length === 0) return textResult(`No content at ${path}.`);
592
+ return textResult(rows.map((r: any) => String(r.content ?? "")).join("\n"));
593
+ } catch (err: any) {
594
+ return textResult(`Hivemind read failed: ${err.message}`);
595
+ }
596
+ },
597
+ });
598
+
599
+ pi.registerTool({
600
+ name: "hivemind_index",
601
+ description: "List Hivemind summary entries (one row per session). Use to see what's in shared memory.",
602
+ parameters: {
603
+ type: "object",
604
+ properties: {
605
+ prefix: { type: "string", description: "Path prefix, e.g. '/summaries/alice/'." },
606
+ limit: { type: "number", description: "Max rows (default 50)." },
607
+ },
608
+ },
609
+ async execute(_toolCallId: string, params: { prefix?: string; limit?: number }) {
610
+ const creds = loadCreds();
611
+ if (!creds) return textResult("Hivemind: not authenticated.");
612
+ const where = params.prefix
613
+ ? `WHERE path LIKE '${sqlLike(params.prefix)}%' ESCAPE '\\'`
614
+ : `WHERE path LIKE '/summaries/%'`;
615
+ const sql = `SELECT path, description, project, last_update_date FROM "${MEMORY_TABLE}" ${where} ORDER BY last_update_date DESC LIMIT ${params.limit ?? 50}`;
616
+ try {
617
+ const rows = await dlQuery(creds, sql);
618
+ if (rows.length === 0) return textResult("No summaries.");
619
+ return textResult(rows
620
+ .map((r: any) => `${r.path}\t${r.last_update_date}\t${r.project ?? ""}\t${r.description ?? ""}`)
621
+ .join("\n"));
622
+ } catch (err: any) {
623
+ return textResult(`Hivemind index failed: ${err.message}`);
624
+ }
625
+ },
626
+ });
627
+
628
+ // --- Lifecycle hooks (capture path) -----------------------------------------
629
+ //
630
+ // Event shapes per pi-coding-agent/dist/core/extensions/types.d.ts:
631
+ // - SessionStartEvent: { type, reason, previousSessionFile? }
632
+ // - InputEvent: { type, text, images?, source }
633
+ // - ToolResultEvent: { type, toolCallId, toolName, input, content, isError, details }
634
+ // - MessageEndEvent: { type, message: AgentMessage }
635
+ // Every handler receives (event, ctx). ctx.sessionManager.getSessionId() and
636
+ // ctx.cwd are the canonical sources for session id + cwd — the events
637
+ // themselves don't carry them.
638
+
639
// session_start: logs the capture configuration and credential status. When
// credentials exist and capture is enabled, it issues CREATE TABLE IF NOT
// EXISTS for the memory and sessions tables and polls until the sessions table
// answers a probe query. It always returns the context preamble plus a
// login-status line as `additionalContext`.
pi.on("session_start", async (_event: any, _ctx: any) => {
  logHm(`session_start: fired (capture=${captureEnabled}, embed=${process.env.HIVEMIND_EMBEDDINGS !== "false"}, table=${SESSIONS_TABLE})`);

  const creds = loadCreds();
  if (creds) {
    logHm(`session_start: creds org=${creds.orgName ?? creds.orgId} ws=${creds.workspaceId}`);
  } else {
    logHm(`session_start: no credentials at ~/.deeplake/credentials.json — capture disabled this session`);
  }

  if (creds && captureEnabled) {
    // Other agents' session-start hooks create the memory + sessions tables
    // via DeeplakeApi.ensureTable / ensureSessionsTable. The pi extension is
    // standalone (no shared lib import to keep it raw-.ts), so the
    // CREATE TABLE IF NOT EXISTS statements are issued directly. The schema
    // matches the canonical one in src/deeplake-api.ts so all agents
    // read/write the same shape.
    const memoryColumns = [
      `id TEXT NOT NULL DEFAULT ''`,
      `path TEXT NOT NULL DEFAULT ''`,
      `filename TEXT NOT NULL DEFAULT ''`,
      `summary TEXT NOT NULL DEFAULT ''`,
      `summary_embedding FLOAT4[]`,
      `author TEXT NOT NULL DEFAULT ''`,
      `mime_type TEXT NOT NULL DEFAULT 'text/plain'`,
      `size_bytes BIGINT NOT NULL DEFAULT 0`,
      `project TEXT NOT NULL DEFAULT ''`,
      `description TEXT NOT NULL DEFAULT ''`,
      `agent TEXT NOT NULL DEFAULT ''`,
      `creation_date TEXT NOT NULL DEFAULT ''`,
      `last_update_date TEXT NOT NULL DEFAULT ''`,
    ];
    const sessionColumns = [
      `id TEXT NOT NULL DEFAULT ''`,
      `path TEXT NOT NULL DEFAULT ''`,
      `filename TEXT NOT NULL DEFAULT ''`,
      `message JSONB`,
      `message_embedding FLOAT4[]`,
      `author TEXT NOT NULL DEFAULT ''`,
      `mime_type TEXT NOT NULL DEFAULT 'application/json'`,
      `size_bytes BIGINT NOT NULL DEFAULT 0`,
      `project TEXT NOT NULL DEFAULT ''`,
      `description TEXT NOT NULL DEFAULT ''`,
      `agent TEXT NOT NULL DEFAULT ''`,
      `creation_date TEXT NOT NULL DEFAULT ''`,
      `last_update_date TEXT NOT NULL DEFAULT ''`,
    ];
    const memCreate = `CREATE TABLE IF NOT EXISTS "${MEMORY_TABLE}" (${memoryColumns.join(", ")}) USING deeplake`;
    const sessCreate = `CREATE TABLE IF NOT EXISTS "${SESSIONS_TABLE}" (${sessionColumns.join(", ")}) USING deeplake`;

    // Each CREATE is best-effort: a failure is logged and the handler continues.
    try {
      await dlQuery(creds, memCreate);
      logHm(`session_start: memory CREATE TABLE ok (${MEMORY_TABLE})`);
    } catch (e: any) {
      logHm(`session_start: memory CREATE failed: ${e?.message ?? e}`);
    }
    try {
      await dlQuery(creds, sessCreate);
      logHm(`session_start: sessions CREATE TABLE ok (${SESSIONS_TABLE})`);
    } catch (e: any) {
      logHm(`session_start: sessions CREATE failed: ${e?.message ?? e}`);
    }

    // Proactively poll until the sessions table is queryable. CREATE TABLE
    // returns 200 before propagation completes on Deeplake; the first INSERT
    // can otherwise fail with "Table does not exist" for ~30s. Polling here
    // amortises the delay before any event fires.
    const probeSql = `SELECT 1 FROM "${SESSIONS_TABLE}" LIMIT 1`;
    const startedAt = Date.now();
    let visible = false;
    let attempts = 0;
    while (attempts < 30 && !visible) {
      attempts += 1;
      try {
        await dlQuery(creds, probeSql);
        visible = true;
      } catch (e: any) {
        const msg = e?.message ?? String(e);
        const stillPropagating = /table does not exist|relation .* does not exist/i.test(msg);
        if (!stillPropagating) {
          // Any other error will not resolve by waiting, so stop polling.
          logHm(`session_start: probe failed (non-propagation): ${msg}`);
          break;
        }
        await new Promise(r => setTimeout(r, 1000));
      }
    }
    logHm(`session_start: sessions table visible=${visible} (probe took ${Date.now() - startedAt}ms)`);
  }

  const loginLine = creds
    ? `Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId}).`
    : `Not logged in to Deeplake. Run \`hivemind login\` to authenticate.`;
  return { additionalContext: `${CONTEXT_PREAMBLE}\n${loginLine}` };
});
701
+
702
// input: records the user's text as a "user_message" session row, then gives
// the periodic-summary trigger a chance to run. Skipped when capture is
// disabled, when the input was injected by an extension, when there are no
// credentials, or when the text is empty or not a string.
pi.on("input", async (event: any, ctx: any) => {
  logHm(`input: fired source=${event?.source ?? "?"}`);

  if (!captureEnabled) {
    logHm(`input: capture disabled, skipping`);
    return;
  }
  if (event.source === "extension") {
    logHm(`input: extension-injected, skipping`);
    return;
  }

  const creds = loadCreds();
  if (!creds) {
    logHm(`input: no creds, skipping`);
    return;
  }

  let text = "";
  if (typeof event.text === "string") {
    text = event.text;
  }
  if (!text) {
    logHm(`input: empty text, skipping`);
    return;
  }

  // Session id and cwd come from the handler context; the fallbacks apply only
  // when the context does not provide them.
  const sessionId = ctx?.sessionManager?.getSessionId?.() ?? `pi-${Date.now()}`;
  const cwd = ctx?.cwd ?? ctx?.sessionManager?.getCwd?.() ?? process.cwd();

  try {
    const row = {
      id: crypto.randomUUID(),
      type: "user_message",
      session_id: sessionId,
      content: text,
      timestamp: new Date().toISOString(),
    };
    await writeSessionRow(creds, sessionId, "pi", "input", cwd, row);
  } catch (e: any) {
    // Capture is best-effort: a failed write is logged, not propagated.
    logHm(`input: writeSessionRow swallowed: ${e?.message ?? e}`);
  }

  maybeTriggerPeriodicSummary(creds, sessionId, cwd);
});
725
+
726
// tool_result: records a finished tool invocation as a "tool_call" session
// row (tool name, JSON-encoded input, response text, error flag), then gives
// the periodic-summary trigger a chance to run. Skipped when capture is
// disabled or there are no credentials.
pi.on("tool_result", async (event: any, ctx: any) => {
  logHm(`tool_result: fired tool=${event?.toolName ?? "?"} isError=${event?.isError === true}`);

  if (!captureEnabled) {
    logHm(`tool_result: capture disabled, skipping`);
    return;
  }

  const creds = loadCreds();
  if (!creds) {
    logHm(`tool_result: no creds, skipping`);
    return;
  }

  const sessionId = ctx?.sessionManager?.getSessionId?.() ?? `pi-${Date.now()}`;
  const cwd = ctx?.cwd ?? ctx?.sessionManager?.getCwd?.() ?? process.cwd();

  // event.content is (TextContent | ImageContent)[]; keep only the text blocks.
  const contentBlocks: any[] = Array.isArray(event.content) ? event.content : [];
  const textParts: string[] = [];
  for (const block of contentBlocks) {
    if (block?.type === "text" && typeof block.text === "string") {
      textParts.push(block.text);
    }
  }
  const responseText = textParts.join("\n");

  try {
    const row = {
      id: crypto.randomUUID(),
      type: "tool_call",
      session_id: sessionId,
      tool_call_id: event.toolCallId ?? null,
      tool_name: event.toolName ?? "unknown",
      tool_input: JSON.stringify(event.input ?? {}),
      // With no text blocks, fall back to the raw blocks serialised as JSON.
      tool_response: responseText || JSON.stringify(contentBlocks),
      is_error: event.isError === true,
      timestamp: new Date().toISOString(),
    };
    await writeSessionRow(creds, sessionId, "pi", "tool_result", cwd, row);
  } catch (e: any) {
    // Capture is best-effort: a failed write is logged, not propagated.
    logHm(`tool_result: writeSessionRow swallowed: ${e?.message ?? e}`);
  }

  maybeTriggerPeriodicSummary(creds, sessionId, cwd);
});
756
+
757
// message_end: records the text of a completed assistant message as an
// "assistant_message" session row, then gives the periodic-summary trigger a
// chance to run. Messages with any other role are skipped here, as are
// assistant messages without text blocks.
pi.on("message_end", async (event: any, ctx: any) => {
  logHm(`message_end: fired role=${event?.message?.role ?? "?"}`);

  if (!captureEnabled) {
    logHm(`message_end: capture disabled, skipping`);
    return;
  }

  const creds = loadCreds();
  if (!creds) {
    logHm(`message_end: no creds, skipping`);
    return;
  }

  // AgentMessage is UserMessage | AssistantMessage | ToolResultMessage.
  // user is captured via `input`; toolResult via `tool_result`. Only assistant here.
  const message = event.message ?? null;
  const isAssistant = Boolean(message) && message.role === "assistant";
  if (!isAssistant) {
    logHm(`message_end: skipping (role=${message?.role ?? "null"} — only assistant rows are written here)`);
    return;
  }

  // AssistantMessage.content is (TextContent | ThinkingContent | ToolCall)[];
  // only the text blocks are captured.
  const blocks: any[] = Array.isArray(message.content) ? message.content : [];
  const textParts: string[] = [];
  for (const block of blocks) {
    if (block?.type === "text" && typeof block.text === "string") {
      textParts.push(block.text);
    }
  }
  const text = textParts.join("\n");
  if (!text) {
    logHm(`message_end: assistant message had no text blocks, skipping`);
    return;
  }

  const sessionId = ctx?.sessionManager?.getSessionId?.() ?? `pi-${Date.now()}`;
  const cwd = ctx?.cwd ?? ctx?.sessionManager?.getCwd?.() ?? process.cwd();

  try {
    const row = {
      id: crypto.randomUUID(),
      type: "assistant_message",
      session_id: sessionId,
      content: text,
      timestamp: new Date().toISOString(),
    };
    await writeSessionRow(creds, sessionId, "pi", "message_end", cwd, row);
  } catch (e: any) {
    // Capture is best-effort: a failed write is logged, not propagated.
    logHm(`message_end: writeSessionRow swallowed: ${e?.message ?? e}`);
  }

  maybeTriggerPeriodicSummary(creds, sessionId, cwd);
});
791
+
792
// session_shutdown: requests a "final" wiki-worker run for the session.
// Skipped when HIVEMIND_CAPTURE is "false", when there are no credentials, or
// when the context yields no session id.
pi.on("session_shutdown", async (_event: any, ctx: any) => {
  logHm(`session_shutdown: fired`);

  const captureOff = process.env.HIVEMIND_CAPTURE === "false";
  if (captureOff) {
    return;
  }

  const creds = loadCreds();
  if (!creds) {
    logHm(`session_shutdown: no creds, skipping final summary`);
    return;
  }

  const sessionId = ctx?.sessionManager?.getSessionId?.() ?? null;
  if (!sessionId) {
    logHm(`session_shutdown: no sessionId, skipping final summary`);
    return;
  }

  const cwd = ctx?.cwd ?? ctx?.sessionManager?.getCwd?.() ?? process.cwd();

  // Always spawn for "final" — but the lock check inside spawnWikiWorker
  // skips if a periodic worker is mid-flight. Non-fatal either way.
  spawnWikiWorker(creds, sessionId, cwd, "final");
});
804
+
805
+ // Module-load breadcrumb so we know the extension's default export ran at all.
806
+ logHm(`extension loaded (table=${SESSIONS_TABLE}, mem=${MEMORY_TABLE})`);
807
+ }