@deeplake/hivemind 0.6.48 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +244 -20
  4. package/bundle/cli.js +1369 -112
  5. package/codex/bundle/capture.js +546 -96
  6. package/codex/bundle/commands/auth-login.js +290 -81
  7. package/codex/bundle/embeddings/embed-daemon.js +243 -0
  8. package/codex/bundle/pre-tool-use.js +666 -111
  9. package/codex/bundle/session-start-setup.js +231 -64
  10. package/codex/bundle/session-start.js +52 -13
  11. package/codex/bundle/shell/deeplake-shell.js +716 -119
  12. package/codex/bundle/skilify-worker.js +907 -0
  13. package/codex/bundle/stop.js +819 -79
  14. package/codex/bundle/wiki-worker.js +312 -11
  15. package/cursor/bundle/capture.js +1116 -64
  16. package/cursor/bundle/commands/auth-login.js +290 -81
  17. package/cursor/bundle/embeddings/embed-daemon.js +243 -0
  18. package/cursor/bundle/pre-tool-use.js +598 -77
  19. package/cursor/bundle/session-end.js +520 -2
  20. package/cursor/bundle/session-start.js +257 -65
  21. package/cursor/bundle/shell/deeplake-shell.js +716 -119
  22. package/cursor/bundle/skilify-worker.js +907 -0
  23. package/cursor/bundle/wiki-worker.js +571 -0
  24. package/hermes/bundle/capture.js +1119 -65
  25. package/hermes/bundle/commands/auth-login.js +290 -81
  26. package/hermes/bundle/embeddings/embed-daemon.js +243 -0
  27. package/hermes/bundle/pre-tool-use.js +597 -76
  28. package/hermes/bundle/session-end.js +522 -1
  29. package/hermes/bundle/session-start.js +260 -65
  30. package/hermes/bundle/shell/deeplake-shell.js +716 -119
  31. package/hermes/bundle/skilify-worker.js +907 -0
  32. package/hermes/bundle/wiki-worker.js +572 -0
  33. package/mcp/bundle/server.js +290 -75
  34. package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
  35. package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
  36. package/openclaw/dist/chunks/config-ZLH6JFJS.js +34 -0
  37. package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
  38. package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
  39. package/openclaw/dist/index.js +929 -710
  40. package/openclaw/dist/skilify-worker.js +907 -0
  41. package/openclaw/openclaw.plugin.json +1 -1
  42. package/openclaw/package.json +1 -1
  43. package/openclaw/skills/SKILL.md +19 -0
  44. package/package.json +7 -1
  45. package/pi/extension-source/hivemind.ts +603 -22
@@ -1,10 +1,62 @@
1
1
  #!/usr/bin/env node
2
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// esbuild lazy-module runner: executes the module body exactly once, then
// returns the cached result on every later call.
var __esm = (fn, res) => function __init() {
  if (fn) {
    res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0);
  }
  return res;
};
// esbuild export helper: exposes each entry of `all` as a live, enumerable
// getter on `target`.
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

// dist/src/index-marker-store.js
var index_marker_store_exports = {};
__export(index_marker_store_exports, {
  buildIndexMarkerPath: () => buildIndexMarkerPath,
  getIndexMarkerDir: () => getIndexMarkerDir,
  hasFreshIndexMarker: () => hasFreshIndexMarker,
  writeIndexMarker: () => writeIndexMarker
});
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
import { join as join3 } from "node:path";
import { tmpdir } from "node:os";
/** Directory holding the per-workspace "index/column is ready" marker files. */
function getIndexMarkerDir() {
  return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
}
/** Marker-file path for one (workspace, org, table, suffix) tuple; unsafe chars collapsed to "_". */
function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
  const markerKey = `${workspaceId}__${orgId}__${table}__${suffix}`.replace(/[^a-zA-Z0-9_.-]/g, "_");
  return join3(getIndexMarkerDir(), `${markerKey}.json`);
}
/** True when the marker file exists and its timestamp is within the TTL. */
function hasFreshIndexMarker(markerPath) {
  if (!existsSync2(markerPath)) {
    return false;
  }
  try {
    const { updatedAt } = JSON.parse(readFileSync2(markerPath, "utf-8"));
    const stamp = updatedAt ? new Date(updatedAt).getTime() : NaN;
    return Number.isFinite(stamp) && Date.now() - stamp <= INDEX_MARKER_TTL_MS;
  } catch {
    // An unreadable or corrupt marker counts as stale.
    return false;
  }
}
/** (Re)write the marker file with the current timestamp, creating the dir if needed. */
function writeIndexMarker(markerPath) {
  mkdirSync(getIndexMarkerDir(), { recursive: true });
  const payload = JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
  writeFileSync(markerPath, payload, "utf-8");
}
var INDEX_MARKER_TTL_MS;
var init_index_marker_store = __esm({
  "dist/src/index-marker-store.js"() {
    "use strict";
    INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
  }
});
2
54
 
3
55
  // dist/src/hooks/codex/pre-tool-use.js
4
56
  import { execFileSync } from "node:child_process";
5
- import { existsSync as existsSync3 } from "node:fs";
6
- import { join as join6, dirname } from "node:path";
7
- import { fileURLToPath as fileURLToPath2 } from "node:url";
57
+ import { existsSync as existsSync4 } from "node:fs";
58
+ import { join as join9, dirname as dirname2 } from "node:path";
59
+ import { fileURLToPath as fileURLToPath3 } from "node:url";
8
60
 
9
61
  // dist/src/utils/stdin.js
10
62
  function readStdin() {
@@ -51,15 +103,13 @@ function loadConfig() {
51
103
  apiUrl: process.env.HIVEMIND_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai",
52
104
  tableName: process.env.HIVEMIND_TABLE ?? "memory",
53
105
  sessionsTableName: process.env.HIVEMIND_SESSIONS_TABLE ?? "sessions",
106
+ skillsTableName: process.env.HIVEMIND_SKILLS_TABLE ?? "skills",
54
107
  memoryPath: process.env.HIVEMIND_MEMORY_PATH ?? join(home, ".deeplake", "memory")
55
108
  };
56
109
  }
57
110
 
58
111
  // dist/src/deeplake-api.js
59
112
  import { randomUUID } from "node:crypto";
60
- import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
61
- import { join as join3 } from "node:path";
62
- import { tmpdir } from "node:os";
63
113
 
64
114
  // dist/src/utils/debug.js
65
115
  import { appendFileSync } from "node:fs";
@@ -81,8 +131,33 @@ function sqlStr(value) {
81
131
// Escape a value for use inside a SQL LIKE pattern: string-escape first,
// then neutralize the LIKE wildcards % and _.
function sqlLike(value) {
  const escaped = sqlStr(value);
  return escaped.replace(/%/g, "\\%").replace(/_/g, "\\_");
}
134
// Validate that `name` is a bare SQL identifier (letters, digits, underscore,
// not starting with a digit). Returns it unchanged; throws otherwise so a
// crafted table/column name can never reach string-built DDL.
function sqlIdent(name) {
  const IDENT_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
  if (IDENT_RE.test(name)) {
    return name;
  }
  throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`);
}
140
+
141
// dist/src/embeddings/columns.js
// Names of the vector columns added by the embedding migration.
var MESSAGE_EMBEDDING_COL = "message_embedding";
var SUMMARY_EMBEDDING_COL = "summary_embedding";
144
+
145
// dist/src/utils/client-header.js
// Header identifying this client to the Deeplake backend.
var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
/** Value sent in the client-identification header. */
function deeplakeClientValue() {
  return "hivemind";
}
/** Header fragment to spread into fetch() headers. */
function deeplakeClientHeader() {
  const headers = {};
  headers[DEEPLAKE_CLIENT_HEADER] = deeplakeClientValue();
  return headers;
}
153
 
85
154
  // dist/src/deeplake-api.js
155
+ var indexMarkerStorePromise = null;
156
+ function getIndexMarkerStore() {
157
+ if (!indexMarkerStorePromise)
158
+ indexMarkerStorePromise = Promise.resolve().then(() => (init_index_marker_store(), index_marker_store_exports));
159
+ return indexMarkerStorePromise;
160
+ }
86
161
  var log2 = (msg) => log("sdk", msg);
87
162
  function summarizeSql(sql, maxLen = 220) {
88
163
  const compact = sql.replace(/\s+/g, " ").trim();
@@ -102,7 +177,6 @@ var MAX_RETRIES = 3;
102
177
  var BASE_DELAY_MS = 500;
103
178
  var MAX_CONCURRENCY = 5;
104
179
  var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
105
- var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
106
180
  function sleep(ms) {
107
181
  return new Promise((resolve2) => setTimeout(resolve2, ms));
108
182
  }
@@ -122,9 +196,6 @@ function isTransientHtml403(text) {
122
196
  const body = text.toLowerCase();
123
197
  return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
124
198
  }
125
- function getIndexMarkerDir() {
126
- return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
127
- }
128
199
  var Semaphore = class {
129
200
  max;
130
201
  waiting = [];
@@ -193,7 +264,8 @@ var DeeplakeApi = class {
193
264
  headers: {
194
265
  Authorization: `Bearer ${this.token}`,
195
266
  "Content-Type": "application/json",
196
- "X-Activeloop-Org-Id": this.orgId
267
+ "X-Activeloop-Org-Id": this.orgId,
268
+ ...deeplakeClientHeader()
197
269
  },
198
270
  signal,
199
271
  body: JSON.stringify({ query: sql })
@@ -220,7 +292,8 @@ var DeeplakeApi = class {
220
292
  }
221
293
  const text = await resp.text().catch(() => "");
222
294
  const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
223
- if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
295
+ const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
296
+ if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
224
297
  const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
225
298
  log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
226
299
  await sleep(delay);
@@ -254,7 +327,7 @@ var DeeplakeApi = class {
254
327
  const lud = row.lastUpdateDate ?? ts;
255
328
  const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
256
329
  if (exists.length > 0) {
257
- let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
330
+ let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
258
331
  if (row.project !== void 0)
259
332
  setClauses += `, project = '${sqlStr(row.project)}'`;
260
333
  if (row.description !== void 0)
@@ -262,8 +335,8 @@ var DeeplakeApi = class {
262
335
  await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
263
336
  } else {
264
337
  const id = randomUUID();
265
- let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date";
266
- let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
338
+ let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
339
+ let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
267
340
  if (row.project !== void 0) {
268
341
  cols += ", project";
269
342
  vals += `, '${sqlStr(row.project)}'`;
@@ -288,48 +361,83 @@ var DeeplakeApi = class {
288
361
  buildLookupIndexName(table, suffix) {
289
362
  return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
290
363
  }
291
- getLookupIndexMarkerPath(table, suffix) {
292
- const markerKey = [
293
- this.workspaceId,
294
- this.orgId,
295
- table,
296
- suffix
297
- ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
298
- return join3(getIndexMarkerDir(), `${markerKey}.json`);
299
- }
300
- hasFreshLookupIndexMarker(table, suffix) {
301
- const markerPath = this.getLookupIndexMarkerPath(table, suffix);
302
- if (!existsSync2(markerPath))
303
- return false;
304
- try {
305
- const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
306
- const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
307
- if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
308
- return false;
309
- return true;
310
- } catch {
311
- return false;
312
- }
313
- }
314
- markLookupIndexReady(table, suffix) {
315
- mkdirSync(getIndexMarkerDir(), { recursive: true });
316
- writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
317
- }
318
364
  async ensureLookupIndex(table, suffix, columnsSql) {
319
- if (this.hasFreshLookupIndexMarker(table, suffix))
365
+ const markers = await getIndexMarkerStore();
366
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
367
+ if (markers.hasFreshIndexMarker(markerPath))
320
368
  return;
321
369
  const indexName = this.buildLookupIndexName(table, suffix);
322
370
  try {
323
371
  await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
324
- this.markLookupIndexReady(table, suffix);
372
+ markers.writeIndexMarker(markerPath);
325
373
  } catch (e) {
326
374
  if (isDuplicateIndexError(e)) {
327
- this.markLookupIndexReady(table, suffix);
375
+ markers.writeIndexMarker(markerPath);
328
376
  return;
329
377
  }
330
378
  log2(`index "${indexName}" skipped: ${e.message}`);
331
379
  }
332
380
  }
381
+ /**
382
+ * Ensure a vector column exists on the given table.
383
+ *
384
+ * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
385
+ * EXISTS …` on every SessionStart. On a long-running workspace that's
386
+ * already migrated, every call returns 500 "Column already exists" — noisy
387
+ * in the log and a wasted round-trip. Worse, the very first call after the
388
+ * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
389
+ * window (~30s) during which subsequent INSERTs fail; minimising the
390
+ * number of ALTER calls minimises exposure to that window.
391
+ *
392
+ * New flow:
393
+ * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
394
+ * return — zero network calls.
395
+ * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
396
+ * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
397
+ * bug. If the column is present → mark + return.
398
+ * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
399
+ * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
400
+ * "already exists" (race: another client added it between our SELECT
401
+ * and ALTER).
402
+ *
403
+ * Marker uses the same dir / TTL as ensureLookupIndex so both schema
404
+ * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
405
+ */
406
+ async ensureEmbeddingColumn(table, column) {
407
+ await this.ensureColumn(table, column, "FLOAT4[]");
408
+ }
409
+ /**
410
+ * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
411
+ * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
412
+ * column that was added to the schema after the table was originally
413
+ * created. Used today for `summary_embedding`, `message_embedding`, and
414
+ * the `agent` column (added 2026-04-11) — the latter has no fallback if
415
+ * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
416
+ * with `column "agent" does not exist`.
417
+ */
418
+ async ensureColumn(table, column, sqlType) {
419
+ const markers = await getIndexMarkerStore();
420
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
421
+ if (markers.hasFreshIndexMarker(markerPath))
422
+ return;
423
+ const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
424
+ const rows = await this.query(colCheck);
425
+ if (rows.length > 0) {
426
+ markers.writeIndexMarker(markerPath);
427
+ return;
428
+ }
429
+ try {
430
+ await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
431
+ } catch (e) {
432
+ const msg = e instanceof Error ? e.message : String(e);
433
+ if (!/already exists/i.test(msg))
434
+ throw e;
435
+ const recheck = await this.query(colCheck);
436
+ if (recheck.length === 0)
437
+ throw e;
438
+ }
439
+ markers.writeIndexMarker(markerPath);
440
+ }
333
441
  /** List all tables in the workspace (with retry). */
334
442
  async listTables(forceRefresh = false) {
335
443
  if (!forceRefresh && this._tablesCache)
@@ -345,7 +453,8 @@ var DeeplakeApi = class {
345
453
  const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
346
454
  headers: {
347
455
  Authorization: `Bearer ${this.token}`,
348
- "X-Activeloop-Org-Id": this.orgId
456
+ "X-Activeloop-Org-Id": this.orgId,
457
+ ...deeplakeClientHeader()
349
458
  }
350
459
  });
351
460
  if (resp.ok) {
@@ -370,29 +479,84 @@ var DeeplakeApi = class {
370
479
  }
371
480
  return { tables: [], cacheable: false };
372
481
  }
482
+ /**
483
+ * Run a `CREATE TABLE` with an extra outer retry budget. The base
484
+ * `query()` already retries 3 times on fetch errors (~3.5s total), but a
485
+ * failed CREATE is permanent corruption — every subsequent SELECT against
486
+ * the missing table fails. Wrapping in an outer loop with longer backoff
487
+ * (2s, 5s, then 10s) gives us ~17s of reach across transient network
488
+ * blips before giving up. Failures still propagate; getApi() resets its
489
+ * cache on init failure (openclaw plugin) so the next call retries the
490
+ * whole init flow.
491
+ */
492
+ async createTableWithRetry(sql, label) {
493
+ const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
494
+ let lastErr = null;
495
+ for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
496
+ try {
497
+ await this.query(sql);
498
+ return;
499
+ } catch (err) {
500
+ lastErr = err;
501
+ const msg = err instanceof Error ? err.message : String(err);
502
+ log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
503
+ if (attempt < OUTER_BACKOFFS_MS.length) {
504
+ await sleep(OUTER_BACKOFFS_MS[attempt]);
505
+ }
506
+ }
507
+ }
508
+ throw lastErr;
509
+ }
373
510
  /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
374
511
  async ensureTable(name) {
375
- const tbl = name ?? this.tableName;
512
+ const tbl = sqlIdent(name ?? this.tableName);
376
513
  const tables = await this.listTables();
377
514
  if (!tables.includes(tbl)) {
378
515
  log2(`table "${tbl}" not found, creating`);
379
- await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
516
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
380
517
  log2(`table "${tbl}" created`);
381
518
  if (!tables.includes(tbl))
382
519
  this._tablesCache = [...tables, tbl];
383
520
  }
521
+ await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
522
+ await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
384
523
  }
385
524
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
386
525
  async ensureSessionsTable(name) {
526
+ const safe = sqlIdent(name);
527
+ const tables = await this.listTables();
528
+ if (!tables.includes(safe)) {
529
+ log2(`table "${safe}" not found, creating`);
530
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
531
+ log2(`table "${safe}" created`);
532
+ if (!tables.includes(safe))
533
+ this._tablesCache = [...tables, safe];
534
+ }
535
+ await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
536
+ await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
537
+ await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
538
+ }
539
+ /**
540
+ * Create the skills table.
541
+ *
542
+ * One row per skill version. Workers INSERT a fresh row on every KEEP /
543
+ * MERGE rather than UPDATE-ing in place, so the full version history is
544
+ * recoverable. Uniqueness in the *current* state is by (project_key, name)
545
+ * — newer rows shadow older ones at read time (ORDER BY version DESC).
546
+ * This sidesteps the Deeplake UPDATE-coalescing quirk that bit the wiki
547
+ * worker.
548
+ */
549
+ async ensureSkillsTable(name) {
550
+ const safe = sqlIdent(name);
387
551
  const tables = await this.listTables();
388
- if (!tables.includes(name)) {
389
- log2(`table "${name}" not found, creating`);
390
- await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
391
- log2(`table "${name}" created`);
392
- if (!tables.includes(name))
393
- this._tablesCache = [...tables, name];
552
+ if (!tables.includes(safe)) {
553
+ log2(`table "${safe}" not found, creating`);
554
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
555
+ log2(`table "${safe}" created`);
556
+ if (!tables.includes(safe))
557
+ this._tablesCache = [...tables, safe];
394
558
  }
395
- await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
559
+ await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
396
560
  }
397
561
  };
398
562
 
@@ -560,24 +724,25 @@ function normalizeContent(path, raw) {
560
724
  return raw;
561
725
  }
562
726
  if (Array.isArray(obj.turns)) {
563
- const header = [];
564
- if (obj.date_time)
565
- header.push(`date: ${obj.date_time}`);
566
- if (obj.speakers) {
567
- const s = obj.speakers;
568
- const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", ");
569
- if (names)
570
- header.push(`speakers: ${names}`);
571
- }
727
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
572
728
  const lines = obj.turns.map((t) => {
573
729
  const sp = String(t?.speaker ?? t?.name ?? "?").trim();
574
730
  const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim();
575
731
  const tag = t?.dia_id ? `[${t.dia_id}] ` : "";
576
- return `${tag}${sp}: ${tx}`;
732
+ return `${dateHeader}${tag}${sp}: ${tx}`;
577
733
  });
578
- const out2 = [...header, ...lines].join("\n");
734
+ const out2 = lines.join("\n");
579
735
  return out2.trim() ? out2 : raw;
580
736
  }
737
+ if (obj.turn && typeof obj.turn === "object" && !Array.isArray(obj.turn)) {
738
+ const t = obj.turn;
739
+ const sp = String(t.speaker ?? t.name ?? "?").trim();
740
+ const tx = String(t.text ?? t.content ?? "").replace(/\s+/g, " ").trim();
741
+ const tag = t.dia_id ? `[${String(t.dia_id)}] ` : "";
742
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
743
+ const line = `${dateHeader}${tag}${sp}: ${tx}`;
744
+ return line.trim() ? line : raw;
745
+ }
581
746
  const stripRecalled = (t) => {
582
747
  const i = t.indexOf("<recalled-memories>");
583
748
  if (i === -1)
@@ -620,8 +785,38 @@ function buildPathCondition(targetPath) {
620
785
  return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
621
786
  }
622
787
  async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
623
- const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
788
+ const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, queryEmbedding, multiWordPatterns } = opts;
624
789
  const limit = opts.limit ?? 100;
790
+ if (queryEmbedding && queryEmbedding.length > 0) {
791
+ const vecLit = serializeFloat4Array(queryEmbedding);
792
+ const semanticLimit = Math.min(limit, Number(process.env.HIVEMIND_SEMANTIC_LIMIT ?? "20"));
793
+ const lexicalLimit = Math.min(limit, Number(process.env.HIVEMIND_HYBRID_LEXICAL_LIMIT ?? "20"));
794
+ const filterPatternsForLex = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
795
+ const memLexFilter = buildContentFilter("summary::text", likeOp, filterPatternsForLex);
796
+ const sessLexFilter = buildContentFilter("message::text", likeOp, filterPatternsForLex);
797
+ const memLexQuery = memLexFilter ? `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, 1.0 AS score FROM "${memoryTable}" WHERE 1=1${pathFilter}${memLexFilter} LIMIT ${lexicalLimit}` : null;
798
+ const sessLexQuery = sessLexFilter ? `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, 1.0 AS score FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessLexFilter} LIMIT ${lexicalLimit}` : null;
799
+ const memSemQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, (summary_embedding <#> ${vecLit}) AS score FROM "${memoryTable}" WHERE ARRAY_LENGTH(summary_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
800
+ const sessSemQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, (message_embedding <#> ${vecLit}) AS score FROM "${sessionsTable}" WHERE ARRAY_LENGTH(message_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
801
+ const parts = [memSemQuery, sessSemQuery];
802
+ if (memLexQuery)
803
+ parts.push(memLexQuery);
804
+ if (sessLexQuery)
805
+ parts.push(sessLexQuery);
806
+ const unionSql = parts.map((q) => `(${q})`).join(" UNION ALL ");
807
+ const outerLimit = semanticLimit + lexicalLimit;
808
+ const rows2 = await api.query(`SELECT path, content, source_order, creation_date, score FROM (` + unionSql + `) AS combined ORDER BY score DESC LIMIT ${outerLimit}`);
809
+ const seen = /* @__PURE__ */ new Set();
810
+ const unique = [];
811
+ for (const row of rows2) {
812
+ const p = String(row["path"]);
813
+ if (seen.has(p))
814
+ continue;
815
+ seen.add(p);
816
+ unique.push({ path: p, content: String(row["content"] ?? "") });
817
+ }
818
+ return unique;
819
+ }
625
820
  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
626
821
  const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
627
822
  const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
@@ -633,6 +828,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
633
828
  content: String(row["content"] ?? "")
634
829
  }));
635
830
  }
831
// Render a JS number array as a SQL float4[] literal. Any non-finite
// component (NaN / ±Infinity) would corrupt the literal, so the whole
// expression degrades to SQL NULL instead.
function serializeFloat4Array(vec) {
  if (!vec.every((v) => Number.isFinite(v))) {
    return "NULL";
  }
  return `ARRAY[${vec.map(String).join(",")}]::float4[]`;
}
636
840
  function buildPathFilter(targetPath) {
637
841
  const condition = buildPathCondition(targetPath);
638
842
  return condition ? ` AND ${condition}` : "";
@@ -715,7 +919,7 @@ function buildGrepSearchOptions(params, targetPath) {
715
919
  return {
716
920
  pathFilter: buildPathFilter(targetPath),
717
921
  contentScanOnly: hasRegexMeta,
718
- likeOp: params.ignoreCase ? "ILIKE" : "LIKE",
922
+ likeOp: process.env.HIVEMIND_GREP_LIKE === "case-sensitive" ? "LIKE" : "ILIKE",
719
923
  escapedPattern: sqlLike(params.pattern),
720
924
  prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
721
925
  prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
@@ -770,11 +974,28 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) {
770
974
  }
771
975
  return output;
772
976
  }
773
// Search both the memory and sessions tables, dedupe by path, and normalize
// the matched content. In semantic (embedding) mode, unless
// HIVEMIND_SEMANTIC_EMIT_ALL === "false", every non-blank line of each match
// is emitted as "path:line"; otherwise matches go through the usual lexical
// grep refinement.
async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, queryEmbedding) {
  const searchOpts = { ...buildGrepSearchOptions(params, targetPath), queryEmbedding };
  const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, searchOpts);
  const seenPaths = new Set();
  const unique = [];
  for (const row of rows) {
    if (!seenPaths.has(row.path)) {
      seenPaths.add(row.path);
      unique.push(row);
    }
  }
  const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) }));
  const semantic = Boolean(queryEmbedding && queryEmbedding.length > 0);
  if (semantic && process.env.HIVEMIND_SEMANTIC_EMIT_ALL !== "false") {
    const lines = [];
    for (const r of normalized) {
      for (const line of r.content.split("\n")) {
        if (line.trim()) {
          lines.push(`${r.path}:${line}`);
        }
      }
    }
    return lines;
  }
  return refineGrepMatches(normalized, params);
}
780
1001
 
@@ -818,7 +1039,298 @@ function capOutputForClaude(output, options = {}) {
818
1039
  return keptLines.join("\n") + footer;
819
1040
  }
820
1041
 
1042
+ // dist/src/embeddings/client.js
1043
+ import { connect } from "node:net";
1044
+ import { spawn } from "node:child_process";
1045
+ import { openSync, closeSync, writeSync, unlinkSync, existsSync as existsSync3, readFileSync as readFileSync3 } from "node:fs";
1046
+ import { homedir as homedir3 } from "node:os";
1047
+ import { join as join4 } from "node:path";
1048
+
1049
// dist/src/embeddings/protocol.js
// Sockets and pid files are per-uid and live in a world-writable directory.
var DEFAULT_SOCKET_DIR = "/tmp";
// Daemon shuts itself down after 10 minutes without requests (10 * 60 * 1000).
var DEFAULT_IDLE_TIMEOUT_MS = 600000;
// Client-side connect/request budget.
var DEFAULT_CLIENT_TIMEOUT_MS = 2000;
// Unix-domain socket path for the embed daemon serving `uid`.
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const basename = `hivemind-embed-${uid}.sock`;
  return `${dir}/${basename}`;
}
// Pid-file path (spawn lock) for the embed daemon serving `uid`.
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const basename = `hivemind-embed-${uid}.pid`;
  return `${dir}/${basename}`;
}
1059
+
1060
// dist/src/embeddings/client.js
// Namespaced logger for this module.
var log3 = (m) => log("embed-client", m);
// Fallback daemon entry installed by setup under ~/.hivemind/embed-deps.
var SHARED_DAEMON_PATH = join4(homedir3(), ".hivemind", "embed-deps", "embed-daemon.js");
1063
// Identity used to namespace the per-user daemon socket/pid files:
// the numeric POSIX uid when available, else $USER, else "default".
function getUid() {
  if (typeof process.getuid === "function") {
    return String(process.getuid());
  }
  return process.env.USER ?? "default";
}
1067
/**
 * Client for the per-uid embedding daemon, reached over a Unix socket with a
 * newline-delimited JSON protocol. Designed for hook hot paths: every public
 * method degrades to null/false instead of throwing.
 */
var EmbedClient = class {
  socketPath;
  pidPath;
  timeoutMs;
  daemonEntry;
  autoSpawn;
  spawnWaitMs;
  // Monotonic request id, unique within this client instance.
  nextId = 0;
  /**
   * @param {object} opts
   *   socketDir   — directory for the per-uid socket/pid files (default /tmp)
   *   timeoutMs   — connect + request timeout (default DEFAULT_CLIENT_TIMEOUT_MS)
   *   daemonEntry — daemon script path; falls back to $HIVEMIND_EMBED_DAEMON,
   *                 then the shared install if that file exists
   *   autoSpawn   — spawn the daemon on a connection miss (default true)
   *   spawnWaitMs — warmup() budget for the daemon to come up (default 5s)
   */
  constructor(opts = {}) {
    const uid = getUid();
    const dir = opts.socketDir ?? "/tmp";
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync3(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }
  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      if (this.autoSpawn)
        this.trySpawnDaemon();
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e) {
      const err = e instanceof Error ? e.message : String(e);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      try {
        sock.end();
      } catch {
      }
    }
  }
  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   */
  async warmup() {
    try {
      const s = await this.connectOnce();
      s.end();
      return true;
    } catch {
      if (!this.autoSpawn)
        return false;
      this.trySpawnDaemon();
      try {
        const s = await this.waitForSocket();
        s.end();
        return true;
      } catch {
        return false;
      }
    }
  }
  /**
   * Single connection attempt bounded by timeoutMs; resolves with the socket
   * or rejects on error/timeout.
   */
  connectOnce() {
    return new Promise((resolve2, reject) => {
      const sock = connect(this.socketPath);
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to);
        resolve2(sock);
      });
      sock.once("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
    });
  }
  /**
   * Spawn the daemon at most once across racing clients, using an exclusive
   * pid file as the lock. Best-effort: every failure is logged and swallowed
   * because callers sit on the hooks' hot path.
   */
  trySpawnDaemon() {
    let fd;
    try {
      // "wx" (O_EXCL) makes the pid file an atomic spawn lock (mode 0600).
      fd = openSync(this.pidPath, "wx", 384);
      writeSync(fd, String(process.pid));
    } catch (e) {
      if (this.isPidFileStale()) {
        try {
          unlinkSync(this.pidPath);
        } catch {
        }
        try {
          fd = openSync(this.pidPath, "wx", 384);
          writeSync(fd, String(process.pid));
        } catch {
          return;
        }
      } else {
        return;
      }
    }
    if (!this.daemonEntry || !existsSync3(this.daemonEntry)) {
      log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      try {
        closeSync(fd);
        unlinkSync(this.pidPath);
      } catch {
      }
      return;
    }
    try {
      const child = spawn(process.execPath, [this.daemonEntry], {
        detached: true,
        stdio: "ignore",
        env: process.env
      });
      child.unref();
      log3(`spawned daemon pid=${child.pid}`);
    } catch (e) {
      // FIX: a synchronous spawn() failure previously propagated out of this
      // method (breaking embed()'s never-throw contract) and left the pid
      // file behind holding THIS process's live pid — isPidFileStale() then
      // reported it fresh, blocking every respawn attempt for the lifetime
      // of this process. Log and release the lock so a later call can retry.
      log3(`spawn failed: ${e instanceof Error ? e.message : String(e)}`);
      try {
        unlinkSync(this.pidPath);
      } catch {
      }
    } finally {
      closeSync(fd);
    }
  }
  /**
   * True when the pid file is unreadable, holds a non-numeric pid, or names a
   * process that no longer exists (signal-0 probe).
   */
  isPidFileStale() {
    try {
      const raw = readFileSync3(this.pidPath, "utf-8").trim();
      const pid = Number(raw);
      if (!pid || Number.isNaN(pid))
        return true;
      try {
        process.kill(pid, 0);
        return false;
      } catch {
        return true;
      }
    } catch {
      return true;
    }
  }
  /**
   * Poll (30ms → 300ms backoff) until the daemon socket accepts a connection
   * or spawnWaitMs elapses; resolves with a connected socket.
   */
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync3(this.socketPath))
        continue;
      try {
        return await this.connectOnce();
      } catch {
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }
  /**
   * Write one JSON request line and resolve with the first newline-terminated
   * JSON response; rejects on timeout, socket error, parse error, or a close
   * before any response.
   */
  sendAndWait(sock, req) {
    return new Promise((resolve2, reject) => {
      let buf = "";
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        buf += chunk;
        const nl = buf.indexOf("\n");
        if (nl === -1)
          return;
        const line = buf.slice(0, nl);
        clearTimeout(to);
        try {
          resolve2(JSON.parse(line));
        } catch (e) {
          reject(e);
        }
      });
      sock.on("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
      sock.on("end", () => {
        clearTimeout(to);
        reject(new Error("connection closed without response"));
      });
      sock.write(JSON.stringify(req) + "\n");
    });
  }
};
1266
// Promise-based delay helper.
function sleep2(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
1269
+
1270
// dist/src/embeddings/disable.js
import { createRequire } from "node:module";
import { homedir as homedir4 } from "node:os";
import { join as join5 } from "node:path";
import { pathToFileURL } from "node:url";

// Memoized result of detectStatus(); computed at most once per process.
var cachedStatus = null;

/**
 * Resolves "@huggingface/transformers" or throws. Normal module resolution is
 * tried first, then the shared ~/.hivemind/embed-deps install.
 */
function defaultResolveTransformers() {
  try {
    createRequire(import.meta.url).resolve("@huggingface/transformers");
    return;
  } catch {
  }
  const sharedDir = join5(homedir4(), ".hivemind", "embed-deps");
  createRequire(pathToFileURL(`${sharedDir}/`).href).resolve("@huggingface/transformers");
}
// Indirection point so tests can stub the resolver.
var _resolve = defaultResolveTransformers;

// Classify availability: explicitly disabled, usable, or deps missing.
function detectStatus() {
  if (process.env.HIVEMIND_EMBEDDINGS === "false") {
    return "env-disabled";
  }
  try {
    _resolve();
    return "enabled";
  } catch {
    return "no-transformers";
  }
}

// Cached status accessor; the env var / resolver is consulted only once.
function embeddingsStatus() {
  cachedStatus ??= detectStatus();
  return cachedStatus;
}

// True for any status other than "enabled".
function embeddingsDisabled() {
  return embeddingsStatus() !== "enabled";
}
1305
+
821
1306
// dist/src/hooks/grep-direct.js
import { fileURLToPath } from "node:url";
import { dirname, join as join6 } from "node:path";
// Semantic search is on unless explicitly disabled or transformers is absent.
var SEMANTIC_ENABLED = process.env.HIVEMIND_SEMANTIC_SEARCH !== "false" && !embeddingsDisabled();
// Budget for a single embed round-trip on the grep hot path.
var SEMANTIC_TIMEOUT_MS = Number(process.env.HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS ?? "500");
// Daemon entry shipped alongside this bundle (../embeddings/embed-daemon.js).
function resolveDaemonPath() {
  const bundleDir = dirname(fileURLToPath(import.meta.url));
  return join6(bundleDir, "..", "embeddings", "embed-daemon.js");
}
// Lazily-constructed singleton client shared by all grep calls in-process.
var sharedEmbedClient = null;
function getEmbedClient() {
  sharedEmbedClient ??= new EmbedClient({
    daemonEntry: resolveDaemonPath(),
    timeoutMs: SEMANTIC_TIMEOUT_MS
  });
  return sharedEmbedClient;
}
1324
/**
 * Heuristic: is this grep pattern plain enough to double as a semantic query?
 * Patterns shorter than 2 chars carry no meaning; fixed-string (-F) patterns
 * always qualify; otherwise at most one regex metacharacter is tolerated
 * (note `.` and `*` are not counted by this check).
 */
function patternIsSemanticFriendly(pattern, fixedString) {
  if (!pattern || pattern.length < 2)
    return false;
  if (fixedString)
    return true;
  const metaChars = pattern.match(/[|()\[\]{}+?^$\\]/g) ?? [];
  return metaChars.length <= 1;
}
822
1334
  function splitFirstPipelineStage(cmd) {
823
1335
  const input = cmd.trim();
824
1336
  let quote = null;
@@ -1136,7 +1648,15 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
1136
1648
  invertMatch: params.invertMatch,
1137
1649
  fixedString: params.fixedString
1138
1650
  };
1139
- const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath);
1651
+ let queryEmbedding = null;
1652
+ if (SEMANTIC_ENABLED && patternIsSemanticFriendly(params.pattern, params.fixedString)) {
1653
+ try {
1654
+ queryEmbedding = await getEmbedClient().embed(params.pattern, "query");
1655
+ } catch {
1656
+ queryEmbedding = null;
1657
+ }
1658
+ }
1659
+ const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath, queryEmbedding);
1140
1660
  const joined = output.join("\n") || "(no matches)";
1141
1661
  return capOutputForClaude(joined, { kind: "grep" });
1142
1662
  }
@@ -1145,33 +1665,65 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
1145
1665
// Thin alias over normalizeContent, kept for readable call sites in the
// session-handling code path.
function normalizeSessionPart(path, content) {
  const normalized = normalizeContent(path, content);
  return normalized;
}
1148
// Rows shown per section before the index advertises the truncation hint.
var INDEX_LIMIT_PER_SECTION = 50;
/**
 * Render the virtual /index.md as markdown: a "memory" section (AI summaries)
 * and a "sessions" section (raw records), each as a table or an "(empty)"
 * placeholder. opts.summaryTruncated / opts.sessionTruncated add a hint that
 * older rows are reachable via Grep.
 *
 * FIX: the sessions table emitted the literal text "[$(unknown)]" for every
 * row — a broken template interpolation ($(…) instead of ${…}); the computed
 * `filename` local was never used. Session links now read "[<filename>](<rel>)".
 */
function buildVirtualIndexContent(summaryRows, sessionRows = [], opts = {}) {
  const lines = [
    "# Session Index",
    "",
    "Two sources are available. Consult the section relevant to the question.",
    ""
  ];
  lines.push("## memory", "");
  if (summaryRows.length === 0) {
    lines.push("_(empty \u2014 no summaries ingested yet)_");
  } else {
    lines.push("AI-generated summaries per session. Read these first for topic-level overviews.");
    lines.push("");
    if (opts.summaryTruncated) {
      lines.push(`_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older summaries reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`);
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Project | Description |");
    lines.push("|---------|---------|--------------|---------|-------------|");
    for (const row of summaryRows) {
      const p = row["path"] || "";
      // Only rows shaped like /summaries/<user>/<sessionId>.md are listed.
      const match = p.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/);
      if (!match)
        continue;
      const summaryUser = match[1];
      const sessionId = match[2];
      const relPath = `summaries/${summaryUser}/${sessionId}.md`;
      const project = row["project"] || "";
      const description = row["description"] || "";
      const creationDate = row["creation_date"] || "";
      const lastUpdateDate = row["last_update_date"] || "";
      lines.push(`| [${sessionId}](${relPath}) | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`);
    }
  }
  lines.push("");
  lines.push("## sessions", "");
  if (sessionRows.length === 0) {
    lines.push("_(empty \u2014 no session records ingested yet)_");
  } else {
    lines.push("Raw session records (dialogue, tool calls). Read for exact detail / quotes.");
    lines.push("");
    if (opts.sessionTruncated) {
      lines.push(`_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older sessions reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`);
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Description |");
    lines.push("|---------|---------|--------------|-------------|");
    for (const row of sessionRows) {
      const p = row["path"] || "";
      // Links are relative: strip a single leading slash.
      const rel = p.startsWith("/") ? p.slice(1) : p;
      const filename = p.split("/").pop() ?? p;
      const description = row["description"] || "";
      const creationDate = row["creation_date"] || "";
      const lastUpdateDate = row["last_update_date"] || "";
      lines.push(`| [${filename}](${rel}) | ${creationDate} | ${lastUpdateDate} | ${description} |`);
    }
  }
  lines.push("");
  return lines.join("\n");
}
1177
1729
  function buildUnionQuery(memoryQuery, sessionsQuery) {
@@ -1233,11 +1785,14 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP
1233
1785
  }
1234
1786
  }
1235
1787
  if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) {
1788
+ const fetchLimit = INDEX_LIMIT_PER_SECTION + 1;
1236
1789
  const [summaryRows, sessionRows] = await Promise.all([
1237
- api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []),
1238
- api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path`).catch(() => [])
1790
+ api.query(`SELECT path, project, description, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC LIMIT ${fetchLimit}`).catch(() => []),
1791
+ api.query(`SELECT path, MAX(description) AS description, MIN(creation_date) AS creation_date, MAX(last_update_date) AS last_update_date FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' GROUP BY path ORDER BY MAX(last_update_date) DESC LIMIT ${fetchLimit}`).catch(() => [])
1239
1792
  ]);
1240
- result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows));
1793
+ const summaryTruncated = summaryRows.length > INDEX_LIMIT_PER_SECTION;
1794
+ const sessionTruncated = sessionRows.length > INDEX_LIMIT_PER_SECTION;
1795
+ result.set("/index.md", buildVirtualIndexContent(summaryRows.slice(0, INDEX_LIMIT_PER_SECTION), sessionRows.slice(0, INDEX_LIMIT_PER_SECTION), { summaryTruncated, sessionTruncated }));
1241
1796
  }
1242
1797
  return result;
1243
1798
  }
@@ -1757,20 +2312,20 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd,
1757
2312
  }
1758
2313
 
1759
2314
// dist/src/hooks/query-cache.js
import { mkdirSync as mkdirSync2, readFileSync as readFileSync4, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
import { join as join7 } from "node:path";
import { homedir as homedir5 } from "node:os";
// Namespaced logger for this module.
var log4 = (msg) => log("query-cache", msg);
// File name of the cached /index.md snapshot inside a session cache dir.
var INDEX_CACHE_FILE = "index.md";
// Per-session cache directories live under ~/.deeplake/query-cache/<sessionId>.
var DEFAULT_CACHE_ROOT = join7(homedir5(), ".deeplake", "query-cache");
/**
 * Directory holding cached query results for one session.
 * deps.cacheRoot overrides the default root (useful in tests).
 */
function getSessionQueryCacheDir(sessionId, deps = {}) {
  const { cacheRoot = DEFAULT_CACHE_ROOT } = deps;
  const dir = join7(cacheRoot, sessionId);
  return dir;
}
1770
2325
  function readCachedIndexContent(sessionId, deps = {}) {
1771
- const { logFn = log3 } = deps;
2326
+ const { logFn = log4 } = deps;
1772
2327
  try {
1773
- return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8");
2328
+ return readFileSync4(join7(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8");
1774
2329
  } catch (e) {
1775
2330
  if (e?.code === "ENOENT")
1776
2331
  return null;
@@ -1779,11 +2334,11 @@ function readCachedIndexContent(sessionId, deps = {}) {
1779
2334
  }
1780
2335
  }
1781
2336
  function writeCachedIndexContent(sessionId, content, deps = {}) {
1782
- const { logFn = log3 } = deps;
2337
+ const { logFn = log4 } = deps;
1783
2338
  try {
1784
2339
  const dir = getSessionQueryCacheDir(sessionId, deps);
1785
2340
  mkdirSync2(dir, { recursive: true });
1786
- writeFileSync2(join4(dir, INDEX_CACHE_FILE), content, "utf-8");
2341
+ writeFileSync2(join7(dir, INDEX_CACHE_FILE), content, "utf-8");
1787
2342
  } catch (e) {
1788
2343
  logFn(`write failed for session=${sessionId}: ${e.message}`);
1789
2344
  }
@@ -1791,22 +2346,22 @@ function writeCachedIndexContent(sessionId, content, deps = {}) {
1791
2346
 
1792
2347
  // dist/src/utils/direct-run.js
1793
2348
  import { resolve } from "node:path";
1794
- import { fileURLToPath } from "node:url";
2349
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
1795
2350
/**
 * True when the module at `metaUrl` is the script node was launched with
 * (i.e. it is being run directly rather than imported). Returns false when
 * there is no entry script or the URL cannot be converted to a path.
 */
function isDirectRun(metaUrl) {
  const entry = process.argv[1];
  if (!entry) {
    return false;
  }
  try {
    const modulePath = resolve(fileURLToPath2(metaUrl));
    return modulePath === resolve(entry);
  } catch {
    return false;
  }
}
1805
2360
 
1806
2361
// dist/src/hooks/memory-path-utils.js
import { homedir as homedir6 } from "node:os";
import { join as join8 } from "node:path";
// Canonical on-disk location of the memory mount, plus the two spellings
// users type in shell commands that must be rewritten to it.
var MEMORY_PATH = join8(homedir6(), ".deeplake", "memory");
var TILDE_PATH = "~/.deeplake/memory";
var HOME_VAR_PATH = "$HOME/.deeplake/memory";
1812
2367
  var SAFE_BUILTINS = /* @__PURE__ */ new Set([
@@ -1922,13 +2477,13 @@ function rewritePaths(cmd) {
1922
2477
  }
1923
2478
 
1924
2479
// dist/src/hooks/codex/pre-tool-use.js
// Namespaced logger for this hook.
var log5 = (msg) => log("codex-pre", msg);
// Directory this bundle file lives in.
var __bundleDir = dirname2(fileURLToPath3(import.meta.url));
// Prefer the shell bundle next to this file; fall back to the parent dir.
var _localShellBundle = join9(__bundleDir, "shell", "deeplake-shell.js");
var SHELL_BUNDLE = existsSync4(_localShellBundle) ? _localShellBundle : join9(__bundleDir, "..", "shell", "deeplake-shell.js");
1928
2483
// Guidance returned when a ~/.deeplake/memory command uses a disallowed
// interpreter; the agent is told to rewrite with bash builtins only.
function buildUnsupportedGuidance() {
  const guidance = "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry.";
  return guidance;
}
1931
- function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) {
2486
+ function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log5) {
1932
2487
  try {
1933
2488
  return execFileSync("node", [shellBundle, "-c", cmd], {
1934
2489
  encoding: "utf-8",
@@ -1953,7 +2508,7 @@ function buildIndexContent(rows) {
1953
2508
  return lines.join("\n");
1954
2509
  }
1955
2510
  async function processCodexPreToolUse(input, deps = {}) {
1956
- const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps;
2511
+ const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log5 } = deps;
1957
2512
  const cmd = input.tool_input?.command ?? "";
1958
2513
  logFn(`hook fired: cmd=${cmd}`);
1959
2514
  if (!touchesMemory(cmd))
@@ -2163,7 +2718,7 @@ async function main() {
2163
2718
  }
2164
2719
  if (isDirectRun(import.meta.url)) {
2165
2720
  main().catch((e) => {
2166
- log4(`fatal: ${e.message}`);
2721
+ log5(`fatal: ${e.message}`);
2167
2722
  process.exit(0);
2168
2723
  });
2169
2724
  }