@deeplake/hivemind 0.6.47 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +158 -51
  4. package/bundle/cli.js +4103 -282
  5. package/codex/bundle/capture.js +510 -90
  6. package/codex/bundle/commands/auth-login.js +219 -72
  7. package/codex/bundle/embeddings/embed-daemon.js +243 -0
  8. package/codex/bundle/pre-tool-use.js +713 -108
  9. package/codex/bundle/session-start-setup.js +209 -58
  10. package/codex/bundle/session-start.js +40 -11
  11. package/codex/bundle/shell/deeplake-shell.js +679 -112
  12. package/codex/bundle/stop.js +477 -59
  13. package/codex/bundle/wiki-worker.js +312 -11
  14. package/cursor/bundle/capture.js +768 -57
  15. package/cursor/bundle/commands/auth-login.js +219 -72
  16. package/cursor/bundle/embeddings/embed-daemon.js +243 -0
  17. package/cursor/bundle/pre-tool-use.js +1684 -0
  18. package/cursor/bundle/session-end.js +223 -2
  19. package/cursor/bundle/session-start.js +209 -57
  20. package/cursor/bundle/shell/deeplake-shell.js +679 -112
  21. package/cursor/bundle/wiki-worker.js +571 -0
  22. package/hermes/bundle/capture.js +1194 -0
  23. package/hermes/bundle/commands/auth-login.js +1009 -0
  24. package/hermes/bundle/embeddings/embed-daemon.js +243 -0
  25. package/hermes/bundle/package.json +1 -0
  26. package/hermes/bundle/pre-tool-use.js +1681 -0
  27. package/hermes/bundle/session-end.js +265 -0
  28. package/hermes/bundle/session-start.js +655 -0
  29. package/hermes/bundle/shell/deeplake-shell.js +69905 -0
  30. package/hermes/bundle/wiki-worker.js +572 -0
  31. package/mcp/bundle/server.js +289 -69
  32. package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
  33. package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
  34. package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
  35. package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
  36. package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
  37. package/openclaw/dist/index.js +752 -702
  38. package/openclaw/openclaw.plugin.json +1 -1
  39. package/openclaw/package.json +1 -1
  40. package/package.json +7 -3
  41. package/pi/extension-source/hivemind.ts +807 -0
@@ -1,10 +1,62 @@
1
1
  #!/usr/bin/env node
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __esm = (fn, res) => function __init() {
5
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
6
+ };
7
+ var __export = (target, all) => {
8
+ for (var name in all)
9
+ __defProp(target, name, { get: all[name], enumerable: true });
10
+ };
11
+
12
+ // dist/src/index-marker-store.js
13
+ var index_marker_store_exports = {};
14
+ __export(index_marker_store_exports, {
15
+ buildIndexMarkerPath: () => buildIndexMarkerPath,
16
+ getIndexMarkerDir: () => getIndexMarkerDir,
17
+ hasFreshIndexMarker: () => hasFreshIndexMarker,
18
+ writeIndexMarker: () => writeIndexMarker
19
+ });
20
+ import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
21
+ import { join as join3 } from "node:path";
22
+ import { tmpdir } from "node:os";
23
+ function getIndexMarkerDir() {
24
+ return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
25
+ }
26
+ function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
27
+ const markerKey = [workspaceId, orgId, table, suffix].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
28
+ return join3(getIndexMarkerDir(), `${markerKey}.json`);
29
+ }
30
+ function hasFreshIndexMarker(markerPath) {
31
+ if (!existsSync2(markerPath))
32
+ return false;
33
+ try {
34
+ const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
35
+ const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
36
+ if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
37
+ return false;
38
+ return true;
39
+ } catch {
40
+ return false;
41
+ }
42
+ }
43
+ function writeIndexMarker(markerPath) {
44
+ mkdirSync(getIndexMarkerDir(), { recursive: true });
45
+ writeFileSync(markerPath, JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
46
+ }
47
+ var INDEX_MARKER_TTL_MS;
48
+ var init_index_marker_store = __esm({
49
+ "dist/src/index-marker-store.js"() {
50
+ "use strict";
51
+ INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
52
+ }
53
+ });
2
54
 
3
55
  // dist/src/hooks/codex/pre-tool-use.js
4
56
  import { execFileSync } from "node:child_process";
5
- import { existsSync as existsSync3 } from "node:fs";
6
- import { join as join6, dirname } from "node:path";
7
- import { fileURLToPath as fileURLToPath2 } from "node:url";
57
+ import { existsSync as existsSync4 } from "node:fs";
58
+ import { join as join9, dirname as dirname2 } from "node:path";
59
+ import { fileURLToPath as fileURLToPath3 } from "node:url";
8
60
 
9
61
  // dist/src/utils/stdin.js
10
62
  function readStdin() {
@@ -57,9 +109,6 @@ function loadConfig() {
57
109
 
58
110
  // dist/src/deeplake-api.js
59
111
  import { randomUUID } from "node:crypto";
60
- import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
61
- import { join as join3 } from "node:path";
62
- import { tmpdir } from "node:os";
63
112
 
64
113
  // dist/src/utils/debug.js
65
114
  import { appendFileSync } from "node:fs";
@@ -82,7 +131,26 @@ function sqlLike(value) {
82
131
  return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_");
83
132
  }
84
133
 
134
+ // dist/src/embeddings/columns.js
135
+ var SUMMARY_EMBEDDING_COL = "summary_embedding";
136
+ var MESSAGE_EMBEDDING_COL = "message_embedding";
137
+
138
+ // dist/src/utils/client-header.js
139
+ var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
140
+ function deeplakeClientValue() {
141
+ return "hivemind";
142
+ }
143
+ function deeplakeClientHeader() {
144
+ return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
145
+ }
146
+
85
147
  // dist/src/deeplake-api.js
148
+ var indexMarkerStorePromise = null;
149
+ function getIndexMarkerStore() {
150
+ if (!indexMarkerStorePromise)
151
+ indexMarkerStorePromise = Promise.resolve().then(() => (init_index_marker_store(), index_marker_store_exports));
152
+ return indexMarkerStorePromise;
153
+ }
86
154
  var log2 = (msg) => log("sdk", msg);
87
155
  function summarizeSql(sql, maxLen = 220) {
88
156
  const compact = sql.replace(/\s+/g, " ").trim();
@@ -102,7 +170,6 @@ var MAX_RETRIES = 3;
102
170
  var BASE_DELAY_MS = 500;
103
171
  var MAX_CONCURRENCY = 5;
104
172
  var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
105
- var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
106
173
  function sleep(ms) {
107
174
  return new Promise((resolve2) => setTimeout(resolve2, ms));
108
175
  }
@@ -122,9 +189,6 @@ function isTransientHtml403(text) {
122
189
  const body = text.toLowerCase();
123
190
  return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
124
191
  }
125
- function getIndexMarkerDir() {
126
- return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
127
- }
128
192
  var Semaphore = class {
129
193
  max;
130
194
  waiting = [];
@@ -193,7 +257,8 @@ var DeeplakeApi = class {
193
257
  headers: {
194
258
  Authorization: `Bearer ${this.token}`,
195
259
  "Content-Type": "application/json",
196
- "X-Activeloop-Org-Id": this.orgId
260
+ "X-Activeloop-Org-Id": this.orgId,
261
+ ...deeplakeClientHeader()
197
262
  },
198
263
  signal,
199
264
  body: JSON.stringify({ query: sql })
@@ -220,7 +285,8 @@ var DeeplakeApi = class {
220
285
  }
221
286
  const text = await resp.text().catch(() => "");
222
287
  const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
223
- if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
288
+ const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
289
+ if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
224
290
  const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
225
291
  log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
226
292
  await sleep(delay);
@@ -254,7 +320,7 @@ var DeeplakeApi = class {
254
320
  const lud = row.lastUpdateDate ?? ts;
255
321
  const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
256
322
  if (exists.length > 0) {
257
- let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
323
+ let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
258
324
  if (row.project !== void 0)
259
325
  setClauses += `, project = '${sqlStr(row.project)}'`;
260
326
  if (row.description !== void 0)
@@ -262,8 +328,8 @@ var DeeplakeApi = class {
262
328
  await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
263
329
  } else {
264
330
  const id = randomUUID();
265
- let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date";
266
- let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
331
+ let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
332
+ let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
267
333
  if (row.project !== void 0) {
268
334
  cols += ", project";
269
335
  vals += `, '${sqlStr(row.project)}'`;
@@ -288,48 +354,83 @@ var DeeplakeApi = class {
288
354
  buildLookupIndexName(table, suffix) {
289
355
  return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
290
356
  }
291
- getLookupIndexMarkerPath(table, suffix) {
292
- const markerKey = [
293
- this.workspaceId,
294
- this.orgId,
295
- table,
296
- suffix
297
- ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
298
- return join3(getIndexMarkerDir(), `${markerKey}.json`);
299
- }
300
- hasFreshLookupIndexMarker(table, suffix) {
301
- const markerPath = this.getLookupIndexMarkerPath(table, suffix);
302
- if (!existsSync2(markerPath))
303
- return false;
304
- try {
305
- const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
306
- const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
307
- if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
308
- return false;
309
- return true;
310
- } catch {
311
- return false;
312
- }
313
- }
314
- markLookupIndexReady(table, suffix) {
315
- mkdirSync(getIndexMarkerDir(), { recursive: true });
316
- writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
317
- }
318
357
  async ensureLookupIndex(table, suffix, columnsSql) {
319
- if (this.hasFreshLookupIndexMarker(table, suffix))
358
+ const markers = await getIndexMarkerStore();
359
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
360
+ if (markers.hasFreshIndexMarker(markerPath))
320
361
  return;
321
362
  const indexName = this.buildLookupIndexName(table, suffix);
322
363
  try {
323
364
  await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
324
- this.markLookupIndexReady(table, suffix);
365
+ markers.writeIndexMarker(markerPath);
325
366
  } catch (e) {
326
367
  if (isDuplicateIndexError(e)) {
327
- this.markLookupIndexReady(table, suffix);
368
+ markers.writeIndexMarker(markerPath);
328
369
  return;
329
370
  }
330
371
  log2(`index "${indexName}" skipped: ${e.message}`);
331
372
  }
332
373
  }
374
+ /**
375
+ * Ensure a vector column exists on the given table.
376
+ *
377
+ * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
378
+ * EXISTS …` on every SessionStart. On a long-running workspace that's
379
+ * already migrated, every call returns 500 "Column already exists" — noisy
380
+ * in the log and a wasted round-trip. Worse, the very first call after the
381
+ * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
382
+ * window (~30s) during which subsequent INSERTs fail; minimising the
383
+ * number of ALTER calls minimises exposure to that window.
384
+ *
385
+ * New flow:
386
+ * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
387
+ * return — zero network calls.
388
+ * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
389
+ * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
390
+ * bug. If the column is present → mark + return.
391
+ * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
392
+ * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
393
+ * "already exists" (race: another client added it between our SELECT
394
+ * and ALTER).
395
+ *
396
+ * Marker uses the same dir / TTL as ensureLookupIndex so both schema
397
+ * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
398
+ */
399
+ async ensureEmbeddingColumn(table, column) {
400
+ await this.ensureColumn(table, column, "FLOAT4[]");
401
+ }
402
+ /**
403
+ * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
404
+ * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
405
+ * column that was added to the schema after the table was originally
406
+ * created. Used today for `summary_embedding`, `message_embedding`, and
407
+ * the `agent` column (added 2026-04-11) — the latter has no fallback if
408
+ * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
409
+ * with `column "agent" does not exist`.
410
+ */
411
+ async ensureColumn(table, column, sqlType) {
412
+ const markers = await getIndexMarkerStore();
413
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
414
+ if (markers.hasFreshIndexMarker(markerPath))
415
+ return;
416
+ const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
417
+ const rows = await this.query(colCheck);
418
+ if (rows.length > 0) {
419
+ markers.writeIndexMarker(markerPath);
420
+ return;
421
+ }
422
+ try {
423
+ await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
424
+ } catch (e) {
425
+ const msg = e instanceof Error ? e.message : String(e);
426
+ if (!/already exists/i.test(msg))
427
+ throw e;
428
+ const recheck = await this.query(colCheck);
429
+ if (recheck.length === 0)
430
+ throw e;
431
+ }
432
+ markers.writeIndexMarker(markerPath);
433
+ }
333
434
  /** List all tables in the workspace (with retry). */
334
435
  async listTables(forceRefresh = false) {
335
436
  if (!forceRefresh && this._tablesCache)
@@ -345,7 +446,8 @@ var DeeplakeApi = class {
345
446
  const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
346
447
  headers: {
347
448
  Authorization: `Bearer ${this.token}`,
348
- "X-Activeloop-Org-Id": this.orgId
449
+ "X-Activeloop-Org-Id": this.orgId,
450
+ ...deeplakeClientHeader()
349
451
  }
350
452
  });
351
453
  if (resp.ok) {
@@ -370,28 +472,60 @@ var DeeplakeApi = class {
370
472
  }
371
473
  return { tables: [], cacheable: false };
372
474
  }
475
+ /**
476
+ * Run a `CREATE TABLE` with an extra outer retry budget. The base
477
+ * `query()` already retries 3 times on fetch errors (~3.5s total), but a
478
+ * failed CREATE is permanent corruption — every subsequent SELECT against
479
+ * the missing table fails. Wrapping in an outer loop with longer backoff
480
+ * (2s, 5s, then 10s) gives us ~17s of reach across transient network
481
+ * blips before giving up. Failures still propagate; getApi() resets its
482
+ * cache on init failure (openclaw plugin) so the next call retries the
483
+ * whole init flow.
484
+ */
485
+ async createTableWithRetry(sql, label) {
486
+ const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
487
+ let lastErr = null;
488
+ for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
489
+ try {
490
+ await this.query(sql);
491
+ return;
492
+ } catch (err) {
493
+ lastErr = err;
494
+ const msg = err instanceof Error ? err.message : String(err);
495
+ log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
496
+ if (attempt < OUTER_BACKOFFS_MS.length) {
497
+ await sleep(OUTER_BACKOFFS_MS[attempt]);
498
+ }
499
+ }
500
+ }
501
+ throw lastErr;
502
+ }
373
503
  /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
374
504
  async ensureTable(name) {
375
505
  const tbl = name ?? this.tableName;
376
506
  const tables = await this.listTables();
377
507
  if (!tables.includes(tbl)) {
378
508
  log2(`table "${tbl}" not found, creating`);
379
- await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
509
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
380
510
  log2(`table "${tbl}" created`);
381
511
  if (!tables.includes(tbl))
382
512
  this._tablesCache = [...tables, tbl];
383
513
  }
514
+ await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
515
+ await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
384
516
  }
385
517
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
386
518
  async ensureSessionsTable(name) {
387
519
  const tables = await this.listTables();
388
520
  if (!tables.includes(name)) {
389
521
  log2(`table "${name}" not found, creating`);
390
- await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
522
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
391
523
  log2(`table "${name}" created`);
392
524
  if (!tables.includes(name))
393
525
  this._tablesCache = [...tables, name];
394
526
  }
527
+ await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
528
+ await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
395
529
  await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
396
530
  }
397
531
  };
@@ -560,24 +694,25 @@ function normalizeContent(path, raw) {
560
694
  return raw;
561
695
  }
562
696
  if (Array.isArray(obj.turns)) {
563
- const header = [];
564
- if (obj.date_time)
565
- header.push(`date: ${obj.date_time}`);
566
- if (obj.speakers) {
567
- const s = obj.speakers;
568
- const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", ");
569
- if (names)
570
- header.push(`speakers: ${names}`);
571
- }
697
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
572
698
  const lines = obj.turns.map((t) => {
573
699
  const sp = String(t?.speaker ?? t?.name ?? "?").trim();
574
700
  const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim();
575
701
  const tag = t?.dia_id ? `[${t.dia_id}] ` : "";
576
- return `${tag}${sp}: ${tx}`;
702
+ return `${dateHeader}${tag}${sp}: ${tx}`;
577
703
  });
578
- const out2 = [...header, ...lines].join("\n");
704
+ const out2 = lines.join("\n");
579
705
  return out2.trim() ? out2 : raw;
580
706
  }
707
+ if (obj.turn && typeof obj.turn === "object" && !Array.isArray(obj.turn)) {
708
+ const t = obj.turn;
709
+ const sp = String(t.speaker ?? t.name ?? "?").trim();
710
+ const tx = String(t.text ?? t.content ?? "").replace(/\s+/g, " ").trim();
711
+ const tag = t.dia_id ? `[${String(t.dia_id)}] ` : "";
712
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
713
+ const line = `${dateHeader}${tag}${sp}: ${tx}`;
714
+ return line.trim() ? line : raw;
715
+ }
581
716
  const stripRecalled = (t) => {
582
717
  const i = t.indexOf("<recalled-memories>");
583
718
  if (i === -1)
@@ -620,8 +755,38 @@ function buildPathCondition(targetPath) {
620
755
  return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
621
756
  }
622
757
  async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
623
- const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
758
+ const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, queryEmbedding, multiWordPatterns } = opts;
624
759
  const limit = opts.limit ?? 100;
760
+ if (queryEmbedding && queryEmbedding.length > 0) {
761
+ const vecLit = serializeFloat4Array(queryEmbedding);
762
+ const semanticLimit = Math.min(limit, Number(process.env.HIVEMIND_SEMANTIC_LIMIT ?? "20"));
763
+ const lexicalLimit = Math.min(limit, Number(process.env.HIVEMIND_HYBRID_LEXICAL_LIMIT ?? "20"));
764
+ const filterPatternsForLex = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
765
+ const memLexFilter = buildContentFilter("summary::text", likeOp, filterPatternsForLex);
766
+ const sessLexFilter = buildContentFilter("message::text", likeOp, filterPatternsForLex);
767
+ const memLexQuery = memLexFilter ? `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, 1.0 AS score FROM "${memoryTable}" WHERE 1=1${pathFilter}${memLexFilter} LIMIT ${lexicalLimit}` : null;
768
+ const sessLexQuery = sessLexFilter ? `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, 1.0 AS score FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessLexFilter} LIMIT ${lexicalLimit}` : null;
769
+ const memSemQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, (summary_embedding <#> ${vecLit}) AS score FROM "${memoryTable}" WHERE ARRAY_LENGTH(summary_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
770
+ const sessSemQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, (message_embedding <#> ${vecLit}) AS score FROM "${sessionsTable}" WHERE ARRAY_LENGTH(message_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
771
+ const parts = [memSemQuery, sessSemQuery];
772
+ if (memLexQuery)
773
+ parts.push(memLexQuery);
774
+ if (sessLexQuery)
775
+ parts.push(sessLexQuery);
776
+ const unionSql = parts.map((q) => `(${q})`).join(" UNION ALL ");
777
+ const outerLimit = semanticLimit + lexicalLimit;
778
+ const rows2 = await api.query(`SELECT path, content, source_order, creation_date, score FROM (` + unionSql + `) AS combined ORDER BY score DESC LIMIT ${outerLimit}`);
779
+ const seen = /* @__PURE__ */ new Set();
780
+ const unique = [];
781
+ for (const row of rows2) {
782
+ const p = String(row["path"]);
783
+ if (seen.has(p))
784
+ continue;
785
+ seen.add(p);
786
+ unique.push({ path: p, content: String(row["content"] ?? "") });
787
+ }
788
+ return unique;
789
+ }
625
790
  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
626
791
  const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
627
792
  const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
@@ -633,6 +798,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
633
798
  content: String(row["content"] ?? "")
634
799
  }));
635
800
  }
801
+ function serializeFloat4Array(vec) {
802
+ const parts = [];
803
+ for (const v of vec) {
804
+ if (!Number.isFinite(v))
805
+ return "NULL";
806
+ parts.push(String(v));
807
+ }
808
+ return `ARRAY[${parts.join(",")}]::float4[]`;
809
+ }
636
810
  function buildPathFilter(targetPath) {
637
811
  const condition = buildPathCondition(targetPath);
638
812
  return condition ? ` AND ${condition}` : "";
@@ -715,7 +889,7 @@ function buildGrepSearchOptions(params, targetPath) {
715
889
  return {
716
890
  pathFilter: buildPathFilter(targetPath),
717
891
  contentScanOnly: hasRegexMeta,
718
- likeOp: params.ignoreCase ? "ILIKE" : "LIKE",
892
+ likeOp: process.env.HIVEMIND_GREP_LIKE === "case-sensitive" ? "LIKE" : "ILIKE",
719
893
  escapedPattern: sqlLike(params.pattern),
720
894
  prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
721
895
  prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
@@ -770,11 +944,28 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) {
770
944
  }
771
945
  return output;
772
946
  }
773
- async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) {
774
- const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, buildGrepSearchOptions(params, targetPath));
947
+ async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, queryEmbedding) {
948
+ const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, {
949
+ ...buildGrepSearchOptions(params, targetPath),
950
+ queryEmbedding
951
+ });
775
952
  const seen = /* @__PURE__ */ new Set();
776
953
  const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true));
777
954
  const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) }));
955
+ if (queryEmbedding && queryEmbedding.length > 0) {
956
+ const emitAllLines = process.env.HIVEMIND_SEMANTIC_EMIT_ALL !== "false";
957
+ if (emitAllLines) {
958
+ const lines = [];
959
+ for (const r of normalized) {
960
+ for (const line of r.content.split("\n")) {
961
+ const trimmed = line.trim();
962
+ if (trimmed)
963
+ lines.push(`${r.path}:${line}`);
964
+ }
965
+ }
966
+ return lines;
967
+ }
968
+ }
778
969
  return refineGrepMatches(normalized, params);
779
970
  }
780
971
 
@@ -818,7 +1009,298 @@ function capOutputForClaude(output, options = {}) {
818
1009
  return keptLines.join("\n") + footer;
819
1010
  }
820
1011
 
1012
+ // dist/src/embeddings/client.js
1013
+ import { connect } from "node:net";
1014
+ import { spawn } from "node:child_process";
1015
+ import { openSync, closeSync, writeSync, unlinkSync, existsSync as existsSync3, readFileSync as readFileSync3 } from "node:fs";
1016
+ import { homedir as homedir3 } from "node:os";
1017
+ import { join as join4 } from "node:path";
1018
+
1019
+ // dist/src/embeddings/protocol.js
1020
// Default directory used for the per-user socket and pid file paths below.
var DEFAULT_SOCKET_DIR = "/tmp";
// Ten minutes, in milliseconds.
var DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1000;
// Default client-side timeout, in milliseconds.
var DEFAULT_CLIENT_TIMEOUT_MS = 2000;

/**
 * Builds the socket path for a given user id.
 * @param uid user identifier embedded in the file name
 * @param dir directory to place the socket in (defaults to DEFAULT_SOCKET_DIR)
 * @returns {string} `<dir>/hivemind-embed-<uid>.sock`
 */
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const fileName = `hivemind-embed-${uid}.sock`;
  return `${dir}/${fileName}`;
}

/**
 * Builds the pid-file path for a given user id.
 * @param uid user identifier embedded in the file name
 * @param dir directory to place the pid file in (defaults to DEFAULT_SOCKET_DIR)
 * @returns {string} `<dir>/hivemind-embed-<uid>.pid`
 */
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const fileName = `hivemind-embed-${uid}.pid`;
  return `${dir}/${fileName}`;
}
1029
+
1030
+ // dist/src/embeddings/client.js
1031
// Location of a daemon entry script under the user's home directory; the
// client falls back to it when no explicit entry is configured.
var SHARED_DAEMON_PATH = join4(join4(homedir3(), ".hivemind", "embed-deps"), "embed-daemon.js");
// Logger tagged with the "embed-client" component name.
var log3 = (message) => log("embed-client", message);
1033
/**
 * Returns a string identifying the current user: the numeric uid where
 * process.getuid exists, otherwise $USER, otherwise the literal "default".
 * @returns {string}
 */
function getUid() {
  if (typeof process.getuid === "function") {
    const numericUid = process.getuid();
    if (numericUid !== void 0)
      return String(numericUid);
  }
  return process.env.USER ?? "default";
}
1037
/**
 * Client for the local embedding daemon, reached over a per-user Unix socket.
 *
 * Every public method is best-effort: failures are reported as `null` /
 * `false` rather than thrown, so callers can simply skip embeddings.
 */
var EmbedClient = class {
  socketPath;
  pidPath;
  timeoutMs;
  daemonEntry;
  autoSpawn;
  spawnWaitMs;
  nextId = 0;
  /**
   * @param opts optional settings:
   *   socketDir   directory holding the socket and pid file (default DEFAULT_SOCKET_DIR)
   *   timeoutMs   connect / request timeout in ms (default DEFAULT_CLIENT_TIMEOUT_MS)
   *   daemonEntry daemon script path; falls back to $HIVEMIND_EMBED_DAEMON,
   *               then to SHARED_DAEMON_PATH when that file exists
   *   autoSpawn   whether to start the daemon when it is unreachable (default true)
   *   spawnWaitMs how long warmup() waits for a spawned daemon (default 5000)
   */
  constructor(opts = {}) {
    const uid = getUid();
    const dir = opts.socketDir ?? DEFAULT_SOCKET_DIR;
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync3(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }
  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   *
   * @param text text to embed
   * @param kind request kind forwarded to the daemon (default "document")
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      if (this.autoSpawn)
        this.trySpawnDaemon();
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e) {
      const err = e instanceof Error ? e.message : String(e);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      try {
        sock.end();
      } catch {
      }
    }
  }
  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   *
   * @returns {Promise<boolean>} true once a connection succeeded
   */
  async warmup() {
    try {
      const s = await this.connectOnce();
      s.end();
      return true;
    } catch {
      if (!this.autoSpawn)
        return false;
      this.trySpawnDaemon();
      try {
        const s = await this.waitForSocket();
        s.end();
        return true;
      } catch {
        return false;
      }
    }
  }
  /**
   * Opens one connection to the daemon socket.
   * Rejects on a socket error or after timeoutMs without a "connect" event.
   */
  connectOnce() {
    return new Promise((resolve2, reject) => {
      const sock = connect(this.socketPath);
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to);
        resolve2(sock);
      });
      sock.once("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
    });
  }
  /**
   * Best-effort, never-throwing attempt to start the daemon as a detached
   * background process. The pid file doubles as a spawn lock so concurrent
   * clients do not each start a daemon.
   */
  trySpawnDaemon() {
    // Check the entry point first so a missing daemon never touches the lock.
    if (!this.daemonEntry || !existsSync3(this.daemonEntry)) {
      log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      return;
    }
    const fd = this.acquirePidLock();
    if (fd === void 0)
      return;
    const onSpawnFailure = (e) => {
      const err = e instanceof Error ? e.message : String(e);
      log3(`daemon spawn failed: ${err}`);
      this.releasePidLock();
    };
    try {
      const child = spawn(process.execPath, [this.daemonEntry], {
        detached: true,
        stdio: "ignore",
        env: process.env
      });
      // Without a listener an asynchronous spawn failure is emitted as an
      // unhandled "error" event and would crash the calling process.
      child.once("error", onSpawnFailure);
      child.unref();
      if (child.pid !== void 0)
        log3(`spawned daemon pid=${child.pid}`);
    } catch (e) {
      onSpawnFailure(e);
    } finally {
      try {
        closeSync(fd);
      } catch {
      }
    }
  }
  /**
   * Exclusively creates the pid file and writes this process's pid into it.
   * A stale pid file is removed and the creation retried once.
   *
   * @returns the open file descriptor, or undefined when the lock is held by
   *          a live process or cannot be created
   */
  acquirePidLock() {
    for (let attempt = 0; attempt < 2; attempt++) {
      let fd;
      try {
        // "wx" fails if the file already exists; 0o600 keeps it owner-only.
        fd = openSync(this.pidPath, "wx", 0o600);
      } catch {
        if (attempt > 0 || !this.isPidFileStale())
          return void 0;
        try {
          unlinkSync(this.pidPath);
        } catch {
        }
        continue;
      }
      try {
        writeSync(fd, String(process.pid));
        return fd;
      } catch {
        // Do not leak the descriptor or leave an empty lock file behind.
        try {
          closeSync(fd);
        } catch {
        }
        this.releasePidLock();
        return void 0;
      }
    }
    return void 0;
  }
  /** Removes the pid file, ignoring any error. */
  releasePidLock() {
    try {
      unlinkSync(this.pidPath);
    } catch {
    }
  }
  /**
   * Reports whether the pid file is missing, unreadable, malformed, or names
   * a process that no longer exists.
   *
   * @returns {boolean} true when the pid file may be discarded
   */
  isPidFileStale() {
    let pid;
    try {
      pid = Number(readFileSync3(this.pidPath, "utf-8").trim());
    } catch {
      return true;
    }
    // Zero and negative values address process groups in kill(), not a
    // single process, so only a positive integer can name the lock holder.
    if (!Number.isInteger(pid) || pid <= 0)
      return true;
    try {
      // Signal 0 performs the existence/permission check without signalling.
      process.kill(pid, 0);
      return false;
    } catch (e) {
      // EPERM means the process exists but belongs to someone else: not stale.
      return e?.code !== "EPERM";
    }
  }
  /**
   * Polls with a growing delay (30 ms, x1.5 per round, capped at 300 ms)
   * until the socket file exists and accepts a connection.
   *
   * @throws {Error} when spawnWaitMs elapses first
   */
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync3(this.socketPath))
        continue;
      try {
        return await this.connectOnce();
      } catch {
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }
  /**
   * Writes one newline-terminated JSON request and resolves with the first
   * newline-terminated JSON line received in response.
   * Rejects on timeout, socket error, invalid JSON, or if the peer ends the
   * connection before a full line arrived.
   */
  sendAndWait(sock, req) {
    return new Promise((resolve2, reject) => {
      let buf = "";
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        buf += chunk;
        const nl = buf.indexOf("\n");
        if (nl === -1)
          return;
        const line = buf.slice(0, nl);
        clearTimeout(to);
        try {
          resolve2(JSON.parse(line));
        } catch (e) {
          reject(e);
        }
      });
      sock.on("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
      sock.on("end", () => {
        clearTimeout(to);
        reject(new Error("connection closed without response"));
      });
      sock.write(JSON.stringify(req) + "\n");
    });
  }
};
1236
/**
 * Resolves (with no value) after roughly `ms` milliseconds.
 * @param ms delay in milliseconds
 * @returns {Promise<void>}
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
1239
+
1240
+ // dist/src/embeddings/disable.js
1241
+ import { createRequire } from "node:module";
1242
+ import { homedir as homedir4 } from "node:os";
1243
+ import { join as join5 } from "node:path";
1244
+ import { pathToFileURL } from "node:url";
1245
+ var cachedStatus = null;
1246
/**
 * Checks that "@huggingface/transformers" can be resolved, first relative to
 * this module and then relative to ~/.hivemind/embed-deps/.
 * Returns nothing on success; the second lookup's error propagates when the
 * package is found in neither place.
 */
function defaultResolveTransformers() {
  const packageName = "@huggingface/transformers";
  let foundNextToModule = true;
  try {
    createRequire(import.meta.url).resolve(packageName);
  } catch {
    foundNextToModule = false;
  }
  if (foundNextToModule)
    return;
  // The trailing slash makes the URL name the directory itself, so resolution
  // starts inside it.
  const sharedDir = join5(homedir4(), ".hivemind", "embed-deps");
  const sharedRequire = createRequire(pathToFileURL(`${sharedDir}/`).href);
  sharedRequire.resolve(packageName);
}
1255
+ var _resolve = defaultResolveTransformers;
1256
/**
 * Computes the embeddings status without caching.
 * @returns {"env-disabled"|"enabled"|"no-transformers"}
 *   "env-disabled" when HIVEMIND_EMBEDDINGS is exactly "false"; otherwise
 *   "enabled" if the resolver succeeds and "no-transformers" if it throws.
 */
function detectStatus() {
  const optedOut = process.env.HIVEMIND_EMBEDDINGS === "false";
  if (optedOut)
    return "env-disabled";
  let status = "enabled";
  try {
    _resolve();
  } catch {
    status = "no-transformers";
  }
  return status;
}
1266
/**
 * Returns the embeddings status, computing it on first use and reusing the
 * cached value on every later call.
 */
function embeddingsStatus() {
  if (cachedStatus === null) {
    cachedStatus = detectStatus();
  }
  return cachedStatus;
}
1272
/**
 * True unless the embeddings status is "enabled".
 * @returns {boolean}
 */
function embeddingsDisabled() {
  const status = embeddingsStatus();
  return status !== "enabled";
}
1275
+
821
1276
  // dist/src/hooks/grep-direct.js
1277
+ import { fileURLToPath } from "node:url";
1278
+ import { dirname, join as join6 } from "node:path";
1279
// Semantic search is on unless HIVEMIND_SEMANTIC_SEARCH is exactly "false" or
// embeddings are disabled. The env check comes first so an explicit opt-out
// short-circuits before the embeddings status is evaluated.
var SEMANTIC_ENABLED = !(process.env.HIVEMIND_SEMANTIC_SEARCH === "false" || embeddingsDisabled());
1280
/**
 * Parses a millisecond timeout taken from an environment variable.
 *
 * A missing, blank, non-numeric, non-finite, zero or negative value yields
 * `fallbackMs`: handing NaN or 0 to setTimeout makes the timer fire at once,
 * which would time out every embedding request.
 *
 * @param raw raw environment value (string or undefined)
 * @param fallbackMs value used when `raw` is unusable (default 500)
 * @returns {number} a positive, finite number of milliseconds
 */
function parseSemanticTimeoutMs(raw, fallbackMs = 500) {
  if (typeof raw !== "string" || raw.trim() === "")
    return fallbackMs;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}
// Timeout for embedding the query pattern; override with
// HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS.
var SEMANTIC_TIMEOUT_MS = parseSemanticTimeoutMs(process.env.HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS);
1281
/**
 * Path of the embed daemon script relative to this module:
 * `<module dir>/../embeddings/embed-daemon.js`.
 * @returns {string}
 */
function resolveDaemonPath() {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  return join6(moduleDir, "..", "embeddings", "embed-daemon.js");
}
1284
// Lazily created EmbedClient reused by every call to getEmbedClient().
var sharedEmbedClient = null;
/**
 * Returns the shared EmbedClient, constructing it on first use with the
 * daemon path from resolveDaemonPath() and the semantic-search timeout.
 */
function getEmbedClient() {
  if (sharedEmbedClient)
    return sharedEmbedClient;
  const clientOptions = {
    daemonEntry: resolveDaemonPath(),
    timeoutMs: SEMANTIC_TIMEOUT_MS
  };
  sharedEmbedClient = new EmbedClient(clientOptions);
  return sharedEmbedClient;
}
1294
/**
 * Decides whether a grep pattern is plain enough to be embedded as a query.
 *
 * @param pattern the search pattern
 * @param fixedString whether the pattern is a literal (not a regex)
 * @returns {boolean} false for an empty or one-character pattern; true for any
 *   longer fixed string; for a regex, true only when it contains at most one
 *   of the characters | ( ) [ ] { } + ? ^ $ \
 */
function patternIsSemanticFriendly(pattern, fixedString) {
  const tooShort = !pattern || pattern.length < 2;
  if (tooShort)
    return false;
  if (fixedString)
    return true;
  const metaChars = pattern.match(/[|()\[\]{}+?^$\\]/g) ?? [];
  return metaChars.length <= 1;
}
822
1304
  function splitFirstPipelineStage(cmd) {
823
1305
  const input = cmd.trim();
824
1306
  let quote = null;
@@ -895,13 +1377,16 @@ function parseBashGrep(cmd) {
895
1377
  const first = splitFirstPipelineStage(cmd);
896
1378
  if (!first)
897
1379
  return null;
898
- if (!/^(grep|egrep|fgrep)\b/.test(first))
1380
+ const matchTool = first.match(/^(grep|egrep|fgrep|rg)\b/);
1381
+ if (!matchTool)
899
1382
  return null;
900
- const isFixed = first.startsWith("fgrep");
1383
+ const tool = matchTool[1];
1384
+ const isFixed = tool === "fgrep";
1385
+ const isRg = tool === "rg";
901
1386
  const tokens = tokenizeGrepStage(first);
902
1387
  if (!tokens || tokens.length === 0)
903
1388
  return null;
904
- let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed;
1389
+ let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = isRg, invertMatch = false, fixedString = isFixed;
905
1390
  const explicitPatterns = [];
906
1391
  let ti = 1;
907
1392
  while (ti < tokens.length) {
@@ -914,6 +1399,31 @@ function parseBashGrep(cmd) {
914
1399
  break;
915
1400
  if (token.startsWith("--")) {
916
1401
  const [flag, inlineValue] = token.split("=", 2);
1402
+ const rgValueLongs = /* @__PURE__ */ new Set([
1403
+ "--type",
1404
+ "--type-not",
1405
+ "--type-add",
1406
+ "--type-clear",
1407
+ "--glob",
1408
+ "--iglob",
1409
+ "--threads",
1410
+ "--max-columns",
1411
+ "--max-depth",
1412
+ "--max-filesize",
1413
+ "--pre",
1414
+ "--pre-glob",
1415
+ "--replace",
1416
+ "--encoding",
1417
+ "--color",
1418
+ "--colors",
1419
+ "--sort",
1420
+ "--sortr",
1421
+ "--context-separator",
1422
+ "--field-context-separator",
1423
+ "--field-match-separator",
1424
+ "--path-separator",
1425
+ "--hostname-bin"
1426
+ ]);
917
1427
  const handlers = {
918
1428
  "--ignore-case": () => {
919
1429
  ignoreCase = true;
@@ -927,14 +1437,28 @@ function parseBashGrep(cmd) {
927
1437
  filesOnly = true;
928
1438
  return false;
929
1439
  },
1440
+ // rg uses `--files` to list files (without searching). For our purposes
1441
+ // it's similar enough to `-l` that we treat it as filesOnly.
1442
+ "--files": () => {
1443
+ filesOnly = true;
1444
+ return false;
1445
+ },
930
1446
  "--count": () => {
931
1447
  countOnly = true;
932
1448
  return false;
933
1449
  },
1450
+ "--count-matches": () => {
1451
+ countOnly = true;
1452
+ return false;
1453
+ },
934
1454
  "--line-number": () => {
935
1455
  lineNumber = true;
936
1456
  return false;
937
1457
  },
1458
+ "--no-line-number": () => {
1459
+ lineNumber = false;
1460
+ return false;
1461
+ },
938
1462
  "--invert-match": () => {
939
1463
  invertMatch = true;
940
1464
  return false;
@@ -955,7 +1479,10 @@ function parseBashGrep(cmd) {
955
1479
  return true;
956
1480
  }
957
1481
  };
958
- const consumeNext = handlers[flag]?.() ?? false;
1482
+ let consumeNext = handlers[flag]?.() ?? false;
1483
+ if (!consumeNext && isRg && rgValueLongs.has(flag) && inlineValue === void 0) {
1484
+ consumeNext = true;
1485
+ }
959
1486
  if (consumeNext) {
960
1487
  ti++;
961
1488
  if (ti >= tokens.length)
@@ -966,7 +1493,9 @@ function parseBashGrep(cmd) {
966
1493
  ti++;
967
1494
  continue;
968
1495
  }
1496
+ const rgValueShorts = new Set(isRg ? ["t", "T", "g", "j", "M", "r", "E"] : []);
969
1497
  const shortFlags = token.slice(1);
1498
+ let consumedValueFlag = false;
970
1499
  for (let i = 0; i < shortFlags.length; i++) {
971
1500
  const flag = shortFlags[i];
972
1501
  switch (flag) {
@@ -985,6 +1514,10 @@ function parseBashGrep(cmd) {
985
1514
  case "n":
986
1515
  lineNumber = true;
987
1516
  break;
1517
+ case "N":
1518
+ lineNumber = false;
1519
+ break;
1520
+ // rg --no-line-number short form
988
1521
  case "v":
989
1522
  invertMatch = true;
990
1523
  break;
@@ -992,8 +1525,27 @@ function parseBashGrep(cmd) {
992
1525
  fixedString = true;
993
1526
  break;
994
1527
  case "r":
1528
+ if (isRg) {
1529
+ if (i === shortFlags.length - 1) {
1530
+ ti++;
1531
+ if (ti >= tokens.length)
1532
+ return null;
1533
+ }
1534
+ consumedValueFlag = true;
1535
+ i = shortFlags.length;
1536
+ }
1537
+ break;
995
1538
  case "R":
996
1539
  case "E":
1540
+ if (isRg && flag === "E") {
1541
+ if (i === shortFlags.length - 1) {
1542
+ ti++;
1543
+ if (ti >= tokens.length)
1544
+ return null;
1545
+ }
1546
+ consumedValueFlag = true;
1547
+ i = shortFlags.length;
1548
+ }
997
1549
  break;
998
1550
  case "A":
999
1551
  case "B":
@@ -1020,9 +1572,19 @@ function parseBashGrep(cmd) {
1020
1572
  break;
1021
1573
  }
1022
1574
  default:
1575
+ if (rgValueShorts.has(flag)) {
1576
+ if (i === shortFlags.length - 1) {
1577
+ ti++;
1578
+ if (ti >= tokens.length)
1579
+ return null;
1580
+ }
1581
+ consumedValueFlag = true;
1582
+ i = shortFlags.length;
1583
+ }
1023
1584
  break;
1024
1585
  }
1025
1586
  }
1587
+ void consumedValueFlag;
1026
1588
  ti++;
1027
1589
  }
1028
1590
  const pattern = explicitPatterns.length > 0 ? explicitPatterns[0] : tokens[ti];
@@ -1056,7 +1618,15 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
1056
1618
  invertMatch: params.invertMatch,
1057
1619
  fixedString: params.fixedString
1058
1620
  };
1059
- const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath);
1621
+ let queryEmbedding = null;
1622
+ if (SEMANTIC_ENABLED && patternIsSemanticFriendly(params.pattern, params.fixedString)) {
1623
+ try {
1624
+ queryEmbedding = await getEmbedClient().embed(params.pattern, "query");
1625
+ } catch {
1626
+ queryEmbedding = null;
1627
+ }
1628
+ }
1629
+ const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath, queryEmbedding);
1060
1630
  const joined = output.join("\n") || "(no matches)";
1061
1631
  return capOutputForClaude(joined, { kind: "grep" });
1062
1632
  }
@@ -1065,33 +1635,65 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
1065
1635
  function normalizeSessionPart(path, content) {
1066
1636
  return normalizeContent(path, content);
1067
1637
  }
1068
// Maximum number of rows listed per section of the virtual index.
var INDEX_LIMIT_PER_SECTION = 50;
/**
 * Renders the virtual index as Markdown with two sections, "## memory"
 * (summaries) and "## sessions" (raw records), each either an "empty" note
 * or a table with one row per entry.
 *
 * Free-text cells (project, description, dates) are flattened to one line and
 * have "|" escaped, so a value containing a pipe or a newline cannot break
 * the table row it is placed in.
 *
 * @param summaryRows rows with path, project, description, creation_date and
 *   last_update_date; rows whose path does not look like
 *   `/summaries/<user>/<session>.md` are skipped
 * @param sessionRows rows with path, description, creation_date and
 *   last_update_date (default: none)
 * @param opts `summaryTruncated` / `sessionTruncated` add a note saying that
 *   only the INDEX_LIMIT_PER_SECTION most recent entries are shown
 * @returns {string} the Markdown document
 */
function buildVirtualIndexContent(summaryRows, sessionRows = [], opts = {}) {
  // Makes a value safe for a single Markdown table cell.
  const cell = (value) => String(value || "").replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
  const truncationNote = (what) => `_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older ${what} reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`;
  const summaryPathPattern = /\/summaries\/([^/]+)\/([^/]+)\.md$/;
  const lines = [
    "# Session Index",
    "",
    "Two sources are available. Consult the section relevant to the question.",
    ""
  ];
  lines.push("## memory", "");
  if (summaryRows.length === 0) {
    lines.push("_(empty \u2014 no summaries ingested yet)_");
  } else {
    lines.push("AI-generated summaries per session. Read these first for topic-level overviews.");
    lines.push("");
    if (opts.summaryTruncated) {
      lines.push(truncationNote("summaries"));
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Project | Description |");
    lines.push("|---------|---------|--------------|---------|-------------|");
    for (const row of summaryRows) {
      const p = row["path"] || "";
      const match = p.match(summaryPathPattern);
      if (!match)
        continue;
      const summaryUser = match[1];
      const sessionId = match[2];
      // Links are relative to the memory root, hence no leading slash.
      const relPath = `summaries/${summaryUser}/${sessionId}.md`;
      lines.push(`| [${sessionId}](${relPath}) | ${cell(row["creation_date"])} | ${cell(row["last_update_date"])} | ${cell(row["project"])} | ${cell(row["description"])} |`);
    }
  }
  lines.push("");
  lines.push("## sessions", "");
  if (sessionRows.length === 0) {
    lines.push("_(empty \u2014 no session records ingested yet)_");
  } else {
    lines.push("Raw session records (dialogue, tool calls). Read for exact detail / quotes.");
    lines.push("");
    if (opts.sessionTruncated) {
      lines.push(truncationNote("sessions"));
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Description |");
    lines.push("|---------|---------|--------------|-------------|");
    for (const row of sessionRows) {
      const p = row["path"] || "";
      const rel = p.startsWith("/") ? p.slice(1) : p;
      const filename = p.split("/").pop() ?? p;
      lines.push(`| [${filename}](${rel}) | ${cell(row["creation_date"])} | ${cell(row["last_update_date"])} | ${cell(row["description"])} |`);
    }
  }
  lines.push("");
  return lines.join("\n");
}
1097
1699
  function buildUnionQuery(memoryQuery, sessionsQuery) {
@@ -1153,11 +1755,14 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP
1153
1755
  }
1154
1756
  }
1155
1757
  if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) {
1758
+ const fetchLimit = INDEX_LIMIT_PER_SECTION + 1;
1156
1759
  const [summaryRows, sessionRows] = await Promise.all([
1157
- api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`).catch(() => []),
1158
- api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY path`).catch(() => [])
1760
+ api.query(`SELECT path, project, description, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC LIMIT ${fetchLimit}`).catch(() => []),
1761
+ api.query(`SELECT path, MAX(description) AS description, MIN(creation_date) AS creation_date, MAX(last_update_date) AS last_update_date FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' GROUP BY path ORDER BY MAX(last_update_date) DESC LIMIT ${fetchLimit}`).catch(() => [])
1159
1762
  ]);
1160
- result.set("/index.md", buildVirtualIndexContent(summaryRows, sessionRows));
1763
+ const summaryTruncated = summaryRows.length > INDEX_LIMIT_PER_SECTION;
1764
+ const sessionTruncated = sessionRows.length > INDEX_LIMIT_PER_SECTION;
1765
+ result.set("/index.md", buildVirtualIndexContent(summaryRows.slice(0, INDEX_LIMIT_PER_SECTION), sessionRows.slice(0, INDEX_LIMIT_PER_SECTION), { summaryTruncated, sessionTruncated }));
1161
1766
  }
1162
1767
  return result;
1163
1768
  }
@@ -1677,20 +2282,20 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd,
1677
2282
  }
1678
2283
 
1679
2284
  // dist/src/hooks/query-cache.js
1680
- import { mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
1681
- import { join as join4 } from "node:path";
1682
- import { homedir as homedir3 } from "node:os";
1683
- var log3 = (msg) => log("query-cache", msg);
1684
- var DEFAULT_CACHE_ROOT = join4(homedir3(), ".deeplake", "query-cache");
2285
+ import { mkdirSync as mkdirSync2, readFileSync as readFileSync4, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
2286
+ import { join as join7 } from "node:path";
2287
+ import { homedir as homedir5 } from "node:os";
2288
+ var log4 = (msg) => log("query-cache", msg);
2289
+ var DEFAULT_CACHE_ROOT = join7(homedir5(), ".deeplake", "query-cache");
1685
2290
  var INDEX_CACHE_FILE = "index.md";
1686
2291
  function getSessionQueryCacheDir(sessionId, deps = {}) {
1687
2292
  const { cacheRoot = DEFAULT_CACHE_ROOT } = deps;
1688
- return join4(cacheRoot, sessionId);
2293
+ return join7(cacheRoot, sessionId);
1689
2294
  }
1690
2295
  function readCachedIndexContent(sessionId, deps = {}) {
1691
- const { logFn = log3 } = deps;
2296
+ const { logFn = log4 } = deps;
1692
2297
  try {
1693
- return readFileSync3(join4(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8");
2298
+ return readFileSync4(join7(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8");
1694
2299
  } catch (e) {
1695
2300
  if (e?.code === "ENOENT")
1696
2301
  return null;
@@ -1699,11 +2304,11 @@ function readCachedIndexContent(sessionId, deps = {}) {
1699
2304
  }
1700
2305
  }
1701
2306
  function writeCachedIndexContent(sessionId, content, deps = {}) {
1702
- const { logFn = log3 } = deps;
2307
+ const { logFn = log4 } = deps;
1703
2308
  try {
1704
2309
  const dir = getSessionQueryCacheDir(sessionId, deps);
1705
2310
  mkdirSync2(dir, { recursive: true });
1706
- writeFileSync2(join4(dir, INDEX_CACHE_FILE), content, "utf-8");
2311
+ writeFileSync2(join7(dir, INDEX_CACHE_FILE), content, "utf-8");
1707
2312
  } catch (e) {
1708
2313
  logFn(`write failed for session=${sessionId}: ${e.message}`);
1709
2314
  }
@@ -1711,22 +2316,22 @@ function writeCachedIndexContent(sessionId, content, deps = {}) {
1711
2316
 
1712
2317
  // dist/src/utils/direct-run.js
1713
2318
  import { resolve } from "node:path";
1714
- import { fileURLToPath } from "node:url";
2319
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
1715
2320
  function isDirectRun(metaUrl) {
1716
2321
  const entry = process.argv[1];
1717
2322
  if (!entry)
1718
2323
  return false;
1719
2324
  try {
1720
- return resolve(fileURLToPath(metaUrl)) === resolve(entry);
2325
+ return resolve(fileURLToPath2(metaUrl)) === resolve(entry);
1721
2326
  } catch {
1722
2327
  return false;
1723
2328
  }
1724
2329
  }
1725
2330
 
1726
2331
  // dist/src/hooks/memory-path-utils.js
1727
- import { homedir as homedir4 } from "node:os";
1728
- import { join as join5 } from "node:path";
1729
- var MEMORY_PATH = join5(homedir4(), ".deeplake", "memory");
2332
+ import { homedir as homedir6 } from "node:os";
2333
+ import { join as join8 } from "node:path";
2334
+ var MEMORY_PATH = join8(homedir6(), ".deeplake", "memory");
1730
2335
  var TILDE_PATH = "~/.deeplake/memory";
1731
2336
  var HOME_VAR_PATH = "$HOME/.deeplake/memory";
1732
2337
  var SAFE_BUILTINS = /* @__PURE__ */ new Set([
@@ -1842,13 +2447,13 @@ function rewritePaths(cmd) {
1842
2447
  }
1843
2448
 
1844
2449
  // dist/src/hooks/codex/pre-tool-use.js
1845
- var log4 = (msg) => log("codex-pre", msg);
1846
- var __bundleDir = dirname(fileURLToPath2(import.meta.url));
1847
- var SHELL_BUNDLE = existsSync3(join6(__bundleDir, "shell", "deeplake-shell.js")) ? join6(__bundleDir, "shell", "deeplake-shell.js") : join6(__bundleDir, "..", "shell", "deeplake-shell.js");
2450
+ var log5 = (msg) => log("codex-pre", msg);
2451
+ var __bundleDir = dirname2(fileURLToPath3(import.meta.url));
2452
+ var SHELL_BUNDLE = existsSync4(join9(__bundleDir, "shell", "deeplake-shell.js")) ? join9(__bundleDir, "shell", "deeplake-shell.js") : join9(__bundleDir, "..", "shell", "deeplake-shell.js");
1848
2453
  function buildUnsupportedGuidance() {
1849
2454
  return "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry.";
1850
2455
  }
1851
- function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log4) {
2456
+ function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log5) {
1852
2457
  try {
1853
2458
  return execFileSync("node", [shellBundle, "-c", cmd], {
1854
2459
  encoding: "utf-8",
@@ -1873,7 +2478,7 @@ function buildIndexContent(rows) {
1873
2478
  return lines.join("\n");
1874
2479
  }
1875
2480
  async function processCodexPreToolUse(input, deps = {}) {
1876
- const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log4 } = deps;
2481
+ const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log5 } = deps;
1877
2482
  const cmd = input.tool_input?.command ?? "";
1878
2483
  logFn(`hook fired: cmd=${cmd}`);
1879
2484
  if (!touchesMemory(cmd))
@@ -2083,7 +2688,7 @@ async function main() {
2083
2688
  }
2084
2689
  if (isDirectRun(import.meta.url)) {
2085
2690
  main().catch((e) => {
2086
- log4(`fatal: ${e.message}`);
2691
+ log5(`fatal: ${e.message}`);
2087
2692
  process.exit(0);
2088
2693
  });
2089
2694
  }