@deeplake/hivemind 0.6.47 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +158 -51
  4. package/bundle/cli.js +4103 -282
  5. package/codex/bundle/capture.js +510 -90
  6. package/codex/bundle/commands/auth-login.js +219 -72
  7. package/codex/bundle/embeddings/embed-daemon.js +243 -0
  8. package/codex/bundle/pre-tool-use.js +713 -108
  9. package/codex/bundle/session-start-setup.js +209 -58
  10. package/codex/bundle/session-start.js +40 -11
  11. package/codex/bundle/shell/deeplake-shell.js +679 -112
  12. package/codex/bundle/stop.js +477 -59
  13. package/codex/bundle/wiki-worker.js +312 -11
  14. package/cursor/bundle/capture.js +768 -57
  15. package/cursor/bundle/commands/auth-login.js +219 -72
  16. package/cursor/bundle/embeddings/embed-daemon.js +243 -0
  17. package/cursor/bundle/pre-tool-use.js +1684 -0
  18. package/cursor/bundle/session-end.js +223 -2
  19. package/cursor/bundle/session-start.js +209 -57
  20. package/cursor/bundle/shell/deeplake-shell.js +679 -112
  21. package/cursor/bundle/wiki-worker.js +571 -0
  22. package/hermes/bundle/capture.js +1194 -0
  23. package/hermes/bundle/commands/auth-login.js +1009 -0
  24. package/hermes/bundle/embeddings/embed-daemon.js +243 -0
  25. package/hermes/bundle/package.json +1 -0
  26. package/hermes/bundle/pre-tool-use.js +1681 -0
  27. package/hermes/bundle/session-end.js +265 -0
  28. package/hermes/bundle/session-start.js +655 -0
  29. package/hermes/bundle/shell/deeplake-shell.js +69905 -0
  30. package/hermes/bundle/wiki-worker.js +572 -0
  31. package/mcp/bundle/server.js +289 -69
  32. package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
  33. package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
  34. package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
  35. package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
  36. package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
  37. package/openclaw/dist/index.js +752 -702
  38. package/openclaw/openclaw.plugin.json +1 -1
  39. package/openclaw/package.json +1 -1
  40. package/package.json +7 -3
  41. package/pi/extension-source/hivemind.ts +807 -0
@@ -46081,14 +46081,14 @@ var require_turndown_cjs = __commonJS({
46081
46081
  } else if (node.nodeType === 1) {
46082
46082
  replacement = replacementForNode.call(self2, node);
46083
46083
  }
46084
- return join7(output, replacement);
46084
+ return join11(output, replacement);
46085
46085
  }, "");
46086
46086
  }
46087
46087
  function postProcess(output) {
46088
46088
  var self2 = this;
46089
46089
  this.rules.forEach(function(rule) {
46090
46090
  if (typeof rule.append === "function") {
46091
- output = join7(output, rule.append(self2.options));
46091
+ output = join11(output, rule.append(self2.options));
46092
46092
  }
46093
46093
  });
46094
46094
  return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, "");
@@ -46100,7 +46100,7 @@ var require_turndown_cjs = __commonJS({
46100
46100
  if (whitespace.leading || whitespace.trailing) content = content.trim();
46101
46101
  return whitespace.leading + rule.replacement(content, node, this.options) + whitespace.trailing;
46102
46102
  }
46103
- function join7(output, replacement) {
46103
+ function join11(output, replacement) {
46104
46104
  var s12 = trimTrailingNewlines(output);
46105
46105
  var s22 = trimLeadingNewlines(replacement);
46106
46106
  var nls = Math.max(output.length - s12.length, replacement.length - s22.length);
@@ -59933,6 +59933,49 @@ var init_expansion_ENLSRCXJ = __esm({
59933
59933
  }
59934
59934
  });
59935
59935
 
59936
+ // dist/src/index-marker-store.js
59937
+ var index_marker_store_exports = {};
59938
+ __export(index_marker_store_exports, {
59939
+ buildIndexMarkerPath: () => buildIndexMarkerPath,
59940
+ getIndexMarkerDir: () => getIndexMarkerDir,
59941
+ hasFreshIndexMarker: () => hasFreshIndexMarker,
59942
+ writeIndexMarker: () => writeIndexMarker
59943
+ });
59944
+ import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
59945
+ import { join as join6 } from "node:path";
59946
+ import { tmpdir } from "node:os";
59947
+ function getIndexMarkerDir() {
59948
+ return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join6(tmpdir(), "hivemind-deeplake-indexes");
59949
+ }
59950
+ function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
59951
+ const markerKey = [workspaceId, orgId, table, suffix].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
59952
+ return join6(getIndexMarkerDir(), `${markerKey}.json`);
59953
+ }
59954
+ function hasFreshIndexMarker(markerPath) {
59955
+ if (!existsSync3(markerPath))
59956
+ return false;
59957
+ try {
59958
+ const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
59959
+ const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
59960
+ if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
59961
+ return false;
59962
+ return true;
59963
+ } catch {
59964
+ return false;
59965
+ }
59966
+ }
59967
+ function writeIndexMarker(markerPath) {
59968
+ mkdirSync(getIndexMarkerDir(), { recursive: true });
59969
+ writeFileSync(markerPath, JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
59970
+ }
59971
+ var INDEX_MARKER_TTL_MS;
59972
+ var init_index_marker_store = __esm({
59973
+ "dist/src/index-marker-store.js"() {
59974
+ "use strict";
59975
+ INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
59976
+ }
59977
+ });
59978
+
59936
59979
  // dist/src/shell/deeplake-shell.js
59937
59980
  import { createInterface } from "node:readline";
59938
59981
 
@@ -66754,9 +66797,6 @@ function loadConfig() {
66754
66797
 
66755
66798
  // dist/src/deeplake-api.js
66756
66799
  import { randomUUID } from "node:crypto";
66757
- import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
66758
- import { join as join6 } from "node:path";
66759
- import { tmpdir } from "node:os";
66760
66800
 
66761
66801
  // dist/src/utils/debug.js
66762
66802
  import { appendFileSync } from "node:fs";
@@ -66779,7 +66819,26 @@ function sqlLike(value) {
66779
66819
  return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_");
66780
66820
  }
66781
66821
 
66822
+ // dist/src/embeddings/columns.js
66823
+ var SUMMARY_EMBEDDING_COL = "summary_embedding";
66824
+ var MESSAGE_EMBEDDING_COL = "message_embedding";
66825
+
66826
+ // dist/src/utils/client-header.js
66827
+ var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
66828
+ function deeplakeClientValue() {
66829
+ return "hivemind";
66830
+ }
66831
+ function deeplakeClientHeader() {
66832
+ return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
66833
+ }
66834
+
66782
66835
  // dist/src/deeplake-api.js
66836
+ var indexMarkerStorePromise = null;
66837
+ function getIndexMarkerStore() {
66838
+ if (!indexMarkerStorePromise)
66839
+ indexMarkerStorePromise = Promise.resolve().then(() => (init_index_marker_store(), index_marker_store_exports));
66840
+ return indexMarkerStorePromise;
66841
+ }
66783
66842
  var log2 = (msg) => log("sdk", msg);
66784
66843
  function summarizeSql(sql, maxLen = 220) {
66785
66844
  const compact = sql.replace(/\s+/g, " ").trim();
@@ -66799,7 +66858,6 @@ var MAX_RETRIES = 3;
66799
66858
  var BASE_DELAY_MS = 500;
66800
66859
  var MAX_CONCURRENCY = 5;
66801
66860
  var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
66802
- var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
66803
66861
  function sleep(ms3) {
66804
66862
  return new Promise((resolve5) => setTimeout(resolve5, ms3));
66805
66863
  }
@@ -66819,9 +66877,6 @@ function isTransientHtml403(text) {
66819
66877
  const body = text.toLowerCase();
66820
66878
  return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
66821
66879
  }
66822
- function getIndexMarkerDir() {
66823
- return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join6(tmpdir(), "hivemind-deeplake-indexes");
66824
- }
66825
66880
  var Semaphore = class {
66826
66881
  max;
66827
66882
  waiting = [];
@@ -66890,7 +66945,8 @@ var DeeplakeApi = class {
66890
66945
  headers: {
66891
66946
  Authorization: `Bearer ${this.token}`,
66892
66947
  "Content-Type": "application/json",
66893
- "X-Activeloop-Org-Id": this.orgId
66948
+ "X-Activeloop-Org-Id": this.orgId,
66949
+ ...deeplakeClientHeader()
66894
66950
  },
66895
66951
  signal,
66896
66952
  body: JSON.stringify({ query: sql })
@@ -66917,7 +66973,8 @@ var DeeplakeApi = class {
66917
66973
  }
66918
66974
  const text = await resp.text().catch(() => "");
66919
66975
  const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
66920
- if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
66976
+ const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
66977
+ if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
66921
66978
  const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
66922
66979
  log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
66923
66980
  await sleep(delay);
@@ -66951,7 +67008,7 @@ var DeeplakeApi = class {
66951
67008
  const lud = row.lastUpdateDate ?? ts3;
66952
67009
  const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
66953
67010
  if (exists.length > 0) {
66954
- let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
67011
+ let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
66955
67012
  if (row.project !== void 0)
66956
67013
  setClauses += `, project = '${sqlStr(row.project)}'`;
66957
67014
  if (row.description !== void 0)
@@ -66959,8 +67016,8 @@ var DeeplakeApi = class {
66959
67016
  await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
66960
67017
  } else {
66961
67018
  const id = randomUUID();
66962
- let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date";
66963
- let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
67019
+ let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
67020
+ let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
66964
67021
  if (row.project !== void 0) {
66965
67022
  cols += ", project";
66966
67023
  vals += `, '${sqlStr(row.project)}'`;
@@ -66985,48 +67042,83 @@ var DeeplakeApi = class {
66985
67042
  buildLookupIndexName(table, suffix) {
66986
67043
  return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
66987
67044
  }
66988
- getLookupIndexMarkerPath(table, suffix) {
66989
- const markerKey = [
66990
- this.workspaceId,
66991
- this.orgId,
66992
- table,
66993
- suffix
66994
- ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
66995
- return join6(getIndexMarkerDir(), `${markerKey}.json`);
66996
- }
66997
- hasFreshLookupIndexMarker(table, suffix) {
66998
- const markerPath = this.getLookupIndexMarkerPath(table, suffix);
66999
- if (!existsSync3(markerPath))
67000
- return false;
67001
- try {
67002
- const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
67003
- const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
67004
- if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
67005
- return false;
67006
- return true;
67007
- } catch {
67008
- return false;
67009
- }
67010
- }
67011
- markLookupIndexReady(table, suffix) {
67012
- mkdirSync(getIndexMarkerDir(), { recursive: true });
67013
- writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
67014
- }
67015
67045
  async ensureLookupIndex(table, suffix, columnsSql) {
67016
- if (this.hasFreshLookupIndexMarker(table, suffix))
67046
+ const markers = await getIndexMarkerStore();
67047
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
67048
+ if (markers.hasFreshIndexMarker(markerPath))
67017
67049
  return;
67018
67050
  const indexName = this.buildLookupIndexName(table, suffix);
67019
67051
  try {
67020
67052
  await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
67021
- this.markLookupIndexReady(table, suffix);
67053
+ markers.writeIndexMarker(markerPath);
67022
67054
  } catch (e6) {
67023
67055
  if (isDuplicateIndexError(e6)) {
67024
- this.markLookupIndexReady(table, suffix);
67056
+ markers.writeIndexMarker(markerPath);
67025
67057
  return;
67026
67058
  }
67027
67059
  log2(`index "${indexName}" skipped: ${e6.message}`);
67028
67060
  }
67029
67061
  }
67062
+ /**
67063
+ * Ensure a vector column exists on the given table.
67064
+ *
67065
+ * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
67066
+ * EXISTS …` on every SessionStart. On a long-running workspace that's
67067
+ * already migrated, every call returns 500 "Column already exists" — noisy
67068
+ * in the log and a wasted round-trip. Worse, the very first call after the
67069
+ * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
67070
+ * window (~30s) during which subsequent INSERTs fail; minimising the
67071
+ * number of ALTER calls minimises exposure to that window.
67072
+ *
67073
+ * New flow:
67074
+ * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
67075
+ * return — zero network calls.
67076
+ * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
67077
+ * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
67078
+ * bug. If the column is present → mark + return.
67079
+ * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
67080
+ * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
67081
+ * "already exists" (race: another client added it between our SELECT
67082
+ * and ALTER).
67083
+ *
67084
+ * Marker uses the same dir / TTL as ensureLookupIndex so both schema
67085
+ * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
67086
+ */
67087
+ async ensureEmbeddingColumn(table, column) {
67088
+ await this.ensureColumn(table, column, "FLOAT4[]");
67089
+ }
67090
+ /**
67091
+ * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
67092
+ * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
67093
+ * column that was added to the schema after the table was originally
67094
+ * created. Used today for `summary_embedding`, `message_embedding`, and
67095
+ * the `agent` column (added 2026-04-11) — the latter has no fallback if
67096
+ * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
67097
+ * with `column "agent" does not exist`.
67098
+ */
67099
+ async ensureColumn(table, column, sqlType) {
67100
+ const markers = await getIndexMarkerStore();
67101
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
67102
+ if (markers.hasFreshIndexMarker(markerPath))
67103
+ return;
67104
+ const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
67105
+ const rows = await this.query(colCheck);
67106
+ if (rows.length > 0) {
67107
+ markers.writeIndexMarker(markerPath);
67108
+ return;
67109
+ }
67110
+ try {
67111
+ await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
67112
+ } catch (e6) {
67113
+ const msg = e6 instanceof Error ? e6.message : String(e6);
67114
+ if (!/already exists/i.test(msg))
67115
+ throw e6;
67116
+ const recheck = await this.query(colCheck);
67117
+ if (recheck.length === 0)
67118
+ throw e6;
67119
+ }
67120
+ markers.writeIndexMarker(markerPath);
67121
+ }
67030
67122
  /** List all tables in the workspace (with retry). */
67031
67123
  async listTables(forceRefresh = false) {
67032
67124
  if (!forceRefresh && this._tablesCache)
@@ -67042,7 +67134,8 @@ var DeeplakeApi = class {
67042
67134
  const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
67043
67135
  headers: {
67044
67136
  Authorization: `Bearer ${this.token}`,
67045
- "X-Activeloop-Org-Id": this.orgId
67137
+ "X-Activeloop-Org-Id": this.orgId,
67138
+ ...deeplakeClientHeader()
67046
67139
  }
67047
67140
  });
67048
67141
  if (resp.ok) {
@@ -67067,28 +67160,60 @@ var DeeplakeApi = class {
67067
67160
  }
67068
67161
  return { tables: [], cacheable: false };
67069
67162
  }
67163
+ /**
67164
+ * Run a `CREATE TABLE` with an extra outer retry budget. The base
67165
+ * `query()` already retries 3 times on fetch errors (~3.5s total), but a
67166
+ * failed CREATE is permanent corruption — every subsequent SELECT against
67167
+ * the missing table fails. Wrapping in an outer loop with longer backoff
67168
+ * (2s, 5s, then 10s) gives us ~17s of reach across transient network
67169
+ * blips before giving up. Failures still propagate; getApi() resets its
67170
+ * cache on init failure (openclaw plugin) so the next call retries the
67171
+ * whole init flow.
67172
+ */
67173
+ async createTableWithRetry(sql, label) {
67174
+ const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
67175
+ let lastErr = null;
67176
+ for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
67177
+ try {
67178
+ await this.query(sql);
67179
+ return;
67180
+ } catch (err) {
67181
+ lastErr = err;
67182
+ const msg = err instanceof Error ? err.message : String(err);
67183
+ log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
67184
+ if (attempt < OUTER_BACKOFFS_MS.length) {
67185
+ await sleep(OUTER_BACKOFFS_MS[attempt]);
67186
+ }
67187
+ }
67188
+ }
67189
+ throw lastErr;
67190
+ }
67070
67191
  /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
67071
67192
  async ensureTable(name) {
67072
67193
  const tbl = name ?? this.tableName;
67073
67194
  const tables = await this.listTables();
67074
67195
  if (!tables.includes(tbl)) {
67075
67196
  log2(`table "${tbl}" not found, creating`);
67076
- await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
67197
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
67077
67198
  log2(`table "${tbl}" created`);
67078
67199
  if (!tables.includes(tbl))
67079
67200
  this._tablesCache = [...tables, tbl];
67080
67201
  }
67202
+ await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
67203
+ await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
67081
67204
  }
67082
67205
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
67083
67206
  async ensureSessionsTable(name) {
67084
67207
  const tables = await this.listTables();
67085
67208
  if (!tables.includes(name)) {
67086
67209
  log2(`table "${name}" not found, creating`);
67087
- await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
67210
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
67088
67211
  log2(`table "${name}" created`);
67089
67212
  if (!tables.includes(name))
67090
67213
  this._tablesCache = [...tables, name];
67091
67214
  }
67215
+ await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
67216
+ await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
67092
67217
  await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
67093
67218
  }
67094
67219
  };
@@ -67096,6 +67221,8 @@ var DeeplakeApi = class {
67096
67221
  // dist/src/shell/deeplake-fs.js
67097
67222
  import { basename as basename4, posix } from "node:path";
67098
67223
  import { randomUUID as randomUUID2 } from "node:crypto";
67224
+ import { fileURLToPath } from "node:url";
67225
+ import { dirname as dirname4, join as join9 } from "node:path";
67099
67226
 
67100
67227
  // dist/src/shell/grep-core.js
67101
67228
  var TOOL_INPUT_FIELDS = [
@@ -67261,24 +67388,25 @@ function normalizeContent(path2, raw) {
67261
67388
  return raw;
67262
67389
  }
67263
67390
  if (Array.isArray(obj.turns)) {
67264
- const header = [];
67265
- if (obj.date_time)
67266
- header.push(`date: ${obj.date_time}`);
67267
- if (obj.speakers) {
67268
- const s10 = obj.speakers;
67269
- const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", ");
67270
- if (names)
67271
- header.push(`speakers: ${names}`);
67272
- }
67391
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
67273
67392
  const lines = obj.turns.map((t6) => {
67274
67393
  const sp = String(t6?.speaker ?? t6?.name ?? "?").trim();
67275
67394
  const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim();
67276
67395
  const tag = t6?.dia_id ? `[${t6.dia_id}] ` : "";
67277
- return `${tag}${sp}: ${tx}`;
67396
+ return `${dateHeader}${tag}${sp}: ${tx}`;
67278
67397
  });
67279
- const out2 = [...header, ...lines].join("\n");
67398
+ const out2 = lines.join("\n");
67280
67399
  return out2.trim() ? out2 : raw;
67281
67400
  }
67401
+ if (obj.turn && typeof obj.turn === "object" && !Array.isArray(obj.turn)) {
67402
+ const t6 = obj.turn;
67403
+ const sp = String(t6.speaker ?? t6.name ?? "?").trim();
67404
+ const tx = String(t6.text ?? t6.content ?? "").replace(/\s+/g, " ").trim();
67405
+ const tag = t6.dia_id ? `[${String(t6.dia_id)}] ` : "";
67406
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
67407
+ const line = `${dateHeader}${tag}${sp}: ${tx}`;
67408
+ return line.trim() ? line : raw;
67409
+ }
67282
67410
  const stripRecalled = (t6) => {
67283
67411
  const i11 = t6.indexOf("<recalled-memories>");
67284
67412
  if (i11 === -1)
@@ -67321,8 +67449,38 @@ function buildPathCondition(targetPath) {
67321
67449
  return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
67322
67450
  }
67323
67451
  async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
67324
- const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
67452
+ const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, queryEmbedding, multiWordPatterns } = opts;
67325
67453
  const limit = opts.limit ?? 100;
67454
+ if (queryEmbedding && queryEmbedding.length > 0) {
67455
+ const vecLit = serializeFloat4Array(queryEmbedding);
67456
+ const semanticLimit = Math.min(limit, Number(process.env.HIVEMIND_SEMANTIC_LIMIT ?? "20"));
67457
+ const lexicalLimit = Math.min(limit, Number(process.env.HIVEMIND_HYBRID_LEXICAL_LIMIT ?? "20"));
67458
+ const filterPatternsForLex = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
67459
+ const memLexFilter = buildContentFilter("summary::text", likeOp, filterPatternsForLex);
67460
+ const sessLexFilter = buildContentFilter("message::text", likeOp, filterPatternsForLex);
67461
+ const memLexQuery = memLexFilter ? `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, 1.0 AS score FROM "${memoryTable}" WHERE 1=1${pathFilter}${memLexFilter} LIMIT ${lexicalLimit}` : null;
67462
+ const sessLexQuery = sessLexFilter ? `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, 1.0 AS score FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessLexFilter} LIMIT ${lexicalLimit}` : null;
67463
+ const memSemQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, (summary_embedding <#> ${vecLit}) AS score FROM "${memoryTable}" WHERE ARRAY_LENGTH(summary_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
67464
+ const sessSemQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, (message_embedding <#> ${vecLit}) AS score FROM "${sessionsTable}" WHERE ARRAY_LENGTH(message_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
67465
+ const parts = [memSemQuery, sessSemQuery];
67466
+ if (memLexQuery)
67467
+ parts.push(memLexQuery);
67468
+ if (sessLexQuery)
67469
+ parts.push(sessLexQuery);
67470
+ const unionSql = parts.map((q17) => `(${q17})`).join(" UNION ALL ");
67471
+ const outerLimit = semanticLimit + lexicalLimit;
67472
+ const rows2 = await api.query(`SELECT path, content, source_order, creation_date, score FROM (` + unionSql + `) AS combined ORDER BY score DESC LIMIT ${outerLimit}`);
67473
+ const seen = /* @__PURE__ */ new Set();
67474
+ const unique = [];
67475
+ for (const row of rows2) {
67476
+ const p22 = String(row["path"]);
67477
+ if (seen.has(p22))
67478
+ continue;
67479
+ seen.add(p22);
67480
+ unique.push({ path: p22, content: String(row["content"] ?? "") });
67481
+ }
67482
+ return unique;
67483
+ }
67326
67484
  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
67327
67485
  const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
67328
67486
  const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
@@ -67334,6 +67492,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
67334
67492
  content: String(row["content"] ?? "")
67335
67493
  }));
67336
67494
  }
67495
+ function serializeFloat4Array(vec) {
67496
+ const parts = [];
67497
+ for (const v27 of vec) {
67498
+ if (!Number.isFinite(v27))
67499
+ return "NULL";
67500
+ parts.push(String(v27));
67501
+ }
67502
+ return `ARRAY[${parts.join(",")}]::float4[]`;
67503
+ }
67337
67504
  function buildPathFilter(targetPath) {
67338
67505
  const condition = buildPathCondition(targetPath);
67339
67506
  return condition ? ` AND ${condition}` : "";
@@ -67426,7 +67593,7 @@ function buildGrepSearchOptions(params, targetPath) {
67426
67593
  return {
67427
67594
  pathFilter: buildPathFilter(targetPath),
67428
67595
  contentScanOnly: hasRegexMeta,
67429
- likeOp: params.ignoreCase ? "ILIKE" : "LIKE",
67596
+ likeOp: process.env.HIVEMIND_GREP_LIKE === "case-sensitive" ? "LIKE" : "ILIKE",
67430
67597
  escapedPattern: sqlLike(params.pattern),
67431
67598
  prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
67432
67599
  prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
@@ -67482,6 +67649,346 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) {
67482
67649
  return output;
67483
67650
  }
67484
67651
 
67652
+ // dist/src/embeddings/client.js
67653
+ import { connect } from "node:net";
67654
+ import { spawn } from "node:child_process";
67655
+ import { openSync, closeSync, writeSync, unlinkSync, existsSync as existsSync4, readFileSync as readFileSync3 } from "node:fs";
67656
+ import { homedir as homedir3 } from "node:os";
67657
+ import { join as join7 } from "node:path";
67658
+
67659
+ // dist/src/embeddings/protocol.js
67660
// Directory that holds the per-user daemon socket and pid file by default.
var DEFAULT_SOCKET_DIR = "/tmp";
// Ten minutes, in milliseconds. Not referenced in this part of the bundle —
// presumably the daemon's idle shutdown delay (confirm in embed-daemon.js).
var DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1e3;
// Two seconds, in milliseconds; the client's default connect/request timeout.
var DEFAULT_CLIENT_TIMEOUT_MS = 2e3;

/**
 * Builds the path of the embedding daemon's socket for a given user.
 *
 * @param {string|number} uid - Identifier embedded in the file name.
 * @param {string} [dir] - Directory containing the socket.
 * @returns {string} `<dir>/hivemind-embed-<uid>.sock`
 */
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const fileName = `hivemind-embed-${uid}.sock`;
  return `${dir}/${fileName}`;
}

/**
 * Builds the path of the embedding daemon's pid file for a given user.
 *
 * @param {string|number} uid - Identifier embedded in the file name.
 * @param {string} [dir] - Directory containing the pid file.
 * @returns {string} `<dir>/hivemind-embed-<uid>.pid`
 */
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const fileName = `hivemind-embed-${uid}.pid`;
  return `${dir}/${fileName}`;
}
67669
+
67670
+ // dist/src/embeddings/client.js
67671
// Location of the daemon script under the user's home directory
// (~/.hivemind/embed-deps/embed-daemon.js).
var SHARED_DAEMON_PATH = join7(homedir3(), ".hivemind", "embed-deps", "embed-daemon.js");

// Logger bound to the "embed-client" tag.
var log3 = function (message) {
  return log("embed-client", message);
};

/**
 * Returns a string identifying the current user, used to name the per-user
 * socket and pid files.
 *
 * @returns {string} The numeric uid as a string when `process.getuid` is
 *   available; otherwise `$USER`, falling back to "default".
 */
function getUid() {
  if (typeof process.getuid === "function") {
    const numericUid = process.getuid();
    if (numericUid !== void 0) {
      return String(numericUid);
    }
  }
  return process.env.USER ?? "default";
}
67677
/**
 * Client for the local embedding daemon, reached over a per-user socket file.
 *
 * Wire format (see `sendAndWait`): the request is one JSON object followed by
 * "\n"; the first newline-terminated line of the reply is parsed as JSON.
 *
 * The public methods are best-effort: failures are reported as `null` /
 * `false` instead of exceptions so callers can skip embeddings.
 */
var EmbedClient = class {
  socketPath;
  pidPath;
  timeoutMs;
  daemonEntry;
  autoSpawn;
  spawnWaitMs;
  nextId = 0;

  /**
   * @param {object} [opts]
   * @param {string} [opts.socketDir] - Directory of the socket and pid file
   *   (defaults to DEFAULT_SOCKET_DIR).
   * @param {number} [opts.timeoutMs] - Connect and request timeout in ms.
   * @param {string} [opts.daemonEntry] - Daemon script to spawn. Falls back to
   *   $HIVEMIND_EMBED_DAEMON, then to SHARED_DAEMON_PATH when that file exists.
   * @param {boolean} [opts.autoSpawn=true] - Spawn the daemon when unreachable.
   * @param {number} [opts.spawnWaitMs=5000] - How long `warmup` waits for a
   *   freshly spawned daemon.
   */
  constructor(opts = {}) {
    const uid = getUid();
    // Use the shared default rather than a second hard-coded "/tmp".
    const dir = opts.socketDir ?? DEFAULT_SOCKET_DIR;
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync4(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }

  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   *
   * @param {string} text - Text to embed.
   * @param {string} [kind="document"] - Forwarded to the daemon as `kind`.
   * @returns {Promise<unknown|null>} The `embedding` field of the reply, or null.
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      if (this.autoSpawn) {
        this.trySpawnDaemon();
      }
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e6) {
      const err = e6 instanceof Error ? e6.message : String(e6);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      try {
        sock.end();
      } catch {
      }
    }
  }

  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   *
   * @returns {Promise<boolean>} true once a connection succeeded.
   */
  async warmup() {
    try {
      const s10 = await this.connectOnce();
      s10.end();
      return true;
    } catch {
      if (!this.autoSpawn) {
        return false;
      }
      this.trySpawnDaemon();
      try {
        const s10 = await this.waitForSocket();
        s10.end();
        return true;
      } catch {
        return false;
      }
    }
  }

  /**
   * Opens one connection to the daemon socket.
   *
   * @returns {Promise<import("node:net").Socket>} Resolves with the connected
   *   socket; rejects on a socket error or after `timeoutMs`.
   */
  connectOnce() {
    return new Promise((resolve5, reject) => {
      const sock = connect(this.socketPath);
      const to3 = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to3);
        resolve5(sock);
      });
      sock.once("error", (e6) => {
        clearTimeout(to3);
        reject(e6);
      });
    });
  }

  /**
   * Best-effort daemon launch guarded by an exclusively-created pid file.
   * Never throws: every failure is logged and the pid file is released so a
   * later call can try again.
   */
  trySpawnDaemon() {
    const fd = this.acquirePidFile();
    if (fd === void 0) {
      return;
    }
    let spawned = false;
    try {
      if (!this.daemonEntry || !existsSync4(this.daemonEntry)) {
        log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      } else {
        const child = spawn(process.execPath, [this.daemonEntry], {
          detached: true,
          stdio: "ignore",
          env: process.env
        });
        // Without a listener, an asynchronous spawn failure would surface as
        // an unhandled 'error' event on the child and crash this process.
        child.once("error", (e6) => {
          const err = e6 instanceof Error ? e6.message : String(e6);
          log3(`daemon spawn failed: ${err}`);
          this.removePidFile();
        });
        child.unref();
        log3(`spawned daemon pid=${child.pid}`);
        spawned = true;
      }
    } catch (e6) {
      const err = e6 instanceof Error ? e6.message : String(e6);
      log3(`daemon spawn failed: ${err}`);
    } finally {
      try {
        closeSync(fd);
      } catch {
      }
    }
    if (!spawned) {
      // Nothing was started, so don't leave a pid file that blocks retries.
      this.removePidFile();
    }
  }

  /**
   * Creates the pid file exclusively ("wx") and writes this process's pid
   * into it. An existing file judged stale is removed and creation is retried
   * once.
   * NOTE(review): presumably the daemon rewrites the file with its own pid
   * once it is running — confirm against embed-daemon.js.
   *
   * @returns {number|undefined} Open file descriptor, or undefined when the
   *   file could not be claimed.
   */
  acquirePidFile() {
    const createExclusive = () => {
      // 384 is decimal for mode 0o600.
      const fd = openSync(this.pidPath, "wx", 384);
      try {
        writeSync(fd, String(process.pid));
      } catch (e6) {
        // Don't leak the descriptor when the write fails after the open.
        try {
          closeSync(fd);
        } catch {
        }
        throw e6;
      }
      return fd;
    };
    try {
      return createExclusive();
    } catch {
      if (!this.isPidFileStale()) {
        return void 0;
      }
      this.removePidFile();
      try {
        return createExclusive();
      } catch {
        return void 0;
      }
    }
  }

  /** Removes the pid file, ignoring any error (e.g. it is already gone). */
  removePidFile() {
    try {
      unlinkSync(this.pidPath);
    } catch {
    }
  }

  /**
   * Decides whether the pid file can be discarded.
   *
   * @returns {boolean} true when the file is unreadable, does not contain a
   *   non-zero number, or `process.kill(pid, 0)` throws for that pid.
   */
  isPidFileStale() {
    try {
      const raw = readFileSync3(this.pidPath, "utf-8").trim();
      const pid = Number(raw);
      if (!pid || Number.isNaN(pid)) {
        return true;
      }
      try {
        // Signal 0 delivers nothing; it only probes whether the pid can be signalled.
        process.kill(pid, 0);
        return false;
      } catch {
        return true;
      }
    } catch {
      return true;
    }
  }

  /**
   * Polls until the socket file exists and accepts a connection, sleeping
   * between attempts (30 ms at first, growing by 1.5x up to 300 ms).
   *
   * @returns {Promise<import("node:net").Socket>} The connected socket.
   * @throws {Error} When no connection succeeded before `spawnWaitMs` elapsed.
   */
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync4(this.socketPath)) {
        continue;
      }
      try {
        return await this.connectOnce();
      } catch {
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }

  /**
   * Writes one newline-terminated JSON request and waits for the first
   * newline-terminated reply line.
   *
   * @param {import("node:net").Socket} sock - Connected socket.
   * @param {object} req - Request object; sent as `JSON.stringify(req) + "\n"`.
   * @returns {Promise<any>} The parsed reply; rejects on timeout, socket
   *   error, invalid JSON, or when the peer closes without replying.
   */
  sendAndWait(sock, req) {
    return new Promise((resolve5, reject) => {
      let buf = "";
      const to3 = setTimeout(() => {
        sock.destroy();
        reject(new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        buf += chunk;
        const nl3 = buf.indexOf("\n");
        if (nl3 === -1) {
          return;
        }
        const line = buf.slice(0, nl3);
        clearTimeout(to3);
        try {
          resolve5(JSON.parse(line));
        } catch (e6) {
          reject(e6);
        }
      });
      sock.on("error", (e6) => {
        clearTimeout(to3);
        reject(e6);
      });
      sock.on("end", () => {
        clearTimeout(to3);
        reject(new Error("connection closed without response"));
      });
      sock.write(JSON.stringify(req) + "\n");
    });
  }
};
67876
/**
 * Promise-based delay.
 *
 * @param {number} ms3 - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function sleep2(ms3) {
  return new Promise((wake) => {
    setTimeout(wake, ms3);
  });
}
67879
+
67880
+ // dist/src/embeddings/sql.js
67881
/**
 * Renders an embedding vector as a SQL `float4[]` literal.
 *
 * @param {Iterable<number> & { length: number } | null | undefined} vec
 * @returns {string} `ARRAY[v1,v2,...]::float4[]`, or the string `NULL` when
 *   `vec` is missing, empty, or holds a value that is not a finite number.
 */
function embeddingSqlLiteral(vec) {
  const isMissing = !vec || vec.length === 0;
  if (isMissing) {
    return "NULL";
  }
  const components = [...vec];
  const hasInvalid = components.some((component) => !Number.isFinite(component));
  if (hasInvalid) {
    return "NULL";
  }
  const rendered = components.map((component) => String(component));
  return `ARRAY[${rendered.join(",")}]::float4[]`;
}
67892
+
67893
+ // dist/src/embeddings/disable.js
67894
+ import { createRequire } from "node:module";
67895
+ import { homedir as homedir4 } from "node:os";
67896
+ import { join as join8 } from "node:path";
67897
+ import { pathToFileURL } from "node:url";
67898
+ var cachedStatus = null;
67899
/**
 * Checks that "@huggingface/transformers" can be resolved, first relative to
 * this module and then relative to ~/.hivemind/embed-deps/.
 *
 * @returns {void} Returns normally when either lookup succeeds.
 * @throws Whatever the second `require.resolve` throws when both lookups fail.
 */
function defaultResolveTransformers() {
  let resolvedLocally = true;
  try {
    createRequire(import.meta.url).resolve("@huggingface/transformers");
  } catch {
    resolvedLocally = false;
  }
  if (resolvedLocally) {
    return;
  }
  const sharedDir = join8(homedir4(), ".hivemind", "embed-deps");
  // Trailing slash so the URL denotes the directory itself.
  const sharedRequire = createRequire(pathToFileURL(`${sharedDir}/`).href);
  sharedRequire.resolve("@huggingface/transformers");
}
67908
+ var _resolve = defaultResolveTransformers;
67909
/**
 * Determines whether embeddings can be used in this process.
 *
 * @returns {"env-disabled"|"enabled"|"no-transformers"} "env-disabled" when
 *   HIVEMIND_EMBEDDINGS is exactly "false"; otherwise "enabled" when
 *   `_resolve()` returns and "no-transformers" when it throws.
 */
function detectStatus() {
  const optedOut = process.env.HIVEMIND_EMBEDDINGS === "false";
  if (optedOut) {
    return "env-disabled";
  }
  let status = "enabled";
  try {
    _resolve();
  } catch {
    status = "no-transformers";
  }
  return status;
}
67919
/**
 * Memoized wrapper around `detectStatus()`: the first call stores the result
 * in `cachedStatus`, and later calls return the stored value.
 *
 * @returns {string} The detected status.
 */
function embeddingsStatus() {
  if (cachedStatus === null) {
    cachedStatus = detectStatus();
  }
  return cachedStatus;
}
67925
/**
 * @returns {boolean} true unless `embeddingsStatus()` reports "enabled".
 */
function embeddingsDisabled() {
  const status = embeddingsStatus();
  return status !== "enabled";
}
67928
+
67929
+ // dist/src/hooks/virtual-table-query.js
67930
// Number of rows quoted in the "Showing N most-recent" truncation notes.
var INDEX_LIMIT_PER_SECTION = 50;

/**
 * Makes a value safe to place inside a Markdown table cell: falsy values
 * become "", line breaks collapse to a space, and "|" is backslash-escaped so
 * free text cannot split the row or add columns.
 */
function escapeIndexTableCell(value) {
  return String(value || "").replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
}

/** Builds the italic note shown when a section lists only the newest rows. */
function indexTruncationNote(noun) {
  return `_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older ${noun} reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`;
}

/**
 * Renders the virtual session index as Markdown with two sections:
 * "## memory" (summaries) and "## sessions" (raw session records).
 *
 * @param {Array<object>} summaryRows - Rows read via `path`, `project`,
 *   `description`, `creation_date`, `last_update_date`. Rows whose path does
 *   not match `/summaries/<user>/<session>.md` are skipped.
 * @param {Array<object>} [sessionRows] - Rows read via `path`, `description`,
 *   `creation_date`, `last_update_date`.
 * @param {{summaryTruncated?: boolean, sessionTruncated?: boolean}} [opts] -
 *   When set, the matching section carries a "most-recent of many" note.
 * @returns {string} The Markdown document, ending with a newline.
 */
function buildVirtualIndexContent(summaryRows, sessionRows = [], opts = {}) {
  const lines = [
    "# Session Index",
    "",
    "Two sources are available. Consult the section relevant to the question.",
    ""
  ];

  lines.push("## memory", "");
  if (summaryRows.length === 0) {
    lines.push("_(empty \u2014 no summaries ingested yet)_");
  } else {
    lines.push("AI-generated summaries per session. Read these first for topic-level overviews.");
    lines.push("");
    if (opts.summaryTruncated) {
      lines.push(indexTruncationNote("summaries"));
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Project | Description |");
    lines.push("|---------|---------|--------------|---------|-------------|");
    for (const row of summaryRows) {
      // Coerce so a non-string path cannot throw on .match().
      const path = String(row["path"] || "");
      const match = path.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/);
      if (!match) {
        continue;
      }
      const [, summaryUser, sessionId] = match;
      const relPath = `summaries/${summaryUser}/${sessionId}.md`;
      const project = escapeIndexTableCell(row["project"]);
      const description = escapeIndexTableCell(row["description"]);
      const creationDate = escapeIndexTableCell(row["creation_date"]);
      const lastUpdateDate = escapeIndexTableCell(row["last_update_date"]);
      lines.push(`| [${sessionId}](${relPath}) | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`);
    }
  }

  lines.push("");
  lines.push("## sessions", "");
  if (sessionRows.length === 0) {
    lines.push("_(empty \u2014 no session records ingested yet)_");
  } else {
    lines.push("Raw session records (dialogue, tool calls). Read for exact detail / quotes.");
    lines.push("");
    if (opts.sessionTruncated) {
      lines.push(indexTruncationNote("sessions"));
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Description |");
    lines.push("|---------|---------|--------------|-------------|");
    for (const row of sessionRows) {
      const path = String(row["path"] || "");
      // Link targets are relative: drop a single leading slash.
      const rel = path.startsWith("/") ? path.slice(1) : path;
      const filename = path.split("/").pop() ?? path;
      const description = escapeIndexTableCell(row["description"]);
      const creationDate = escapeIndexTableCell(row["creation_date"]);
      const lastUpdateDate = escapeIndexTableCell(row["last_update_date"]);
      lines.push(`| [${filename}](${rel}) | ${creationDate} | ${lastUpdateDate} | ${description} |`);
    }
  }

  lines.push("");
  return lines.join("\n");
}
67991
+
67485
67992
  // dist/src/shell/deeplake-fs.js
67486
67993
  var BATCH_SIZE = 10;
67487
67994
  var PREFETCH_BATCH_SIZE = 50;
@@ -67510,6 +68017,9 @@ function normalizeSessionMessage(path2, message) {
67510
68017
  const raw = typeof message === "string" ? message : JSON.stringify(message);
67511
68018
  return normalizeContent(path2, raw);
67512
68019
  }
68020
/**
 * Locates the embedding daemon script relative to this module's own file:
 * `<directory of this file>/embeddings/embed-daemon.js`.
 *
 * @returns {string} Filesystem path of the daemon script.
 */
function resolveEmbedDaemonPath() {
  const thisFile = fileURLToPath(import.meta.url);
  const thisDir = dirname4(thisFile);
  return join9(thisDir, "embeddings", "embed-daemon.js");
}
67513
68023
/**
 * Normalizes every message with `normalizeSessionMessage` and joins the
 * results with newlines.
 *
 * @param {string} path2 - Path passed through to `normalizeSessionMessage`.
 * @param {Array<unknown>} messages - Messages to normalize, in order.
 * @returns {string} Normalized messages separated by "\n".
 */
function joinSessionMessages(path2, messages) {
  const normalizedMessages = messages.map((entry) => normalizeSessionMessage(path2, entry));
  return normalizedMessages.join("\n");
}
@@ -67539,6 +68049,8 @@ var DeeplakeFs = class _DeeplakeFs {
67539
68049
  // Paths that live in the sessions table (multi-row, read by concatenation)
67540
68050
  sessionPaths = /* @__PURE__ */ new Set();
67541
68051
  sessionsTable = null;
68052
+ // Embedding client lazily created on first flush. Lives as long as the process.
68053
+ embedClient = null;
67542
68054
  constructor(client, table, mountPoint) {
67543
68055
  this.client = client;
67544
68056
  this.table = table;
@@ -67572,7 +68084,14 @@ var DeeplakeFs = class _DeeplakeFs {
67572
68084
  })();
67573
68085
  const sessionsBootstrap = sessionsTable && sessionSyncOk ? (async () => {
67574
68086
  try {
67575
- const sessionRows = await client.query(`SELECT path, SUM(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`);
68087
+ const sessionRows = await client.query(
68088
+ // NOTE: SUM(size_bytes) returns NULL on the Deeplake backend when combined
68089
+ // with GROUP BY path (confirmed against workspace `with_embedding`). MAX
68090
+ // works and — for the single-row-per-file layout — is equal to SUM. For
68091
+ // multi-row-per-turn layouts MAX under-reports total size but stays >0
68092
+ // so files don't look like empty placeholders in ls/stat.
68093
+ `SELECT path, MAX(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`
68094
+ );
67576
68095
  for (const row of sessionRows) {
67577
68096
  const p22 = row["path"];
67578
68097
  if (!fs3.files.has(p22)) {
@@ -67632,7 +68151,8 @@ var DeeplakeFs = class _DeeplakeFs {
67632
68151
  }
67633
68152
  const rows = [...this.pending.values()];
67634
68153
  this.pending.clear();
67635
- const results = await Promise.allSettled(rows.map((r10) => this.upsertRow(r10)));
68154
+ const embeddings = await this.computeEmbeddings(rows);
68155
+ const results = await Promise.allSettled(rows.map((r10, i11) => this.upsertRow(r10, embeddings[i11])));
67636
68156
  let failures = 0;
67637
68157
  for (let i11 = 0; i11 < results.length; i11++) {
67638
68158
  if (results[i11].status === "rejected") {
@@ -67646,7 +68166,17 @@ var DeeplakeFs = class _DeeplakeFs {
67646
68166
  throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`);
67647
68167
  }
67648
68168
  }
67649
- async upsertRow(r10) {
68169
+ async computeEmbeddings(rows) {
68170
+ if (rows.length === 0)
68171
+ return [];
68172
+ if (embeddingsDisabled())
68173
+ return rows.map(() => null);
68174
+ if (!this.embedClient) {
68175
+ this.embedClient = new EmbedClient({ daemonEntry: resolveEmbedDaemonPath() });
68176
+ }
68177
+ return Promise.all(rows.map((r10) => this.embedClient.embed(r10.contentText, "document")));
68178
+ }
68179
+ async upsertRow(r10, embedding) {
67650
68180
  const text = sqlStr(r10.contentText);
67651
68181
  const p22 = sqlStr(r10.path);
67652
68182
  const fname = sqlStr(r10.filename);
@@ -67654,8 +68184,9 @@ var DeeplakeFs = class _DeeplakeFs {
67654
68184
  const ts3 = (/* @__PURE__ */ new Date()).toISOString();
67655
68185
  const cd = r10.creationDate ?? ts3;
67656
68186
  const lud = r10.lastUpdateDate ?? ts3;
68187
+ const embSql = embeddingSqlLiteral(embedding);
67657
68188
  if (this.flushed.has(r10.path)) {
67658
- let setClauses = `filename = '${fname}', summary = E'${text}', mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`;
68189
+ let setClauses = `filename = '${fname}', summary = E'${text}', summary_embedding = ${embSql}, mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`;
67659
68190
  if (r10.project !== void 0)
67660
68191
  setClauses += `, project = '${sqlStr(r10.project)}'`;
67661
68192
  if (r10.description !== void 0)
@@ -67663,54 +68194,27 @@ var DeeplakeFs = class _DeeplakeFs {
67663
68194
  await this.client.query(`UPDATE "${this.table}" SET ${setClauses} WHERE path = '${p22}'`);
67664
68195
  } else {
67665
68196
  const id = randomUUID2();
67666
- const cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : "");
67667
- const vals = `'${id}', '${p22}', '${fname}', E'${text}', '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? `, '${sqlStr(r10.description)}'` : "");
68197
+ const cols = "id, path, filename, summary, summary_embedding, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : "");
68198
+ const vals = `'${id}', '${p22}', '${fname}', E'${text}', ${embSql}, '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? `, '${sqlStr(r10.description)}'` : "");
67668
68199
  await this.client.query(`INSERT INTO "${this.table}" (${cols}) VALUES (${vals})`);
67669
68200
  this.flushed.add(r10.path);
67670
68201
  }
67671
68202
  }
67672
68203
  // ── Virtual index.md generation ────────────────────────────────────────────
67673
68204
  async generateVirtualIndex() {
67674
- const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`);
67675
- const sessionPathsByKey = /* @__PURE__ */ new Map();
67676
- for (const sp of this.sessionPaths) {
67677
- const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/);
67678
- if (hivemind) {
67679
- sessionPathsByKey.set(hivemind[1], sp.slice(1));
67680
- } else {
67681
- const fname = sp.split("/").pop() ?? "";
67682
- const stem = fname.replace(/\.[^.]+$/, "");
67683
- if (stem)
67684
- sessionPathsByKey.set(stem, sp.slice(1));
67685
- }
67686
- }
67687
- const lines = [
67688
- "# Session Index",
67689
- "",
67690
- "List of all Claude Code sessions with summaries.",
67691
- "",
67692
- "| Session | Conversation | Created | Last Updated | Project | Description |",
67693
- "|---------|-------------|---------|--------------|---------|-------------|"
67694
- ];
67695
- for (const row of rows) {
67696
- const p22 = row["path"];
67697
- const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/);
67698
- if (!match2)
67699
- continue;
67700
- const summaryUser = match2[1];
67701
- const sessionId = match2[2];
67702
- const relPath = `summaries/${summaryUser}/${sessionId}.md`;
67703
- const baseName = sessionId.replace(/_summary$/, "");
67704
- const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName);
67705
- const convLink = convPath ? `[messages](${convPath})` : "";
67706
- const project = row["project"] || "";
67707
- const description = row["description"] || "";
67708
- const creationDate = row["creation_date"] || "";
67709
- const lastUpdateDate = row["last_update_date"] || "";
67710
- lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`);
68205
+ const fetchLimit = INDEX_LIMIT_PER_SECTION + 1;
68206
+ const summaryRows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC LIMIT ${fetchLimit}`);
68207
+ let sessionRows = [];
68208
+ if (this.sessionsTable) {
68209
+ try {
68210
+ sessionRows = await this.client.query(`SELECT path, MAX(description) AS description, MIN(creation_date) AS creation_date, MAX(last_update_date) AS last_update_date FROM "${this.sessionsTable}" WHERE path LIKE '${sqlStr("/sessions/")}%' GROUP BY path ORDER BY MAX(last_update_date) DESC LIMIT ${fetchLimit}`);
68211
+ } catch {
68212
+ sessionRows = [];
68213
+ }
67711
68214
  }
67712
- lines.push("");
67713
- return lines.join("\n");
68215
+ const summaryTruncated = summaryRows.length > INDEX_LIMIT_PER_SECTION;
68216
+ const sessionTruncated = sessionRows.length > INDEX_LIMIT_PER_SECTION;
68217
+ return buildVirtualIndexContent(summaryRows.slice(0, INDEX_LIMIT_PER_SECTION), sessionRows.slice(0, INDEX_LIMIT_PER_SECTION), { summaryTruncated, sessionTruncated });
67714
68218
  }
67715
68219
  // ── batch prefetch ────────────────────────────────────────────────────────
67716
68220
  /**
@@ -69018,8 +69522,8 @@ function stripQuotes(val) {
69018
69522
  }
69019
69523
 
69020
69524
  // node_modules/yargs-parser/build/lib/index.js
69021
- import { readFileSync as readFileSync3 } from "fs";
69022
- import { createRequire } from "node:module";
69525
+ import { readFileSync as readFileSync4 } from "fs";
69526
+ import { createRequire as createRequire2 } from "node:module";
69023
69527
  var _a3;
69024
69528
  var _b;
69025
69529
  var _c;
@@ -69032,7 +69536,7 @@ if (nodeVersion) {
69032
69536
  }
69033
69537
  }
69034
69538
  var env = process ? process.env : {};
69035
- var require2 = createRequire ? createRequire(import.meta.url) : void 0;
69539
+ var require2 = createRequire2 ? createRequire2(import.meta.url) : void 0;
69036
69540
  var parser = new YargsParser({
69037
69541
  cwd: process.cwd,
69038
69542
  env: () => {
@@ -69045,7 +69549,7 @@ var parser = new YargsParser({
69045
69549
  if (typeof require2 !== "undefined") {
69046
69550
  return require2(path2);
69047
69551
  } else if (path2.match(/\.json$/)) {
69048
- return JSON.parse(readFileSync3(path2, "utf8"));
69552
+ return JSON.parse(readFileSync4(path2, "utf8"));
69049
69553
  } else {
69050
69554
  throw Error("only .json config files are supported in ESM");
69051
69555
  }
@@ -69064,6 +69568,33 @@ yargsParser.looksLikeNumber = looksLikeNumber;
69064
69568
  var lib_default = yargsParser;
69065
69569
 
69066
69570
  // dist/src/shell/grep-interceptor.js
69571
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
69572
+ import { dirname as dirname5, join as join10 } from "node:path";
69573
+ var SEMANTIC_SEARCH_ENABLED = process.env.HIVEMIND_SEMANTIC_SEARCH !== "false" && !embeddingsDisabled();
69574
/**
 * Parses the semantic-embedding timeout override.
 *
 * @param {string|undefined} raw - Raw environment value.
 * @returns {number} The parsed value when it is a finite, non-negative number;
 *   otherwise the default of 500 ms. Rejecting NaN / negative / infinite values
 *   keeps an unusable delay from being handed to `setTimeout`.
 */
function parseSemanticEmbedTimeoutMs(raw) {
  const parsed = Number(raw ?? "500");
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : 500;
}
// Timeout for the grep query embedding, overridable via
// HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS.
var SEMANTIC_EMBED_TIMEOUT_MS = parseSemanticEmbedTimeoutMs(process.env.HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS);
69575
/**
 * Locates the embedding daemon script relative to this module's own file:
 * `<directory of this file>/../embeddings/embed-daemon.js`.
 *
 * @returns {string} Filesystem path of the daemon script.
 */
function resolveGrepEmbedDaemonPath() {
  const thisFile = fileURLToPath2(import.meta.url);
  const thisDir = dirname5(thisFile);
  return join10(thisDir, "..", "embeddings", "embed-daemon.js");
}
69578
// Module-wide client instance, created on the first getGrepEmbedClient() call.
var sharedGrepEmbedClient = null;

/**
 * Returns the shared `EmbedClient` used for grep query embeddings, creating
 * it on first use with the grep daemon path and SEMANTIC_EMBED_TIMEOUT_MS.
 *
 * @returns {EmbedClient} The same instance on every call.
 */
function getGrepEmbedClient() {
  if (sharedGrepEmbedClient) {
    return sharedGrepEmbedClient;
  }
  const daemonEntry = resolveGrepEmbedDaemonPath();
  const timeoutMs = SEMANTIC_EMBED_TIMEOUT_MS;
  sharedGrepEmbedClient = new EmbedClient({ daemonEntry, timeoutMs });
  return sharedGrepEmbedClient;
}
69588
/**
 * Decides whether a grep pattern is plain enough to also be embedded for
 * semantic search.
 *
 * @param {string} pattern - The grep pattern.
 * @param {boolean} fixedString - Whether the pattern is matched literally.
 * @returns {boolean} false for an empty or single-character pattern; true for
 *   any other fixed-string pattern; otherwise true only when the pattern
 *   contains at most one of the characters | ( ) [ ] { } + ? ^ $ \
 */
function patternIsSemanticFriendly(pattern, fixedString) {
  if (!pattern || pattern.length < 2) {
    return false;
  }
  if (fixedString) {
    return true;
  }
  const metaMatches = pattern.match(/[|()\[\]{}+?^$\\]/g);
  return !metaMatches || metaMatches.length <= 1;
}
69067
69598
  var MAX_FALLBACK_CANDIDATES = 500;
69068
69599
  function createGrepCommand(client, fs3, table, sessionsTable) {
69069
69600
  return Yi2("grep", async (args, ctx) => {
@@ -69105,12 +69636,21 @@ function createGrepCommand(client, fs3, table, sessionsTable) {
69105
69636
  filesOnly: Boolean(parsed.l || parsed["files-with-matches"]),
69106
69637
  countOnly: Boolean(parsed.c || parsed["count"])
69107
69638
  };
69639
+ let queryEmbedding = null;
69640
+ if (SEMANTIC_SEARCH_ENABLED && patternIsSemanticFriendly(pattern, matchParams.fixedString)) {
69641
+ try {
69642
+ queryEmbedding = await getGrepEmbedClient().embed(pattern, "query");
69643
+ } catch {
69644
+ queryEmbedding = null;
69645
+ }
69646
+ }
69108
69647
  let rows = [];
69109
69648
  try {
69110
69649
  const searchOptions = {
69111
69650
  ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd),
69112
69651
  pathFilter: buildPathFilterForTargets(targets),
69113
- limit: 100
69652
+ limit: 100,
69653
+ queryEmbedding
69114
69654
  };
69115
69655
  const queryRows = await Promise.race([
69116
69656
  searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions),
@@ -69120,6 +69660,21 @@ function createGrepCommand(client, fs3, table, sessionsTable) {
69120
69660
  } catch {
69121
69661
  rows = [];
69122
69662
  }
69663
+ if (rows.length === 0 && queryEmbedding) {
69664
+ try {
69665
+ const lexicalOptions = {
69666
+ ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd),
69667
+ pathFilter: buildPathFilterForTargets(targets),
69668
+ limit: 100
69669
+ };
69670
+ const lexicalRows = await Promise.race([
69671
+ searchDeeplakeTables(client, table, sessionsTable ?? "sessions", lexicalOptions),
69672
+ new Promise((_16, reject) => setTimeout(() => reject(new Error("timeout")), 3e3))
69673
+ ]);
69674
+ rows.push(...lexicalRows);
69675
+ } catch {
69676
+ }
69677
+ }
69123
69678
  const seen = /* @__PURE__ */ new Set();
69124
69679
  rows = rows.filter((r10) => seen.has(r10.path) ? false : (seen.add(r10.path), true));
69125
69680
  if (rows.length === 0) {
@@ -69133,7 +69688,19 @@ function createGrepCommand(client, fs3, table, sessionsTable) {
69133
69688
  }
69134
69689
  }
69135
69690
  const normalized = rows.map((r10) => ({ path: r10.path, content: normalizeContent(r10.path, r10.content) }));
69136
- const output = refineGrepMatches(normalized, matchParams);
69691
+ let output;
69692
+ if (queryEmbedding && queryEmbedding.length > 0 && process.env.HIVEMIND_SEMANTIC_EMIT_ALL !== "false") {
69693
+ output = [];
69694
+ for (const r10 of normalized) {
69695
+ for (const line of r10.content.split("\n")) {
69696
+ const trimmed = line.trim();
69697
+ if (trimmed)
69698
+ output.push(`${r10.path}:${line}`);
69699
+ }
69700
+ }
69701
+ } else {
69702
+ output = refineGrepMatches(normalized, matchParams);
69703
+ }
69137
69704
  return {
69138
69705
  stdout: output.length > 0 ? output.join("\n") + "\n" : "",
69139
69706
  stderr: "",