@deeplake/hivemind 0.6.48 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +244 -20
  4. package/bundle/cli.js +1369 -112
  5. package/codex/bundle/capture.js +546 -96
  6. package/codex/bundle/commands/auth-login.js +290 -81
  7. package/codex/bundle/embeddings/embed-daemon.js +243 -0
  8. package/codex/bundle/pre-tool-use.js +666 -111
  9. package/codex/bundle/session-start-setup.js +231 -64
  10. package/codex/bundle/session-start.js +52 -13
  11. package/codex/bundle/shell/deeplake-shell.js +716 -119
  12. package/codex/bundle/skilify-worker.js +907 -0
  13. package/codex/bundle/stop.js +819 -79
  14. package/codex/bundle/wiki-worker.js +312 -11
  15. package/cursor/bundle/capture.js +1116 -64
  16. package/cursor/bundle/commands/auth-login.js +290 -81
  17. package/cursor/bundle/embeddings/embed-daemon.js +243 -0
  18. package/cursor/bundle/pre-tool-use.js +598 -77
  19. package/cursor/bundle/session-end.js +520 -2
  20. package/cursor/bundle/session-start.js +257 -65
  21. package/cursor/bundle/shell/deeplake-shell.js +716 -119
  22. package/cursor/bundle/skilify-worker.js +907 -0
  23. package/cursor/bundle/wiki-worker.js +571 -0
  24. package/hermes/bundle/capture.js +1119 -65
  25. package/hermes/bundle/commands/auth-login.js +290 -81
  26. package/hermes/bundle/embeddings/embed-daemon.js +243 -0
  27. package/hermes/bundle/pre-tool-use.js +597 -76
  28. package/hermes/bundle/session-end.js +522 -1
  29. package/hermes/bundle/session-start.js +260 -65
  30. package/hermes/bundle/shell/deeplake-shell.js +716 -119
  31. package/hermes/bundle/skilify-worker.js +907 -0
  32. package/hermes/bundle/wiki-worker.js +572 -0
  33. package/mcp/bundle/server.js +290 -75
  34. package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
  35. package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
  36. package/openclaw/dist/chunks/config-ZLH6JFJS.js +34 -0
  37. package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
  38. package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
  39. package/openclaw/dist/index.js +929 -710
  40. package/openclaw/dist/skilify-worker.js +907 -0
  41. package/openclaw/openclaw.plugin.json +1 -1
  42. package/openclaw/package.json +1 -1
  43. package/openclaw/skills/SKILL.md +19 -0
  44. package/package.json +7 -1
  45. package/pi/extension-source/hivemind.ts +603 -22
@@ -46081,14 +46081,14 @@ var require_turndown_cjs = __commonJS({
46081
46081
  } else if (node.nodeType === 1) {
46082
46082
  replacement = replacementForNode.call(self2, node);
46083
46083
  }
46084
- return join7(output, replacement);
46084
+ return join11(output, replacement);
46085
46085
  }, "");
46086
46086
  }
46087
46087
  function postProcess(output) {
46088
46088
  var self2 = this;
46089
46089
  this.rules.forEach(function(rule) {
46090
46090
  if (typeof rule.append === "function") {
46091
- output = join7(output, rule.append(self2.options));
46091
+ output = join11(output, rule.append(self2.options));
46092
46092
  }
46093
46093
  });
46094
46094
  return output.replace(/^[\t\r\n]+/, "").replace(/[\t\r\n\s]+$/, "");
@@ -46100,7 +46100,7 @@ var require_turndown_cjs = __commonJS({
46100
46100
  if (whitespace.leading || whitespace.trailing) content = content.trim();
46101
46101
  return whitespace.leading + rule.replacement(content, node, this.options) + whitespace.trailing;
46102
46102
  }
46103
- function join7(output, replacement) {
46103
+ function join11(output, replacement) {
46104
46104
  var s12 = trimTrailingNewlines(output);
46105
46105
  var s22 = trimLeadingNewlines(replacement);
46106
46106
  var nls = Math.max(output.length - s12.length, replacement.length - s22.length);
@@ -59933,6 +59933,49 @@ var init_expansion_ENLSRCXJ = __esm({
59933
59933
  }
59934
59934
  });
59935
59935
 
59936
+ // dist/src/index-marker-store.js
59937
+ var index_marker_store_exports = {};
59938
+ __export(index_marker_store_exports, {
59939
+ buildIndexMarkerPath: () => buildIndexMarkerPath,
59940
+ getIndexMarkerDir: () => getIndexMarkerDir,
59941
+ hasFreshIndexMarker: () => hasFreshIndexMarker,
59942
+ writeIndexMarker: () => writeIndexMarker
59943
+ });
59944
+ import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
59945
+ import { join as join6 } from "node:path";
59946
+ import { tmpdir } from "node:os";
59947
+ function getIndexMarkerDir() {
59948
+ return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join6(tmpdir(), "hivemind-deeplake-indexes");
59949
+ }
59950
+ function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
59951
+ const markerKey = [workspaceId, orgId, table, suffix].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
59952
+ return join6(getIndexMarkerDir(), `${markerKey}.json`);
59953
+ }
59954
+ function hasFreshIndexMarker(markerPath) {
59955
+ if (!existsSync3(markerPath))
59956
+ return false;
59957
+ try {
59958
+ const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
59959
+ const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
59960
+ if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
59961
+ return false;
59962
+ return true;
59963
+ } catch {
59964
+ return false;
59965
+ }
59966
+ }
59967
+ function writeIndexMarker(markerPath) {
59968
+ mkdirSync(getIndexMarkerDir(), { recursive: true });
59969
+ writeFileSync(markerPath, JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
59970
+ }
59971
+ var INDEX_MARKER_TTL_MS;
59972
+ var init_index_marker_store = __esm({
59973
+ "dist/src/index-marker-store.js"() {
59974
+ "use strict";
59975
+ INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
59976
+ }
59977
+ });
59978
+
59936
59979
  // dist/src/shell/deeplake-shell.js
59937
59980
  import { createInterface } from "node:readline";
59938
59981
 
@@ -66748,15 +66791,13 @@ function loadConfig() {
66748
66791
  apiUrl: process.env.HIVEMIND_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai",
66749
66792
  tableName: process.env.HIVEMIND_TABLE ?? "memory",
66750
66793
  sessionsTableName: process.env.HIVEMIND_SESSIONS_TABLE ?? "sessions",
66794
+ skillsTableName: process.env.HIVEMIND_SKILLS_TABLE ?? "skills",
66751
66795
  memoryPath: process.env.HIVEMIND_MEMORY_PATH ?? join4(home, ".deeplake", "memory")
66752
66796
  };
66753
66797
  }
66754
66798
 
66755
66799
  // dist/src/deeplake-api.js
66756
66800
  import { randomUUID } from "node:crypto";
66757
- import { existsSync as existsSync3, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
66758
- import { join as join6 } from "node:path";
66759
- import { tmpdir } from "node:os";
66760
66801
 
66761
66802
  // dist/src/utils/debug.js
66762
66803
  import { appendFileSync } from "node:fs";
@@ -66778,8 +66819,33 @@ function sqlStr(value) {
66778
66819
  function sqlLike(value) {
66779
66820
  return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_");
66780
66821
  }
66822
+ function sqlIdent(name) {
66823
+ if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) {
66824
+ throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`);
66825
+ }
66826
+ return name;
66827
+ }
66828
+
66829
+ // dist/src/embeddings/columns.js
66830
+ var SUMMARY_EMBEDDING_COL = "summary_embedding";
66831
+ var MESSAGE_EMBEDDING_COL = "message_embedding";
66832
+
66833
+ // dist/src/utils/client-header.js
66834
+ var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
66835
+ function deeplakeClientValue() {
66836
+ return "hivemind";
66837
+ }
66838
+ function deeplakeClientHeader() {
66839
+ return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
66840
+ }
66781
66841
 
66782
66842
  // dist/src/deeplake-api.js
66843
+ var indexMarkerStorePromise = null;
66844
+ function getIndexMarkerStore() {
66845
+ if (!indexMarkerStorePromise)
66846
+ indexMarkerStorePromise = Promise.resolve().then(() => (init_index_marker_store(), index_marker_store_exports));
66847
+ return indexMarkerStorePromise;
66848
+ }
66783
66849
  var log2 = (msg) => log("sdk", msg);
66784
66850
  function summarizeSql(sql, maxLen = 220) {
66785
66851
  const compact = sql.replace(/\s+/g, " ").trim();
@@ -66799,7 +66865,6 @@ var MAX_RETRIES = 3;
66799
66865
  var BASE_DELAY_MS = 500;
66800
66866
  var MAX_CONCURRENCY = 5;
66801
66867
  var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
66802
- var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
66803
66868
  function sleep(ms3) {
66804
66869
  return new Promise((resolve5) => setTimeout(resolve5, ms3));
66805
66870
  }
@@ -66819,9 +66884,6 @@ function isTransientHtml403(text) {
66819
66884
  const body = text.toLowerCase();
66820
66885
  return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
66821
66886
  }
66822
- function getIndexMarkerDir() {
66823
- return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join6(tmpdir(), "hivemind-deeplake-indexes");
66824
- }
66825
66887
  var Semaphore = class {
66826
66888
  max;
66827
66889
  waiting = [];
@@ -66890,7 +66952,8 @@ var DeeplakeApi = class {
66890
66952
  headers: {
66891
66953
  Authorization: `Bearer ${this.token}`,
66892
66954
  "Content-Type": "application/json",
66893
- "X-Activeloop-Org-Id": this.orgId
66955
+ "X-Activeloop-Org-Id": this.orgId,
66956
+ ...deeplakeClientHeader()
66894
66957
  },
66895
66958
  signal,
66896
66959
  body: JSON.stringify({ query: sql })
@@ -66917,7 +66980,8 @@ var DeeplakeApi = class {
66917
66980
  }
66918
66981
  const text = await resp.text().catch(() => "");
66919
66982
  const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
66920
- if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
66983
+ const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
66984
+ if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
66921
66985
  const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
66922
66986
  log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
66923
66987
  await sleep(delay);
@@ -66951,7 +67015,7 @@ var DeeplakeApi = class {
66951
67015
  const lud = row.lastUpdateDate ?? ts3;
66952
67016
  const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
66953
67017
  if (exists.length > 0) {
66954
- let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
67018
+ let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
66955
67019
  if (row.project !== void 0)
66956
67020
  setClauses += `, project = '${sqlStr(row.project)}'`;
66957
67021
  if (row.description !== void 0)
@@ -66959,8 +67023,8 @@ var DeeplakeApi = class {
66959
67023
  await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
66960
67024
  } else {
66961
67025
  const id = randomUUID();
66962
- let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date";
66963
- let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
67026
+ let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
67027
+ let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
66964
67028
  if (row.project !== void 0) {
66965
67029
  cols += ", project";
66966
67030
  vals += `, '${sqlStr(row.project)}'`;
@@ -66985,48 +67049,83 @@ var DeeplakeApi = class {
66985
67049
  buildLookupIndexName(table, suffix) {
66986
67050
  return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
66987
67051
  }
66988
- getLookupIndexMarkerPath(table, suffix) {
66989
- const markerKey = [
66990
- this.workspaceId,
66991
- this.orgId,
66992
- table,
66993
- suffix
66994
- ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
66995
- return join6(getIndexMarkerDir(), `${markerKey}.json`);
66996
- }
66997
- hasFreshLookupIndexMarker(table, suffix) {
66998
- const markerPath = this.getLookupIndexMarkerPath(table, suffix);
66999
- if (!existsSync3(markerPath))
67000
- return false;
67001
- try {
67002
- const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
67003
- const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
67004
- if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
67005
- return false;
67006
- return true;
67007
- } catch {
67008
- return false;
67009
- }
67010
- }
67011
- markLookupIndexReady(table, suffix) {
67012
- mkdirSync(getIndexMarkerDir(), { recursive: true });
67013
- writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
67014
- }
67015
67052
  async ensureLookupIndex(table, suffix, columnsSql) {
67016
- if (this.hasFreshLookupIndexMarker(table, suffix))
67053
+ const markers = await getIndexMarkerStore();
67054
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
67055
+ if (markers.hasFreshIndexMarker(markerPath))
67017
67056
  return;
67018
67057
  const indexName = this.buildLookupIndexName(table, suffix);
67019
67058
  try {
67020
67059
  await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
67021
- this.markLookupIndexReady(table, suffix);
67060
+ markers.writeIndexMarker(markerPath);
67022
67061
  } catch (e6) {
67023
67062
  if (isDuplicateIndexError(e6)) {
67024
- this.markLookupIndexReady(table, suffix);
67063
+ markers.writeIndexMarker(markerPath);
67025
67064
  return;
67026
67065
  }
67027
67066
  log2(`index "${indexName}" skipped: ${e6.message}`);
67028
67067
  }
67029
67068
  }
67069
+ /**
67070
+ * Ensure a vector column exists on the given table.
67071
+ *
67072
+ * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
67073
+ * EXISTS …` on every SessionStart. On a long-running workspace that's
67074
+ * already migrated, every call returns 500 "Column already exists" — noisy
67075
+ * in the log and a wasted round-trip. Worse, the very first call after the
67076
+ * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
67077
+ * window (~30s) during which subsequent INSERTs fail; minimising the
67078
+ * number of ALTER calls minimises exposure to that window.
67079
+ *
67080
+ * New flow:
67081
+ * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
67082
+ * return — zero network calls.
67083
+ * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
67084
+ * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
67085
+ * bug. If the column is present → mark + return.
67086
+ * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
67087
+ * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
67088
+ * "already exists" (race: another client added it between our SELECT
67089
+ * and ALTER).
67090
+ *
67091
+ * Marker uses the same dir / TTL as ensureLookupIndex so both schema
67092
+ * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
67093
+ */
67094
+ async ensureEmbeddingColumn(table, column) {
67095
+ await this.ensureColumn(table, column, "FLOAT4[]");
67096
+ }
67097
+ /**
67098
+ * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
67099
+ * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
67100
+ * column that was added to the schema after the table was originally
67101
+ * created. Used today for `summary_embedding`, `message_embedding`, and
67102
+ * the `agent` column (added 2026-04-11) — the latter has no fallback if
67103
+ * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
67104
+ * with `column "agent" does not exist`.
67105
+ */
67106
+ async ensureColumn(table, column, sqlType) {
67107
+ const markers = await getIndexMarkerStore();
67108
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
67109
+ if (markers.hasFreshIndexMarker(markerPath))
67110
+ return;
67111
+ const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
67112
+ const rows = await this.query(colCheck);
67113
+ if (rows.length > 0) {
67114
+ markers.writeIndexMarker(markerPath);
67115
+ return;
67116
+ }
67117
+ try {
67118
+ await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
67119
+ } catch (e6) {
67120
+ const msg = e6 instanceof Error ? e6.message : String(e6);
67121
+ if (!/already exists/i.test(msg))
67122
+ throw e6;
67123
+ const recheck = await this.query(colCheck);
67124
+ if (recheck.length === 0)
67125
+ throw e6;
67126
+ }
67127
+ markers.writeIndexMarker(markerPath);
67128
+ }
67030
67129
  /** List all tables in the workspace (with retry). */
67031
67130
  async listTables(forceRefresh = false) {
67032
67131
  if (!forceRefresh && this._tablesCache)
@@ -67042,7 +67141,8 @@ var DeeplakeApi = class {
67042
67141
  const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
67043
67142
  headers: {
67044
67143
  Authorization: `Bearer ${this.token}`,
67045
- "X-Activeloop-Org-Id": this.orgId
67144
+ "X-Activeloop-Org-Id": this.orgId,
67145
+ ...deeplakeClientHeader()
67046
67146
  }
67047
67147
  });
67048
67148
  if (resp.ok) {
@@ -67067,35 +67167,92 @@ var DeeplakeApi = class {
67067
67167
  }
67068
67168
  return { tables: [], cacheable: false };
67069
67169
  }
67170
+ /**
67171
+ * Run a `CREATE TABLE` with an extra outer retry budget. The base
67172
+ * `query()` already retries 3 times on fetch errors (~3.5s total), but a
67173
+ * failed CREATE is permanent corruption — every subsequent SELECT against
67174
+ * the missing table fails. Wrapping in an outer loop with longer backoff
67175
+ * (2s, 5s, then 10s) gives us ~17s of reach across transient network
67176
+ * blips before giving up. Failures still propagate; getApi() resets its
67177
+ * cache on init failure (openclaw plugin) so the next call retries the
67178
+ * whole init flow.
67179
+ */
67180
+ async createTableWithRetry(sql, label) {
67181
+ const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
67182
+ let lastErr = null;
67183
+ for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
67184
+ try {
67185
+ await this.query(sql);
67186
+ return;
67187
+ } catch (err) {
67188
+ lastErr = err;
67189
+ const msg = err instanceof Error ? err.message : String(err);
67190
+ log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
67191
+ if (attempt < OUTER_BACKOFFS_MS.length) {
67192
+ await sleep(OUTER_BACKOFFS_MS[attempt]);
67193
+ }
67194
+ }
67195
+ }
67196
+ throw lastErr;
67197
+ }
67070
67198
  /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
67071
67199
  async ensureTable(name) {
67072
- const tbl = name ?? this.tableName;
67200
+ const tbl = sqlIdent(name ?? this.tableName);
67073
67201
  const tables = await this.listTables();
67074
67202
  if (!tables.includes(tbl)) {
67075
67203
  log2(`table "${tbl}" not found, creating`);
67076
- await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
67204
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
67077
67205
  log2(`table "${tbl}" created`);
67078
67206
  if (!tables.includes(tbl))
67079
67207
  this._tablesCache = [...tables, tbl];
67080
67208
  }
67209
+ await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
67210
+ await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
67081
67211
  }
67082
67212
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
67083
67213
  async ensureSessionsTable(name) {
67214
+ const safe = sqlIdent(name);
67215
+ const tables = await this.listTables();
67216
+ if (!tables.includes(safe)) {
67217
+ log2(`table "${safe}" not found, creating`);
67218
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
67219
+ log2(`table "${safe}" created`);
67220
+ if (!tables.includes(safe))
67221
+ this._tablesCache = [...tables, safe];
67222
+ }
67223
+ await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
67224
+ await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
67225
+ await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
67226
+ }
67227
+ /**
67228
+ * Create the skills table.
67229
+ *
67230
+ * One row per skill version. Workers INSERT a fresh row on every KEEP /
67231
+ * MERGE rather than UPDATE-ing in place, so the full version history is
67232
+ * recoverable. Uniqueness in the *current* state is by (project_key, name)
67233
+ * — newer rows shadow older ones at read time (ORDER BY version DESC).
67234
+ * This sidesteps the Deeplake UPDATE-coalescing quirk that bit the wiki
67235
+ * worker.
67236
+ */
67237
+ async ensureSkillsTable(name) {
67238
+ const safe = sqlIdent(name);
67084
67239
  const tables = await this.listTables();
67085
- if (!tables.includes(name)) {
67086
- log2(`table "${name}" not found, creating`);
67087
- await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
67088
- log2(`table "${name}" created`);
67089
- if (!tables.includes(name))
67090
- this._tablesCache = [...tables, name];
67240
+ if (!tables.includes(safe)) {
67241
+ log2(`table "${safe}" not found, creating`);
67242
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
67243
+ log2(`table "${safe}" created`);
67244
+ if (!tables.includes(safe))
67245
+ this._tablesCache = [...tables, safe];
67091
67246
  }
67092
- await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
67247
+ await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
67093
67248
  }
67094
67249
  };
67095
67250
 
67096
67251
  // dist/src/shell/deeplake-fs.js
67097
67252
  import { basename as basename4, posix } from "node:path";
67098
67253
  import { randomUUID as randomUUID2 } from "node:crypto";
67254
+ import { fileURLToPath } from "node:url";
67255
+ import { dirname as dirname4, join as join9 } from "node:path";
67099
67256
 
67100
67257
  // dist/src/shell/grep-core.js
67101
67258
  var TOOL_INPUT_FIELDS = [
@@ -67261,24 +67418,25 @@ function normalizeContent(path2, raw) {
67261
67418
  return raw;
67262
67419
  }
67263
67420
  if (Array.isArray(obj.turns)) {
67264
- const header = [];
67265
- if (obj.date_time)
67266
- header.push(`date: ${obj.date_time}`);
67267
- if (obj.speakers) {
67268
- const s10 = obj.speakers;
67269
- const names = [s10.speaker_a, s10.speaker_b].filter(Boolean).join(", ");
67270
- if (names)
67271
- header.push(`speakers: ${names}`);
67272
- }
67421
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
67273
67422
  const lines = obj.turns.map((t6) => {
67274
67423
  const sp = String(t6?.speaker ?? t6?.name ?? "?").trim();
67275
67424
  const tx = String(t6?.text ?? t6?.content ?? "").replace(/\s+/g, " ").trim();
67276
67425
  const tag = t6?.dia_id ? `[${t6.dia_id}] ` : "";
67277
- return `${tag}${sp}: ${tx}`;
67426
+ return `${dateHeader}${tag}${sp}: ${tx}`;
67278
67427
  });
67279
- const out2 = [...header, ...lines].join("\n");
67428
+ const out2 = lines.join("\n");
67280
67429
  return out2.trim() ? out2 : raw;
67281
67430
  }
67431
+ if (obj.turn && typeof obj.turn === "object" && !Array.isArray(obj.turn)) {
67432
+ const t6 = obj.turn;
67433
+ const sp = String(t6.speaker ?? t6.name ?? "?").trim();
67434
+ const tx = String(t6.text ?? t6.content ?? "").replace(/\s+/g, " ").trim();
67435
+ const tag = t6.dia_id ? `[${String(t6.dia_id)}] ` : "";
67436
+ const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
67437
+ const line = `${dateHeader}${tag}${sp}: ${tx}`;
67438
+ return line.trim() ? line : raw;
67439
+ }
67282
67440
  const stripRecalled = (t6) => {
67283
67441
  const i11 = t6.indexOf("<recalled-memories>");
67284
67442
  if (i11 === -1)
@@ -67321,8 +67479,38 @@ function buildPathCondition(targetPath) {
67321
67479
  return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
67322
67480
  }
67323
67481
  async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
67324
- const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
67482
+ const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, queryEmbedding, multiWordPatterns } = opts;
67325
67483
  const limit = opts.limit ?? 100;
67484
+ if (queryEmbedding && queryEmbedding.length > 0) {
67485
+ const vecLit = serializeFloat4Array(queryEmbedding);
67486
+ const semanticLimit = Math.min(limit, Number(process.env.HIVEMIND_SEMANTIC_LIMIT ?? "20"));
67487
+ const lexicalLimit = Math.min(limit, Number(process.env.HIVEMIND_HYBRID_LEXICAL_LIMIT ?? "20"));
67488
+ const filterPatternsForLex = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
67489
+ const memLexFilter = buildContentFilter("summary::text", likeOp, filterPatternsForLex);
67490
+ const sessLexFilter = buildContentFilter("message::text", likeOp, filterPatternsForLex);
67491
+ const memLexQuery = memLexFilter ? `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, 1.0 AS score FROM "${memoryTable}" WHERE 1=1${pathFilter}${memLexFilter} LIMIT ${lexicalLimit}` : null;
67492
+ const sessLexQuery = sessLexFilter ? `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, 1.0 AS score FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessLexFilter} LIMIT ${lexicalLimit}` : null;
67493
+ const memSemQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, (summary_embedding <#> ${vecLit}) AS score FROM "${memoryTable}" WHERE ARRAY_LENGTH(summary_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
67494
+ const sessSemQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, (message_embedding <#> ${vecLit}) AS score FROM "${sessionsTable}" WHERE ARRAY_LENGTH(message_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
67495
+ const parts = [memSemQuery, sessSemQuery];
67496
+ if (memLexQuery)
67497
+ parts.push(memLexQuery);
67498
+ if (sessLexQuery)
67499
+ parts.push(sessLexQuery);
67500
+ const unionSql = parts.map((q17) => `(${q17})`).join(" UNION ALL ");
67501
+ const outerLimit = semanticLimit + lexicalLimit;
67502
+ const rows2 = await api.query(`SELECT path, content, source_order, creation_date, score FROM (` + unionSql + `) AS combined ORDER BY score DESC LIMIT ${outerLimit}`);
67503
+ const seen = /* @__PURE__ */ new Set();
67504
+ const unique = [];
67505
+ for (const row of rows2) {
67506
+ const p22 = String(row["path"]);
67507
+ if (seen.has(p22))
67508
+ continue;
67509
+ seen.add(p22);
67510
+ unique.push({ path: p22, content: String(row["content"] ?? "") });
67511
+ }
67512
+ return unique;
67513
+ }
67326
67514
  const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
67327
67515
  const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
67328
67516
  const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
@@ -67334,6 +67522,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
67334
67522
  content: String(row["content"] ?? "")
67335
67523
  }));
67336
67524
  }
67525
+ function serializeFloat4Array(vec) {
67526
+ const parts = [];
67527
+ for (const v27 of vec) {
67528
+ if (!Number.isFinite(v27))
67529
+ return "NULL";
67530
+ parts.push(String(v27));
67531
+ }
67532
+ return `ARRAY[${parts.join(",")}]::float4[]`;
67533
+ }
67337
67534
  function buildPathFilter(targetPath) {
67338
67535
  const condition = buildPathCondition(targetPath);
67339
67536
  return condition ? ` AND ${condition}` : "";
@@ -67426,7 +67623,7 @@ function buildGrepSearchOptions(params, targetPath) {
67426
67623
  return {
67427
67624
  pathFilter: buildPathFilter(targetPath),
67428
67625
  contentScanOnly: hasRegexMeta,
67429
- likeOp: params.ignoreCase ? "ILIKE" : "LIKE",
67626
+ likeOp: process.env.HIVEMIND_GREP_LIKE === "case-sensitive" ? "LIKE" : "ILIKE",
67430
67627
  escapedPattern: sqlLike(params.pattern),
67431
67628
  prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
67432
67629
  prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
@@ -67482,6 +67679,346 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) {
67482
67679
  return output;
67483
67680
  }
67484
67681
 
67682
+ // dist/src/embeddings/client.js
67683
+ import { connect } from "node:net";
67684
+ import { spawn } from "node:child_process";
67685
+ import { openSync, closeSync, writeSync, unlinkSync, existsSync as existsSync4, readFileSync as readFileSync3 } from "node:fs";
67686
+ import { homedir as homedir3 } from "node:os";
67687
+ import { join as join7 } from "node:path";
67688
+
67689
+ // dist/src/embeddings/protocol.js
67690
var DEFAULT_SOCKET_DIR = "/tmp";
var DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1e3;
var DEFAULT_CLIENT_TIMEOUT_MS = 2e3;
// Both rendezvous files share one per-uid naming scheme; only the
// extension differs.
function embedRendezvousPath(uid, dir, ext) {
  return `${dir}/hivemind-embed-${uid}.${ext}`;
}
// Unix socket the embed daemon listens on for this uid.
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  return embedRendezvousPath(uid, dir, "sock");
}
// Pid/lock file guarding daemon spawn for this uid.
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  return embedRendezvousPath(uid, dir, "pid");
}
67699
+
67700
+ // dist/src/embeddings/client.js
67701
// Shared daemon install location (populated by the setup hook).
var SHARED_DAEMON_PATH = join7(homedir3(), ".hivemind", "embed-deps", "embed-daemon.js");
var log3 = (m26) => log("embed-client", m26);
// Identify the current user for per-uid socket/pid paths: the numeric uid
// where the platform has one, else $USER, else a fixed fallback.
function getUid() {
  const numericUid = typeof process.getuid === "function" ? process.getuid() : void 0;
  if (numericUid !== void 0) {
    return String(numericUid);
  }
  return process.env.USER ?? "default";
}
67707
var EmbedClient = class {
  // Unix socket the daemon listens on for this uid.
  socketPath;
  // Pid file doubling as a spawn lock; holds the pid of whoever spawned.
  pidPath;
  // Per-operation bound for connect and request/response round trips.
  timeoutMs;
  // Entry script used to launch the daemon when it is not running.
  daemonEntry;
  // When true, a failed connect kicks off a background daemon spawn.
  autoSpawn;
  // How long warmup() waits for a freshly spawned daemon to come up.
  spawnWaitMs;
  // Monotonic request-id counter, stringified per request.
  nextId = 0;
  constructor(opts = {}) {
    const uid = getUid();
    const dir = opts.socketDir ?? "/tmp";
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    // Resolution order: explicit option → env override → shared install (if present).
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync4(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }
  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      if (this.autoSpawn)
        this.trySpawnDaemon();
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e6) {
      const err = e6 instanceof Error ? e6.message : String(e6);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      // One connection per request; always close, even on error paths.
      try {
        sock.end();
      } catch {
      }
    }
  }
  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   */
  async warmup() {
    try {
      const s10 = await this.connectOnce();
      s10.end();
      return true;
    } catch {
      if (!this.autoSpawn)
        return false;
      this.trySpawnDaemon();
      try {
        const s10 = await this.waitForSocket();
        s10.end();
        return true;
      } catch {
        return false;
      }
    }
  }
  // Single connection attempt against the unix socket, bounded by timeoutMs.
  // Resolves with the connected socket; rejects on error or timeout.
  connectOnce() {
    return new Promise((resolve5, reject) => {
      const sock = connect(this.socketPath);
      const to3 = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to3);
        resolve5(sock);
      });
      sock.once("error", (e6) => {
        clearTimeout(to3);
        reject(e6);
      });
    });
  }
  // Spawn the daemon at most once across processes: the pid file is the lock
  // ("wx" open fails if it already exists). A stale pid file — owner process
  // gone — is removed and the lock retaken. Best-effort throughout: every
  // failure path simply returns without throwing.
  trySpawnDaemon() {
    let fd;
    try {
      fd = openSync(this.pidPath, "wx", 384);
      writeSync(fd, String(process.pid));
    } catch (e6) {
      if (this.isPidFileStale()) {
        try {
          unlinkSync(this.pidPath);
        } catch {
        }
        try {
          fd = openSync(this.pidPath, "wx", 384);
          writeSync(fd, String(process.pid));
        } catch {
          return;
        }
      } else {
        // Someone else holds the lock: daemon is up or being spawned.
        return;
      }
    }
    if (!this.daemonEntry || !existsSync4(this.daemonEntry)) {
      log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      // Release the lock so a later, correctly configured client can spawn.
      try {
        closeSync(fd);
        unlinkSync(this.pidPath);
      } catch {
      }
      return;
    }
    try {
      // Fully detach so the daemon outlives this (short-lived hook) process.
      const child = spawn(process.execPath, [this.daemonEntry], {
        detached: true,
        stdio: "ignore",
        env: process.env
      });
      child.unref();
      log3(`spawned daemon pid=${child.pid}`);
    } finally {
      closeSync(fd);
    }
  }
  // True when the pid file is unreadable, non-numeric, or names a process
  // that no longer exists (signal 0 probes existence without killing).
  isPidFileStale() {
    try {
      const raw = readFileSync3(this.pidPath, "utf-8").trim();
      const pid = Number(raw);
      if (!pid || Number.isNaN(pid))
        return true;
      try {
        process.kill(pid, 0);
        return false;
      } catch {
        return true;
      }
    } catch {
      return true;
    }
  }
  // Poll with capped exponential backoff (30ms → 300ms) until the socket
  // accepts a connection or spawnWaitMs elapses; throws on deadline.
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync4(this.socketPath))
        continue;
      try {
        return await this.connectOnce();
      } catch {
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }
  // Newline-delimited JSON request/response: writes one JSON line, resolves
  // with the first complete line received, bounded by timeoutMs.
  sendAndWait(sock, req) {
    return new Promise((resolve5, reject) => {
      let buf = "";
      const to3 = setTimeout(() => {
        sock.destroy();
        reject(new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        buf += chunk;
        const nl3 = buf.indexOf("\n");
        if (nl3 === -1)
          return;
        const line = buf.slice(0, nl3);
        clearTimeout(to3);
        try {
          resolve5(JSON.parse(line));
        } catch (e6) {
          reject(e6);
        }
      });
      sock.on("error", (e6) => {
        clearTimeout(to3);
        reject(e6);
      });
      sock.on("end", () => {
        clearTimeout(to3);
        reject(new Error("connection closed without response"));
      });
      sock.write(JSON.stringify(req) + "\n");
    });
  }
};
67906
// Promise-based delay helper.
function sleep2(ms3) {
  return new Promise((wake) => {
    setTimeout(wake, ms3);
  });
}
67909
+
67910
+ // dist/src/embeddings/sql.js
67911
// Render an embedding vector as an inline Postgres float4[] SQL literal.
// Returns "NULL" for an absent/empty vector and for any non-finite
// component — NaN/Infinity cannot be written as float4 literals.
function embeddingSqlLiteral(vec) {
  if (!vec || vec.length === 0)
    return "NULL";
  const rendered = [];
  for (let i = 0; i < vec.length; i++) {
    const component = vec[i];
    if (!Number.isFinite(component))
      return "NULL";
    rendered[i] = String(component);
  }
  return `ARRAY[${rendered.join(",")}]::float4[]`;
}
67922
+
67923
+ // dist/src/embeddings/disable.js
67924
+ import { createRequire } from "node:module";
67925
+ import { homedir as homedir4 } from "node:os";
67926
+ import { join as join8 } from "node:path";
67927
+ import { pathToFileURL } from "node:url";
67928
// Memoized result of detectStatus(); null means "not probed yet".
var cachedStatus = null;
// Probe for @huggingface/transformers: first relative to this bundle, then
// inside the shared ~/.hivemind/embed-deps directory. Throws when neither
// location resolves the package.
function defaultResolveTransformers() {
  try {
    createRequire(import.meta.url).resolve("@huggingface/transformers");
    return;
  } catch {
  }
  const sharedDir = join8(homedir4(), ".hivemind", "embed-deps");
  const sharedRequire = createRequire(pathToFileURL(`${sharedDir}/`).href);
  sharedRequire.resolve("@huggingface/transformers");
}
// Indirection point so tests/callers could swap the resolver.
var _resolve = defaultResolveTransformers;
// One of: "env-disabled" | "enabled" | "no-transformers".
function detectStatus() {
  if (process.env.HIVEMIND_EMBEDDINGS === "false") {
    return "env-disabled";
  }
  try {
    _resolve();
  } catch {
    return "no-transformers";
  }
  return "enabled";
}
// Cached status probe; the filesystem/env check runs at most once.
function embeddingsStatus() {
  if (cachedStatus === null) {
    cachedStatus = detectStatus();
  }
  return cachedStatus;
}
function embeddingsDisabled() {
  return embeddingsStatus() !== "enabled";
}
67958
+
67959
+ // dist/src/hooks/virtual-table-query.js
67960
var INDEX_LIMIT_PER_SECTION = 50;
/**
 * Renders the virtual index markdown shown to the agent: a "## memory"
 * table of per-session AI summaries and a "## sessions" table of raw
 * session records, each with an empty-state placeholder and an optional
 * truncation notice.
 *
 * @param summaryRows rows carrying path/project/description/creation_date/last_update_date
 * @param sessionRows rows carrying path/description/creation_date/last_update_date
 * @param opts { summaryTruncated?, sessionTruncated? } — when set, a note
 *   tells the reader how to reach rows beyond the first 50 via Grep.
 * @returns the complete markdown document as one string
 */
function buildVirtualIndexContent(summaryRows, sessionRows = [], opts = {}) {
  const lines = [
    "# Session Index",
    "",
    "Two sources are available. Consult the section relevant to the question.",
    ""
  ];
  lines.push("## memory", "");
  if (summaryRows.length === 0) {
    lines.push("_(empty \u2014 no summaries ingested yet)_");
  } else {
    lines.push("AI-generated summaries per session. Read these first for topic-level overviews.");
    lines.push("");
    if (opts.summaryTruncated) {
      lines.push(`_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older summaries reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`);
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Project | Description |");
    lines.push("|---------|---------|--------------|---------|-------------|");
    for (const row of summaryRows) {
      const p22 = row["path"] || "";
      // Only rows shaped like /summaries/<user>/<session>.md are listed.
      const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/);
      if (!match2)
        continue;
      const summaryUser = match2[1];
      const sessionId = match2[2];
      const relPath = `summaries/${summaryUser}/${sessionId}.md`;
      const project = row["project"] || "";
      const description = row["description"] || "";
      const creationDate = row["creation_date"] || "";
      const lastUpdateDate = row["last_update_date"] || "";
      lines.push(`| [${sessionId}](${relPath}) | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`);
    }
  }
  lines.push("");
  lines.push("## sessions", "");
  if (sessionRows.length === 0) {
    lines.push("_(empty \u2014 no session records ingested yet)_");
  } else {
    lines.push("Raw session records (dialogue, tool calls). Read for exact detail / quotes.");
    lines.push("");
    if (opts.sessionTruncated) {
      lines.push(`_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older sessions reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`);
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Description |");
    lines.push("|---------|---------|--------------|-------------|");
    for (const row of sessionRows) {
      const p22 = row["path"] || "";
      const rel = p22.startsWith("/") ? p22.slice(1) : p22;
      const filename = p22.split("/").pop() ?? p22;
      const description = row["description"] || "";
      const creationDate = row["creation_date"] || "";
      const lastUpdateDate = row["last_update_date"] || "";
      // FIX: the link label previously emitted the literal text "$(unknown)"
      // while the computed `filename` local sat unused — use it as the label.
      lines.push(`| [${filename}](${rel}) | ${creationDate} | ${lastUpdateDate} | ${description} |`);
    }
  }
  lines.push("");
  return lines.join("\n");
}
68021
+
67485
68022
  // dist/src/shell/deeplake-fs.js
67486
68023
  var BATCH_SIZE = 10;
67487
68024
  var PREFETCH_BATCH_SIZE = 50;
@@ -67510,6 +68047,9 @@ function normalizeSessionMessage(path2, message) {
67510
68047
  const raw = typeof message === "string" ? message : JSON.stringify(message);
67511
68048
  return normalizeContent(path2, raw);
67512
68049
  }
68050
// The embed daemon ships next to this bundle under embeddings/.
function resolveEmbedDaemonPath() {
  const bundleDir = dirname4(fileURLToPath(import.meta.url));
  return join9(bundleDir, "embeddings", "embed-daemon.js");
}
67513
68053
  function joinSessionMessages(path2, messages) {
67514
68054
  return messages.map((message) => normalizeSessionMessage(path2, message)).join("\n");
67515
68055
  }
@@ -67539,6 +68079,8 @@ var DeeplakeFs = class _DeeplakeFs {
67539
68079
  // Paths that live in the sessions table (multi-row, read by concatenation)
67540
68080
  sessionPaths = /* @__PURE__ */ new Set();
67541
68081
  sessionsTable = null;
68082
+ // Embedding client lazily created on first flush. Lives as long as the process.
68083
+ embedClient = null;
67542
68084
  constructor(client, table, mountPoint) {
67543
68085
  this.client = client;
67544
68086
  this.table = table;
@@ -67572,7 +68114,14 @@ var DeeplakeFs = class _DeeplakeFs {
67572
68114
  })();
67573
68115
  const sessionsBootstrap = sessionsTable && sessionSyncOk ? (async () => {
67574
68116
  try {
67575
- const sessionRows = await client.query(`SELECT path, SUM(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`);
68117
+ const sessionRows = await client.query(
68118
+ // NOTE: SUM(size_bytes) returns NULL on the Deeplake backend when combined
68119
+ // with GROUP BY path (confirmed against workspace `with_embedding`). MAX
68120
+ // works and — for the single-row-per-file layout — is equal to SUM. For
68121
+ // multi-row-per-turn layouts MAX under-reports total size but stays >0
68122
+ // so files don't look like empty placeholders in ls/stat.
68123
+ `SELECT path, MAX(size_bytes) as total_size FROM "${sessionsTable}" GROUP BY path ORDER BY path`
68124
+ );
67576
68125
  for (const row of sessionRows) {
67577
68126
  const p22 = row["path"];
67578
68127
  if (!fs3.files.has(p22)) {
@@ -67632,7 +68181,8 @@ var DeeplakeFs = class _DeeplakeFs {
67632
68181
  }
67633
68182
  const rows = [...this.pending.values()];
67634
68183
  this.pending.clear();
67635
- const results = await Promise.allSettled(rows.map((r10) => this.upsertRow(r10)));
68184
+ const embeddings = await this.computeEmbeddings(rows);
68185
+ const results = await Promise.allSettled(rows.map((r10, i11) => this.upsertRow(r10, embeddings[i11])));
67636
68186
  let failures = 0;
67637
68187
  for (let i11 = 0; i11 < results.length; i11++) {
67638
68188
  if (results[i11].status === "rejected") {
@@ -67646,7 +68196,17 @@ var DeeplakeFs = class _DeeplakeFs {
67646
68196
  throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`);
67647
68197
  }
67648
68198
  }
67649
- async upsertRow(r10) {
68199
  // Best-effort: returns one embedding (or null) per pending row, in input
  // order. A null entry means "leave the embedding column NULL" — per the
  // EmbedClient contract, embed() resolves null on failure rather than
  // throwing, so this never blocks or fails the write path.
  async computeEmbeddings(rows) {
    if (rows.length === 0)
      return [];
    // Skip entirely when embeddings are disabled (env flag or missing deps).
    if (embeddingsDisabled())
      return rows.map(() => null);
    // Lazily create the client once; it lives as long as the process.
    if (!this.embedClient) {
      this.embedClient = new EmbedClient({ daemonEntry: resolveEmbedDaemonPath() });
    }
    return Promise.all(rows.map((r10) => this.embedClient.embed(r10.contentText, "document")));
  }
68209
+ async upsertRow(r10, embedding) {
67650
68210
  const text = sqlStr(r10.contentText);
67651
68211
  const p22 = sqlStr(r10.path);
67652
68212
  const fname = sqlStr(r10.filename);
@@ -67654,8 +68214,9 @@ var DeeplakeFs = class _DeeplakeFs {
67654
68214
  const ts3 = (/* @__PURE__ */ new Date()).toISOString();
67655
68215
  const cd = r10.creationDate ?? ts3;
67656
68216
  const lud = r10.lastUpdateDate ?? ts3;
68217
+ const embSql = embeddingSqlLiteral(embedding);
67657
68218
  if (this.flushed.has(r10.path)) {
67658
- let setClauses = `filename = '${fname}', summary = E'${text}', mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`;
68219
+ let setClauses = `filename = '${fname}', summary = E'${text}', summary_embedding = ${embSql}, mime_type = '${mime}', size_bytes = ${r10.sizeBytes}, last_update_date = '${sqlStr(lud)}'`;
67659
68220
  if (r10.project !== void 0)
67660
68221
  setClauses += `, project = '${sqlStr(r10.project)}'`;
67661
68222
  if (r10.description !== void 0)
@@ -67663,54 +68224,27 @@ var DeeplakeFs = class _DeeplakeFs {
67663
68224
  await this.client.query(`UPDATE "${this.table}" SET ${setClauses} WHERE path = '${p22}'`);
67664
68225
  } else {
67665
68226
  const id = randomUUID2();
67666
- const cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : "");
67667
- const vals = `'${id}', '${p22}', '${fname}', E'${text}', '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? `, '${sqlStr(r10.description)}'` : "");
68227
+ const cols = "id, path, filename, summary, summary_embedding, mime_type, size_bytes, creation_date, last_update_date" + (r10.project !== void 0 ? ", project" : "") + (r10.description !== void 0 ? ", description" : "");
68228
+ const vals = `'${id}', '${p22}', '${fname}', E'${text}', ${embSql}, '${mime}', ${r10.sizeBytes}, '${sqlStr(cd)}', '${sqlStr(lud)}'` + (r10.project !== void 0 ? `, '${sqlStr(r10.project)}'` : "") + (r10.description !== void 0 ? `, '${sqlStr(r10.description)}'` : "");
67668
68229
  await this.client.query(`INSERT INTO "${this.table}" (${cols}) VALUES (${vals})`);
67669
68230
  this.flushed.add(r10.path);
67670
68231
  }
67671
68232
  }
67672
68233
  // ── Virtual index.md generation ────────────────────────────────────────────
67673
68234
  async generateVirtualIndex() {
67674
- const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`);
67675
- const sessionPathsByKey = /* @__PURE__ */ new Map();
67676
- for (const sp of this.sessionPaths) {
67677
- const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/);
67678
- if (hivemind) {
67679
- sessionPathsByKey.set(hivemind[1], sp.slice(1));
67680
- } else {
67681
- const fname = sp.split("/").pop() ?? "";
67682
- const stem = fname.replace(/\.[^.]+$/, "");
67683
- if (stem)
67684
- sessionPathsByKey.set(stem, sp.slice(1));
67685
- }
67686
- }
67687
- const lines = [
67688
- "# Session Index",
67689
- "",
67690
- "List of all Claude Code sessions with summaries.",
67691
- "",
67692
- "| Session | Conversation | Created | Last Updated | Project | Description |",
67693
- "|---------|-------------|---------|--------------|---------|-------------|"
67694
- ];
67695
- for (const row of rows) {
67696
- const p22 = row["path"];
67697
- const match2 = p22.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/);
67698
- if (!match2)
67699
- continue;
67700
- const summaryUser = match2[1];
67701
- const sessionId = match2[2];
67702
- const relPath = `summaries/${summaryUser}/${sessionId}.md`;
67703
- const baseName = sessionId.replace(/_summary$/, "");
67704
- const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName);
67705
- const convLink = convPath ? `[messages](${convPath})` : "";
67706
- const project = row["project"] || "";
67707
- const description = row["description"] || "";
67708
- const creationDate = row["creation_date"] || "";
67709
- const lastUpdateDate = row["last_update_date"] || "";
67710
- lines.push(`| [${sessionId}](${relPath}) | ${convLink} | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`);
68235
+ const fetchLimit = INDEX_LIMIT_PER_SECTION + 1;
68236
+ const summaryRows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC LIMIT ${fetchLimit}`);
68237
+ let sessionRows = [];
68238
+ if (this.sessionsTable) {
68239
+ try {
68240
+ sessionRows = await this.client.query(`SELECT path, MAX(description) AS description, MIN(creation_date) AS creation_date, MAX(last_update_date) AS last_update_date FROM "${this.sessionsTable}" WHERE path LIKE '${sqlStr("/sessions/")}%' GROUP BY path ORDER BY MAX(last_update_date) DESC LIMIT ${fetchLimit}`);
68241
+ } catch {
68242
+ sessionRows = [];
68243
+ }
67711
68244
  }
67712
- lines.push("");
67713
- return lines.join("\n");
68245
+ const summaryTruncated = summaryRows.length > INDEX_LIMIT_PER_SECTION;
68246
+ const sessionTruncated = sessionRows.length > INDEX_LIMIT_PER_SECTION;
68247
+ return buildVirtualIndexContent(summaryRows.slice(0, INDEX_LIMIT_PER_SECTION), sessionRows.slice(0, INDEX_LIMIT_PER_SECTION), { summaryTruncated, sessionTruncated });
67714
68248
  }
67715
68249
  // ── batch prefetch ────────────────────────────────────────────────────────
67716
68250
  /**
@@ -69018,8 +69552,8 @@ function stripQuotes(val) {
69018
69552
  }
69019
69553
 
69020
69554
  // node_modules/yargs-parser/build/lib/index.js
69021
- import { readFileSync as readFileSync3 } from "fs";
69022
- import { createRequire } from "node:module";
69555
+ import { readFileSync as readFileSync4 } from "fs";
69556
+ import { createRequire as createRequire2 } from "node:module";
69023
69557
  var _a3;
69024
69558
  var _b;
69025
69559
  var _c;
@@ -69032,7 +69566,7 @@ if (nodeVersion) {
69032
69566
  }
69033
69567
  }
69034
69568
  var env = process ? process.env : {};
69035
- var require2 = createRequire ? createRequire(import.meta.url) : void 0;
69569
+ var require2 = createRequire2 ? createRequire2(import.meta.url) : void 0;
69036
69570
  var parser = new YargsParser({
69037
69571
  cwd: process.cwd,
69038
69572
  env: () => {
@@ -69045,7 +69579,7 @@ var parser = new YargsParser({
69045
69579
  if (typeof require2 !== "undefined") {
69046
69580
  return require2(path2);
69047
69581
  } else if (path2.match(/\.json$/)) {
69048
- return JSON.parse(readFileSync3(path2, "utf8"));
69582
+ return JSON.parse(readFileSync4(path2, "utf8"));
69049
69583
  } else {
69050
69584
  throw Error("only .json config files are supported in ESM");
69051
69585
  }
@@ -69064,6 +69598,33 @@ yargsParser.looksLikeNumber = looksLikeNumber;
69064
69598
  var lib_default = yargsParser;
69065
69599
 
69066
69600
  // dist/src/shell/grep-interceptor.js
69601
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
69602
+ import { dirname as dirname5, join as join10 } from "node:path";
69603
// Semantic grep is on unless explicitly disabled by env or embeddings are
// unavailable; the env check runs first so the dependency probe is skipped.
var SEMANTIC_SEARCH_ENABLED = process.env.HIVEMIND_SEMANTIC_SEARCH === "false" ? false : !embeddingsDisabled();
// Budget for embedding the grep pattern; grep must stay snappy.
var SEMANTIC_EMBED_TIMEOUT_MS = Number(process.env.HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS ?? "500");
69605
// The grep interceptor lives one directory below the bundle root; the embed
// daemon ships beside it under embeddings/.
function resolveGrepEmbedDaemonPath() {
  const shellDir = dirname5(fileURLToPath2(import.meta.url));
  return join10(shellDir, "..", "embeddings", "embed-daemon.js");
}
69608
// One process-wide EmbedClient for grep-time query embedding, built lazily.
var sharedGrepEmbedClient = null;
function getGrepEmbedClient() {
  if (sharedGrepEmbedClient === null) {
    sharedGrepEmbedClient = new EmbedClient({
      daemonEntry: resolveGrepEmbedDaemonPath(),
      timeoutMs: SEMANTIC_EMBED_TIMEOUT_MS
    });
  }
  return sharedGrepEmbedClient;
}
69618
// Heuristic: is this grep pattern plain enough to embed as a semantic query?
// Fixed strings always qualify; regexes qualify only when they contain at
// most one metacharacter (a lone "|" etc. — more suggests structural regex).
function patternIsSemanticFriendly(pattern, fixedString) {
  if (!pattern || pattern.length < 2)
    return false;
  if (fixedString)
    return true;
  let metaCount = 0;
  for (const ch of pattern) {
    if ("|()[]{}+?^$\\".includes(ch))
      metaCount += 1;
  }
  return metaCount <= 1;
}
69067
69628
  var MAX_FALLBACK_CANDIDATES = 500;
69068
69629
  function createGrepCommand(client, fs3, table, sessionsTable) {
69069
69630
  return Yi2("grep", async (args, ctx) => {
@@ -69105,12 +69666,21 @@ function createGrepCommand(client, fs3, table, sessionsTable) {
69105
69666
  filesOnly: Boolean(parsed.l || parsed["files-with-matches"]),
69106
69667
  countOnly: Boolean(parsed.c || parsed["count"])
69107
69668
  };
69669
+ let queryEmbedding = null;
69670
+ if (SEMANTIC_SEARCH_ENABLED && patternIsSemanticFriendly(pattern, matchParams.fixedString)) {
69671
+ try {
69672
+ queryEmbedding = await getGrepEmbedClient().embed(pattern, "query");
69673
+ } catch {
69674
+ queryEmbedding = null;
69675
+ }
69676
+ }
69108
69677
  let rows = [];
69109
69678
  try {
69110
69679
  const searchOptions = {
69111
69680
  ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd),
69112
69681
  pathFilter: buildPathFilterForTargets(targets),
69113
- limit: 100
69682
+ limit: 100,
69683
+ queryEmbedding
69114
69684
  };
69115
69685
  const queryRows = await Promise.race([
69116
69686
  searchDeeplakeTables(client, table, sessionsTable ?? "sessions", searchOptions),
@@ -69120,6 +69690,21 @@ function createGrepCommand(client, fs3, table, sessionsTable) {
69120
69690
  } catch {
69121
69691
  rows = [];
69122
69692
  }
69693
+ if (rows.length === 0 && queryEmbedding) {
69694
+ try {
69695
+ const lexicalOptions = {
69696
+ ...buildGrepSearchOptions(matchParams, targets[0] ?? ctx.cwd),
69697
+ pathFilter: buildPathFilterForTargets(targets),
69698
+ limit: 100
69699
+ };
69700
+ const lexicalRows = await Promise.race([
69701
+ searchDeeplakeTables(client, table, sessionsTable ?? "sessions", lexicalOptions),
69702
+ new Promise((_16, reject) => setTimeout(() => reject(new Error("timeout")), 3e3))
69703
+ ]);
69704
+ rows.push(...lexicalRows);
69705
+ } catch {
69706
+ }
69707
+ }
69123
69708
  const seen = /* @__PURE__ */ new Set();
69124
69709
  rows = rows.filter((r10) => seen.has(r10.path) ? false : (seen.add(r10.path), true));
69125
69710
  if (rows.length === 0) {
@@ -69133,7 +69718,19 @@ function createGrepCommand(client, fs3, table, sessionsTable) {
69133
69718
  }
69134
69719
  }
69135
69720
  const normalized = rows.map((r10) => ({ path: r10.path, content: normalizeContent(r10.path, r10.content) }));
69136
- const output = refineGrepMatches(normalized, matchParams);
69721
+ let output;
69722
+ if (queryEmbedding && queryEmbedding.length > 0 && process.env.HIVEMIND_SEMANTIC_EMIT_ALL !== "false") {
69723
+ output = [];
69724
+ for (const r10 of normalized) {
69725
+ for (const line of r10.content.split("\n")) {
69726
+ const trimmed = line.trim();
69727
+ if (trimmed)
69728
+ output.push(`${r10.path}:${line}`);
69729
+ }
69730
+ }
69731
+ } else {
69732
+ output = refineGrepMatches(normalized, matchParams);
69733
+ }
69137
69734
  return {
69138
69735
  stdout: output.length > 0 ? output.join("\n") + "\n" : "",
69139
69736
  stderr: "",