@deeplake/hivemind 0.6.48 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +147 -20
  4. package/bundle/cli.js +552 -95
  5. package/codex/bundle/capture.js +509 -89
  6. package/codex/bundle/commands/auth-login.js +209 -66
  7. package/codex/bundle/embeddings/embed-daemon.js +243 -0
  8. package/codex/bundle/pre-tool-use.js +629 -104
  9. package/codex/bundle/session-start-setup.js +194 -57
  10. package/codex/bundle/session-start.js +25 -10
  11. package/codex/bundle/shell/deeplake-shell.js +679 -112
  12. package/codex/bundle/stop.js +476 -58
  13. package/codex/bundle/wiki-worker.js +312 -11
  14. package/cursor/bundle/capture.js +768 -57
  15. package/cursor/bundle/commands/auth-login.js +209 -66
  16. package/cursor/bundle/embeddings/embed-daemon.js +243 -0
  17. package/cursor/bundle/pre-tool-use.js +561 -70
  18. package/cursor/bundle/session-end.js +223 -2
  19. package/cursor/bundle/session-start.js +192 -54
  20. package/cursor/bundle/shell/deeplake-shell.js +679 -112
  21. package/cursor/bundle/wiki-worker.js +571 -0
  22. package/hermes/bundle/capture.js +771 -58
  23. package/hermes/bundle/commands/auth-login.js +209 -66
  24. package/hermes/bundle/embeddings/embed-daemon.js +243 -0
  25. package/hermes/bundle/pre-tool-use.js +560 -69
  26. package/hermes/bundle/session-end.js +224 -1
  27. package/hermes/bundle/session-start.js +195 -54
  28. package/hermes/bundle/shell/deeplake-shell.js +679 -112
  29. package/hermes/bundle/wiki-worker.js +572 -0
  30. package/mcp/bundle/server.js +253 -68
  31. package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
  32. package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
  33. package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
  34. package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
  35. package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
  36. package/openclaw/dist/index.js +752 -702
  37. package/openclaw/openclaw.plugin.json +1 -1
  38. package/openclaw/package.json +1 -1
  39. package/package.json +2 -1
  40. package/pi/extension-source/hivemind.ts +473 -21
@@ -23,6 +23,9 @@ import { join } from "node:path";
23
23
  import { homedir } from "node:os";
24
24
  var DEBUG = process.env.HIVEMIND_DEBUG === "1";
25
25
  var LOG = join(homedir(), ".deeplake", "hook-debug.log");
26
+ function utcTimestamp(d = /* @__PURE__ */ new Date()) {
27
+ return d.toISOString().replace("T", " ").slice(0, 19) + " UTC";
28
+ }
26
29
  function log(tag, msg) {
27
30
  if (!DEBUG)
28
31
  return;
@@ -30,14 +33,232 @@ function log(tag, msg) {
30
33
  `);
31
34
  }
32
35
 
36
+ // dist/src/config.js
37
+ import { readFileSync, existsSync } from "node:fs";
38
+ import { join as join2 } from "node:path";
39
+ import { homedir as homedir2, userInfo } from "node:os";
40
+ function loadConfig() {
41
+ const home = homedir2();
42
+ const credPath = join2(home, ".deeplake", "credentials.json");
43
+ let creds = null;
44
+ if (existsSync(credPath)) {
45
+ try {
46
+ creds = JSON.parse(readFileSync(credPath, "utf-8"));
47
+ } catch {
48
+ return null;
49
+ }
50
+ }
51
+ const token = process.env.HIVEMIND_TOKEN ?? creds?.token;
52
+ const orgId = process.env.HIVEMIND_ORG_ID ?? creds?.orgId;
53
+ if (!token || !orgId)
54
+ return null;
55
+ return {
56
+ token,
57
+ orgId,
58
+ orgName: creds?.orgName ?? orgId,
59
+ userName: creds?.userName || userInfo().username || "unknown",
60
+ workspaceId: process.env.HIVEMIND_WORKSPACE_ID ?? creds?.workspaceId ?? "default",
61
+ apiUrl: process.env.HIVEMIND_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai",
62
+ tableName: process.env.HIVEMIND_TABLE ?? "memory",
63
+ sessionsTableName: process.env.HIVEMIND_SESSIONS_TABLE ?? "sessions",
64
+ memoryPath: process.env.HIVEMIND_MEMORY_PATH ?? join2(home, ".deeplake", "memory")
65
+ };
66
+ }
67
+
68
+ // dist/src/hooks/summary-state.js
69
+ import { readFileSync as readFileSync2, writeFileSync, writeSync, mkdirSync, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs";
70
+ import { homedir as homedir3 } from "node:os";
71
+ import { join as join3 } from "node:path";
72
+ var dlog = (msg) => log("summary-state", msg);
73
+ var STATE_DIR = join3(homedir3(), ".claude", "hooks", "summary-state");
74
+ var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4));
75
+ function lockPath(sessionId) {
76
+ return join3(STATE_DIR, `${sessionId}.lock`);
77
+ }
78
+ function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) {
79
+ mkdirSync(STATE_DIR, { recursive: true });
80
+ const p = lockPath(sessionId);
81
+ if (existsSync2(p)) {
82
+ try {
83
+ const ageMs = Date.now() - parseInt(readFileSync2(p, "utf-8"), 10);
84
+ if (Number.isFinite(ageMs) && ageMs < maxAgeMs)
85
+ return false;
86
+ } catch (readErr) {
87
+ dlog(`lock file unreadable for ${sessionId}, treating as stale: ${readErr.message}`);
88
+ }
89
+ try {
90
+ unlinkSync(p);
91
+ } catch (unlinkErr) {
92
+ dlog(`could not unlink stale lock for ${sessionId}: ${unlinkErr.message}`);
93
+ return false;
94
+ }
95
+ }
96
+ try {
97
+ const fd = openSync(p, "wx");
98
+ try {
99
+ writeSync(fd, String(Date.now()));
100
+ } finally {
101
+ closeSync(fd);
102
+ }
103
+ return true;
104
+ } catch (e) {
105
+ if (e.code === "EEXIST")
106
+ return false;
107
+ throw e;
108
+ }
109
+ }
110
+
111
+ // dist/src/hooks/cursor/spawn-wiki-worker.js
112
+ import { spawn, execSync } from "node:child_process";
113
+ import { fileURLToPath } from "node:url";
114
+ import { dirname, join as join5 } from "node:path";
115
+ import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "node:fs";
116
+ import { homedir as homedir4, tmpdir } from "node:os";
117
+
118
+ // dist/src/utils/wiki-log.js
119
+ import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs";
120
+ import { join as join4 } from "node:path";
121
+ function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") {
122
+ const path = join4(hooksDir, filename);
123
+ return {
124
+ path,
125
+ log(msg) {
126
+ try {
127
+ mkdirSync2(hooksDir, { recursive: true });
128
+ appendFileSync2(path, `[${utcTimestamp()}] ${msg}
129
+ `);
130
+ } catch {
131
+ }
132
+ }
133
+ };
134
+ }
135
+
136
+ // dist/src/hooks/cursor/spawn-wiki-worker.js
137
+ var HOME = homedir4();
138
+ var wikiLogger = makeWikiLogger(join5(HOME, ".cursor", "hooks"));
139
+ var WIKI_LOG = wikiLogger.path;
140
+ var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry.
141
+
142
+ SESSION JSONL path: __JSONL__
143
+ SUMMARY FILE to write: __SUMMARY__
144
+ SESSION ID: __SESSION_ID__
145
+ PROJECT: __PROJECT__
146
+ PREVIOUS JSONL OFFSET (lines already processed): __PREV_OFFSET__
147
+ CURRENT JSONL LINES: __JSONL_LINES__
148
+
149
+ Steps:
150
+ 1. Read the session JSONL at the path above.
151
+ - If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first,
152
+ then focus on lines AFTER the offset for new content. Merge new facts into the existing summary.
153
+ - If offset is 0, generate from scratch.
154
+
155
+ 2. Write the summary file at the path above with this EXACT format:
156
+
157
+ # Session __SESSION_ID__
158
+ - **Source**: __JSONL_SERVER_PATH__
159
+ - **Started**: <extract from JSONL>
160
+ - **Ended**: <now>
161
+ - **Project**: __PROJECT__
162
+ - **JSONL offset**: __JSONL_LINES__
163
+
164
+ ## What Happened
165
+ <2-3 dense sentences. What was the goal, what was accomplished, what's left.>
166
+
167
+ ## People
168
+ <For each person mentioned: name, role, what they did/said. Format: **Name** \u2014 role \u2014 action>
169
+
170
+ ## Entities
171
+ <Every named thing: repos, branches, files, APIs, tools, services, tables, features, bugs.
172
+ Format: **entity** (type) \u2014 what was done with it, its current state>
173
+
174
+ ## Decisions & Reasoning
175
+ <Every decision made and WHY.>
176
+
177
+ ## Key Facts
178
+ <Bullet list of atomic facts that could answer future questions.>
179
+
180
+ ## Files Modified
181
+ <bullet list: path (new/modified/deleted) \u2014 what changed>
182
+
183
+ ## Open Questions / TODO
184
+ <Anything unresolved, blocked, or explicitly deferred>
185
+
186
+ IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact.
187
+ PRIVACY: Never include absolute filesystem paths in the summary.
188
+ LENGTH LIMIT: Keep the total summary under 4000 characters.`;
189
+ var wikiLog = wikiLogger.log;
190
+ function findCursorBin() {
191
+ try {
192
+ return execSync("which cursor-agent 2>/dev/null", { encoding: "utf-8" }).trim() || "cursor-agent";
193
+ } catch {
194
+ return "cursor-agent";
195
+ }
196
+ }
197
+ function spawnCursorWikiWorker(opts) {
198
+ const { config, sessionId, cwd, bundleDir, reason } = opts;
199
+ const projectName = cwd.split("/").pop() || "unknown";
200
+ const tmpDir = join5(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`);
201
+ mkdirSync3(tmpDir, { recursive: true });
202
+ const configFile = join5(tmpDir, "config.json");
203
+ writeFileSync2(configFile, JSON.stringify({
204
+ apiUrl: config.apiUrl,
205
+ token: config.token,
206
+ orgId: config.orgId,
207
+ workspaceId: config.workspaceId,
208
+ memoryTable: config.tableName,
209
+ sessionsTable: config.sessionsTableName,
210
+ sessionId,
211
+ userName: config.userName,
212
+ project: projectName,
213
+ tmpDir,
214
+ cursorBin: findCursorBin(),
215
+ cursorModel: process.env.HIVEMIND_CURSOR_MODEL ?? "auto",
216
+ wikiLog: WIKI_LOG,
217
+ hooksDir: join5(HOME, ".cursor", "hooks"),
218
+ promptTemplate: WIKI_PROMPT_TEMPLATE
219
+ }));
220
+ wikiLog(`${reason}: spawning summary worker for ${sessionId}`);
221
+ const workerPath = join5(bundleDir, "wiki-worker.js");
222
+ spawn("nohup", ["node", workerPath, configFile], {
223
+ detached: true,
224
+ stdio: ["ignore", "ignore", "ignore"]
225
+ }).unref();
226
+ wikiLog(`${reason}: spawned summary worker for ${sessionId}`);
227
+ }
228
+ function bundleDirFromImportMeta(importMetaUrl) {
229
+ return dirname(fileURLToPath(importMetaUrl));
230
+ }
231
+
33
232
  // dist/src/hooks/cursor/session-end.js
34
233
  var log2 = (msg) => log("cursor-session-end", msg);
35
234
  async function main() {
36
235
  if (process.env.HIVEMIND_WIKI_WORKER === "1")
37
236
  return;
38
237
  const input = await readStdin();
39
- const sessionId = input.conversation_id ?? input.session_id ?? "?";
40
- log2(`session=${sessionId} reason=${input.reason ?? "?"} status=${input.final_status ?? "?"}`);
238
+ const sessionId = input.conversation_id ?? input.session_id ?? "";
239
+ log2(`session=${sessionId || "?"} reason=${input.reason ?? "?"} status=${input.final_status ?? "?"}`);
240
+ if (!sessionId)
241
+ return;
242
+ if (!tryAcquireLock(sessionId)) {
243
+ wikiLog(`SessionEnd: periodic worker already running for ${sessionId}, skipping final`);
244
+ return;
245
+ }
246
+ try {
247
+ const config = loadConfig();
248
+ if (!config) {
249
+ wikiLog(`SessionEnd: no config, skipping summary`);
250
+ return;
251
+ }
252
+ spawnCursorWikiWorker({
253
+ config,
254
+ sessionId,
255
+ cwd: process.cwd(),
256
+ bundleDir: bundleDirFromImportMeta(import.meta.url),
257
+ reason: "SessionEnd"
258
+ });
259
+ } catch (e) {
260
+ wikiLog(`SessionEnd: spawn failed: ${e?.message ?? e}`);
261
+ }
41
262
  }
42
263
  main().catch((e) => {
43
264
  log2(`fatal: ${e.message}`);
@@ -1,35 +1,100 @@
1
1
  #!/usr/bin/env node
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __esm = (fn, res) => function __init() {
5
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
6
+ };
7
+ var __export = (target, all) => {
8
+ for (var name in all)
9
+ __defProp(target, name, { get: all[name], enumerable: true });
10
+ };
11
+
12
+ // dist/src/index-marker-store.js
13
+ var index_marker_store_exports = {};
14
+ __export(index_marker_store_exports, {
15
+ buildIndexMarkerPath: () => buildIndexMarkerPath,
16
+ getIndexMarkerDir: () => getIndexMarkerDir,
17
+ hasFreshIndexMarker: () => hasFreshIndexMarker,
18
+ writeIndexMarker: () => writeIndexMarker
19
+ });
20
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs";
21
+ import { join as join4 } from "node:path";
22
+ import { tmpdir } from "node:os";
23
+ function getIndexMarkerDir() {
24
+ return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join4(tmpdir(), "hivemind-deeplake-indexes");
25
+ }
26
+ function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
27
+ const markerKey = [workspaceId, orgId, table, suffix].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
28
+ return join4(getIndexMarkerDir(), `${markerKey}.json`);
29
+ }
30
+ function hasFreshIndexMarker(markerPath) {
31
+ if (!existsSync2(markerPath))
32
+ return false;
33
+ try {
34
+ const raw = JSON.parse(readFileSync3(markerPath, "utf-8"));
35
+ const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
36
+ if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
37
+ return false;
38
+ return true;
39
+ } catch {
40
+ return false;
41
+ }
42
+ }
43
+ function writeIndexMarker(markerPath) {
44
+ mkdirSync2(getIndexMarkerDir(), { recursive: true });
45
+ writeFileSync2(markerPath, JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
46
+ }
47
+ var INDEX_MARKER_TTL_MS;
48
+ var init_index_marker_store = __esm({
49
+ "dist/src/index-marker-store.js"() {
50
+ "use strict";
51
+ INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
52
+ }
53
+ });
2
54
 
3
55
  // dist/src/hooks/cursor/session-start.js
4
56
  import { fileURLToPath } from "node:url";
5
57
  import { dirname as dirname2, join as join6 } from "node:path";
6
58
 
7
59
  // dist/src/commands/auth.js
8
- import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from "node:fs";
60
+ import { execSync } from "node:child_process";
61
+
62
+ // dist/src/utils/client-header.js
63
+ var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
64
+ function deeplakeClientValue() {
65
+ return "hivemind";
66
+ }
67
+ function deeplakeClientHeader() {
68
+ return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
69
+ }
70
+
71
+ // dist/src/commands/auth-creds.js
72
+ import { readFileSync, writeFileSync, mkdirSync, unlinkSync } from "node:fs";
9
73
  import { join } from "node:path";
10
74
  import { homedir } from "node:os";
11
- import { execSync } from "node:child_process";
12
- var CONFIG_DIR = join(homedir(), ".deeplake");
13
- var CREDS_PATH = join(CONFIG_DIR, "credentials.json");
75
+ function configDir() {
76
+ return join(homedir(), ".deeplake");
77
+ }
78
+ function credsPath() {
79
+ return join(configDir(), "credentials.json");
80
+ }
14
81
  function loadCredentials() {
15
- if (!existsSync(CREDS_PATH))
16
- return null;
17
82
  try {
18
- return JSON.parse(readFileSync(CREDS_PATH, "utf-8"));
83
+ return JSON.parse(readFileSync(credsPath(), "utf-8"));
19
84
  } catch {
20
85
  return null;
21
86
  }
22
87
  }
23
88
 
24
89
  // dist/src/config.js
25
- import { readFileSync as readFileSync2, existsSync as existsSync2 } from "node:fs";
90
+ import { readFileSync as readFileSync2, existsSync } from "node:fs";
26
91
  import { join as join2 } from "node:path";
27
92
  import { homedir as homedir2, userInfo } from "node:os";
28
93
  function loadConfig() {
29
94
  const home = homedir2();
30
95
  const credPath = join2(home, ".deeplake", "credentials.json");
31
96
  let creds = null;
32
- if (existsSync2(credPath)) {
97
+ if (existsSync(credPath)) {
33
98
  try {
34
99
  creds = JSON.parse(readFileSync2(credPath, "utf-8"));
35
100
  } catch {
@@ -55,9 +120,6 @@ function loadConfig() {
55
120
 
56
121
  // dist/src/deeplake-api.js
57
122
  import { randomUUID } from "node:crypto";
58
- import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs";
59
- import { join as join4 } from "node:path";
60
- import { tmpdir } from "node:os";
61
123
 
62
124
  // dist/src/utils/debug.js
63
125
  import { appendFileSync } from "node:fs";
@@ -77,7 +139,17 @@ function sqlStr(value) {
77
139
  return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
78
140
  }
79
141
 
142
+ // dist/src/embeddings/columns.js
143
+ var SUMMARY_EMBEDDING_COL = "summary_embedding";
144
+ var MESSAGE_EMBEDDING_COL = "message_embedding";
145
+
80
146
  // dist/src/deeplake-api.js
147
+ var indexMarkerStorePromise = null;
148
+ function getIndexMarkerStore() {
149
+ if (!indexMarkerStorePromise)
150
+ indexMarkerStorePromise = Promise.resolve().then(() => (init_index_marker_store(), index_marker_store_exports));
151
+ return indexMarkerStorePromise;
152
+ }
81
153
  var log2 = (msg) => log("sdk", msg);
82
154
  function summarizeSql(sql, maxLen = 220) {
83
155
  const compact = sql.replace(/\s+/g, " ").trim();
@@ -97,7 +169,6 @@ var MAX_RETRIES = 3;
97
169
  var BASE_DELAY_MS = 500;
98
170
  var MAX_CONCURRENCY = 5;
99
171
  var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
100
- var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
101
172
  function sleep(ms) {
102
173
  return new Promise((resolve) => setTimeout(resolve, ms));
103
174
  }
@@ -117,9 +188,6 @@ function isTransientHtml403(text) {
117
188
  const body = text.toLowerCase();
118
189
  return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
119
190
  }
120
- function getIndexMarkerDir() {
121
- return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join4(tmpdir(), "hivemind-deeplake-indexes");
122
- }
123
191
  var Semaphore = class {
124
192
  max;
125
193
  waiting = [];
@@ -188,7 +256,8 @@ var DeeplakeApi = class {
188
256
  headers: {
189
257
  Authorization: `Bearer ${this.token}`,
190
258
  "Content-Type": "application/json",
191
- "X-Activeloop-Org-Id": this.orgId
259
+ "X-Activeloop-Org-Id": this.orgId,
260
+ ...deeplakeClientHeader()
192
261
  },
193
262
  signal,
194
263
  body: JSON.stringify({ query: sql })
@@ -215,7 +284,8 @@ var DeeplakeApi = class {
215
284
  }
216
285
  const text = await resp.text().catch(() => "");
217
286
  const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
218
- if (attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
287
+ const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
288
+ if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
219
289
  const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
220
290
  log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
221
291
  await sleep(delay);
@@ -249,7 +319,7 @@ var DeeplakeApi = class {
249
319
  const lud = row.lastUpdateDate ?? ts;
250
320
  const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
251
321
  if (exists.length > 0) {
252
- let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
322
+ let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
253
323
  if (row.project !== void 0)
254
324
  setClauses += `, project = '${sqlStr(row.project)}'`;
255
325
  if (row.description !== void 0)
@@ -257,8 +327,8 @@ var DeeplakeApi = class {
257
327
  await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
258
328
  } else {
259
329
  const id = randomUUID();
260
- let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date";
261
- let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
330
+ let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
331
+ let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
262
332
  if (row.project !== void 0) {
263
333
  cols += ", project";
264
334
  vals += `, '${sqlStr(row.project)}'`;
@@ -283,48 +353,83 @@ var DeeplakeApi = class {
283
353
  buildLookupIndexName(table, suffix) {
284
354
  return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
285
355
  }
286
- getLookupIndexMarkerPath(table, suffix) {
287
- const markerKey = [
288
- this.workspaceId,
289
- this.orgId,
290
- table,
291
- suffix
292
- ].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
293
- return join4(getIndexMarkerDir(), `${markerKey}.json`);
294
- }
295
- hasFreshLookupIndexMarker(table, suffix) {
296
- const markerPath = this.getLookupIndexMarkerPath(table, suffix);
297
- if (!existsSync3(markerPath))
298
- return false;
299
- try {
300
- const raw = JSON.parse(readFileSync3(markerPath, "utf-8"));
301
- const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
302
- if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
303
- return false;
304
- return true;
305
- } catch {
306
- return false;
307
- }
308
- }
309
- markLookupIndexReady(table, suffix) {
310
- mkdirSync2(getIndexMarkerDir(), { recursive: true });
311
- writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
312
- }
313
356
  async ensureLookupIndex(table, suffix, columnsSql) {
314
- if (this.hasFreshLookupIndexMarker(table, suffix))
357
+ const markers = await getIndexMarkerStore();
358
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
359
+ if (markers.hasFreshIndexMarker(markerPath))
315
360
  return;
316
361
  const indexName = this.buildLookupIndexName(table, suffix);
317
362
  try {
318
363
  await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
319
- this.markLookupIndexReady(table, suffix);
364
+ markers.writeIndexMarker(markerPath);
320
365
  } catch (e) {
321
366
  if (isDuplicateIndexError(e)) {
322
- this.markLookupIndexReady(table, suffix);
367
+ markers.writeIndexMarker(markerPath);
323
368
  return;
324
369
  }
325
370
  log2(`index "${indexName}" skipped: ${e.message}`);
326
371
  }
327
372
  }
373
+ /**
374
+ * Ensure a vector column exists on the given table.
375
+ *
376
+ * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
377
+ * EXISTS …` on every SessionStart. On a long-running workspace that's
378
+ * already migrated, every call returns 500 "Column already exists" — noisy
379
+ * in the log and a wasted round-trip. Worse, the very first call after the
380
+ * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
381
+ * window (~30s) during which subsequent INSERTs fail; minimising the
382
+ * number of ALTER calls minimises exposure to that window.
383
+ *
384
+ * New flow:
385
+ * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
386
+ * return — zero network calls.
387
+ * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
388
+ * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
389
+ * bug. If the column is present → mark + return.
390
+ * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
391
+ * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
392
+ * "already exists" (race: another client added it between our SELECT
393
+ * and ALTER).
394
+ *
395
+ * Marker uses the same dir / TTL as ensureLookupIndex so both schema
396
+ * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
397
+ */
398
+ async ensureEmbeddingColumn(table, column) {
399
+ await this.ensureColumn(table, column, "FLOAT4[]");
400
+ }
401
+ /**
402
+ * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
403
+ * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
404
+ * column that was added to the schema after the table was originally
405
+ * created. Used today for `summary_embedding`, `message_embedding`, and
406
+ * the `agent` column (added 2026-04-11) — the latter has no fallback if
407
+ * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
408
+ * with `column "agent" does not exist`.
409
+ */
410
+ async ensureColumn(table, column, sqlType) {
411
+ const markers = await getIndexMarkerStore();
412
+ const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
413
+ if (markers.hasFreshIndexMarker(markerPath))
414
+ return;
415
+ const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
416
+ const rows = await this.query(colCheck);
417
+ if (rows.length > 0) {
418
+ markers.writeIndexMarker(markerPath);
419
+ return;
420
+ }
421
+ try {
422
+ await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
423
+ } catch (e) {
424
+ const msg = e instanceof Error ? e.message : String(e);
425
+ if (!/already exists/i.test(msg))
426
+ throw e;
427
+ const recheck = await this.query(colCheck);
428
+ if (recheck.length === 0)
429
+ throw e;
430
+ }
431
+ markers.writeIndexMarker(markerPath);
432
+ }
328
433
  /** List all tables in the workspace (with retry). */
329
434
  async listTables(forceRefresh = false) {
330
435
  if (!forceRefresh && this._tablesCache)
@@ -340,7 +445,8 @@ var DeeplakeApi = class {
340
445
  const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
341
446
  headers: {
342
447
  Authorization: `Bearer ${this.token}`,
343
- "X-Activeloop-Org-Id": this.orgId
448
+ "X-Activeloop-Org-Id": this.orgId,
449
+ ...deeplakeClientHeader()
344
450
  }
345
451
  });
346
452
  if (resp.ok) {
@@ -365,28 +471,60 @@ var DeeplakeApi = class {
365
471
  }
366
472
  return { tables: [], cacheable: false };
367
473
  }
474
+ /**
475
+ * Run a `CREATE TABLE` with an extra outer retry budget. The base
476
+ * `query()` already retries 3 times on fetch errors (~3.5s total), but a
477
+ * failed CREATE is permanent corruption — every subsequent SELECT against
478
+ * the missing table fails. Wrapping in an outer loop with longer backoff
479
+ * (2s, 5s, then 10s) gives us ~17s of reach across transient network
480
+ * blips before giving up. Failures still propagate; getApi() resets its
481
+ * cache on init failure (openclaw plugin) so the next call retries the
482
+ * whole init flow.
483
+ */
484
+ async createTableWithRetry(sql, label) {
485
+ const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
486
+ let lastErr = null;
487
+ for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
488
+ try {
489
+ await this.query(sql);
490
+ return;
491
+ } catch (err) {
492
+ lastErr = err;
493
+ const msg = err instanceof Error ? err.message : String(err);
494
+ log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
495
+ if (attempt < OUTER_BACKOFFS_MS.length) {
496
+ await sleep(OUTER_BACKOFFS_MS[attempt]);
497
+ }
498
+ }
499
+ }
500
+ throw lastErr;
501
+ }
368
502
  /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
369
503
  async ensureTable(name) {
370
504
  const tbl = name ?? this.tableName;
371
505
  const tables = await this.listTables();
372
506
  if (!tables.includes(tbl)) {
373
507
  log2(`table "${tbl}" not found, creating`);
374
- await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
508
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
375
509
  log2(`table "${tbl}" created`);
376
510
  if (!tables.includes(tbl))
377
511
  this._tablesCache = [...tables, tbl];
378
512
  }
513
+ await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
514
+ await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
379
515
  }
380
516
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
381
517
  async ensureSessionsTable(name) {
382
518
  const tables = await this.listTables();
383
519
  if (!tables.includes(name)) {
384
520
  log2(`table "${name}" not found, creating`);
385
- await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`);
521
+ await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
386
522
  log2(`table "${name}" created`);
387
523
  if (!tables.includes(name))
388
524
  this._tablesCache = [...tables, name];
389
525
  }
526
+ await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
527
+ await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
390
528
  await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
391
529
  }
392
530
  };