@deeplake/hivemind 0.6.48 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +147 -20
- package/bundle/cli.js +552 -95
- package/codex/bundle/capture.js +509 -89
- package/codex/bundle/commands/auth-login.js +209 -66
- package/codex/bundle/embeddings/embed-daemon.js +243 -0
- package/codex/bundle/pre-tool-use.js +629 -104
- package/codex/bundle/session-start-setup.js +194 -57
- package/codex/bundle/session-start.js +25 -10
- package/codex/bundle/shell/deeplake-shell.js +679 -112
- package/codex/bundle/stop.js +476 -58
- package/codex/bundle/wiki-worker.js +312 -11
- package/cursor/bundle/capture.js +768 -57
- package/cursor/bundle/commands/auth-login.js +209 -66
- package/cursor/bundle/embeddings/embed-daemon.js +243 -0
- package/cursor/bundle/pre-tool-use.js +561 -70
- package/cursor/bundle/session-end.js +223 -2
- package/cursor/bundle/session-start.js +192 -54
- package/cursor/bundle/shell/deeplake-shell.js +679 -112
- package/cursor/bundle/wiki-worker.js +571 -0
- package/hermes/bundle/capture.js +771 -58
- package/hermes/bundle/commands/auth-login.js +209 -66
- package/hermes/bundle/embeddings/embed-daemon.js +243 -0
- package/hermes/bundle/pre-tool-use.js +560 -69
- package/hermes/bundle/session-end.js +224 -1
- package/hermes/bundle/session-start.js +195 -54
- package/hermes/bundle/shell/deeplake-shell.js +679 -112
- package/hermes/bundle/wiki-worker.js +572 -0
- package/mcp/bundle/server.js +253 -68
- package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
- package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
- package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
- package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
- package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
- package/openclaw/dist/index.js +752 -702
- package/openclaw/openclaw.plugin.json +1 -1
- package/openclaw/package.json +1 -1
- package/package.json +2 -1
- package/pi/extension-source/hivemind.ts +473 -21
|
@@ -21,6 +21,9 @@ import { join } from "node:path";
|
|
|
21
21
|
import { homedir } from "node:os";
|
|
22
22
|
var DEBUG = process.env.HIVEMIND_DEBUG === "1";
|
|
23
23
|
var LOG = join(homedir(), ".deeplake", "hook-debug.log");
|
|
24
|
+
/**
 * Render a date as a fixed-width UTC stamp: "YYYY-MM-DD HH:MM:SS UTC".
 *
 * @param {Date} [d] - Instant to format; defaults to the current time.
 * @returns {string} The first 19 characters of the ISO-8601 string with the
 *   "T" separator replaced by a space, followed by " UTC".
 */
function utcTimestamp(d = /* @__PURE__ */ new Date()) {
  const iso = d.toISOString();
  // Drop the fractional seconds and the trailing "Z", then swap the separator.
  const secondsPrecision = iso.slice(0, 19).replace("T", " ");
  return `${secondsPrecision} UTC`;
}
|
|
24
27
|
function log(tag, msg) {
|
|
25
28
|
if (!DEBUG)
|
|
26
29
|
return;
|
|
@@ -28,13 +31,233 @@ function log(tag, msg) {
|
|
|
28
31
|
`);
|
|
29
32
|
}
|
|
30
33
|
|
|
34
|
+
// dist/src/config.js
|
|
35
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
36
|
+
import { join as join2 } from "node:path";
|
|
37
|
+
import { homedir as homedir2, userInfo } from "node:os";
|
|
38
|
+
/**
 * Build the runtime configuration from ~/.deeplake/credentials.json and
 * HIVEMIND_* environment variables (environment wins where both exist).
 *
 * @returns {object|null} The resolved configuration, or null when the
 *   credentials file exists but cannot be parsed, or when no token / org id
 *   can be resolved.
 */
function loadConfig() {
  const home = homedir2();
  const credPath = join2(home, ".deeplake", "credentials.json");
  let creds = null;
  if (existsSync(credPath)) {
    try {
      creds = JSON.parse(readFileSync(credPath, "utf-8"));
    } catch {
      // Unreadable or corrupt credentials are treated as "not configured".
      return null;
    }
  }
  const token = process.env.HIVEMIND_TOKEN ?? creds?.token;
  const orgId = process.env.HIVEMIND_ORG_ID ?? creds?.orgId;
  if (!token || !orgId)
    return null;
  // os.userInfo() throws when the current user has no username or home
  // directory (e.g. some container UIDs); that must not break config loading.
  let userName = creds?.userName;
  if (!userName) {
    try {
      userName = userInfo().username;
    } catch {
      userName = "";
    }
  }
  return {
    token,
    orgId,
    orgName: creds?.orgName ?? orgId,
    userName: userName || "unknown",
    workspaceId: process.env.HIVEMIND_WORKSPACE_ID ?? creds?.workspaceId ?? "default",
    apiUrl: process.env.HIVEMIND_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai",
    tableName: process.env.HIVEMIND_TABLE ?? "memory",
    sessionsTableName: process.env.HIVEMIND_SESSIONS_TABLE ?? "sessions",
    memoryPath: process.env.HIVEMIND_MEMORY_PATH ?? join2(home, ".deeplake", "memory")
  };
}
|
|
65
|
+
|
|
66
|
+
// dist/src/hooks/summary-state.js
|
|
67
|
+
import { readFileSync as readFileSync2, writeFileSync, writeSync, mkdirSync, renameSync, existsSync as existsSync2, unlinkSync, openSync, closeSync } from "node:fs";
|
|
68
|
+
import { homedir as homedir3 } from "node:os";
|
|
69
|
+
import { join as join3 } from "node:path";
|
|
70
|
+
// Debug logger scoped to the summary-state module.
var dlog = (msg) => {
  return log("summary-state", msg);
};
// Directory holding per-session state and lock files.
var STATE_DIR = join3(homedir3(), ".claude", "hooks", "summary-state");
// One-element Int32Array over a 4-byte SharedArrayBuffer; not referenced by
// any code visible in this chunk.
var YIELD_BUF = new Int32Array(new SharedArrayBuffer(4));
/**
 * Location of the lock file for a session.
 *
 * @param {string} sessionId - Session identifier, used verbatim in the file name.
 * @returns {string} Path of "<sessionId>.lock" inside STATE_DIR.
 */
function lockPath(sessionId) {
  const lockFileName = `${sessionId}.lock`;
  return join3(STATE_DIR, lockFileName);
}
|
|
76
|
+
/**
 * Try to take the per-session lock file.
 *
 * The lock file stores the acquisition time as epoch milliseconds. A lock
 * younger than `maxAgeMs` is respected; an older, unreadable or unparsable
 * one is treated as stale and replaced. The final creation uses the exclusive
 * "wx" flag, so when several processes race only one of them succeeds.
 *
 * @param {string} sessionId - Session whose lock should be taken.
 * @param {number} [maxAgeMs] - Age after which an existing lock is stale
 *   (default: 10 minutes).
 * @returns {boolean} true when this process now holds the lock, false when a
 *   live lock exists or a stale one could not be removed.
 * @throws Errors from creating the lock file other than EEXIST.
 */
function tryAcquireLock(sessionId, maxAgeMs = 10 * 60 * 1e3) {
  mkdirSync(STATE_DIR, { recursive: true });
  const p = lockPath(sessionId);
  if (existsSync2(p)) {
    try {
      const ageMs = Date.now() - parseInt(readFileSync2(p, "utf-8"), 10);
      if (Number.isFinite(ageMs) && ageMs < maxAgeMs)
        return false;
    } catch (readErr) {
      dlog(`lock file unreadable for ${sessionId}, treating as stale: ${readErr.message}`);
    }
    try {
      unlinkSync(p);
    } catch (unlinkErr) {
      // ENOENT means another process already removed the stale lock; the
      // exclusive create below still decides who wins, so keep going.
      if (unlinkErr.code !== "ENOENT") {
        dlog(`could not unlink stale lock for ${sessionId}: ${unlinkErr.message}`);
        return false;
      }
    }
  }
  try {
    const fd = openSync(p, "wx");
    try {
      writeSync(fd, String(Date.now()));
    } finally {
      closeSync(fd);
    }
    return true;
  } catch (e) {
    // Lost the race to another process that created the lock first.
    if (e.code === "EEXIST")
      return false;
    throw e;
  }
}
|
|
108
|
+
|
|
109
|
+
// dist/src/hooks/hermes/spawn-wiki-worker.js
|
|
110
|
+
import { spawn, execSync } from "node:child_process";
|
|
111
|
+
import { fileURLToPath } from "node:url";
|
|
112
|
+
import { dirname, join as join5 } from "node:path";
|
|
113
|
+
import { writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "node:fs";
|
|
114
|
+
import { homedir as homedir4, tmpdir } from "node:os";
|
|
115
|
+
|
|
116
|
+
// dist/src/utils/wiki-log.js
|
|
117
|
+
import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2 } from "node:fs";
|
|
118
|
+
import { join as join4 } from "node:path";
|
|
119
|
+
/**
 * Create a best-effort, append-only file logger.
 *
 * @param {string} hooksDir - Directory that holds the log file (created on demand).
 * @param {string} [filename] - Log file name inside `hooksDir`.
 * @returns {{path: string, log: (msg: string) => void}} The log file path and
 *   a `log` function that appends one UTC-stamped line per call. `log` does
 *   not rely on `this`, so it can be passed around detached.
 */
function makeWikiLogger(hooksDir, filename = "deeplake-wiki.log") {
  const path = join4(hooksDir, filename);
  const log = (msg) => {
    try {
      mkdirSync2(hooksDir, { recursive: true });
      appendFileSync2(path, `[${utcTimestamp()}] ${msg}
`);
    } catch {
      // Logging is best-effort: failures are deliberately ignored.
    }
  };
  return { path, log };
}
|
|
133
|
+
|
|
134
|
+
// dist/src/hooks/hermes/spawn-wiki-worker.js
|
|
135
|
+
var HOME = homedir4();
|
|
136
|
+
var wikiLogger = makeWikiLogger(join5(HOME, ".hermes", "hooks"));
|
|
137
|
+
var WIKI_LOG = wikiLogger.path;
|
|
138
|
+
var WIKI_PROMPT_TEMPLATE = `You are building a personal wiki from a coding session. Your goal is to extract every piece of knowledge \u2014 entities, decisions, relationships, and facts \u2014 into a structured, searchable wiki entry.
|
|
139
|
+
|
|
140
|
+
SESSION JSONL path: __JSONL__
|
|
141
|
+
SUMMARY FILE to write: __SUMMARY__
|
|
142
|
+
SESSION ID: __SESSION_ID__
|
|
143
|
+
PROJECT: __PROJECT__
|
|
144
|
+
PREVIOUS JSONL OFFSET (lines already processed): __PREV_OFFSET__
|
|
145
|
+
CURRENT JSONL LINES: __JSONL_LINES__
|
|
146
|
+
|
|
147
|
+
Steps:
|
|
148
|
+
1. Read the session JSONL at the path above.
|
|
149
|
+
- If PREVIOUS JSONL OFFSET > 0, this is a resumed session. Read the existing summary file first,
|
|
150
|
+
then focus on lines AFTER the offset for new content. Merge new facts into the existing summary.
|
|
151
|
+
- If offset is 0, generate from scratch.
|
|
152
|
+
|
|
153
|
+
2. Write the summary file at the path above with this EXACT format:
|
|
154
|
+
|
|
155
|
+
# Session __SESSION_ID__
|
|
156
|
+
- **Source**: __JSONL_SERVER_PATH__
|
|
157
|
+
- **Started**: <extract from JSONL>
|
|
158
|
+
- **Ended**: <now>
|
|
159
|
+
- **Project**: __PROJECT__
|
|
160
|
+
- **JSONL offset**: __JSONL_LINES__
|
|
161
|
+
|
|
162
|
+
## What Happened
|
|
163
|
+
<2-3 dense sentences. What was the goal, what was accomplished, what's left.>
|
|
164
|
+
|
|
165
|
+
## People
|
|
166
|
+
<For each person mentioned: name, role, what they did/said. Format: **Name** \u2014 role \u2014 action>
|
|
167
|
+
|
|
168
|
+
## Entities
|
|
169
|
+
<Every named thing: repos, branches, files, APIs, tools, services, tables, features, bugs.
|
|
170
|
+
Format: **entity** (type) \u2014 what was done with it, its current state>
|
|
171
|
+
|
|
172
|
+
## Decisions & Reasoning
|
|
173
|
+
<Every decision made and WHY.>
|
|
174
|
+
|
|
175
|
+
## Key Facts
|
|
176
|
+
<Bullet list of atomic facts that could answer future questions.>
|
|
177
|
+
|
|
178
|
+
## Files Modified
|
|
179
|
+
<bullet list: path (new/modified/deleted) \u2014 what changed>
|
|
180
|
+
|
|
181
|
+
## Open Questions / TODO
|
|
182
|
+
<Anything unresolved, blocked, or explicitly deferred>
|
|
183
|
+
|
|
184
|
+
IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact.
|
|
185
|
+
PRIVACY: Never include absolute filesystem paths in the summary.
|
|
186
|
+
LENGTH LIMIT: Keep the total summary under 4000 characters.`;
|
|
187
|
+
var wikiLog = wikiLogger.log;
|
|
188
|
+
/**
 * Resolve the `hermes` executable via `which`.
 *
 * @returns {string} The trimmed output of `which hermes`, or the bare command
 *   name "hermes" when the lookup fails or prints nothing.
 */
function findHermesBin() {
  let resolved = "";
  try {
    resolved = execSync("which hermes 2>/dev/null", { encoding: "utf-8" }).trim();
  } catch {
    // `which` exited non-zero or could not run; use the bare command name.
  }
  return resolved || "hermes";
}
|
|
195
|
+
/**
 * Write a per-run config file and launch the detached wiki summary worker.
 *
 * @param {object} opts
 * @param {object} opts.config - Resolved configuration (apiUrl, token, orgId,
 *   workspaceId, tableName, sessionsTableName, userName are read).
 * @param {string} opts.sessionId - Session to summarise.
 * @param {string} opts.cwd - Working directory; its last path segment becomes
 *   the project name.
 * @param {string} opts.bundleDir - Directory containing `wiki-worker.js`.
 * @param {string} opts.reason - Label prefixed to the wiki log lines.
 * @throws Errors from creating the temp directory or writing the config file.
 */
function spawnHermesWikiWorker(opts) {
  const { config, sessionId, cwd, bundleDir, reason } = opts;
  // Ignore empty segments so a trailing slash ("/a/b/") still yields "b".
  const projectName = cwd.split("/").filter(Boolean).pop() || "unknown";
  const tmpDir = join5(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`);
  // The config file carries the API token: keep the directory and the file
  // private to the current user, since the OS temp directory is usually shared.
  mkdirSync3(tmpDir, { recursive: true, mode: 0o700 });
  const configFile = join5(tmpDir, "config.json");
  writeFileSync2(configFile, JSON.stringify({
    apiUrl: config.apiUrl,
    token: config.token,
    orgId: config.orgId,
    workspaceId: config.workspaceId,
    memoryTable: config.tableName,
    sessionsTable: config.sessionsTableName,
    sessionId,
    userName: config.userName,
    project: projectName,
    tmpDir,
    hermesBin: findHermesBin(),
    hermesProvider: process.env.HIVEMIND_HERMES_PROVIDER ?? "openrouter",
    hermesModel: process.env.HIVEMIND_HERMES_MODEL ?? "anthropic/claude-haiku-4-5",
    wikiLog: WIKI_LOG,
    hooksDir: join5(HOME, ".hermes", "hooks"),
    promptTemplate: WIKI_PROMPT_TEMPLATE
  }), { mode: 0o600 });
  wikiLog(`${reason}: spawning summary worker for ${sessionId}`);
  const workerPath = join5(bundleDir, "wiki-worker.js");
  const child = spawn("nohup", ["node", workerPath, configFile], {
    detached: true,
    stdio: ["ignore", "ignore", "ignore"]
  });
  // A failed launch (e.g. `nohup` not on PATH) is reported asynchronously via
  // the "error" event; without a listener it would surface as an uncaught
  // exception in the hook process.
  child.on("error", (err) => {
    wikiLog(`${reason}: summary worker failed to start for ${sessionId}: ${err?.message ?? err}`);
  });
  child.unref();
  wikiLog(`${reason}: spawned summary worker for ${sessionId}`);
}
|
|
227
|
+
/**
 * Directory that contains the module identified by a `file:` URL.
 *
 * @param {string} importMetaUrl - Typically `import.meta.url` of the caller.
 * @returns {string} Filesystem path of the module's parent directory.
 */
function bundleDirFromImportMeta(importMetaUrl) {
  const modulePath = fileURLToPath(importMetaUrl);
  return dirname(modulePath);
}
|
|
230
|
+
|
|
31
231
|
// dist/src/hooks/hermes/session-end.js
|
|
32
232
|
var log2 = (msg) => log("hermes-session-end", msg);
|
|
33
233
|
/**
 * SessionEnd hook entry point: spawn the final wiki summary worker for the
 * session read from stdin, unless this process is itself the wiki worker, the
 * payload carries no session id, or another summary run holds the lock.
 */
async function main() {
  // Never recurse: the worker's own agent session must not trigger summaries.
  if (process.env.HIVEMIND_WIKI_WORKER === "1")
    return;
  const input = await readStdin();
  const sessionId = input.session_id ?? "";
  log2(`session=${sessionId || "?"} cwd=${input.cwd ?? "?"}`);
  if (!sessionId)
    return;
  if (!tryAcquireLock(sessionId)) {
    wikiLog(`SessionEnd: periodic worker already running for ${sessionId}, skipping final`);
    return;
  }
  let handedOff = false;
  try {
    const config = loadConfig();
    if (!config) {
      wikiLog(`SessionEnd: no config, skipping summary`);
      return;
    }
    spawnHermesWikiWorker({
      config,
      sessionId,
      cwd: input.cwd ?? process.cwd(),
      bundleDir: bundleDirFromImportMeta(import.meta.url),
      reason: "SessionEnd"
    });
    handedOff = true;
  } catch (e) {
    wikiLog(`SessionEnd: spawn failed: ${e?.message ?? e}`);
  } finally {
    // No worker took over, so nothing else will clear the lock; drop it here
    // rather than blocking summaries for this session until it goes stale.
    // NOTE(review): assumes a spawned worker owns the lock's lifetime — confirm
    // against wiki-worker.js.
    if (!handedOff) {
      try {
        unlinkSync(lockPath(sessionId));
      } catch (unlockErr) {
        wikiLog(`SessionEnd: could not release lock for ${sessionId}: ${unlockErr?.message ?? unlockErr}`);
      }
    }
  }
}
|
|
39
262
|
main().catch((e) => {
|
|
40
263
|
log2(`fatal: ${e.message}`);
|
|
@@ -1,33 +1,99 @@
|
|
|
1
|
+
// Bundler runtime helpers.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
/**
 * Wrap a lazily-initialised module. `fn` is an object whose first own
 * property is the initialiser; the returned function runs it at most once
 * (with 0 as its argument) and returns its cached result on every call.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initializer = fn[__getOwnPropNames(fn)[0]];
    // Clear the slot before running so the initialiser is never re-entered.
    fn = 0;
    res = initializer(0);
  }
  return res;
};
/**
 * Define an enumerable getter on `target` for every enumerable key of `all`,
 * using the corresponding value of `all` as the getter.
 */
var __export = (target, all) => {
  for (const name in all) {
    const getter = all[name];
    __defProp(target, name, { get: getter, enumerable: true });
  }
};
|
|
10
|
+
|
|
11
|
+
// dist/src/index-marker-store.js
|
|
12
|
+
var index_marker_store_exports = {};
|
|
13
|
+
__export(index_marker_store_exports, {
|
|
14
|
+
buildIndexMarkerPath: () => buildIndexMarkerPath,
|
|
15
|
+
getIndexMarkerDir: () => getIndexMarkerDir,
|
|
16
|
+
hasFreshIndexMarker: () => hasFreshIndexMarker,
|
|
17
|
+
writeIndexMarker: () => writeIndexMarker
|
|
18
|
+
});
|
|
19
|
+
import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs";
|
|
20
|
+
import { join as join4 } from "node:path";
|
|
21
|
+
import { tmpdir } from "node:os";
|
|
22
|
+
/**
 * Directory where schema/index marker files are stored.
 *
 * @returns {string} HIVEMIND_INDEX_MARKER_DIR when set (even if empty),
 *   otherwise "hivemind-deeplake-indexes" inside the OS temp directory.
 */
function getIndexMarkerDir() {
  const override = process.env.HIVEMIND_INDEX_MARKER_DIR;
  if (override === undefined || override === null) {
    return join4(tmpdir(), "hivemind-deeplake-indexes");
  }
  return override;
}
|
|
25
|
+
/**
 * Compute the marker file path for a (workspace, org, table, suffix) tuple.
 *
 * @param {string} workspaceId
 * @param {string} orgId
 * @param {string} table
 * @param {string} suffix - Distinguishes markers on the same table.
 * @returns {string} "<key>.json" inside the marker directory, where the key
 *   is the four parts joined by "__" with every character outside
 *   [a-zA-Z0-9_.-] replaced by "_".
 */
function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
  const parts = [workspaceId, orgId, table, suffix];
  const rawKey = parts.join("__");
  const markerKey = rawKey.replace(/[^a-zA-Z0-9_.-]/g, "_");
  const fileName = `${markerKey}.json`;
  return join4(getIndexMarkerDir(), fileName);
}
|
|
29
|
+
/**
 * Check whether a marker file exists and is still within the TTL.
 *
 * @param {string} markerPath - Marker file to inspect.
 * @returns {boolean} true only when the file parses as JSON, carries a valid
 *   `updatedAt` date, and its age does not exceed INDEX_MARKER_TTL_MS. Any
 *   read or parse failure yields false.
 */
function hasFreshIndexMarker(markerPath) {
  if (!existsSync2(markerPath)) {
    return false;
  }
  try {
    const raw = JSON.parse(readFileSync3(markerPath, "utf-8"));
    let updatedAt = NaN;
    if (raw.updatedAt) {
      updatedAt = new Date(raw.updatedAt).getTime();
    }
    if (!Number.isFinite(updatedAt)) {
      return false;
    }
    const ageMs = Date.now() - updatedAt;
    // Written as a negated ">" so a NaN TTL compares exactly as before.
    return !(ageMs > INDEX_MARKER_TTL_MS);
  } catch {
    return false;
  }
}
|
|
42
|
+
/**
 * Record "now" in a marker file, creating the marker directory if needed.
 *
 * @param {string} markerPath - Marker file to (over)write.
 * @throws Filesystem errors from creating the directory or writing the file.
 */
function writeIndexMarker(markerPath) {
  const markerDir = getIndexMarkerDir();
  mkdirSync2(markerDir, { recursive: true });
  const payload = JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
  writeFileSync2(markerPath, payload, "utf-8");
}
|
|
46
|
+
// Marker time-to-live in milliseconds; assigned when the module initialiser
// below first runs.
var INDEX_MARKER_TTL_MS;
var init_index_marker_store = __esm({
  "dist/src/index-marker-store.js"() {
    "use strict";
    // Default TTL: 6 hours.
    const DEFAULT_TTL_MS = 6 * 60 * 6e4;
    const raw = process.env.HIVEMIND_INDEX_MARKER_TTL_MS;
    const parsed = raw === void 0 || raw.trim() === "" ? NaN : Number(raw);
    // A non-numeric override used to yield NaN, and every "age > NaN"
    // comparison is false, so markers never expired. Blank, non-numeric or
    // negative values now fall back to the default.
    INDEX_MARKER_TTL_MS = Number.isFinite(parsed) && parsed >= 0 ? parsed : DEFAULT_TTL_MS;
  }
});
|
|
53
|
+
|
|
1
54
|
// dist/src/hooks/hermes/session-start.js
|
|
2
55
|
import { fileURLToPath } from "node:url";
|
|
3
56
|
import { dirname as dirname2, join as join6 } from "node:path";
|
|
4
57
|
|
|
5
58
|
// dist/src/commands/auth.js
|
|
6
|
-
import {
|
|
59
|
+
import { execSync } from "node:child_process";
|
|
60
|
+
|
|
61
|
+
// dist/src/utils/client-header.js
|
|
62
|
+
// Name of the HTTP header that identifies the calling client.
var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
/**
 * @returns {string} The client identifier sent in the client header.
 */
function deeplakeClientValue() {
  const clientName = "hivemind";
  return clientName;
}
/**
 * @returns {Object<string, string>} A one-entry header map suitable for
 *   spreading into a request's `headers`.
 */
function deeplakeClientHeader() {
  const headers = {};
  headers[DEEPLAKE_CLIENT_HEADER] = deeplakeClientValue();
  return headers;
}
|
|
69
|
+
|
|
70
|
+
// dist/src/commands/auth-creds.js
|
|
71
|
+
import { readFileSync, writeFileSync, mkdirSync, unlinkSync } from "node:fs";
|
|
7
72
|
import { join } from "node:path";
|
|
8
73
|
import { homedir } from "node:os";
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
74
|
+
/**
 * @returns {string} The ".deeplake" directory inside the current user's home
 *   directory (resolved on every call).
 */
function configDir() {
  const home = homedir();
  return join(home, ".deeplake");
}
|
|
77
|
+
/**
 * @returns {string} Path of "credentials.json" inside the config directory.
 */
function credsPath() {
  const dir = configDir();
  return join(dir, "credentials.json");
}
|
|
12
80
|
/**
 * Read and parse the stored credentials.
 *
 * @returns {*} The parsed JSON content of the credentials file, or null when
 *   the file is missing, unreadable, or not valid JSON.
 */
function loadCredentials() {
  let parsed;
  try {
    const text = readFileSync(credsPath(), "utf-8");
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  return parsed;
}
|
|
21
87
|
|
|
22
88
|
// dist/src/config.js
|
|
23
|
-
import { readFileSync as readFileSync2, existsSync
|
|
89
|
+
import { readFileSync as readFileSync2, existsSync } from "node:fs";
|
|
24
90
|
import { join as join2 } from "node:path";
|
|
25
91
|
import { homedir as homedir2, userInfo } from "node:os";
|
|
26
92
|
function loadConfig() {
|
|
27
93
|
const home = homedir2();
|
|
28
94
|
const credPath = join2(home, ".deeplake", "credentials.json");
|
|
29
95
|
let creds = null;
|
|
30
|
-
if (
|
|
96
|
+
if (existsSync(credPath)) {
|
|
31
97
|
try {
|
|
32
98
|
creds = JSON.parse(readFileSync2(credPath, "utf-8"));
|
|
33
99
|
} catch {
|
|
@@ -53,9 +119,6 @@ function loadConfig() {
|
|
|
53
119
|
|
|
54
120
|
// dist/src/deeplake-api.js
|
|
55
121
|
import { randomUUID } from "node:crypto";
|
|
56
|
-
import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, writeFileSync as writeFileSync2 } from "node:fs";
|
|
57
|
-
import { join as join4 } from "node:path";
|
|
58
|
-
import { tmpdir } from "node:os";
|
|
59
122
|
|
|
60
123
|
// dist/src/utils/debug.js
|
|
61
124
|
import { appendFileSync } from "node:fs";
|
|
@@ -75,7 +138,17 @@ function sqlStr(value) {
|
|
|
75
138
|
return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
|
|
76
139
|
}
|
|
77
140
|
|
|
141
|
+
// dist/src/embeddings/columns.js
|
|
142
|
+
var SUMMARY_EMBEDDING_COL = "summary_embedding";
|
|
143
|
+
var MESSAGE_EMBEDDING_COL = "message_embedding";
|
|
144
|
+
|
|
78
145
|
// dist/src/deeplake-api.js
|
|
146
|
+
// Memoised promise for the lazily-initialised index-marker-store module.
var indexMarkerStorePromise = null;
/**
 * Load the index marker store on first use.
 *
 * @returns {Promise<object>} Resolves to the marker store's export object;
 *   the same promise is returned on every call.
 */
function getIndexMarkerStore() {
  if (indexMarkerStorePromise) {
    return indexMarkerStorePromise;
  }
  indexMarkerStorePromise = Promise.resolve().then(() => {
    init_index_marker_store();
    return index_marker_store_exports;
  });
  return indexMarkerStorePromise;
}
|
|
79
152
|
var log2 = (msg) => log("sdk", msg);
|
|
80
153
|
function summarizeSql(sql, maxLen = 220) {
|
|
81
154
|
const compact = sql.replace(/\s+/g, " ").trim();
|
|
@@ -95,7 +168,6 @@ var MAX_RETRIES = 3;
|
|
|
95
168
|
var BASE_DELAY_MS = 500;
|
|
96
169
|
var MAX_CONCURRENCY = 5;
|
|
97
170
|
var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
|
|
98
|
-
var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
|
|
99
171
|
function sleep(ms) {
|
|
100
172
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
101
173
|
}
|
|
@@ -115,9 +187,6 @@ function isTransientHtml403(text) {
|
|
|
115
187
|
const body = text.toLowerCase();
|
|
116
188
|
return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
|
|
117
189
|
}
|
|
118
|
-
function getIndexMarkerDir() {
|
|
119
|
-
return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join4(tmpdir(), "hivemind-deeplake-indexes");
|
|
120
|
-
}
|
|
121
190
|
var Semaphore = class {
|
|
122
191
|
max;
|
|
123
192
|
waiting = [];
|
|
@@ -186,7 +255,8 @@ var DeeplakeApi = class {
|
|
|
186
255
|
headers: {
|
|
187
256
|
Authorization: `Bearer ${this.token}`,
|
|
188
257
|
"Content-Type": "application/json",
|
|
189
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
258
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
259
|
+
...deeplakeClientHeader()
|
|
190
260
|
},
|
|
191
261
|
signal,
|
|
192
262
|
body: JSON.stringify({ query: sql })
|
|
@@ -213,7 +283,8 @@ var DeeplakeApi = class {
|
|
|
213
283
|
}
|
|
214
284
|
const text = await resp.text().catch(() => "");
|
|
215
285
|
const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
|
|
216
|
-
|
|
286
|
+
const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
|
|
287
|
+
if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
|
|
217
288
|
const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
|
|
218
289
|
log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
|
|
219
290
|
await sleep(delay);
|
|
@@ -247,7 +318,7 @@ var DeeplakeApi = class {
|
|
|
247
318
|
const lud = row.lastUpdateDate ?? ts;
|
|
248
319
|
const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
|
|
249
320
|
if (exists.length > 0) {
|
|
250
|
-
let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
321
|
+
let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
251
322
|
if (row.project !== void 0)
|
|
252
323
|
setClauses += `, project = '${sqlStr(row.project)}'`;
|
|
253
324
|
if (row.description !== void 0)
|
|
@@ -255,8 +326,8 @@ var DeeplakeApi = class {
|
|
|
255
326
|
await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
|
|
256
327
|
} else {
|
|
257
328
|
const id = randomUUID();
|
|
258
|
-
let cols =
|
|
259
|
-
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
329
|
+
let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
|
|
330
|
+
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
260
331
|
if (row.project !== void 0) {
|
|
261
332
|
cols += ", project";
|
|
262
333
|
vals += `, '${sqlStr(row.project)}'`;
|
|
@@ -281,48 +352,83 @@ var DeeplakeApi = class {
|
|
|
281
352
|
buildLookupIndexName(table, suffix) {
|
|
282
353
|
return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
283
354
|
}
|
|
284
|
-
getLookupIndexMarkerPath(table, suffix) {
|
|
285
|
-
const markerKey = [
|
|
286
|
-
this.workspaceId,
|
|
287
|
-
this.orgId,
|
|
288
|
-
table,
|
|
289
|
-
suffix
|
|
290
|
-
].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
|
|
291
|
-
return join4(getIndexMarkerDir(), `${markerKey}.json`);
|
|
292
|
-
}
|
|
293
|
-
hasFreshLookupIndexMarker(table, suffix) {
|
|
294
|
-
const markerPath = this.getLookupIndexMarkerPath(table, suffix);
|
|
295
|
-
if (!existsSync3(markerPath))
|
|
296
|
-
return false;
|
|
297
|
-
try {
|
|
298
|
-
const raw = JSON.parse(readFileSync3(markerPath, "utf-8"));
|
|
299
|
-
const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
|
|
300
|
-
if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
|
|
301
|
-
return false;
|
|
302
|
-
return true;
|
|
303
|
-
} catch {
|
|
304
|
-
return false;
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
markLookupIndexReady(table, suffix) {
|
|
308
|
-
mkdirSync2(getIndexMarkerDir(), { recursive: true });
|
|
309
|
-
writeFileSync2(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
|
|
310
|
-
}
|
|
311
355
|
async ensureLookupIndex(table, suffix, columnsSql) {
|
|
312
|
-
|
|
356
|
+
const markers = await getIndexMarkerStore();
|
|
357
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
|
|
358
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
313
359
|
return;
|
|
314
360
|
const indexName = this.buildLookupIndexName(table, suffix);
|
|
315
361
|
try {
|
|
316
362
|
await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
|
|
317
|
-
|
|
363
|
+
markers.writeIndexMarker(markerPath);
|
|
318
364
|
} catch (e) {
|
|
319
365
|
if (isDuplicateIndexError(e)) {
|
|
320
|
-
|
|
366
|
+
markers.writeIndexMarker(markerPath);
|
|
321
367
|
return;
|
|
322
368
|
}
|
|
323
369
|
log2(`index "${indexName}" skipped: ${e.message}`);
|
|
324
370
|
}
|
|
325
371
|
}
|
|
372
|
+
/**
|
|
373
|
+
* Ensure a vector column exists on the given table.
|
|
374
|
+
*
|
|
375
|
+
* The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
|
|
376
|
+
* EXISTS …` on every SessionStart. On a long-running workspace that's
|
|
377
|
+
* already migrated, every call returns 500 "Column already exists" — noisy
|
|
378
|
+
* in the log and a wasted round-trip. Worse, the very first call after the
|
|
379
|
+
* column is genuinely added triggers Deeplake's post-ALTER `vector::at`
|
|
380
|
+
* window (~30s) during which subsequent INSERTs fail; minimising the
|
|
381
|
+
* number of ALTER calls minimises exposure to that window.
|
|
382
|
+
*
|
|
383
|
+
* New flow:
|
|
384
|
+
* 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
|
|
385
|
+
* return — zero network calls.
|
|
386
|
+
* 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
|
|
387
|
+
* column_name = C. Read-only, idempotent, can't tickle the post-ALTER
|
|
388
|
+
* bug. If the column is present → mark + return.
|
|
389
|
+
* 3. Only if step 2 says the column is missing, fall back to ALTER ADD
|
|
390
|
+
* COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
|
|
391
|
+
* "already exists" (race: another client added it between our SELECT
|
|
392
|
+
* and ALTER).
|
|
393
|
+
*
|
|
394
|
+
* Marker uses the same dir / TTL as ensureLookupIndex so both schema
|
|
395
|
+
* caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
|
|
396
|
+
*/
|
|
397
|
+
async ensureEmbeddingColumn(table, column) {
|
|
398
|
+
await this.ensureColumn(table, column, "FLOAT4[]");
|
|
399
|
+
}
|
|
400
|
+
/**
|
|
401
|
+
* Generic marker-gated column migration. Same SELECT-then-ALTER flow as
|
|
402
|
+
* ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
|
|
403
|
+
* column that was added to the schema after the table was originally
|
|
404
|
+
* created. Used today for `summary_embedding`, `message_embedding`, and
|
|
405
|
+
* the `agent` column (added 2026-04-11) — the latter has no fallback if
|
|
406
|
+
* a user upgraded over a pre-2026-04-11 table, so every INSERT fails
|
|
407
|
+
* with `column "agent" does not exist`.
|
|
408
|
+
*/
|
|
409
|
+
async ensureColumn(table, column, sqlType) {
|
|
410
|
+
const markers = await getIndexMarkerStore();
|
|
411
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
|
|
412
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
413
|
+
return;
|
|
414
|
+
const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
|
|
415
|
+
const rows = await this.query(colCheck);
|
|
416
|
+
if (rows.length > 0) {
|
|
417
|
+
markers.writeIndexMarker(markerPath);
|
|
418
|
+
return;
|
|
419
|
+
}
|
|
420
|
+
try {
|
|
421
|
+
await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
|
|
422
|
+
} catch (e) {
|
|
423
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
424
|
+
if (!/already exists/i.test(msg))
|
|
425
|
+
throw e;
|
|
426
|
+
const recheck = await this.query(colCheck);
|
|
427
|
+
if (recheck.length === 0)
|
|
428
|
+
throw e;
|
|
429
|
+
}
|
|
430
|
+
markers.writeIndexMarker(markerPath);
|
|
431
|
+
}
|
|
326
432
|
/** List all tables in the workspace (with retry). */
|
|
327
433
|
async listTables(forceRefresh = false) {
|
|
328
434
|
if (!forceRefresh && this._tablesCache)
|
|
@@ -338,7 +444,8 @@ var DeeplakeApi = class {
|
|
|
338
444
|
const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
|
|
339
445
|
headers: {
|
|
340
446
|
Authorization: `Bearer ${this.token}`,
|
|
341
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
447
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
448
|
+
...deeplakeClientHeader()
|
|
342
449
|
}
|
|
343
450
|
});
|
|
344
451
|
if (resp.ok) {
|
|
@@ -363,28 +470,60 @@ var DeeplakeApi = class {
|
|
|
363
470
|
}
|
|
364
471
|
return { tables: [], cacheable: false };
|
|
365
472
|
}
|
|
473
|
+
/**
|
|
474
|
+
* Run a `CREATE TABLE` with an extra outer retry budget. The base
|
|
475
|
+
* `query()` already retries 3 times on fetch errors (~3.5s total), but a
|
|
476
|
+
* failed CREATE is permanent corruption — every subsequent SELECT against
|
|
477
|
+
* the missing table fails. Wrapping in an outer loop with longer backoff
|
|
478
|
+
* (2s, 5s, then 10s) gives us ~17s of reach across transient network
|
|
479
|
+
* blips before giving up. Failures still propagate; getApi() resets its
|
|
480
|
+
* cache on init failure (openclaw plugin) so the next call retries the
|
|
481
|
+
* whole init flow.
|
|
482
|
+
*/
|
|
483
|
+
async createTableWithRetry(sql, label) {
|
|
484
|
+
const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
|
|
485
|
+
let lastErr = null;
|
|
486
|
+
for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
|
|
487
|
+
try {
|
|
488
|
+
await this.query(sql);
|
|
489
|
+
return;
|
|
490
|
+
} catch (err) {
|
|
491
|
+
lastErr = err;
|
|
492
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
493
|
+
log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
|
|
494
|
+
if (attempt < OUTER_BACKOFFS_MS.length) {
|
|
495
|
+
await sleep(OUTER_BACKOFFS_MS[attempt]);
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
throw lastErr;
|
|
500
|
+
}
|
|
366
501
|
/** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
|
|
367
502
|
async ensureTable(name) {
|
|
368
503
|
const tbl = name ?? this.tableName;
|
|
369
504
|
const tables = await this.listTables();
|
|
370
505
|
if (!tables.includes(tbl)) {
|
|
371
506
|
log2(`table "${tbl}" not found, creating`);
|
|
372
|
-
await this.
|
|
507
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
|
|
373
508
|
log2(`table "${tbl}" created`);
|
|
374
509
|
if (!tables.includes(tbl))
|
|
375
510
|
this._tablesCache = [...tables, tbl];
|
|
376
511
|
}
|
|
512
|
+
await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
|
|
513
|
+
await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
377
514
|
}
|
|
378
515
|
/** Create the sessions table (uses JSONB for message since every row is a JSON event). */
|
|
379
516
|
async ensureSessionsTable(name) {
|
|
380
517
|
const tables = await this.listTables();
|
|
381
518
|
if (!tables.includes(name)) {
|
|
382
519
|
log2(`table "${name}" not found, creating`);
|
|
383
|
-
await this.
|
|
520
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
|
|
384
521
|
log2(`table "${name}" created`);
|
|
385
522
|
if (!tables.includes(name))
|
|
386
523
|
this._tablesCache = [...tables, name];
|
|
387
524
|
}
|
|
525
|
+
await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
|
|
526
|
+
await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
388
527
|
await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
|
|
389
528
|
}
|
|
390
529
|
};
|
|
@@ -491,6 +630,8 @@ async function main() {
|
|
|
491
630
|
const config = loadConfig();
|
|
492
631
|
if (config) {
|
|
493
632
|
const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName);
|
|
633
|
+
await api.ensureTable();
|
|
634
|
+
await api.ensureSessionsTable(config.sessionsTableName);
|
|
494
635
|
await createPlaceholder(api, config.tableName, sessionId, cwd, config.userName, config.orgName, config.workspaceId);
|
|
495
636
|
log3("placeholder created");
|
|
496
637
|
}
|