@deeplake/hivemind 0.6.48 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +147 -20
- package/bundle/cli.js +552 -95
- package/codex/bundle/capture.js +509 -89
- package/codex/bundle/commands/auth-login.js +209 -66
- package/codex/bundle/embeddings/embed-daemon.js +243 -0
- package/codex/bundle/pre-tool-use.js +629 -104
- package/codex/bundle/session-start-setup.js +194 -57
- package/codex/bundle/session-start.js +25 -10
- package/codex/bundle/shell/deeplake-shell.js +679 -112
- package/codex/bundle/stop.js +476 -58
- package/codex/bundle/wiki-worker.js +312 -11
- package/cursor/bundle/capture.js +768 -57
- package/cursor/bundle/commands/auth-login.js +209 -66
- package/cursor/bundle/embeddings/embed-daemon.js +243 -0
- package/cursor/bundle/pre-tool-use.js +561 -70
- package/cursor/bundle/session-end.js +223 -2
- package/cursor/bundle/session-start.js +192 -54
- package/cursor/bundle/shell/deeplake-shell.js +679 -112
- package/cursor/bundle/wiki-worker.js +571 -0
- package/hermes/bundle/capture.js +771 -58
- package/hermes/bundle/commands/auth-login.js +209 -66
- package/hermes/bundle/embeddings/embed-daemon.js +243 -0
- package/hermes/bundle/pre-tool-use.js +560 -69
- package/hermes/bundle/session-end.js +224 -1
- package/hermes/bundle/session-start.js +195 -54
- package/hermes/bundle/shell/deeplake-shell.js +679 -112
- package/hermes/bundle/wiki-worker.js +572 -0
- package/mcp/bundle/server.js +253 -68
- package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
- package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
- package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
- package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
- package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
- package/openclaw/dist/index.js +752 -702
- package/openclaw/openclaw.plugin.json +1 -1
- package/openclaw/package.json +1 -1
- package/package.json +2 -1
- package/pi/extension-source/hivemind.ts +473 -21
|
@@ -1,10 +1,62 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
// Bundler runtime helpers shared by the lazily-initialised modules below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

// Wraps a single-entry `{ "module/path"() { ... } }` object into a run-once
// initialiser. The first call runs the module body and caches its result;
// later calls (including re-entrant ones made while the body is still
// running) just return the cached value.
var __esm = (fn, res) => function __init() {
  if (fn) {
    const moduleBody = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before invoking so a re-entrant call does not run the body twice.
    fn = 0;
    res = moduleBody(0);
  }
  return res;
};

// Defines every key of `all` on `target` as an enumerable getter, so the
// exported value is looked up at access time rather than at definition time.
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};
|
|
11
|
+
|
|
12
|
+
// dist/src/index-marker-store.js
// Export table for the lazily-loaded index-marker store. Each entry is a
// getter, so the function declarations further down are resolved on access.
var index_marker_store_exports = {};
__export(index_marker_store_exports, {
  buildIndexMarkerPath: function () {
    return buildIndexMarkerPath;
  },
  getIndexMarkerDir: function () {
    return getIndexMarkerDir;
  },
  hasFreshIndexMarker: function () {
    return hasFreshIndexMarker;
  },
  writeIndexMarker: function () {
    return writeIndexMarker;
  }
});
|
|
20
|
+
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
21
|
+
import { join as join3 } from "node:path";
|
|
22
|
+
import { tmpdir } from "node:os";
|
|
23
|
+
/**
 * Directory that holds the schema/index marker files.
 *
 * @returns {string} `HIVEMIND_INDEX_MARKER_DIR` when that variable is set
 *   (even to an empty string), otherwise `<os tmpdir>/hivemind-deeplake-indexes`.
 */
function getIndexMarkerDir() {
  const override = process.env.HIVEMIND_INDEX_MARKER_DIR;
  if (override != null) {
    return override;
  }
  return join3(tmpdir(), "hivemind-deeplake-indexes");
}
|
|
26
|
+
/**
 * Build the marker-file path for one (workspace, org, table, suffix) tuple.
 *
 * The four parts are joined with "__" and every character outside
 * `[a-zA-Z0-9_.-]` is replaced by "_" so the key is safe as a file name.
 *
 * @param {string} workspaceId
 * @param {string} orgId
 * @param {string} table
 * @param {string} suffix
 * @returns {string} `<marker dir>/<sanitised key>.json`
 */
function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
  const keyParts = [workspaceId, orgId, table, suffix];
  const unsafeChars = /[^a-zA-Z0-9_.-]/g;
  const fileStem = keyParts.join("__").replace(unsafeChars, "_");
  return join3(getIndexMarkerDir(), `${fileStem}.json`);
}
|
|
30
|
+
/**
 * Report whether a marker file exists and is still within its TTL.
 *
 * The marker is a JSON document of the form `{ "updatedAt": <ISO date> }`.
 * Anything that prevents reading a valid timestamp (missing file, unreadable
 * file, malformed JSON, missing or unparsable `updatedAt`) counts as "not fresh".
 *
 * @param {string} markerPath - Path of the marker file to inspect.
 * @returns {boolean} `true` only when the marker's age is at most
 *   `INDEX_MARKER_TTL_MS`.
 */
function hasFreshIndexMarker(markerPath) {
  if (!existsSync2(markerPath)) {
    return false;
  }
  try {
    const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
    const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
    if (!Number.isFinite(updatedAt)) {
      return false;
    }
    const ageMs = Date.now() - updatedAt;
    // Written as `age <= ttl` (not `!(age > ttl)`) on purpose: when the TTL is
    // NaN — e.g. HIVEMIND_INDEX_MARKER_TTL_MS set to a non-numeric value — the
    // comparison is false, so the marker is treated as stale instead of being
    // considered fresh forever.
    return ageMs <= INDEX_MARKER_TTL_MS;
  } catch {
    // The file vanished between the existence check and the read, or its
    // content is not the expected JSON: treat as "no marker".
    return false;
  }
}
|
|
43
|
+
/**
 * Write (or overwrite) a marker file stamped with the current time.
 *
 * The directory that is created is the parent of `markerPath` itself rather
 * than whatever `getIndexMarkerDir()` currently returns, so the write cannot
 * fail with ENOENT when the two differ (for example when
 * HIVEMIND_INDEX_MARKER_DIR changed after the path was built).
 *
 * @param {string} markerPath - Destination file, normally produced by
 *   `buildIndexMarkerPath`.
 * @throws {Error} Propagates filesystem errors from mkdir/write.
 */
function writeIndexMarker(markerPath) {
  // join(path, "..") normalises to the parent directory without needing an
  // extra import.
  const parentDir = join3(markerPath, "..");
  mkdirSync(parentDir, { recursive: true });
  const payload = JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() });
  writeFileSync(markerPath, payload, "utf-8");
}
|
|
47
|
+
// Maximum age of a marker file, in milliseconds. Assigned when the module
// initialiser below runs for the first time.
var INDEX_MARKER_TTL_MS;
var init_index_marker_store = __esm({
  "dist/src/index-marker-store.js"() {
    "use strict";
    // Six hours unless HIVEMIND_INDEX_MARKER_TTL_MS overrides it.
    const sixHoursMs = 6 * 60 * 60 * 1e3;
    const configuredTtl = process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? sixHoursMs;
    INDEX_MARKER_TTL_MS = Number(configuredTtl);
  }
});
|
|
2
54
|
|
|
3
55
|
// dist/src/hooks/codex/pre-tool-use.js
|
|
4
56
|
import { execFileSync } from "node:child_process";
|
|
5
|
-
import { existsSync as
|
|
6
|
-
import { join as
|
|
7
|
-
import { fileURLToPath as
|
|
57
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
58
|
+
import { join as join9, dirname as dirname2 } from "node:path";
|
|
59
|
+
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
8
60
|
|
|
9
61
|
// dist/src/utils/stdin.js
|
|
10
62
|
function readStdin() {
|
|
@@ -57,9 +109,6 @@ function loadConfig() {
|
|
|
57
109
|
|
|
58
110
|
// dist/src/deeplake-api.js
|
|
59
111
|
import { randomUUID } from "node:crypto";
|
|
60
|
-
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
61
|
-
import { join as join3 } from "node:path";
|
|
62
|
-
import { tmpdir } from "node:os";
|
|
63
112
|
|
|
64
113
|
// dist/src/utils/debug.js
|
|
65
114
|
import { appendFileSync } from "node:fs";
|
|
@@ -82,7 +131,26 @@ function sqlLike(value) {
|
|
|
82
131
|
return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_");
|
|
83
132
|
}
|
|
84
133
|
|
|
134
|
+
// dist/src/embeddings/columns.js
|
|
135
|
+
var SUMMARY_EMBEDDING_COL = "summary_embedding";
|
|
136
|
+
var MESSAGE_EMBEDDING_COL = "message_embedding";
|
|
137
|
+
|
|
138
|
+
// dist/src/utils/client-header.js
|
|
139
|
+
// Name of the HTTP header that identifies this client to the Deeplake API.
var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";

/** @returns {string} The client identifier sent in the header. */
function deeplakeClientValue() {
  const clientId = "hivemind";
  return clientId;
}

/**
 * @returns {Record<string, string>} A one-entry header object, ready to be
 *   spread into a `fetch` headers literal.
 */
function deeplakeClientHeader() {
  const headers = {};
  headers[DEEPLAKE_CLIENT_HEADER] = deeplakeClientValue();
  return headers;
}
|
|
146
|
+
|
|
85
147
|
// dist/src/deeplake-api.js
|
|
148
|
+
// Memoised promise for the index-marker store module; created on first use.
var indexMarkerStorePromise = null;

/**
 * Lazily initialise the index-marker store and return its export table.
 * Every call returns the same promise.
 *
 * @returns {Promise<object>} Resolves to `index_marker_store_exports`.
 */
function getIndexMarkerStore() {
  if (indexMarkerStorePromise) {
    return indexMarkerStorePromise;
  }
  indexMarkerStorePromise = Promise.resolve().then(() => {
    init_index_marker_store();
    return index_marker_store_exports;
  });
  return indexMarkerStorePromise;
}
|
|
86
154
|
var log2 = (msg) => log("sdk", msg);
|
|
87
155
|
function summarizeSql(sql, maxLen = 220) {
|
|
88
156
|
const compact = sql.replace(/\s+/g, " ").trim();
|
|
@@ -102,7 +170,6 @@ var MAX_RETRIES = 3;
|
|
|
102
170
|
var BASE_DELAY_MS = 500;
|
|
103
171
|
var MAX_CONCURRENCY = 5;
|
|
104
172
|
var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
|
|
105
|
-
var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
|
|
106
173
|
/**
 * Promise-based delay.
 *
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
@@ -122,9 +189,6 @@ function isTransientHtml403(text) {
|
|
|
122
189
|
const body = text.toLowerCase();
|
|
123
190
|
return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
|
|
124
191
|
}
|
|
125
|
-
function getIndexMarkerDir() {
|
|
126
|
-
return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
|
|
127
|
-
}
|
|
128
192
|
var Semaphore = class {
|
|
129
193
|
max;
|
|
130
194
|
waiting = [];
|
|
@@ -193,7 +257,8 @@ var DeeplakeApi = class {
|
|
|
193
257
|
headers: {
|
|
194
258
|
Authorization: `Bearer ${this.token}`,
|
|
195
259
|
"Content-Type": "application/json",
|
|
196
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
260
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
261
|
+
...deeplakeClientHeader()
|
|
197
262
|
},
|
|
198
263
|
signal,
|
|
199
264
|
body: JSON.stringify({ query: sql })
|
|
@@ -220,7 +285,8 @@ var DeeplakeApi = class {
|
|
|
220
285
|
}
|
|
221
286
|
const text = await resp.text().catch(() => "");
|
|
222
287
|
const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
|
|
223
|
-
|
|
288
|
+
const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
|
|
289
|
+
if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
|
|
224
290
|
const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
|
|
225
291
|
log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
|
|
226
292
|
await sleep(delay);
|
|
@@ -254,7 +320,7 @@ var DeeplakeApi = class {
|
|
|
254
320
|
const lud = row.lastUpdateDate ?? ts;
|
|
255
321
|
const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
|
|
256
322
|
if (exists.length > 0) {
|
|
257
|
-
let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
323
|
+
let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
258
324
|
if (row.project !== void 0)
|
|
259
325
|
setClauses += `, project = '${sqlStr(row.project)}'`;
|
|
260
326
|
if (row.description !== void 0)
|
|
@@ -262,8 +328,8 @@ var DeeplakeApi = class {
|
|
|
262
328
|
await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
|
|
263
329
|
} else {
|
|
264
330
|
const id = randomUUID();
|
|
265
|
-
let cols =
|
|
266
|
-
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
331
|
+
let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
|
|
332
|
+
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
267
333
|
if (row.project !== void 0) {
|
|
268
334
|
cols += ", project";
|
|
269
335
|
vals += `, '${sqlStr(row.project)}'`;
|
|
@@ -288,48 +354,83 @@ var DeeplakeApi = class {
|
|
|
288
354
|
buildLookupIndexName(table, suffix) {
|
|
289
355
|
return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
290
356
|
}
|
|
291
|
-
getLookupIndexMarkerPath(table, suffix) {
|
|
292
|
-
const markerKey = [
|
|
293
|
-
this.workspaceId,
|
|
294
|
-
this.orgId,
|
|
295
|
-
table,
|
|
296
|
-
suffix
|
|
297
|
-
].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
|
|
298
|
-
return join3(getIndexMarkerDir(), `${markerKey}.json`);
|
|
299
|
-
}
|
|
300
|
-
hasFreshLookupIndexMarker(table, suffix) {
|
|
301
|
-
const markerPath = this.getLookupIndexMarkerPath(table, suffix);
|
|
302
|
-
if (!existsSync2(markerPath))
|
|
303
|
-
return false;
|
|
304
|
-
try {
|
|
305
|
-
const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
|
|
306
|
-
const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
|
|
307
|
-
if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
|
|
308
|
-
return false;
|
|
309
|
-
return true;
|
|
310
|
-
} catch {
|
|
311
|
-
return false;
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
markLookupIndexReady(table, suffix) {
|
|
315
|
-
mkdirSync(getIndexMarkerDir(), { recursive: true });
|
|
316
|
-
writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
|
|
317
|
-
}
|
|
318
357
|
async ensureLookupIndex(table, suffix, columnsSql) {
|
|
319
|
-
|
|
358
|
+
const markers = await getIndexMarkerStore();
|
|
359
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
|
|
360
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
320
361
|
return;
|
|
321
362
|
const indexName = this.buildLookupIndexName(table, suffix);
|
|
322
363
|
try {
|
|
323
364
|
await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
|
|
324
|
-
|
|
365
|
+
markers.writeIndexMarker(markerPath);
|
|
325
366
|
} catch (e) {
|
|
326
367
|
if (isDuplicateIndexError(e)) {
|
|
327
|
-
|
|
368
|
+
markers.writeIndexMarker(markerPath);
|
|
328
369
|
return;
|
|
329
370
|
}
|
|
330
371
|
log2(`index "${indexName}" skipped: ${e.message}`);
|
|
331
372
|
}
|
|
332
373
|
}
|
|
374
|
+
/**
|
|
375
|
+
* Ensure a vector column exists on the given table.
|
|
376
|
+
*
|
|
377
|
+
* The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
|
|
378
|
+
* EXISTS …` on every SessionStart. On a long-running workspace that's
|
|
379
|
+
* already migrated, every call returns 500 "Column already exists" — noisy
|
|
380
|
+
* in the log and a wasted round-trip. Worse, the very first call after the
|
|
381
|
+
* column is genuinely added triggers Deeplake's post-ALTER `vector::at`
|
|
382
|
+
* window (~30s) during which subsequent INSERTs fail; minimising the
|
|
383
|
+
* number of ALTER calls minimises exposure to that window.
|
|
384
|
+
*
|
|
385
|
+
* New flow:
|
|
386
|
+
* 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
|
|
387
|
+
* return — zero network calls.
|
|
388
|
+
* 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
|
|
389
|
+
* column_name = C. Read-only, idempotent, can't tickle the post-ALTER
|
|
390
|
+
* bug. If the column is present → mark + return.
|
|
391
|
+
* 3. Only if step 2 says the column is missing, fall back to ALTER ADD
|
|
392
|
+
* COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
|
|
393
|
+
* "already exists" (race: another client added it between our SELECT
|
|
394
|
+
* and ALTER).
|
|
395
|
+
*
|
|
396
|
+
* Marker uses the same dir / TTL as ensureLookupIndex so both schema
|
|
397
|
+
* caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
|
|
398
|
+
*/
|
|
399
|
+
async ensureEmbeddingColumn(table, column) {
|
|
400
|
+
await this.ensureColumn(table, column, "FLOAT4[]");
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Generic marker-gated column migration. Same SELECT-then-ALTER flow as
|
|
404
|
+
* ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
|
|
405
|
+
* column that was added to the schema after the table was originally
|
|
406
|
+
* created. Used today for `summary_embedding`, `message_embedding`, and
|
|
407
|
+
* the `agent` column (added 2026-04-11) — the latter has no fallback if
|
|
408
|
+
* a user upgraded over a pre-2026-04-11 table, so every INSERT fails
|
|
409
|
+
* with `column "agent" does not exist`.
|
|
410
|
+
*/
|
|
411
|
+
async ensureColumn(table, column, sqlType) {
|
|
412
|
+
const markers = await getIndexMarkerStore();
|
|
413
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
|
|
414
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
415
|
+
return;
|
|
416
|
+
const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
|
|
417
|
+
const rows = await this.query(colCheck);
|
|
418
|
+
if (rows.length > 0) {
|
|
419
|
+
markers.writeIndexMarker(markerPath);
|
|
420
|
+
return;
|
|
421
|
+
}
|
|
422
|
+
try {
|
|
423
|
+
await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
|
|
424
|
+
} catch (e) {
|
|
425
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
426
|
+
if (!/already exists/i.test(msg))
|
|
427
|
+
throw e;
|
|
428
|
+
const recheck = await this.query(colCheck);
|
|
429
|
+
if (recheck.length === 0)
|
|
430
|
+
throw e;
|
|
431
|
+
}
|
|
432
|
+
markers.writeIndexMarker(markerPath);
|
|
433
|
+
}
|
|
333
434
|
/** List all tables in the workspace (with retry). */
|
|
334
435
|
async listTables(forceRefresh = false) {
|
|
335
436
|
if (!forceRefresh && this._tablesCache)
|
|
@@ -345,7 +446,8 @@ var DeeplakeApi = class {
|
|
|
345
446
|
const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
|
|
346
447
|
headers: {
|
|
347
448
|
Authorization: `Bearer ${this.token}`,
|
|
348
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
449
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
450
|
+
...deeplakeClientHeader()
|
|
349
451
|
}
|
|
350
452
|
});
|
|
351
453
|
if (resp.ok) {
|
|
@@ -370,28 +472,60 @@ var DeeplakeApi = class {
|
|
|
370
472
|
}
|
|
371
473
|
return { tables: [], cacheable: false };
|
|
372
474
|
}
|
|
475
|
+
/**
|
|
476
|
+
* Run a `CREATE TABLE` with an extra outer retry budget. The base
|
|
477
|
+
* `query()` already retries 3 times on fetch errors (~3.5s total), but a
|
|
478
|
+
* failed CREATE is permanent corruption — every subsequent SELECT against
|
|
479
|
+
* the missing table fails. Wrapping in an outer loop with longer backoff
|
|
480
|
+
* (2s, 5s, then 10s) gives us ~17s of reach across transient network
|
|
481
|
+
* blips before giving up. Failures still propagate; getApi() resets its
|
|
482
|
+
* cache on init failure (openclaw plugin) so the next call retries the
|
|
483
|
+
* whole init flow.
|
|
484
|
+
*/
|
|
485
|
+
async createTableWithRetry(sql, label) {
|
|
486
|
+
const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
|
|
487
|
+
let lastErr = null;
|
|
488
|
+
for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
|
|
489
|
+
try {
|
|
490
|
+
await this.query(sql);
|
|
491
|
+
return;
|
|
492
|
+
} catch (err) {
|
|
493
|
+
lastErr = err;
|
|
494
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
495
|
+
log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
|
|
496
|
+
if (attempt < OUTER_BACKOFFS_MS.length) {
|
|
497
|
+
await sleep(OUTER_BACKOFFS_MS[attempt]);
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
throw lastErr;
|
|
502
|
+
}
|
|
373
503
|
/** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
|
|
374
504
|
async ensureTable(name) {
|
|
375
505
|
const tbl = name ?? this.tableName;
|
|
376
506
|
const tables = await this.listTables();
|
|
377
507
|
if (!tables.includes(tbl)) {
|
|
378
508
|
log2(`table "${tbl}" not found, creating`);
|
|
379
|
-
await this.
|
|
509
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
|
|
380
510
|
log2(`table "${tbl}" created`);
|
|
381
511
|
if (!tables.includes(tbl))
|
|
382
512
|
this._tablesCache = [...tables, tbl];
|
|
383
513
|
}
|
|
514
|
+
await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
|
|
515
|
+
await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
384
516
|
}
|
|
385
517
|
/** Create the sessions table (uses JSONB for message since every row is a JSON event). */
|
|
386
518
|
async ensureSessionsTable(name) {
|
|
387
519
|
const tables = await this.listTables();
|
|
388
520
|
if (!tables.includes(name)) {
|
|
389
521
|
log2(`table "${name}" not found, creating`);
|
|
390
|
-
await this.
|
|
522
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
|
|
391
523
|
log2(`table "${name}" created`);
|
|
392
524
|
if (!tables.includes(name))
|
|
393
525
|
this._tablesCache = [...tables, name];
|
|
394
526
|
}
|
|
527
|
+
await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
|
|
528
|
+
await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
395
529
|
await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
|
|
396
530
|
}
|
|
397
531
|
};
|
|
@@ -560,24 +694,25 @@ function normalizeContent(path, raw) {
|
|
|
560
694
|
return raw;
|
|
561
695
|
}
|
|
562
696
|
if (Array.isArray(obj.turns)) {
|
|
563
|
-
const
|
|
564
|
-
if (obj.date_time)
|
|
565
|
-
header.push(`date: ${obj.date_time}`);
|
|
566
|
-
if (obj.speakers) {
|
|
567
|
-
const s = obj.speakers;
|
|
568
|
-
const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", ");
|
|
569
|
-
if (names)
|
|
570
|
-
header.push(`speakers: ${names}`);
|
|
571
|
-
}
|
|
697
|
+
const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
|
|
572
698
|
const lines = obj.turns.map((t) => {
|
|
573
699
|
const sp = String(t?.speaker ?? t?.name ?? "?").trim();
|
|
574
700
|
const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim();
|
|
575
701
|
const tag = t?.dia_id ? `[${t.dia_id}] ` : "";
|
|
576
|
-
return `${tag}${sp}: ${tx}`;
|
|
702
|
+
return `${dateHeader}${tag}${sp}: ${tx}`;
|
|
577
703
|
});
|
|
578
|
-
const out2 =
|
|
704
|
+
const out2 = lines.join("\n");
|
|
579
705
|
return out2.trim() ? out2 : raw;
|
|
580
706
|
}
|
|
707
|
+
if (obj.turn && typeof obj.turn === "object" && !Array.isArray(obj.turn)) {
|
|
708
|
+
const t = obj.turn;
|
|
709
|
+
const sp = String(t.speaker ?? t.name ?? "?").trim();
|
|
710
|
+
const tx = String(t.text ?? t.content ?? "").replace(/\s+/g, " ").trim();
|
|
711
|
+
const tag = t.dia_id ? `[${String(t.dia_id)}] ` : "";
|
|
712
|
+
const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
|
|
713
|
+
const line = `${dateHeader}${tag}${sp}: ${tx}`;
|
|
714
|
+
return line.trim() ? line : raw;
|
|
715
|
+
}
|
|
581
716
|
const stripRecalled = (t) => {
|
|
582
717
|
const i = t.indexOf("<recalled-memories>");
|
|
583
718
|
if (i === -1)
|
|
@@ -620,8 +755,38 @@ function buildPathCondition(targetPath) {
|
|
|
620
755
|
return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
|
|
621
756
|
}
|
|
622
757
|
async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
|
|
623
|
-
const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
|
|
758
|
+
const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, queryEmbedding, multiWordPatterns } = opts;
|
|
624
759
|
const limit = opts.limit ?? 100;
|
|
760
|
+
if (queryEmbedding && queryEmbedding.length > 0) {
|
|
761
|
+
const vecLit = serializeFloat4Array(queryEmbedding);
|
|
762
|
+
const semanticLimit = Math.min(limit, Number(process.env.HIVEMIND_SEMANTIC_LIMIT ?? "20"));
|
|
763
|
+
const lexicalLimit = Math.min(limit, Number(process.env.HIVEMIND_HYBRID_LEXICAL_LIMIT ?? "20"));
|
|
764
|
+
const filterPatternsForLex = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
|
|
765
|
+
const memLexFilter = buildContentFilter("summary::text", likeOp, filterPatternsForLex);
|
|
766
|
+
const sessLexFilter = buildContentFilter("message::text", likeOp, filterPatternsForLex);
|
|
767
|
+
const memLexQuery = memLexFilter ? `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, 1.0 AS score FROM "${memoryTable}" WHERE 1=1${pathFilter}${memLexFilter} LIMIT ${lexicalLimit}` : null;
|
|
768
|
+
const sessLexQuery = sessLexFilter ? `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, 1.0 AS score FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessLexFilter} LIMIT ${lexicalLimit}` : null;
|
|
769
|
+
const memSemQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, (summary_embedding <#> ${vecLit}) AS score FROM "${memoryTable}" WHERE ARRAY_LENGTH(summary_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
|
|
770
|
+
const sessSemQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, (message_embedding <#> ${vecLit}) AS score FROM "${sessionsTable}" WHERE ARRAY_LENGTH(message_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
|
|
771
|
+
const parts = [memSemQuery, sessSemQuery];
|
|
772
|
+
if (memLexQuery)
|
|
773
|
+
parts.push(memLexQuery);
|
|
774
|
+
if (sessLexQuery)
|
|
775
|
+
parts.push(sessLexQuery);
|
|
776
|
+
const unionSql = parts.map((q) => `(${q})`).join(" UNION ALL ");
|
|
777
|
+
const outerLimit = semanticLimit + lexicalLimit;
|
|
778
|
+
const rows2 = await api.query(`SELECT path, content, source_order, creation_date, score FROM (` + unionSql + `) AS combined ORDER BY score DESC LIMIT ${outerLimit}`);
|
|
779
|
+
const seen = /* @__PURE__ */ new Set();
|
|
780
|
+
const unique = [];
|
|
781
|
+
for (const row of rows2) {
|
|
782
|
+
const p = String(row["path"]);
|
|
783
|
+
if (seen.has(p))
|
|
784
|
+
continue;
|
|
785
|
+
seen.add(p);
|
|
786
|
+
unique.push({ path: p, content: String(row["content"] ?? "") });
|
|
787
|
+
}
|
|
788
|
+
return unique;
|
|
789
|
+
}
|
|
625
790
|
const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
|
|
626
791
|
const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
|
|
627
792
|
const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
|
|
@@ -633,6 +798,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
|
|
|
633
798
|
content: String(row["content"] ?? "")
|
|
634
799
|
}));
|
|
635
800
|
}
|
|
801
|
+
/**
 * Render a numeric vector as a SQL `float4[]` array literal.
 *
 * @param {Iterable<number>} vec - Vector components.
 * @returns {string} `ARRAY[v1,v2,…]::float4[]`, or the string `"NULL"` as
 *   soon as any component is not a finite number.
 */
function serializeFloat4Array(vec) {
  const components = Array.from(vec);
  const allFinite = components.every((component) => Number.isFinite(component));
  if (!allFinite) {
    return "NULL";
  }
  const body = components.map((component) => String(component)).join(",");
  return `ARRAY[${body}]::float4[]`;
}
|
|
636
810
|
function buildPathFilter(targetPath) {
|
|
637
811
|
const condition = buildPathCondition(targetPath);
|
|
638
812
|
return condition ? ` AND ${condition}` : "";
|
|
@@ -715,7 +889,7 @@ function buildGrepSearchOptions(params, targetPath) {
|
|
|
715
889
|
return {
|
|
716
890
|
pathFilter: buildPathFilter(targetPath),
|
|
717
891
|
contentScanOnly: hasRegexMeta,
|
|
718
|
-
likeOp:
|
|
892
|
+
likeOp: process.env.HIVEMIND_GREP_LIKE === "case-sensitive" ? "LIKE" : "ILIKE",
|
|
719
893
|
escapedPattern: sqlLike(params.pattern),
|
|
720
894
|
prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
|
|
721
895
|
prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
|
|
@@ -770,11 +944,28 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) {
|
|
|
770
944
|
}
|
|
771
945
|
return output;
|
|
772
946
|
}
|
|
773
|
-
/**
 * Search the memory and sessions tables and turn the rows into grep-style
 * output lines.
 *
 * Rows are de-duplicated by path (first occurrence wins) and their content is
 * normalised. When a non-empty `queryEmbedding` is supplied and
 * HIVEMIND_SEMANTIC_EMIT_ALL is not the string "false", every non-blank line
 * of every row is emitted as `<path>:<line>` without pattern filtering;
 * otherwise the rows go through `refineGrepMatches`.
 *
 * @param api - Deeplake API client passed through to the search.
 * @param {string} memoryTable
 * @param {string} sessionsTable
 * @param params - Grep parameters (pattern, flags, …).
 * @param {string} targetPath - Path the search is scoped to.
 * @param {number[]} [queryEmbedding] - Optional query vector enabling semantic search.
 * @returns {Promise<string[]>} Output lines.
 */
async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, queryEmbedding) {
  const searchOptions = { ...buildGrepSearchOptions(params, targetPath), queryEmbedding };
  const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, searchOptions);

  const seenPaths = /* @__PURE__ */ new Set();
  const firstPerPath = [];
  for (const row of rows) {
    if (seenPaths.has(row.path)) {
      continue;
    }
    seenPaths.add(row.path);
    firstPerPath.push(row);
  }
  const normalized = firstPerPath.map((row) => ({ path: row.path, content: normalizeContent(row.path, row.content) }));

  const isSemantic = queryEmbedding && queryEmbedding.length > 0;
  if (isSemantic && process.env.HIVEMIND_SEMANTIC_EMIT_ALL !== "false") {
    // Semantic hits are returned whole: keep every non-blank line, untrimmed.
    return normalized.flatMap((row) => row.content.split("\n").filter((line) => line.trim()).map((line) => `${row.path}:${line}`));
  }
  return refineGrepMatches(normalized, params);
}
|
|
780
971
|
|
|
@@ -818,7 +1009,298 @@ function capOutputForClaude(output, options = {}) {
|
|
|
818
1009
|
return keptLines.join("\n") + footer;
|
|
819
1010
|
}
|
|
820
1011
|
|
|
1012
|
+
// dist/src/embeddings/client.js
|
|
1013
|
+
import { connect } from "node:net";
|
|
1014
|
+
import { spawn } from "node:child_process";
|
|
1015
|
+
import { openSync, closeSync, writeSync, unlinkSync, existsSync as existsSync3, readFileSync as readFileSync3 } from "node:fs";
|
|
1016
|
+
import { homedir as homedir3 } from "node:os";
|
|
1017
|
+
import { join as join4 } from "node:path";
|
|
1018
|
+
|
|
1019
|
+
// dist/src/embeddings/protocol.js
|
|
1020
|
+
// Defaults shared by the embedding daemon and its client.
var DEFAULT_SOCKET_DIR = "/tmp";
var DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1e3; // ten minutes
var DEFAULT_CLIENT_TIMEOUT_MS = 2e3; // two seconds

/**
 * @param {string|number} uid - Per-user identifier embedded in the file name.
 * @param {string} [dir] - Directory holding the socket (defaults to "/tmp").
 * @returns {string} `<dir>/hivemind-embed-<uid>.sock`
 */
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const fileName = `hivemind-embed-${uid}.sock`;
  return `${dir}/${fileName}`;
}

/**
 * @param {string|number} uid - Per-user identifier embedded in the file name.
 * @param {string} [dir] - Directory holding the pid file (defaults to "/tmp").
 * @returns {string} `<dir>/hivemind-embed-<uid>.pid`
 */
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const fileName = `hivemind-embed-${uid}.pid`;
  return `${dir}/${fileName}`;
}
|
|
1029
|
+
|
|
1030
|
+
// dist/src/embeddings/client.js
|
|
1031
|
+
var SHARED_DAEMON_PATH = join4(homedir3(), ".hivemind", "embed-deps", "embed-daemon.js");
|
|
1032
|
+
var log3 = (m) => log("embed-client", m);
|
|
1033
|
+
/**
 * Identifier used to namespace the per-user socket and pid files.
 *
 * @returns {string} The numeric uid as a string when `process.getuid` is
 *   available, otherwise `$USER`, otherwise the literal "default".
 */
function getUid() {
  if (typeof process.getuid === "function") {
    const numericUid = process.getuid();
    if (numericUid !== undefined) {
      return String(numericUid);
    }
  }
  return process.env.USER ?? "default";
}
|
|
1037
|
+
/**
 * Client for the per-user embedding daemon, reached over a local socket.
 *
 * Protocol as implemented here: one JSON request terminated by "\n" is written
 * to the socket, and the first newline-terminated line received back is parsed
 * as the JSON response.
 *
 * The client can also start the daemon itself. A pid file opened with the
 * exclusive "wx" flag acts as a spawn lock so that concurrent clients do not
 * start several daemons at once.
 */
var EmbedClient = class {
  socketPath;
  pidPath;
  timeoutMs;
  daemonEntry;
  autoSpawn;
  spawnWaitMs;
  nextId = 0;
  /**
   * @param {object} [opts]
   * @param {string} [opts.socketDir="/tmp"] Directory holding the socket and pid file.
   * @param {number} [opts.timeoutMs] Connect / request timeout in ms (default DEFAULT_CLIENT_TIMEOUT_MS).
   * @param {string} [opts.daemonEntry] Daemon script to spawn. Falls back to
   *   HIVEMIND_EMBED_DAEMON, then to SHARED_DAEMON_PATH when that file exists.
   * @param {boolean} [opts.autoSpawn=true] Whether a failed connect may spawn the daemon.
   * @param {number} [opts.spawnWaitMs=5000] How long warmup() waits for a spawned daemon.
   */
  constructor(opts = {}) {
    const uid = getUid();
    const dir = opts.socketDir ?? "/tmp";
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync3(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }
  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   *
   * @param {string} text Text to embed.
   * @param {string} [kind="document"] Forwarded to the daemon as the request's `kind`.
   * @returns {Promise<unknown|null>} The response's `embedding` value, or null.
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      if (this.autoSpawn)
        this.trySpawnDaemon();
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e) {
      const err = e instanceof Error ? e.message : String(e);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      try {
        sock.end();
      } catch {
      }
    }
  }
  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   *
   * @returns {Promise<boolean>} true once a connection succeeded, false otherwise.
   */
  async warmup() {
    try {
      const s = await this.connectOnce();
      s.end();
      return true;
    } catch {
      if (!this.autoSpawn)
        return false;
      this.trySpawnDaemon();
      try {
        const s = await this.waitForSocket();
        s.end();
        return true;
      } catch {
        return false;
      }
    }
  }
  /**
   * Opens one connection to the daemon socket.
   *
   * @returns {Promise<object>} Resolves with the connected socket; rejects on a
   *   socket error or when no connection is made within timeoutMs.
   */
  connectOnce() {
    return new Promise((resolve2, reject) => {
      const sock = connect(this.socketPath);
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to);
        resolve2(sock);
      });
      sock.once("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
    });
  }
  /**
   * Best-effort, non-throwing attempt to start the daemon as a detached child.
   *
   * The pid file is claimed with an exclusive create so only one client spawns.
   * If it already exists and is stale it is removed and claimed again;
   * otherwise another spawn is assumed to be in progress and nothing happens.
   *
   * Every failure path (missing entry script, synchronous spawn throw,
   * asynchronous spawn 'error') releases the pid file so a later call can retry,
   * and none of them propagates to the caller.
   */
  trySpawnDaemon() {
    // Creates the pid file exclusively (mode 0600) and records our pid in it.
    // The descriptor is closed again if the write fails, so it cannot leak.
    const claimPidFile = () => {
      const handle = openSync(this.pidPath, "wx", 384);
      try {
        writeSync(handle, String(process.pid));
      } catch (err) {
        try {
          closeSync(handle);
        } catch {
        }
        throw err;
      }
      return handle;
    };
    const removePidFile = () => {
      try {
        unlinkSync(this.pidPath);
      } catch {
      }
    };
    const closeQuietly = (handle) => {
      try {
        closeSync(handle);
      } catch {
      }
    };
    let fd;
    try {
      fd = claimPidFile();
    } catch {
      if (!this.isPidFileStale())
        return;
      removePidFile();
      try {
        fd = claimPidFile();
      } catch {
        return;
      }
    }
    if (!this.daemonEntry || !existsSync3(this.daemonEntry)) {
      log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      closeQuietly(fd);
      removePidFile();
      return;
    }
    try {
      const child = spawn(process.execPath, [this.daemonEntry], {
        detached: true,
        stdio: "ignore",
        env: process.env
      });
      // Without a listener, an asynchronous spawn failure would surface as an
      // uncaught 'error' event and take down the calling process.
      child.once("error", (err) => {
        log3(`daemon spawn failed: ${err.message}`);
        // No pid means the child never started, so the lock is ours to release.
        if (child.pid === void 0)
          removePidFile();
      });
      child.unref();
      log3(`spawned daemon pid=${child.pid}`);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      log3(`daemon spawn failed: ${msg}`);
      removePidFile();
    } finally {
      closeQuietly(fd);
    }
  }
  /**
   * Decides whether the existing pid file may be discarded.
   *
   * @returns {boolean} true when the file is unreadable, holds no usable pid, or
   *   signalling that pid with signal 0 fails; false when the signal succeeds.
   */
  isPidFileStale() {
    try {
      const raw = readFileSync3(this.pidPath, "utf-8").trim();
      const pid = Number(raw);
      if (!pid || Number.isNaN(pid))
        return true;
      try {
        // Signal 0 only probes the target; nothing is delivered.
        process.kill(pid, 0);
        return false;
      } catch {
        return true;
      }
    } catch {
      return true;
    }
  }
  /**
   * Polls until the socket file exists and accepts a connection. The delay
   * between polls starts at 30 ms and grows by 1.5x up to 300 ms.
   *
   * @returns {Promise<object>} The connected socket.
   * @throws {Error} When the daemon is not reachable within spawnWaitMs.
   */
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync3(this.socketPath))
        continue;
      try {
        return await this.connectOnce();
      } catch {
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }
  /**
   * Writes `req` as one JSON line and resolves with the first JSON line received.
   *
   * @param {object} sock Connected socket.
   * @param {object} req Request object; serialized with JSON.stringify.
   * @returns {Promise<object>} Parsed response. Rejects on timeout (the socket is
   *   destroyed), socket error, invalid JSON, or the peer ending the stream first.
   */
  sendAndWait(sock, req) {
    return new Promise((resolve2, reject) => {
      let buf = "";
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        buf += chunk;
        const nl = buf.indexOf("\n");
        // Keep buffering until a complete line has arrived.
        if (nl === -1)
          return;
        const line = buf.slice(0, nl);
        clearTimeout(to);
        try {
          resolve2(JSON.parse(line));
        } catch (e) {
          reject(e);
        }
      });
      sock.on("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
      sock.on("end", () => {
        clearTimeout(to);
        reject(new Error("connection closed without response"));
      });
      sock.write(JSON.stringify(req) + "\n");
    });
  }
};
|
|
1236
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
1239
|
+
|
|
1240
|
+
// dist/src/embeddings/disable.js
|
|
1241
|
+
import { createRequire } from "node:module";
|
|
1242
|
+
import { homedir as homedir4 } from "node:os";
|
|
1243
|
+
import { join as join5 } from "node:path";
|
|
1244
|
+
import { pathToFileURL } from "node:url";
|
|
1245
|
+
// Memoized embeddings status; stays null until embeddingsStatus() stores the
// result of detectStatus() on its first call.
var cachedStatus = null;
|
|
1246
|
+
/**
 * Checks that the "@huggingface/transformers" package can be resolved.
 *
 * Resolution is tried relative to this module first; a failure there is
 * ignored and resolution is retried relative to <home>/.hivemind/embed-deps/.
 *
 * @returns {void} Returns normally when either lookup succeeds.
 * @throws {Error} Whatever the second lookup throws when it also fails.
 */
function defaultResolveTransformers() {
  const packageName = "@huggingface/transformers";
  let resolvedLocally = true;
  try {
    createRequire(import.meta.url).resolve(packageName);
  } catch {
    resolvedLocally = false;
  }
  if (resolvedLocally) {
    return;
  }
  const sharedDir = join5(homedir4(), ".hivemind", "embed-deps");
  // Trailing slash so the directory itself is the resolution base.
  const sharedBase = pathToFileURL(`${sharedDir}/`).href;
  createRequire(sharedBase).resolve(packageName);
}
|
|
1255
|
+
// Resolver invoked by detectStatus(); initialised to defaultResolveTransformers.
// NOTE(review): held in a variable rather than called directly — presumably so
// it can be replaced (e.g. by tests); no reassignment is visible here, confirm.
var _resolve = defaultResolveTransformers;
|
|
1256
|
+
/**
 * Works out whether embeddings can be used in this process.
 *
 * @returns {"env-disabled"|"enabled"|"no-transformers"}
 *   "env-disabled" when HIVEMIND_EMBEDDINGS is exactly "false";
 *   "enabled" when the resolver call succeeds;
 *   "no-transformers" when the resolver call throws.
 */
function detectStatus() {
  const optedOut = process.env.HIVEMIND_EMBEDDINGS === "false";
  if (optedOut) {
    return "env-disabled";
  }
  let status = "enabled";
  try {
    _resolve();
  } catch {
    status = "no-transformers";
  }
  return status;
}
|
|
1266
|
+
/**
 * Returns the embeddings status, computing it with detectStatus() on the first
 * call and reusing the stored value afterwards.
 *
 * @returns {string} The cached status string.
 */
function embeddingsStatus() {
  if (cachedStatus === null) {
    cachedStatus = detectStatus();
  }
  return cachedStatus;
}
|
|
1272
|
+
/**
 * Convenience predicate over embeddingsStatus().
 *
 * @returns {boolean} true for every status other than "enabled".
 */
function embeddingsDisabled() {
  const status = embeddingsStatus();
  return status !== "enabled";
}
|
|
1275
|
+
|
|
821
1276
|
// dist/src/hooks/grep-direct.js
|
|
1277
|
+
import { fileURLToPath } from "node:url";
|
|
1278
|
+
import { dirname, join as join6 } from "node:path";
|
|
1279
|
+
// Semantic search is enabled unless HIVEMIND_SEMANTIC_SEARCH is exactly "false"
// or embeddingsDisabled() reports that embeddings are unavailable.
// Evaluated once, when this module is loaded.
var SEMANTIC_ENABLED = process.env.HIVEMIND_SEMANTIC_SEARCH !== "false" && !embeddingsDisabled();
|
|
1280
|
+
/**
 * Parses a timeout given in milliseconds as a string (typically an environment
 * variable value).
 *
 * An unset, blank, non-numeric, non-finite or negative value yields the
 * fallback. Passing such a value on to setTimeout would silently turn it into
 * a ~1 ms timer and make every request time out.
 *
 * @param {string|undefined} raw - Raw value to parse.
 * @param {number} [fallbackMs=500] - Value used when `raw` is unusable.
 * @returns {number} A finite, non-negative number of milliseconds.
 */
function parseSemanticTimeoutMs(raw, fallbackMs = 500) {
  if (raw === void 0 || raw === null || String(raw).trim() === "") {
    return fallbackMs;
  }
  const ms = Number(raw);
  return Number.isFinite(ms) && ms >= 0 ? ms : fallbackMs;
}
// Timeout (ms) for the query-embedding request made during grep; configurable
// through HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS, default 500.
var SEMANTIC_TIMEOUT_MS = parseSemanticTimeoutMs(process.env.HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS);
|
|
1281
|
+
/**
 * Locates the embed daemon script relative to this module.
 *
 * Two layouts are probed, mirroring how SHELL_BUNDLE is located:
 *   1. `<dir>/embeddings/embed-daemon.js` — daemon shipped beside this bundle;
 *   2. `<dir>/../embeddings/embed-daemon.js` — daemon one level up.
 * The first is returned when it exists on disk, otherwise the second is
 * returned without an existence check.
 *
 * @param {string} [bundleDir] - Directory to resolve from; defaults to the
 *   directory containing this module.
 * @returns {string} Path to the daemon script.
 */
function resolveDaemonPath(bundleDir = dirname(fileURLToPath(import.meta.url))) {
  const siblingDaemon = join6(bundleDir, "embeddings", "embed-daemon.js");
  if (existsSync3(siblingDaemon)) {
    return siblingDaemon;
  }
  return join6(bundleDir, "..", "embeddings", "embed-daemon.js");
}
|
|
1284
|
+
// Lazily created EmbedClient shared by every caller in this module.
var sharedEmbedClient = null;
/**
 * Returns the module-wide EmbedClient, constructing it on first use with the
 * daemon path from resolveDaemonPath() and the semantic-search timeout.
 *
 * @returns {EmbedClient}
 */
function getEmbedClient() {
  if (sharedEmbedClient) {
    return sharedEmbedClient;
  }
  const clientOptions = {
    daemonEntry: resolveDaemonPath(),
    timeoutMs: SEMANTIC_TIMEOUT_MS
  };
  sharedEmbedClient = new EmbedClient(clientOptions);
  return sharedEmbedClient;
}
|
|
1294
|
+
/**
 * Heuristic: is this grep pattern plain enough to also embed as a semantic query?
 *
 * @param {string} pattern - The grep pattern.
 * @param {boolean} fixedString - Whether the pattern is matched literally.
 * @returns {boolean} false for empty or one-character patterns; true for any
 *   longer fixed-string pattern; otherwise true only when the pattern contains
 *   at most one of the characters | ( ) [ ] { } + ? ^ $ \
 */
function patternIsSemanticFriendly(pattern, fixedString) {
  if (!pattern || pattern.length < 2) {
    return false;
  }
  if (fixedString) {
    return true;
  }
  const metaCount = (pattern.match(/[|()\[\]{}+?^$\\]/g) ?? []).length;
  return metaCount <= 1;
}
|
|
822
1304
|
function splitFirstPipelineStage(cmd) {
|
|
823
1305
|
const input = cmd.trim();
|
|
824
1306
|
let quote = null;
|
|
@@ -1136,7 +1618,15 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
|
|
|
1136
1618
|
invertMatch: params.invertMatch,
|
|
1137
1619
|
fixedString: params.fixedString
|
|
1138
1620
|
};
|
|
1139
|
-
|
|
1621
|
+
let queryEmbedding = null;
|
|
1622
|
+
if (SEMANTIC_ENABLED && patternIsSemanticFriendly(params.pattern, params.fixedString)) {
|
|
1623
|
+
try {
|
|
1624
|
+
queryEmbedding = await getEmbedClient().embed(params.pattern, "query");
|
|
1625
|
+
} catch {
|
|
1626
|
+
queryEmbedding = null;
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath, queryEmbedding);
|
|
1140
1630
|
const joined = output.join("\n") || "(no matches)";
|
|
1141
1631
|
return capOutputForClaude(joined, { kind: "grep" });
|
|
1142
1632
|
}
|
|
@@ -1145,33 +1635,65 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
|
|
|
1145
1635
|
/**
 * Normalizes one session record by delegating to normalizeContent().
 *
 * @param {string} path - Virtual path of the record.
 * @param {string} content - Raw content stored at that path.
 * @returns {*} Whatever normalizeContent(path, content) returns.
 */
function normalizeSessionPart(path, content) {
  const normalized = normalizeContent(path, content);
  return normalized;
}
|
|
1148
|
-
|
|
1149
|
-
|
|
1638
|
+
// Maximum number of rows listed per section of the virtual index.
var INDEX_LIMIT_PER_SECTION = 50;
/**
 * Makes a value safe to place in a single Markdown table cell: missing/falsy
 * values become "", line breaks become spaces and "|" is backslash-escaped so
 * free-form text cannot split or terminate the row.
 *
 * @param {*} value - Raw cell value.
 * @returns {string}
 */
function escapeIndexTableCell(value) {
  return String(value || "").replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
}
/**
 * Renders the virtual `/index.md` document: a "## memory" table of summaries
 * followed by a "## sessions" table of raw session records.
 *
 * @param {Array<object>} summaryRows - Rows with `path`, `project`,
 *   `description`, `creation_date`, `last_update_date`. Rows whose path does not
 *   match `/summaries/<user>/<session>.md` are skipped.
 * @param {Array<object>} [sessionRows=[]] - Rows with `path`, `description`,
 *   `creation_date`, `last_update_date`.
 * @param {object} [opts]
 * @param {boolean} [opts.summaryTruncated] - Adds a "showing N most-recent" note
 *   to the memory section.
 * @param {boolean} [opts.sessionTruncated] - Same note for the sessions section.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function buildVirtualIndexContent(summaryRows, sessionRows = [], opts = {}) {
  const lines = [
    "# Session Index",
    "",
    "Two sources are available. Consult the section relevant to the question.",
    ""
  ];
  lines.push("## memory", "");
  if (summaryRows.length === 0) {
    lines.push("_(empty \u2014 no summaries ingested yet)_");
  } else {
    lines.push("AI-generated summaries per session. Read these first for topic-level overviews.");
    lines.push("");
    if (opts.summaryTruncated) {
      lines.push(`_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older summaries reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`);
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Project | Description |");
    lines.push("|---------|---------|--------------|---------|-------------|");
    for (const row of summaryRows) {
      const p = row["path"] || "";
      const match = p.match(/\/summaries\/([^/]+)\/([^/]+)\.md$/);
      if (!match)
        continue;
      const [, summaryUser, sessionId] = match;
      const relPath = `summaries/${summaryUser}/${sessionId}.md`;
      const project = escapeIndexTableCell(row["project"]);
      const description = escapeIndexTableCell(row["description"]);
      const creationDate = escapeIndexTableCell(row["creation_date"]);
      const lastUpdateDate = escapeIndexTableCell(row["last_update_date"]);
      lines.push(`| [${sessionId}](${relPath}) | ${creationDate} | ${lastUpdateDate} | ${project} | ${description} |`);
    }
  }
  lines.push("");
  lines.push("## sessions", "");
  if (sessionRows.length === 0) {
    lines.push("_(empty \u2014 no session records ingested yet)_");
  } else {
    lines.push("Raw session records (dialogue, tool calls). Read for exact detail / quotes.");
    lines.push("");
    if (opts.sessionTruncated) {
      lines.push(`_Showing ${INDEX_LIMIT_PER_SECTION} most-recent of many \u2014 older sessions reachable via \`Grep pattern="..." path="~/.deeplake/memory"\`._`);
      lines.push("");
    }
    lines.push("| Session | Created | Last Updated | Description |");
    lines.push("|---------|---------|--------------|-------------|");
    for (const row of sessionRows) {
      const p = row["path"] || "";
      // Links are relative, so drop a leading "/".
      const rel = p.startsWith("/") ? p.slice(1) : p;
      const filename = p.split("/").pop() ?? p;
      const description = escapeIndexTableCell(row["description"]);
      const creationDate = escapeIndexTableCell(row["creation_date"]);
      const lastUpdateDate = escapeIndexTableCell(row["last_update_date"]);
      lines.push(`| [${filename}](${rel}) | ${creationDate} | ${lastUpdateDate} | ${description} |`);
    }
  }
  lines.push("");
  return lines.join("\n");
}
|
|
1177
1699
|
function buildUnionQuery(memoryQuery, sessionsQuery) {
|
|
@@ -1233,11 +1755,14 @@ async function readVirtualPathContents(api, memoryTable, sessionsTable, virtualP
|
|
|
1233
1755
|
}
|
|
1234
1756
|
}
|
|
1235
1757
|
if (result.get("/index.md") === null && uniquePaths.includes("/index.md")) {
|
|
1758
|
+
const fetchLimit = INDEX_LIMIT_PER_SECTION + 1;
|
|
1236
1759
|
const [summaryRows, sessionRows] = await Promise.all([
|
|
1237
|
-
api.query(`SELECT path, project, description, creation_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY
|
|
1238
|
-
api.query(`SELECT path, description FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' ORDER BY
|
|
1760
|
+
api.query(`SELECT path, project, description, creation_date, last_update_date FROM "${memoryTable}" WHERE path LIKE '/summaries/%' ORDER BY last_update_date DESC LIMIT ${fetchLimit}`).catch(() => []),
|
|
1761
|
+
api.query(`SELECT path, MAX(description) AS description, MIN(creation_date) AS creation_date, MAX(last_update_date) AS last_update_date FROM "${sessionsTable}" WHERE path LIKE '/sessions/%' GROUP BY path ORDER BY MAX(last_update_date) DESC LIMIT ${fetchLimit}`).catch(() => [])
|
|
1239
1762
|
]);
|
|
1240
|
-
|
|
1763
|
+
const summaryTruncated = summaryRows.length > INDEX_LIMIT_PER_SECTION;
|
|
1764
|
+
const sessionTruncated = sessionRows.length > INDEX_LIMIT_PER_SECTION;
|
|
1765
|
+
result.set("/index.md", buildVirtualIndexContent(summaryRows.slice(0, INDEX_LIMIT_PER_SECTION), sessionRows.slice(0, INDEX_LIMIT_PER_SECTION), { summaryTruncated, sessionTruncated }));
|
|
1241
1766
|
}
|
|
1242
1767
|
return result;
|
|
1243
1768
|
}
|
|
@@ -1757,20 +2282,20 @@ async function executeCompiledBashCommand(api, memoryTable, sessionsTable, cmd,
|
|
|
1757
2282
|
}
|
|
1758
2283
|
|
|
1759
2284
|
// dist/src/hooks/query-cache.js
|
|
1760
|
-
import { mkdirSync as mkdirSync2, readFileSync as
|
|
1761
|
-
import { join as
|
|
1762
|
-
import { homedir as
|
|
1763
|
-
var
|
|
1764
|
-
var DEFAULT_CACHE_ROOT =
|
|
2285
|
+
import { mkdirSync as mkdirSync2, readFileSync as readFileSync4, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
2286
|
+
import { join as join7 } from "node:path";
|
|
2287
|
+
import { homedir as homedir5 } from "node:os";
|
|
2288
|
+
// Logger for the query cache: forwards the message to log() under the "query-cache" tag.
var log4 = (msg) => log("query-cache", msg);
|
|
2289
|
+
// Default root directory for per-session query caches: <home>/.deeplake/query-cache.
// Used by getSessionQueryCacheDir() when deps.cacheRoot is not provided.
var DEFAULT_CACHE_ROOT = join7(homedir5(), ".deeplake", "query-cache");
|
|
1765
2290
|
// File name, inside a session's cache directory, under which the index content is cached.
var INDEX_CACHE_FILE = "index.md";
|
|
1766
2291
|
/**
 * Returns the cache directory for one session.
 *
 * @param {string} sessionId - Session identifier, used as the directory name.
 * @param {object} [deps]
 * @param {string} [deps.cacheRoot=DEFAULT_CACHE_ROOT] - Root under which session
 *   directories live.
 * @returns {string} `<cacheRoot>/<sessionId>`
 */
function getSessionQueryCacheDir(sessionId, deps = {}) {
  const root = deps.cacheRoot === void 0 ? DEFAULT_CACHE_ROOT : deps.cacheRoot;
  return join7(root, sessionId);
}
|
|
1770
2295
|
function readCachedIndexContent(sessionId, deps = {}) {
|
|
1771
|
-
const { logFn =
|
|
2296
|
+
const { logFn = log4 } = deps;
|
|
1772
2297
|
try {
|
|
1773
|
-
return
|
|
2298
|
+
return readFileSync4(join7(getSessionQueryCacheDir(sessionId, deps), INDEX_CACHE_FILE), "utf-8");
|
|
1774
2299
|
} catch (e) {
|
|
1775
2300
|
if (e?.code === "ENOENT")
|
|
1776
2301
|
return null;
|
|
@@ -1779,11 +2304,11 @@ function readCachedIndexContent(sessionId, deps = {}) {
|
|
|
1779
2304
|
}
|
|
1780
2305
|
}
|
|
1781
2306
|
function writeCachedIndexContent(sessionId, content, deps = {}) {
|
|
1782
|
-
const { logFn =
|
|
2307
|
+
const { logFn = log4 } = deps;
|
|
1783
2308
|
try {
|
|
1784
2309
|
const dir = getSessionQueryCacheDir(sessionId, deps);
|
|
1785
2310
|
mkdirSync2(dir, { recursive: true });
|
|
1786
|
-
writeFileSync2(
|
|
2311
|
+
writeFileSync2(join7(dir, INDEX_CACHE_FILE), content, "utf-8");
|
|
1787
2312
|
} catch (e) {
|
|
1788
2313
|
logFn(`write failed for session=${sessionId}: ${e.message}`);
|
|
1789
2314
|
}
|
|
@@ -1791,22 +2316,22 @@ function writeCachedIndexContent(sessionId, content, deps = {}) {
|
|
|
1791
2316
|
|
|
1792
2317
|
// dist/src/utils/direct-run.js
|
|
1793
2318
|
import { resolve } from "node:path";
|
|
1794
|
-
import { fileURLToPath } from "node:url";
|
|
2319
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
1795
2320
|
/**
 * Tells whether the module identified by `metaUrl` is the script Node was
 * started with, by comparing its resolved file path with process.argv[1].
 *
 * @param {string} metaUrl - The module's `import.meta.url`.
 * @returns {boolean} true when both resolve to the same path; false when there
 *   is no entry script or the URL cannot be converted to a file path.
 */
function isDirectRun(metaUrl) {
  const [, entry] = process.argv;
  if (!entry) {
    return false;
  }
  try {
    const modulePath = resolve(fileURLToPath2(metaUrl));
    const entryPath = resolve(entry);
    return modulePath === entryPath;
  } catch {
    return false;
  }
}
|
|
1805
2330
|
|
|
1806
2331
|
// dist/src/hooks/memory-path-utils.js
|
|
1807
|
-
import { homedir as
|
|
1808
|
-
import { join as
|
|
1809
|
-
var MEMORY_PATH =
|
|
2332
|
+
import { homedir as homedir6 } from "node:os";
|
|
2333
|
+
import { join as join8 } from "node:path";
|
|
2334
|
+
// Absolute form of the memory directory: <home>/.deeplake/memory.
var MEMORY_PATH = join8(homedir6(), ".deeplake", "memory");
|
|
1810
2335
|
// The same memory directory written with a leading "~".
// NOTE(review): presumably matched against commands that spell the path this way; confirm at the use sites.
var TILDE_PATH = "~/.deeplake/memory";
|
|
1811
2336
|
// The same memory directory written with a literal "$HOME" prefix.
// NOTE(review): presumably matched against commands that spell the path this way; confirm at the use sites.
var HOME_VAR_PATH = "$HOME/.deeplake/memory";
|
|
1812
2337
|
var SAFE_BUILTINS = /* @__PURE__ */ new Set([
|
|
@@ -1922,13 +2447,13 @@ function rewritePaths(cmd) {
|
|
|
1922
2447
|
}
|
|
1923
2448
|
|
|
1924
2449
|
// dist/src/hooks/codex/pre-tool-use.js
|
|
1925
|
-
var
|
|
1926
|
-
var __bundleDir =
|
|
1927
|
-
var SHELL_BUNDLE =
|
|
2450
|
+
// Logger for the Codex pre-tool-use hook: forwards the message to log() under the "codex-pre" tag.
var log5 = (msg) => log("codex-pre", msg);
|
|
2451
|
+
// Directory containing this module, derived from import.meta.url.
var __bundleDir = dirname2(fileURLToPath3(import.meta.url));
|
|
2452
|
+
// Path of the virtual shell script. Prefers shell/deeplake-shell.js beside this
// module when that file exists; otherwise falls back to ../shell/deeplake-shell.js
// (the fallback is not checked for existence).
var SHELL_BUNDLE = existsSync4(join9(__bundleDir, "shell", "deeplake-shell.js")) ? join9(__bundleDir, "shell", "deeplake-shell.js") : join9(__bundleDir, "..", "shell", "deeplake-shell.js");
|
|
1928
2453
|
/**
 * Returns the fixed guidance message for a command that is not supported on
 * ~/.deeplake/memory/ paths, listing the allowed bash tools.
 *
 * @returns {string}
 */
function buildUnsupportedGuidance() {
  const guidance = "This command is not supported for ~/.deeplake/memory/ operations. Only bash builtins are available: cat, ls, grep, echo, jq, head, tail, sed, awk, wc, sort, find, etc. Do NOT use python, python3, node, curl, or other interpreters. Rewrite your command using only bash tools and retry.";
  return guidance;
}
|
|
1931
|
-
function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn =
|
|
2456
|
+
function runVirtualShell(cmd, shellBundle = SHELL_BUNDLE, logFn = log5) {
|
|
1932
2457
|
try {
|
|
1933
2458
|
return execFileSync("node", [shellBundle, "-c", cmd], {
|
|
1934
2459
|
encoding: "utf-8",
|
|
@@ -1953,7 +2478,7 @@ function buildIndexContent(rows) {
|
|
|
1953
2478
|
return lines.join("\n");
|
|
1954
2479
|
}
|
|
1955
2480
|
async function processCodexPreToolUse(input, deps = {}) {
|
|
1956
|
-
const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn =
|
|
2481
|
+
const { config = loadConfig(), createApi = (table, activeConfig) => new DeeplakeApi(activeConfig.token, activeConfig.apiUrl, activeConfig.orgId, activeConfig.workspaceId, table), executeCompiledBashCommandFn = executeCompiledBashCommand, readVirtualPathContentsFn = readVirtualPathContents, readVirtualPathContentFn = readVirtualPathContent, listVirtualPathRowsFn = listVirtualPathRows, findVirtualPathsFn = findVirtualPaths, handleGrepDirectFn = handleGrepDirect, readCachedIndexContentFn = readCachedIndexContent, writeCachedIndexContentFn = writeCachedIndexContent, runVirtualShellFn = runVirtualShell, shellBundle = SHELL_BUNDLE, logFn = log5 } = deps;
|
|
1957
2482
|
const cmd = input.tool_input?.command ?? "";
|
|
1958
2483
|
logFn(`hook fired: cmd=${cmd}`);
|
|
1959
2484
|
if (!touchesMemory(cmd))
|
|
@@ -2163,7 +2688,7 @@ async function main() {
|
|
|
2163
2688
|
}
|
|
2164
2689
|
if (isDirectRun(import.meta.url)) {
|
|
2165
2690
|
main().catch((e) => {
|
|
2166
|
-
|
|
2691
|
+
log5(`fatal: ${e.message}`);
|
|
2167
2692
|
process.exit(0);
|
|
2168
2693
|
});
|
|
2169
2694
|
}
|