@deeplake/hivemind 0.6.48 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +147 -20
- package/bundle/cli.js +552 -95
- package/codex/bundle/capture.js +509 -89
- package/codex/bundle/commands/auth-login.js +209 -66
- package/codex/bundle/embeddings/embed-daemon.js +243 -0
- package/codex/bundle/pre-tool-use.js +629 -104
- package/codex/bundle/session-start-setup.js +194 -57
- package/codex/bundle/session-start.js +25 -10
- package/codex/bundle/shell/deeplake-shell.js +679 -112
- package/codex/bundle/stop.js +476 -58
- package/codex/bundle/wiki-worker.js +312 -11
- package/cursor/bundle/capture.js +768 -57
- package/cursor/bundle/commands/auth-login.js +209 -66
- package/cursor/bundle/embeddings/embed-daemon.js +243 -0
- package/cursor/bundle/pre-tool-use.js +561 -70
- package/cursor/bundle/session-end.js +223 -2
- package/cursor/bundle/session-start.js +192 -54
- package/cursor/bundle/shell/deeplake-shell.js +679 -112
- package/cursor/bundle/wiki-worker.js +571 -0
- package/hermes/bundle/capture.js +771 -58
- package/hermes/bundle/commands/auth-login.js +209 -66
- package/hermes/bundle/embeddings/embed-daemon.js +243 -0
- package/hermes/bundle/pre-tool-use.js +560 -69
- package/hermes/bundle/session-end.js +224 -1
- package/hermes/bundle/session-start.js +195 -54
- package/hermes/bundle/shell/deeplake-shell.js +679 -112
- package/hermes/bundle/wiki-worker.js +572 -0
- package/mcp/bundle/server.js +253 -68
- package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
- package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
- package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
- package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
- package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
- package/openclaw/dist/index.js +752 -702
- package/openclaw/openclaw.plugin.json +1 -1
- package/openclaw/package.json +1 -1
- package/package.json +2 -1
- package/pi/extension-source/hivemind.ts +473 -21
|
@@ -1,3 +1,56 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
3
|
+
var __esm = (fn, res) => function __init() {
|
|
4
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
5
|
+
};
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
// dist/src/index-marker-store.js
|
|
12
|
+
var index_marker_store_exports = {};
|
|
13
|
+
__export(index_marker_store_exports, {
|
|
14
|
+
buildIndexMarkerPath: () => buildIndexMarkerPath,
|
|
15
|
+
getIndexMarkerDir: () => getIndexMarkerDir,
|
|
16
|
+
hasFreshIndexMarker: () => hasFreshIndexMarker,
|
|
17
|
+
writeIndexMarker: () => writeIndexMarker
|
|
18
|
+
});
|
|
19
|
+
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
20
|
+
import { join as join3 } from "node:path";
|
|
21
|
+
import { tmpdir } from "node:os";
|
|
22
|
+
function getIndexMarkerDir() {
|
|
23
|
+
return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
|
|
24
|
+
}
|
|
25
|
+
function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
|
|
26
|
+
const markerKey = [workspaceId, orgId, table, suffix].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
|
|
27
|
+
return join3(getIndexMarkerDir(), `${markerKey}.json`);
|
|
28
|
+
}
|
|
29
|
+
/**
 * Report whether the marker file at `markerPath` exists and is still within
 * the TTL held in the module-level `INDEX_MARKER_TTL_MS`.
 *
 * @param {string} markerPath - Path to a JSON marker file carrying an
 *   `updatedAt` date string (the shape `writeIndexMarker` produces).
 * @returns {boolean} `true` only when the file parses, has a valid
 *   `updatedAt`, and its age is provably within the TTL; `false` otherwise.
 */
function hasFreshIndexMarker(markerPath) {
  if (!existsSync2(markerPath))
    return false;
  try {
    const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
    const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
    if (!Number.isFinite(updatedAt))
      return false;
    // Phrased as "age <= TTL" rather than "!(age > TTL)": if the TTL is NaN
    // (e.g. a non-numeric HIVEMIND_INDEX_MARKER_TTL_MS), every comparison is
    // false, so the marker is treated as stale and the caller re-verifies,
    // instead of the marker silently never expiring.
    return Date.now() - updatedAt <= INDEX_MARKER_TTL_MS;
  } catch {
    // Unreadable or malformed marker (including a file that vanished after
    // the existsSync check) is treated as not fresh.
    return false;
  }
}
|
|
42
|
+
function writeIndexMarker(markerPath) {
|
|
43
|
+
mkdirSync(getIndexMarkerDir(), { recursive: true });
|
|
44
|
+
writeFileSync(markerPath, JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
|
|
45
|
+
}
|
|
46
|
+
var INDEX_MARKER_TTL_MS;
|
|
47
|
+
var init_index_marker_store = __esm({
|
|
48
|
+
"dist/src/index-marker-store.js"() {
|
|
49
|
+
"use strict";
|
|
50
|
+
INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
|
|
51
|
+
}
|
|
52
|
+
});
|
|
53
|
+
|
|
1
54
|
// dist/src/utils/stdin.js
|
|
2
55
|
function readStdin() {
|
|
3
56
|
return new Promise((resolve, reject) => {
|
|
@@ -49,9 +102,6 @@ function loadConfig() {
|
|
|
49
102
|
|
|
50
103
|
// dist/src/deeplake-api.js
|
|
51
104
|
import { randomUUID } from "node:crypto";
|
|
52
|
-
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
53
|
-
import { join as join3 } from "node:path";
|
|
54
|
-
import { tmpdir } from "node:os";
|
|
55
105
|
|
|
56
106
|
// dist/src/utils/debug.js
|
|
57
107
|
import { appendFileSync } from "node:fs";
|
|
@@ -74,7 +124,26 @@ function sqlLike(value) {
|
|
|
74
124
|
return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_");
|
|
75
125
|
}
|
|
76
126
|
|
|
127
|
+
// dist/src/embeddings/columns.js
|
|
128
|
+
var SUMMARY_EMBEDDING_COL = "summary_embedding";
|
|
129
|
+
var MESSAGE_EMBEDDING_COL = "message_embedding";
|
|
130
|
+
|
|
131
|
+
// dist/src/utils/client-header.js
|
|
132
|
+
var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
|
|
133
|
+
function deeplakeClientValue() {
|
|
134
|
+
return "hivemind";
|
|
135
|
+
}
|
|
136
|
+
function deeplakeClientHeader() {
|
|
137
|
+
return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
|
|
138
|
+
}
|
|
139
|
+
|
|
77
140
|
// dist/src/deeplake-api.js
|
|
141
|
+
var indexMarkerStorePromise = null;
|
|
142
|
+
function getIndexMarkerStore() {
|
|
143
|
+
if (!indexMarkerStorePromise)
|
|
144
|
+
indexMarkerStorePromise = Promise.resolve().then(() => (init_index_marker_store(), index_marker_store_exports));
|
|
145
|
+
return indexMarkerStorePromise;
|
|
146
|
+
}
|
|
78
147
|
var log2 = (msg) => log("sdk", msg);
|
|
79
148
|
function summarizeSql(sql, maxLen = 220) {
|
|
80
149
|
const compact = sql.replace(/\s+/g, " ").trim();
|
|
@@ -94,7 +163,6 @@ var MAX_RETRIES = 3;
|
|
|
94
163
|
var BASE_DELAY_MS = 500;
|
|
95
164
|
var MAX_CONCURRENCY = 5;
|
|
96
165
|
var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
|
|
97
|
-
var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
|
|
98
166
|
function sleep(ms) {
|
|
99
167
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
100
168
|
}
|
|
@@ -114,9 +182,6 @@ function isTransientHtml403(text) {
|
|
|
114
182
|
const body = text.toLowerCase();
|
|
115
183
|
return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
|
|
116
184
|
}
|
|
117
|
-
function getIndexMarkerDir() {
|
|
118
|
-
return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
|
|
119
|
-
}
|
|
120
185
|
var Semaphore = class {
|
|
121
186
|
max;
|
|
122
187
|
waiting = [];
|
|
@@ -185,7 +250,8 @@ var DeeplakeApi = class {
|
|
|
185
250
|
headers: {
|
|
186
251
|
Authorization: `Bearer ${this.token}`,
|
|
187
252
|
"Content-Type": "application/json",
|
|
188
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
253
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
254
|
+
...deeplakeClientHeader()
|
|
189
255
|
},
|
|
190
256
|
signal,
|
|
191
257
|
body: JSON.stringify({ query: sql })
|
|
@@ -212,7 +278,8 @@ var DeeplakeApi = class {
|
|
|
212
278
|
}
|
|
213
279
|
const text = await resp.text().catch(() => "");
|
|
214
280
|
const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
|
|
215
|
-
|
|
281
|
+
const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
|
|
282
|
+
if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
|
|
216
283
|
const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
|
|
217
284
|
log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
|
|
218
285
|
await sleep(delay);
|
|
@@ -246,7 +313,7 @@ var DeeplakeApi = class {
|
|
|
246
313
|
const lud = row.lastUpdateDate ?? ts;
|
|
247
314
|
const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
|
|
248
315
|
if (exists.length > 0) {
|
|
249
|
-
let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
316
|
+
let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
250
317
|
if (row.project !== void 0)
|
|
251
318
|
setClauses += `, project = '${sqlStr(row.project)}'`;
|
|
252
319
|
if (row.description !== void 0)
|
|
@@ -254,8 +321,8 @@ var DeeplakeApi = class {
|
|
|
254
321
|
await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
|
|
255
322
|
} else {
|
|
256
323
|
const id = randomUUID();
|
|
257
|
-
let cols =
|
|
258
|
-
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
324
|
+
let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
|
|
325
|
+
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
259
326
|
if (row.project !== void 0) {
|
|
260
327
|
cols += ", project";
|
|
261
328
|
vals += `, '${sqlStr(row.project)}'`;
|
|
@@ -280,48 +347,83 @@ var DeeplakeApi = class {
|
|
|
280
347
|
buildLookupIndexName(table, suffix) {
|
|
281
348
|
return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
282
349
|
}
|
|
283
|
-
getLookupIndexMarkerPath(table, suffix) {
|
|
284
|
-
const markerKey = [
|
|
285
|
-
this.workspaceId,
|
|
286
|
-
this.orgId,
|
|
287
|
-
table,
|
|
288
|
-
suffix
|
|
289
|
-
].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
|
|
290
|
-
return join3(getIndexMarkerDir(), `${markerKey}.json`);
|
|
291
|
-
}
|
|
292
|
-
hasFreshLookupIndexMarker(table, suffix) {
|
|
293
|
-
const markerPath = this.getLookupIndexMarkerPath(table, suffix);
|
|
294
|
-
if (!existsSync2(markerPath))
|
|
295
|
-
return false;
|
|
296
|
-
try {
|
|
297
|
-
const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
|
|
298
|
-
const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
|
|
299
|
-
if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
|
|
300
|
-
return false;
|
|
301
|
-
return true;
|
|
302
|
-
} catch {
|
|
303
|
-
return false;
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
markLookupIndexReady(table, suffix) {
|
|
307
|
-
mkdirSync(getIndexMarkerDir(), { recursive: true });
|
|
308
|
-
writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
|
|
309
|
-
}
|
|
310
350
|
async ensureLookupIndex(table, suffix, columnsSql) {
|
|
311
|
-
|
|
351
|
+
const markers = await getIndexMarkerStore();
|
|
352
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
|
|
353
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
312
354
|
return;
|
|
313
355
|
const indexName = this.buildLookupIndexName(table, suffix);
|
|
314
356
|
try {
|
|
315
357
|
await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
|
|
316
|
-
|
|
358
|
+
markers.writeIndexMarker(markerPath);
|
|
317
359
|
} catch (e) {
|
|
318
360
|
if (isDuplicateIndexError(e)) {
|
|
319
|
-
|
|
361
|
+
markers.writeIndexMarker(markerPath);
|
|
320
362
|
return;
|
|
321
363
|
}
|
|
322
364
|
log2(`index "${indexName}" skipped: ${e.message}`);
|
|
323
365
|
}
|
|
324
366
|
}
|
|
367
|
+
/**
|
|
368
|
+
* Ensure a vector column exists on the given table.
|
|
369
|
+
*
|
|
370
|
+
* The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
|
|
371
|
+
* EXISTS …` on every SessionStart. On a long-running workspace that's
|
|
372
|
+
* already migrated, every call returns 500 "Column already exists" — noisy
|
|
373
|
+
* in the log and a wasted round-trip. Worse, the very first call after the
|
|
374
|
+
* column is genuinely added triggers Deeplake's post-ALTER `vector::at`
|
|
375
|
+
* window (~30s) during which subsequent INSERTs fail; minimising the
|
|
376
|
+
* number of ALTER calls minimises exposure to that window.
|
|
377
|
+
*
|
|
378
|
+
* New flow:
|
|
379
|
+
* 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
|
|
380
|
+
* return — zero network calls.
|
|
381
|
+
* 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
|
|
382
|
+
* column_name = C. Read-only, idempotent, can't tickle the post-ALTER
|
|
383
|
+
* bug. If the column is present → mark + return.
|
|
384
|
+
* 3. Only if step 2 says the column is missing, fall back to ALTER ADD
|
|
385
|
+
* COLUMN (sent without IF NOT EXISTS). Mark on success, also mark if Deeplake reports
|
|
386
|
+
* "already exists" (race: another client added it between our SELECT
|
|
387
|
+
* and ALTER).
|
|
388
|
+
*
|
|
389
|
+
* Marker uses the same dir / TTL as ensureLookupIndex so both schema
|
|
390
|
+
* caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
|
|
391
|
+
*/
|
|
392
|
+
async ensureEmbeddingColumn(table, column) {
|
|
393
|
+
await this.ensureColumn(table, column, "FLOAT4[]");
|
|
394
|
+
}
|
|
395
|
+
/**
|
|
396
|
+
* Generic marker-gated column migration. Same SELECT-then-ALTER flow as
|
|
397
|
+
* ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
|
|
398
|
+
* column that was added to the schema after the table was originally
|
|
399
|
+
* created. Used today for `summary_embedding`, `message_embedding`, and
|
|
400
|
+
* the `agent` column (added 2026-04-11) — the latter has no fallback if
|
|
401
|
+
* a user upgraded over a pre-2026-04-11 table, so every INSERT fails
|
|
402
|
+
* with `column "agent" does not exist`.
|
|
403
|
+
*/
|
|
404
|
+
async ensureColumn(table, column, sqlType) {
|
|
405
|
+
const markers = await getIndexMarkerStore();
|
|
406
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
|
|
407
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
408
|
+
return;
|
|
409
|
+
const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
|
|
410
|
+
const rows = await this.query(colCheck);
|
|
411
|
+
if (rows.length > 0) {
|
|
412
|
+
markers.writeIndexMarker(markerPath);
|
|
413
|
+
return;
|
|
414
|
+
}
|
|
415
|
+
try {
|
|
416
|
+
await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
|
|
417
|
+
} catch (e) {
|
|
418
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
419
|
+
if (!/already exists/i.test(msg))
|
|
420
|
+
throw e;
|
|
421
|
+
const recheck = await this.query(colCheck);
|
|
422
|
+
if (recheck.length === 0)
|
|
423
|
+
throw e;
|
|
424
|
+
}
|
|
425
|
+
markers.writeIndexMarker(markerPath);
|
|
426
|
+
}
|
|
325
427
|
/** List all tables in the workspace (with retry). */
|
|
326
428
|
async listTables(forceRefresh = false) {
|
|
327
429
|
if (!forceRefresh && this._tablesCache)
|
|
@@ -337,7 +439,8 @@ var DeeplakeApi = class {
|
|
|
337
439
|
const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
|
|
338
440
|
headers: {
|
|
339
441
|
Authorization: `Bearer ${this.token}`,
|
|
340
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
442
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
443
|
+
...deeplakeClientHeader()
|
|
341
444
|
}
|
|
342
445
|
});
|
|
343
446
|
if (resp.ok) {
|
|
@@ -362,28 +465,60 @@ var DeeplakeApi = class {
|
|
|
362
465
|
}
|
|
363
466
|
return { tables: [], cacheable: false };
|
|
364
467
|
}
|
|
468
|
+
/**
|
|
469
|
+
* Run a `CREATE TABLE` with an extra outer retry budget. The base
|
|
470
|
+
* `query()` already retries 3 times on fetch errors (~3.5s total), but a
|
|
471
|
+
* failed CREATE is permanent corruption — every subsequent SELECT against
|
|
472
|
+
* the missing table fails. Wrapping in an outer loop with longer backoff
|
|
473
|
+
* (2s, 5s, then 10s) gives us ~17s of reach across transient network
|
|
474
|
+
* blips before giving up. Failures still propagate; getApi() resets its
|
|
475
|
+
* cache on init failure (openclaw plugin) so the next call retries the
|
|
476
|
+
* whole init flow.
|
|
477
|
+
*/
|
|
478
|
+
async createTableWithRetry(sql, label) {
|
|
479
|
+
const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
|
|
480
|
+
let lastErr = null;
|
|
481
|
+
for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
|
|
482
|
+
try {
|
|
483
|
+
await this.query(sql);
|
|
484
|
+
return;
|
|
485
|
+
} catch (err) {
|
|
486
|
+
lastErr = err;
|
|
487
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
488
|
+
log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
|
|
489
|
+
if (attempt < OUTER_BACKOFFS_MS.length) {
|
|
490
|
+
await sleep(OUTER_BACKOFFS_MS[attempt]);
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
throw lastErr;
|
|
495
|
+
}
|
|
365
496
|
/** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
|
|
366
497
|
async ensureTable(name) {
|
|
367
498
|
const tbl = name ?? this.tableName;
|
|
368
499
|
const tables = await this.listTables();
|
|
369
500
|
if (!tables.includes(tbl)) {
|
|
370
501
|
log2(`table "${tbl}" not found, creating`);
|
|
371
|
-
await this.
|
|
502
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
|
|
372
503
|
log2(`table "${tbl}" created`);
|
|
373
504
|
if (!tables.includes(tbl))
|
|
374
505
|
this._tablesCache = [...tables, tbl];
|
|
375
506
|
}
|
|
507
|
+
await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
|
|
508
|
+
await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
376
509
|
}
|
|
377
510
|
/** Create the sessions table (uses JSONB for message since every row is a JSON event). */
|
|
378
511
|
async ensureSessionsTable(name) {
|
|
379
512
|
const tables = await this.listTables();
|
|
380
513
|
if (!tables.includes(name)) {
|
|
381
514
|
log2(`table "${name}" not found, creating`);
|
|
382
|
-
await this.
|
|
515
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
|
|
383
516
|
log2(`table "${name}" created`);
|
|
384
517
|
if (!tables.includes(name))
|
|
385
518
|
this._tablesCache = [...tables, name];
|
|
386
519
|
}
|
|
520
|
+
await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
|
|
521
|
+
await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
387
522
|
await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
|
|
388
523
|
}
|
|
389
524
|
};
|
|
@@ -552,24 +687,25 @@ function normalizeContent(path, raw) {
|
|
|
552
687
|
return raw;
|
|
553
688
|
}
|
|
554
689
|
if (Array.isArray(obj.turns)) {
|
|
555
|
-
const
|
|
556
|
-
if (obj.date_time)
|
|
557
|
-
header.push(`date: ${obj.date_time}`);
|
|
558
|
-
if (obj.speakers) {
|
|
559
|
-
const s = obj.speakers;
|
|
560
|
-
const names = [s.speaker_a, s.speaker_b].filter(Boolean).join(", ");
|
|
561
|
-
if (names)
|
|
562
|
-
header.push(`speakers: ${names}`);
|
|
563
|
-
}
|
|
690
|
+
const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
|
|
564
691
|
const lines = obj.turns.map((t) => {
|
|
565
692
|
const sp = String(t?.speaker ?? t?.name ?? "?").trim();
|
|
566
693
|
const tx = String(t?.text ?? t?.content ?? "").replace(/\s+/g, " ").trim();
|
|
567
694
|
const tag = t?.dia_id ? `[${t.dia_id}] ` : "";
|
|
568
|
-
return `${tag}${sp}: ${tx}`;
|
|
695
|
+
return `${dateHeader}${tag}${sp}: ${tx}`;
|
|
569
696
|
});
|
|
570
|
-
const out2 =
|
|
697
|
+
const out2 = lines.join("\n");
|
|
571
698
|
return out2.trim() ? out2 : raw;
|
|
572
699
|
}
|
|
700
|
+
if (obj.turn && typeof obj.turn === "object" && !Array.isArray(obj.turn)) {
|
|
701
|
+
const t = obj.turn;
|
|
702
|
+
const sp = String(t.speaker ?? t.name ?? "?").trim();
|
|
703
|
+
const tx = String(t.text ?? t.content ?? "").replace(/\s+/g, " ").trim();
|
|
704
|
+
const tag = t.dia_id ? `[${String(t.dia_id)}] ` : "";
|
|
705
|
+
const dateHeader = obj.date_time ? `(${String(obj.date_time)}) ` : "";
|
|
706
|
+
const line = `${dateHeader}${tag}${sp}: ${tx}`;
|
|
707
|
+
return line.trim() ? line : raw;
|
|
708
|
+
}
|
|
573
709
|
const stripRecalled = (t) => {
|
|
574
710
|
const i = t.indexOf("<recalled-memories>");
|
|
575
711
|
if (i === -1)
|
|
@@ -612,8 +748,38 @@ function buildPathCondition(targetPath) {
|
|
|
612
748
|
return `(path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%' ESCAPE '\\')`;
|
|
613
749
|
}
|
|
614
750
|
async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
|
|
615
|
-
const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, multiWordPatterns } = opts;
|
|
751
|
+
const { pathFilter, contentScanOnly, likeOp, escapedPattern, prefilterPattern, prefilterPatterns, queryEmbedding, multiWordPatterns } = opts;
|
|
616
752
|
const limit = opts.limit ?? 100;
|
|
753
|
+
if (queryEmbedding && queryEmbedding.length > 0) {
|
|
754
|
+
const vecLit = serializeFloat4Array(queryEmbedding);
|
|
755
|
+
const semanticLimit = Math.min(limit, Number(process.env.HIVEMIND_SEMANTIC_LIMIT ?? "20"));
|
|
756
|
+
const lexicalLimit = Math.min(limit, Number(process.env.HIVEMIND_HYBRID_LEXICAL_LIMIT ?? "20"));
|
|
757
|
+
const filterPatternsForLex = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : [escapedPattern];
|
|
758
|
+
const memLexFilter = buildContentFilter("summary::text", likeOp, filterPatternsForLex);
|
|
759
|
+
const sessLexFilter = buildContentFilter("message::text", likeOp, filterPatternsForLex);
|
|
760
|
+
const memLexQuery = memLexFilter ? `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, 1.0 AS score FROM "${memoryTable}" WHERE 1=1${pathFilter}${memLexFilter} LIMIT ${lexicalLimit}` : null;
|
|
761
|
+
const sessLexQuery = sessLexFilter ? `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, 1.0 AS score FROM "${sessionsTable}" WHERE 1=1${pathFilter}${sessLexFilter} LIMIT ${lexicalLimit}` : null;
|
|
762
|
+
const memSemQuery = `SELECT path, summary::text AS content, 0 AS source_order, '' AS creation_date, (summary_embedding <#> ${vecLit}) AS score FROM "${memoryTable}" WHERE ARRAY_LENGTH(summary_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
|
|
763
|
+
const sessSemQuery = `SELECT path, message::text AS content, 1 AS source_order, COALESCE(creation_date::text, '') AS creation_date, (message_embedding <#> ${vecLit}) AS score FROM "${sessionsTable}" WHERE ARRAY_LENGTH(message_embedding, 1) > 0${pathFilter} ORDER BY score DESC LIMIT ${semanticLimit}`;
|
|
764
|
+
const parts = [memSemQuery, sessSemQuery];
|
|
765
|
+
if (memLexQuery)
|
|
766
|
+
parts.push(memLexQuery);
|
|
767
|
+
if (sessLexQuery)
|
|
768
|
+
parts.push(sessLexQuery);
|
|
769
|
+
const unionSql = parts.map((q) => `(${q})`).join(" UNION ALL ");
|
|
770
|
+
const outerLimit = semanticLimit + lexicalLimit;
|
|
771
|
+
const rows2 = await api.query(`SELECT path, content, source_order, creation_date, score FROM (` + unionSql + `) AS combined ORDER BY score DESC LIMIT ${outerLimit}`);
|
|
772
|
+
const seen = /* @__PURE__ */ new Set();
|
|
773
|
+
const unique = [];
|
|
774
|
+
for (const row of rows2) {
|
|
775
|
+
const p = String(row["path"]);
|
|
776
|
+
if (seen.has(p))
|
|
777
|
+
continue;
|
|
778
|
+
seen.add(p);
|
|
779
|
+
unique.push({ path: p, content: String(row["content"] ?? "") });
|
|
780
|
+
}
|
|
781
|
+
return unique;
|
|
782
|
+
}
|
|
617
783
|
const filterPatterns = contentScanOnly ? prefilterPatterns && prefilterPatterns.length > 0 ? prefilterPatterns : prefilterPattern ? [prefilterPattern] : [] : multiWordPatterns && multiWordPatterns.length > 1 ? multiWordPatterns : [escapedPattern];
|
|
618
784
|
const memFilter = buildContentFilter("summary::text", likeOp, filterPatterns);
|
|
619
785
|
const sessFilter = buildContentFilter("message::text", likeOp, filterPatterns);
|
|
@@ -625,6 +791,15 @@ async function searchDeeplakeTables(api, memoryTable, sessionsTable, opts) {
|
|
|
625
791
|
content: String(row["content"] ?? "")
|
|
626
792
|
}));
|
|
627
793
|
}
|
|
794
|
+
function serializeFloat4Array(vec) {
|
|
795
|
+
const parts = [];
|
|
796
|
+
for (const v of vec) {
|
|
797
|
+
if (!Number.isFinite(v))
|
|
798
|
+
return "NULL";
|
|
799
|
+
parts.push(String(v));
|
|
800
|
+
}
|
|
801
|
+
return `ARRAY[${parts.join(",")}]::float4[]`;
|
|
802
|
+
}
|
|
628
803
|
function buildPathFilter(targetPath) {
|
|
629
804
|
const condition = buildPathCondition(targetPath);
|
|
630
805
|
return condition ? ` AND ${condition}` : "";
|
|
@@ -707,7 +882,7 @@ function buildGrepSearchOptions(params, targetPath) {
|
|
|
707
882
|
return {
|
|
708
883
|
pathFilter: buildPathFilter(targetPath),
|
|
709
884
|
contentScanOnly: hasRegexMeta,
|
|
710
|
-
likeOp:
|
|
885
|
+
likeOp: process.env.HIVEMIND_GREP_LIKE === "case-sensitive" ? "LIKE" : "ILIKE",
|
|
711
886
|
escapedPattern: sqlLike(params.pattern),
|
|
712
887
|
prefilterPattern: literalPrefilter ? sqlLike(literalPrefilter) : void 0,
|
|
713
888
|
prefilterPatterns: alternationPrefilters?.map((literal) => sqlLike(literal)),
|
|
@@ -762,11 +937,28 @@ function refineGrepMatches(rows, params, forceMultiFilePrefix) {
|
|
|
762
937
|
}
|
|
763
938
|
return output;
|
|
764
939
|
}
|
|
765
|
-
async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath) {
|
|
766
|
-
const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable,
|
|
940
|
+
async function grepBothTables(api, memoryTable, sessionsTable, params, targetPath, queryEmbedding) {
|
|
941
|
+
const rows = await searchDeeplakeTables(api, memoryTable, sessionsTable, {
|
|
942
|
+
...buildGrepSearchOptions(params, targetPath),
|
|
943
|
+
queryEmbedding
|
|
944
|
+
});
|
|
767
945
|
const seen = /* @__PURE__ */ new Set();
|
|
768
946
|
const unique = rows.filter((r) => seen.has(r.path) ? false : (seen.add(r.path), true));
|
|
769
947
|
const normalized = unique.map((r) => ({ path: r.path, content: normalizeContent(r.path, r.content) }));
|
|
948
|
+
if (queryEmbedding && queryEmbedding.length > 0) {
|
|
949
|
+
const emitAllLines = process.env.HIVEMIND_SEMANTIC_EMIT_ALL !== "false";
|
|
950
|
+
if (emitAllLines) {
|
|
951
|
+
const lines = [];
|
|
952
|
+
for (const r of normalized) {
|
|
953
|
+
for (const line of r.content.split("\n")) {
|
|
954
|
+
const trimmed = line.trim();
|
|
955
|
+
if (trimmed)
|
|
956
|
+
lines.push(`${r.path}:${line}`);
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
return lines;
|
|
960
|
+
}
|
|
961
|
+
}
|
|
770
962
|
return refineGrepMatches(normalized, params);
|
|
771
963
|
}
|
|
772
964
|
|
|
@@ -810,7 +1002,298 @@ function capOutputForClaude(output, options = {}) {
|
|
|
810
1002
|
return keptLines.join("\n") + footer;
|
|
811
1003
|
}
|
|
812
1004
|
|
|
1005
|
+
// dist/src/embeddings/client.js
|
|
1006
|
+
import { connect } from "node:net";
|
|
1007
|
+
import { spawn } from "node:child_process";
|
|
1008
|
+
import { openSync, closeSync, writeSync, unlinkSync, existsSync as existsSync3, readFileSync as readFileSync3 } from "node:fs";
|
|
1009
|
+
import { homedir as homedir3 } from "node:os";
|
|
1010
|
+
import { join as join4 } from "node:path";
|
|
1011
|
+
|
|
1012
|
+
// dist/src/embeddings/protocol.js
|
|
1013
|
+
var DEFAULT_SOCKET_DIR = "/tmp";
|
|
1014
|
+
var DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1e3;
|
|
1015
|
+
var DEFAULT_CLIENT_TIMEOUT_MS = 2e3;
|
|
1016
|
+
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
|
|
1017
|
+
return `${dir}/hivemind-embed-${uid}.sock`;
|
|
1018
|
+
}
|
|
1019
|
+
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
|
|
1020
|
+
return `${dir}/hivemind-embed-${uid}.pid`;
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
// dist/src/embeddings/client.js
|
|
1024
|
+
var SHARED_DAEMON_PATH = join4(homedir3(), ".hivemind", "embed-deps", "embed-daemon.js");
|
|
1025
|
+
var log3 = (m) => log("embed-client", m);
|
|
1026
|
+
function getUid() {
|
|
1027
|
+
const uid = typeof process.getuid === "function" ? process.getuid() : void 0;
|
|
1028
|
+
return uid !== void 0 ? String(uid) : process.env.USER ?? "default";
|
|
1029
|
+
}
|
|
1030
|
+
/**
 * Client for the embedding daemon. Talks newline-delimited JSON over the
 * socket at `socketPath` and can spawn the daemon on demand, using an
 * exclusively-created pid file as a "someone is already spawning" lock.
 *
 * Options (all optional): socketDir, timeoutMs, daemonEntry, autoSpawn,
 * spawnWaitMs.
 */
var EmbedClient = class {
  socketPath;
  pidPath;
  timeoutMs;
  daemonEntry;
  autoSpawn;
  spawnWaitMs;
  nextId = 0;
  constructor(opts = {}) {
    const uid = getUid();
    const dir = opts.socketDir ?? "/tmp";
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    // Precedence: explicit option, then HIVEMIND_EMBED_DAEMON, then the shared
    // install location (only when that file actually exists).
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync3(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }
  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   *
   * @param {string} text Text to embed.
   * @param {string} [kind="document"] Forwarded verbatim as the request's `kind`.
   * @returns {Promise<unknown|null>} The response's `embedding` value, or null.
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      // trySpawnDaemon() never throws, so this path always resolves to null.
      if (this.autoSpawn)
        this.trySpawnDaemon();
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e) {
      const err = e instanceof Error ? e.message : String(e);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      try {
        sock.end();
      } catch {
        // Best effort: the socket may already be destroyed.
      }
    }
  }
  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   *
   * @returns {Promise<boolean>} true once a connection succeeded, else false.
   */
  async warmup() {
    try {
      const s = await this.connectOnce();
      s.end();
      return true;
    } catch {
      if (!this.autoSpawn)
        return false;
      this.trySpawnDaemon();
      try {
        const s = await this.waitForSocket();
        s.end();
        return true;
      } catch {
        return false;
      }
    }
  }
  /**
   * Single connection attempt to socketPath, bounded by timeoutMs.
   *
   * @returns {Promise<object>} Resolves with the connected socket; rejects on
   *   socket error or with Error("connect timeout").
   */
  connectOnce() {
    return new Promise((resolve, reject) => {
      const sock = connect(this.socketPath);
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to);
        resolve(sock);
      });
      sock.once("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
    });
  }
  /**
   * Best-effort, non-throwing daemon spawn guarded by the pid-file lock.
   * Does nothing when another live process already holds the lock. When the
   * spawn cannot happen (no daemon entry, spawn throws, or the child reports
   * an 'error'), the lock is released so a later call can retry.
   */
  trySpawnDaemon() {
    const fd = this.claimPidFile();
    if (fd === void 0)
      return;
    let keepPidFile = false;
    try {
      if (!this.daemonEntry || !existsSync3(this.daemonEntry)) {
        log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      } else {
        const child = spawn(process.execPath, [this.daemonEntry], {
          detached: true,
          stdio: "ignore",
          env: process.env
        });
        // Spawn failures are reported asynchronously via 'error'; without a
        // listener that event would be thrown as an uncaught exception.
        child.once("error", (e) => {
          log3(`daemon spawn failed: ${e instanceof Error ? e.message : String(e)}`);
          this.releasePidFile();
        });
        child.unref();
        keepPidFile = true;
        log3(`spawned daemon pid=${child.pid}`);
      }
    } catch (e) {
      // Callers rely on this method never throwing (see embed()).
      log3(`daemon spawn failed: ${e instanceof Error ? e.message : String(e)}`);
    } finally {
      try {
        closeSync(fd);
      } catch {
        // Best effort.
      }
      // Release only after the descriptor is closed.
      if (!keepPidFile)
        this.releasePidFile();
    }
  }
  /**
   * Exclusively create the pid file and write this process's pid into it.
   * A stale file (see isPidFileStale) is removed and the claim retried once.
   *
   * @returns {number|undefined} Open file descriptor on success; undefined
   *   when the lock is held by a live process or cannot be created.
   */
  claimPidFile() {
    for (let attempt = 0; attempt < 2; attempt++) {
      let fd;
      try {
        // "wx" fails if the file already exists; 0o600 keeps it owner-only.
        fd = openSync(this.pidPath, "wx", 0o600);
        writeSync(fd, String(process.pid));
        return fd;
      } catch {
        if (fd !== void 0) {
          // We created the file but could not write it: do not leak the
          // descriptor or leave an empty lock behind.
          try {
            closeSync(fd);
          } catch {
            // Best effort.
          }
          this.releasePidFile();
          return void 0;
        }
        if (attempt > 0 || !this.isPidFileStale())
          return void 0;
        this.releasePidFile();
      }
    }
    return void 0;
  }
  /** Remove the pid file, ignoring any failure (e.g. already removed). */
  releasePidFile() {
    try {
      unlinkSync(this.pidPath);
    } catch {
      // Best effort.
    }
  }
  /**
   * @returns {boolean} true when the pid file is unreadable, does not contain
   *   a positive integer pid, or names a process that no longer exists.
   */
  isPidFileStale() {
    let pid;
    try {
      pid = Number(readFileSync3(this.pidPath, "utf-8").trim());
    } catch {
      return true;
    }
    // Zero/negative values would address process groups, not a single pid.
    if (!Number.isInteger(pid) || pid <= 0)
      return true;
    try {
      process.kill(pid, 0);
      return false;
    } catch (e) {
      // EPERM means the process exists but we may not signal it: still alive.
      return !(e && e.code === "EPERM");
    }
  }
  /**
   * Poll (30 ms initial delay, growing 1.5x up to 300 ms) until the socket
   * file exists and accepts a connection, or spawnWaitMs has elapsed.
   *
   * @returns {Promise<object>} The connected socket.
   * @throws {Error} When the deadline passes without a successful connection.
   */
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync3(this.socketPath))
        continue;
      try {
        return await this.connectOnce();
      } catch {
        // Not accepting yet; keep polling until the deadline.
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }
  /**
   * Write `req` as one JSON line and resolve with the first JSON line read
   * back. Rejects on timeout (timeoutMs), socket error, invalid JSON, or when
   * the peer ends the stream before a full line arrived.
   *
   * @param {object} sock Connected socket.
   * @param {object} req JSON-serialisable request.
   * @returns {Promise<object>} Parsed response object.
   */
  sendAndWait(sock, req) {
    return new Promise((resolve, reject) => {
      let buf = "";
      let settled = false;
      // Settle exactly once and always stop the timer.
      const settle = (fn, value) => {
        if (settled)
          return;
        settled = true;
        clearTimeout(to);
        fn(value);
      };
      const to = setTimeout(() => {
        sock.destroy();
        settle(reject, new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        if (settled)
          return;
        buf += chunk;
        const nl = buf.indexOf("\n");
        if (nl === -1)
          return;
        let parsed;
        try {
          parsed = JSON.parse(buf.slice(0, nl));
        } catch (e) {
          settle(reject, e);
          return;
        }
        settle(resolve, parsed);
      });
      sock.on("error", (e) => settle(reject, e));
      sock.on("end", () => settle(reject, new Error("connection closed without response")));
      sock.write(JSON.stringify(req) + "\n");
    });
  }
};
|
|
1229
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
1232
|
+
|
|
1233
|
+
// dist/src/embeddings/disable.js
|
|
1234
|
+
import { createRequire } from "node:module";
|
|
1235
|
+
import { homedir as homedir4 } from "node:os";
|
|
1236
|
+
import { join as join5 } from "node:path";
|
|
1237
|
+
import { pathToFileURL } from "node:url";
|
|
1238
|
+
var cachedStatus = null;
|
|
1239
|
+
/**
 * Probe whether "@huggingface/transformers" can be resolved, first relative to
 * this module and then relative to ~/.hivemind/embed-deps.
 *
 * @returns {void} Returns normally when either lookup succeeds.
 * @throws Whatever require.resolve throws when the shared-directory lookup
 *   (the second attempt) also fails.
 */
function defaultResolveTransformers() {
  const pkg = "@huggingface/transformers";
  let foundLocally = true;
  try {
    createRequire(import.meta.url).resolve(pkg);
  } catch {
    // Not resolvable next to this module; try the shared install below.
    foundLocally = false;
  }
  if (foundLocally) {
    return;
  }
  // The trailing slash makes createRequire treat the path as a directory.
  const sharedBase = pathToFileURL(`${join5(homedir4(), ".hivemind", "embed-deps")}/`);
  createRequire(sharedBase.href).resolve(pkg);
}
|
|
1248
|
+
var _resolve = defaultResolveTransformers;
|
|
1249
|
+
/**
 * Work out whether embeddings can be used in this process.
 *
 * @returns {"env-disabled"|"enabled"|"no-transformers"}
 *   "env-disabled" when HIVEMIND_EMBEDDINGS is exactly "false" (the resolver
 *   is not consulted); otherwise "enabled" if _resolve() returns and
 *   "no-transformers" if it throws.
 */
function detectStatus() {
  const optedOut = process.env.HIVEMIND_EMBEDDINGS === "false";
  if (optedOut) {
    return "env-disabled";
  }
  let status = "enabled";
  try {
    _resolve();
  } catch {
    status = "no-transformers";
  }
  return status;
}
|
|
1259
|
+
/**
 * Memoised wrapper around detectStatus(): the first call stores the result in
 * cachedStatus, later calls return the stored value without re-detecting.
 *
 * @returns {string} The cached status string.
 */
function embeddingsStatus() {
  if (cachedStatus === null) {
    cachedStatus = detectStatus();
  }
  return cachedStatus;
}
|
|
1265
|
+
/**
 * @returns {boolean} true for every status other than "enabled".
 */
function embeddingsDisabled() {
  const status = embeddingsStatus();
  return !(status === "enabled");
}
|
|
1268
|
+
|
|
813
1269
|
// dist/src/hooks/grep-direct.js
|
|
1270
|
+
import { fileURLToPath } from "node:url";
|
|
1271
|
+
import { dirname, join as join6 } from "node:path";
|
|
1272
|
+
// Semantic search is on unless HIVEMIND_SEMANTIC_SEARCH is exactly "false" or
// embeddings are disabled. The env check comes first, so embeddingsDisabled()
// is not consulted when the user has opted out.
var SEMANTIC_ENABLED = !(process.env.HIVEMIND_SEMANTIC_SEARCH === "false" || embeddingsDisabled());
|
|
1273
|
+
/**
 * Parse a millisecond timeout from an environment-style string.
 *
 * A value that is not a finite, non-negative number (e.g. "abc", "-5") would
 * otherwise become NaN/negative, which timers treat as ~1 ms and so make every
 * request time out immediately; such values fall back to the default instead.
 *
 * @param {string|undefined} raw Raw value; undefined/null means "not set".
 * @param {number} [fallback=500] Value used when raw is unset or invalid.
 * @returns {number} Timeout in milliseconds.
 */
function parseSemanticTimeoutMs(raw, fallback = 500) {
  const parsed = Number(raw ?? fallback);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}
// Per-request timeout handed to the shared EmbedClient (default 500 ms).
var SEMANTIC_TIMEOUT_MS = parseSemanticTimeoutMs(process.env.HIVEMIND_SEMANTIC_EMBED_TIMEOUT_MS);
|
|
1274
|
+
/**
 * Locate the embed daemon script relative to this module.
 *
 * The original lookup, `<dir>/../embeddings/embed-daemon.js`, is still tried
 * first and is still what is returned when nothing exists on disk, so callers
 * see the same path as before in those cases. When only
 * `<dir>/embeddings/embed-daemon.js` exists, that sibling path is returned.
 * NOTE(review): the sibling layout is assumed from the published package
 * listing (daemon shipped under the bundle directory) — confirm against the
 * build output.
 *
 * @param {string} [baseDir] Directory to resolve from; defaults to the
 *   directory containing this module.
 * @returns {string} Path to the daemon script (may not exist).
 */
function resolveDaemonPath(baseDir = dirname(fileURLToPath(import.meta.url))) {
  const legacyPath = join6(baseDir, "..", "embeddings", "embed-daemon.js");
  if (existsSync3(legacyPath)) {
    return legacyPath;
  }
  const siblingPath = join6(baseDir, "embeddings", "embed-daemon.js");
  return existsSync3(siblingPath) ? siblingPath : legacyPath;
}
|
|
1277
|
+
// Lazily created EmbedClient reused by every caller in this process.
var sharedEmbedClient = null;
/**
 * Return the process-wide EmbedClient, constructing it on first use with the
 * module-relative daemon path and the semantic-search timeout.
 *
 * @returns {EmbedClient} The shared client instance.
 */
function getEmbedClient() {
  if (sharedEmbedClient) {
    return sharedEmbedClient;
  }
  const options = {
    daemonEntry: resolveDaemonPath(),
    timeoutMs: SEMANTIC_TIMEOUT_MS
  };
  sharedEmbedClient = new EmbedClient(options);
  return sharedEmbedClient;
}
|
|
1287
|
+
/**
 * Heuristic: is this grep pattern plain enough to also run as a semantic
 * (embedding) query?
 *
 * @param {string} pattern The grep pattern.
 * @param {boolean} fixedString Whether the pattern is a literal string.
 * @returns {boolean} false for empty or single-character patterns; true for
 *   any longer fixed string; otherwise true only when the pattern contains at
 *   most one of the characters | ( ) [ ] { } + ? ^ $ \.
 */
function patternIsSemanticFriendly(pattern, fixedString) {
  const tooShort = !pattern || pattern.length < 2;
  if (tooShort) {
    return false;
  }
  if (fixedString) {
    return true;
  }
  const metaHits = pattern.match(/[|()\[\]{}+?^$\\]/g) ?? [];
  return metaHits.length <= 1;
}
|
|
814
1297
|
function splitFirstPipelineStage(cmd) {
|
|
815
1298
|
const input = cmd.trim();
|
|
816
1299
|
let quote = null;
|
|
@@ -1128,15 +1611,23 @@ async function handleGrepDirect(api, table, sessionsTable, params) {
|
|
|
1128
1611
|
invertMatch: params.invertMatch,
|
|
1129
1612
|
fixedString: params.fixedString
|
|
1130
1613
|
};
|
|
1131
|
-
|
|
1614
|
+
let queryEmbedding = null;
|
|
1615
|
+
if (SEMANTIC_ENABLED && patternIsSemanticFriendly(params.pattern, params.fixedString)) {
|
|
1616
|
+
try {
|
|
1617
|
+
queryEmbedding = await getEmbedClient().embed(params.pattern, "query");
|
|
1618
|
+
} catch {
|
|
1619
|
+
queryEmbedding = null;
|
|
1620
|
+
}
|
|
1621
|
+
}
|
|
1622
|
+
const output = await grepBothTables(api, table, sessionsTable, matchParams, params.targetPath, queryEmbedding);
|
|
1132
1623
|
const joined = output.join("\n") || "(no matches)";
|
|
1133
1624
|
return capOutputForClaude(joined, { kind: "grep" });
|
|
1134
1625
|
}
|
|
1135
1626
|
|
|
1136
1627
|
// dist/src/hooks/memory-path-utils.js
|
|
1137
|
-
import { homedir as
|
|
1138
|
-
import { join as
|
|
1139
|
-
var MEMORY_PATH =
|
|
1628
|
+
import { homedir as homedir5 } from "node:os";
|
|
1629
|
+
import { join as join7 } from "node:path";
|
|
1630
|
+
var MEMORY_PATH = join7(homedir5(), ".deeplake", "memory");
|
|
1140
1631
|
var TILDE_PATH = "~/.deeplake/memory";
|
|
1141
1632
|
var HOME_VAR_PATH = "$HOME/.deeplake/memory";
|
|
1142
1633
|
function touchesMemory(p) {
|
|
@@ -1147,7 +1638,7 @@ function rewritePaths(cmd) {
|
|
|
1147
1638
|
}
|
|
1148
1639
|
|
|
1149
1640
|
// dist/src/hooks/hermes/pre-tool-use.js
|
|
1150
|
-
var
|
|
1641
|
+
var log4 = (msg) => log("hermes-pre-tool-use", msg);
|
|
1151
1642
|
async function main() {
|
|
1152
1643
|
const input = await readStdin();
|
|
1153
1644
|
if (input.tool_name !== "terminal")
|
|
@@ -1164,7 +1655,7 @@ async function main() {
|
|
|
1164
1655
|
return;
|
|
1165
1656
|
const config = loadConfig();
|
|
1166
1657
|
if (!config) {
|
|
1167
|
-
|
|
1658
|
+
log4("no config \u2014 falling through to Hermes");
|
|
1168
1659
|
return;
|
|
1169
1660
|
}
|
|
1170
1661
|
const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName);
|
|
@@ -1172,7 +1663,7 @@ async function main() {
|
|
|
1172
1663
|
const result = await handleGrepDirect(api, config.tableName, config.sessionsTableName, grepParams);
|
|
1173
1664
|
if (result === null)
|
|
1174
1665
|
return;
|
|
1175
|
-
|
|
1666
|
+
log4(`intercepted ${command.slice(0, 80)} \u2192 ${result.length} chars from SQL fast-path`);
|
|
1176
1667
|
const message = [
|
|
1177
1668
|
result,
|
|
1178
1669
|
"",
|
|
@@ -1181,10 +1672,10 @@ async function main() {
|
|
|
1181
1672
|
process.stdout.write(JSON.stringify({ action: "block", message }));
|
|
1182
1673
|
} catch (err) {
|
|
1183
1674
|
const msg = err instanceof Error ? err.message : String(err);
|
|
1184
|
-
|
|
1675
|
+
log4(`fast-path failed, falling through: ${msg}`);
|
|
1185
1676
|
}
|
|
1186
1677
|
}
|
|
1187
1678
|
// Last-resort handler: log the failure and still exit with status 0.
main().catch((e) => {
  // Rejections are not guaranteed to be Error objects; reading `.message` on
  // null/undefined would throw here and skip the exit below.
  const reason = e instanceof Error ? e.message : String(e);
  log4(`fatal: ${reason}`);
  process.exit(0);
});
|