@deeplake/hivemind 0.6.48 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +147 -20
- package/bundle/cli.js +552 -95
- package/codex/bundle/capture.js +509 -89
- package/codex/bundle/commands/auth-login.js +209 -66
- package/codex/bundle/embeddings/embed-daemon.js +243 -0
- package/codex/bundle/pre-tool-use.js +629 -104
- package/codex/bundle/session-start-setup.js +194 -57
- package/codex/bundle/session-start.js +25 -10
- package/codex/bundle/shell/deeplake-shell.js +679 -112
- package/codex/bundle/stop.js +476 -58
- package/codex/bundle/wiki-worker.js +312 -11
- package/cursor/bundle/capture.js +768 -57
- package/cursor/bundle/commands/auth-login.js +209 -66
- package/cursor/bundle/embeddings/embed-daemon.js +243 -0
- package/cursor/bundle/pre-tool-use.js +561 -70
- package/cursor/bundle/session-end.js +223 -2
- package/cursor/bundle/session-start.js +192 -54
- package/cursor/bundle/shell/deeplake-shell.js +679 -112
- package/cursor/bundle/wiki-worker.js +571 -0
- package/hermes/bundle/capture.js +771 -58
- package/hermes/bundle/commands/auth-login.js +209 -66
- package/hermes/bundle/embeddings/embed-daemon.js +243 -0
- package/hermes/bundle/pre-tool-use.js +560 -69
- package/hermes/bundle/session-end.js +224 -1
- package/hermes/bundle/session-start.js +195 -54
- package/hermes/bundle/shell/deeplake-shell.js +679 -112
- package/hermes/bundle/wiki-worker.js +572 -0
- package/mcp/bundle/server.js +253 -68
- package/openclaw/dist/chunks/auth-creds-AEKS6D3P.js +14 -0
- package/openclaw/dist/chunks/chunk-SRCBBT4H.js +37 -0
- package/openclaw/dist/chunks/config-G23NI5TV.js +33 -0
- package/openclaw/dist/chunks/index-marker-store-PGT5CW6T.js +33 -0
- package/openclaw/dist/chunks/setup-config-C35UK4LP.js +114 -0
- package/openclaw/dist/index.js +752 -702
- package/openclaw/openclaw.plugin.json +1 -1
- package/openclaw/package.json +1 -1
- package/package.json +2 -1
- package/pi/extension-source/hivemind.ts +473 -21
package/codex/bundle/stop.js
CHANGED
|
@@ -1,7 +1,61 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
// Runtime helpers shared by the modules inlined into this bundle
// (presumably emitted by the bundler — they are not hand-written app code).
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Turns a single-entry record `{ "module/path"() { ... } }` into a run-once
 * initializer. The first call runs the module body and caches what it returns;
 * later calls — including re-entrant ones made while the body is still
 * running — only return the cached value.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const body = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before invoking so the body can never run a second time.
    fn = 0;
    res = body(0);
  }
  return res;
};

/**
 * Defines one enumerable getter on `target` for every key of `all`, using the
 * function stored under that key as the getter.
 */
var __export = (target, all) => {
  for (const key in all) {
    __defProp(target, key, { enumerable: true, get: all[key] });
  }
};
|
|
11
|
+
|
|
12
|
+
// dist/src/index-marker-store.js
|
|
13
|
+
// Namespace object for the inlined index-marker-store module. Each member is
// registered as a getter that returns the module-level function of the same
// name, so the function is looked up on access rather than at registration.
var index_marker_store_exports = {};
|
|
14
|
+
__export(index_marker_store_exports, {
|
|
15
|
+
buildIndexMarkerPath: () => buildIndexMarkerPath,
|
|
16
|
+
getIndexMarkerDir: () => getIndexMarkerDir,
|
|
17
|
+
hasFreshIndexMarker: () => hasFreshIndexMarker,
|
|
18
|
+
writeIndexMarker: () => writeIndexMarker
|
|
19
|
+
});
|
|
20
|
+
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
21
|
+
import { join as join3 } from "node:path";
|
|
22
|
+
import { tmpdir } from "node:os";
|
|
23
|
+
/**
 * Directory that holds the marker files.
 * HIVEMIND_INDEX_MARKER_DIR wins whenever it is set (even to an empty string);
 * otherwise a fixed folder under the OS temp directory is used.
 * @returns {string}
 */
function getIndexMarkerDir() {
  const override = process.env.HIVEMIND_INDEX_MARKER_DIR;
  if (override != null) {
    return override;
  }
  return join3(tmpdir(), "hivemind-deeplake-indexes");
}
|
|
26
|
+
/**
 * Builds the marker file path for one (workspace, org, table, suffix) tuple.
 * The four parts are joined with "__" and every character outside
 * [a-zA-Z0-9_.-] is replaced by "_" so the key is usable as a file name.
 * @returns {string} Path of the `.json` marker inside the marker directory.
 */
function buildIndexMarkerPath(workspaceId, orgId, table, suffix) {
  const unsafeChars = /[^a-zA-Z0-9_.-]/g;
  const keyParts = [workspaceId, orgId, table, suffix];
  const safeKey = keyParts.join("__").replace(unsafeChars, "_");
  return join3(getIndexMarkerDir(), `${safeKey}.json`);
}
|
|
30
|
+
/**
 * Tells whether `markerPath` holds a marker that is still within its TTL.
 * A missing file, unreadable or invalid JSON, or an absent/unparseable
 * `updatedAt` all count as "not fresh".
 * @param {string} markerPath
 * @returns {boolean}
 */
function hasFreshIndexMarker(markerPath) {
  if (!existsSync2(markerPath)) {
    return false;
  }
  try {
    const marker = JSON.parse(readFileSync2(markerPath, "utf-8"));
    const stampMs = marker.updatedAt ? new Date(marker.updatedAt).getTime() : NaN;
    // Short-circuit keeps the TTL comparison from running on an invalid stamp.
    return Number.isFinite(stampMs) && !(Date.now() - stampMs > INDEX_MARKER_TTL_MS);
  } catch {
    return false;
  }
}
|
|
43
|
+
/**
 * Writes a marker file stamped with the current time.
 * The marker directory is created first (recursively) if it does not exist.
 * @param {string} markerPath File to write.
 */
function writeIndexMarker(markerPath) {
  const markerDir = getIndexMarkerDir();
  mkdirSync(markerDir, { recursive: true });
  const payload = { updatedAt: (/* @__PURE__ */ new Date()).toISOString() };
  writeFileSync(markerPath, JSON.stringify(payload), "utf-8");
}
|
|
47
|
+
// Marker time-to-live in milliseconds; assigned when the module initializer runs.
var INDEX_MARKER_TTL_MS;
var init_index_marker_store = __esm({
  "dist/src/index-marker-store.js"() {
    "use strict";
    // 6 hours.
    const DEFAULT_TTL_MS = 6 * 60 * 6e4;
    const configured = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? DEFAULT_TTL_MS);
    // A non-numeric override parses to NaN, and `age > NaN` is always false,
    // which would make every marker look fresh forever. Use the default instead.
    INDEX_MARKER_TTL_MS = Number.isNaN(configured) ? DEFAULT_TTL_MS : configured;
  }
});
|
|
2
54
|
|
|
3
55
|
// dist/src/hooks/codex/stop.js
|
|
4
|
-
import { readFileSync as
|
|
56
|
+
import { readFileSync as readFileSync5, existsSync as existsSync5 } from "node:fs";
|
|
57
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
58
|
+
import { dirname as dirname2, join as join9 } from "node:path";
|
|
5
59
|
|
|
6
60
|
// dist/src/utils/stdin.js
|
|
7
61
|
function readStdin() {
|
|
@@ -54,9 +108,6 @@ function loadConfig() {
|
|
|
54
108
|
|
|
55
109
|
// dist/src/deeplake-api.js
|
|
56
110
|
import { randomUUID } from "node:crypto";
|
|
57
|
-
import { existsSync as existsSync2, mkdirSync, readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
58
|
-
import { join as join3 } from "node:path";
|
|
59
|
-
import { tmpdir } from "node:os";
|
|
60
111
|
|
|
61
112
|
// dist/src/utils/debug.js
|
|
62
113
|
import { appendFileSync } from "node:fs";
|
|
@@ -79,7 +130,26 @@ function sqlStr(value) {
|
|
|
79
130
|
return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
|
|
80
131
|
}
|
|
81
132
|
|
|
133
|
+
// dist/src/embeddings/columns.js
|
|
134
|
+
// Vector column stored next to `summary` on the memory table.
var SUMMARY_EMBEDDING_COL = "summary_embedding";
|
|
135
|
+
// Vector column stored next to `message` on the sessions table.
var MESSAGE_EMBEDDING_COL = "message_embedding";
|
|
136
|
+
|
|
137
|
+
// dist/src/utils/client-header.js
|
|
138
|
+
// HTTP header that identifies this client on Deeplake API requests.
var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";

/** Value sent in the client header. */
function deeplakeClientValue() {
  const clientName = "hivemind";
  return clientName;
}

/**
 * Single-entry header object, ready to be spread into a `headers` literal.
 * @returns {Record<string, string>}
 */
function deeplakeClientHeader() {
  const headers = {};
  headers[DEEPLAKE_CLIENT_HEADER] = deeplakeClientValue();
  return headers;
}
|
|
145
|
+
|
|
82
146
|
// dist/src/deeplake-api.js
|
|
147
|
+
// Memoized promise for the index-marker-store namespace (null until first use).
var indexMarkerStorePromise = null;

/**
 * Resolves to the index-marker-store namespace. The module initializer runs
 * on a microtask the first time this is called; every later call returns the
 * same promise.
 */
function getIndexMarkerStore() {
  indexMarkerStorePromise ||= Promise.resolve().then(() => {
    init_index_marker_store();
    return index_marker_store_exports;
  });
  return indexMarkerStorePromise;
}
|
|
83
153
|
var log2 = (msg) => log("sdk", msg);
|
|
84
154
|
function summarizeSql(sql, maxLen = 220) {
|
|
85
155
|
const compact = sql.replace(/\s+/g, " ").trim();
|
|
@@ -99,7 +169,6 @@ var MAX_RETRIES = 3;
|
|
|
99
169
|
var BASE_DELAY_MS = 500;
|
|
100
170
|
var MAX_CONCURRENCY = 5;
|
|
101
171
|
var QUERY_TIMEOUT_MS = Number(process.env.HIVEMIND_QUERY_TIMEOUT_MS ?? 1e4);
|
|
102
|
-
var INDEX_MARKER_TTL_MS = Number(process.env.HIVEMIND_INDEX_MARKER_TTL_MS ?? 6 * 60 * 6e4);
|
|
103
172
|
/**
 * Promise-based delay.
 * @param {number} ms Milliseconds to wait.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
@@ -119,9 +188,6 @@ function isTransientHtml403(text) {
|
|
|
119
188
|
const body = text.toLowerCase();
|
|
120
189
|
return body.includes("<html") || body.includes("403 forbidden") || body.includes("cloudflare") || body.includes("nginx");
|
|
121
190
|
}
|
|
122
|
-
function getIndexMarkerDir() {
|
|
123
|
-
return process.env.HIVEMIND_INDEX_MARKER_DIR ?? join3(tmpdir(), "hivemind-deeplake-indexes");
|
|
124
|
-
}
|
|
125
191
|
var Semaphore = class {
|
|
126
192
|
max;
|
|
127
193
|
waiting = [];
|
|
@@ -190,7 +256,8 @@ var DeeplakeApi = class {
|
|
|
190
256
|
headers: {
|
|
191
257
|
Authorization: `Bearer ${this.token}`,
|
|
192
258
|
"Content-Type": "application/json",
|
|
193
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
259
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
260
|
+
...deeplakeClientHeader()
|
|
194
261
|
},
|
|
195
262
|
signal,
|
|
196
263
|
body: JSON.stringify({ query: sql })
|
|
@@ -217,7 +284,8 @@ var DeeplakeApi = class {
|
|
|
217
284
|
}
|
|
218
285
|
const text = await resp.text().catch(() => "");
|
|
219
286
|
const retryable403 = isSessionInsertQuery(sql) && (resp.status === 401 || resp.status === 403 && (text.length === 0 || isTransientHtml403(text)));
|
|
220
|
-
|
|
287
|
+
const alreadyExists = resp.status === 500 && isDuplicateIndexError(text);
|
|
288
|
+
if (!alreadyExists && attempt < MAX_RETRIES && (RETRYABLE_CODES.has(resp.status) || retryable403)) {
|
|
221
289
|
const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200;
|
|
222
290
|
log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`);
|
|
223
291
|
await sleep(delay);
|
|
@@ -251,7 +319,7 @@ var DeeplakeApi = class {
|
|
|
251
319
|
const lud = row.lastUpdateDate ?? ts;
|
|
252
320
|
const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`);
|
|
253
321
|
if (exists.length > 0) {
|
|
254
|
-
let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
322
|
+
let setClauses = `summary = E'${sqlStr(row.contentText)}', ${SUMMARY_EMBEDDING_COL} = NULL, mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`;
|
|
255
323
|
if (row.project !== void 0)
|
|
256
324
|
setClauses += `, project = '${sqlStr(row.project)}'`;
|
|
257
325
|
if (row.description !== void 0)
|
|
@@ -259,8 +327,8 @@ var DeeplakeApi = class {
|
|
|
259
327
|
await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`);
|
|
260
328
|
} else {
|
|
261
329
|
const id = randomUUID();
|
|
262
|
-
let cols =
|
|
263
|
-
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
330
|
+
let cols = `id, path, filename, summary, ${SUMMARY_EMBEDDING_COL}, mime_type, size_bytes, creation_date, last_update_date`;
|
|
331
|
+
let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', NULL, '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`;
|
|
264
332
|
if (row.project !== void 0) {
|
|
265
333
|
cols += ", project";
|
|
266
334
|
vals += `, '${sqlStr(row.project)}'`;
|
|
@@ -285,48 +353,83 @@ var DeeplakeApi = class {
|
|
|
285
353
|
buildLookupIndexName(table, suffix) {
|
|
286
354
|
return `idx_${table}_${suffix}`.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
287
355
|
}
|
|
288
|
-
getLookupIndexMarkerPath(table, suffix) {
|
|
289
|
-
const markerKey = [
|
|
290
|
-
this.workspaceId,
|
|
291
|
-
this.orgId,
|
|
292
|
-
table,
|
|
293
|
-
suffix
|
|
294
|
-
].join("__").replace(/[^a-zA-Z0-9_.-]/g, "_");
|
|
295
|
-
return join3(getIndexMarkerDir(), `${markerKey}.json`);
|
|
296
|
-
}
|
|
297
|
-
hasFreshLookupIndexMarker(table, suffix) {
|
|
298
|
-
const markerPath = this.getLookupIndexMarkerPath(table, suffix);
|
|
299
|
-
if (!existsSync2(markerPath))
|
|
300
|
-
return false;
|
|
301
|
-
try {
|
|
302
|
-
const raw = JSON.parse(readFileSync2(markerPath, "utf-8"));
|
|
303
|
-
const updatedAt = raw.updatedAt ? new Date(raw.updatedAt).getTime() : NaN;
|
|
304
|
-
if (!Number.isFinite(updatedAt) || Date.now() - updatedAt > INDEX_MARKER_TTL_MS)
|
|
305
|
-
return false;
|
|
306
|
-
return true;
|
|
307
|
-
} catch {
|
|
308
|
-
return false;
|
|
309
|
-
}
|
|
310
|
-
}
|
|
311
|
-
markLookupIndexReady(table, suffix) {
|
|
312
|
-
mkdirSync(getIndexMarkerDir(), { recursive: true });
|
|
313
|
-
writeFileSync(this.getLookupIndexMarkerPath(table, suffix), JSON.stringify({ updatedAt: (/* @__PURE__ */ new Date()).toISOString() }), "utf-8");
|
|
314
|
-
}
|
|
315
356
|
async ensureLookupIndex(table, suffix, columnsSql) {
|
|
316
|
-
|
|
357
|
+
const markers = await getIndexMarkerStore();
|
|
358
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, suffix);
|
|
359
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
317
360
|
return;
|
|
318
361
|
const indexName = this.buildLookupIndexName(table, suffix);
|
|
319
362
|
try {
|
|
320
363
|
await this.query(`CREATE INDEX IF NOT EXISTS "${indexName}" ON "${table}" ${columnsSql}`);
|
|
321
|
-
|
|
364
|
+
markers.writeIndexMarker(markerPath);
|
|
322
365
|
} catch (e) {
|
|
323
366
|
if (isDuplicateIndexError(e)) {
|
|
324
|
-
|
|
367
|
+
markers.writeIndexMarker(markerPath);
|
|
325
368
|
return;
|
|
326
369
|
}
|
|
327
370
|
log2(`index "${indexName}" skipped: ${e.message}`);
|
|
328
371
|
}
|
|
329
372
|
}
|
|
373
|
+
/**
|
|
374
|
+
* Ensure a vector column exists on the given table.
|
|
375
|
+
*
|
|
376
|
+
* The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
|
|
377
|
+
* EXISTS …` on every SessionStart. On a long-running workspace that's
|
|
378
|
+
* already migrated, every call returns 500 "Column already exists" — noisy
|
|
379
|
+
* in the log and a wasted round-trip. Worse, the very first call after the
|
|
380
|
+
* column is genuinely added triggers Deeplake's post-ALTER `vector::at`
|
|
381
|
+
* window (~30s) during which subsequent INSERTs fail; minimising the
|
|
382
|
+
* number of ALTER calls minimises exposure to that window.
|
|
383
|
+
*
|
|
384
|
+
* New flow:
|
|
385
|
+
* 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
|
|
386
|
+
* return — zero network calls.
|
|
387
|
+
* 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
|
|
388
|
+
* column_name = C. Read-only, idempotent, can't tickle the post-ALTER
|
|
389
|
+
* bug. If the column is present → mark + return.
|
|
390
|
+
* 3. Only if step 2 says the column is missing, fall back to ALTER ADD
|
|
391
|
+
* COLUMN (sent without an IF NOT EXISTS clause). Mark on success, also mark if Deeplake reports
|
|
392
|
+
* "already exists" (race: another client added it between our SELECT
|
|
393
|
+
* and ALTER).
|
|
394
|
+
*
|
|
395
|
+
* Marker uses the same dir / TTL as ensureLookupIndex so both schema
|
|
396
|
+
* caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
|
|
397
|
+
*/
|
|
398
|
+
async ensureEmbeddingColumn(table, column) {
|
|
399
|
+
await this.ensureColumn(table, column, "FLOAT4[]");
|
|
400
|
+
}
|
|
401
|
+
/**
|
|
402
|
+
* Generic marker-gated column migration. Same SELECT-then-ALTER flow as
|
|
403
|
+
* ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
|
|
404
|
+
* column that was added to the schema after the table was originally
|
|
405
|
+
* created. Used today for `summary_embedding`, `message_embedding`, and
|
|
406
|
+
* the `agent` column (added 2026-04-11) — the latter has no fallback if
|
|
407
|
+
* a user upgraded over a pre-2026-04-11 table, so every INSERT fails
|
|
408
|
+
* with `column "agent" does not exist`.
|
|
409
|
+
*/
|
|
410
|
+
async ensureColumn(table, column, sqlType) {
|
|
411
|
+
const markers = await getIndexMarkerStore();
|
|
412
|
+
const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
|
|
413
|
+
if (markers.hasFreshIndexMarker(markerPath))
|
|
414
|
+
return;
|
|
415
|
+
const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
|
|
416
|
+
const rows = await this.query(colCheck);
|
|
417
|
+
if (rows.length > 0) {
|
|
418
|
+
markers.writeIndexMarker(markerPath);
|
|
419
|
+
return;
|
|
420
|
+
}
|
|
421
|
+
try {
|
|
422
|
+
await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
|
|
423
|
+
} catch (e) {
|
|
424
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
425
|
+
if (!/already exists/i.test(msg))
|
|
426
|
+
throw e;
|
|
427
|
+
const recheck = await this.query(colCheck);
|
|
428
|
+
if (recheck.length === 0)
|
|
429
|
+
throw e;
|
|
430
|
+
}
|
|
431
|
+
markers.writeIndexMarker(markerPath);
|
|
432
|
+
}
|
|
330
433
|
/** List all tables in the workspace (with retry). */
|
|
331
434
|
async listTables(forceRefresh = false) {
|
|
332
435
|
if (!forceRefresh && this._tablesCache)
|
|
@@ -342,7 +445,8 @@ var DeeplakeApi = class {
|
|
|
342
445
|
const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, {
|
|
343
446
|
headers: {
|
|
344
447
|
Authorization: `Bearer ${this.token}`,
|
|
345
|
-
"X-Activeloop-Org-Id": this.orgId
|
|
448
|
+
"X-Activeloop-Org-Id": this.orgId,
|
|
449
|
+
...deeplakeClientHeader()
|
|
346
450
|
}
|
|
347
451
|
});
|
|
348
452
|
if (resp.ok) {
|
|
@@ -367,28 +471,60 @@ var DeeplakeApi = class {
|
|
|
367
471
|
}
|
|
368
472
|
return { tables: [], cacheable: false };
|
|
369
473
|
}
|
|
474
|
+
/**
|
|
475
|
+
* Run a `CREATE TABLE` with an extra outer retry budget. The base
|
|
476
|
+
* `query()` already retries 3 times on fetch errors (~3.5s total), but a
|
|
477
|
+
* failed CREATE is permanent corruption — every subsequent SELECT against
|
|
478
|
+
* the missing table fails. Wrapping in an outer loop with longer backoff
|
|
479
|
+
* (2s, 5s, then 10s) gives us ~17s of reach across transient network
|
|
480
|
+
* blips before giving up. Failures still propagate; getApi() resets its
|
|
481
|
+
* cache on init failure (openclaw plugin) so the next call retries the
|
|
482
|
+
* whole init flow.
|
|
483
|
+
*/
|
|
484
|
+
async createTableWithRetry(sql, label) {
|
|
485
|
+
const OUTER_BACKOFFS_MS = [2e3, 5e3, 1e4];
|
|
486
|
+
let lastErr = null;
|
|
487
|
+
for (let attempt = 0; attempt <= OUTER_BACKOFFS_MS.length; attempt++) {
|
|
488
|
+
try {
|
|
489
|
+
await this.query(sql);
|
|
490
|
+
return;
|
|
491
|
+
} catch (err) {
|
|
492
|
+
lastErr = err;
|
|
493
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
494
|
+
log2(`CREATE TABLE "${label}" attempt ${attempt + 1}/${OUTER_BACKOFFS_MS.length + 1} failed: ${msg}`);
|
|
495
|
+
if (attempt < OUTER_BACKOFFS_MS.length) {
|
|
496
|
+
await sleep(OUTER_BACKOFFS_MS[attempt]);
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
throw lastErr;
|
|
501
|
+
}
|
|
370
502
|
/** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
|
|
371
503
|
async ensureTable(name) {
|
|
372
504
|
const tbl = name ?? this.tableName;
|
|
373
505
|
const tables = await this.listTables();
|
|
374
506
|
if (!tables.includes(tbl)) {
|
|
375
507
|
log2(`table "${tbl}" not found, creating`);
|
|
376
|
-
await this.
|
|
508
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
|
|
377
509
|
log2(`table "${tbl}" created`);
|
|
378
510
|
if (!tables.includes(tbl))
|
|
379
511
|
this._tablesCache = [...tables, tbl];
|
|
380
512
|
}
|
|
513
|
+
await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
|
|
514
|
+
await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
381
515
|
}
|
|
382
516
|
/** Create the sessions table (uses JSONB for message since every row is a JSON event). */
|
|
383
517
|
async ensureSessionsTable(name) {
|
|
384
518
|
const tables = await this.listTables();
|
|
385
519
|
if (!tables.includes(name)) {
|
|
386
520
|
log2(`table "${name}" not found, creating`);
|
|
387
|
-
await this.
|
|
521
|
+
await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, name);
|
|
388
522
|
log2(`table "${name}" created`);
|
|
389
523
|
if (!tables.includes(name))
|
|
390
524
|
this._tablesCache = [...tables, name];
|
|
391
525
|
}
|
|
526
|
+
await this.ensureEmbeddingColumn(name, MESSAGE_EMBEDDING_COL);
|
|
527
|
+
await this.ensureColumn(name, "agent", "TEXT NOT NULL DEFAULT ''");
|
|
392
528
|
await this.ensureLookupIndex(name, "path_creation_date", `("path", "creation_date")`);
|
|
393
529
|
}
|
|
394
530
|
};
|
|
@@ -571,8 +707,288 @@ function buildSessionPath(config, sessionId) {
|
|
|
571
707
|
return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${workspace}_${sessionId}.jsonl`;
|
|
572
708
|
}
|
|
573
709
|
|
|
710
|
+
// dist/src/embeddings/client.js
|
|
711
|
+
import { connect } from "node:net";
|
|
712
|
+
import { spawn as spawn2 } from "node:child_process";
|
|
713
|
+
import { openSync as openSync2, closeSync as closeSync2, writeSync as writeSync2, unlinkSync as unlinkSync2, existsSync as existsSync4, readFileSync as readFileSync4 } from "node:fs";
|
|
714
|
+
import { homedir as homedir5 } from "node:os";
|
|
715
|
+
import { join as join7 } from "node:path";
|
|
716
|
+
|
|
717
|
+
// dist/src/embeddings/protocol.js
|
|
718
|
+
// Directory for the daemon socket and pid file when the caller passes none.
var DEFAULT_SOCKET_DIR = "/tmp";
|
|
719
|
+
// 10 minutes. Not referenced in the visible code — presumably the daemon's idle shutdown delay (TODO confirm).
var DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1e3;
|
|
720
|
+
// Default EmbedClient timeout (2 s), applied to both connecting and waiting for a reply.
var DEFAULT_CLIENT_TIMEOUT_MS = 2e3;
|
|
721
|
+
/**
 * Path of the embedding daemon's socket for a given user id.
 * @param {string} uid
 * @param {string} [dir] Containing directory; defaults to DEFAULT_SOCKET_DIR.
 * @returns {string}
 */
function socketPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const socketPath = `${dir}/hivemind-embed-${uid}.sock`;
  return socketPath;
}
|
|
724
|
+
/**
 * Path of the embedding daemon's pid file for a given user id.
 * @param {string} uid
 * @param {string} [dir] Containing directory; defaults to DEFAULT_SOCKET_DIR.
 * @returns {string}
 */
function pidPathFor(uid, dir = DEFAULT_SOCKET_DIR) {
  const pidPath = `${dir}/hivemind-embed-${uid}.pid`;
  return pidPath;
}
|
|
727
|
+
|
|
728
|
+
// dist/src/embeddings/client.js
|
|
729
|
+
var SHARED_DAEMON_PATH = join7(homedir5(), ".hivemind", "embed-deps", "embed-daemon.js");
|
|
730
|
+
var log3 = (m) => log("embed-client", m);
|
|
731
|
+
/**
 * Identifier used to namespace the per-user socket and pid file.
 * Uses the numeric uid where `process.getuid` exists, otherwise $USER,
 * otherwise the literal "default".
 * @returns {string}
 */
function getUid() {
  if (typeof process.getuid === "function") {
    const numericUid = process.getuid();
    if (numericUid !== void 0) {
      return String(numericUid);
    }
  }
  return process.env.USER ?? "default";
}
|
|
735
|
+
/**
 * Client for the local embedding daemon, reached over a socket whose path is
 * derived from the current user id.
 *
 * The public entry points are best-effort: `embed()` resolves to `null` and
 * `warmup()` resolves to `false` instead of rejecting, so hooks never fail
 * their write path because embeddings are unavailable.
 */
var EmbedClient = class {
  socketPath;
  pidPath;
  timeoutMs;
  daemonEntry;
  autoSpawn;
  spawnWaitMs;
  nextId = 0;
  /**
   * @param {object} [opts]
   * @param {string} [opts.socketDir] Directory holding the socket and pid file.
   * @param {number} [opts.timeoutMs] Connect / request timeout in milliseconds.
   * @param {string} [opts.daemonEntry] Daemon script. Falls back to
   *   HIVEMIND_EMBED_DAEMON, then to the shared install path when that file exists.
   * @param {boolean} [opts.autoSpawn] Spawn the daemon when it is unreachable (default true).
   * @param {number} [opts.spawnWaitMs] How long `warmup()` waits for a new daemon (default 5000).
   */
  constructor(opts = {}) {
    const uid = getUid();
    const dir = opts.socketDir ?? DEFAULT_SOCKET_DIR;
    this.socketPath = socketPathFor(uid, dir);
    this.pidPath = pidPathFor(uid, dir);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_CLIENT_TIMEOUT_MS;
    this.daemonEntry = opts.daemonEntry ?? process.env.HIVEMIND_EMBED_DAEMON ?? (existsSync4(SHARED_DAEMON_PATH) ? SHARED_DAEMON_PATH : void 0);
    this.autoSpawn = opts.autoSpawn ?? true;
    this.spawnWaitMs = opts.spawnWaitMs ?? 5e3;
  }
  /**
   * Returns an embedding vector, or null on timeout/failure. Hooks MUST treat
   * null as "skip embedding column" — never block the write path on us.
   *
   * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
   * null AND kicks off a background spawn. The next call finds a ready daemon.
   */
  async embed(text, kind = "document") {
    let sock;
    try {
      sock = await this.connectOnce();
    } catch {
      if (this.autoSpawn)
        this.trySpawnDaemon();
      return null;
    }
    try {
      const id = String(++this.nextId);
      const req = { op: "embed", id, kind, text };
      const resp = await this.sendAndWait(sock, req);
      if (resp.error || !("embedding" in resp) || !resp.embedding) {
        log3(`embed err: ${resp.error ?? "no embedding"}`);
        return null;
      }
      return resp.embedding;
    } catch (e) {
      const err = e instanceof Error ? e.message : String(e);
      log3(`embed failed: ${err}`);
      return null;
    } finally {
      try {
        sock.end();
      } catch {
        // Socket already torn down; nothing left to release.
      }
    }
  }
  /**
   * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
   * necessary. Meant for SessionStart / long-running batches — not the hot path.
   * @returns {Promise<boolean>} true once a connection succeeded.
   */
  async warmup() {
    try {
      const s = await this.connectOnce();
      s.end();
      return true;
    } catch {
      if (!this.autoSpawn)
        return false;
      this.trySpawnDaemon();
      try {
        const s = await this.waitForSocket();
        s.end();
        return true;
      } catch {
        return false;
      }
    }
  }
  /** Opens one connection to the daemon socket, rejecting on error or after `timeoutMs`. */
  connectOnce() {
    return new Promise((resolve, reject) => {
      const sock = connect(this.socketPath);
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("connect timeout"));
      }, this.timeoutMs);
      sock.once("connect", () => {
        clearTimeout(to);
        resolve(sock);
      });
      sock.once("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
    });
  }
  /**
   * Best-effort daemon launch, guarded by an exclusively-created pid file so
   * that only one caller spawns at a time. Never throws: every failure is
   * logged and the pid file is released so a later call can retry.
   */
  trySpawnDaemon() {
    try {
      this.#claimPidFile();
    } catch {
      // Someone else holds the pid file; only take over if its owner is gone.
      if (!this.isPidFileStale())
        return;
      try {
        unlinkSync2(this.pidPath);
      } catch {
        // Another caller may have removed it first; the re-claim below decides.
      }
      try {
        this.#claimPidFile();
      } catch {
        return;
      }
    }
    if (!this.daemonEntry || !existsSync4(this.daemonEntry)) {
      log3(`daemonEntry not configured or missing: ${this.daemonEntry}`);
      this.#releasePidFile();
      return;
    }
    try {
      const child = spawn2(process.execPath, [this.daemonEntry], {
        detached: true,
        stdio: "ignore",
        env: process.env
      });
      // Without a listener, an asynchronous spawn failure would surface as an
      // unhandled 'error' event and take the whole hook process down.
      child.once("error", (err) => {
        log3(`daemon spawn error: ${err instanceof Error ? err.message : String(err)}`);
        this.#releasePidFile();
      });
      child.unref();
      log3(`spawned daemon pid=${child.pid}`);
    } catch (e) {
      log3(`daemon spawn failed: ${e instanceof Error ? e.message : String(e)}`);
      this.#releasePidFile();
    }
  }
  /**
   * True when the pid file is unreadable, holds no usable pid, or names a
   * process we cannot signal.
   */
  isPidFileStale() {
    let pid;
    try {
      pid = Number(readFileSync4(this.pidPath, "utf-8").trim());
    } catch {
      return true;
    }
    // Zero or negative values would address process groups in kill(); reject them.
    if (!Number.isInteger(pid) || pid <= 0)
      return true;
    try {
      process.kill(pid, 0);
      return false;
    } catch {
      // Any failure counts as stale, including "not permitted": the pid file
      // name is per-user, so a process we may not signal is not our daemon.
      return true;
    }
  }
  /** Polls with growing delays (30 ms up to 300 ms) until the socket accepts a connection. */
  async waitForSocket() {
    const deadline = Date.now() + this.spawnWaitMs;
    let delay = 30;
    while (Date.now() < deadline) {
      await sleep2(delay);
      delay = Math.min(delay * 1.5, 300);
      if (!existsSync4(this.socketPath))
        continue;
      try {
        return await this.connectOnce();
      } catch {
        // Socket file exists but is not accepting yet; keep polling.
      }
    }
    throw new Error("daemon did not become ready within spawnWaitMs");
  }
  /**
   * Writes `req` as one JSON line and resolves with the first JSON line read
   * back. Rejects on socket error, early close, invalid JSON, or timeout.
   */
  sendAndWait(sock, req) {
    return new Promise((resolve, reject) => {
      let buf = "";
      const to = setTimeout(() => {
        sock.destroy();
        reject(new Error("request timeout"));
      }, this.timeoutMs);
      sock.setEncoding("utf-8");
      sock.on("data", (chunk) => {
        buf += chunk;
        const nl = buf.indexOf("\n");
        if (nl === -1)
          return;
        const line = buf.slice(0, nl);
        clearTimeout(to);
        try {
          resolve(JSON.parse(line));
        } catch (e) {
          reject(e);
        }
      });
      sock.on("error", (e) => {
        clearTimeout(to);
        reject(e);
      });
      sock.on("end", () => {
        clearTimeout(to);
        reject(new Error("connection closed without response"));
      });
      sock.write(JSON.stringify(req) + "\n");
    });
  }
  /**
   * Exclusively creates the pid file (mode 0600) and records our pid in it.
   * Throws if the file already exists. The descriptor is always closed, even
   * when the write fails.
   */
  #claimPidFile() {
    const fd = openSync2(this.pidPath, "wx", 0o600);
    try {
      writeSync2(fd, String(process.pid));
    } finally {
      closeSync2(fd);
    }
  }
  /** Removes the pid file, ignoring the case where it is already gone. */
  #releasePidFile() {
    try {
      unlinkSync2(this.pidPath);
    } catch {
      // Already removed.
    }
  }
};
|
|
934
|
+
/**
 * Promise-based delay used while polling for the daemon socket.
 * @param {number} ms Milliseconds to wait.
 * @returns {Promise<void>}
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
937
|
+
|
|
938
|
+
// dist/src/embeddings/sql.js
|
|
939
|
+
/**
 * Renders an embedding as a SQL `float4[]` array literal.
 * Returns the bare keyword NULL when the vector is missing, empty, or contains
 * any element that is not a finite number.
 * @param {Iterable<number> | null | undefined} vec
 * @returns {string}
 */
function embeddingSqlLiteral(vec) {
  if (!vec || vec.length === 0) {
    return "NULL";
  }
  const values = [...vec];
  const allFinite = values.every((x) => Number.isFinite(x));
  if (!allFinite) {
    return "NULL";
  }
  const rendered = values.map((x) => String(x)).join(",");
  return `ARRAY[${rendered}]::float4[]`;
}
|
|
950
|
+
|
|
951
|
+
// dist/src/embeddings/disable.js
|
|
952
|
+
import { createRequire } from "node:module";
|
|
953
|
+
import { homedir as homedir6 } from "node:os";
|
|
954
|
+
import { join as join8 } from "node:path";
|
|
955
|
+
import { pathToFileURL } from "node:url";
|
|
956
|
+
var cachedStatus = null;
|
|
957
|
+
/**
 * Checks that `@huggingface/transformers` can be resolved, first relative to
 * this file and then relative to `~/.hivemind/embed-deps/`.
 * Returns nothing on success; throws the second lookup's error when both fail.
 */
function defaultResolveTransformers() {
  const packageName = "@huggingface/transformers";
  const resolveFrom = (base) => {
    createRequire(base).resolve(packageName);
  };
  try {
    resolveFrom(import.meta.url);
    return;
  } catch {
    // Not resolvable next to the bundle; fall back to the shared directory.
  }
  const sharedDir = join8(homedir6(), ".hivemind", "embed-deps");
  resolveFrom(pathToFileURL(`${sharedDir}/`).href);
}
|
|
966
|
+
var _resolve = defaultResolveTransformers;
|
|
967
|
+
/**
 * Works out whether embeddings can be used in this process.
 * @returns {"env-disabled" | "enabled" | "no-transformers"}
 *   "env-disabled" when HIVEMIND_EMBEDDINGS is exactly "false"; otherwise
 *   "enabled" if the resolver succeeds and "no-transformers" if it throws.
 */
function detectStatus() {
  const optedOut = process.env.HIVEMIND_EMBEDDINGS === "false";
  if (optedOut) {
    return "env-disabled";
  }
  let status = "enabled";
  try {
    _resolve();
  } catch {
    status = "no-transformers";
  }
  return status;
}
|
|
977
|
+
/**
 * Cached wrapper around detectStatus(): detection runs only while the cache
 * still holds null, and the result is reused afterwards.
 */
function embeddingsStatus() {
  if (cachedStatus === null) {
    cachedStatus = detectStatus();
  }
  return cachedStatus;
}
|
|
983
|
+
/** True for every status other than "enabled". */
function embeddingsDisabled() {
  const status = embeddingsStatus();
  return status !== "enabled";
}
|
|
986
|
+
|
|
574
987
|
// dist/src/hooks/codex/stop.js
|
|
575
|
-
var
|
|
988
|
+
var log4 = (msg) => log("codex-stop", msg);
|
|
989
|
+
/**
 * Path of the daemon script expected next to this bundle:
 * `<directory of this file>/embeddings/embed-daemon.js`.
 * @returns {string}
 */
function resolveEmbedDaemonPath() {
  const thisFile = fileURLToPath2(import.meta.url);
  const bundleDir = dirname2(thisFile);
  return join9(bundleDir, "embeddings", "embed-daemon.js");
}
|
|
576
992
|
var CAPTURE = process.env.HIVEMIND_CAPTURE !== "false";
|
|
577
993
|
async function main() {
|
|
578
994
|
if (process.env.HIVEMIND_WIKI_WORKER === "1")
|
|
@@ -583,7 +999,7 @@ async function main() {
|
|
|
583
999
|
return;
|
|
584
1000
|
const config = loadConfig();
|
|
585
1001
|
if (!config) {
|
|
586
|
-
|
|
1002
|
+
log4("no config");
|
|
587
1003
|
return;
|
|
588
1004
|
}
|
|
589
1005
|
if (CAPTURE) {
|
|
@@ -595,8 +1011,8 @@ async function main() {
|
|
|
595
1011
|
if (input.transcript_path) {
|
|
596
1012
|
try {
|
|
597
1013
|
const transcriptPath = input.transcript_path;
|
|
598
|
-
if (
|
|
599
|
-
const transcript =
|
|
1014
|
+
if (existsSync5(transcriptPath)) {
|
|
1015
|
+
const transcript = readFileSync5(transcriptPath, "utf-8");
|
|
600
1016
|
const lines = transcript.trim().split("\n").reverse();
|
|
601
1017
|
for (const line2 of lines) {
|
|
602
1018
|
try {
|
|
@@ -613,10 +1029,10 @@ async function main() {
|
|
|
613
1029
|
}
|
|
614
1030
|
}
|
|
615
1031
|
if (lastAssistantMessage)
|
|
616
|
-
|
|
1032
|
+
log4(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`);
|
|
617
1033
|
}
|
|
618
1034
|
} catch (e) {
|
|
619
|
-
|
|
1035
|
+
log4(`transcript read failed: ${e.message}`);
|
|
620
1036
|
}
|
|
621
1037
|
}
|
|
622
1038
|
const entry = {
|
|
@@ -635,11 +1051,13 @@ async function main() {
|
|
|
635
1051
|
const projectName = (input.cwd ?? "").split("/").pop() || "unknown";
|
|
636
1052
|
const filename = sessionPath.split("/").pop() ?? "";
|
|
637
1053
|
const jsonForSql = line.replace(/'/g, "''");
|
|
638
|
-
const
|
|
1054
|
+
const embedding = embeddingsDisabled() ? null : await new EmbedClient({ daemonEntry: resolveEmbedDaemonPath() }).embed(line, "document");
|
|
1055
|
+
const embeddingSql = embeddingSqlLiteral(embedding);
|
|
1056
|
+
const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, message_embedding, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, ${embeddingSql}, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', 'Stop', 'codex', '${ts}', '${ts}')`;
|
|
639
1057
|
await api.query(insertSql);
|
|
640
|
-
|
|
1058
|
+
log4("stop event captured");
|
|
641
1059
|
} catch (e) {
|
|
642
|
-
|
|
1060
|
+
log4(`capture failed: ${e.message}`);
|
|
643
1061
|
}
|
|
644
1062
|
}
|
|
645
1063
|
if (!CAPTURE)
|
|
@@ -658,16 +1076,16 @@ async function main() {
|
|
|
658
1076
|
reason: "Stop"
|
|
659
1077
|
});
|
|
660
1078
|
} catch (e) {
|
|
661
|
-
|
|
1079
|
+
log4(`spawn failed: ${e.message}`);
|
|
662
1080
|
try {
|
|
663
1081
|
releaseLock(sessionId);
|
|
664
1082
|
} catch (releaseErr) {
|
|
665
|
-
|
|
1083
|
+
log4(`releaseLock after spawn failure also failed: ${releaseErr.message}`);
|
|
666
1084
|
}
|
|
667
1085
|
throw e;
|
|
668
1086
|
}
|
|
669
1087
|
}
|
|
670
1088
|
main().catch((e) => {
|
|
671
|
-
|
|
1089
|
+
log4(`fatal: ${e.message}`);
|
|
672
1090
|
process.exit(0);
|
|
673
1091
|
});
|