@deeplake/hivemind 0.7.31 → 0.7.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // dist/src/hooks/hermes/wiki-worker.js
4
- import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, existsSync as existsSync3, appendFileSync as appendFileSync2, mkdirSync as mkdirSync2, rmSync } from "node:fs";
4
+ import { readFileSync as readFileSync4, writeFileSync as writeFileSync3, existsSync as existsSync4, appendFileSync as appendFileSync2, mkdirSync as mkdirSync3, rmSync } from "node:fs";
5
5
  import { execFileSync } from "node:child_process";
6
- import { dirname, join as join5 } from "node:path";
6
+ import { dirname as dirname2, join as join6 } from "node:path";
7
7
  import { fileURLToPath } from "node:url";
8
8
 
9
9
  // dist/src/hooks/summary-state.js
@@ -176,6 +176,7 @@ function getUid() {
176
176
  const uid = typeof process.getuid === "function" ? process.getuid() : void 0;
177
177
  return uid !== void 0 ? String(uid) : process.env.USER ?? "default";
178
178
  }
179
+ var _recycledStuckDaemon = false;
179
180
  var EmbedClient = class {
180
181
  socketPath;
181
182
  pidPath;
@@ -184,6 +185,7 @@ var EmbedClient = class {
184
185
  autoSpawn;
185
186
  spawnWaitMs;
186
187
  nextId = 0;
188
+ helloVerified = false;
187
189
  constructor(opts = {}) {
188
190
  const uid = getUid();
189
191
  const dir = opts.socketDir ?? "/tmp";
@@ -200,8 +202,33 @@ var EmbedClient = class {
200
202
  *
201
203
  * Fire-and-forget spawn on miss: if the daemon isn't up, this call returns
202
204
  * null AND kicks off a background spawn. The next call finds a ready daemon.
205
+ *
206
+ * Stuck-daemon recycle: if the daemon returns a transformers-missing
207
+ * error (typical after a marketplace upgrade left an older daemon process
208
+ * alive but with no node_modules accessible from its bundle path), we
209
+ * SIGTERM it and clear its sock/pid so the very next call spawns a fresh
210
+ * daemon from the current bundle. Without this, the stuck daemon would
211
+ * keep poisoning every session until its 10-minute idle-out fires.
203
212
  */
204
213
  async embed(text, kind = "document") {
214
+ const v = await this.embedAttempt(text, kind);
215
+ if (v !== "recycled")
216
+ return v;
217
+ if (!this.autoSpawn)
218
+ return null;
219
+ this.trySpawnDaemon();
220
+ await this.waitForDaemonReady();
221
+ const retry = await this.embedAttempt(text, kind);
222
+ return retry === "recycled" ? null : retry;
223
+ }
224
+ /**
225
+ * One round-trip: connect → verify → embed. Returns:
226
+ * - number[] : embedding vector (happy path)
227
+ * - null : timeout / daemon error / transformers-missing
228
+ * - "recycled": verifyDaemonOnce killed the daemon mid-call;
229
+ * caller should respawn and retry once.
230
+ */
231
+ async embedAttempt(text, kind) {
205
232
  let sock;
206
233
  try {
207
234
  sock = await this.connectOnce();
@@ -211,11 +238,19 @@ var EmbedClient = class {
211
238
  return null;
212
239
  }
213
240
  try {
241
+ const recycled = await this.verifyDaemonOnce(sock);
242
+ if (recycled) {
243
+ return "recycled";
244
+ }
214
245
  const id = String(++this.nextId);
215
246
  const req = { op: "embed", id, kind, text };
216
247
  const resp = await this.sendAndWait(sock, req);
217
248
  if (resp.error || !("embedding" in resp) || !resp.embedding) {
218
- log2(`embed err: ${resp.error ?? "no embedding"}`);
249
+ const err = resp.error ?? "no embedding";
250
+ log2(`embed err: ${err}`);
251
+ if (isTransformersMissingError(err)) {
252
+ this.handleTransformersMissing(err);
253
+ }
219
254
  return null;
220
255
  }
221
256
  return resp.embedding;
@@ -230,6 +265,123 @@ var EmbedClient = class {
230
265
  }
231
266
  }
232
267
  }
268
+ /**
269
+ * Poll for the sock file to come back after `trySpawnDaemon` — used by
270
+ * the recycle retry path. Best-effort: caps at `spawnWaitMs` and
271
+ * returns regardless so the retry attempt can run.
272
+ */
273
+ async waitForDaemonReady() {
274
+ const deadline = Date.now() + this.spawnWaitMs;
275
+ while (Date.now() < deadline) {
276
+ if (existsSync2(this.socketPath))
277
+ return;
278
+ await new Promise((r) => setTimeout(r, 50));
279
+ }
280
+ }
281
+ /**
282
+ * Send a `hello` on first successful connect per EmbedClient instance.
283
+ * If the daemon answers with a path that doesn't match our configured
284
+ * daemonEntry — typical after a marketplace upgrade replaced the bundle
285
+ * — SIGTERM the daemon + clear sock/pid so the next call spawns from the
286
+ * current bundle.
287
+ *
288
+ * `helloVerified` is set ONLY after we've seen a compatible response,
289
+ * so a transient probe failure or a recycle-triggering mismatch leaves
290
+ * the flag false; the next reconnect re-runs verification against
291
+ * whatever daemon is then live (typically the fresh spawn).
292
+ */
293
+ async verifyDaemonOnce(sock) {
294
+ if (this.helloVerified)
295
+ return false;
296
+ if (!this.daemonEntry) {
297
+ this.helloVerified = true;
298
+ return false;
299
+ }
300
+ const id = String(++this.nextId);
301
+ const req = { op: "hello", id };
302
+ let resp;
303
+ try {
304
+ resp = await this.sendAndWait(sock, req);
305
+ } catch (e) {
306
+ log2(`hello probe failed (inconclusive, will retry next connect): ${e instanceof Error ? e.message : String(e)}`);
307
+ return false;
308
+ }
309
+ const hello = resp;
310
+ if (_recycledStuckDaemon) {
311
+ return false;
312
+ }
313
+ if (!hello.daemonPath) {
314
+ _recycledStuckDaemon = true;
315
+ log2(`daemon does not implement hello (older protocol); recycling`);
316
+ this.recycleDaemon(hello.pid);
317
+ return true;
318
+ }
319
+ if (hello.daemonPath !== this.daemonEntry && !existsSync2(hello.daemonPath)) {
320
+ _recycledStuckDaemon = true;
321
+ log2(`daemon path no longer on disk \u2014 running=${hello.daemonPath} (gone) expected=${this.daemonEntry}; recycling`);
322
+ this.recycleDaemon(hello.pid);
323
+ return true;
324
+ }
325
+ this.helloVerified = true;
326
+ return false;
327
+ }
328
+ /**
329
+ * On a transformers-missing error from the daemon, SIGTERM the stuck
330
+ * daemon (the bundle daemon that can't find its deps) and clear
331
+ * sock/pid so the next call spawns fresh.
332
+ *
333
+ * Previously this also enqueued a user-visible "Hivemind embeddings
334
+ * disabled — deps missing" notification telling the user to run
335
+ * `hivemind embeddings install`. The notification was removed because
336
+ * (a) the recycle alone often fixes the issue silently, and (b) the
337
+ * warning kept stacking on top of the primary session-start banner
338
+ * which clashed with the single-slot priority model. The `detail`
339
+ * argument is retained for future telemetry / debug logging.
340
+ */
341
+ handleTransformersMissing(_detail) {
342
+ if (!_recycledStuckDaemon) {
343
+ _recycledStuckDaemon = true;
344
+ this.recycleDaemon(null);
345
+ }
346
+ }
347
+ /**
348
+ * Best-effort SIGTERM + sock/pid cleanup. Tolerant of every missing-file
349
+ * combination and dead-PID cases.
350
+ *
351
+ * Identity check: gate the SIGTERM on the daemon's socket file still
352
+ * existing. We know the daemon was alive moments ago (we either just
353
+ * got a hello response or the caller saw a transformers-missing error
354
+ * the daemon emitted), but if the socket file is gone by the time we
355
+ * try to kill, the daemon process is also gone and the PID we
356
+ * captured may already have been recycled by the OS to an unrelated
357
+ * user process. Mirrors the gate added to `killEmbedDaemon` in the
358
+ * CLI — same failure mode, rarer trigger.
359
+ */
360
+ recycleDaemon(reportedPid) {
361
+ let pid = reportedPid;
362
+ if (pid === null) {
363
+ try {
364
+ pid = Number.parseInt(readFileSync2(this.pidPath, "utf-8").trim(), 10);
365
+ } catch {
366
+ }
367
+ }
368
+ if (Number.isFinite(pid) && pid !== null && pid > 0 && existsSync2(this.socketPath)) {
369
+ try {
370
+ process.kill(pid, "SIGTERM");
371
+ } catch {
372
+ }
373
+ } else if (pid !== null) {
374
+ log2(`recycle: socket gone, skipping SIGTERM on possibly-stale pid ${pid}`);
375
+ }
376
+ try {
377
+ unlinkSync2(this.socketPath);
378
+ } catch {
379
+ }
380
+ try {
381
+ unlinkSync2(this.pidPath);
382
+ } catch {
383
+ }
384
+ }
233
385
  /**
234
386
  * Wait up to spawnWaitMs for the daemon to accept connections, spawning if
235
387
  * necessary. Meant for SessionStart / long-running batches — not the hot path.
@@ -378,26 +530,113 @@ var EmbedClient = class {
/**
 * Promise-based delay.
 *
 * @param {number} ms Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Decide whether a daemon error message indicates that the transformers
 * dependency is missing. Matches, case-insensitively, either the
 * `hivemind embeddings install` hint or the `@huggingface/transformers`
 * package name.
 *
 * @param err Error text reported by the daemon.
 * @returns {boolean} true when either marker appears in `err`.
 */
function isTransformersMissingError(err) {
  const mentionsInstallHint = /hivemind embeddings install/i.test(err);
  return mentionsInstallHint || /@huggingface\/transformers/i.test(err);
}
381
538
 
382
539
  // dist/src/embeddings/disable.js
383
540
  import { createRequire } from "node:module";
384
- import { homedir as homedir4 } from "node:os";
385
- import { join as join4 } from "node:path";
541
+ import { homedir as homedir5 } from "node:os";
542
+ import { join as join5 } from "node:path";
386
543
  import { pathToFileURL } from "node:url";
544
+
545
+ // dist/src/user-config.js
546
+ import { existsSync as existsSync3, mkdirSync as mkdirSync2, readFileSync as readFileSync3, renameSync as renameSync2, writeFileSync as writeFileSync2 } from "node:fs";
547
+ import { homedir as homedir4 } from "node:os";
548
+ import { dirname, join as join4 } from "node:path";
// Path of the user config file, resolved on every call so that a change to
// HIVEMIND_CONFIG_PATH is picked up. Defaults to ~/.deeplake/config.json.
var _configPath = () => {
  const override = process.env.HIVEMIND_CONFIG_PATH;
  return override ?? join4(homedir4(), ".deeplake", "config.json");
};
// In-process copy of the parsed config; null until the first read.
var _cache = null;
// Flipped to true the first time getEmbeddingsEnabled attempts its migration.
var _migrated = false;
/**
 * Return the user config object, loading it from disk on first use and
 * serving the in-process cache afterwards.
 *
 * A missing file, unreadable or invalid JSON, or a top-level value that is
 * not a plain object all yield an empty config `{}`.
 *
 * @returns {object} The cached config object (shared reference).
 */
function readUserConfig() {
  if (_cache === null) {
    const file = _configPath();
    let loaded = {};
    if (existsSync3(file)) {
      try {
        const candidate = JSON.parse(readFileSync3(file, "utf-8"));
        if (isPlainObject(candidate)) {
          loaded = candidate;
        }
      } catch {
        // Unreadable or malformed config is treated as empty.
        loaded = {};
      }
    }
    _cache = loaded;
  }
  return _cache;
}
/**
 * Merge `patch` into the current user config and persist the result.
 *
 * The file is written to a per-process temp file next to the target and
 * then renamed over it, so the config path never holds a partial write.
 * The parent directory is created when missing. Filesystem errors
 * propagate to the caller; the cache is only updated after a successful
 * rename.
 *
 * @param {object} patch Keys to merge into the config (see `deepMerge`).
 * @returns {object} The merged config that was written.
 */
function writeUserConfig(patch) {
  const next = deepMerge(readUserConfig(), patch);
  const target = _configPath();
  const parent = dirname(target);
  if (!existsSync3(parent)) {
    mkdirSync2(parent, { recursive: true });
  }
  const staging = `${target}.tmp.${process.pid}`;
  const serialized = `${JSON.stringify(next, null, 2)}\n`;
  writeFileSync2(staging, serialized, "utf-8");
  renameSync2(staging, target);
  _cache = next;
  return next;
}
/**
 * Report whether embeddings are enabled for this user.
 *
 * An explicit boolean at `embeddings.enabled` in the user config wins.
 * Otherwise the value is derived from the HIVEMIND_EMBEDDINGS environment
 * variable (see `migrationValueFromEnv`). On the first such call in this
 * process the derived value is also written into the config; later calls
 * skip the write.
 *
 * If persisting fails, the value is kept in the in-process cache only.
 *
 * @returns {boolean}
 */
function getEmbeddingsEnabled() {
  const config = readUserConfig();
  const explicit = config.embeddings?.enabled;
  if (typeof explicit === "boolean") {
    return explicit;
  }
  const fromEnv = migrationValueFromEnv();
  if (_migrated) {
    return fromEnv;
  }
  _migrated = true;
  try {
    writeUserConfig({ embeddings: { enabled: fromEnv } });
  } catch {
    // Could not persist: remember the decision for this process only.
    const previous = config?.embeddings ?? {};
    _cache = { ...(config ?? {}), embeddings: { ...previous, enabled: fromEnv } };
  }
  return fromEnv;
}
/**
 * Derive the embeddings-enabled flag from HIVEMIND_EMBEDDINGS.
 *
 * Unset or exactly "false" means disabled; any other value (including the
 * empty string) means enabled.
 *
 * @returns {boolean}
 */
function migrationValueFromEnv() {
  const flag = process.env.HIVEMIND_EMBEDDINGS;
  return flag !== void 0 && flag !== "false";
}
/**
 * True for any non-null, non-array value whose `typeof` is "object".
 * Note this also accepts class instances such as `Date`.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isPlainObject(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Merge `patch` into a copy of `base` without mutating either argument.
 *
 * For each key of `patch`:
 *   - when both sides hold plain objects, the two are combined one level
 *     deep (patch keys win); deeper levels are not merged recursively;
 *   - otherwise the patch value replaces the base value, unless it is
 *     `undefined`, in which case the base value is kept.
 *
 * @param {object} base
 * @param {object} patch
 * @returns {object} A new merged object.
 */
function deepMerge(base, patch) {
  const merged = { ...base };
  for (const key of Object.keys(patch)) {
    const incoming = patch[key];
    const existing = base[key];
    if (isPlainObject(incoming) && isPlainObject(existing)) {
      merged[key] = { ...existing, ...incoming };
      continue;
    }
    if (incoming !== void 0) {
      merged[key] = incoming;
    }
  }
  return merged;
}
623
+
624
+ // dist/src/embeddings/disable.js
387
625
  var cachedStatus = null;
/**
 * Check that `@huggingface/transformers` can be resolved.
 *
 * Lookup order: the shared deps directory `~/.hivemind/embed-deps` first,
 * then relative to this bundle (`import.meta.url`). Returns normally when
 * either lookup succeeds; the error from the bundle-relative lookup
 * propagates when both fail.
 */
function defaultResolveTransformers() {
  const PACKAGE = "@huggingface/transformers";
  const sharedDir = join5(homedir5(), ".hivemind", "embed-deps");
  let foundInShared = true;
  try {
    const sharedRequire = createRequire(pathToFileURL(`${sharedDir}/`).href);
    sharedRequire.resolve(PACKAGE);
  } catch {
    // Not in the shared directory; fall back to the bundle's own location.
    foundInShared = false;
  }
  if (!foundInShared) {
    createRequire(import.meta.url).resolve(PACKAGE);
  }
}
397
635
  var _resolve = defaultResolveTransformers;
636
+ var _readEnabled = getEmbeddingsEnabled;
398
637
  function detectStatus() {
399
- if (process.env.HIVEMIND_EMBEDDINGS === "false")
400
- return "env-disabled";
638
+ if (!_readEnabled())
639
+ return "user-disabled";
401
640
  try {
402
641
  _resolve();
403
642
  return "enabled";
@@ -426,13 +665,13 @@ function deeplakeClientHeader() {
426
665
 
427
666
  // dist/src/hooks/hermes/wiki-worker.js
428
667
  var dlog2 = (msg) => log("hermes-wiki-worker", msg);
429
- var cfg = JSON.parse(readFileSync3(process.argv[2], "utf-8"));
668
+ var cfg = JSON.parse(readFileSync4(process.argv[2], "utf-8"));
430
669
  var tmpDir = cfg.tmpDir;
431
- var tmpJsonl = join5(tmpDir, "session.jsonl");
432
- var tmpSummary = join5(tmpDir, "summary.md");
670
+ var tmpJsonl = join6(tmpDir, "session.jsonl");
671
+ var tmpSummary = join6(tmpDir, "summary.md");
433
672
  function wlog(msg) {
434
673
  try {
435
- mkdirSync2(cfg.hooksDir, { recursive: true });
674
+ mkdirSync3(cfg.hooksDir, { recursive: true });
436
675
  appendFileSync2(cfg.wikiLog, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] wiki-worker(${cfg.sessionId}): ${msg}
437
676
  `);
438
677
  } catch {
@@ -490,7 +729,7 @@ async function main() {
490
729
  const jsonlLines = rows.length;
491
730
  const pathRows = await query(`SELECT DISTINCT path FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc2(`/sessions/%${cfg.sessionId}%`)}' LIMIT 1`);
492
731
  const jsonlServerPath = pathRows.length > 0 ? pathRows[0].path : `/sessions/unknown/${cfg.sessionId}.jsonl`;
493
- writeFileSync2(tmpJsonl, jsonlContent);
732
+ writeFileSync3(tmpJsonl, jsonlContent);
494
733
  wlog(`found ${jsonlLines} events at ${jsonlServerPath}`);
495
734
  let prevOffset = 0;
496
735
  try {
@@ -500,7 +739,7 @@ async function main() {
500
739
  const match = existing.match(/\*\*JSONL offset\*\*:\s*(\d+)/);
501
740
  if (match)
502
741
  prevOffset = parseInt(match[1], 10);
503
- writeFileSync2(tmpSummary, existing);
742
+ writeFileSync3(tmpSummary, existing);
504
743
  wlog(`existing summary found, offset=${prevOffset}`);
505
744
  }
506
745
  } catch {
@@ -526,15 +765,15 @@ async function main() {
526
765
  } catch (e) {
527
766
  wlog(`hermes -z failed: ${e.status ?? e.message}`);
528
767
  }
529
- if (existsSync3(tmpSummary)) {
530
- const text = readFileSync3(tmpSummary, "utf-8");
768
+ if (existsSync4(tmpSummary)) {
769
+ const text = readFileSync4(tmpSummary, "utf-8");
531
770
  if (text.trim()) {
532
771
  const fname = `${cfg.sessionId}.md`;
533
772
  const vpath = `/summaries/${cfg.userName}/${fname}`;
534
773
  let embedding = null;
535
774
  if (!embeddingsDisabled()) {
536
775
  try {
537
- const daemonEntry = join5(dirname(fileURLToPath(import.meta.url)), "embeddings", "embed-daemon.js");
776
+ const daemonEntry = join6(dirname2(fileURLToPath(import.meta.url)), "embeddings", "embed-daemon.js");
538
777
  embedding = await new EmbedClient({ daemonEntry }).embed(text, "document");
539
778
  } catch (e) {
540
779
  wlog(`summary embedding failed, writing NULL: ${e.message}`);