@deeplake/hivemind 0.7.34 → 0.7.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -317,6 +317,9 @@ ${s.body}
317
317
  import { randomUUID } from "node:crypto";
318
318
 
319
319
  // dist/src/utils/sql.js
320
/**
 * Escape a string for use inside a single-quoted SQL string literal.
 *
 * Backslashes and single quotes are doubled. NUL and the other
 * non-printing control characters are dropped; tab, line feed and
 * carriage return are left untouched.
 */
function sqlStr(value) {
  // One pass over every character that needs attention; the callback
  // decides whether it is doubled or removed.
  return value.replace(/[\\'\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, (ch) => {
    if (ch === "\\") return "\\\\";
    if (ch === "'") return "''";
    return "";
  });
}
320
323
  function sqlIdent(name) {
321
324
  if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) {
322
325
  throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`);
@@ -324,29 +327,142 @@ function sqlIdent(name) {
324
327
  return name;
325
328
  }
326
329
 
327
- // dist/src/skillify/skills-table.js
328
- function createSkillsTableSql(tableName) {
329
- const safe = sqlIdent(tableName);
330
- return `CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', contributors TEXT NOT NULL DEFAULT '[]', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`;
330
+ // dist/src/deeplake-schema.js
331
// Column definitions for the memory table, in declaration order.
// Each entry is { name, sql } where `sql` is the column's type/constraint clause.
var MEMORY_COLUMNS = Object.freeze((() => {
  const text = "TEXT NOT NULL DEFAULT ''";
  return [
    ["id", text],
    ["path", text],
    ["filename", text],
    ["summary", text],
    ["summary_embedding", "FLOAT4[]"],
    ["author", text],
    ["mime_type", "TEXT NOT NULL DEFAULT 'text/plain'"],
    ["size_bytes", "BIGINT NOT NULL DEFAULT 0"],
    ["project", text],
    ["description", text],
    ["agent", text],
    ["plugin_version", text],
    ["creation_date", text],
    ["last_update_date", text]
  ].map(([name, sql]) => ({ name, sql }));
})());
347
// Column definitions for the sessions table, in declaration order.
// Each entry is { name, sql } where `sql` is the column's type/constraint clause.
var SESSIONS_COLUMNS = Object.freeze((() => {
  const text = "TEXT NOT NULL DEFAULT ''";
  return [
    ["id", text],
    ["path", text],
    ["filename", text],
    ["message", "JSONB"],
    ["message_embedding", "FLOAT4[]"],
    ["author", text],
    ["mime_type", "TEXT NOT NULL DEFAULT 'application/json'"],
    ["size_bytes", "BIGINT NOT NULL DEFAULT 0"],
    ["project", text],
    ["description", text],
    ["agent", text],
    ["plugin_version", text],
    ["creation_date", text],
    ["last_update_date", text]
  ].map(([name, sql]) => ({ name, sql }));
})());
363
// Column definitions for the skills table, in declaration order.
// Each entry is { name, sql } where `sql` is the column's type/constraint clause.
var SKILLS_COLUMNS = Object.freeze((() => {
  const text = "TEXT NOT NULL DEFAULT ''";
  const jsonList = "TEXT NOT NULL DEFAULT '[]'";
  return [
    ["id", text],
    ["name", text],
    ["project", text],
    ["project_key", text],
    ["local_path", text],
    ["install", "TEXT NOT NULL DEFAULT 'project'"],
    ["source_sessions", jsonList],
    ["source_agent", text],
    ["scope", "TEXT NOT NULL DEFAULT 'me'"],
    ["author", text],
    ["contributors", jsonList],
    ["description", text],
    ["trigger_text", text],
    ["body", text],
    ["version", "BIGINT NOT NULL DEFAULT 1"],
    ["created_at", text],
    ["updated_at", text]
  ].map(([name, sql]) => ({ name, sql }));
})());
382
/**
 * Sanity-check a column list before it is used to build DDL.
 *
 * Throws an Error (prefixed with `label`) on the first column that
 *   - has a name that is not a plain SQL identifier,
 *   - repeats an earlier column name, or
 *   - is declared NOT NULL without a DEFAULT clause.
 * Returns nothing when every column passes.
 */
function validateSchema(label, cols) {
  const identifier = /^[A-Za-z_][A-Za-z0-9_]*$/;
  const names = /* @__PURE__ */ new Set();
  for (const { name, sql } of cols) {
    if (!identifier.test(name)) {
      throw new Error(`${label}: column name "${name}" is not a valid SQL identifier`);
    }
    if (names.has(name)) {
      throw new Error(`${label}: duplicate column "${name}"`);
    }
    names.add(name);
    // A NOT NULL column needs a DEFAULT so it can be added to a table
    // that already contains rows.
    const requiresValue = /\bNOT\s+NULL\b/i.test(sql);
    if (requiresValue && !/\bDEFAULT\b/i.test(sql)) {
      throw new Error(`${label}: column "${name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
    }
  }
}
332
- function addContributorsColumnSql(tableName) {
399
// Validate every schema table once at module load so a bad column
// definition fails immediately rather than at first query.
for (const [label, cols] of [
  ["MEMORY_COLUMNS", MEMORY_COLUMNS],
  ["SESSIONS_COLUMNS", SESSIONS_COLUMNS],
  ["SKILLS_COLUMNS", SKILLS_COLUMNS]
]) {
  validateSchema(label, cols);
}
402
/**
 * Build the `CREATE TABLE IF NOT EXISTS ... USING deeplake` statement for
 * `tableName` from a list of { name, sql } column definitions.
 *
 * The table name is checked with sqlIdent (which throws on an invalid
 * identifier) before any column text is assembled.
 */
function buildCreateTableSql(tableName, cols) {
  const quotedName = sqlIdent(tableName);
  const definitions = [];
  for (const col of cols) {
    definitions.push(`${col.name} ${col.sql}`);
  }
  return `CREATE TABLE IF NOT EXISTS "${quotedName}" (${definitions.join(", ")}) USING deeplake`;
}
336
- function esc(s) {
337
- return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
407
/**
 * Build the information_schema query that lists the column names of
 * `tableName` within the schema named by `workspaceId`. Both values are
 * escaped with sqlStr before being placed in string literals.
 */
function buildIntrospectionSql(tableName, workspaceId) {
  const table = sqlStr(tableName);
  const schema = sqlStr(workspaceId);
  return "SELECT column_name FROM information_schema.columns WHERE table_name = '" + table + "' AND table_schema = '" + schema + "'";
}
410
/**
 * Add any columns from `args.columns` that the live table lacks.
 *
 * Reads args: `query` (runs SQL, resolves to rows), `tableName`,
 * `workspaceId`, `columns` ({ name, sql }[]), and optional `log`.
 *
 * Returns { missing, altered }: `missing` lists the columns absent at
 * introspection time, `altered` the ones this call actually added.
 * A column whose ALTER fails with "already exists" is re-checked and, if
 * now present, treated as added by a concurrent caller. Any other
 * failure is rethrown.
 */
async function healMissingColumns(args) {
  const quotedTable = sqlIdent(args.tableName);
  const lookupSql = buildIntrospectionSql(args.tableName, args.workspaceId);
  // Lower-cased column_name of an introspection row, or undefined when
  // the row does not carry a string there.
  const lowerName = (row) => {
    const value = row?.column_name;
    return typeof value === "string" ? value.toLowerCase() : void 0;
  };
  const present = /* @__PURE__ */ new Set();
  for (const row of await args.query(lookupSql)) {
    const name = lowerName(row);
    if (name !== void 0)
      present.add(name);
  }
  const toAdd = args.columns.filter((col) => !present.has(col.name.toLowerCase()));
  const missing = toAdd.map((col) => col.name);
  const altered = [];
  for (const col of toAdd) {
    try {
      await args.query(`ALTER TABLE "${quotedTable}" ADD COLUMN ${col.name} ${col.sql}`);
      altered.push(col.name);
      args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
    } catch (e) {
      const text = e instanceof Error ? e.message : String(e);
      if (!/already exists/i.test(text))
        throw e;
      // "already exists" is only acceptable if the column really is
      // there now; confirm before swallowing the error.
      const wanted = col.name.toLowerCase();
      const rowsNow = await args.query(lookupSql);
      if (!rowsNow.some((row) => lowerName(row) === wanted))
        throw e;
      args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
    }
  }
  return { missing, altered };
}
339
446
  function isMissingTableError(message) {
340
447
  if (!message)
341
448
  return false;
449
+ if (/permission denied|must be owner/i.test(message))
450
+ return false;
342
451
  if (/\bcolumn\b/i.test(message))
343
452
  return false;
344
453
  return /Table does not exist|relation .* does not exist|no such table/i.test(message);
345
454
  }
346
- function isMissingContributorsColumnError(message) {
455
/**
 * Decide whether an error message reports a missing column.
 *
 * Empty messages and permission/ownership errors are never treated as a
 * missing column. Otherwise the message must match one of the known
 * "column ... does not exist" / "unknown column" / "no such column" shapes.
 */
function isMissingColumnError(message) {
  if (!message) {
    return false;
  }
  if (/permission denied|must be owner/i.test(message)) {
    return false;
  }
  const shapes = [
    /column ["']?[A-Za-z_][A-Za-z0-9_]*["']? .*does not exist/i,
    /unknown column/i,
    /no such column/i
  ];
  return shapes.some((shape) => shape.test(message));
}
462
+
463
+ // dist/src/skillify/skills-table.js
464
/**
 * Escape a string for use inside a single-quoted SQL string literal.
 *
 * Backslashes and single quotes are doubled. NUL and the other
 * non-printing control characters are dropped; tab, line feed and
 * carriage return are kept.
 *
 * NUL (\x00) is stripped as well, matching sqlStr in utils/sql.js: a
 * NUL byte cannot appear in a SQL string constant, so leaving it in
 * would make the whole INSERT fail for an otherwise valid skill row.
 */
function esc(s) {
  return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
}
351
467
  async function insertSkillRow(args) {
352
468
  const id = args.id ?? randomUUID();
@@ -355,14 +471,29 @@ async function insertSkillRow(args) {
355
471
  const sql = `INSERT INTO "${sqlIdent(args.tableName)}" (id, name, project, project_key, local_path, install, source_sessions, source_agent, scope, author, contributors, description, trigger_text, body, version, created_at, updated_at) VALUES ('${esc(id)}', '${esc(args.name)}', '${esc(args.project)}', '${esc(args.projectKey)}', '${esc(args.localPath)}', '${esc(args.install)}', '${esc(sourceSessionsJson)}', '${esc(args.sourceAgent)}', '${esc(args.scope)}', '${esc(args.author)}', '${esc(contributorsJson)}', '${esc(args.description)}', '${esc(args.trigger ?? "")}', '${esc(args.body)}', ${args.version}, '${esc(args.createdAt)}', '${esc(args.updatedAt)}')`;
356
472
  try {
357
473
  await args.query(sql);
474
+ return;
358
475
  } catch (e) {
359
- if (isMissingTableError(e?.message)) {
360
- await args.query(createSkillsTableSql(args.tableName));
476
+ const msg = e instanceof Error ? e.message : String(e);
477
+ if (isMissingTableError(msg)) {
478
+ await args.query(buildCreateTableSql(args.tableName, SKILLS_COLUMNS));
479
+ await healMissingColumns({
480
+ query: args.query,
481
+ tableName: args.tableName,
482
+ workspaceId: args.workspaceId,
483
+ columns: SKILLS_COLUMNS
484
+ });
361
485
  await args.query(sql);
362
486
  return;
363
487
  }
364
- if (isMissingContributorsColumnError(e?.message)) {
365
- await args.query(addContributorsColumnSql(args.tableName));
488
+ if (isMissingColumnError(msg)) {
489
+ const result = await healMissingColumns({
490
+ query: args.query,
491
+ tableName: args.tableName,
492
+ workspaceId: args.workspaceId,
493
+ columns: SKILLS_COLUMNS
494
+ });
495
+ if (result.missing.length === 0)
496
+ throw e;
366
497
  await args.query(sql);
367
498
  return;
368
499
  }
@@ -1002,6 +1133,7 @@ async function main() {
1002
1133
  await insertSkillRow({
1003
1134
  query,
1004
1135
  tableName: cfg.skillsTable,
1136
+ workspaceId: cfg.workspaceId,
1005
1137
  name: verdict2.name,
1006
1138
  project: cfg.project,
1007
1139
  projectKey: cfg.projectKey,
@@ -52,5 +52,5 @@
52
52
  }
53
53
  }
54
54
  },
55
- "version": "0.7.34"
55
+ "version": "0.7.36"
56
56
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hivemind",
3
- "version": "0.7.34",
3
+ "version": "0.7.36",
4
4
  "type": "module",
5
5
  "description": "Hivemind — cloud-backed persistent shared memory for AI agents, powered by DeepLake",
6
6
  "license": "Apache-2.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@deeplake/hivemind",
3
- "version": "0.7.34",
3
+ "version": "0.7.36",
4
4
  "description": "Cloud-backed persistent shared memory for AI agents powered by Deeplake",
5
5
  "type": "module",
6
6
  "repository": {
@@ -26,7 +26,7 @@
26
26
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
27
27
  import {
28
28
  readFileSync, existsSync, appendFileSync, mkdirSync, writeFileSync,
29
- openSync, closeSync, renameSync, readdirSync, statSync, unlinkSync,
29
+ openSync, closeSync, writeSync, renameSync, readdirSync, statSync, unlinkSync,
30
30
  constants as fsConstants,
31
31
  } from "node:fs";
32
32
  import { homedir, tmpdir } from "node:os";
@@ -164,51 +164,204 @@ async function dlQuery(creds: Creds, sql: string): Promise<unknown[]> {
164
164
  // Pi avoids importing EmbedClient (which is bundled into other agents but
165
165
  // here would break the "raw .ts, zero deps" promise of pi extensions).
166
166
  // Instead we open a Unix socket directly to the daemon at the same well-known
167
- // path EmbedClient uses. If the socket isn't there yet, we spawn the
168
- // canonical daemon at ~/.hivemind/embed-deps/embed-daemon.js (deposited by
169
- // `hivemind embeddings install`) and wait for it to listen, mirroring the
170
- // auto-spawn-on-miss logic in src/embeddings/client.ts. Subsequent agents
171
- // (codex, CC, cursor, hermes, …) connect to the SAME daemon — pi pays the
172
- // cold-start cost only when it's the first user on the box.
167
+ // path EmbedClient uses. If the socket isn't there yet AND the canonical
168
+ // daemon binary exists at ~/.hivemind/embed-deps/embed-daemon.js (deposited
169
+ // by `hivemind embeddings install`), we spawn it under an O_EXCL pidfile
170
+ // lock and wait for it to listen. Subsequent agents (codex, CC, cursor,
171
+ // hermes, …) connect to the SAME daemon — pi pays the cold-start cost only
172
+ // when it's the first user on the box. This logic matches the source-tree
173
+ // helper at src/embeddings/standalone-embed-client.ts (kept in lockstep:
174
+ // the unit tests there cover the 11 edge cases mirrored here).
173
175
  //
174
176
  // Graceful fallback: any failure → return null → caller writes NULL into
175
- // message_embedding. Embedding is never on the critical path.
177
+ // message_embedding. Embedding is NEVER on the critical path; pi must keep
178
+ // working when the daemon is unreachable.
176
179
 
177
180
  const EMBED_DAEMON_ENTRY = join(homedir(), ".hivemind", "embed-deps", "embed-daemon.js");
178
- const EMBED_SOCKET_PATH = (() => {
179
- const uid = typeof process.getuid === "function" ? String(process.getuid()) : (process.env.USER ?? "default");
180
- return `/tmp/hivemind-embed-${uid}.sock`;
181
- })();
181
+ // `process.env.USER` removed as a fallback: even though pi doesn't go
182
+ // through ClawHub static-scan, we keep the source in lockstep with
183
+ // src/embeddings/standalone-embed-client.ts (which DOES) so the two
184
+ // implementations stay byte-identical. On Linux/macOS `process.getuid`
185
+ // is always present; "default" is a fine sentinel elsewhere.
186
+ const EMBED_UID = typeof process.getuid === "function" ? String(process.getuid()) : "default";
187
+ const EMBED_SOCKET_PATH = `/tmp/hivemind-embed-${EMBED_UID}.sock`;
188
+ const EMBED_PID_PATH = `/tmp/hivemind-embed-${EMBED_UID}.pid`;
189
+
190
/**
 * Report whether signal 0 can be delivered to `pid`.
 *
 * Non-finite or non-positive values are rejected without probing. Any
 * error thrown by `process.kill` is reported as "not alive".
 */
function isPidAlive(pid: number): boolean {
  const plausible = Number.isFinite(pid) && pid > 0;
  if (!plausible) {
    return false;
  }
  try {
    // Signal 0 delivers nothing; kill() only performs its checks.
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}
194
+
195
/**
 * Three-state read of a pidfile.
 *
 * @param path pidfile to read
 * @returns
 *   - the PID, when the file holds a positive decimal integer;
 *   - `"empty"`, when the file exists but contains only whitespace —
 *     another caller is mid-spawn between openSync(wx) and writeSync(pid).
 *     Treating that as stale lets two racing callers each spawn a daemon,
 *     the second crashing on bind();
 *   - `null`, when the file cannot be read or holds anything else.
 *
 * Only plain decimal digits are accepted. `Number()` alone would also
 * accept garbage such as "-5", "1.5", "0x10", "1e3" or "Infinity" and
 * hand it to callers as if it were a PID.
 *
 * Mirrors src/embeddings/standalone-embed-client.ts:readPidFile.
 */
function readPidFileInline(path: string): number | "empty" | null {
  let raw: string;
  try {
    raw = readFileSync(path, "utf-8").trim();
  } catch {
    return null;
  }
  if (raw === "") return "empty";
  if (!/^\d+$/.test(raw)) return null;
  const pid = Number(raw);
  // Rejects "0" and digit strings too large to be an exact integer.
  return Number.isSafeInteger(pid) && pid > 0 ? pid : null;
}
182
208
 
183
- function tryEmbedOverSocket(text: string, kind: "document" | "query"): Promise<number[] | null> {
209
/**
 * Make one connection attempt to the daemon socket at EMBED_SOCKET_PATH.
 *
 * Resolves with the connected socket, or with null if the socket emits
 * an error or does not connect within `timeoutMs` (in which case the
 * pending socket is destroyed). Never rejects.
 */
function connectDaemonOnce(timeoutMs: number): Promise<ReturnType<typeof connect> | null> {
  return new Promise((resolve) => {
    const sock = connect(EMBED_SOCKET_PATH);
    const onTimeout = (): void => {
      try { sock.destroy(); } catch { /* */ }
      resolve(null);
    };
    const timer = setTimeout(onTimeout, timeoutMs);
    const finish = (result: ReturnType<typeof connect> | null): void => {
      clearTimeout(timer);
      resolve(result);
    };
    sock.once("connect", () => finish(sock));
    sock.once("error", () => finish(null));
  });
}
217
+
218
/**
 * Create the pidfile with O_EXCL ("wx") and write our PID through the
 * returned fd.
 *
 * Writing through the fd (not writeFileSync(path, ...)) matters: a
 * path-based write races with a concurrent unlink + re-open elsewhere and
 * could overwrite another caller's pidfile entirely.
 *
 * @returns the open fd on success, or null when the file was created but
 *   the write failed — in that case the fd is closed and the file removed
 *   so neither leaks.
 * @throws whatever openSync throws (typically because the file exists).
 */
function claimEmbedPidFileInline(): number | null {
  const fd = openSync(EMBED_PID_PATH, "wx", 0o600);
  try {
    writeSync(fd, String(process.pid));
    return fd;
  } catch {
    try { closeSync(fd); } catch { /* */ }
    try { unlinkSync(EMBED_PID_PATH); } catch { /* */ }
    return null;
  }
}

/**
 * Spawn the canonical daemon under an O_EXCL pidfile lock. Returns true
 * if THIS pi turn owns the spawn. Mirrors the helper in
 * src/embeddings/standalone-embed-client.ts:
 *   - live pidfile owner (case 6/7) → don't SIGTERM (PID-reuse risk from PR #168), let caller wait
 *   - dead/garbage pidfile (case 5) → cleanup + spawn
 *   - spawn() throws (case 8) → roll pidfile back so the next turn can retry
 *
 * Two failure paths are handled explicitly so embedding stays off the
 * critical path:
 *   - if the pidfile is created but the PID write fails, the fd is closed
 *     and the file removed instead of being leaked;
 *   - if the child reports a spawn failure asynchronously via its 'error'
 *     event, the event is logged and our placeholder pidfile is removed.
 *     Without a listener that event would surface as an uncaught exception.
 *
 * @returns true when the daemon process was launched by this call.
 */
function trySpawnDaemonInline(): boolean {
  let fd: number | null;
  try {
    fd = claimEmbedPidFileInline();
  } catch {
    const existing = readPidFileInline(EMBED_PID_PATH);
    // Empty file: another caller won openSync(wx) but hasn't written its
    // PID yet. We MUST NOT unlink + respawn — that lets us race past
    // the legitimate writer and spawn a duplicate daemon. Wait instead.
    if (existing === "empty") return false;
    if (existing !== null && isPidAlive(existing)) {
      // Live owner: another agent / pi turn is bringing the daemon up. Wait.
      return false;
    }
    try { unlinkSync(EMBED_PID_PATH); } catch { /* */ }
    try {
      fd = claimEmbedPidFileInline();
    } catch {
      return false; // sub-ms race: another caller claimed it between our unlink and reopen
    }
  }
  if (fd === null) {
    // Pidfile was created but could not be written; it has been rolled back.
    logHm(`embed: could not write pidfile ${EMBED_PID_PATH}`);
    return false;
  }
  try {
    // No explicit `env: process.env` — it's the spawn default, and a
    // literal `process.env` reference in source kept in lockstep with
    // src/embeddings/standalone-embed-client.ts (which DOES go through
    // ClawHub static-scan from the openclaw bundle).
    const child = spawn(process.execPath, [EMBED_DAEMON_ENTRY], {
      detached: true,
      stdio: "ignore",
    });
    // Spawn failures can be reported asynchronously through 'error'.
    child.once("error", (err: Error) => {
      logHm(`embed: spawn failed: ${err?.message ?? err}`);
      // Remove the placeholder only if it is still ours.
      if (readPidFileInline(EMBED_PID_PATH) === process.pid) {
        try { unlinkSync(EMBED_PID_PATH); } catch { /* */ }
      }
    });
    child.unref();
    logHm(`embed: spawned daemon pid=${child.pid}`);
    return true;
  } catch (e: any) {
    logHm(`embed: spawn failed: ${e?.message ?? e}`);
    try { unlinkSync(EMBED_PID_PATH); } catch { /* */ }
    return false;
  } finally {
    try { closeSync(fd); } catch { /* */ }
  }
}
273
+
274
/**
 * Remove the pidfile when it still holds this process's placeholder PID
 * or is empty; leave it alone in every other case.
 *
 * Why: after the socket wait times out, a pidfile carrying OUR pid would
 * make every later pi turn see a "live owner" (we are still running) and
 * wait instead of retrying the spawn. The ownership check matters because
 * the daemon may already have overwritten the file with its own pid.
 *
 * The empty case covers a prior turn that was killed between
 * openSync(wx) and writeSync(pid): by the time this runs the caller has
 * waited far longer than that gap legitimately lasts.
 */
function maybeCleanupOwnPlaceholderInline(): void {
  const existing = readPidFileInline(EMBED_PID_PATH);
  const removable = existing === "empty" || existing === process.pid;
  if (!removable) {
    return;
  }
  try {
    unlinkSync(EMBED_PID_PATH);
  } catch {
    /* already gone */
  }
}
291
+
292
/**
 * Send one embed request over an already-connected socket and wait for
 * the first newline-terminated JSON reply.
 *
 * Resolves with the embedding when the reply's `embedding` field is an
 * array made up solely of finite numbers. Resolves with null on timeout,
 * socket error/close, unparsable JSON, or any other reply shape. The
 * socket is destroyed once the result is settled. Never rejects.
 *
 * @param sock      connected daemon socket
 * @param text      text to embed
 * @param kind      request kind forwarded to the daemon
 * @param timeoutMs how long to wait for a reply
 */
async function sendEmbedRequest(sock: ReturnType<typeof connect>, text: string, kind: "document" | "query", timeoutMs: number): Promise<number[] | null> {
  return new Promise((resolve) => {
    let done = false;
    const finish = (result: number[] | null): void => {
      if (done) return;
      done = true;
      resolve(result);
      try { sock.destroy(); } catch { /* */ }
    };
    const timer = setTimeout(() => finish(null), timeoutMs);
    const giveUp = (): void => {
      clearTimeout(timer);
      finish(null);
    };
    // JSON-over-socket is untrusted at runtime. Reject any non-finite
    // element (string, null, NaN, Infinity, object) so bad values never
    // flow into the ARRAY[...]::FLOAT4[] SQL literal.
    const allFinite = (values: unknown[]): boolean => {
      for (const item of values) {
        if (typeof item !== "number" || !Number.isFinite(item)) return false;
      }
      return true;
    };
    let pending = "";
    sock.on("data", (chunk: Buffer) => {
      pending += chunk.toString("utf-8");
      const lineEnd = pending.indexOf("\n");
      if (lineEnd === -1) return;
      clearTimeout(timer);
      let vector: number[] | null = null;
      try {
        const reply = JSON.parse(pending.slice(0, lineEnd));
        // Daemon may return `{ error: "unknown op" }` from an older protocol — graceful NULL.
        if (Array.isArray(reply.embedding) && allFinite(reply.embedding)) {
          vector = reply.embedding;
        }
      } catch {
        vector = null;
      }
      finish(vector);
    });
    sock.on("error", giveUp);
    sock.on("close", giveUp);
    // Protocol shape comes from src/embeddings/protocol.ts: { op, id, kind, text }.
    // id is a string ("1"), not a number, and the verb field is "op" not "type".
    sock.write(JSON.stringify({ op: "embed", id: "1", kind, text }) + "\n");
  });
}
211
324
 
325
/**
 * Full spawn-on-miss embedding flow. Returns null on any failure; never
 * throws. 11 edge cases mirror the unit tests in
 * tests/shared/standalone-embed-client.test.ts.
 *
 * Steps: try the socket once; if that fails and the daemon binary is
 * installed, attempt a spawn and poll for the socket for up to 5s with a
 * growing delay (30ms, ×1.5 each round, capped at 300ms); then send the
 * request with a 5s reply timeout.
 */
async function tryEmbedOverSocket(text: string, kind: "document" | "query"): Promise<number[] | null> {
  // Case 3 — happy path: socket alive, daemon ready.
  let sock = await connectDaemonOnce(1000);
  if (!sock) {
    // Case 1 — binary missing: never spawn.
    if (!existsSync(EMBED_DAEMON_ENTRY)) {
      logHm(`embed: no daemon at ${EMBED_DAEMON_ENTRY} — run 'hivemind embeddings install'`);
      return null;
    }
    // Cases 2 / 4 / 5 / 7 / 8 — trySpawn handles them; loser waits.
    trySpawnDaemonInline();
    // Case 9 — poll for socket up to 5s.
    const deadline = Date.now() + 5000;
    let delay = 30;
    while (!sock && Date.now() < deadline) {
      await new Promise(r => setTimeout(r, delay));
      delay = Math.min(delay * 1.5, 300);
      if (existsSync(EMBED_SOCKET_PATH)) {
        sock = await connectDaemonOnce(1000);
      }
    }
    if (!sock) {
      // Clean up our placeholder PID so the next pi turn can retry the
      // spawn instead of waiting on us forever.
      maybeCleanupOwnPlaceholderInline();
      logHm(`embed: daemon never opened socket within 5s`);
      return null;
    }
  }
  // Cases 10 / 11 — request timeout / daemon error → null.
  const vector = await sendEmbedRequest(sock, text, kind, 5000);
  if (vector === null) {
    logHm(`embed: daemon returned null (timeout or error)`);
  }
  return vector;
}
364
+
212
365
  // ---------- summary state + wiki-worker spawn ---------------------------------
213
366
  //
214
367
  // Mirror of src/hooks/summary-state.ts (same dir, same JSON shape, shared
@@ -569,39 +722,12 @@ async function embed(text: string): Promise<number[] | null> {
569
722
  logHm(`embed: skipped (empty text)`);
570
723
  return null;
571
724
  }
572
- // 1) socket already up (another agent or us in a previous turn) → fast path
573
- let v = await tryEmbedOverSocket(text, "document");
574
- if (v !== null) {
575
- logHm(`embed: ok via existing socket (dims=${v.length})`);
576
- return v;
577
- }
578
- // 2) no daemon binary deposited → fallback NULL
579
- if (!existsSync(EMBED_DAEMON_ENTRY)) {
580
- logHm(`embed: no daemon at ${EMBED_DAEMON_ENTRY} — run 'hivemind embeddings install'`);
581
- return null;
582
- }
583
- // 3) spawn the canonical daemon detached; daemon's own pidfile lock guards
584
- // against double-spawn if multiple pi turns race.
585
- logHm(`embed: spawning daemon at ${EMBED_DAEMON_ENTRY}`);
586
- try {
587
- spawn(process.execPath, [EMBED_DAEMON_ENTRY], { detached: true, stdio: "ignore" }).unref();
588
- } catch (e: any) {
589
- logHm(`embed: spawn failed: ${e?.message ?? e}`);
590
- return null;
591
- }
592
- // 4) poll for the socket up to ~5s, then retry the embed once
593
- for (let i = 0; i < 25; i++) {
594
- await new Promise(r => setTimeout(r, 200));
595
- if (existsSync(EMBED_SOCKET_PATH)) {
596
- v = await tryEmbedOverSocket(text, "document");
597
- if (v !== null) {
598
- logHm(`embed: ok after spawn (dims=${v.length}, polls=${i + 1})`);
599
- return v;
600
- }
601
- }
602
- }
603
- logHm(`embed: timed out after spawn (5s)`);
604
- return null;
725
+ // Single round-trip: tryEmbedOverSocket spawns the daemon on miss
726
+ // (O_EXCL race-safe, mirrors src/embeddings/standalone-embed-client.ts)
727
+ // and embeds in one call. Returns null on any failure.
728
+ const v = await tryEmbedOverSocket(text, "document");
729
+ if (v !== null) logHm(`embed: ok (dims=${v.length})`);
730
+ return v;
605
731
  }
606
732
 
607
733
  function embedSqlLiteral(emb: number[] | null): string {