@deeplake/hivemind 0.7.35 → 0.7.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -158,7 +158,123 @@ function sqlIdent(name) {
158
158
 
159
159
  // dist/src/embeddings/columns.js
160
160
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
161
- var MESSAGE_EMBEDDING_COL = "message_embedding";
161
+
162
+ // dist/src/deeplake-schema.js
163
+ var MEMORY_COLUMNS = Object.freeze([
164
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
165
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
166
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
167
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
168
+ { name: "summary_embedding", sql: "FLOAT4[]" },
169
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
170
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
171
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
172
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
173
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
174
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
175
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
176
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
177
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
178
+ ]);
179
+ var SESSIONS_COLUMNS = Object.freeze([
180
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
181
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
182
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
183
+ { name: "message", sql: "JSONB" },
184
+ { name: "message_embedding", sql: "FLOAT4[]" },
185
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
186
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
187
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
188
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
189
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
190
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
191
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
192
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
193
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
194
+ ]);
195
+ var SKILLS_COLUMNS = Object.freeze([
196
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
197
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
198
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
199
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
200
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
201
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
202
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
203
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
204
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
205
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
206
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
207
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
208
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
209
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
210
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
211
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
212
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
213
+ ]);
214
+ function validateSchema(label, cols) {
215
+ const seen = /* @__PURE__ */ new Set();
216
+ for (const col of cols) {
217
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
218
+ throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
219
+ }
220
+ if (seen.has(col.name)) {
221
+ throw new Error(`${label}: duplicate column "${col.name}"`);
222
+ }
223
+ seen.add(col.name);
224
+ const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
225
+ const hasDefault = /\bDEFAULT\b/i.test(col.sql);
226
+ if (notNull && !hasDefault) {
227
+ throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
228
+ }
229
+ }
230
+ }
231
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
232
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
233
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
234
+ function buildCreateTableSql(tableName, cols) {
235
+ const safe = sqlIdent(tableName);
236
+ const colSql = cols.map((c) => `${c.name} ${c.sql}`).join(", ");
237
+ return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
238
+ }
239
+ function buildIntrospectionSql(tableName, workspaceId) {
240
+ return `SELECT column_name FROM information_schema.columns WHERE table_name = '${sqlStr(tableName)}' AND table_schema = '${sqlStr(workspaceId)}'`;
241
+ }
242
+ async function healMissingColumns(args) {
243
+ const safeTable = sqlIdent(args.tableName);
244
+ const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
245
+ const rows = await args.query(introspectSql);
246
+ const existing = /* @__PURE__ */ new Set();
247
+ for (const row of rows) {
248
+ const v = row?.column_name;
249
+ if (typeof v === "string")
250
+ existing.add(v.toLowerCase());
251
+ }
252
+ const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
253
+ const missing = missingCols.map((c) => c.name);
254
+ if (missingCols.length === 0)
255
+ return { missing, altered: [] };
256
+ const altered = [];
257
+ for (const col of missingCols) {
258
+ try {
259
+ await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${col.name} ${col.sql}`);
260
+ altered.push(col.name);
261
+ args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
262
+ } catch (e) {
263
+ const msg = e instanceof Error ? e.message : String(e);
264
+ if (!/already exists/i.test(msg))
265
+ throw e;
266
+ const recheck = await args.query(introspectSql);
267
+ const present = recheck.some((r) => {
268
+ const v = r?.column_name;
269
+ return typeof v === "string" && v.toLowerCase() === col.name.toLowerCase();
270
+ });
271
+ if (!present)
272
+ throw e;
273
+ args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
274
+ }
275
+ }
276
+ return { missing, altered };
277
+ }
162
278
 
163
279
  // dist/src/notifications/queue.js
164
280
  import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync2, openSync, closeSync, unlinkSync as unlinkSync2, statSync } from "node:fs";
@@ -524,64 +640,33 @@ var DeeplakeApi = class {
524
640
  }
525
641
  }
526
642
  /**
527
- * Ensure a vector column exists on the given table.
528
- *
529
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
530
- * EXISTS …` on every SessionStart. On a long-running workspace that's
531
- * already migrated, every call returns 500 "Column already exists" — noisy
532
- * in the log and a wasted round-trip. Worse, the very first call after the
533
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
534
- * window (~30s) during which subsequent INSERTs fail; minimising the
535
- * number of ALTER calls minimises exposure to that window.
643
+ * Heal any missing columns on a table so it matches one of the schema
644
+ * definitions in `deeplake-schema.ts`. One SELECT against
645
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
646
+ * only the genuinely missing ones — never blanket, never `IF NOT
647
+ * EXISTS`.
536
648
  *
537
- * New flow:
538
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
539
- * return — zero network calls.
540
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
541
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
542
- * bug. If the column is present → mark + return.
543
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
544
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
545
- * "already exists" (race: another client added it between our SELECT
546
- * and ALTER).
547
- *
548
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
549
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
649
+ * History: an earlier path used a local marker file (`col_<name>` under
650
+ * the index-marker dir) to skip even the SELECT after the first
651
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
652
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
653
+ * because Deeplake used to expose a ~30s post-ALTER bug where
654
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
655
+ * minimum. The bug was re-verified on 2026-05-18 against
656
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
657
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
658
+ * + targeted ALTER pattern survives the marker removal because: each
659
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
660
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
550
661
  */
551
- async ensureEmbeddingColumn(table, column) {
552
- await this.ensureColumn(table, column, "FLOAT4[]");
553
- }
554
- /**
555
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
556
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
557
- * column that was added to the schema after the table was originally
558
- * created. Used today for `summary_embedding`, `message_embedding`, and
559
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
560
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
561
- * with `column "agent" does not exist`.
562
- */
563
- async ensureColumn(table, column, sqlType) {
564
- const markers = await getIndexMarkerStore();
565
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
566
- if (markers.hasFreshIndexMarker(markerPath))
567
- return;
568
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
569
- const rows = await this.query(colCheck);
570
- if (rows.length > 0) {
571
- markers.writeIndexMarker(markerPath);
572
- return;
573
- }
574
- try {
575
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
576
- } catch (e) {
577
- const msg = e instanceof Error ? e.message : String(e);
578
- if (!/already exists/i.test(msg))
579
- throw e;
580
- const recheck = await this.query(colCheck);
581
- if (recheck.length === 0)
582
- throw e;
583
- }
584
- markers.writeIndexMarker(markerPath);
662
+ async healSchema(table, columns) {
663
+ await healMissingColumns({
664
+ query: (sql) => this.query(sql),
665
+ tableName: table,
666
+ workspaceId: this.workspaceId,
667
+ columns,
668
+ log: log3
669
+ });
585
670
  }
586
671
  /** List all tables in the workspace (with retry). */
587
672
  async listTables(forceRefresh = false) {
@@ -652,20 +737,21 @@ var DeeplakeApi = class {
652
737
  }
653
738
  throw lastErr;
654
739
  }
655
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
740
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
656
741
  async ensureTable(name) {
742
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
743
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
744
+ }
657
745
  const tbl = sqlIdent(name ?? this.tableName);
658
746
  const tables = await this.listTables();
659
747
  if (!tables.includes(tbl)) {
660
748
  log3(`table "${tbl}" not found, creating`);
661
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
749
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
662
750
  log3(`table "${tbl}" created`);
663
751
  if (!tables.includes(tbl))
664
752
  this._tablesCache = [...tables, tbl];
665
753
  }
666
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
667
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
668
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
754
+ await this.healSchema(tbl, MEMORY_COLUMNS);
669
755
  }
670
756
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
671
757
  async ensureSessionsTable(name) {
@@ -673,14 +759,12 @@ var DeeplakeApi = class {
673
759
  const tables = await this.listTables();
674
760
  if (!tables.includes(safe)) {
675
761
  log3(`table "${safe}" not found, creating`);
676
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
762
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
677
763
  log3(`table "${safe}" created`);
678
764
  if (!tables.includes(safe))
679
765
  this._tablesCache = [...tables, safe];
680
766
  }
681
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
682
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
683
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
767
+ await this.healSchema(safe, SESSIONS_COLUMNS);
684
768
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
685
769
  }
686
770
  /**
@@ -698,11 +782,12 @@ var DeeplakeApi = class {
698
782
  const tables = await this.listTables();
699
783
  if (!tables.includes(safe)) {
700
784
  log3(`table "${safe}" not found, creating`);
701
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
785
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
702
786
  log3(`table "${safe}" created`);
703
787
  if (!tables.includes(safe))
704
788
  this._tablesCache = [...tables, safe];
705
789
  }
790
+ await this.healSchema(safe, SKILLS_COLUMNS);
706
791
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
707
792
  }
708
793
  };
@@ -337,7 +337,123 @@ function sqlIdent(name) {
337
337
 
338
338
  // dist/src/embeddings/columns.js
339
339
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
340
- var MESSAGE_EMBEDDING_COL = "message_embedding";
340
+
341
+ // dist/src/deeplake-schema.js
342
+ var MEMORY_COLUMNS = Object.freeze([
343
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
344
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
345
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
346
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
347
+ { name: "summary_embedding", sql: "FLOAT4[]" },
348
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
349
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
350
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
351
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
352
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
353
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
354
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
355
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
356
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
357
+ ]);
358
+ var SESSIONS_COLUMNS = Object.freeze([
359
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
360
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
361
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
362
+ { name: "message", sql: "JSONB" },
363
+ { name: "message_embedding", sql: "FLOAT4[]" },
364
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
365
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
366
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
367
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
368
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
369
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
370
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
371
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
372
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
373
+ ]);
374
+ var SKILLS_COLUMNS = Object.freeze([
375
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
376
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
377
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
378
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
379
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
380
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
381
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
382
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
383
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
384
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
385
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
386
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
387
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
388
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
389
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
390
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
391
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
392
+ ]);
393
+ function validateSchema(label, cols) {
394
+ const seen = /* @__PURE__ */ new Set();
395
+ for (const col of cols) {
396
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
397
+ throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
398
+ }
399
+ if (seen.has(col.name)) {
400
+ throw new Error(`${label}: duplicate column "${col.name}"`);
401
+ }
402
+ seen.add(col.name);
403
+ const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
404
+ const hasDefault = /\bDEFAULT\b/i.test(col.sql);
405
+ if (notNull && !hasDefault) {
406
+ throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
407
+ }
408
+ }
409
+ }
410
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
411
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
412
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
413
+ function buildCreateTableSql(tableName, cols) {
414
+ const safe = sqlIdent(tableName);
415
+ const colSql = cols.map((c) => `${c.name} ${c.sql}`).join(", ");
416
+ return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
417
+ }
418
+ function buildIntrospectionSql(tableName, workspaceId) {
419
+ return `SELECT column_name FROM information_schema.columns WHERE table_name = '${sqlStr(tableName)}' AND table_schema = '${sqlStr(workspaceId)}'`;
420
+ }
421
+ async function healMissingColumns(args) {
422
+ const safeTable = sqlIdent(args.tableName);
423
+ const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
424
+ const rows = await args.query(introspectSql);
425
+ const existing = /* @__PURE__ */ new Set();
426
+ for (const row of rows) {
427
+ const v = row?.column_name;
428
+ if (typeof v === "string")
429
+ existing.add(v.toLowerCase());
430
+ }
431
+ const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
432
+ const missing = missingCols.map((c) => c.name);
433
+ if (missingCols.length === 0)
434
+ return { missing, altered: [] };
435
+ const altered = [];
436
+ for (const col of missingCols) {
437
+ try {
438
+ await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${col.name} ${col.sql}`);
439
+ altered.push(col.name);
440
+ args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
441
+ } catch (e) {
442
+ const msg = e instanceof Error ? e.message : String(e);
443
+ if (!/already exists/i.test(msg))
444
+ throw e;
445
+ const recheck = await args.query(introspectSql);
446
+ const present = recheck.some((r) => {
447
+ const v = r?.column_name;
448
+ return typeof v === "string" && v.toLowerCase() === col.name.toLowerCase();
449
+ });
450
+ if (!present)
451
+ throw e;
452
+ args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
453
+ }
454
+ }
455
+ return { missing, altered };
456
+ }
341
457
 
342
458
  // dist/src/notifications/queue.js
343
459
  import { readFileSync as readFileSync5, writeFileSync as writeFileSync3, renameSync, mkdirSync as mkdirSync4, openSync as openSync2, closeSync as closeSync2, unlinkSync as unlinkSync3, statSync as statSync2 } from "node:fs";
@@ -703,64 +819,33 @@ var DeeplakeApi = class {
703
819
  }
704
820
  }
705
821
  /**
706
- * Ensure a vector column exists on the given table.
707
- *
708
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
709
- * EXISTS …` on every SessionStart. On a long-running workspace that's
710
- * already migrated, every call returns 500 "Column already exists" — noisy
711
- * in the log and a wasted round-trip. Worse, the very first call after the
712
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
713
- * window (~30s) during which subsequent INSERTs fail; minimising the
714
- * number of ALTER calls minimises exposure to that window.
715
- *
716
- * New flow:
717
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
718
- * return — zero network calls.
719
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
720
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
721
- * bug. If the column is present → mark + return.
722
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
723
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
724
- * "already exists" (race: another client added it between our SELECT
725
- * and ALTER).
822
+ * Heal any missing columns on a table so it matches one of the schema
823
+ * definitions in `deeplake-schema.ts`. One SELECT against
824
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
825
+ * only the genuinely missing ones — never blanket, never `IF NOT
826
+ * EXISTS`.
726
827
  *
727
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
728
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
828
+ * History: an earlier path used a local marker file (`col_<name>` under
829
+ * the index-marker dir) to skip even the SELECT after the first
830
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
831
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
832
+ * because Deeplake used to expose a ~30s post-ALTER bug where
833
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
834
+ * minimum. The bug was re-verified on 2026-05-18 against
835
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
836
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
837
+ * + targeted ALTER pattern survives the marker removal because: each
838
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
839
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
729
840
  */
730
- async ensureEmbeddingColumn(table, column) {
731
- await this.ensureColumn(table, column, "FLOAT4[]");
732
- }
733
- /**
734
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
735
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
736
- * column that was added to the schema after the table was originally
737
- * created. Used today for `summary_embedding`, `message_embedding`, and
738
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
739
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
740
- * with `column "agent" does not exist`.
741
- */
742
- async ensureColumn(table, column, sqlType) {
743
- const markers = await getIndexMarkerStore();
744
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
745
- if (markers.hasFreshIndexMarker(markerPath))
746
- return;
747
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
748
- const rows = await this.query(colCheck);
749
- if (rows.length > 0) {
750
- markers.writeIndexMarker(markerPath);
751
- return;
752
- }
753
- try {
754
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
755
- } catch (e) {
756
- const msg = e instanceof Error ? e.message : String(e);
757
- if (!/already exists/i.test(msg))
758
- throw e;
759
- const recheck = await this.query(colCheck);
760
- if (recheck.length === 0)
761
- throw e;
762
- }
763
- markers.writeIndexMarker(markerPath);
841
+ async healSchema(table, columns) {
842
+ await healMissingColumns({
843
+ query: (sql) => this.query(sql),
844
+ tableName: table,
845
+ workspaceId: this.workspaceId,
846
+ columns,
847
+ log: log3
848
+ });
764
849
  }
765
850
  /** List all tables in the workspace (with retry). */
766
851
  async listTables(forceRefresh = false) {
@@ -831,20 +916,21 @@ var DeeplakeApi = class {
831
916
  }
832
917
  throw lastErr;
833
918
  }
834
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
919
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
835
920
  async ensureTable(name) {
921
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
922
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
923
+ }
836
924
  const tbl = sqlIdent(name ?? this.tableName);
837
925
  const tables = await this.listTables();
838
926
  if (!tables.includes(tbl)) {
839
927
  log3(`table "${tbl}" not found, creating`);
840
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
928
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
841
929
  log3(`table "${tbl}" created`);
842
930
  if (!tables.includes(tbl))
843
931
  this._tablesCache = [...tables, tbl];
844
932
  }
845
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
846
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
847
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
933
+ await this.healSchema(tbl, MEMORY_COLUMNS);
848
934
  }
849
935
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
850
936
  async ensureSessionsTable(name) {
@@ -852,14 +938,12 @@ var DeeplakeApi = class {
852
938
  const tables = await this.listTables();
853
939
  if (!tables.includes(safe)) {
854
940
  log3(`table "${safe}" not found, creating`);
855
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
941
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
856
942
  log3(`table "${safe}" created`);
857
943
  if (!tables.includes(safe))
858
944
  this._tablesCache = [...tables, safe];
859
945
  }
860
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
861
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
862
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
946
+ await this.healSchema(safe, SESSIONS_COLUMNS);
863
947
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
864
948
  }
865
949
  /**
@@ -877,11 +961,12 @@ var DeeplakeApi = class {
877
961
  const tables = await this.listTables();
878
962
  if (!tables.includes(safe)) {
879
963
  log3(`table "${safe}" not found, creating`);
880
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
964
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
881
965
  log3(`table "${safe}" created`);
882
966
  if (!tables.includes(safe))
883
967
  this._tablesCache = [...tables, safe];
884
968
  }
969
+ await this.healSchema(safe, SKILLS_COLUMNS);
885
970
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
886
971
  }
887
972
  };