@deeplake/hivemind 0.7.35 → 0.7.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -335,7 +335,123 @@ function sqlIdent(name) {
335
335
 
336
336
  // dist/src/embeddings/columns.js
337
337
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
338
- var MESSAGE_EMBEDDING_COL = "message_embedding";
338
+
339
+ // dist/src/deeplake-schema.js
340
+ var MEMORY_COLUMNS = Object.freeze([
341
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
342
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
343
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
344
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
345
+ { name: "summary_embedding", sql: "FLOAT4[]" },
346
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
347
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
348
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
349
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
350
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
351
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
352
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
353
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
354
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
355
+ ]);
356
+ var SESSIONS_COLUMNS = Object.freeze([
357
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
358
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
359
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
360
+ { name: "message", sql: "JSONB" },
361
+ { name: "message_embedding", sql: "FLOAT4[]" },
362
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
363
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
364
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
365
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
366
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
367
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
368
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
369
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
370
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
371
+ ]);
372
+ var SKILLS_COLUMNS = Object.freeze([
373
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
374
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
375
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
376
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
377
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
378
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
379
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
380
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
381
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
382
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
383
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
384
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
385
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
386
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
387
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
388
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
389
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
390
+ ]);
391
+ function validateSchema(label, cols) {
392
+ const seen = /* @__PURE__ */ new Set();
393
+ for (const col of cols) {
394
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
395
+ throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
396
+ }
397
+ if (seen.has(col.name)) {
398
+ throw new Error(`${label}: duplicate column "${col.name}"`);
399
+ }
400
+ seen.add(col.name);
401
+ const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
402
+ const hasDefault = /\bDEFAULT\b/i.test(col.sql);
403
+ if (notNull && !hasDefault) {
404
+ throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
405
+ }
406
+ }
407
+ }
408
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
409
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
410
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
411
+ function buildCreateTableSql(tableName, cols) {
412
+ const safe = sqlIdent(tableName);
413
+ const colSql = cols.map((c) => `${c.name} ${c.sql}`).join(", ");
414
+ return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
415
+ }
416
+ function buildIntrospectionSql(tableName, workspaceId) {
417
+ return `SELECT column_name FROM information_schema.columns WHERE table_name = '${sqlStr(tableName)}' AND table_schema = '${sqlStr(workspaceId)}'`;
418
+ }
419
+ async function healMissingColumns(args) {
420
+ const safeTable = sqlIdent(args.tableName);
421
+ const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
422
+ const rows = await args.query(introspectSql);
423
+ const existing = /* @__PURE__ */ new Set();
424
+ for (const row of rows) {
425
+ const v = row?.column_name;
426
+ if (typeof v === "string")
427
+ existing.add(v.toLowerCase());
428
+ }
429
+ const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
430
+ const missing = missingCols.map((c) => c.name);
431
+ if (missingCols.length === 0)
432
+ return { missing, altered: [] };
433
+ const altered = [];
434
+ for (const col of missingCols) {
435
+ try {
436
+ await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${col.name} ${col.sql}`);
437
+ altered.push(col.name);
438
+ args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
439
+ } catch (e) {
440
+ const msg = e instanceof Error ? e.message : String(e);
441
+ if (!/already exists/i.test(msg))
442
+ throw e;
443
+ const recheck = await args.query(introspectSql);
444
+ const present = recheck.some((r) => {
445
+ const v = r?.column_name;
446
+ return typeof v === "string" && v.toLowerCase() === col.name.toLowerCase();
447
+ });
448
+ if (!present)
449
+ throw e;
450
+ args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
451
+ }
452
+ }
453
+ return { missing, altered };
454
+ }
339
455
 
340
456
  // dist/src/notifications/queue.js
341
457
  import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync2, openSync, closeSync, unlinkSync as unlinkSync2, statSync } from "node:fs";
@@ -701,64 +817,33 @@ var DeeplakeApi = class {
701
817
  }
702
818
  }
703
819
  /**
704
- * Ensure a vector column exists on the given table.
705
- *
706
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
707
- * EXISTS …` on every SessionStart. On a long-running workspace that's
708
- * already migrated, every call returns 500 "Column already exists" — noisy
709
- * in the log and a wasted round-trip. Worse, the very first call after the
710
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
711
- * window (~30s) during which subsequent INSERTs fail; minimising the
712
- * number of ALTER calls minimises exposure to that window.
820
+ * Heal any missing columns on a table so it matches one of the schema
821
+ * definitions in `deeplake-schema.ts`. One SELECT against
822
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
823
+ * only the genuinely missing ones — never blanket, never `IF NOT
824
+ * EXISTS`.
713
825
  *
714
- * New flow:
715
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
716
- * return — zero network calls.
717
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
718
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
719
- * bug. If the column is present → mark + return.
720
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
721
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
722
- * "already exists" (race: another client added it between our SELECT
723
- * and ALTER).
724
- *
725
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
726
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
826
+ * History: an earlier path used a local marker file (`col_<name>` under
827
+ * the index-marker dir) to skip even the SELECT after the first
828
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
829
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
830
+ * because Deeplake used to expose a ~30s post-ALTER bug where
831
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
832
+ * minimum. The bug was re-verified on 2026-05-18 against
833
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
834
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
835
+ * + targeted ALTER pattern survives the marker removal because: each
836
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
837
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
727
838
  */
728
- async ensureEmbeddingColumn(table, column) {
729
- await this.ensureColumn(table, column, "FLOAT4[]");
730
- }
731
- /**
732
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
733
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
734
- * column that was added to the schema after the table was originally
735
- * created. Used today for `summary_embedding`, `message_embedding`, and
736
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
737
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
738
- * with `column "agent" does not exist`.
739
- */
740
- async ensureColumn(table, column, sqlType) {
741
- const markers = await getIndexMarkerStore();
742
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
743
- if (markers.hasFreshIndexMarker(markerPath))
744
- return;
745
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
746
- const rows = await this.query(colCheck);
747
- if (rows.length > 0) {
748
- markers.writeIndexMarker(markerPath);
749
- return;
750
- }
751
- try {
752
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
753
- } catch (e) {
754
- const msg = e instanceof Error ? e.message : String(e);
755
- if (!/already exists/i.test(msg))
756
- throw e;
757
- const recheck = await this.query(colCheck);
758
- if (recheck.length === 0)
759
- throw e;
760
- }
761
- markers.writeIndexMarker(markerPath);
839
+ async healSchema(table, columns) {
840
+ await healMissingColumns({
841
+ query: (sql) => this.query(sql),
842
+ tableName: table,
843
+ workspaceId: this.workspaceId,
844
+ columns,
845
+ log: log3
846
+ });
762
847
  }
763
848
  /** List all tables in the workspace (with retry). */
764
849
  async listTables(forceRefresh = false) {
@@ -829,20 +914,21 @@ var DeeplakeApi = class {
829
914
  }
830
915
  throw lastErr;
831
916
  }
832
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
917
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
833
918
  async ensureTable(name) {
919
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
920
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
921
+ }
834
922
  const tbl = sqlIdent(name ?? this.tableName);
835
923
  const tables = await this.listTables();
836
924
  if (!tables.includes(tbl)) {
837
925
  log3(`table "${tbl}" not found, creating`);
838
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
926
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
839
927
  log3(`table "${tbl}" created`);
840
928
  if (!tables.includes(tbl))
841
929
  this._tablesCache = [...tables, tbl];
842
930
  }
843
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
844
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
845
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
931
+ await this.healSchema(tbl, MEMORY_COLUMNS);
846
932
  }
847
933
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
848
934
  async ensureSessionsTable(name) {
@@ -850,14 +936,12 @@ var DeeplakeApi = class {
850
936
  const tables = await this.listTables();
851
937
  if (!tables.includes(safe)) {
852
938
  log3(`table "${safe}" not found, creating`);
853
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
939
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
854
940
  log3(`table "${safe}" created`);
855
941
  if (!tables.includes(safe))
856
942
  this._tablesCache = [...tables, safe];
857
943
  }
858
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
859
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
860
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
944
+ await this.healSchema(safe, SESSIONS_COLUMNS);
861
945
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
862
946
  }
863
947
  /**
@@ -875,11 +959,12 @@ var DeeplakeApi = class {
875
959
  const tables = await this.listTables();
876
960
  if (!tables.includes(safe)) {
877
961
  log3(`table "${safe}" not found, creating`);
878
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
962
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
879
963
  log3(`table "${safe}" created`);
880
964
  if (!tables.includes(safe))
881
965
  this._tablesCache = [...tables, safe];
882
966
  }
967
+ await this.healSchema(safe, SKILLS_COLUMNS);
883
968
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
884
969
  }
885
970
  };
@@ -142,7 +142,6 @@ function sqlIdent(name) {
142
142
 
143
143
  // dist/src/embeddings/columns.js
144
144
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
145
- var MESSAGE_EMBEDDING_COL = "message_embedding";
146
145
 
147
146
  // dist/src/utils/client-header.js
148
147
  var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
@@ -153,6 +152,123 @@ function deeplakeClientHeader() {
153
152
  return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
154
153
  }
155
154
 
155
+ // dist/src/deeplake-schema.js
156
+ var MEMORY_COLUMNS = Object.freeze([
157
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
158
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
159
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
160
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
161
+ { name: "summary_embedding", sql: "FLOAT4[]" },
162
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
163
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
164
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
165
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
166
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
167
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
168
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
169
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
170
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
171
+ ]);
172
+ var SESSIONS_COLUMNS = Object.freeze([
173
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
174
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
175
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
176
+ { name: "message", sql: "JSONB" },
177
+ { name: "message_embedding", sql: "FLOAT4[]" },
178
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
179
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
180
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
181
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
182
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
183
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
184
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
185
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
186
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
187
+ ]);
188
+ var SKILLS_COLUMNS = Object.freeze([
189
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
190
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
191
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
192
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
193
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
194
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
195
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
196
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
197
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
198
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
199
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
200
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
201
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
202
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
203
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
204
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
205
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
206
+ ]);
207
+ function validateSchema(label, cols) {
208
+ const seen = /* @__PURE__ */ new Set();
209
+ for (const col of cols) {
210
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
211
+ throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
212
+ }
213
+ if (seen.has(col.name)) {
214
+ throw new Error(`${label}: duplicate column "${col.name}"`);
215
+ }
216
+ seen.add(col.name);
217
+ const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
218
+ const hasDefault = /\bDEFAULT\b/i.test(col.sql);
219
+ if (notNull && !hasDefault) {
220
+ throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
221
+ }
222
+ }
223
+ }
224
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
225
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
226
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
227
+ function buildCreateTableSql(tableName, cols) {
228
+ const safe = sqlIdent(tableName);
229
+ const colSql = cols.map((c) => `${c.name} ${c.sql}`).join(", ");
230
+ return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
231
+ }
232
+ function buildIntrospectionSql(tableName, workspaceId) {
233
+ return `SELECT column_name FROM information_schema.columns WHERE table_name = '${sqlStr(tableName)}' AND table_schema = '${sqlStr(workspaceId)}'`;
234
+ }
235
+ async function healMissingColumns(args) {
236
+ const safeTable = sqlIdent(args.tableName);
237
+ const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
238
+ const rows = await args.query(introspectSql);
239
+ const existing = /* @__PURE__ */ new Set();
240
+ for (const row of rows) {
241
+ const v = row?.column_name;
242
+ if (typeof v === "string")
243
+ existing.add(v.toLowerCase());
244
+ }
245
+ const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
246
+ const missing = missingCols.map((c) => c.name);
247
+ if (missingCols.length === 0)
248
+ return { missing, altered: [] };
249
+ const altered = [];
250
+ for (const col of missingCols) {
251
+ try {
252
+ await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${col.name} ${col.sql}`);
253
+ altered.push(col.name);
254
+ args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
255
+ } catch (e) {
256
+ const msg = e instanceof Error ? e.message : String(e);
257
+ if (!/already exists/i.test(msg))
258
+ throw e;
259
+ const recheck = await args.query(introspectSql);
260
+ const present = recheck.some((r) => {
261
+ const v = r?.column_name;
262
+ return typeof v === "string" && v.toLowerCase() === col.name.toLowerCase();
263
+ });
264
+ if (!present)
265
+ throw e;
266
+ args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
267
+ }
268
+ }
269
+ return { missing, altered };
270
+ }
271
+
156
272
  // dist/src/notifications/queue.js
157
273
  import { readFileSync as readFileSync2, writeFileSync, renameSync, mkdirSync, openSync, closeSync, unlinkSync, statSync } from "node:fs";
158
274
  import { join as join3, resolve } from "node:path";
@@ -535,64 +651,33 @@ var DeeplakeApi = class {
535
651
  }
536
652
  }
537
653
  /**
538
- * Ensure a vector column exists on the given table.
539
- *
540
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
541
- * EXISTS …` on every SessionStart. On a long-running workspace that's
542
- * already migrated, every call returns 500 "Column already exists" — noisy
543
- * in the log and a wasted round-trip. Worse, the very first call after the
544
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
545
- * window (~30s) during which subsequent INSERTs fail; minimising the
546
- * number of ALTER calls minimises exposure to that window.
547
- *
548
- * New flow:
549
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
550
- * return — zero network calls.
551
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
552
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
553
- * bug. If the column is present → mark + return.
554
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
555
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
556
- * "already exists" (race: another client added it between our SELECT
557
- * and ALTER).
654
+ * Heal any missing columns on a table so it matches one of the schema
655
+ * definitions in `deeplake-schema.ts`. One SELECT against
656
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
657
+ * only the genuinely missing ones — never blanket, never `IF NOT
658
+ * EXISTS`.
558
659
  *
559
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
560
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
561
- */
562
- async ensureEmbeddingColumn(table, column) {
563
- await this.ensureColumn(table, column, "FLOAT4[]");
564
- }
565
- /**
566
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
567
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
568
- * column that was added to the schema after the table was originally
569
- * created. Used today for `summary_embedding`, `message_embedding`, and
570
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
571
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
572
- * with `column "agent" does not exist`.
660
+ * History: an earlier path used a local marker file (`col_<name>` under
661
+ * the index-marker dir) to skip even the SELECT after the first
662
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
663
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
664
+ * because Deeplake used to expose a ~30s post-ALTER bug where
665
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
666
+ * minimum. The bug was re-verified on 2026-05-18 against
667
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
668
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
669
+ * + targeted ALTER pattern survives the marker removal because: each
670
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
671
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
573
672
  */
574
- async ensureColumn(table, column, sqlType) {
575
- const markers = await getIndexMarkerStore();
576
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
577
- if (markers.hasFreshIndexMarker(markerPath))
578
- return;
579
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
580
- const rows = await this.query(colCheck);
581
- if (rows.length > 0) {
582
- markers.writeIndexMarker(markerPath);
583
- return;
584
- }
585
- try {
586
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
587
- } catch (e) {
588
- const msg = e instanceof Error ? e.message : String(e);
589
- if (!/already exists/i.test(msg))
590
- throw e;
591
- const recheck = await this.query(colCheck);
592
- if (recheck.length === 0)
593
- throw e;
594
- }
595
- markers.writeIndexMarker(markerPath);
673
+ async healSchema(table, columns) {
674
+ await healMissingColumns({
675
+ query: (sql) => this.query(sql),
676
+ tableName: table,
677
+ workspaceId: this.workspaceId,
678
+ columns,
679
+ log: log3
680
+ });
596
681
  }
597
682
  /** List all tables in the workspace (with retry). */
598
683
  async listTables(forceRefresh = false) {
@@ -663,20 +748,21 @@ var DeeplakeApi = class {
663
748
  }
664
749
  throw lastErr;
665
750
  }
666
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
751
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
667
752
  async ensureTable(name) {
753
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
754
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
755
+ }
668
756
  const tbl = sqlIdent(name ?? this.tableName);
669
757
  const tables = await this.listTables();
670
758
  if (!tables.includes(tbl)) {
671
759
  log3(`table "${tbl}" not found, creating`);
672
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
760
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
673
761
  log3(`table "${tbl}" created`);
674
762
  if (!tables.includes(tbl))
675
763
  this._tablesCache = [...tables, tbl];
676
764
  }
677
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
678
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
679
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
765
+ await this.healSchema(tbl, MEMORY_COLUMNS);
680
766
  }
681
767
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
682
768
  async ensureSessionsTable(name) {
@@ -684,14 +770,12 @@ var DeeplakeApi = class {
684
770
  const tables = await this.listTables();
685
771
  if (!tables.includes(safe)) {
686
772
  log3(`table "${safe}" not found, creating`);
687
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
773
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
688
774
  log3(`table "${safe}" created`);
689
775
  if (!tables.includes(safe))
690
776
  this._tablesCache = [...tables, safe];
691
777
  }
692
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
693
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
694
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
778
+ await this.healSchema(safe, SESSIONS_COLUMNS);
695
779
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
696
780
  }
697
781
  /**
@@ -709,11 +793,12 @@ var DeeplakeApi = class {
709
793
  const tables = await this.listTables();
710
794
  if (!tables.includes(safe)) {
711
795
  log3(`table "${safe}" not found, creating`);
712
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
796
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
713
797
  log3(`table "${safe}" created`);
714
798
  if (!tables.includes(safe))
715
799
  this._tablesCache = [...tables, safe];
716
800
  }
801
+ await this.healSchema(safe, SKILLS_COLUMNS);
717
802
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
718
803
  }
719
804
  };