@deeplake/hivemind 0.7.35 → 0.7.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -141,7 +141,6 @@ function sqlIdent(name) {
141
141
 
142
142
  // dist/src/embeddings/columns.js
143
143
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
144
- var MESSAGE_EMBEDDING_COL = "message_embedding";
145
144
 
146
145
  // dist/src/utils/client-header.js
147
146
  var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
@@ -152,6 +151,123 @@ function deeplakeClientHeader() {
152
151
  return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
153
152
  }
154
153
 
154
+ // dist/src/deeplake-schema.js
155
+ var MEMORY_COLUMNS = Object.freeze([
156
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
157
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
158
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
159
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
160
+ { name: "summary_embedding", sql: "FLOAT4[]" },
161
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
162
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
163
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
164
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
165
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
166
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
167
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
168
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
169
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
170
+ ]);
171
+ var SESSIONS_COLUMNS = Object.freeze([
172
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
173
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
174
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
175
+ { name: "message", sql: "JSONB" },
176
+ { name: "message_embedding", sql: "FLOAT4[]" },
177
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
178
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
179
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
180
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
181
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
182
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
183
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
184
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
185
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
186
+ ]);
187
+ var SKILLS_COLUMNS = Object.freeze([
188
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
189
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
190
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
191
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
192
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
193
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
194
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
195
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
196
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
197
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
198
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
199
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
200
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
201
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
202
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
203
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
204
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
205
+ ]);
206
/**
 * Sanity-check a column list before it is used to build DDL.
 *
 * Throws on the first problem found, in this order per column:
 *  - the name is not a string, or is not a plain identifier
 *    (letters, digits, underscore; must not start with a digit);
 *  - the name repeats an earlier one. The comparison is case-insensitive
 *    because column names are interpolated unquoted into the generated SQL
 *    and healMissingColumns compares names via toLowerCase();
 *  - the `sql` definition is missing or blank;
 *  - the column is NOT NULL but has no DEFAULT.
 *
 * @param {string} label - Schema name used as the prefix of every error message.
 * @param {Iterable<{name: string, sql: string}>} cols - Column definitions to check.
 * @returns {void}
 * @throws {Error} Describing the first invalid column.
 */
function validateSchema(label, cols) {
  const seen = /* @__PURE__ */ new Set();
  for (const col of cols) {
    const { name, sql } = col;
    // RegExp#test coerces its argument, so a missing name would otherwise be
    // accepted as the identifier "undefined".
    if (typeof name !== "string" || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(name)) {
      throw new Error(`${label}: column name "${name}" is not a valid SQL identifier`);
    }
    const key = name.toLowerCase();
    if (seen.has(key)) {
      throw new Error(`${label}: duplicate column "${name}"`);
    }
    seen.add(key);
    // Same coercion hazard as above: a missing definition would be emitted
    // into the DDL as the literal text "undefined".
    if (typeof sql !== "string" || sql.trim() === "") {
      throw new Error(`${label}: column "${name}" has no SQL definition`);
    }
    const notNull = /\bNOT\s+NULL\b/i.test(sql);
    const hasDefault = /\bDEFAULT\b/i.test(sql);
    if (notNull && !hasDefault) {
      throw new Error(`${label}: column "${name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
    }
  }
}
223
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
224
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
225
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
226
/**
 * Build the `CREATE TABLE IF NOT EXISTS ... USING deeplake` statement for a
 * table from a list of column definitions.
 *
 * @param {string} tableName - Table name; passed through sqlIdent() before use.
 * @param {Array<{name: string, sql: string}>} cols - Columns, emitted in order
 *   as `<name> <sql>` separated by ", ".
 * @returns {string} The complete DDL statement.
 */
function buildCreateTableSql(tableName, cols) {
  // Resolve the table identifier first so an invalid name fails before any
  // column text is assembled.
  const tableIdent = sqlIdent(tableName);
  const definitions = cols.map(({ name, sql }) => [name, sql].join(" "));
  const columnList = definitions.join(", ");
  return `CREATE TABLE IF NOT EXISTS "${tableIdent}" (${columnList}) USING deeplake`;
}
231
/**
 * Build the query that lists the column names of one table in one schema
 * from `information_schema.columns`.
 *
 * @param {string} tableName - Table to inspect; escaped with sqlStr().
 * @param {string} workspaceId - Value matched against `table_schema`; escaped with sqlStr().
 * @returns {string} A single-line SELECT statement.
 */
function buildIntrospectionSql(tableName, workspaceId) {
  const tableLiteral = sqlStr(tableName);
  const schemaLiteral = sqlStr(workspaceId);
  const clauses = [
    "SELECT column_name FROM information_schema.columns",
    `WHERE table_name = '${tableLiteral}'`,
    `AND table_schema = '${schemaLiteral}'`
  ];
  return clauses.join(" ");
}
234
/**
 * Add whichever of the expected columns are absent from an existing table.
 *
 * Runs one introspection query, diffs the result against `args.columns`
 * (names compared case-insensitively), then issues one
 * `ALTER TABLE ... ADD COLUMN` per missing column, sequentially.
 *
 * If an ALTER fails with a message containing "already exists", the
 * introspection query is re-run; when the column is now present the failure
 * is treated as a race with another writer and swallowed (the column is not
 * reported in `altered`). Any other failure is rethrown unchanged.
 *
 * @param {object} args
 * @param {(sql: string) => Promise<Array<object>>} args.query - Executes SQL and resolves to rows.
 * @param {string} args.tableName - Table to heal.
 * @param {string} args.workspaceId - Schema used for the introspection query.
 * @param {Array<{name: string, sql: string}>} args.columns - Expected columns.
 * @param {(msg: string) => void} [args.log] - Optional logger.
 * @returns {Promise<{missing: string[], altered: string[]}>} `missing` lists the
 *   columns absent at introspection time; `altered` lists those this call added.
 */
async function healMissingColumns(args) {
  const quotedTable = sqlIdent(args.tableName);
  const columnsQuery = buildIntrospectionSql(args.tableName, args.workspaceId);

  // Lower-cased column name of an introspection row, or undefined when the
  // row carries no string `column_name`.
  const lowerName = (row) => {
    const value = row?.column_name;
    return typeof value === "string" ? value.toLowerCase() : undefined;
  };

  const present = /* @__PURE__ */ new Set();
  for (const row of await args.query(columnsQuery)) {
    const key = lowerName(row);
    if (key !== undefined) {
      present.add(key);
    }
  }

  const toAdd = args.columns.filter((c) => !present.has(c.name.toLowerCase()));
  const missing = toAdd.map((c) => c.name);
  const altered = [];

  // When nothing is missing the loop body never runs and `altered` stays empty.
  for (const { name, sql } of toAdd) {
    try {
      await args.query(`ALTER TABLE "${quotedTable}" ADD COLUMN ${name} ${sql}`);
      altered.push(name);
      args.log?.(`schema-heal: added "${args.tableName}"."${name}"`);
    } catch (err) {
      const text = err instanceof Error ? err.message : String(err);
      const looksLikeRace = /already exists/i.test(text);
      if (!looksLikeRace) {
        throw err;
      }
      // Confirm the column really exists before swallowing the error.
      const wanted = name.toLowerCase();
      const rowsNow = await args.query(columnsQuery);
      const nowThere = rowsNow.some((row) => lowerName(row) === wanted);
      if (!nowThere) {
        throw err;
      }
      args.log?.(`schema-heal: "${args.tableName}"."${name}" appeared via race, treating as success`);
    }
  }

  return { missing, altered };
}
270
+
155
271
  // dist/src/notifications/queue.js
156
272
  import { readFileSync as readFileSync2, writeFileSync, renameSync, mkdirSync, openSync, closeSync, unlinkSync, statSync } from "node:fs";
157
273
  import { join as join3, resolve } from "node:path";
@@ -534,64 +650,33 @@ var DeeplakeApi = class {
534
650
  }
535
651
  }
536
652
  /**
537
- * Ensure a vector column exists on the given table.
538
- *
539
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
540
- * EXISTS …` on every SessionStart. On a long-running workspace that's
541
- * already migrated, every call returns 500 "Column already exists" — noisy
542
- * in the log and a wasted round-trip. Worse, the very first call after the
543
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
544
- * window (~30s) during which subsequent INSERTs fail; minimising the
545
- * number of ALTER calls minimises exposure to that window.
546
- *
547
- * New flow:
548
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
549
- * return — zero network calls.
550
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
551
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
552
- * bug. If the column is present → mark + return.
553
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
554
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
555
- * "already exists" (race: another client added it between our SELECT
556
- * and ALTER).
653
+ * Heal any missing columns on a table so it matches one of the schema
654
+ * definitions in `deeplake-schema.ts`. One SELECT against
655
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
656
+ * only the genuinely missing ones — never blanket, never `IF NOT
657
+ * EXISTS`.
557
658
  *
558
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
559
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
659
+ * History: an earlier path used a local marker file (`col_<name>` under
660
+ * the index-marker dir) to skip even the SELECT after the first
661
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
662
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
663
+ * because Deeplake used to expose a ~30s post-ALTER bug where
664
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
665
+ * minimum. The bug was re-verified on 2026-05-18 against
666
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
667
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
668
+ * + targeted ALTER pattern survives the marker removal because: each
669
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
670
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
560
671
  */
561
- async ensureEmbeddingColumn(table, column) {
562
- await this.ensureColumn(table, column, "FLOAT4[]");
563
- }
564
- /**
565
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
566
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
567
- * column that was added to the schema after the table was originally
568
- * created. Used today for `summary_embedding`, `message_embedding`, and
569
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
570
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
571
- * with `column "agent" does not exist`.
572
- */
573
- async ensureColumn(table, column, sqlType) {
574
- const markers = await getIndexMarkerStore();
575
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
576
- if (markers.hasFreshIndexMarker(markerPath))
577
- return;
578
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
579
- const rows = await this.query(colCheck);
580
- if (rows.length > 0) {
581
- markers.writeIndexMarker(markerPath);
582
- return;
583
- }
584
- try {
585
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
586
- } catch (e) {
587
- const msg = e instanceof Error ? e.message : String(e);
588
- if (!/already exists/i.test(msg))
589
- throw e;
590
- const recheck = await this.query(colCheck);
591
- if (recheck.length === 0)
592
- throw e;
593
- }
594
- markers.writeIndexMarker(markerPath);
672
+ async healSchema(table, columns) {
673
+ await healMissingColumns({
674
+ query: (sql) => this.query(sql),
675
+ tableName: table,
676
+ workspaceId: this.workspaceId,
677
+ columns,
678
+ log: log3
679
+ });
595
680
  }
596
681
  /** List all tables in the workspace (with retry). */
597
682
  async listTables(forceRefresh = false) {
@@ -662,20 +747,21 @@ var DeeplakeApi = class {
662
747
  }
663
748
  throw lastErr;
664
749
  }
665
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
750
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
666
751
  async ensureTable(name) {
752
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
753
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
754
+ }
667
755
  const tbl = sqlIdent(name ?? this.tableName);
668
756
  const tables = await this.listTables();
669
757
  if (!tables.includes(tbl)) {
670
758
  log3(`table "${tbl}" not found, creating`);
671
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
759
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
672
760
  log3(`table "${tbl}" created`);
673
761
  if (!tables.includes(tbl))
674
762
  this._tablesCache = [...tables, tbl];
675
763
  }
676
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
677
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
678
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
764
+ await this.healSchema(tbl, MEMORY_COLUMNS);
679
765
  }
680
766
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
681
767
  async ensureSessionsTable(name) {
@@ -683,14 +769,12 @@ var DeeplakeApi = class {
683
769
  const tables = await this.listTables();
684
770
  if (!tables.includes(safe)) {
685
771
  log3(`table "${safe}" not found, creating`);
686
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
772
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
687
773
  log3(`table "${safe}" created`);
688
774
  if (!tables.includes(safe))
689
775
  this._tablesCache = [...tables, safe];
690
776
  }
691
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
692
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
693
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
777
+ await this.healSchema(safe, SESSIONS_COLUMNS);
694
778
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
695
779
  }
696
780
  /**
@@ -708,11 +792,12 @@ var DeeplakeApi = class {
708
792
  const tables = await this.listTables();
709
793
  if (!tables.includes(safe)) {
710
794
  log3(`table "${safe}" not found, creating`);
711
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
795
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
712
796
  log3(`table "${safe}" created`);
713
797
  if (!tables.includes(safe))
714
798
  this._tablesCache = [...tables, safe];
715
799
  }
800
+ await this.healSchema(safe, SKILLS_COLUMNS);
716
801
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
717
802
  }
718
803
  };
@@ -136,7 +136,6 @@ function sqlIdent(name) {
136
136
 
137
137
  // dist/src/embeddings/columns.js
138
138
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
139
- var MESSAGE_EMBEDDING_COL = "message_embedding";
140
139
 
141
140
  // dist/src/utils/client-header.js
142
141
  var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
@@ -147,6 +146,123 @@ function deeplakeClientHeader() {
147
146
  return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
148
147
  }
149
148
 
149
+ // dist/src/deeplake-schema.js
150
+ var MEMORY_COLUMNS = Object.freeze([
151
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
152
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
153
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
154
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
155
+ { name: "summary_embedding", sql: "FLOAT4[]" },
156
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
157
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
158
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
159
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
160
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
161
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
162
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
163
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
164
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
165
+ ]);
166
+ var SESSIONS_COLUMNS = Object.freeze([
167
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
168
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
169
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
170
+ { name: "message", sql: "JSONB" },
171
+ { name: "message_embedding", sql: "FLOAT4[]" },
172
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
173
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
174
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
175
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
176
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
177
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
178
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
179
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
180
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
181
+ ]);
182
+ var SKILLS_COLUMNS = Object.freeze([
183
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
184
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
185
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
186
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
187
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
188
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
189
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
190
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
191
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
192
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
193
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
194
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
195
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
196
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
197
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
198
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
199
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
200
+ ]);
201
/**
 * Sanity-check a column list before it is used to build DDL.
 *
 * Throws on the first problem found, in this order per column:
 *  - the name is not a string, or is not a plain identifier
 *    (letters, digits, underscore; must not start with a digit);
 *  - the name repeats an earlier one. The comparison is case-insensitive
 *    because column names are interpolated unquoted into the generated SQL
 *    and healMissingColumns compares names via toLowerCase();
 *  - the `sql` definition is missing or blank;
 *  - the column is NOT NULL but has no DEFAULT.
 *
 * @param {string} label - Schema name used as the prefix of every error message.
 * @param {Iterable<{name: string, sql: string}>} cols - Column definitions to check.
 * @returns {void}
 * @throws {Error} Describing the first invalid column.
 */
function validateSchema(label, cols) {
  const seen = /* @__PURE__ */ new Set();
  for (const col of cols) {
    const { name, sql } = col;
    // RegExp#test coerces its argument, so a missing name would otherwise be
    // accepted as the identifier "undefined".
    if (typeof name !== "string" || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(name)) {
      throw new Error(`${label}: column name "${name}" is not a valid SQL identifier`);
    }
    const key = name.toLowerCase();
    if (seen.has(key)) {
      throw new Error(`${label}: duplicate column "${name}"`);
    }
    seen.add(key);
    // Same coercion hazard as above: a missing definition would be emitted
    // into the DDL as the literal text "undefined".
    if (typeof sql !== "string" || sql.trim() === "") {
      throw new Error(`${label}: column "${name}" has no SQL definition`);
    }
    const notNull = /\bNOT\s+NULL\b/i.test(sql);
    const hasDefault = /\bDEFAULT\b/i.test(sql);
    if (notNull && !hasDefault) {
      throw new Error(`${label}: column "${name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
    }
  }
}
218
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
219
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
220
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
221
/**
 * Build the `CREATE TABLE IF NOT EXISTS ... USING deeplake` statement for a
 * table from a list of column definitions.
 *
 * @param {string} tableName - Table name; passed through sqlIdent() before use.
 * @param {Array<{name: string, sql: string}>} cols - Columns, emitted in order
 *   as `<name> <sql>` separated by ", ".
 * @returns {string} The complete DDL statement.
 */
function buildCreateTableSql(tableName, cols) {
  // Resolve the table identifier first so an invalid name fails before any
  // column text is assembled.
  const tableIdent = sqlIdent(tableName);
  const definitions = cols.map(({ name, sql }) => [name, sql].join(" "));
  const columnList = definitions.join(", ");
  return `CREATE TABLE IF NOT EXISTS "${tableIdent}" (${columnList}) USING deeplake`;
}
226
/**
 * Build the query that lists the column names of one table in one schema
 * from `information_schema.columns`.
 *
 * @param {string} tableName - Table to inspect; escaped with sqlStr().
 * @param {string} workspaceId - Value matched against `table_schema`; escaped with sqlStr().
 * @returns {string} A single-line SELECT statement.
 */
function buildIntrospectionSql(tableName, workspaceId) {
  const tableLiteral = sqlStr(tableName);
  const schemaLiteral = sqlStr(workspaceId);
  const clauses = [
    "SELECT column_name FROM information_schema.columns",
    `WHERE table_name = '${tableLiteral}'`,
    `AND table_schema = '${schemaLiteral}'`
  ];
  return clauses.join(" ");
}
229
/**
 * Add whichever of the expected columns are absent from an existing table.
 *
 * Runs one introspection query, diffs the result against `args.columns`
 * (names compared case-insensitively), then issues one
 * `ALTER TABLE ... ADD COLUMN` per missing column, sequentially.
 *
 * If an ALTER fails with a message containing "already exists", the
 * introspection query is re-run; when the column is now present the failure
 * is treated as a race with another writer and swallowed (the column is not
 * reported in `altered`). Any other failure is rethrown unchanged.
 *
 * @param {object} args
 * @param {(sql: string) => Promise<Array<object>>} args.query - Executes SQL and resolves to rows.
 * @param {string} args.tableName - Table to heal.
 * @param {string} args.workspaceId - Schema used for the introspection query.
 * @param {Array<{name: string, sql: string}>} args.columns - Expected columns.
 * @param {(msg: string) => void} [args.log] - Optional logger.
 * @returns {Promise<{missing: string[], altered: string[]}>} `missing` lists the
 *   columns absent at introspection time; `altered` lists those this call added.
 */
async function healMissingColumns(args) {
  const quotedTable = sqlIdent(args.tableName);
  const columnsQuery = buildIntrospectionSql(args.tableName, args.workspaceId);

  // Lower-cased column name of an introspection row, or undefined when the
  // row carries no string `column_name`.
  const lowerName = (row) => {
    const value = row?.column_name;
    return typeof value === "string" ? value.toLowerCase() : undefined;
  };

  const present = /* @__PURE__ */ new Set();
  for (const row of await args.query(columnsQuery)) {
    const key = lowerName(row);
    if (key !== undefined) {
      present.add(key);
    }
  }

  const toAdd = args.columns.filter((c) => !present.has(c.name.toLowerCase()));
  const missing = toAdd.map((c) => c.name);
  const altered = [];

  // When nothing is missing the loop body never runs and `altered` stays empty.
  for (const { name, sql } of toAdd) {
    try {
      await args.query(`ALTER TABLE "${quotedTable}" ADD COLUMN ${name} ${sql}`);
      altered.push(name);
      args.log?.(`schema-heal: added "${args.tableName}"."${name}"`);
    } catch (err) {
      const text = err instanceof Error ? err.message : String(err);
      const looksLikeRace = /already exists/i.test(text);
      if (!looksLikeRace) {
        throw err;
      }
      // Confirm the column really exists before swallowing the error.
      const wanted = name.toLowerCase();
      const rowsNow = await args.query(columnsQuery);
      const nowThere = rowsNow.some((row) => lowerName(row) === wanted);
      if (!nowThere) {
        throw err;
      }
      args.log?.(`schema-heal: "${args.tableName}"."${name}" appeared via race, treating as success`);
    }
  }

  return { missing, altered };
}
265
+
150
266
  // dist/src/notifications/queue.js
151
267
  import { readFileSync as readFileSync2, writeFileSync, renameSync, mkdirSync, openSync, closeSync, unlinkSync, statSync } from "node:fs";
152
268
  import { join as join3, resolve } from "node:path";
@@ -529,64 +645,33 @@ var DeeplakeApi = class {
529
645
  }
530
646
  }
531
647
  /**
532
- * Ensure a vector column exists on the given table.
533
- *
534
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
535
- * EXISTS …` on every SessionStart. On a long-running workspace that's
536
- * already migrated, every call returns 500 "Column already exists" — noisy
537
- * in the log and a wasted round-trip. Worse, the very first call after the
538
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
539
- * window (~30s) during which subsequent INSERTs fail; minimising the
540
- * number of ALTER calls minimises exposure to that window.
648
+ * Heal any missing columns on a table so it matches one of the schema
649
+ * definitions in `deeplake-schema.ts`. One SELECT against
650
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
651
+ * only the genuinely missing ones — never blanket, never `IF NOT
652
+ * EXISTS`.
541
653
  *
542
- * New flow:
543
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
544
- * return — zero network calls.
545
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
546
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
547
- * bug. If the column is present → mark + return.
548
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
549
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
550
- * "already exists" (race: another client added it between our SELECT
551
- * and ALTER).
552
- *
553
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
554
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
555
- */
556
- async ensureEmbeddingColumn(table, column) {
557
- await this.ensureColumn(table, column, "FLOAT4[]");
558
- }
559
- /**
560
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
561
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
562
- * column that was added to the schema after the table was originally
563
- * created. Used today for `summary_embedding`, `message_embedding`, and
564
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
565
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
566
- * with `column "agent" does not exist`.
654
+ * History: an earlier path used a local marker file (`col_<name>` under
655
+ * the index-marker dir) to skip even the SELECT after the first
656
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
657
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
658
+ * because Deeplake used to expose a ~30s post-ALTER bug where
659
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
660
+ * minimum. The bug was re-verified on 2026-05-18 against
661
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
662
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
663
+ * + targeted ALTER pattern survives the marker removal because: each
664
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
665
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
567
666
  */
568
- async ensureColumn(table, column, sqlType) {
569
- const markers = await getIndexMarkerStore();
570
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
571
- if (markers.hasFreshIndexMarker(markerPath))
572
- return;
573
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
574
- const rows = await this.query(colCheck);
575
- if (rows.length > 0) {
576
- markers.writeIndexMarker(markerPath);
577
- return;
578
- }
579
- try {
580
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
581
- } catch (e) {
582
- const msg = e instanceof Error ? e.message : String(e);
583
- if (!/already exists/i.test(msg))
584
- throw e;
585
- const recheck = await this.query(colCheck);
586
- if (recheck.length === 0)
587
- throw e;
588
- }
589
- markers.writeIndexMarker(markerPath);
667
+ async healSchema(table, columns) {
668
+ await healMissingColumns({
669
+ query: (sql) => this.query(sql),
670
+ tableName: table,
671
+ workspaceId: this.workspaceId,
672
+ columns,
673
+ log: log3
674
+ });
590
675
  }
591
676
  /** List all tables in the workspace (with retry). */
592
677
  async listTables(forceRefresh = false) {
@@ -657,20 +742,21 @@ var DeeplakeApi = class {
657
742
  }
658
743
  throw lastErr;
659
744
  }
660
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
745
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
661
746
  async ensureTable(name) {
747
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
748
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
749
+ }
662
750
  const tbl = sqlIdent(name ?? this.tableName);
663
751
  const tables = await this.listTables();
664
752
  if (!tables.includes(tbl)) {
665
753
  log3(`table "${tbl}" not found, creating`);
666
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
754
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
667
755
  log3(`table "${tbl}" created`);
668
756
  if (!tables.includes(tbl))
669
757
  this._tablesCache = [...tables, tbl];
670
758
  }
671
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
672
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
673
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
759
+ await this.healSchema(tbl, MEMORY_COLUMNS);
674
760
  }
675
761
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
676
762
  async ensureSessionsTable(name) {
@@ -678,14 +764,12 @@ var DeeplakeApi = class {
678
764
  const tables = await this.listTables();
679
765
  if (!tables.includes(safe)) {
680
766
  log3(`table "${safe}" not found, creating`);
681
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
767
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
682
768
  log3(`table "${safe}" created`);
683
769
  if (!tables.includes(safe))
684
770
  this._tablesCache = [...tables, safe];
685
771
  }
686
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
687
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
688
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
772
+ await this.healSchema(safe, SESSIONS_COLUMNS);
689
773
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
690
774
  }
691
775
  /**
@@ -703,11 +787,12 @@ var DeeplakeApi = class {
703
787
  const tables = await this.listTables();
704
788
  if (!tables.includes(safe)) {
705
789
  log3(`table "${safe}" not found, creating`);
706
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
790
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
707
791
  log3(`table "${safe}" created`);
708
792
  if (!tables.includes(safe))
709
793
  this._tablesCache = [...tables, safe];
710
794
  }
795
+ await this.healSchema(safe, SKILLS_COLUMNS);
711
796
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
712
797
  }
713
798
  };