@deeplake/hivemind 0.7.34 → 0.7.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -316,6 +316,9 @@ ${s.body}
316
316
  import { randomUUID } from "node:crypto";
317
317
 
318
318
  // dist/src/utils/sql.js
319
/**
 * Escape a string for embedding inside a single-quoted SQL literal.
 *
 * Doubles backslashes and single quotes, then removes NUL and the other
 * control characters. Tab, line feed and carriage return are kept.
 *
 * @param {string} value - Raw text to embed.
 * @returns {string} The escaped text (without surrounding quotes).
 * @throws {TypeError} If `value` is not a string.
 */
function sqlStr(value) {
  // Fail with a clear message instead of "cannot read properties of undefined"
  // when a caller forgets to pass a value (e.g. an omitted workspace id).
  if (typeof value !== "string") {
    throw new TypeError(`sqlStr expects a string, got ${value === null ? "null" : typeof value}`);
  }
  return value
    .replace(/\\/g, "\\\\")
    .replace(/'/g, "''")
    .replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
}
319
322
  function sqlIdent(name) {
320
323
  if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) {
321
324
  throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`);
@@ -323,29 +326,142 @@ function sqlIdent(name) {
323
326
  return name;
324
327
  }
325
328
 
326
- // dist/src/skillify/skills-table.js
327
- function createSkillsTableSql(tableName) {
328
- const safe = sqlIdent(tableName);
329
- return `CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', contributors TEXT NOT NULL DEFAULT '[]', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`;
329
+ // dist/src/deeplake-schema.js
330
/**
 * Ordered column definitions for the memory-table schema.
 * Each entry pairs a column name with the SQL type/constraint fragment used
 * when the column is created.
 * Both the array and every entry are frozen so this shared constant cannot
 * be mutated at runtime (Object.freeze alone is shallow).
 */
var MEMORY_COLUMNS = Object.freeze([
  { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "summary_embedding", sql: "FLOAT4[]" },
  { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
  { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
  { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
].map((col) => Object.freeze(col)));
346
/**
 * Ordered column definitions for the sessions-table schema.
 * Each entry pairs a column name with the SQL type/constraint fragment used
 * when the column is created.
 * Both the array and every entry are frozen so this shared constant cannot
 * be mutated at runtime (Object.freeze alone is shallow).
 */
var SESSIONS_COLUMNS = Object.freeze([
  { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "message", sql: "JSONB" },
  { name: "message_embedding", sql: "FLOAT4[]" },
  { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
  { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
  { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
].map((col) => Object.freeze(col)));
362
/**
 * Ordered column definitions for the skills-table schema.
 * Each entry pairs a column name with the SQL type/constraint fragment used
 * when the column is created.
 * Both the array and every entry are frozen so this shared constant cannot
 * be mutated at runtime (Object.freeze alone is shallow).
 */
var SKILLS_COLUMNS = Object.freeze([
  { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
  { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
  { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
  { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
  { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
  { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
].map((col) => Object.freeze(col)));
381
/**
 * Sanity-check a schema definition; throws on the first problem found.
 *
 * Per column it checks that:
 *  - the name is a string and a plain SQL identifier,
 *  - the name is unique within the schema, compared case-insensitively
 *    (names are emitted unquoted in DDL and matched lower-cased when healing),
 *  - a NOT NULL column also declares a DEFAULT.
 *
 * @param {string} label - Schema name used as the error-message prefix.
 * @param {Iterable<{name: string, sql: string}>} cols - Column definitions.
 * @returns {void}
 * @throws {Error} Describing the first invalid column.
 */
function validateSchema(label, cols) {
  const seen = new Set();
  for (const col of cols) {
    // The typeof guard stops a missing name from slipping through by being
    // coerced to the valid-looking identifier "undefined".
    if (typeof col.name !== "string" || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
      throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
    }
    const key = col.name.toLowerCase();
    if (seen.has(key)) {
      throw new Error(`${label}: duplicate column "${col.name}"`);
    }
    seen.add(key);
    const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
    const hasDefault = /\bDEFAULT\b/i.test(col.sql);
    if (notNull && !hasDefault) {
      throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
    }
  }
}
331
- function addContributorsColumnSql(tableName) {
398
// Validate every schema definition once, at module load, so a malformed
// definition fails immediately rather than at first use.
for (const [label, cols] of [
  ["MEMORY_COLUMNS", MEMORY_COLUMNS],
  ["SESSIONS_COLUMNS", SESSIONS_COLUMNS],
  ["SKILLS_COLUMNS", SKILLS_COLUMNS]
]) {
  validateSchema(label, cols);
}
401
/**
 * Build the `CREATE TABLE IF NOT EXISTS ... USING deeplake` statement for a
 * schema definition.
 *
 * @param {string} tableName - Table name; checked by `sqlIdent`.
 * @param {ReadonlyArray<{name: string, sql: string}>} cols - Column definitions, in output order.
 * @returns {string} The DDL statement.
 * @throws {Error} If the table name or any column name is rejected by `sqlIdent`.
 */
function buildCreateTableSql(tableName, cols) {
  const safe = sqlIdent(tableName);
  // Column names are interpolated unquoted, so hold them to the same
  // identifier rule as the table name instead of trusting the caller.
  const colSql = cols.map((c) => `${sqlIdent(c.name)} ${c.sql}`).join(", ");
  return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
}
335
- function esc(s) {
336
- return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
406
/**
 * Build the query that lists the column names of one table within one
 * schema, read from `information_schema.columns`.
 * Both values are escaped with `sqlStr` and embedded as string literals.
 *
 * @param {string} tableName - Value matched against `table_name`.
 * @param {string} workspaceId - Value matched against `table_schema`.
 * @returns {string} The SELECT statement.
 */
function buildIntrospectionSql(tableName, workspaceId) {
  const tableLiteral = sqlStr(tableName);
  const schemaLiteral = sqlStr(workspaceId);
  return `SELECT column_name FROM information_schema.columns WHERE table_name = '${tableLiteral}' AND table_schema = '${schemaLiteral}'`;
}
409
/**
 * Add whichever of `args.columns` the table does not have yet.
 *
 * Issues one introspection SELECT, then one `ALTER TABLE ... ADD COLUMN`
 * per missing column, sequentially. An "already exists" failure is
 * tolerated only if a second introspection confirms the column is now
 * present; any other failure is rethrown.
 *
 * @param {object} args
 * @param {(sql: string) => Promise<Array<object>>} args.query - Runs a SQL statement and resolves to its rows.
 * @param {string} args.tableName - Table to heal; checked by `sqlIdent`.
 * @param {string} args.workspaceId - Matched against `table_schema` during introspection.
 * @param {ReadonlyArray<{name: string, sql: string}>} args.columns - Desired columns.
 * @param {(message: string) => void} [args.log] - Optional progress logger.
 * @returns {Promise<{missing: string[], altered: string[]}>} Columns found missing, and the subset this call added.
 * @throws {Error} On an invalid table/column identifier or an unrecoverable query failure.
 */
async function healMissingColumns(args) {
  // Collects lower-cased column names; rows without a string column_name are ignored.
  const readColumnNames = (rows) => {
    const names = new Set();
    for (const row of rows) {
      const v = row?.column_name;
      if (typeof v === "string") {
        names.add(v.toLowerCase());
      }
    }
    return names;
  };

  const safeTable = sqlIdent(args.tableName);
  const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
  const existing = readColumnNames(await args.query(introspectSql));
  const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
  const missing = missingCols.map((c) => c.name);
  const altered = [];
  for (const col of missingCols) {
    // The column name goes into the DDL unquoted, so validate it the same
    // way as the table name before building the statement.
    const safeCol = sqlIdent(col.name);
    try {
      await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${safeCol} ${col.sql}`);
      altered.push(col.name);
      args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      if (!/already exists/i.test(msg)) {
        throw e;
      }
      // Possibly a concurrent writer added the column between the SELECT and
      // the ALTER; trust that only if introspection now shows the column.
      const nowPresent = readColumnNames(await args.query(introspectSql));
      if (!nowPresent.has(col.name.toLowerCase())) {
        throw e;
      }
      args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
    }
  }
  return { missing, altered };
}
338
445
  function isMissingTableError(message) {
339
446
  if (!message)
340
447
  return false;
448
+ if (/permission denied|must be owner/i.test(message))
449
+ return false;
341
450
  if (/\bcolumn\b/i.test(message))
342
451
  return false;
343
452
  return /Table does not exist|relation .* does not exist|no such table/i.test(message);
344
453
  }
345
- function isMissingContributorsColumnError(message) {
454
/**
 * Decide whether an error message reports a missing column.
 *
 * Permission/ownership errors are never classified as missing-column errors.
 * Recognized shapes:
 *  - `column <name> ... does not exist`, where <name> may be quoted and/or
 *    dot-qualified (e.g. `column t.x does not exist`),
 *  - `unknown column`,
 *  - `no such column`.
 *
 * @param {string | undefined | null} message - Error text to classify.
 * @returns {boolean} True when the text matches one of the shapes above.
 */
function isMissingColumnError(message) {
  if (!message) {
    return false;
  }
  if (/permission denied|must be owner/i.test(message)) {
    return false;
  }
  // The optional group accepts qualified references such as t.x or "t"."x".
  return /column ["']?[A-Za-z_][A-Za-z0-9_]*(?:["']?\.["']?[A-Za-z_][A-Za-z0-9_]*)*["']? .*does not exist/i.test(message) || /unknown column/i.test(message) || /no such column/i.test(message);
}
461
+
462
+ // dist/src/skillify/skills-table.js
463
/**
 * Escape a string for embedding inside a single-quoted SQL literal.
 *
 * Doubles backslashes and single quotes, then removes NUL and the other
 * control characters (tab, line feed and carriage return are kept).
 * NUL is stripped here as well so this helper agrees with `sqlStr`, which
 * already removes it.
 *
 * @param {string} s - Raw text to embed.
 * @returns {string} The escaped text (without surrounding quotes).
 */
function esc(s) {
  return s
    .replace(/\\/g, "\\\\")
    .replace(/'/g, "''")
    .replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
}
350
466
  async function insertSkillRow(args) {
351
467
  const id = args.id ?? randomUUID();
@@ -354,14 +470,29 @@ async function insertSkillRow(args) {
354
470
  const sql = `INSERT INTO "${sqlIdent(args.tableName)}" (id, name, project, project_key, local_path, install, source_sessions, source_agent, scope, author, contributors, description, trigger_text, body, version, created_at, updated_at) VALUES ('${esc(id)}', '${esc(args.name)}', '${esc(args.project)}', '${esc(args.projectKey)}', '${esc(args.localPath)}', '${esc(args.install)}', '${esc(sourceSessionsJson)}', '${esc(args.sourceAgent)}', '${esc(args.scope)}', '${esc(args.author)}', '${esc(contributorsJson)}', '${esc(args.description)}', '${esc(args.trigger ?? "")}', '${esc(args.body)}', ${args.version}, '${esc(args.createdAt)}', '${esc(args.updatedAt)}')`;
355
471
  try {
356
472
  await args.query(sql);
473
+ return;
357
474
  } catch (e) {
358
- if (isMissingTableError(e?.message)) {
359
- await args.query(createSkillsTableSql(args.tableName));
475
+ const msg = e instanceof Error ? e.message : String(e);
476
+ if (isMissingTableError(msg)) {
477
+ await args.query(buildCreateTableSql(args.tableName, SKILLS_COLUMNS));
478
+ await healMissingColumns({
479
+ query: args.query,
480
+ tableName: args.tableName,
481
+ workspaceId: args.workspaceId,
482
+ columns: SKILLS_COLUMNS
483
+ });
360
484
  await args.query(sql);
361
485
  return;
362
486
  }
363
- if (isMissingContributorsColumnError(e?.message)) {
364
- await args.query(addContributorsColumnSql(args.tableName));
487
+ if (isMissingColumnError(msg)) {
488
+ const result = await healMissingColumns({
489
+ query: args.query,
490
+ tableName: args.tableName,
491
+ workspaceId: args.workspaceId,
492
+ columns: SKILLS_COLUMNS
493
+ });
494
+ if (result.missing.length === 0)
495
+ throw e;
365
496
  await args.query(sql);
366
497
  return;
367
498
  }
@@ -1001,6 +1132,7 @@ async function main() {
1001
1132
  await insertSkillRow({
1002
1133
  query,
1003
1134
  tableName: cfg.skillsTable,
1135
+ workspaceId: cfg.workspaceId,
1004
1136
  name: verdict2.name,
1005
1137
  project: cfg.project,
1006
1138
  projectKey: cfg.projectKey,
@@ -135,7 +135,6 @@ function sqlIdent(name) {
135
135
 
136
136
  // dist/src/embeddings/columns.js
137
137
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
138
- var MESSAGE_EMBEDDING_COL = "message_embedding";
139
138
 
140
139
  // dist/src/utils/client-header.js
141
140
  var DEEPLAKE_CLIENT_HEADER = "X-Deeplake-Client";
@@ -146,6 +145,123 @@ function deeplakeClientHeader() {
146
145
  return { [DEEPLAKE_CLIENT_HEADER]: deeplakeClientValue() };
147
146
  }
148
147
 
148
+ // dist/src/deeplake-schema.js
149
/**
 * Ordered column definitions for the memory-table schema.
 * Each entry pairs a column name with the SQL type/constraint fragment used
 * when the column is created.
 * Both the array and every entry are frozen so this shared constant cannot
 * be mutated at runtime (Object.freeze alone is shallow).
 */
var MEMORY_COLUMNS = Object.freeze([
  { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "summary_embedding", sql: "FLOAT4[]" },
  { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
  { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
  { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
].map((col) => Object.freeze(col)));
165
/**
 * Ordered column definitions for the sessions-table schema.
 * Each entry pairs a column name with the SQL type/constraint fragment used
 * when the column is created.
 * Both the array and every entry are frozen so this shared constant cannot
 * be mutated at runtime (Object.freeze alone is shallow).
 */
var SESSIONS_COLUMNS = Object.freeze([
  { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "message", sql: "JSONB" },
  { name: "message_embedding", sql: "FLOAT4[]" },
  { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
  { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
  { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
].map((col) => Object.freeze(col)));
181
/**
 * Ordered column definitions for the skills-table schema.
 * Each entry pairs a column name with the SQL type/constraint fragment used
 * when the column is created.
 * Both the array and every entry are frozen so this shared constant cannot
 * be mutated at runtime (Object.freeze alone is shallow).
 */
var SKILLS_COLUMNS = Object.freeze([
  { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
  { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
  { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
  { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
  { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
  { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
  { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
].map((col) => Object.freeze(col)));
200
/**
 * Sanity-check a schema definition; throws on the first problem found.
 *
 * Per column it checks that:
 *  - the name is a string and a plain SQL identifier,
 *  - the name is unique within the schema, compared case-insensitively
 *    (names are emitted unquoted in DDL and matched lower-cased when healing),
 *  - a NOT NULL column also declares a DEFAULT.
 *
 * @param {string} label - Schema name used as the error-message prefix.
 * @param {Iterable<{name: string, sql: string}>} cols - Column definitions.
 * @returns {void}
 * @throws {Error} Describing the first invalid column.
 */
function validateSchema(label, cols) {
  const seen = new Set();
  for (const col of cols) {
    // The typeof guard stops a missing name from slipping through by being
    // coerced to the valid-looking identifier "undefined".
    if (typeof col.name !== "string" || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
      throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
    }
    const key = col.name.toLowerCase();
    if (seen.has(key)) {
      throw new Error(`${label}: duplicate column "${col.name}"`);
    }
    seen.add(key);
    const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
    const hasDefault = /\bDEFAULT\b/i.test(col.sql);
    if (notNull && !hasDefault) {
      throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
    }
  }
}
217
// Validate every schema definition once, at module load, so a malformed
// definition fails immediately rather than at first use.
for (const [label, cols] of [
  ["MEMORY_COLUMNS", MEMORY_COLUMNS],
  ["SESSIONS_COLUMNS", SESSIONS_COLUMNS],
  ["SKILLS_COLUMNS", SKILLS_COLUMNS]
]) {
  validateSchema(label, cols);
}
220
/**
 * Build the `CREATE TABLE IF NOT EXISTS ... USING deeplake` statement for a
 * schema definition.
 *
 * @param {string} tableName - Table name; checked by `sqlIdent`.
 * @param {ReadonlyArray<{name: string, sql: string}>} cols - Column definitions, in output order.
 * @returns {string} The DDL statement.
 * @throws {Error} If the table name or any column name is rejected by `sqlIdent`.
 */
function buildCreateTableSql(tableName, cols) {
  const safe = sqlIdent(tableName);
  // Column names are interpolated unquoted, so hold them to the same
  // identifier rule as the table name instead of trusting the caller.
  const colSql = cols.map((c) => `${sqlIdent(c.name)} ${c.sql}`).join(", ");
  return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
}
225
/**
 * Build the query that lists the column names of one table within one
 * schema, read from `information_schema.columns`.
 * Both values are escaped with `sqlStr` and embedded as string literals.
 *
 * @param {string} tableName - Value matched against `table_name`.
 * @param {string} workspaceId - Value matched against `table_schema`.
 * @returns {string} The SELECT statement.
 */
function buildIntrospectionSql(tableName, workspaceId) {
  const tableLiteral = sqlStr(tableName);
  const schemaLiteral = sqlStr(workspaceId);
  return `SELECT column_name FROM information_schema.columns WHERE table_name = '${tableLiteral}' AND table_schema = '${schemaLiteral}'`;
}
228
/**
 * Add whichever of `args.columns` the table does not have yet.
 *
 * Issues one introspection SELECT, then one `ALTER TABLE ... ADD COLUMN`
 * per missing column, sequentially. An "already exists" failure is
 * tolerated only if a second introspection confirms the column is now
 * present; any other failure is rethrown.
 *
 * @param {object} args
 * @param {(sql: string) => Promise<Array<object>>} args.query - Runs a SQL statement and resolves to its rows.
 * @param {string} args.tableName - Table to heal; checked by `sqlIdent`.
 * @param {string} args.workspaceId - Matched against `table_schema` during introspection.
 * @param {ReadonlyArray<{name: string, sql: string}>} args.columns - Desired columns.
 * @param {(message: string) => void} [args.log] - Optional progress logger.
 * @returns {Promise<{missing: string[], altered: string[]}>} Columns found missing, and the subset this call added.
 * @throws {Error} On an invalid table/column identifier or an unrecoverable query failure.
 */
async function healMissingColumns(args) {
  // Collects lower-cased column names; rows without a string column_name are ignored.
  const readColumnNames = (rows) => {
    const names = new Set();
    for (const row of rows) {
      const v = row?.column_name;
      if (typeof v === "string") {
        names.add(v.toLowerCase());
      }
    }
    return names;
  };

  const safeTable = sqlIdent(args.tableName);
  const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
  const existing = readColumnNames(await args.query(introspectSql));
  const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
  const missing = missingCols.map((c) => c.name);
  const altered = [];
  for (const col of missingCols) {
    // The column name goes into the DDL unquoted, so validate it the same
    // way as the table name before building the statement.
    const safeCol = sqlIdent(col.name);
    try {
      await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${safeCol} ${col.sql}`);
      altered.push(col.name);
      args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      if (!/already exists/i.test(msg)) {
        throw e;
      }
      // Possibly a concurrent writer added the column between the SELECT and
      // the ALTER; trust that only if introspection now shows the column.
      const nowPresent = readColumnNames(await args.query(introspectSql));
      if (!nowPresent.has(col.name.toLowerCase())) {
        throw e;
      }
      args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
    }
  }
  return { missing, altered };
}
264
+
149
265
  // dist/src/notifications/queue.js
150
266
  import { readFileSync as readFileSync2, writeFileSync, renameSync, mkdirSync, openSync, closeSync, unlinkSync, statSync } from "node:fs";
151
267
  import { join as join3, resolve } from "node:path";
@@ -528,64 +644,33 @@ var DeeplakeApi = class {
528
644
  }
529
645
  }
530
646
  /**
531
- * Ensure a vector column exists on the given table.
532
- *
533
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
534
- * EXISTS …` on every SessionStart. On a long-running workspace that's
535
- * already migrated, every call returns 500 "Column already exists" — noisy
536
- * in the log and a wasted round-trip. Worse, the very first call after the
537
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
538
- * window (~30s) during which subsequent INSERTs fail; minimising the
539
- * number of ALTER calls minimises exposure to that window.
647
+ * Heal any missing columns on a table so it matches one of the schema
648
+ * definitions in `deeplake-schema.ts`. One SELECT against
649
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
650
+ * only the genuinely missing ones — never blanket, never `IF NOT
651
+ * EXISTS`.
540
652
  *
541
- * New flow:
542
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
543
- * return zero network calls.
544
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
545
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
546
- * bug. If the column is present mark + return.
547
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
548
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
549
- * "already exists" (race: another client added it between our SELECT
550
- * and ALTER).
551
- *
552
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
553
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
554
- */
555
- async ensureEmbeddingColumn(table, column) {
556
- await this.ensureColumn(table, column, "FLOAT4[]");
557
- }
558
- /**
559
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
560
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
561
- * column that was added to the schema after the table was originally
562
- * created. Used today for `summary_embedding`, `message_embedding`, and
563
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
564
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
565
- * with `column "agent" does not exist`.
653
+ * History: an earlier path used a local marker file (`col_<name>` under
654
+ * the index-marker dir) to skip even the SELECT after the first
655
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
656
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
657
+ * because Deeplake used to expose a ~30s post-ALTER bug where
658
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
659
+ * minimum. The bug was re-verified on 2026-05-18 against
660
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
661
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
662
+ * + targeted ALTER pattern survives the marker removal because: each
663
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
664
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
566
665
  */
567
- async ensureColumn(table, column, sqlType) {
568
- const markers = await getIndexMarkerStore();
569
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
570
- if (markers.hasFreshIndexMarker(markerPath))
571
- return;
572
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
573
- const rows = await this.query(colCheck);
574
- if (rows.length > 0) {
575
- markers.writeIndexMarker(markerPath);
576
- return;
577
- }
578
- try {
579
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
580
- } catch (e) {
581
- const msg = e instanceof Error ? e.message : String(e);
582
- if (!/already exists/i.test(msg))
583
- throw e;
584
- const recheck = await this.query(colCheck);
585
- if (recheck.length === 0)
586
- throw e;
587
- }
588
- markers.writeIndexMarker(markerPath);
666
+ async healSchema(table, columns) {
667
+ await healMissingColumns({
668
+ query: (sql) => this.query(sql),
669
+ tableName: table,
670
+ workspaceId: this.workspaceId,
671
+ columns,
672
+ log: log3
673
+ });
589
674
  }
590
675
  /** List all tables in the workspace (with retry). */
591
676
  async listTables(forceRefresh = false) {
@@ -656,20 +741,21 @@ var DeeplakeApi = class {
656
741
  }
657
742
  throw lastErr;
658
743
  }
659
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
744
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
660
745
  async ensureTable(name) {
746
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
747
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
748
+ }
661
749
  const tbl = sqlIdent(name ?? this.tableName);
662
750
  const tables = await this.listTables();
663
751
  if (!tables.includes(tbl)) {
664
752
  log3(`table "${tbl}" not found, creating`);
665
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
753
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
666
754
  log3(`table "${tbl}" created`);
667
755
  if (!tables.includes(tbl))
668
756
  this._tablesCache = [...tables, tbl];
669
757
  }
670
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
671
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
672
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
758
+ await this.healSchema(tbl, MEMORY_COLUMNS);
673
759
  }
674
760
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
675
761
  async ensureSessionsTable(name) {
@@ -677,14 +763,12 @@ var DeeplakeApi = class {
677
763
  const tables = await this.listTables();
678
764
  if (!tables.includes(safe)) {
679
765
  log3(`table "${safe}" not found, creating`);
680
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
766
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
681
767
  log3(`table "${safe}" created`);
682
768
  if (!tables.includes(safe))
683
769
  this._tablesCache = [...tables, safe];
684
770
  }
685
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
686
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
687
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
771
+ await this.healSchema(safe, SESSIONS_COLUMNS);
688
772
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
689
773
  }
690
774
  /**
@@ -702,11 +786,12 @@ var DeeplakeApi = class {
702
786
  const tables = await this.listTables();
703
787
  if (!tables.includes(safe)) {
704
788
  log3(`table "${safe}" not found, creating`);
705
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
789
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
706
790
  log3(`table "${safe}" created`);
707
791
  if (!tables.includes(safe))
708
792
  this._tablesCache = [...tables, safe];
709
793
  }
794
+ await this.healSchema(safe, SKILLS_COLUMNS);
710
795
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
711
796
  }
712
797
  };