@deeplake/hivemind 0.7.35 → 0.7.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -316,6 +316,9 @@ ${s.body}
316
316
  import { randomUUID } from "node:crypto";
317
317
 
318
318
  // dist/src/utils/sql.js
319
+ function sqlStr(value) {
320
+ return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
321
+ }
319
322
  function sqlIdent(name) {
320
323
  if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) {
321
324
  throw new Error(`Invalid SQL identifier: ${JSON.stringify(name)}`);
@@ -323,29 +326,142 @@ function sqlIdent(name) {
323
326
  return name;
324
327
  }
325
328
 
326
- // dist/src/skillify/skills-table.js
327
- function createSkillsTableSql(tableName) {
328
- const safe = sqlIdent(tableName);
329
- return `CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', contributors TEXT NOT NULL DEFAULT '[]', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`;
329
+ // dist/src/deeplake-schema.js
330
+ var MEMORY_COLUMNS = Object.freeze([
331
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
332
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
333
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
334
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
335
+ { name: "summary_embedding", sql: "FLOAT4[]" },
336
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
337
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
338
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
339
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
340
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
341
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
342
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
343
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
344
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
345
+ ]);
346
+ var SESSIONS_COLUMNS = Object.freeze([
347
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
348
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
349
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
350
+ { name: "message", sql: "JSONB" },
351
+ { name: "message_embedding", sql: "FLOAT4[]" },
352
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
353
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
354
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
355
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
356
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
357
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
358
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
359
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
360
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
361
+ ]);
362
+ var SKILLS_COLUMNS = Object.freeze([
363
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
364
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
365
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
366
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
367
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
368
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
369
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
370
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
371
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
372
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
373
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
374
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
375
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
376
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
377
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
378
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
379
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
380
+ ]);
381
+ function validateSchema(label, cols) {
382
+ const seen = /* @__PURE__ */ new Set();
383
+ for (const col of cols) {
384
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
385
+ throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
386
+ }
387
+ if (seen.has(col.name)) {
388
+ throw new Error(`${label}: duplicate column "${col.name}"`);
389
+ }
390
+ seen.add(col.name);
391
+ const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
392
+ const hasDefault = /\bDEFAULT\b/i.test(col.sql);
393
+ if (notNull && !hasDefault) {
394
+ throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
395
+ }
396
+ }
330
397
  }
331
- function addContributorsColumnSql(tableName) {
398
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
399
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
400
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
401
+ function buildCreateTableSql(tableName, cols) {
332
402
  const safe = sqlIdent(tableName);
333
- return `ALTER TABLE "${safe}" ADD COLUMN IF NOT EXISTS contributors TEXT NOT NULL DEFAULT '[]'`;
403
+ const colSql = cols.map((c) => `${c.name} ${c.sql}`).join(", ");
404
+ return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
334
405
  }
335
- function esc(s) {
336
- return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
406
+ function buildIntrospectionSql(tableName, workspaceId) {
407
+ return `SELECT column_name FROM information_schema.columns WHERE table_name = '${sqlStr(tableName)}' AND table_schema = '${sqlStr(workspaceId)}'`;
408
+ }
409
+ async function healMissingColumns(args) {
410
+ const safeTable = sqlIdent(args.tableName);
411
+ const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
412
+ const rows = await args.query(introspectSql);
413
+ const existing = /* @__PURE__ */ new Set();
414
+ for (const row of rows) {
415
+ const v = row?.column_name;
416
+ if (typeof v === "string")
417
+ existing.add(v.toLowerCase());
418
+ }
419
+ const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
420
+ const missing = missingCols.map((c) => c.name);
421
+ if (missingCols.length === 0)
422
+ return { missing, altered: [] };
423
+ const altered = [];
424
+ for (const col of missingCols) {
425
+ try {
426
+ await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${col.name} ${col.sql}`);
427
+ altered.push(col.name);
428
+ args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
429
+ } catch (e) {
430
+ const msg = e instanceof Error ? e.message : String(e);
431
+ if (!/already exists/i.test(msg))
432
+ throw e;
433
+ const recheck = await args.query(introspectSql);
434
+ const present = recheck.some((r) => {
435
+ const v = r?.column_name;
436
+ return typeof v === "string" && v.toLowerCase() === col.name.toLowerCase();
437
+ });
438
+ if (!present)
439
+ throw e;
440
+ args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
441
+ }
442
+ }
443
+ return { missing, altered };
337
444
  }
338
445
  function isMissingTableError(message) {
339
446
  if (!message)
340
447
  return false;
448
+ if (/permission denied|must be owner/i.test(message))
449
+ return false;
341
450
  if (/\bcolumn\b/i.test(message))
342
451
  return false;
343
452
  return /Table does not exist|relation .* does not exist|no such table/i.test(message);
344
453
  }
345
- function isMissingContributorsColumnError(message) {
454
+ function isMissingColumnError(message) {
346
455
  if (!message)
347
456
  return false;
348
- return /contributors.*(?:does not exist|not found|unknown)/i.test(message) || /(?:does not exist|unknown column).*contributors/i.test(message);
457
+ if (/permission denied|must be owner/i.test(message))
458
+ return false;
459
+ return /column ["']?[A-Za-z_][A-Za-z0-9_]*["']? .*does not exist/i.test(message) || /unknown column/i.test(message) || /no such column/i.test(message);
460
+ }
461
+
462
+ // dist/src/skillify/skills-table.js
463
+ function esc(s) {
464
+ return s.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, "");
349
465
  }
350
466
  async function insertSkillRow(args) {
351
467
  const id = args.id ?? randomUUID();
@@ -354,14 +470,29 @@ async function insertSkillRow(args) {
354
470
  const sql = `INSERT INTO "${sqlIdent(args.tableName)}" (id, name, project, project_key, local_path, install, source_sessions, source_agent, scope, author, contributors, description, trigger_text, body, version, created_at, updated_at) VALUES ('${esc(id)}', '${esc(args.name)}', '${esc(args.project)}', '${esc(args.projectKey)}', '${esc(args.localPath)}', '${esc(args.install)}', '${esc(sourceSessionsJson)}', '${esc(args.sourceAgent)}', '${esc(args.scope)}', '${esc(args.author)}', '${esc(contributorsJson)}', '${esc(args.description)}', '${esc(args.trigger ?? "")}', '${esc(args.body)}', ${args.version}, '${esc(args.createdAt)}', '${esc(args.updatedAt)}')`;
355
471
  try {
356
472
  await args.query(sql);
473
+ return;
357
474
  } catch (e) {
358
- if (isMissingTableError(e?.message)) {
359
- await args.query(createSkillsTableSql(args.tableName));
475
+ const msg = e instanceof Error ? e.message : String(e);
476
+ if (isMissingTableError(msg)) {
477
+ await args.query(buildCreateTableSql(args.tableName, SKILLS_COLUMNS));
478
+ await healMissingColumns({
479
+ query: args.query,
480
+ tableName: args.tableName,
481
+ workspaceId: args.workspaceId,
482
+ columns: SKILLS_COLUMNS
483
+ });
360
484
  await args.query(sql);
361
485
  return;
362
486
  }
363
- if (isMissingContributorsColumnError(e?.message)) {
364
- await args.query(addContributorsColumnSql(args.tableName));
487
+ if (isMissingColumnError(msg)) {
488
+ const result = await healMissingColumns({
489
+ query: args.query,
490
+ tableName: args.tableName,
491
+ workspaceId: args.workspaceId,
492
+ columns: SKILLS_COLUMNS
493
+ });
494
+ if (result.missing.length === 0)
495
+ throw e;
365
496
  await args.query(sql);
366
497
  return;
367
498
  }
@@ -1001,6 +1132,7 @@ async function main() {
1001
1132
  await insertSkillRow({
1002
1133
  query,
1003
1134
  tableName: cfg.skillsTable,
1135
+ workspaceId: cfg.workspaceId,
1004
1136
  name: verdict2.name,
1005
1137
  project: cfg.project,
1006
1138
  projectKey: cfg.projectKey,
@@ -4262,7 +4262,7 @@ var require_core = __commonJS({
4262
4262
  }
4263
4263
  }
4264
4264
  }
4265
- _addSchema(schema, meta3, baseId, validateSchema = this.opts.validateSchema, addSchema = this.opts.addUsedSchema) {
4265
+ _addSchema(schema, meta3, baseId, validateSchema2 = this.opts.validateSchema, addSchema = this.opts.addUsedSchema) {
4266
4266
  let id;
4267
4267
  const { schemaId } = this.opts;
4268
4268
  if (typeof schema == "object") {
@@ -4285,7 +4285,7 @@ var require_core = __commonJS({
4285
4285
  this._checkUnique(baseId);
4286
4286
  this.refs[baseId] = sch;
4287
4287
  }
4288
- if (validateSchema)
4288
+ if (validateSchema2)
4289
4289
  this.validateSchema(schema, true);
4290
4290
  return sch;
4291
4291
  }
@@ -23362,7 +23362,123 @@ function sqlIdent(name) {
23362
23362
 
23363
23363
  // dist/src/embeddings/columns.js
23364
23364
  var SUMMARY_EMBEDDING_COL = "summary_embedding";
23365
- var MESSAGE_EMBEDDING_COL = "message_embedding";
23365
+
23366
+ // dist/src/deeplake-schema.js
23367
+ var MEMORY_COLUMNS = Object.freeze([
23368
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
23369
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
23370
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
23371
+ { name: "summary", sql: "TEXT NOT NULL DEFAULT ''" },
23372
+ { name: "summary_embedding", sql: "FLOAT4[]" },
23373
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
23374
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'text/plain'" },
23375
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
23376
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
23377
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
23378
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
23379
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
23380
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
23381
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
23382
+ ]);
23383
+ var SESSIONS_COLUMNS = Object.freeze([
23384
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
23385
+ { name: "path", sql: "TEXT NOT NULL DEFAULT ''" },
23386
+ { name: "filename", sql: "TEXT NOT NULL DEFAULT ''" },
23387
+ { name: "message", sql: "JSONB" },
23388
+ { name: "message_embedding", sql: "FLOAT4[]" },
23389
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
23390
+ { name: "mime_type", sql: "TEXT NOT NULL DEFAULT 'application/json'" },
23391
+ { name: "size_bytes", sql: "BIGINT NOT NULL DEFAULT 0" },
23392
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
23393
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
23394
+ { name: "agent", sql: "TEXT NOT NULL DEFAULT ''" },
23395
+ { name: "plugin_version", sql: "TEXT NOT NULL DEFAULT ''" },
23396
+ { name: "creation_date", sql: "TEXT NOT NULL DEFAULT ''" },
23397
+ { name: "last_update_date", sql: "TEXT NOT NULL DEFAULT ''" }
23398
+ ]);
23399
+ var SKILLS_COLUMNS = Object.freeze([
23400
+ { name: "id", sql: "TEXT NOT NULL DEFAULT ''" },
23401
+ { name: "name", sql: "TEXT NOT NULL DEFAULT ''" },
23402
+ { name: "project", sql: "TEXT NOT NULL DEFAULT ''" },
23403
+ { name: "project_key", sql: "TEXT NOT NULL DEFAULT ''" },
23404
+ { name: "local_path", sql: "TEXT NOT NULL DEFAULT ''" },
23405
+ { name: "install", sql: "TEXT NOT NULL DEFAULT 'project'" },
23406
+ { name: "source_sessions", sql: "TEXT NOT NULL DEFAULT '[]'" },
23407
+ { name: "source_agent", sql: "TEXT NOT NULL DEFAULT ''" },
23408
+ { name: "scope", sql: "TEXT NOT NULL DEFAULT 'me'" },
23409
+ { name: "author", sql: "TEXT NOT NULL DEFAULT ''" },
23410
+ { name: "contributors", sql: "TEXT NOT NULL DEFAULT '[]'" },
23411
+ { name: "description", sql: "TEXT NOT NULL DEFAULT ''" },
23412
+ { name: "trigger_text", sql: "TEXT NOT NULL DEFAULT ''" },
23413
+ { name: "body", sql: "TEXT NOT NULL DEFAULT ''" },
23414
+ { name: "version", sql: "BIGINT NOT NULL DEFAULT 1" },
23415
+ { name: "created_at", sql: "TEXT NOT NULL DEFAULT ''" },
23416
+ { name: "updated_at", sql: "TEXT NOT NULL DEFAULT ''" }
23417
+ ]);
23418
+ function validateSchema(label, cols) {
23419
+ const seen = /* @__PURE__ */ new Set();
23420
+ for (const col of cols) {
23421
+ if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(col.name)) {
23422
+ throw new Error(`${label}: column name "${col.name}" is not a valid SQL identifier`);
23423
+ }
23424
+ if (seen.has(col.name)) {
23425
+ throw new Error(`${label}: duplicate column "${col.name}"`);
23426
+ }
23427
+ seen.add(col.name);
23428
+ const notNull = /\bNOT\s+NULL\b/i.test(col.sql);
23429
+ const hasDefault = /\bDEFAULT\b/i.test(col.sql);
23430
+ if (notNull && !hasDefault) {
23431
+ throw new Error(`${label}: column "${col.name}" is NOT NULL but has no DEFAULT \u2014 ALTER TABLE ADD COLUMN on a populated table would fail.`);
23432
+ }
23433
+ }
23434
+ }
23435
+ validateSchema("MEMORY_COLUMNS", MEMORY_COLUMNS);
23436
+ validateSchema("SESSIONS_COLUMNS", SESSIONS_COLUMNS);
23437
+ validateSchema("SKILLS_COLUMNS", SKILLS_COLUMNS);
23438
+ function buildCreateTableSql(tableName, cols) {
23439
+ const safe = sqlIdent(tableName);
23440
+ const colSql = cols.map((c) => `${c.name} ${c.sql}`).join(", ");
23441
+ return `CREATE TABLE IF NOT EXISTS "${safe}" (${colSql}) USING deeplake`;
23442
+ }
23443
+ function buildIntrospectionSql(tableName, workspaceId) {
23444
+ return `SELECT column_name FROM information_schema.columns WHERE table_name = '${sqlStr(tableName)}' AND table_schema = '${sqlStr(workspaceId)}'`;
23445
+ }
23446
+ async function healMissingColumns(args) {
23447
+ const safeTable = sqlIdent(args.tableName);
23448
+ const introspectSql = buildIntrospectionSql(args.tableName, args.workspaceId);
23449
+ const rows = await args.query(introspectSql);
23450
+ const existing = /* @__PURE__ */ new Set();
23451
+ for (const row of rows) {
23452
+ const v = row?.column_name;
23453
+ if (typeof v === "string")
23454
+ existing.add(v.toLowerCase());
23455
+ }
23456
+ const missingCols = args.columns.filter((c) => !existing.has(c.name.toLowerCase()));
23457
+ const missing = missingCols.map((c) => c.name);
23458
+ if (missingCols.length === 0)
23459
+ return { missing, altered: [] };
23460
+ const altered = [];
23461
+ for (const col of missingCols) {
23462
+ try {
23463
+ await args.query(`ALTER TABLE "${safeTable}" ADD COLUMN ${col.name} ${col.sql}`);
23464
+ altered.push(col.name);
23465
+ args.log?.(`schema-heal: added "${args.tableName}"."${col.name}"`);
23466
+ } catch (e) {
23467
+ const msg = e instanceof Error ? e.message : String(e);
23468
+ if (!/already exists/i.test(msg))
23469
+ throw e;
23470
+ const recheck = await args.query(introspectSql);
23471
+ const present = recheck.some((r) => {
23472
+ const v = r?.column_name;
23473
+ return typeof v === "string" && v.toLowerCase() === col.name.toLowerCase();
23474
+ });
23475
+ if (!present)
23476
+ throw e;
23477
+ args.log?.(`schema-heal: "${args.tableName}"."${col.name}" appeared via race, treating as success`);
23478
+ }
23479
+ }
23480
+ return { missing, altered };
23481
+ }
23366
23482
 
23367
23483
  // dist/src/notifications/queue.js
23368
23484
  import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync2, openSync, closeSync, unlinkSync as unlinkSync2, statSync } from "node:fs";
@@ -23728,64 +23844,33 @@ var DeeplakeApi = class {
23728
23844
  }
23729
23845
  }
23730
23846
  /**
23731
- * Ensure a vector column exists on the given table.
23732
- *
23733
- * The previous implementation always issued `ALTER TABLE ADD COLUMN IF NOT
23734
- * EXISTS …` on every SessionStart. On a long-running workspace that's
23735
- * already migrated, every call returns 500 "Column already exists" — noisy
23736
- * in the log and a wasted round-trip. Worse, the very first call after the
23737
- * column is genuinely added triggers Deeplake's post-ALTER `vector::at`
23738
- * window (~30s) during which subsequent INSERTs fail; minimising the
23739
- * number of ALTER calls minimises exposure to that window.
23847
+ * Heal any missing columns on a table so it matches one of the schema
23848
+ * definitions in `deeplake-schema.ts`. One SELECT against
23849
+ * `information_schema.columns` per call, then `ALTER TABLE ADD COLUMN`
23850
+ * only the genuinely missing ones — never blanket, never `IF NOT
23851
+ * EXISTS`.
23740
23852
  *
23741
- * New flow:
23742
- * 1. Check the local marker file (mirrors ensureLookupIndex). If fresh,
23743
- * return zero network calls.
23744
- * 2. SELECT 1 FROM information_schema.columns WHERE table_name = T AND
23745
- * column_name = C. Read-only, idempotent, can't tickle the post-ALTER
23746
- * bug. If the column is present mark + return.
23747
- * 3. Only if step 2 says the column is missing, fall back to ALTER ADD
23748
- * COLUMN IF NOT EXISTS. Mark on success, also mark if Deeplake reports
23749
- * "already exists" (race: another client added it between our SELECT
23750
- * and ALTER).
23751
- *
23752
- * Marker uses the same dir / TTL as ensureLookupIndex so both schema
23753
- * caches share an opt-out (HIVEMIND_INDEX_MARKER_DIR) and a TTL knob.
23754
- */
23755
- async ensureEmbeddingColumn(table, column) {
23756
- await this.ensureColumn(table, column, "FLOAT4[]");
23757
- }
23758
- /**
23759
- * Generic marker-gated column migration. Same SELECT-then-ALTER flow as
23760
- * ensureEmbeddingColumn, parameterized by SQL type so it can patch up any
23761
- * column that was added to the schema after the table was originally
23762
- * created. Used today for `summary_embedding`, `message_embedding`, and
23763
- * the `agent` column (added 2026-04-11) — the latter has no fallback if
23764
- * a user upgraded over a pre-2026-04-11 table, so every INSERT fails
23765
- * with `column "agent" does not exist`.
23853
+ * History: an earlier path used a local marker file (`col_<name>` under
23854
+ * the index-marker dir) to skip even the SELECT after the first
23855
+ * confirmation, plus per-column ALTERs for `summary_embedding`,
23856
+ * `message_embedding`, `agent`, `plugin_version`. The marker existed
23857
+ * because Deeplake used to expose a ~30s post-ALTER bug where
23858
+ * subsequent INSERTs failed, so we wanted to keep ALTER traffic to a
23859
+ * minimum. The bug was re-verified on 2026-05-18 against
23860
+ * `api.deeplake.ai` (`test_plugin` org) and no longer reproduces
23861
+ * (71/71 INSERTs OK, first success 2ms after ALTER). The single SELECT
23862
+ * + targeted ALTER pattern survives the marker removal because: each
23863
+ * ALTER still costs ~800ms (so blanket sweeps are wasteful) and the
23864
+ * diff produces clearer logs than "ALTER all with IF NOT EXISTS".
23766
23865
  */
23767
- async ensureColumn(table, column, sqlType) {
23768
- const markers = await getIndexMarkerStore();
23769
- const markerPath = markers.buildIndexMarkerPath(this.workspaceId, this.orgId, table, `col_${column}`);
23770
- if (markers.hasFreshIndexMarker(markerPath))
23771
- return;
23772
- const colCheck = `SELECT 1 FROM information_schema.columns WHERE table_name = '${sqlStr(table)}' AND column_name = '${sqlStr(column)}' AND table_schema = '${sqlStr(this.workspaceId)}' LIMIT 1`;
23773
- const rows = await this.query(colCheck);
23774
- if (rows.length > 0) {
23775
- markers.writeIndexMarker(markerPath);
23776
- return;
23777
- }
23778
- try {
23779
- await this.query(`ALTER TABLE "${table}" ADD COLUMN ${column} ${sqlType}`);
23780
- } catch (e) {
23781
- const msg = e instanceof Error ? e.message : String(e);
23782
- if (!/already exists/i.test(msg))
23783
- throw e;
23784
- const recheck = await this.query(colCheck);
23785
- if (recheck.length === 0)
23786
- throw e;
23787
- }
23788
- markers.writeIndexMarker(markerPath);
23866
+ async healSchema(table, columns) {
23867
+ await healMissingColumns({
23868
+ query: (sql) => this.query(sql),
23869
+ tableName: table,
23870
+ workspaceId: this.workspaceId,
23871
+ columns,
23872
+ log: log3
23873
+ });
23789
23874
  }
23790
23875
  /** List all tables in the workspace (with retry). */
23791
23876
  async listTables(forceRefresh = false) {
@@ -23856,20 +23941,21 @@ var DeeplakeApi = class {
23856
23941
  }
23857
23942
  throw lastErr;
23858
23943
  }
23859
- /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */
23944
+ /** Create the memory table if it doesn't already exist. Heal missing columns on existing tables. */
23860
23945
  async ensureTable(name) {
23946
+ if (!MEMORY_COLUMNS.some((c) => c.name === SUMMARY_EMBEDDING_COL)) {
23947
+ throw new Error(`MEMORY_COLUMNS missing "${SUMMARY_EMBEDDING_COL}" (embeddings/columns.ts drift)`);
23948
+ }
23861
23949
  const tbl = sqlIdent(name ?? this.tableName);
23862
23950
  const tables = await this.listTables();
23863
23951
  if (!tables.includes(tbl)) {
23864
23952
  log3(`table "${tbl}" not found, creating`);
23865
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', summary_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, tbl);
23953
+ await this.createTableWithRetry(buildCreateTableSql(tbl, MEMORY_COLUMNS), tbl);
23866
23954
  log3(`table "${tbl}" created`);
23867
23955
  if (!tables.includes(tbl))
23868
23956
  this._tablesCache = [...tables, tbl];
23869
23957
  }
23870
- await this.ensureEmbeddingColumn(tbl, SUMMARY_EMBEDDING_COL);
23871
- await this.ensureColumn(tbl, "agent", "TEXT NOT NULL DEFAULT ''");
23872
- await this.ensureColumn(tbl, "plugin_version", "TEXT NOT NULL DEFAULT ''");
23958
+ await this.healSchema(tbl, MEMORY_COLUMNS);
23873
23959
  }
23874
23960
  /** Create the sessions table (uses JSONB for message since every row is a JSON event). */
23875
23961
  async ensureSessionsTable(name) {
@@ -23877,14 +23963,12 @@ var DeeplakeApi = class {
23877
23963
  const tables = await this.listTables();
23878
23964
  if (!tables.includes(safe)) {
23879
23965
  log3(`table "${safe}" not found, creating`);
23880
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, message_embedding FLOAT4[], author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', plugin_version TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
23966
+ await this.createTableWithRetry(buildCreateTableSql(safe, SESSIONS_COLUMNS), safe);
23881
23967
  log3(`table "${safe}" created`);
23882
23968
  if (!tables.includes(safe))
23883
23969
  this._tablesCache = [...tables, safe];
23884
23970
  }
23885
- await this.ensureEmbeddingColumn(safe, MESSAGE_EMBEDDING_COL);
23886
- await this.ensureColumn(safe, "agent", "TEXT NOT NULL DEFAULT ''");
23887
- await this.ensureColumn(safe, "plugin_version", "TEXT NOT NULL DEFAULT ''");
23971
+ await this.healSchema(safe, SESSIONS_COLUMNS);
23888
23972
  await this.ensureLookupIndex(safe, "path_creation_date", `("path", "creation_date")`);
23889
23973
  }
23890
23974
  /**
@@ -23902,11 +23986,12 @@ var DeeplakeApi = class {
23902
23986
  const tables = await this.listTables();
23903
23987
  if (!tables.includes(safe)) {
23904
23988
  log3(`table "${safe}" not found, creating`);
23905
- await this.createTableWithRetry(`CREATE TABLE IF NOT EXISTS "${safe}" (id TEXT NOT NULL DEFAULT '', name TEXT NOT NULL DEFAULT '', project TEXT NOT NULL DEFAULT '', project_key TEXT NOT NULL DEFAULT '', local_path TEXT NOT NULL DEFAULT '', install TEXT NOT NULL DEFAULT 'project', source_sessions TEXT NOT NULL DEFAULT '[]', source_agent TEXT NOT NULL DEFAULT '', scope TEXT NOT NULL DEFAULT 'me', author TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', trigger_text TEXT NOT NULL DEFAULT '', body TEXT NOT NULL DEFAULT '', version BIGINT NOT NULL DEFAULT 1, created_at TEXT NOT NULL DEFAULT '', updated_at TEXT NOT NULL DEFAULT '') USING deeplake`, safe);
23989
+ await this.createTableWithRetry(buildCreateTableSql(safe, SKILLS_COLUMNS), safe);
23906
23990
  log3(`table "${safe}" created`);
23907
23991
  if (!tables.includes(safe))
23908
23992
  this._tablesCache = [...tables, safe];
23909
23993
  }
23994
+ await this.healSchema(safe, SKILLS_COLUMNS);
23910
23995
  await this.ensureLookupIndex(safe, "project_key_name", `("project_key", "name")`);
23911
23996
  }
23912
23997
  };