@martian-engineering/lossless-claw 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,10 @@ import type { DatabaseSync } from "node:sqlite";
2
2
  import { getLcmDbFeatures } from "./features.js";
3
3
  import { parseUtcTimestampOrNull } from "../store/parse-utc-timestamp.js";
4
4
 
5
/**
 * Minimal logging surface accepted by the migration runner.
 *
 * The single `info` channel is optional, so an empty object is a valid logger.
 * It receives one pre-formatted message per call.
 */
type MigrationLogger = { info?: (message: string) => void };
8
+
5
9
  type SummaryColumnInfo = {
6
10
  name?: string;
7
11
  };
@@ -27,6 +31,18 @@ type SummaryParentEdgeRow = {
27
31
  parent_summary_id: string;
28
32
  };
29
33
 
34
/**
 * Row shape for queries that select the `name` column from `sqlite_master`.
 *
 * `name` is optional because rows are obtained through an unchecked cast and
 * are validated by the consumer before use.
 */
type TableNameRow = { name?: string };
37
+
38
/**
 * Declarative description of one standalone FTS virtual table.
 *
 * The array fields are `readonly` because consumers only read them; this also
 * lets callers pass `as const` tuples without copying.
 */
type FtsTableSpec = {
  /** Name of the virtual table; also the prefix of its shadow tables. */
  tableName: string;
  /** SQL executed to create the virtual table after any old one is dropped. */
  createSql: string;
  /** SQL executed right after `createSql` to populate the new table. */
  seedSql: string;
  /** Column names the existing table must expose to be considered current. */
  expectedColumns: readonly string[];
  /**
   * Substrings that, when found in the table's stored `CREATE` statement,
   * mark the existing table as stale.
   */
  staleSchemaPatterns?: readonly string[];
};
45
+
30
46
  function ensureSummaryDepthColumn(db: DatabaseSync): void {
31
47
  const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
32
48
  const hasDepth = summaryColumns.some((col) => col.name === "depth");
@@ -107,6 +123,29 @@ function ensureCompactionTelemetryColumns(db: DatabaseSync): void {
107
123
  }
108
124
  }
109
125
 
126
/**
 * Render an arbitrary thrown value as a short string for migration log lines.
 *
 * - `Error` instances yield their `message`, or their `name` when the message
 *   is empty, so the log never ends in a bare `error=`.
 * - Other values go through `String()`. When that produces a generic
 *   `[object ...]` tag, or throws (e.g. a null-prototype object), a JSON
 *   rendering is attempted instead.
 *
 * This function never throws: it is called from a `catch` block, where a
 * secondary exception would hide the error being reported.
 *
 * @param error - The value caught from a failed migration step.
 * @returns A human-readable description of `error`.
 */
function describeMigrationError(error: unknown): string {
  if (error instanceof Error) {
    return error.message || error.name;
  }

  let text: string;
  try {
    text = String(error);
  } catch {
    // Objects without a usable toString/valueOf cannot be coerced directly.
    text = Object.prototype.toString.call(error);
  }

  const isOpaqueObject =
    typeof error === "object" && error !== null && text.startsWith("[object ");
  if (!isOpaqueObject) {
    return text;
  }

  try {
    // JSON.stringify can return undefined at runtime (e.g. toJSON() => undefined).
    return JSON.stringify(error) ?? text;
  } catch {
    // Circular structures or BigInt members: keep the generic tag.
    return text;
  }
}
129
+
130
/**
 * Run one named migration step and report its outcome and duration.
 *
 * Logging is best-effort: an exception raised by the logger is discarded so
 * that it can neither turn a successful step into a failure nor replace the
 * error thrown by a failing step.
 *
 * @param name - Step identifier included in the log line (`step=<name>`).
 * @param log - Optional logger; nothing is formatted when it has no `info`.
 * @param step - The synchronous work to perform.
 * @throws Whatever `step` throws, rethrown unchanged after being logged.
 */
function runMigrationStep(
  name: string,
  log: MigrationLogger | undefined,
  step: () => void,
): void {
  const startedAt = Date.now();

  // The message is built lazily inside the guarded call, so formatting
  // problems are contained as well and no work is done without a logger.
  const report = (buildMessage: () => string): void => {
    try {
      log?.info?.(buildMessage());
    } catch {
      // Deliberately ignored: diagnostics must not affect the migration.
    }
  };

  try {
    step();
  } catch (error) {
    report(
      () =>
        `[lcm] migration step failed: step=${name} durationMs=${Date.now() - startedAt} error=${describeMigrationError(error)}`,
    );
    throw error;
  }

  // Reported outside the try block so a logger failure is never mistaken
  // for a step failure.
  report(
    () => `[lcm] migration step complete: step=${name} durationMs=${Date.now() - startedAt}`,
  );
}
148
+
110
149
  function backfillSummaryDepths(db: DatabaseSync): void {
111
150
  // Leaves are always depth 0, even if legacy rows had malformed values.
112
151
  db.exec(`UPDATE summaries SET depth = 0 WHERE kind = 'leaf'`);
@@ -444,10 +483,89 @@ function backfillToolCallColumns(db: DatabaseSync): void {
444
483
  );
445
484
  }
446
485
 
486
/**
 * Look up which of the given table names are present in `sqlite_master`.
 *
 * @param db - Database handle to query.
 * @param names - Candidate table names; an empty list skips the query.
 * @returns The subset of names returned by the query, keeping only non-empty
 *   string values.
 */
function getExistingTableNames(db: DatabaseSync, names: string[]): Set<string> {
  const found = new Set<string>();
  if (names.length === 0) {
    return found;
  }

  // One bind marker per candidate keeps the names out of the SQL text.
  const placeholders = names.map(() => "?").join(", ");
  const statement = db.prepare(
    `SELECT name FROM sqlite_master WHERE type = 'table' AND name IN (${placeholders})`,
  );

  for (const row of statement.all(...names) as TableNameRow[]) {
    const candidate = row.name;
    if (typeof candidate === "string" && candidate.length > 0) {
      found.add(candidate);
    }
  }
  return found;
}
500
+
501
/**
 * Derive the companion ("shadow") table names for an FTS table.
 *
 * @param tableName - Name of the FTS virtual table.
 * @returns `<tableName>_data`, `_idx`, `_content`, `_docsize` and `_config`,
 *   in that order.
 */
function getFtsShadowTableNames(tableName: string): string[] {
  const suffixes = ["data", "idx", "content", "docsize", "config"];
  return suffixes.map((suffix) => `${tableName}_${suffix}`);
}
510
+
511
/**
 * Validate a SQL identifier and wrap it in double quotes for interpolation.
 *
 * Only ASCII letters, digits and underscores are accepted, and the first
 * character may not be a digit. Because that rule already excludes the quote
 * character, no escaping is needed once validation has passed.
 *
 * @param identifier - Table or other object name to embed in SQL text.
 * @returns The identifier surrounded by double quotes.
 * @throws Error if `identifier` contains anything outside the allowed set.
 */
function quoteSqlIdentifier(identifier: string): string {
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(identifier)) {
    throw new Error(`Invalid SQL identifier: ${identifier}`);
  }
  return `"${identifier}"`;
}
517
+
518
/**
 * Decide whether an FTS table has to be dropped and rebuilt.
 *
 * A rebuild is requested when any of the following holds:
 * - the table itself or one of its shadow tables is missing;
 * - the stored `CREATE` statement contains one of `spec.staleSchemaPatterns`;
 * - one of `spec.expectedColumns` is absent from `PRAGMA table_info`;
 * - inspecting the schema throws.
 *
 * @param db - Database handle to inspect.
 * @param spec - Description of the desired table.
 * @returns `true` when the table should be recreated, `false` when it can stay.
 */
function shouldRecreateStandaloneFtsTable(db: DatabaseSync, spec: FtsTableSpec): boolean {
  const shadowNames = getFtsShadowTableNames(spec.tableName);
  const present = getExistingTableNames(db, [spec.tableName, ...shadowNames]);

  const everythingPresent =
    present.has(spec.tableName) && shadowNames.every((shadow) => present.has(shadow));
  if (!everythingPresent) {
    return true;
  }

  try {
    const schemaRow = db
      .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?")
      .get(spec.tableName) as { sql?: string } | undefined;
    const storedSql = schemaRow?.sql ?? "";
    for (const pattern of spec.staleSchemaPatterns ?? []) {
      if (storedSql.includes(pattern)) {
        return true;
      }
    }

    const pragmaRows = db
      .prepare(`PRAGMA table_info(${quoteSqlIdentifier(spec.tableName)})`)
      .all() as SummaryColumnInfo[];
    const declared = new Set<string>();
    for (const column of pragmaRows) {
      if (typeof column.name === "string" && column.name.length > 0) {
        declared.add(column.name);
      }
    }
    return !spec.expectedColumns.every((column) => declared.has(column));
  } catch {
    // A table whose schema cannot even be read is treated as needing a rebuild.
    return true;
  }
}
550
+
551
/**
 * Make sure the FTS table described by `spec` exists in its current form,
 * rebuilding it from scratch when `shouldRecreateStandaloneFtsTable` says so.
 *
 * The rebuild (drop, create, seed) runs inside a SAVEPOINT so it is applied
 * atomically. Without that, a failure while seeding would leave behind an
 * empty table that passes the structural checks and is never repopulated.
 *
 * @param db - Database handle to modify.
 * @param spec - Table name plus the SQL used to create and seed it.
 * @throws Whatever the underlying `exec` calls throw; on failure the rebuild
 *   is rolled back first (best effort) and the original error is rethrown.
 */
function ensureStandaloneFtsTable(db: DatabaseSync, spec: FtsTableSpec): void {
  if (!shouldRecreateStandaloneFtsTable(db, spec)) {
    return;
  }

  // SAVEPOINT works whether or not the caller already opened a transaction.
  const savepoint = quoteSqlIdentifier(`lcm_rebuild_${spec.tableName}`);
  db.exec(`SAVEPOINT ${savepoint}`);
  try {
    db.exec(`DROP TABLE IF EXISTS ${quoteSqlIdentifier(spec.tableName)}`);
    // Shadow tables are dropped explicitly as well — presumably to clear
    // leftovers that exist without their parent virtual table.
    for (const shadowTableName of getFtsShadowTableNames(spec.tableName)) {
      db.exec(`DROP TABLE IF EXISTS ${quoteSqlIdentifier(shadowTableName)}`);
    }
    db.exec(spec.createSql);
    db.exec(spec.seedSql);
  } catch (error) {
    try {
      db.exec(`ROLLBACK TO ${savepoint}`);
      db.exec(`RELEASE ${savepoint}`);
    } catch {
      // Keep the original failure; a rollback problem must not replace it.
    }
    throw error;
  }
  db.exec(`RELEASE ${savepoint}`);
}
563
+
447
564
  export function runLcmMigrations(
448
565
  db: DatabaseSync,
449
- options?: { fts5Available?: boolean },
566
+ options?: { fts5Available?: boolean; log?: MigrationLogger },
450
567
  ): void {
568
+ const log = options?.log;
451
569
  db.exec(`
452
570
  CREATE TABLE IF NOT EXISTS conversations (
453
571
  conversation_id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -645,79 +763,80 @@ export function runLcmMigrations(
645
763
  ON conversations (session_key, active, created_at)
646
764
  `);
647
765
  db.exec(`DROP INDEX IF EXISTS conversations_session_key_idx`);
648
- ensureSummaryDepthColumn(db);
649
- ensureSummaryMetadataColumns(db);
650
- ensureSummaryModelColumn(db);
651
- ensureCompactionTelemetryColumns(db);
652
- backfillSummaryDepths(db);
766
+ runMigrationStep("ensureSummaryDepthColumn", log, () => ensureSummaryDepthColumn(db));
767
+ runMigrationStep("ensureSummaryMetadataColumns", log, () =>
768
+ ensureSummaryMetadataColumns(db),
769
+ );
770
+ runMigrationStep("ensureSummaryModelColumn", log, () => ensureSummaryModelColumn(db));
771
+ runMigrationStep("ensureCompactionTelemetryColumns", log, () =>
772
+ ensureCompactionTelemetryColumns(db),
773
+ );
774
+ runMigrationStep("backfillSummaryDepths", log, () => backfillSummaryDepths(db));
653
775
  // Index on depth — created AFTER backfillSummaryDepths to avoid index
654
776
  // maintenance overhead during bulk depth updates on large existing DBs.
655
- db.exec(`CREATE INDEX IF NOT EXISTS summaries_conv_depth_kind_idx ON summaries (conversation_id, depth, kind)`);
656
- backfillSummaryMetadata(db);
657
- backfillToolCallColumns(db);
777
+ runMigrationStep("createSummariesDepthIndex", log, () =>
778
+ db.exec(
779
+ `CREATE INDEX IF NOT EXISTS summaries_conv_depth_kind_idx ON summaries (conversation_id, depth, kind)`,
780
+ ),
781
+ );
782
+ runMigrationStep("backfillSummaryMetadata", log, () => backfillSummaryMetadata(db));
783
+ runMigrationStep("backfillToolCallColumns", log, () => backfillToolCallColumns(db));
658
784
 
659
- const fts5Available = options?.fts5Available ?? getLcmDbFeatures(db).fts5Available;
785
+ const detectedFeatures = options?.fts5Available === false ? null : getLcmDbFeatures(db);
786
+ const fts5Available = options?.fts5Available ?? detectedFeatures?.fts5Available ?? false;
660
787
  if (!fts5Available) {
661
788
  return;
662
789
  }
663
790
 
791
+ const trigramTokenizerAvailable = detectedFeatures?.trigramTokenizerAvailable ?? false;
792
+ if (!trigramTokenizerAvailable) {
793
+ try {
794
+ db.exec(`DROP TABLE IF EXISTS summaries_fts_cjk`);
795
+ } catch {
796
+ // Best effort only. A stale virtual table should not block core migration.
797
+ }
798
+ }
799
+
664
800
  // FTS5 virtual tables for full-text search (cannot use IF NOT EXISTS, so check manually)
665
- const hasFts = db
666
- .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='messages_fts'")
667
- .get();
668
-
669
- if (hasFts) {
670
- // Check for stale schema: external-content FTS tables with content_rowid cause errors.
671
- // Drop and recreate as standalone FTS if the old schema is detected.
672
- const ftsSchema = (
673
- db
674
- .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='messages_fts'")
675
- .get() as { sql: string } | undefined
676
- )?.sql;
677
- if (ftsSchema && ftsSchema.includes("content_rowid")) {
678
- db.exec("DROP TABLE messages_fts");
679
- db.exec(`
801
+ runMigrationStep("ensureMessagesFts", log, () => {
802
+ ensureStandaloneFtsTable(db, {
803
+ tableName: "messages_fts",
804
+ createSql: `
680
805
  CREATE VIRTUAL TABLE messages_fts USING fts5(
681
806
  content,
682
807
  tokenize='porter unicode61'
683
- );
684
- INSERT INTO messages_fts(rowid, content) SELECT message_id, content FROM messages;
685
- `);
686
- }
687
- } else {
688
- db.exec(`
689
- CREATE VIRTUAL TABLE messages_fts USING fts5(
690
- content,
691
- tokenize='porter unicode61'
692
- );
693
- `);
694
- }
695
-
696
- const summariesFtsInfo = db
697
- .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='summaries_fts'")
698
- .get() as { sql?: string } | undefined;
699
- const summariesFtsSql = summariesFtsInfo?.sql ?? "";
700
- const summariesFtsColumns = db.prepare(`PRAGMA table_info(summaries_fts)`).all() as Array<{
701
- name?: string;
702
- }>;
703
- const hasSummaryIdColumn = summariesFtsColumns.some((col) => col.name === "summary_id");
704
- const shouldRecreateSummariesFts =
705
- !summariesFtsInfo ||
706
- !hasSummaryIdColumn ||
707
- summariesFtsSql.includes("content_rowid='summary_id'") ||
708
- summariesFtsSql.includes('content_rowid="summary_id"');
709
- if (shouldRecreateSummariesFts) {
710
- db.exec(`
711
- DROP TABLE IF EXISTS summaries_fts;
712
- CREATE VIRTUAL TABLE summaries_fts USING fts5(
713
- summary_id UNINDEXED,
714
- content,
715
- tokenize='porter unicode61'
716
- );
717
- INSERT INTO summaries_fts(summary_id, content)
718
- SELECT summary_id, content FROM summaries;
719
- `);
720
- }
808
+ )
809
+ `,
810
+ seedSql: `
811
+ INSERT INTO messages_fts(rowid, content)
812
+ SELECT message_id, content FROM messages
813
+ `,
814
+ expectedColumns: ["content"],
815
+ staleSchemaPatterns: ["content_rowid"],
816
+ });
817
+ });
818
+
819
+ runMigrationStep("ensureSummariesFts", log, () => {
820
+ ensureStandaloneFtsTable(db, {
821
+ tableName: "summaries_fts",
822
+ createSql: `
823
+ CREATE VIRTUAL TABLE summaries_fts USING fts5(
824
+ summary_id UNINDEXED,
825
+ content,
826
+ tokenize='porter unicode61'
827
+ )
828
+ `,
829
+ seedSql: `
830
+ INSERT INTO summaries_fts(summary_id, content)
831
+ SELECT summary_id, content FROM summaries
832
+ `,
833
+ expectedColumns: ["summary_id", "content"],
834
+ staleSchemaPatterns: [
835
+ "content_rowid='summary_id'",
836
+ 'content_rowid="summary_id"',
837
+ ],
838
+ });
839
+ });
721
840
 
722
841
  // ── CJK trigram FTS table ────────────────────────────────────────────────
723
842
  // FTS5 unicode61 (porter) tokenizer cannot segment CJK ideographs, so CJK
@@ -727,20 +846,23 @@ export function runLcmMigrations(
727
846
  //
728
847
  // A trigram-tokenized table indexes every 3-character substring, enabling
729
848
  // native CJK substring matching via FTS5 MATCH with OR semantics.
730
- const cjkTableExists = db
731
- .prepare(
732
- "SELECT 1 FROM sqlite_master WHERE type='table' AND name='summaries_fts_cjk'",
733
- )
734
- .get();
735
- if (!cjkTableExists) {
736
- db.exec(`
737
- CREATE VIRTUAL TABLE summaries_fts_cjk USING fts5(
738
- summary_id UNINDEXED,
739
- content,
740
- tokenize='trigram'
741
- );
742
- INSERT INTO summaries_fts_cjk(summary_id, content)
743
- SELECT summary_id, content FROM summaries;
744
- `);
745
- }
849
+ runMigrationStep("ensureSummariesFtsCjk", log, () => {
850
+ if (trigramTokenizerAvailable) {
851
+ ensureStandaloneFtsTable(db, {
852
+ tableName: "summaries_fts_cjk",
853
+ createSql: `
854
+ CREATE VIRTUAL TABLE summaries_fts_cjk USING fts5(
855
+ summary_id UNINDEXED,
856
+ content,
857
+ tokenize='trigram'
858
+ )
859
+ `,
860
+ seedSql: `
861
+ INSERT INTO summaries_fts_cjk(summary_id, content)
862
+ SELECT summary_id, content FROM summaries
863
+ `,
864
+ expectedColumns: ["summary_id", "content"],
865
+ });
866
+ }
867
+ });
746
868
  }