@martian-engineering/lossless-claw 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -3
- package/docs/agent-tools.md +9 -4
- package/docs/configuration.md +9 -0
- package/package.json +1 -1
- package/skills/lossless-claw/SKILL.md +3 -2
- package/skills/lossless-claw/references/architecture.md +12 -0
- package/skills/lossless-claw/references/diagnostics.md +13 -0
- package/src/assembler.ts +12 -4
- package/src/compaction.ts +12 -15
- package/src/db/connection.ts +15 -5
- package/src/db/features.ts +24 -5
- package/src/db/migration.ts +201 -79
- package/src/engine.ts +199 -19
- package/src/estimate-tokens.ts +80 -0
- package/src/plugin/index.ts +95 -18
- package/src/plugin/lcm-command.ts +278 -3
- package/src/plugin/lcm-doctor-apply.ts +1 -3
- package/src/plugin/lcm-doctor-cleaners.ts +655 -0
- package/src/retrieval.ts +1 -4
- package/src/summarize.ts +1 -4
- package/src/tools/lcm-expand-query-tool.ts +598 -194
- package/src/tools/lcm-grep-tool.ts +2 -2
package/src/db/migration.ts
CHANGED
|
@@ -2,6 +2,10 @@ import type { DatabaseSync } from "node:sqlite";
|
|
|
2
2
|
import { getLcmDbFeatures } from "./features.js";
|
|
3
3
|
import { parseUtcTimestampOrNull } from "../store/parse-utc-timestamp.js";
|
|
4
4
|
|
|
5
|
+
/**
 * Minimal logging sink accepted by the migration runner.
 *
 * `info` is optional; callers in this file invoke it with optional chaining,
 * so a logger without it is simply silent.
 */
type MigrationLogger = {
  /** Receives one pre-formatted, single-line progress message. */
  info?: (message: string) => void;
};
|
|
8
|
+
|
|
5
9
|
/**
 * Row shape read back from `PRAGMA table_info(...)` queries in this file.
 * Only the column name is consumed, and it is treated as possibly absent.
 */
type SummaryColumnInfo = {
  /** Column name as reported by SQLite. */
  name?: string;
};
|
|
@@ -27,6 +31,18 @@ type SummaryParentEdgeRow = {
|
|
|
27
31
|
parent_summary_id: string;
|
|
28
32
|
};
|
|
29
33
|
|
|
34
|
+
/**
 * Row shape for `SELECT name FROM sqlite_master` lookups.
 * The name is typed as optional so consumers must validate it before use.
 */
type TableNameRow = {
  /** Table name as stored in `sqlite_master`. */
  name?: string;
};
|
|
37
|
+
|
|
38
|
+
/**
 * Declarative description of one standalone FTS virtual table that the
 * migration should verify and, if needed, rebuild.
 */
type FtsTableSpec = {
  /** Name of the virtual table; also the prefix of its shadow tables. */
  tableName: string;
  /** SQL executed to create the table after any stale copy was dropped. */
  createSql: string;
  /** SQL executed right after `createSql` to populate the fresh table. */
  seedSql: string;
  /** Column names that must all appear in `PRAGMA table_info` for the table. */
  expectedColumns: string[];
  /**
   * Substrings that mark an outdated definition when found in the table's
   * stored `sqlite_master.sql` text.
   */
  staleSchemaPatterns?: string[];
};
|
|
45
|
+
|
|
30
46
|
function ensureSummaryDepthColumn(db: DatabaseSync): void {
|
|
31
47
|
const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
|
|
32
48
|
const hasDepth = summaryColumns.some((col) => col.name === "depth");
|
|
@@ -107,6 +123,29 @@ function ensureCompactionTelemetryColumns(db: DatabaseSync): void {
|
|
|
107
123
|
}
|
|
108
124
|
}
|
|
109
125
|
|
|
126
|
+
/**
 * Render an arbitrary thrown value as a single string for log output.
 *
 * - `Error` instances yield their `message`.
 * - Other values yield `String(value)`, unless that is the uninformative
 *   `"[object Object]"` (or the conversion itself throws), in which case a
 *   JSON rendering is used when one is available.
 *
 * @param error The value caught from a failed migration step.
 * @returns A human-readable description; this function never throws.
 */
function describeMigrationError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }

  let text: string | undefined;
  try {
    text = String(error);
  } catch {
    // e.g. objects without a usable toString/valueOf (null-prototype objects).
    text = undefined;
  }
  if (text !== undefined && text !== "[object Object]") {
    return text;
  }

  try {
    const serialized: unknown = JSON.stringify(error);
    if (typeof serialized === "string") {
      return serialized;
    }
  } catch {
    // Circular or otherwise unserializable value: fall back below.
  }
  return text ?? "[unprintable error]";
}
|
|
129
|
+
|
|
130
|
+
/**
 * Run one named migration step and report its outcome and wall-clock
 * duration to the optional logger.
 *
 * The completion message is emitted only after `step` has returned, outside
 * the failure handler, so an exception thrown by the logger is never
 * reported as a failure of the step itself. When `step` throws, the failure
 * is logged on a best-effort basis and the original error is always the one
 * rethrown.
 *
 * @param name Step identifier included in the log messages.
 * @param log Optional logger; nothing is logged when it or `info` is absent.
 * @param step Synchronous migration work to execute.
 * @throws Whatever `step` throws, unchanged.
 */
function runMigrationStep(
  name: string,
  log: MigrationLogger | undefined,
  step: () => void,
): void {
  const startedAt = Date.now();
  try {
    step();
  } catch (error) {
    try {
      log?.info?.(
        `[lcm] migration step failed: step=${name} durationMs=${Date.now() - startedAt} error=${describeMigrationError(error)}`,
      );
    } catch {
      // A broken logger must not mask the real migration failure.
    }
    throw error;
  }
  log?.info?.(
    `[lcm] migration step complete: step=${name} durationMs=${Date.now() - startedAt}`,
  );
}
|
|
148
|
+
|
|
110
149
|
function backfillSummaryDepths(db: DatabaseSync): void {
|
|
111
150
|
// Leaves are always depth 0, even if legacy rows had malformed values.
|
|
112
151
|
db.exec(`UPDATE summaries SET depth = 0 WHERE kind = 'leaf'`);
|
|
@@ -444,10 +483,89 @@ function backfillToolCallColumns(db: DatabaseSync): void {
|
|
|
444
483
|
);
|
|
445
484
|
}
|
|
446
485
|
|
|
486
|
+
/**
 * Look up which of the given table names are present in `sqlite_master`.
 *
 * @param db Open database handle.
 * @param names Candidate table names; bound as query parameters.
 * @returns The subset of `names` found with `type = 'table'`. An empty
 *   `names` list returns an empty set without querying the database.
 */
function getExistingTableNames(db: DatabaseSync, names: string[]): Set<string> {
  const found = new Set<string>();
  if (names.length === 0) {
    return found;
  }

  // One "?" per candidate so every name is passed as a bound parameter.
  const placeholders = names.map(() => "?").join(", ");
  const statement = db.prepare(
    `SELECT name FROM sqlite_master WHERE type = 'table' AND name IN (${placeholders})`,
  );
  for (const row of statement.all(...names) as TableNameRow[]) {
    const tableName = row.name;
    if (typeof tableName === "string" && tableName.length > 0) {
      found.add(tableName);
    }
  }
  return found;
}
|
|
500
|
+
|
|
501
|
+
/**
 * List the shadow-table names this module associates with an FTS table:
 * `<tableName>_data`, `_idx`, `_content`, `_docsize` and `_config`, in that
 * order.
 *
 * @param tableName Name of the FTS virtual table.
 * @returns The five derived shadow-table names.
 */
function getFtsShadowTableNames(tableName: string): string[] {
  const suffixes = ["data", "idx", "content", "docsize", "config"];
  return suffixes.map((suffix) => `${tableName}_${suffix}`);
}
|
|
510
|
+
|
|
511
|
+
/**
 * Validate a SQL identifier and wrap it in double quotes for interpolation
 * into statements that cannot take bound parameters (e.g. `PRAGMA`, `DROP`).
 *
 * Only ASCII letters, digits and underscores are accepted, and the first
 * character must not be a digit. Because that rule already excludes `"`,
 * no quote-doubling step is needed after validation.
 *
 * @param identifier Table or column name to quote.
 * @returns The identifier surrounded by double quotes.
 * @throws Error if the identifier contains any other character or is empty.
 */
function quoteSqlIdentifier(identifier: string): string {
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(identifier)) {
    throw new Error(`Invalid SQL identifier: ${identifier}`);
  }
  return `"${identifier}"`;
}
|
|
517
|
+
|
|
518
|
+
/**
 * Decide whether the FTS table described by `spec` has to be dropped and
 * rebuilt.
 *
 * A rebuild is requested when any of the following holds:
 * - the table itself or one of its shadow tables is missing;
 * - the stored schema text contains one of `spec.staleSchemaPatterns`;
 * - `PRAGMA table_info` lacks one of `spec.expectedColumns`;
 * - inspecting the schema or columns throws for any reason.
 *
 * @param db Open database handle.
 * @param spec Description of the expected table.
 * @returns `true` when the table should be recreated, `false` otherwise.
 */
function shouldRecreateStandaloneFtsTable(db: DatabaseSync, spec: FtsTableSpec): boolean {
  const { tableName, expectedColumns, staleSchemaPatterns } = spec;

  const shadowNames = getFtsShadowTableNames(tableName);
  const present = getExistingTableNames(db, [tableName, ...shadowNames]);
  const allTablesPresent =
    present.has(tableName) && shadowNames.every((shadowName) => present.has(shadowName));
  if (!allTablesPresent) {
    return true;
  }

  try {
    const schemaRow = db
      .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?")
      .get(tableName) as { sql?: string } | undefined;
    const schemaSql = schemaRow?.sql ?? "";
    for (const pattern of staleSchemaPatterns ?? []) {
      if (schemaSql.includes(pattern)) {
        return true;
      }
    }

    const tableInfo = db
      .prepare(`PRAGMA table_info(${quoteSqlIdentifier(tableName)})`)
      .all() as SummaryColumnInfo[];
    const actualColumns = new Set<string>();
    for (const column of tableInfo) {
      if (typeof column.name === "string" && column.name.length > 0) {
        actualColumns.add(column.name);
      }
    }

    for (const expected of expectedColumns) {
      if (!actualColumns.has(expected)) {
        return true;
      }
    }
    return false;
  } catch {
    // Any failure while inspecting the table is treated as "needs rebuild".
    return true;
  }
}
|
|
550
|
+
|
|
551
|
+
/**
 * Make sure the FTS table described by `spec` exists in its expected form.
 *
 * When `shouldRecreateStandaloneFtsTable` reports a problem, the table and
 * each of its shadow tables are dropped (if present), then `spec.createSql`
 * and `spec.seedSql` are executed in that order. Otherwise nothing happens.
 *
 * NOTE(review): the drop/create/seed statements are issued as separate
 * `exec` calls with no transaction opened here. If seeding fails after the
 * create succeeded, a later run may see a structurally valid but empty
 * table. Confirm whether callers wrap migrations in a transaction.
 *
 * @param db Open database handle.
 * @param spec Description of the table to verify or rebuild.
 */
function ensureStandaloneFtsTable(db: DatabaseSync, spec: FtsTableSpec): void {
  if (shouldRecreateStandaloneFtsTable(db, spec)) {
    // Main table first, then any leftover shadow tables.
    const tablesToDrop = [spec.tableName, ...getFtsShadowTableNames(spec.tableName)];
    for (const name of tablesToDrop) {
      db.exec(`DROP TABLE IF EXISTS ${quoteSqlIdentifier(name)}`);
    }
    db.exec(spec.createSql);
    db.exec(spec.seedSql);
  }
}
|
|
563
|
+
|
|
447
564
|
export function runLcmMigrations(
|
|
448
565
|
db: DatabaseSync,
|
|
449
|
-
options?: { fts5Available?: boolean },
|
|
566
|
+
options?: { fts5Available?: boolean; log?: MigrationLogger },
|
|
450
567
|
): void {
|
|
568
|
+
const log = options?.log;
|
|
451
569
|
db.exec(`
|
|
452
570
|
CREATE TABLE IF NOT EXISTS conversations (
|
|
453
571
|
conversation_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -645,79 +763,80 @@ export function runLcmMigrations(
|
|
|
645
763
|
ON conversations (session_key, active, created_at)
|
|
646
764
|
`);
|
|
647
765
|
db.exec(`DROP INDEX IF EXISTS conversations_session_key_idx`);
|
|
648
|
-
ensureSummaryDepthColumn(db);
|
|
649
|
-
ensureSummaryMetadataColumns(
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
766
|
+
runMigrationStep("ensureSummaryDepthColumn", log, () => ensureSummaryDepthColumn(db));
|
|
767
|
+
runMigrationStep("ensureSummaryMetadataColumns", log, () =>
|
|
768
|
+
ensureSummaryMetadataColumns(db),
|
|
769
|
+
);
|
|
770
|
+
runMigrationStep("ensureSummaryModelColumn", log, () => ensureSummaryModelColumn(db));
|
|
771
|
+
runMigrationStep("ensureCompactionTelemetryColumns", log, () =>
|
|
772
|
+
ensureCompactionTelemetryColumns(db),
|
|
773
|
+
);
|
|
774
|
+
runMigrationStep("backfillSummaryDepths", log, () => backfillSummaryDepths(db));
|
|
653
775
|
// Index on depth — created AFTER backfillSummaryDepths to avoid index
|
|
654
776
|
// maintenance overhead during bulk depth updates on large existing DBs.
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
777
|
+
runMigrationStep("createSummariesDepthIndex", log, () =>
|
|
778
|
+
db.exec(
|
|
779
|
+
`CREATE INDEX IF NOT EXISTS summaries_conv_depth_kind_idx ON summaries (conversation_id, depth, kind)`,
|
|
780
|
+
),
|
|
781
|
+
);
|
|
782
|
+
runMigrationStep("backfillSummaryMetadata", log, () => backfillSummaryMetadata(db));
|
|
783
|
+
runMigrationStep("backfillToolCallColumns", log, () => backfillToolCallColumns(db));
|
|
658
784
|
|
|
659
|
-
const
|
|
785
|
+
const detectedFeatures = options?.fts5Available === false ? null : getLcmDbFeatures(db);
|
|
786
|
+
const fts5Available = options?.fts5Available ?? detectedFeatures?.fts5Available ?? false;
|
|
660
787
|
if (!fts5Available) {
|
|
661
788
|
return;
|
|
662
789
|
}
|
|
663
790
|
|
|
791
|
+
const trigramTokenizerAvailable = detectedFeatures?.trigramTokenizerAvailable ?? false;
|
|
792
|
+
if (!trigramTokenizerAvailable) {
|
|
793
|
+
try {
|
|
794
|
+
db.exec(`DROP TABLE IF EXISTS summaries_fts_cjk`);
|
|
795
|
+
} catch {
|
|
796
|
+
// Best effort only. A stale virtual table should not block core migration.
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
|
|
664
800
|
// FTS5 virtual tables for full-text search (cannot use IF NOT EXISTS, so check manually)
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
if (hasFts) {
|
|
670
|
-
// Check for stale schema: external-content FTS tables with content_rowid cause errors.
|
|
671
|
-
// Drop and recreate as standalone FTS if the old schema is detected.
|
|
672
|
-
const ftsSchema = (
|
|
673
|
-
db
|
|
674
|
-
.prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='messages_fts'")
|
|
675
|
-
.get() as { sql: string } | undefined
|
|
676
|
-
)?.sql;
|
|
677
|
-
if (ftsSchema && ftsSchema.includes("content_rowid")) {
|
|
678
|
-
db.exec("DROP TABLE messages_fts");
|
|
679
|
-
db.exec(`
|
|
801
|
+
runMigrationStep("ensureMessagesFts", log, () => {
|
|
802
|
+
ensureStandaloneFtsTable(db, {
|
|
803
|
+
tableName: "messages_fts",
|
|
804
|
+
createSql: `
|
|
680
805
|
CREATE VIRTUAL TABLE messages_fts USING fts5(
|
|
681
806
|
content,
|
|
682
807
|
tokenize='porter unicode61'
|
|
683
|
-
)
|
|
684
|
-
|
|
685
|
-
`
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
tokenize='porter unicode61'
|
|
716
|
-
);
|
|
717
|
-
INSERT INTO summaries_fts(summary_id, content)
|
|
718
|
-
SELECT summary_id, content FROM summaries;
|
|
719
|
-
`);
|
|
720
|
-
}
|
|
808
|
+
)
|
|
809
|
+
`,
|
|
810
|
+
seedSql: `
|
|
811
|
+
INSERT INTO messages_fts(rowid, content)
|
|
812
|
+
SELECT message_id, content FROM messages
|
|
813
|
+
`,
|
|
814
|
+
expectedColumns: ["content"],
|
|
815
|
+
staleSchemaPatterns: ["content_rowid"],
|
|
816
|
+
});
|
|
817
|
+
});
|
|
818
|
+
|
|
819
|
+
runMigrationStep("ensureSummariesFts", log, () => {
|
|
820
|
+
ensureStandaloneFtsTable(db, {
|
|
821
|
+
tableName: "summaries_fts",
|
|
822
|
+
createSql: `
|
|
823
|
+
CREATE VIRTUAL TABLE summaries_fts USING fts5(
|
|
824
|
+
summary_id UNINDEXED,
|
|
825
|
+
content,
|
|
826
|
+
tokenize='porter unicode61'
|
|
827
|
+
)
|
|
828
|
+
`,
|
|
829
|
+
seedSql: `
|
|
830
|
+
INSERT INTO summaries_fts(summary_id, content)
|
|
831
|
+
SELECT summary_id, content FROM summaries
|
|
832
|
+
`,
|
|
833
|
+
expectedColumns: ["summary_id", "content"],
|
|
834
|
+
staleSchemaPatterns: [
|
|
835
|
+
"content_rowid='summary_id'",
|
|
836
|
+
'content_rowid="summary_id"',
|
|
837
|
+
],
|
|
838
|
+
});
|
|
839
|
+
});
|
|
721
840
|
|
|
722
841
|
// ── CJK trigram FTS table ────────────────────────────────────────────────
|
|
723
842
|
// FTS5 unicode61 (porter) tokenizer cannot segment CJK ideographs, so CJK
|
|
@@ -727,20 +846,23 @@ export function runLcmMigrations(
|
|
|
727
846
|
//
|
|
728
847
|
// A trigram-tokenized table indexes every 3-character substring, enabling
|
|
729
848
|
// native CJK substring matching via FTS5 MATCH with OR semantics.
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
849
|
+
runMigrationStep("ensureSummariesFtsCjk", log, () => {
|
|
850
|
+
if (trigramTokenizerAvailable) {
|
|
851
|
+
ensureStandaloneFtsTable(db, {
|
|
852
|
+
tableName: "summaries_fts_cjk",
|
|
853
|
+
createSql: `
|
|
854
|
+
CREATE VIRTUAL TABLE summaries_fts_cjk USING fts5(
|
|
855
|
+
summary_id UNINDEXED,
|
|
856
|
+
content,
|
|
857
|
+
tokenize='trigram'
|
|
858
|
+
)
|
|
859
|
+
`,
|
|
860
|
+
seedSql: `
|
|
861
|
+
INSERT INTO summaries_fts_cjk(summary_id, content)
|
|
862
|
+
SELECT summary_id, content FROM summaries
|
|
863
|
+
`,
|
|
864
|
+
expectedColumns: ["summary_id", "content"],
|
|
865
|
+
});
|
|
866
|
+
}
|
|
867
|
+
});
|
|
746
868
|
}
|