@martian-engineering/lossless-claw 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +8 -0
  2. package/dist/index.js +971 -0
  3. package/docs/configuration.md +15 -5
  4. package/openclaw.plugin.json +27 -3
  5. package/package.json +7 -6
  6. package/skills/lossless-claw/references/config.md +37 -0
  7. package/index.ts +0 -2
  8. package/src/assembler.ts +0 -1196
  9. package/src/compaction.ts +0 -1753
  10. package/src/db/config.ts +0 -345
  11. package/src/db/connection.ts +0 -151
  12. package/src/db/features.ts +0 -61
  13. package/src/db/migration.ts +0 -868
  14. package/src/engine.ts +0 -4486
  15. package/src/estimate-tokens.ts +0 -80
  16. package/src/expansion-auth.ts +0 -365
  17. package/src/expansion-policy.ts +0 -303
  18. package/src/expansion.ts +0 -383
  19. package/src/integrity.ts +0 -600
  20. package/src/large-files.ts +0 -546
  21. package/src/lcm-log.ts +0 -37
  22. package/src/openclaw-bridge.ts +0 -22
  23. package/src/plugin/index.ts +0 -2037
  24. package/src/plugin/lcm-command.ts +0 -1040
  25. package/src/plugin/lcm-doctor-apply.ts +0 -540
  26. package/src/plugin/lcm-doctor-cleaners.ts +0 -655
  27. package/src/plugin/lcm-doctor-shared.ts +0 -210
  28. package/src/plugin/shared-init.ts +0 -59
  29. package/src/prune.ts +0 -391
  30. package/src/retrieval.ts +0 -360
  31. package/src/session-patterns.ts +0 -23
  32. package/src/startup-banner-log.ts +0 -49
  33. package/src/store/compaction-telemetry-store.ts +0 -156
  34. package/src/store/conversation-store.ts +0 -929
  35. package/src/store/fts5-sanitize.ts +0 -50
  36. package/src/store/full-text-fallback.ts +0 -83
  37. package/src/store/full-text-sort.ts +0 -21
  38. package/src/store/index.ts +0 -39
  39. package/src/store/parse-utc-timestamp.ts +0 -25
  40. package/src/store/summary-store.ts +0 -1519
  41. package/src/summarize.ts +0 -1508
  42. package/src/tools/common.ts +0 -53
  43. package/src/tools/lcm-conversation-scope.ts +0 -127
  44. package/src/tools/lcm-describe-tool.ts +0 -245
  45. package/src/tools/lcm-expand-query-tool.ts +0 -1235
  46. package/src/tools/lcm-expand-tool.delegation.ts +0 -580
  47. package/src/tools/lcm-expand-tool.ts +0 -453
  48. package/src/tools/lcm-expansion-recursion-guard.ts +0 -373
  49. package/src/tools/lcm-grep-tool.ts +0 -228
  50. package/src/transaction-mutex.ts +0 -136
  51. package/src/transcript-repair.ts +0 -301
  52. package/src/types.ts +0 -165
@@ -1,868 +0,0 @@
1
- import type { DatabaseSync } from "node:sqlite";
2
- import { getLcmDbFeatures } from "./features.js";
3
- import { parseUtcTimestampOrNull } from "../store/parse-utc-timestamp.js";
4
-
5
// Minimal logger surface accepted by the migration runner; `info` is optional
// so callers may pass a partially implemented logger or none at all.
type MigrationLogger = {
  info?: (message: string) => void;
};

// Shape of a row returned by `PRAGMA table_info(...)`; only `name` is read.
type SummaryColumnInfo = {
  name?: string;
};

// Summary row used when computing per-summary depths and metadata backfills.
type SummaryDepthRow = {
  summary_id: string;
  conversation_id: number;
  kind: "leaf" | "condensed";
  depth: number;
  token_count: number;
  created_at: string;
};

// Aggregated message time range and token total for one leaf summary.
type SummaryMessageTimeRangeRow = {
  summary_id: string;
  earliest_at: string | null;
  latest_at: string | null;
  source_message_token_count: number | null;
};

// One edge from the `summary_parents` table.
type SummaryParentEdgeRow = {
  summary_id: string;
  parent_summary_id: string;
};

// Row returned when listing table names from `sqlite_master`.
type TableNameRow = {
  name?: string;
};

// Declarative description of a standalone FTS5 table: how to (re)create and
// reseed it, which columns must exist, and CREATE-statement substrings that
// mark a stale layout requiring a rebuild.
type FtsTableSpec = {
  tableName: string;
  createSql: string;
  seedSql: string;
  expectedColumns: string[];
  staleSchemaPatterns?: string[];
};
45
-
46
- function ensureSummaryDepthColumn(db: DatabaseSync): void {
47
- const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
48
- const hasDepth = summaryColumns.some((col) => col.name === "depth");
49
- if (!hasDepth) {
50
- db.exec(`ALTER TABLE summaries ADD COLUMN depth INTEGER NOT NULL DEFAULT 0`);
51
- }
52
- }
53
-
54
- function ensureSummaryMetadataColumns(db: DatabaseSync): void {
55
- const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
56
- const hasEarliestAt = summaryColumns.some((col) => col.name === "earliest_at");
57
- const hasLatestAt = summaryColumns.some((col) => col.name === "latest_at");
58
- const hasDescendantCount = summaryColumns.some((col) => col.name === "descendant_count");
59
- const hasDescendantTokenCount = summaryColumns.some((col) => col.name === "descendant_token_count");
60
- const hasSourceMessageTokenCount = summaryColumns.some(
61
- (col) => col.name === "source_message_token_count",
62
- );
63
-
64
- if (!hasEarliestAt) {
65
- db.exec(`ALTER TABLE summaries ADD COLUMN earliest_at TEXT`);
66
- }
67
- if (!hasLatestAt) {
68
- db.exec(`ALTER TABLE summaries ADD COLUMN latest_at TEXT`);
69
- }
70
- if (!hasDescendantCount) {
71
- db.exec(`ALTER TABLE summaries ADD COLUMN descendant_count INTEGER NOT NULL DEFAULT 0`);
72
- }
73
- if (!hasDescendantTokenCount) {
74
- db.exec(`ALTER TABLE summaries ADD COLUMN descendant_token_count INTEGER NOT NULL DEFAULT 0`);
75
- }
76
- if (!hasSourceMessageTokenCount) {
77
- db.exec(`ALTER TABLE summaries ADD COLUMN source_message_token_count INTEGER NOT NULL DEFAULT 0`);
78
- }
79
- }
80
-
81
// Thin local alias so this module reads consistently; delegates entirely to
// the shared UTC timestamp parser (null/undefined handling is the callee's
// contract — returns null rather than throwing on bad input, per its name).
function parseTimestamp(value: string | null | undefined): Date | null {
  return parseUtcTimestampOrNull(value);
}
84
-
85
- function isoStringOrNull(value: Date | null): string | null {
86
- return value ? value.toISOString() : null;
87
- }
88
-
89
- function ensureSummaryModelColumn(db: DatabaseSync): void {
90
- const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
91
- const hasModel = summaryColumns.some((col) => col.name === "model");
92
- if (!hasModel) {
93
- db.exec(`ALTER TABLE summaries ADD COLUMN model TEXT NOT NULL DEFAULT 'unknown'`);
94
- }
95
- }
96
-
97
- function ensureCompactionTelemetryColumns(db: DatabaseSync): void {
98
- const telemetryColumns = db.prepare(`PRAGMA table_info(conversation_compaction_telemetry)`).all() as SummaryColumnInfo[];
99
- const hasLastLeafCompactionAt = telemetryColumns.some((col) => col.name === "last_leaf_compaction_at");
100
- const hasTurnsSinceLeafCompaction = telemetryColumns.some((col) => col.name === "turns_since_leaf_compaction");
101
- const hasTokensAccumulatedSinceLeafCompaction = telemetryColumns.some(
102
- (col) => col.name === "tokens_accumulated_since_leaf_compaction",
103
- );
104
- const hasLastActivityBand = telemetryColumns.some((col) => col.name === "last_activity_band");
105
-
106
- if (!hasLastLeafCompactionAt) {
107
- db.exec(`ALTER TABLE conversation_compaction_telemetry ADD COLUMN last_leaf_compaction_at TEXT`);
108
- }
109
- if (!hasTurnsSinceLeafCompaction) {
110
- db.exec(
111
- `ALTER TABLE conversation_compaction_telemetry ADD COLUMN turns_since_leaf_compaction INTEGER NOT NULL DEFAULT 0`,
112
- );
113
- }
114
- if (!hasTokensAccumulatedSinceLeafCompaction) {
115
- db.exec(
116
- `ALTER TABLE conversation_compaction_telemetry ADD COLUMN tokens_accumulated_since_leaf_compaction INTEGER NOT NULL DEFAULT 0`,
117
- );
118
- }
119
- if (!hasLastActivityBand) {
120
- db.exec(
121
- `ALTER TABLE conversation_compaction_telemetry ADD COLUMN last_activity_band TEXT NOT NULL DEFAULT 'low' CHECK (last_activity_band IN ('low', 'medium', 'high'))`,
122
- );
123
- }
124
- }
125
-
126
- function describeMigrationError(error: unknown): string {
127
- return error instanceof Error ? error.message : String(error);
128
- }
129
-
130
- function runMigrationStep(
131
- name: string,
132
- log: MigrationLogger | undefined,
133
- step: () => void,
134
- ): void {
135
- const startedAt = Date.now();
136
- try {
137
- step();
138
- log?.info?.(
139
- `[lcm] migration step complete: step=${name} durationMs=${Date.now() - startedAt}`,
140
- );
141
- } catch (error) {
142
- log?.info?.(
143
- `[lcm] migration step failed: step=${name} durationMs=${Date.now() - startedAt} error=${describeMigrationError(error)}`,
144
- );
145
- throw error;
146
- }
147
- }
148
-
149
/**
 * Backfills the `summaries.depth` column for legacy rows.
 *
 * Leaves are forced to depth 0; each condensed summary gets
 * 1 + max(depth of its rows in `summary_parents`), resolved iteratively until
 * a fixpoint. Unresolvable rows (cycles or dangling references) are assigned
 * depth 1 so the migration always terminates.
 */
function backfillSummaryDepths(db: DatabaseSync): void {
  // Leaves are always depth 0, even if legacy rows had malformed values.
  db.exec(`UPDATE summaries SET depth = 0 WHERE kind = 'leaf'`);

  // Only conversations that actually contain condensed summaries need work.
  const conversationRows = db
    .prepare(`SELECT DISTINCT conversation_id FROM summaries WHERE kind = 'condensed'`)
    .all() as Array<{ conversation_id: number }>;
  if (conversationRows.length === 0) {
    return;
  }

  const updateDepthStmt = db.prepare(`UPDATE summaries SET depth = ? WHERE summary_id = ?`);

  for (const row of conversationRows) {
    const conversationId = row.conversation_id;
    const summaries = db
      .prepare(
        `SELECT summary_id, conversation_id, kind, depth, token_count, created_at
         FROM summaries
         WHERE conversation_id = ?`,
      )
      .all(conversationId) as SummaryDepthRow[];

    // Seed: leaves resolve immediately at 0; condensed start unresolved.
    const depthBySummaryId = new Map<string, number>();
    const unresolvedCondensedIds = new Set<string>();
    for (const summary of summaries) {
      if (summary.kind === "leaf") {
        depthBySummaryId.set(summary.summary_id, 0);
        continue;
      }
      unresolvedCondensedIds.add(summary.summary_id);
    }

    // Load the parent edges for this conversation's condensed summaries.
    // NOTE(review): per the aggregation below, `parent_summary_id` behaves
    // as the summaries a condensed summary was built FROM — confirm naming.
    const edges = db
      .prepare(
        `SELECT summary_id, parent_summary_id
         FROM summary_parents
         WHERE summary_id IN (
           SELECT summary_id FROM summaries
           WHERE conversation_id = ? AND kind = 'condensed'
         )`,
      )
      .all(conversationId) as SummaryParentEdgeRow[];
    const parentsBySummaryId = new Map<string, string[]>();
    for (const edge of edges) {
      const existing = parentsBySummaryId.get(edge.summary_id) ?? [];
      existing.push(edge.parent_summary_id);
      parentsBySummaryId.set(edge.summary_id, existing);
    }

    // Fixpoint loop: resolve any condensed summary whose parents are all
    // resolved; its depth is 1 + the deepest parent.
    while (unresolvedCondensedIds.size > 0) {
      let progressed = false;

      // Copy the set so deletions during iteration are safe.
      for (const summaryId of [...unresolvedCondensedIds]) {
        const parentIds = parentsBySummaryId.get(summaryId) ?? [];
        if (parentIds.length === 0) {
          // Condensed with no recorded parents: treat as depth 1.
          depthBySummaryId.set(summaryId, 1);
          unresolvedCondensedIds.delete(summaryId);
          progressed = true;
          continue;
        }

        let maxParentDepth = -1;
        let allParentsResolved = true;
        for (const parentId of parentIds) {
          const parentDepth = depthBySummaryId.get(parentId);
          if (parentDepth == null) {
            allParentsResolved = false;
            break;
          }
          if (parentDepth > maxParentDepth) {
            maxParentDepth = parentDepth;
          }
        }

        if (!allParentsResolved) {
          continue;
        }

        depthBySummaryId.set(summaryId, maxParentDepth + 1);
        unresolvedCondensedIds.delete(summaryId);
        progressed = true;
      }

      // Guard against malformed cycles/cross-conversation references.
      if (!progressed) {
        for (const summaryId of unresolvedCondensedIds) {
          depthBySummaryId.set(summaryId, 1);
        }
        unresolvedCondensedIds.clear();
      }
    }

    // Persist every resolved depth (leaves included; their UPDATE is a no-op
    // relative to the bulk leaf UPDATE above).
    for (const summary of summaries) {
      const depth = depthBySummaryId.get(summary.summary_id);
      if (depth == null) {
        continue;
      }
      updateDepthStmt.run(depth, summary.summary_id);
    }
  }
}
251
-
252
/**
 * Backfills `earliest_at`, `latest_at`, `descendant_count`,
 * `descendant_token_count`, and `source_message_token_count` on every summary.
 *
 * Leaves derive their time range and token total from their linked messages
 * (`summary_messages` joined to `messages`); condensed summaries aggregate the
 * already-computed metadata of their `summary_parents` rows. Correctness of
 * the aggregation relies on iterating summaries ORDER BY depth ASC so that a
 * condensed summary's inputs are processed before it.
 */
function backfillSummaryMetadata(db: DatabaseSync): void {
  const conversationRows = db
    .prepare(`SELECT DISTINCT conversation_id FROM summaries`)
    .all() as Array<{ conversation_id: number }>;
  if (conversationRows.length === 0) {
    return;
  }

  const updateMetadataStmt = db.prepare(
    `UPDATE summaries
     SET earliest_at = ?, latest_at = ?, descendant_count = ?,
         descendant_token_count = ?, source_message_token_count = ?
     WHERE summary_id = ?`,
  );

  for (const conversationRow of conversationRows) {
    const conversationId = conversationRow.conversation_id;
    // Depth-ascending order guarantees bottom-up processing (see doc above).
    const summaries = db
      .prepare(
        `SELECT summary_id, conversation_id, kind, depth, token_count, created_at
         FROM summaries
         WHERE conversation_id = ?
         ORDER BY depth ASC, created_at ASC`,
      )
      .all(conversationId) as SummaryDepthRow[];
    if (summaries.length === 0) {
      continue;
    }

    // Per-leaf message time range and summed message token counts.
    const leafRanges = db
      .prepare(
        `SELECT
           sm.summary_id,
           MIN(m.created_at) AS earliest_at,
           MAX(m.created_at) AS latest_at,
           COALESCE(SUM(m.token_count), 0) AS source_message_token_count
         FROM summary_messages sm
         JOIN messages m ON m.message_id = sm.message_id
         JOIN summaries s ON s.summary_id = sm.summary_id
         WHERE s.conversation_id = ? AND s.kind = 'leaf'
         GROUP BY sm.summary_id`,
      )
      .all(conversationId) as SummaryMessageTimeRangeRow[];
    const leafRangeBySummaryId = new Map(
      leafRanges.map((row) => [
        row.summary_id,
        {
          earliestAt: row.earliest_at,
          latestAt: row.latest_at,
          sourceMessageTokenCount: row.source_message_token_count,
        },
      ]),
    );

    // Parent edges for every summary in this conversation.
    // NOTE(review): `parent_summary_id` rows are aggregated into descendant
    // counts below, so they act as the summaries this one was built from.
    const edges = db
      .prepare(
        `SELECT summary_id, parent_summary_id
         FROM summary_parents
         WHERE summary_id IN (
           SELECT summary_id FROM summaries WHERE conversation_id = ?
         )`,
      )
      .all(conversationId) as SummaryParentEdgeRow[];
    const parentsBySummaryId = new Map<string, string[]>();
    for (const edge of edges) {
      const existing = parentsBySummaryId.get(edge.summary_id) ?? [];
      existing.push(edge.parent_summary_id);
      parentsBySummaryId.set(edge.summary_id, existing);
    }

    const metadataBySummaryId = new Map<
      string,
      {
        earliestAt: Date | null;
        latestAt: Date | null;
        descendantCount: number;
        descendantTokenCount: number;
        sourceMessageTokenCount: number;
      }
    >();
    // Clamp token counts to non-negative integers up front.
    const tokenCountBySummaryId = new Map(
      summaries.map((summary) => [summary.summary_id, Math.max(0, Math.floor(summary.token_count ?? 0))]),
    );

    for (const summary of summaries) {
      // The summary's own created_at stands in when message data is missing.
      const fallbackDate = parseTimestamp(summary.created_at);
      if (summary.kind === "leaf") {
        const range = leafRangeBySummaryId.get(summary.summary_id);
        const earliestAt = parseTimestamp(range?.earliestAt ?? summary.created_at) ?? fallbackDate;
        const latestAt = parseTimestamp(range?.latestAt ?? summary.created_at) ?? fallbackDate;

        metadataBySummaryId.set(summary.summary_id, {
          earliestAt,
          latestAt,
          descendantCount: 0,
          descendantTokenCount: 0,
          sourceMessageTokenCount: Math.max(
            0,
            Math.floor(range?.sourceMessageTokenCount ?? 0),
          ),
        });
        continue;
      }

      // Condensed summary with no recorded inputs: zeroed rollups.
      const parentIds = parentsBySummaryId.get(summary.summary_id) ?? [];
      if (parentIds.length === 0) {
        metadataBySummaryId.set(summary.summary_id, {
          earliestAt: fallbackDate,
          latestAt: fallbackDate,
          descendantCount: 0,
          descendantTokenCount: 0,
          sourceMessageTokenCount: 0,
        });
        continue;
      }

      let earliestAt: Date | null = null;
      let latestAt: Date | null = null;
      let descendantCount = 0;
      let descendantTokenCount = 0;
      let sourceMessageTokenCount = 0;

      for (const parentId of parentIds) {
        // May be absent for dangling references; such edges are skipped.
        const parentMetadata = metadataBySummaryId.get(parentId);
        if (!parentMetadata) {
          continue;
        }

        const parentEarliest = parentMetadata.earliestAt;
        if (parentEarliest && (!earliestAt || parentEarliest < earliestAt)) {
          earliestAt = parentEarliest;
        }

        const parentLatest = parentMetadata.latestAt;
        if (parentLatest && (!latestAt || parentLatest > latestAt)) {
          latestAt = parentLatest;
        }

        // Each input contributes itself (+1) plus its own descendants.
        descendantCount += Math.max(0, parentMetadata.descendantCount) + 1;
        const parentTokenCount = tokenCountBySummaryId.get(parentId) ?? 0;
        descendantTokenCount +=
          Math.max(0, parentTokenCount) + Math.max(0, parentMetadata.descendantTokenCount);
        sourceMessageTokenCount += Math.max(0, parentMetadata.sourceMessageTokenCount);
      }

      metadataBySummaryId.set(summary.summary_id, {
        earliestAt: earliestAt ?? fallbackDate,
        latestAt: latestAt ?? fallbackDate,
        descendantCount: Math.max(0, descendantCount),
        descendantTokenCount: Math.max(0, descendantTokenCount),
        sourceMessageTokenCount: Math.max(0, sourceMessageTokenCount),
      });
    }

    // Persist the computed metadata for every summary in the conversation.
    for (const summary of summaries) {
      const metadata = metadataBySummaryId.get(summary.summary_id);
      if (!metadata) {
        continue;
      }

      updateMetadataStmt.run(
        isoStringOrNull(metadata.earliestAt),
        isoStringOrNull(metadata.latestAt),
        Math.max(0, metadata.descendantCount),
        Math.max(0, metadata.descendantTokenCount),
        Math.max(0, metadata.sourceMessageTokenCount),
        summary.summary_id,
      );
    }
  }
}
423
-
424
/**
 * Backfill tool_call_id, tool_name, and tool_input from metadata JSON for rows
 * where the DB columns are NULL but the values exist in metadata. This covers
 * legacy text-type parts where the string-content ingestion path stored tool
 * info only in the metadata JSON (see #158).
 *
 * Each UPDATE repeats its COALESCE chain in the WHERE clause so rows are only
 * touched when at least one candidate JSON path yields a value; the chain
 * order defines precedence among the legacy metadata layouts.
 */
function backfillToolCallColumns(db: DatabaseSync): void {
  // tool_call_id: prefer the normalized $.toolCallId, then raw variants.
  db.exec(
    `UPDATE message_parts
     SET tool_call_id = COALESCE(
       json_extract(metadata, '$.toolCallId'),
       json_extract(metadata, '$.raw.id'),
       json_extract(metadata, '$.raw.call_id'),
       json_extract(metadata, '$.raw.toolCallId'),
       json_extract(metadata, '$.raw.tool_call_id')
     )
     WHERE tool_call_id IS NULL
       AND metadata IS NOT NULL
       AND COALESCE(
         json_extract(metadata, '$.toolCallId'),
         json_extract(metadata, '$.raw.id'),
         json_extract(metadata, '$.raw.call_id'),
         json_extract(metadata, '$.raw.toolCallId'),
         json_extract(metadata, '$.raw.tool_call_id')
       ) IS NOT NULL`,
  );

  // tool_name: prefer the normalized $.toolName, then raw variants.
  db.exec(
    `UPDATE message_parts
     SET tool_name = COALESCE(
       json_extract(metadata, '$.toolName'),
       json_extract(metadata, '$.raw.name'),
       json_extract(metadata, '$.raw.toolName'),
       json_extract(metadata, '$.raw.tool_name')
     )
     WHERE tool_name IS NULL
       AND metadata IS NOT NULL
       AND COALESCE(
         json_extract(metadata, '$.toolName'),
         json_extract(metadata, '$.raw.name'),
         json_extract(metadata, '$.raw.toolName'),
         json_extract(metadata, '$.raw.tool_name')
       ) IS NOT NULL`,
  );

  // tool_input: only raw variants exist for this field.
  db.exec(
    `UPDATE message_parts
     SET tool_input = COALESCE(
       json_extract(metadata, '$.raw.input'),
       json_extract(metadata, '$.raw.arguments'),
       json_extract(metadata, '$.raw.toolInput')
     )
     WHERE tool_input IS NULL
       AND metadata IS NOT NULL
       AND COALESCE(
         json_extract(metadata, '$.raw.input'),
         json_extract(metadata, '$.raw.arguments'),
         json_extract(metadata, '$.raw.toolInput')
       ) IS NOT NULL`,
  );
}
485
-
486
- function getExistingTableNames(db: DatabaseSync, names: string[]): Set<string> {
487
- if (names.length === 0) {
488
- return new Set();
489
- }
490
- const placeholders = names.map(() => "?").join(", ");
491
- const rows = db
492
- .prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name IN (${placeholders})`)
493
- .all(...names) as TableNameRow[];
494
- return new Set(
495
- rows
496
- .map((row) => row.name)
497
- .filter((name): name is string => typeof name === "string" && name.length > 0),
498
- );
499
- }
500
-
501
- function getFtsShadowTableNames(tableName: string): string[] {
502
- return [
503
- `${tableName}_data`,
504
- `${tableName}_idx`,
505
- `${tableName}_content`,
506
- `${tableName}_docsize`,
507
- `${tableName}_config`,
508
- ];
509
- }
510
-
511
- function quoteSqlIdentifier(identifier: string): string {
512
- if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(identifier)) {
513
- throw new Error(`Invalid SQL identifier: ${identifier}`);
514
- }
515
- return `"${identifier.replaceAll(`"`, `""`)}"`;
516
- }
517
-
518
/**
 * Decides whether a standalone FTS5 table must be dropped and rebuilt:
 * the table or any FTS5 shadow table is missing, its stored CREATE statement
 * matches a known-stale schema pattern, or an expected column is absent.
 * Any introspection failure also answers "rebuild" (fail safe).
 */
function shouldRecreateStandaloneFtsTable(db: DatabaseSync, spec: FtsTableSpec): boolean {
  const shadowTables = getFtsShadowTableNames(spec.tableName);
  const existingTables = getExistingTableNames(db, [spec.tableName, ...shadowTables]);
  if (!existingTables.has(spec.tableName)) {
    return true;
  }
  if (shadowTables.some((name) => !existingTables.has(name))) {
    return true;
  }

  try {
    // Inspect the stored CREATE statement for stale-layout markers.
    const info = db
      .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name = ?")
      .get(spec.tableName) as { sql?: string } | undefined;
    const sql = info?.sql ?? "";
    if (spec.staleSchemaPatterns?.some((pattern) => sql.includes(pattern))) {
      return true;
    }

    // Rebuild if any expected column is missing from the live table.
    const columns = db
      .prepare(`PRAGMA table_info(${quoteSqlIdentifier(spec.tableName)})`)
      .all() as SummaryColumnInfo[];
    const columnNames = new Set(
      columns
        .map((col) => col.name)
        .filter((name): name is string => typeof name === "string" && name.length > 0),
    );
    return spec.expectedColumns.some((column) => !columnNames.has(column));
  } catch {
    // Introspection failed (e.g. corrupt/unloadable virtual table) — rebuild.
    return true;
  }
}
550
-
551
- function ensureStandaloneFtsTable(db: DatabaseSync, spec: FtsTableSpec): void {
552
- if (!shouldRecreateStandaloneFtsTable(db, spec)) {
553
- return;
554
- }
555
-
556
- db.exec(`DROP TABLE IF EXISTS ${quoteSqlIdentifier(spec.tableName)}`);
557
- for (const shadowTableName of getFtsShadowTableNames(spec.tableName)) {
558
- db.exec(`DROP TABLE IF EXISTS ${quoteSqlIdentifier(shadowTableName)}`);
559
- }
560
- db.exec(spec.createSql);
561
- db.exec(spec.seedSql);
562
- }
563
-
564
/**
 * Creates the LCM schema and runs all forward-compatible migrations.
 *
 * Order matters: base tables/indexes first, then additive column migrations,
 * then data backfills, and finally the FTS5 virtual tables (which are only
 * attempted when FTS5 is available).
 *
 * @param db      Open database handle the migrations run against.
 * @param options `fts5Available` overrides feature detection (false skips
 *                detection entirely); `log` receives per-step progress lines.
 */
export function runLcmMigrations(
  db: DatabaseSync,
  options?: { fts5Available?: boolean; log?: MigrationLogger },
): void {
  const log = options?.log;
  // Base schema. Everything here is idempotent via IF NOT EXISTS.
  db.exec(`
    CREATE TABLE IF NOT EXISTS conversations (
      conversation_id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      session_key TEXT,
      active INTEGER NOT NULL DEFAULT 1,
      archived_at TEXT,
      title TEXT,
      bootstrapped_at TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      updated_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS messages (
      message_id INTEGER PRIMARY KEY AUTOINCREMENT,
      conversation_id INTEGER NOT NULL REFERENCES conversations(conversation_id) ON DELETE CASCADE,
      seq INTEGER NOT NULL,
      role TEXT NOT NULL CHECK (role IN ('system', 'user', 'assistant', 'tool')),
      content TEXT NOT NULL,
      token_count INTEGER NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      UNIQUE (conversation_id, seq)
    );

    CREATE TABLE IF NOT EXISTS summaries (
      summary_id TEXT PRIMARY KEY,
      conversation_id INTEGER NOT NULL REFERENCES conversations(conversation_id) ON DELETE CASCADE,
      kind TEXT NOT NULL CHECK (kind IN ('leaf', 'condensed')),
      depth INTEGER NOT NULL DEFAULT 0,
      content TEXT NOT NULL,
      token_count INTEGER NOT NULL,
      earliest_at TEXT,
      latest_at TEXT,
      descendant_count INTEGER NOT NULL DEFAULT 0,
      descendant_token_count INTEGER NOT NULL DEFAULT 0,
      source_message_token_count INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      file_ids TEXT NOT NULL DEFAULT '[]'
    );

    CREATE TABLE IF NOT EXISTS message_parts (
      part_id TEXT PRIMARY KEY,
      message_id INTEGER NOT NULL REFERENCES messages(message_id) ON DELETE CASCADE,
      session_id TEXT NOT NULL,
      part_type TEXT NOT NULL CHECK (part_type IN (
        'text', 'reasoning', 'tool', 'patch', 'file',
        'subtask', 'compaction', 'step_start', 'step_finish',
        'snapshot', 'agent', 'retry'
      )),
      ordinal INTEGER NOT NULL,
      text_content TEXT,
      is_ignored INTEGER,
      is_synthetic INTEGER,
      tool_call_id TEXT,
      tool_name TEXT,
      tool_status TEXT,
      tool_input TEXT,
      tool_output TEXT,
      tool_error TEXT,
      tool_title TEXT,
      patch_hash TEXT,
      patch_files TEXT,
      file_mime TEXT,
      file_name TEXT,
      file_url TEXT,
      subtask_prompt TEXT,
      subtask_desc TEXT,
      subtask_agent TEXT,
      step_reason TEXT,
      step_cost REAL,
      step_tokens_in INTEGER,
      step_tokens_out INTEGER,
      snapshot_hash TEXT,
      compaction_auto INTEGER,
      metadata TEXT,
      UNIQUE (message_id, ordinal)
    );

    CREATE TABLE IF NOT EXISTS summary_messages (
      summary_id TEXT NOT NULL REFERENCES summaries(summary_id) ON DELETE CASCADE,
      message_id INTEGER NOT NULL REFERENCES messages(message_id) ON DELETE RESTRICT,
      ordinal INTEGER NOT NULL,
      PRIMARY KEY (summary_id, message_id)
    );

    CREATE TABLE IF NOT EXISTS summary_parents (
      summary_id TEXT NOT NULL REFERENCES summaries(summary_id) ON DELETE CASCADE,
      parent_summary_id TEXT NOT NULL REFERENCES summaries(summary_id) ON DELETE RESTRICT,
      ordinal INTEGER NOT NULL,
      PRIMARY KEY (summary_id, parent_summary_id)
    );

    CREATE TABLE IF NOT EXISTS context_items (
      conversation_id INTEGER NOT NULL REFERENCES conversations(conversation_id) ON DELETE CASCADE,
      ordinal INTEGER NOT NULL,
      item_type TEXT NOT NULL CHECK (item_type IN ('message', 'summary')),
      message_id INTEGER REFERENCES messages(message_id) ON DELETE RESTRICT,
      summary_id TEXT REFERENCES summaries(summary_id) ON DELETE RESTRICT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      PRIMARY KEY (conversation_id, ordinal),
      CHECK (
        (item_type = 'message' AND message_id IS NOT NULL AND summary_id IS NULL) OR
        (item_type = 'summary' AND summary_id IS NOT NULL AND message_id IS NULL)
      )
    );

    CREATE TABLE IF NOT EXISTS large_files (
      file_id TEXT PRIMARY KEY,
      conversation_id INTEGER NOT NULL REFERENCES conversations(conversation_id) ON DELETE CASCADE,
      file_name TEXT,
      mime_type TEXT,
      byte_size INTEGER,
      storage_uri TEXT NOT NULL,
      exploration_summary TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS conversation_bootstrap_state (
      conversation_id INTEGER PRIMARY KEY REFERENCES conversations(conversation_id) ON DELETE CASCADE,
      session_file_path TEXT NOT NULL,
      last_seen_size INTEGER NOT NULL,
      last_seen_mtime_ms INTEGER NOT NULL,
      last_processed_offset INTEGER NOT NULL,
      last_processed_entry_hash TEXT,
      updated_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    CREATE TABLE IF NOT EXISTS conversation_compaction_telemetry (
      conversation_id INTEGER PRIMARY KEY REFERENCES conversations(conversation_id) ON DELETE CASCADE,
      last_observed_cache_read INTEGER,
      last_observed_cache_write INTEGER,
      last_observed_cache_hit_at TEXT,
      last_observed_cache_break_at TEXT,
      cache_state TEXT NOT NULL DEFAULT 'unknown'
        CHECK (cache_state IN ('hot', 'cold', 'unknown')),
      retention TEXT,
      last_leaf_compaction_at TEXT,
      turns_since_leaf_compaction INTEGER NOT NULL DEFAULT 0,
      tokens_accumulated_since_leaf_compaction INTEGER NOT NULL DEFAULT 0,
      last_activity_band TEXT NOT NULL DEFAULT 'low'
        CHECK (last_activity_band IN ('low', 'medium', 'high')),
      updated_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Indexes
    CREATE INDEX IF NOT EXISTS messages_conv_seq_idx ON messages (conversation_id, seq);
    CREATE INDEX IF NOT EXISTS summaries_conv_created_idx ON summaries (conversation_id, created_at);
    CREATE INDEX IF NOT EXISTS summary_messages_message_idx ON summary_messages (message_id);
    CREATE INDEX IF NOT EXISTS summary_parents_parent_summary_idx ON summary_parents (parent_summary_id);
    CREATE INDEX IF NOT EXISTS message_parts_message_idx ON message_parts (message_id);
    CREATE INDEX IF NOT EXISTS message_parts_type_idx ON message_parts (part_type);
    CREATE INDEX IF NOT EXISTS context_items_conv_idx ON context_items (conversation_id, ordinal);
    CREATE INDEX IF NOT EXISTS large_files_conv_idx ON large_files (conversation_id, created_at);
    CREATE INDEX IF NOT EXISTS bootstrap_state_path_idx
      ON conversation_bootstrap_state (session_file_path, updated_at);
    CREATE INDEX IF NOT EXISTS compaction_telemetry_state_idx
      ON conversation_compaction_telemetry (cache_state, updated_at);

    -- Speed up summary_messages lookups by message_id (PK is summary_id,message_id)
    CREATE INDEX IF NOT EXISTS summary_messages_message_idx ON summary_messages (message_id);
  `);
  // NOTE(review): summary_messages_message_idx is created twice in the block
  // above; harmless because of IF NOT EXISTS, but the duplicate could go.

  // Forward-compatible conversations migration for existing DBs.
  const conversationColumns = db.prepare(`PRAGMA table_info(conversations)`).all() as Array<{
    name?: string;
  }>;
  const hasBootstrappedAt = conversationColumns.some((col) => col.name === "bootstrapped_at");
  if (!hasBootstrappedAt) {
    db.exec(`ALTER TABLE conversations ADD COLUMN bootstrapped_at TEXT`);
  }

  const hasSessionKey = conversationColumns.some((col) => col.name === "session_key");
  if (!hasSessionKey) {
    db.exec(`ALTER TABLE conversations ADD COLUMN session_key TEXT`);
  }

  const hasActive = conversationColumns.some((col) => col.name === "active");
  if (!hasActive) {
    db.exec(`ALTER TABLE conversations ADD COLUMN active INTEGER NOT NULL DEFAULT 1`);
  }

  const hasArchivedAt = conversationColumns.some((col) => col.name === "archived_at");
  if (!hasArchivedAt) {
    db.exec(`ALTER TABLE conversations ADD COLUMN archived_at TEXT`);
  }

  // Normalize legacy NULL `active` values, then enforce at most one active
  // conversation per session_key via a partial unique index.
  db.exec(`UPDATE conversations SET active = 1 WHERE active IS NULL`);
  db.exec(`
    CREATE UNIQUE INDEX IF NOT EXISTS conversations_active_session_key_idx
    ON conversations (session_key)
    WHERE session_key IS NOT NULL AND active = 1
  `);
  db.exec(`
    CREATE INDEX IF NOT EXISTS conversations_session_key_active_created_idx
    ON conversations (session_key, active, created_at)
  `);
  // Superseded by the two indexes created above.
  db.exec(`DROP INDEX IF EXISTS conversations_session_key_idx`);

  // Additive column migrations, then data backfills — each step logged.
  runMigrationStep("ensureSummaryDepthColumn", log, () => ensureSummaryDepthColumn(db));
  runMigrationStep("ensureSummaryMetadataColumns", log, () =>
    ensureSummaryMetadataColumns(db),
  );
  runMigrationStep("ensureSummaryModelColumn", log, () => ensureSummaryModelColumn(db));
  runMigrationStep("ensureCompactionTelemetryColumns", log, () =>
    ensureCompactionTelemetryColumns(db),
  );
  runMigrationStep("backfillSummaryDepths", log, () => backfillSummaryDepths(db));
  // Index on depth — created AFTER backfillSummaryDepths to avoid index
  // maintenance overhead during bulk depth updates on large existing DBs.
  runMigrationStep("createSummariesDepthIndex", log, () =>
    db.exec(
      `CREATE INDEX IF NOT EXISTS summaries_conv_depth_kind_idx ON summaries (conversation_id, depth, kind)`,
    ),
  );
  runMigrationStep("backfillSummaryMetadata", log, () => backfillSummaryMetadata(db));
  runMigrationStep("backfillToolCallColumns", log, () => backfillToolCallColumns(db));

  // FTS feature detection is skipped entirely when the caller forces
  // fts5Available=false; otherwise probe the database.
  const detectedFeatures = options?.fts5Available === false ? null : getLcmDbFeatures(db);
  const fts5Available = options?.fts5Available ?? detectedFeatures?.fts5Available ?? false;
  if (!fts5Available) {
    return;
  }

  const trigramTokenizerAvailable = detectedFeatures?.trigramTokenizerAvailable ?? false;
  if (!trigramTokenizerAvailable) {
    try {
      db.exec(`DROP TABLE IF EXISTS summaries_fts_cjk`);
    } catch {
      // Best effort only. A stale virtual table should not block core migration.
    }
  }

  // FTS5 virtual tables for full-text search (cannot use IF NOT EXISTS, so check manually)
  runMigrationStep("ensureMessagesFts", log, () => {
    ensureStandaloneFtsTable(db, {
      tableName: "messages_fts",
      createSql: `
        CREATE VIRTUAL TABLE messages_fts USING fts5(
          content,
          tokenize='porter unicode61'
        )
      `,
      seedSql: `
        INSERT INTO messages_fts(rowid, content)
        SELECT message_id, content FROM messages
      `,
      expectedColumns: ["content"],
      staleSchemaPatterns: ["content_rowid"],
    });
  });

  runMigrationStep("ensureSummariesFts", log, () => {
    ensureStandaloneFtsTable(db, {
      tableName: "summaries_fts",
      createSql: `
        CREATE VIRTUAL TABLE summaries_fts USING fts5(
          summary_id UNINDEXED,
          content,
          tokenize='porter unicode61'
        )
      `,
      seedSql: `
        INSERT INTO summaries_fts(summary_id, content)
        SELECT summary_id, content FROM summaries
      `,
      expectedColumns: ["summary_id", "content"],
      staleSchemaPatterns: [
        "content_rowid='summary_id'",
        'content_rowid="summary_id"',
      ],
    });
  });

  // ── CJK trigram FTS table ────────────────────────────────────────────────
  // FTS5 unicode61 (porter) tokenizer cannot segment CJK ideographs, so CJK
  // queries currently fall back to a LIKE path with AND logic. When the user's
  // phrasing doesn't match the summary verbatim (e.g. "端到端测试结果" vs
  // "端到端测试"), ALL terms must match and the query returns 0 candidates.
  //
  // A trigram-tokenized table indexes every 3-character substring, enabling
  // native CJK substring matching via FTS5 MATCH with OR semantics.
  runMigrationStep("ensureSummariesFtsCjk", log, () => {
    if (trigramTokenizerAvailable) {
      ensureStandaloneFtsTable(db, {
        tableName: "summaries_fts_cjk",
        createSql: `
          CREATE VIRTUAL TABLE summaries_fts_cjk USING fts5(
            summary_id UNINDEXED,
            content,
            tokenize='trigram'
          )
        `,
        seedSql: `
          INSERT INTO summaries_fts_cjk(summary_id, content)
          SELECT summary_id, content FROM summaries
        `,
        expectedColumns: ["summary_id", "content"],
      });
    }
  });
}