@martian-engineering/lossless-claw 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,655 @@
1
+ import type { DatabaseSync } from "node:sqlite";
2
+ import { basename, dirname, join } from "node:path";
3
+ import { getFileBackedDatabasePath } from "../db/connection.js";
4
+
5
/** Identifier of one of the built-in doctor cleaner filters. */
export type DoctorCleanerId =
  | "archived_subagents"
  | "cron_sessions"
  | "null_subagent_context";

/** A sample conversation matched by a cleaner filter during a scan. */
export type DoctorCleanerExample = {
  /** `conversations.conversation_id` of the matched conversation. */
  conversationId: number;
  /** `conversations.session_key`, or `null` when the conversation has none. */
  sessionKey: string | null;
  /** Number of stored messages counted for the conversation. */
  messageCount: number;
  /** Whitespace-normalized, shortened start of the first message, when one was staged. */
  firstMessagePreview: string | null;
};

/** Scan statistics for a single cleaner filter. */
export type DoctorCleanerFilterStat = {
  id: DoctorCleanerId;
  label: string;
  description: string;
  /** Number of conversations the filter matched. */
  conversationCount: number;
  /** Total messages across the matched conversations. */
  messageCount: number;
  /** Up to three sample conversations, largest message counts first. */
  examples: Array<DoctorCleanerExample>;
};

/** Result of scanning the database with one or more cleaner filters. */
export type DoctorCleanerScan = {
  /** One entry per scanned filter, in definition order. */
  filters: Array<DoctorCleanerFilterStat>;
  /** Conversations matched by at least one filter (each counted once). */
  totalDistinctConversations: number;
  /** Messages belonging to those distinct conversations. */
  totalDistinctMessages: number;
};

/**
 * Outcome of applying cleaners: either the deletion summary (including the
 * path of the backup written beforehand) or the reason nothing was attempted.
 */
export type DoctorCleanerApplyResult =
  | {
      kind: "applied";
      filterIds: Array<DoctorCleanerId>;
      deletedConversations: number;
      deletedMessages: number;
      vacuumed: boolean;
      backupPath: string;
    }
  | {
      kind: "unavailable";
      reason: string;
    };
45
+
46
/** Static description of one cleaner filter and the SQL used to select its conversations. */
type CleanerDefinition = {
  id: DoctorCleanerId;
  label: string;
  description: string;
  /**
   * WHERE predicate over `conversations c` used to pre-select the conversations
   * whose messages are worth staging.
   */
  candidatePredicateSql: string;
  /**
   * Full WHERE predicate over `conversations c`; it may reference the
   * `message_stats` alias when `needsFirstMessage` is set.
   */
  predicateSql: string;
  /** When true, the staged first-message table is joined in as `message_stats`. */
  needsFirstMessage?: boolean;
};

/** Row shape of the aggregate count queries. */
type CleanerCountRow = {
  filter_id?: DoctorCleanerId;
  conversation_count: number | null;
  message_count: number | null;
};

/** Row shape of the per-filter example query. */
type CleanerExampleRow = {
  filter_id: DoctorCleanerId;
  conversation_id: number;
  session_key: string | null;
  message_count: number | null;
  first_message_preview: string | null;
};
68
+
69
/** Length passed to SQLite `substr` when staging the start of a conversation's first message. */
const SCAN_FIRST_MESSAGE_PREVIEW_LIMIT = 256;

/** The built-in cleaner filters, in the order they are reported. */
const CLEANER_DEFINITIONS: CleanerDefinition[] = [
  {
    id: "archived_subagents",
    label: "Archived subagents",
    description: "Archived subagent conversations keyed as agent:main:subagent:*.",
    candidatePredicateSql: "(c.active = 0 AND c.session_key LIKE 'agent:main:subagent:%')",
    predicateSql: "(c.active = 0 AND c.session_key LIKE 'agent:main:subagent:%')",
  },
  {
    id: "cron_sessions",
    label: "Cron sessions",
    description: "Background cron conversations keyed as agent:main:cron:*.",
    candidatePredicateSql: "(c.session_key LIKE 'agent:main:cron:%')",
    predicateSql: "(c.session_key LIKE 'agent:main:cron:%')",
  },
  {
    id: "null_subagent_context",
    label: "NULL-key subagent context",
    description:
      "Archived conversations with NULL session_key whose first stored message begins with [Subagent Context].",
    candidatePredicateSql: "(c.session_key IS NULL AND c.active = 0 AND c.archived_at IS NOT NULL)",
    // Same conditions as the candidate predicate plus the first-message prefix check.
    predicateSql:
      "(c.session_key IS NULL AND c.active = 0 AND c.archived_at IS NOT NULL AND message_stats.first_message_preview LIKE '[Subagent Context]%')",
    needsFirstMessage: true,
  },
];

/** Ids of every built-in cleaner, in definition order. */
const DOCTOR_CLEANER_IDS: DoctorCleanerId[] = CLEANER_DEFINITIONS.map(({ id }) => id);
101
+
102
/**
 * Resolves the cleaner definitions to run.
 *
 * @param filterIds Requested cleaner ids. When omitted or empty, every built-in
 *   cleaner is selected.
 * @returns The matching definitions in definition order; ids that match no
 *   definition are ignored, so the result can be empty.
 */
function getCleanerDefinitions(filterIds?: DoctorCleanerId[]): CleanerDefinition[] {
  const wanted = new Set(filterIds ?? []);
  if (wanted.size === 0) {
    return CLEANER_DEFINITIONS;
  }
  return CLEANER_DEFINITIONS.filter(({ id }) => wanted.has(id));
}
109
+
110
/**
 * Normalizes a message preview for display.
 *
 * Runs of whitespace collapse to a single space and the result is trimmed.
 * Text longer than 120 UTF-16 code units is cut to at most 117 units and
 * suffixed with `...`.
 *
 * @param value Raw preview text, or `null`.
 * @returns The normalized preview, or `null` when the input is empty or
 *   consists only of whitespace.
 */
function truncatePreview(value: string | null): string | null {
  if (!value) {
    return null;
  }
  const normalized = value.replace(/\s+/g, " ").trim();
  if (!normalized) {
    return null;
  }
  if (normalized.length <= 120) {
    return normalized;
  }
  let cut = 117;
  // Never end the kept text on a high surrogate: cutting between the two halves
  // of an astral character (e.g. an emoji) would leave a lone, malformed unit.
  const lastKeptUnit = normalized.charCodeAt(cut - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    cut -= 1;
  }
  return `${normalized.slice(0, cut)}...`;
}
120
+
121
/**
 * Builds a `UNION ALL` query selecting the conversations matched by each
 * definition's full predicate.
 *
 * @param params.definitions Cleaners to include.
 * @param params.includeFilterId Whether each row also carries the cleaner id as
 *   `filter_id` (defaults to `true`).
 * @param params.messageStatsTableName Table joined in as `message_stats` for
 *   definitions that set `needsFirstMessage`; without it no join is emitted.
 * @returns The SQL text; an always-empty SELECT when `definitions` is empty.
 */
function buildMatchedConversationsSql(params: {
  definitions: CleanerDefinition[];
  includeFilterId?: boolean;
  messageStatsTableName?: string;
}): string {
  const {
    definitions: selected,
    includeFilterId: withFilterId = true,
    messageStatsTableName: statsTable,
  } = params;

  if (selected.length === 0) {
    if (withFilterId) {
      return `SELECT NULL AS filter_id, NULL AS conversation_id WHERE 0`;
    }
    return `SELECT NULL AS conversation_id WHERE 0`;
  }

  const selects: string[] = [];
  for (const definition of selected) {
    let columnsSql = `SELECT c.conversation_id`;
    if (withFilterId) {
      columnsSql = `SELECT '${definition.id}' AS filter_id, c.conversation_id`;
    }
    let statsJoinSql = "";
    if (definition.needsFirstMessage && statsTable) {
      statsJoinSql = `LEFT JOIN ${statsTable} message_stats ON message_stats.conversation_id = c.conversation_id`;
    }
    selects.push(`${columnsSql}
        FROM conversations c
        ${statsJoinSql}
        WHERE ${definition.predicateSql}`);
  }
  return selects.join(`\nUNION ALL\n`);
}
148
+
149
/**
 * Builds a `UNION` query (duplicates removed) over each definition's candidate
 * predicate, yielding the ids of conversations worth staging.
 *
 * @param definitions Cleaners whose candidate predicates are combined.
 * @returns The SQL text; an always-empty SELECT when `definitions` is empty.
 */
function buildCandidateConversationsSql(definitions: CleanerDefinition[]): string {
  const selects = definitions.map(
    ({ candidatePredicateSql }) => `SELECT c.conversation_id
        FROM conversations c
        WHERE ${candidatePredicateSql}`,
  );
  return selects.length > 0
    ? selects.join(`\nUNION\n`)
    : `SELECT NULL AS conversation_id WHERE 0`;
}
161
+
162
/** Drops the temp tables used by the scan path, if they exist. */
function dropTempCleanerScanTables(db: DatabaseSync): void {
  const scanTables = [
    "doctor_cleaner_scan_matches",
    "doctor_cleaner_scan_message_stats",
    "doctor_cleaner_candidate_conversations",
  ];
  for (const table of scanTables) {
    db.exec(`DROP TABLE IF EXISTS temp.${table}`);
  }
}
167
+
168
/**
 * Rebuilds the temp tables the scan queries read from:
 * candidate conversation ids, per-conversation message stats (count and, when
 * any definition needs it, the start of the first message), and the
 * (filter, conversation) matches.
 *
 * Existing scan temp tables are dropped first. With no definitions, nothing is
 * created.
 */
function stageCleanerScanTables(db: DatabaseSync, definitions: CleanerDefinition[]): void {
  dropTempCleanerScanTables(db);
  if (definitions.length === 0) {
    return;
  }

  // 1. Candidate conversations: the union of every definition's candidate predicate.
  const candidateSelectSql = buildCandidateConversationsSql(definitions);
  db.exec(`
    CREATE TEMP TABLE doctor_cleaner_candidate_conversations (
      conversation_id INTEGER PRIMARY KEY
    ) WITHOUT ROWID
  `);
  db.exec(`
    INSERT INTO temp.doctor_cleaner_candidate_conversations (conversation_id)
    ${candidateSelectSql}
  `);

  // 2. Message stats for the candidates only.
  db.exec(`
    CREATE TEMP TABLE doctor_cleaner_scan_message_stats (
      conversation_id INTEGER PRIMARY KEY,
      first_message_preview TEXT,
      message_count INTEGER NOT NULL
    )
  `);
  const wantsFirstMessage = definitions.some(({ needsFirstMessage }) => needsFirstMessage);
  // The window-function variant is only used when a first-message preview is
  // required; otherwise a plain GROUP BY count is enough.
  const populateStatsSql = wantsFirstMessage
    ? `
      WITH ranked_messages AS (
        SELECT
          m.conversation_id,
          m.content,
          ROW_NUMBER() OVER (
            PARTITION BY m.conversation_id
            ORDER BY m.seq ASC, m.created_at ASC, m.message_id ASC
          ) AS row_num,
          COUNT(*) OVER (PARTITION BY m.conversation_id) AS message_count
        FROM messages m
        JOIN temp.doctor_cleaner_candidate_conversations candidates
          ON candidates.conversation_id = m.conversation_id
      )
      INSERT INTO temp.doctor_cleaner_scan_message_stats (
        conversation_id,
        first_message_preview,
        message_count
      )
      SELECT
        conversation_id,
        MAX(CASE WHEN row_num = 1 THEN substr(content, 1, ${SCAN_FIRST_MESSAGE_PREVIEW_LIMIT}) END) AS first_message_preview,
        MAX(message_count) AS message_count
      FROM ranked_messages
      GROUP BY conversation_id
    `
    : `
      INSERT INTO temp.doctor_cleaner_scan_message_stats (
        conversation_id,
        first_message_preview,
        message_count
      )
      SELECT
        m.conversation_id,
        NULL AS first_message_preview,
        COUNT(*) AS message_count
      FROM messages m
      JOIN temp.doctor_cleaner_candidate_conversations candidates
        ON candidates.conversation_id = m.conversation_id
      GROUP BY m.conversation_id
    `;
  db.exec(populateStatsSql);

  // 3. Matches: one row per (filter, conversation) satisfying the full predicate.
  db.exec(`
    CREATE TEMP TABLE doctor_cleaner_scan_matches (
      filter_id TEXT NOT NULL,
      conversation_id INTEGER NOT NULL,
      PRIMARY KEY (filter_id, conversation_id)
    ) WITHOUT ROWID
  `);
  const matchesSelectSql = buildMatchedConversationsSql({
    definitions,
    includeFilterId: true,
    messageStatsTableName: "temp.doctor_cleaner_scan_message_stats",
  });
  db.exec(`
    INSERT INTO temp.doctor_cleaner_scan_matches (filter_id, conversation_id)
    ${matchesSelectSql}
  `);
}
250
+
251
/**
 * Lists the built-in cleaner filters.
 *
 * @returns A fresh array with the id, label and description of every cleaner,
 *   in definition order.
 */
export function getDoctorCleanerFilters(): Array<Pick<DoctorCleanerFilterStat, "id" | "label" | "description">> {
  const summaries: Array<Pick<DoctorCleanerFilterStat, "id" | "label" | "description">> = [];
  for (const definition of CLEANER_DEFINITIONS) {
    summaries.push({
      id: definition.id,
      label: definition.label,
      description: definition.description,
    });
  }
  return summaries;
}
258
+
259
/** Returns a copy of the built-in cleaner ids, so callers cannot mutate the module's list. */
export function getDoctorCleanerFilterIds(): DoctorCleanerId[] {
  return DOCTOR_CLEANER_IDS.slice();
}
262
+
263
/**
 * Reports what the selected cleaners would match, without deleting anything.
 *
 * Temp tables are staged for the duration of the call and dropped again in a
 * `finally` block, whether or not the queries succeed.
 *
 * @param db Open database handle.
 * @param filterIds Cleaners to scan; omitted or empty selects all of them.
 * @returns Per-filter counts with up to three examples each, plus totals over
 *   the distinct conversations matched by any filter.
 */
export function scanDoctorCleaners(
  db: DatabaseSync,
  filterIds?: DoctorCleanerId[],
): DoctorCleanerScan {
  const selected = getCleanerDefinitions(filterIds);
  if (selected.length === 0) {
    return {
      filters: [],
      totalDistinctConversations: 0,
      totalDistinctMessages: 0,
    };
  }

  type CountRowWithTotals = CleanerCountRow & {
    filter_id: DoctorCleanerId;
    total_conversation_count: number | null;
    total_message_count: number | null;
  };

  // Per-filter counts; every row also repeats the overall distinct totals.
  const countsSql = `WITH filter_counts AS (
           SELECT
             matches.filter_id,
             COUNT(*) AS conversation_count,
             COALESCE(SUM(COALESCE(stats.message_count, 0)), 0) AS message_count
           FROM temp.doctor_cleaner_scan_matches matches
           LEFT JOIN temp.doctor_cleaner_scan_message_stats stats
             ON stats.conversation_id = matches.conversation_id
           GROUP BY matches.filter_id
         ),
         distinct_conversations AS (
           SELECT DISTINCT conversation_id
           FROM temp.doctor_cleaner_scan_matches
         )
         SELECT
           fc.filter_id,
           fc.conversation_count,
           fc.message_count,
           COALESCE((SELECT COUNT(*) FROM distinct_conversations), 0) AS total_conversation_count,
           COALESCE((
             SELECT SUM(COALESCE(stats.message_count, 0))
             FROM distinct_conversations dc
             LEFT JOIN temp.doctor_cleaner_scan_message_stats stats
               ON stats.conversation_id = dc.conversation_id
           ), 0) AS total_message_count
         FROM filter_counts fc`;

  // Top three conversations per filter, ranked by message count, then recency.
  const examplesSql = `WITH ranked_examples AS (
           SELECT
             matches.filter_id,
             c.conversation_id,
             c.session_key,
             COALESCE(stats.message_count, 0) AS message_count,
             stats.first_message_preview,
             ROW_NUMBER() OVER (
               PARTITION BY matches.filter_id
               ORDER BY COALESCE(stats.message_count, 0) DESC, c.created_at DESC, c.conversation_id DESC
             ) AS example_rank
           FROM temp.doctor_cleaner_scan_matches matches
           JOIN conversations c ON c.conversation_id = matches.conversation_id
           LEFT JOIN temp.doctor_cleaner_scan_message_stats stats
             ON stats.conversation_id = matches.conversation_id
         )
         SELECT
           filter_id,
           conversation_id,
           session_key,
           message_count,
           first_message_preview
         FROM ranked_examples
         WHERE example_rank <= 3
         ORDER BY filter_id, example_rank`;

  try {
    stageCleanerScanTables(db, selected);

    const countRows = db.prepare(countsSql).all() as Array<CountRowWithTotals>;
    const exampleRows = db.prepare(examplesSql).all() as CleanerExampleRow[];

    const countByFilter = new Map<DoctorCleanerId, CountRowWithTotals>();
    for (const row of countRows) {
      countByFilter.set(row.filter_id, row);
    }

    const examplesByFilter = new Map<DoctorCleanerId, CleanerExampleRow[]>();
    for (const row of exampleRows) {
      let bucket = examplesByFilter.get(row.filter_id);
      if (!bucket) {
        bucket = [];
        examplesByFilter.set(row.filter_id, bucket);
      }
      bucket.push(row);
    }

    const filters: DoctorCleanerFilterStat[] = [];
    for (const definition of selected) {
      // Filters with no matches have no count row; they are reported with zeros.
      const tally = countByFilter.get(definition.id);
      const samples = examplesByFilter.get(definition.id) ?? [];
      filters.push({
        id: definition.id,
        label: definition.label,
        description: definition.description,
        conversationCount: tally?.conversation_count ?? 0,
        messageCount: tally?.message_count ?? 0,
        examples: samples.map((sample) => ({
          conversationId: sample.conversation_id,
          sessionKey: sample.session_key ?? null,
          messageCount: sample.message_count ?? 0,
          firstMessagePreview: truncatePreview(sample.first_message_preview ?? null),
        })),
      });
    }

    // Any count row carries the totals; with no matches there are no rows at all.
    const firstRow = countRows[0];
    return {
      filters,
      totalDistinctConversations: firstRow?.total_conversation_count ?? 0,
      totalDistinctMessages: firstRow?.total_message_count ?? 0,
    };
  } finally {
    dropTempCleanerScanTables(db);
  }
}
381
+
382
/**
 * Checks whether `sqlite_master` lists an object of type `table` with the given name.
 *
 * @param db Open database handle.
 * @param tableName Exact table name to look up (bound as a parameter).
 */
function hasTable(db: DatabaseSync, tableName: string): boolean {
  const lookup = db.prepare(
    `SELECT 1 AS found FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1`,
  );
  const match = lookup.get(tableName) as { found?: number } | undefined;
  return match != null && match.found === 1;
}
388
+
389
/** Drops the temp tables used by the apply path, if they exist. */
function dropTempCleanerTables(db: DatabaseSync): void {
  const applyTables = [
    "doctor_cleaner_first_messages",
    "doctor_cleaner_message_ids",
    "doctor_cleaner_summary_ids",
    "doctor_cleaner_conversation_ids",
  ];
  for (const table of applyTables) {
    db.exec(`DROP TABLE IF EXISTS temp.${table}`);
  }
}
395
+
396
/**
 * Stages the start of each conversation's first message (ordered by `seq`,
 * then `created_at`, then `message_id`) into
 * `temp.doctor_cleaner_first_messages`.
 *
 * @param db Open database handle.
 * @param candidateConversationsSql Optional SELECT yielding `conversation_id`
 *   values. When given, only messages of those conversations are ranked
 *   instead of the whole `messages` table. When omitted, every conversation
 *   with messages is staged.
 */
function stageTempCleanerFirstMessages(
  db: DatabaseSync,
  candidateConversationsSql?: string,
): void {
  db.exec(`
    CREATE TEMP TABLE doctor_cleaner_first_messages (
      conversation_id INTEGER PRIMARY KEY,
      first_message_preview TEXT
    )
  `);
  // The filter is on conversation_id, so each remaining partition still holds
  // all of its conversation's messages and row_num = 1 is the true first one.
  const candidateFilterSql = candidateConversationsSql
    ? `WHERE m.conversation_id IN (
        ${candidateConversationsSql}
      )`
    : "";
  db.exec(`
    WITH ranked_messages AS (
      SELECT
        m.conversation_id,
        substr(m.content, 1, ${SCAN_FIRST_MESSAGE_PREVIEW_LIMIT}) AS content,
        ROW_NUMBER() OVER (
          PARTITION BY m.conversation_id
          ORDER BY m.seq ASC, m.created_at ASC, m.message_id ASC
        ) AS row_num
      FROM messages m
      ${candidateFilterSql}
    )
    INSERT INTO temp.doctor_cleaner_first_messages (
      conversation_id,
      first_message_preview
    )
    SELECT
      conversation_id,
      MAX(CASE WHEN row_num = 1 THEN content END) AS first_message_preview
    FROM ranked_messages
    GROUP BY conversation_id
  `);
}

/**
 * Rebuilds the apply-path temp tables holding the ids to delete: matched
 * conversations, their summaries, and their messages.
 *
 * Existing apply temp tables are dropped first. With no definitions the three
 * id tables are created but left empty.
 *
 * @param db Open database handle.
 * @param definitions Cleaners whose matches should be staged.
 */
function stageCleanerConversationIds(
  db: DatabaseSync,
  definitions: CleanerDefinition[],
): void {
  dropTempCleanerTables(db);
  db.exec(`CREATE TEMP TABLE doctor_cleaner_conversation_ids (conversation_id INTEGER PRIMARY KEY)`);
  db.exec(`CREATE TEMP TABLE doctor_cleaner_summary_ids (summary_id TEXT PRIMARY KEY)`);
  db.exec(`CREATE TEMP TABLE doctor_cleaner_message_ids (message_id INTEGER PRIMARY KEY)`);

  if (definitions.length === 0) {
    return;
  }

  const firstMessageDefinitions = definitions.filter(
    (definition) => definition.needsFirstMessage,
  );
  const needsFirstMessage = firstMessageDefinitions.length > 0;
  if (needsFirstMessage) {
    // Only conversations passing a first-message cleaner's candidate predicate
    // can match its full predicate (the scan path relies on the same
    // relationship), so ranking is limited to them rather than to every
    // message in the database.
    stageTempCleanerFirstMessages(
      db,
      buildCandidateConversationsSql(firstMessageDefinitions),
    );
  }
  const matchedConversationsSql = buildMatchedConversationsSql({
    definitions,
    includeFilterId: false,
    messageStatsTableName: needsFirstMessage
      ? "temp.doctor_cleaner_first_messages"
      : undefined,
  });
  // DISTINCT: a conversation matched by several cleaners is staged once.
  db.exec(`
    INSERT INTO temp.doctor_cleaner_conversation_ids (conversation_id)
    SELECT DISTINCT conversation_id
    FROM (
      ${matchedConversationsSql}
    )
  `);

  db.exec(`
    INSERT INTO temp.doctor_cleaner_summary_ids (summary_id)
    SELECT s.summary_id
    FROM summaries s
    JOIN temp.doctor_cleaner_conversation_ids ids
      ON ids.conversation_id = s.conversation_id
  `);

  db.exec(`
    INSERT INTO temp.doctor_cleaner_message_ids (message_id)
    SELECT m.message_id
    FROM messages m
    JOIN temp.doctor_cleaner_conversation_ids ids
      ON ids.conversation_id = m.conversation_id
  `);
}
474
+
475
/**
 * Reads how many conversation and message ids are currently staged for deletion.
 *
 * @returns The row counts of the staged id tables; missing or NULL values are
 *   reported as 0.
 */
function readTempCleanerDeleteCounts(db: DatabaseSync): {
  conversationCount: number;
  messageCount: number;
} {
  const countsQuery = db.prepare(
    `SELECT
         COALESCE((SELECT COUNT(*) FROM temp.doctor_cleaner_conversation_ids), 0) AS conversation_count,
         COALESCE((SELECT COUNT(*) FROM temp.doctor_cleaner_message_ids), 0) AS message_count`,
  );
  const staged = countsQuery.get() as
    | { conversation_count: number | null; message_count: number | null }
    | undefined;
  const conversationCount = staged?.conversation_count ?? 0;
  const messageCount = staged?.message_count ?? 0;
  return { conversationCount, messageCount };
}
491
+
492
/**
 * Deletes the staged conversations together with rows that reference them.
 *
 * Dependent rows (summary links, context items and, when the tables exist,
 * full-text index entries) are removed first, then the conversations themselves.
 *
 * @returns The change count reported by the final `DELETE FROM conversations`.
 */
function deleteTempCleanerCandidates(db: DatabaseSync): number {
  // The full-text tables are optional, so probe for them before touching them.
  const messagesFtsExists = hasTable(db, "messages_fts");
  const summariesFtsExists = hasTable(db, "summaries_fts");
  const summariesFtsCjkExists = hasTable(db, "summaries_fts_cjk");

  const dependentDeletes: string[] = [
    `DELETE FROM summary_messages
       WHERE summary_id IN (SELECT summary_id FROM temp.doctor_cleaner_summary_ids)`,
    `DELETE FROM summary_messages
       WHERE message_id IN (SELECT message_id FROM temp.doctor_cleaner_message_ids)`,
    `DELETE FROM summary_parents
       WHERE summary_id IN (SELECT summary_id FROM temp.doctor_cleaner_summary_ids)`,
    `DELETE FROM summary_parents
       WHERE parent_summary_id IN (SELECT summary_id FROM temp.doctor_cleaner_summary_ids)`,
    `DELETE FROM context_items
       WHERE message_id IN (SELECT message_id FROM temp.doctor_cleaner_message_ids)`,
    `DELETE FROM context_items
       WHERE summary_id IN (SELECT summary_id FROM temp.doctor_cleaner_summary_ids)`,
    `DELETE FROM context_items
       WHERE conversation_id IN (SELECT conversation_id FROM temp.doctor_cleaner_conversation_ids)`,
  ];
  if (messagesFtsExists) {
    dependentDeletes.push(
      `DELETE FROM messages_fts
         WHERE rowid IN (SELECT message_id FROM temp.doctor_cleaner_message_ids)`,
    );
  }
  if (summariesFtsExists) {
    dependentDeletes.push(
      `DELETE FROM summaries_fts
         WHERE summary_id IN (SELECT summary_id FROM temp.doctor_cleaner_summary_ids)`,
    );
  }
  if (summariesFtsCjkExists) {
    dependentDeletes.push(
      `DELETE FROM summaries_fts_cjk
         WHERE summary_id IN (SELECT summary_id FROM temp.doctor_cleaner_summary_ids)`,
    );
  }
  for (const statementSql of dependentDeletes) {
    db.prepare(statementSql).run();
  }

  const outcome = db
    .prepare(
      `DELETE FROM conversations
         WHERE conversation_id IN (SELECT conversation_id FROM temp.doctor_cleaner_conversation_ids)`,
    )
    .run();
  // `changes` may be a number or a bigint; normalize to number.
  return Number(outcome.changes ?? 0);
}
556
+
557
/** Wraps `value` in single quotes as a SQL string literal, doubling any embedded single quote. */
function quoteSqlString(value: string): string {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
560
+
561
/**
 * Explains why cleaners cannot be applied to the given database, if they cannot.
 *
 * @param databasePath Database path as configured by the caller.
 * @returns `null` when `getFileBackedDatabasePath` yields a path for it,
 *   otherwise a human-readable reason.
 */
export function getDoctorCleanerApplyUnavailableReason(databasePath: string): string | null {
  if (getFileBackedDatabasePath(databasePath)) {
    return null;
  }
  return "Cleaner apply requires a file-backed SQLite database so Lossless Claw can create a backup first.";
}
566
+
567
/**
 * Chooses a backup file path next to the database file.
 *
 * The file name is the database's base name followed by `.doctor-cleaners-`,
 * a compact ISO timestamp, a short random suffix, and `.bak`.
 *
 * @returns The backup path, or `null` when the database is not file-backed.
 */
function buildCleanerBackupPath(databasePath: string): string | null {
  const sourcePath = getFileBackedDatabasePath(databasePath);
  if (!sourcePath) {
    return null;
  }

  const stamp = new Date().toISOString().replace(/[-:.]/g, "");
  const nonce = Math.random().toString(36).slice(2, 8);
  const backupName = `${basename(sourcePath)}.doctor-cleaners-${stamp}-${nonce}.bak`;
  return join(dirname(sourcePath), backupName);
}
580
+
581
/**
 * Deletes every conversation matched by the selected cleaners.
 *
 * Steps, in order:
 * 1. Write a backup copy of the database with `VACUUM INTO`.
 * 2. Inside a `BEGIN IMMEDIATE` transaction, stage the ids to delete and
 *    remove the conversations together with their dependent rows.
 * 3. Optionally run `VACUUM` and truncate the WAL, if anything was deleted.
 *
 * @param db Open database handle.
 * @param options.databasePath Path of the database; must be file-backed so the
 *   backup can be created.
 * @param options.filterIds Cleaners to apply; omitted or empty selects all.
 * @param options.vacuum Whether to compact the database after deleting.
 * @returns `kind: "unavailable"` with a reason when no valid cleaner is
 *   selected or no backup path can be determined. Otherwise `kind: "applied"`,
 *   where `deletedMessages` is the number of messages staged for the matched
 *   conversations and `deletedConversations` is the change count of the
 *   conversation delete.
 * @throws Rethrows any error raised while staging or deleting, after
 *   attempting to roll the transaction back. Errors from the backup or the
 *   final vacuum propagate unchanged.
 */
export function applyDoctorCleaners(
  db: DatabaseSync,
  options: {
    databasePath: string;
    filterIds?: DoctorCleanerId[];
    vacuum?: boolean;
  },
): DoctorCleanerApplyResult {
  const definitions = getCleanerDefinitions(options.filterIds);
  if (definitions.length === 0) {
    return {
      kind: "unavailable",
      reason: "No valid doctor cleaner filters were selected.",
    };
  }

  const unavailableReason = getDoctorCleanerApplyUnavailableReason(options.databasePath);
  if (unavailableReason) {
    return {
      kind: "unavailable",
      reason: unavailableReason,
    };
  }
  const backupPath = buildCleanerBackupPath(options.databasePath);
  if (!backupPath) {
    return {
      kind: "unavailable",
      reason:
        getDoctorCleanerApplyUnavailableReason(options.databasePath)
        ?? "Cleaner apply could not determine a backup path.",
    };
  }

  // The backup is taken before any row is touched.
  db.exec(`VACUUM INTO ${quoteSqlString(backupPath)}`);

  let deletedConversations = 0;
  let deletedMessages = 0;
  let vacuumed = false;
  let transactionActive = false;

  try {
    db.exec("BEGIN IMMEDIATE");
    transactionActive = true;
    stageCleanerConversationIds(db, definitions);
    const counts = readTempCleanerDeleteCounts(db);
    deletedMessages = counts.messageCount;
    if (counts.conversationCount > 0) {
      deletedConversations = deleteTempCleanerCandidates(db);
    }
    db.exec("COMMIT");
    transactionActive = false;
  } catch (error) {
    if (transactionActive) {
      try {
        db.exec("ROLLBACK");
      } catch {
        // SQLite rolls back automatically on some failures (e.g. disk full or
        // I/O errors); ROLLBACK then fails because no transaction is active.
        // That is harmless and must not replace the original error.
      }
    }
    throw error;
  } finally {
    dropTempCleanerTables(db);
  }

  if (options.vacuum && deletedConversations > 0) {
    db.exec("VACUUM");
    db.exec("PRAGMA wal_checkpoint(TRUNCATE)");
    vacuumed = true;
  }

  return {
    kind: "applied",
    filterIds: definitions.map((definition) => definition.id),
    deletedConversations,
    deletedMessages,
    vacuumed,
    backupPath,
  };
}
package/src/retrieval.ts CHANGED
@@ -10,6 +10,7 @@ import type {
10
10
  LargeFileRecord,
11
11
  } from "./store/summary-store.js";
12
12
  import type { SearchSort } from "./store/full-text-sort.js";
13
+ import { estimateTokens } from "./estimate-tokens.js";
13
14
 
14
15
  // ── Public interfaces ────────────────────────────────────────────────────────
15
16
 
@@ -114,10 +115,6 @@ export interface ExpandResult {
114
115
 
115
116
  // ── Helpers ──────────────────────────────────────────────────────────────────
116
117
 
117
- /** Rough token estimate: ~4 chars per token. */
118
- function estimateTokens(content: string): number {
119
- return Math.ceil(content.length / 4);
120
- }
121
118
 
122
119
  // ── RetrievalEngine ──────────────────────────────────────────────────────────
123
120
 
package/src/summarize.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { describeLogError } from "./lcm-log.js";
2
2
  import type { LcmDependencies } from "./types.js";
3
+ import { estimateTokens } from "./estimate-tokens.js";
3
4
 
4
5
  export type LcmSummarizeOptions = {
5
6
  previousSummary?: string;
@@ -174,10 +175,6 @@ function resolveProviderApiFromLegacyConfig(
174
175
  return undefined;
175
176
  }
176
177
 
177
- /** Approximate token estimate used for target-sizing prompts. */
178
- function estimateTokens(text: string): number {
179
- return Math.ceil(text.length / 4);
180
- }
181
178
 
182
179
  /** Narrow unknown values to plain object records. */
183
180
  function isRecord(value: unknown): value is Record<string, unknown> {