@martian-engineering/lossless-claw 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/compaction.ts CHANGED
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
2
2
  import type { ConversationStore, CreateMessagePartInput } from "./store/conversation-store.js";
3
3
  import type { SummaryStore, SummaryRecord, ContextItemRecord } from "./store/summary-store.js";
4
4
  import { extractFileIdsFromContent } from "./large-files.js";
5
+ import { NOOP_LCM_LOGGER, type LcmLogger } from "./lcm-log.js";
5
6
  import { LcmProviderAuthError } from "./summarize.js";
6
7
 
7
8
  // ── Public types ─────────────────────────────────────────────────────────────
@@ -68,7 +69,14 @@ type CompactionSummarizeFn = (
68
69
  aggressive?: boolean,
69
70
  options?: CompactionSummarizeOptions,
70
71
  ) => Promise<string>;
71
- type PassResult = { summaryId: string; level: CompactionLevel };
72
+ type PassResult = {
73
+ summaryId: string;
74
+ level: CompactionLevel;
75
+ /** Token count of source items removed from context. */
76
+ removedTokens: number;
77
+ /** Token count of the newly created summary. */
78
+ addedTokens: number;
79
+ };
72
80
  type LeafChunkSelection = {
73
81
  items: ContextItemRecord[];
74
82
  rawTokensOutsideTail: number;
@@ -335,12 +343,59 @@ function isMediaAttachmentPart(part: CreateMessagePartInput | { partType: string
335
343
  // ── CompactionEngine ─────────────────────────────────────────────────────────
336
344
 
337
345
  export class CompactionEngine {
346
+ /**
347
+ * Per-conversation context items cache, active only during compaction
348
+ * entry points. null when inactive — external callers (e.g., engine.ts
349
+ * evaluateLeafTrigger) get uncached reads.
350
+ *
351
+ * Uses a reference count so concurrent compactions on different
352
+ * conversations don't interfere: each withContextCache increments
353
+ * on entry and decrements on exit; the cache is only destroyed
354
+ * when all users have exited.
355
+ */
356
+ private _contextItemsCache: Map<number, ContextItemRecord[]> | null = null;
357
+ private _contextItemsCacheRefCount = 0;
358
+
338
359
  constructor(
339
360
  private conversationStore: ConversationStore,
340
361
  private summaryStore: SummaryStore,
341
362
  private config: CompactionConfig,
363
+ private log: LcmLogger = NOOP_LCM_LOGGER,
342
364
  ) {}
343
365
 
366
+ /** Read context items, using per-phase cache when active. */
367
+ private async getContextItemsCached(conversationId: number): Promise<ContextItemRecord[]> {
368
+ if (this._contextItemsCache) {
369
+ if (this._contextItemsCache.has(conversationId)) {
370
+ return this._contextItemsCache.get(conversationId)!;
371
+ }
372
+ const items = await this.summaryStore.getContextItems(conversationId);
373
+ this._contextItemsCache.set(conversationId, items);
374
+ return items;
375
+ }
376
+ return this.summaryStore.getContextItems(conversationId);
377
+ }
378
+
379
+ /** Invalidate cache for a conversation after context mutation. */
380
+ private invalidateContextCache(conversationId: number): void {
381
+ this._contextItemsCache?.delete(conversationId);
382
+ }
383
+
384
+ /** Execute with context cache active. Reference-counted for concurrent use. */
385
+ private async withContextCache<T>(fn: () => Promise<T>): Promise<T> {
386
+ if (!this._contextItemsCache) this._contextItemsCache = new Map();
387
+ this._contextItemsCacheRefCount++;
388
+ try {
389
+ return await fn();
390
+ } finally {
391
+ this._contextItemsCacheRefCount--;
392
+ if (this._contextItemsCacheRefCount <= 0) {
393
+ this._contextItemsCache = null;
394
+ this._contextItemsCacheRefCount = 0;
395
+ }
396
+ }
397
+ }
398
+
344
399
  // ── evaluate ─────────────────────────────────────────────────────────────
345
400
 
346
401
  /** Evaluate whether compaction is needed. */
@@ -383,13 +438,13 @@ export class CompactionEngine {
383
438
  * `leafChunkTokens`. This lets callers trigger a soft incremental leaf pass
384
439
  * before the full context threshold is breached.
385
440
  */
386
- async evaluateLeafTrigger(conversationId: number): Promise<{
441
+ async evaluateLeafTrigger(conversationId: number, leafChunkTokensOverride?: number): Promise<{
387
442
  shouldCompact: boolean;
388
443
  rawTokensOutsideTail: number;
389
444
  threshold: number;
390
445
  }> {
391
446
  const rawTokensOutsideTail = await this.countRawTokensOutsideFreshTail(conversationId);
392
- const threshold = this.resolveLeafChunkTokens();
447
+ const threshold = this.resolveLeafChunkTokens(leafChunkTokensOverride);
393
448
  return {
394
449
  shouldCompact: rawTokensOutsideTail >= threshold,
395
450
  rawTokensOutsideTail,
@@ -409,7 +464,7 @@ export class CompactionEngine {
409
464
  hardTrigger?: boolean;
410
465
  summaryModel?: string;
411
466
  }): Promise<CompactionResult> {
412
- return this.compactFullSweep(input);
467
+ return this.withContextCache(() => this.compactFullSweep(input));
413
468
  }
414
469
 
415
470
  /**
@@ -421,6 +476,20 @@ export class CompactionEngine {
421
476
  conversationId: number;
422
477
  tokenBudget: number;
423
478
  summarize: CompactionSummarizeFn;
479
+ leafChunkTokens?: number;
480
+ force?: boolean;
481
+ previousSummaryContent?: string;
482
+ summaryModel?: string;
483
+ allowCondensedPasses?: boolean;
484
+ }): Promise<CompactionResult> {
485
+ return this.withContextCache(() => this._compactLeafImpl(input));
486
+ }
487
+
488
+ private async _compactLeafImpl(input: {
489
+ conversationId: number;
490
+ tokenBudget: number;
491
+ summarize: CompactionSummarizeFn;
492
+ leafChunkTokens?: number;
424
493
  force?: boolean;
425
494
  previousSummaryContent?: string;
426
495
  summaryModel?: string;
@@ -429,7 +498,7 @@ export class CompactionEngine {
429
498
 
430
499
  const tokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
431
500
  const threshold = Math.floor(this.config.contextThreshold * tokenBudget);
432
- const leafTrigger = await this.evaluateLeafTrigger(conversationId);
501
+ const leafTrigger = await this.evaluateLeafTrigger(conversationId, input.leafChunkTokens);
433
502
 
434
503
  if (!force && tokensBefore <= threshold && !leafTrigger.shouldCompact) {
435
504
  return {
@@ -440,7 +509,7 @@ export class CompactionEngine {
440
509
  };
441
510
  }
442
511
 
443
- const leafChunk = await this.selectOldestLeafChunk(conversationId);
512
+ const leafChunk = await this.selectOldestLeafChunk(conversationId, input.leafChunkTokens);
444
513
  if (leafChunk.items.length === 0) {
445
514
  return {
446
515
  actionTaken: false,
@@ -470,7 +539,8 @@ export class CompactionEngine {
470
539
  authFailure: true,
471
540
  };
472
541
  }
473
- const tokensAfterLeaf = await this.summaryStore.getContextTokenCount(conversationId);
542
+ // Delta tracking: compute token change from pass results instead of re-querying DB
543
+ const tokensAfterLeaf = tokensBefore - leafResult.removedTokens + leafResult.addedTokens;
474
544
 
475
545
  await this.persistCompactionEvents({
476
546
  conversationId,
@@ -488,7 +558,8 @@ export class CompactionEngine {
488
558
 
489
559
  const incrementalMaxDepth = this.resolveIncrementalMaxDepth();
490
560
  const condensedMinChunkTokens = this.resolveCondensedMinChunkTokens();
491
- if (incrementalMaxDepth > 0) {
561
+ let runningTokens = tokensAfterLeaf;
562
+ if (incrementalMaxDepth > 0 && input.allowCondensedPasses !== false) {
492
563
  for (let targetDepth = 0; targetDepth < incrementalMaxDepth; targetDepth++) {
493
564
  const fanout = this.resolveFanoutForDepth(targetDepth, false);
494
565
  const chunk = await this.selectOldestChunkAtDepth(conversationId, targetDepth);
@@ -496,7 +567,7 @@ export class CompactionEngine {
496
567
  break;
497
568
  }
498
569
 
499
- const passTokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
570
+ const passTokensBefore = runningTokens;
500
571
  const condenseResult = await this.condensedPass(
501
572
  conversationId,
502
573
  chunk.items,
@@ -507,7 +578,7 @@ export class CompactionEngine {
507
578
  if (!condenseResult) {
508
579
  break;
509
580
  }
510
- const passTokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
581
+ const passTokensAfter = passTokensBefore - condenseResult.removedTokens + condenseResult.addedTokens;
511
582
  await this.persistCompactionEvents({
512
583
  conversationId,
513
584
  tokensBefore: passTokensBefore,
@@ -518,6 +589,7 @@ export class CompactionEngine {
518
589
  });
519
590
 
520
591
  tokensAfter = passTokensAfter;
592
+ runningTokens = passTokensAfter;
521
593
  condensed = true;
522
594
  createdSummaryId = condenseResult.summaryId;
523
595
  level = condenseResult.level;
@@ -568,7 +640,7 @@ export class CompactionEngine {
568
640
  };
569
641
  }
570
642
 
571
- const contextItems = await this.summaryStore.getContextItems(conversationId);
643
+ const contextItems = await this.getContextItemsCached(conversationId);
572
644
  if (contextItems.length === 0) {
573
645
  return {
574
646
  actionTaken: false,
@@ -587,13 +659,16 @@ export class CompactionEngine {
587
659
  let hadAuthFailure = false;
588
660
 
589
661
  // Phase 1: leaf passes over oldest raw chunks outside the protected tail.
662
+ // Delta tracking: maintain a running token count instead of re-querying DB
663
+ // after each pass. The arithmetic is exact: tokensAfter = tokensBefore - removed + added.
664
+ let runningTokens = tokensBefore;
590
665
  while (true) {
591
666
  const leafChunk = await this.selectOldestLeafChunk(conversationId);
592
667
  if (leafChunk.items.length === 0) {
593
668
  break;
594
669
  }
595
670
 
596
- const passTokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
671
+ const passTokensBefore = runningTokens;
597
672
  const leafResult = await this.leafPass(
598
673
  conversationId,
599
674
  leafChunk.items,
@@ -605,7 +680,7 @@ export class CompactionEngine {
605
680
  hadAuthFailure = true;
606
681
  break;
607
682
  }
608
- const passTokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
683
+ const passTokensAfter = passTokensBefore - leafResult.removedTokens + leafResult.addedTokens;
609
684
  await this.persistCompactionEvents({
610
685
  conversationId,
611
686
  tokensBefore: passTokensBefore,
@@ -619,6 +694,7 @@ export class CompactionEngine {
619
694
  createdSummaryId = leafResult.summaryId;
620
695
  level = leafResult.level;
621
696
  previousSummaryContent = leafResult.content;
697
+ runningTokens = passTokensAfter;
622
698
 
623
699
  if (!force && passTokensAfter <= threshold) {
624
700
  previousTokens = passTokensAfter;
@@ -640,7 +716,7 @@ export class CompactionEngine {
640
716
  break;
641
717
  }
642
718
 
643
- const passTokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
719
+ const passTokensBefore = runningTokens;
644
720
  const condenseResult = await this.condensedPass(
645
721
  conversationId,
646
722
  candidate.chunk.items,
@@ -652,7 +728,7 @@ export class CompactionEngine {
652
728
  hadAuthFailure = true;
653
729
  break;
654
730
  }
655
- const passTokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
731
+ const passTokensAfter = passTokensBefore - condenseResult.removedTokens + condenseResult.addedTokens;
656
732
  await this.persistCompactionEvents({
657
733
  conversationId,
658
734
  tokensBefore: passTokensBefore,
@@ -666,6 +742,7 @@ export class CompactionEngine {
666
742
  condensed = true;
667
743
  createdSummaryId = condenseResult.summaryId;
668
744
  level = condenseResult.level;
745
+ runningTokens = passTokensAfter;
669
746
 
670
747
  if (!force && passTokensAfter <= threshold) {
671
748
  previousTokens = passTokensAfter;
@@ -677,7 +754,7 @@ export class CompactionEngine {
677
754
  previousTokens = passTokensAfter;
678
755
  }
679
756
 
680
- const tokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
757
+ const tokensAfter = runningTokens;
681
758
 
682
759
  return {
683
760
  actionTaken,
@@ -700,6 +777,17 @@ export class CompactionEngine {
700
777
  currentTokens?: number;
701
778
  summarize: CompactionSummarizeFn;
702
779
  summaryModel?: string;
780
+ }): Promise<{ success: boolean; rounds: number; finalTokens: number; authFailure?: boolean }> {
781
+ return this.withContextCache(() => this._compactUntilUnderImpl(input));
782
+ }
783
+
784
+ private async _compactUntilUnderImpl(input: {
785
+ conversationId: number;
786
+ tokenBudget: number;
787
+ targetTokens?: number;
788
+ currentTokens?: number;
789
+ summarize: CompactionSummarizeFn;
790
+ summaryModel?: string;
703
791
  }): Promise<{ success: boolean; rounds: number; finalTokens: number; authFailure?: boolean }> {
704
792
  const { conversationId, tokenBudget, summarize } = input;
705
793
  const targetTokens =
@@ -763,8 +851,8 @@ export class CompactionEngine {
763
851
  lastTokens = result.tokensAfter;
764
852
  }
765
853
 
766
- // Exhausted all rounds
767
- const finalTokens = await this.summaryStore.getContextTokenCount(conversationId);
854
+ // Exhausted all rounds — use the last known token count from compact() result
855
+ const finalTokens = lastTokens;
768
856
  return {
769
857
  success: finalTokens <= targetTokens,
770
858
  rounds: this.config.maxRounds,
@@ -775,7 +863,14 @@ export class CompactionEngine {
775
863
  // ── Private helpers ──────────────────────────────────────────────────────
776
864
 
777
865
  /** Normalize configured leaf chunk size to a safe positive integer. */
778
- private resolveLeafChunkTokens(): number {
866
+ private resolveLeafChunkTokens(leafChunkTokensOverride?: number): number {
867
+ if (
868
+ typeof leafChunkTokensOverride === "number" &&
869
+ Number.isFinite(leafChunkTokensOverride) &&
870
+ leafChunkTokensOverride > 0
871
+ ) {
872
+ return Math.floor(leafChunkTokensOverride);
873
+ }
779
874
  if (
780
875
  typeof this.config.leafChunkTokens === "number" &&
781
876
  Number.isFinite(this.config.leafChunkTokens) &&
@@ -838,7 +933,7 @@ export class CompactionEngine {
838
933
 
839
934
  /** Sum raw message tokens outside the protected fresh tail. */
840
935
  private async countRawTokensOutsideFreshTail(conversationId: number): Promise<number> {
841
- const contextItems = await this.summaryStore.getContextItems(conversationId);
936
+ const contextItems = await this.getContextItemsCached(conversationId);
842
937
  const freshTailOrdinal = this.resolveFreshTailOrdinal(contextItems);
843
938
  let rawTokens = 0;
844
939
 
@@ -861,10 +956,13 @@ export class CompactionEngine {
861
956
  * The selected chunk size is capped by `leafChunkTokens`, but we always pick
862
957
  * at least one message when any compactable message exists.
863
958
  */
864
- private async selectOldestLeafChunk(conversationId: number): Promise<LeafChunkSelection> {
865
- const contextItems = await this.summaryStore.getContextItems(conversationId);
959
+ private async selectOldestLeafChunk(
960
+ conversationId: number,
961
+ leafChunkTokensOverride?: number,
962
+ ): Promise<LeafChunkSelection> {
963
+ const contextItems = await this.getContextItemsCached(conversationId);
866
964
  const freshTailOrdinal = this.resolveFreshTailOrdinal(contextItems);
867
- const threshold = this.resolveLeafChunkTokens();
965
+ const threshold = this.resolveLeafChunkTokens(leafChunkTokensOverride);
868
966
 
869
967
  let rawTokensOutsideTail = 0;
870
968
  for (const item of contextItems) {
@@ -927,7 +1025,7 @@ export class CompactionEngine {
927
1025
  }
928
1026
 
929
1027
  const startOrdinal = Math.min(...messageItems.map((item) => item.ordinal));
930
- const priorSummaryItems = (await this.summaryStore.getContextItems(conversationId))
1028
+ const priorSummaryItems = (await this.getContextItemsCached(conversationId))
931
1029
  .filter(
932
1030
  (item) =>
933
1031
  item.ordinal < startOrdinal &&
@@ -1051,7 +1149,7 @@ export class CompactionEngine {
1051
1149
  hardTrigger: boolean;
1052
1150
  }): Promise<CondensedPhaseCandidate | null> {
1053
1151
  const { conversationId, hardTrigger } = params;
1054
- const contextItems = await this.summaryStore.getContextItems(conversationId);
1152
+ const contextItems = await this.getContextItemsCached(conversationId);
1055
1153
  const freshTailOrdinal = this.resolveFreshTailOrdinal(contextItems);
1056
1154
  const minChunkTokens = this.resolveCondensedMinChunkTokens();
1057
1155
  const depthLevels = await this.summaryStore.getDistinctDepthsInContext(conversationId, {
@@ -1088,7 +1186,7 @@ export class CompactionEngine {
1088
1186
  targetDepth: number,
1089
1187
  freshTailOrdinalOverride?: number,
1090
1188
  ): Promise<CondensedChunkSelection> {
1091
- const contextItems = await this.summaryStore.getContextItems(conversationId);
1189
+ const contextItems = await this.getContextItemsCached(conversationId);
1092
1190
  const freshTailOrdinal =
1093
1191
  typeof freshTailOrdinalOverride === "number"
1094
1192
  ? freshTailOrdinalOverride
@@ -1147,7 +1245,7 @@ export class CompactionEngine {
1147
1245
  }
1148
1246
 
1149
1247
  const startOrdinal = Math.min(...summaryItems.map((item) => item.ordinal));
1150
- const priorSummaryItems = (await this.summaryStore.getContextItems(conversationId))
1248
+ const priorSummaryItems = (await this.getContextItemsCached(conversationId))
1151
1249
  .filter(
1152
1250
  (item) =>
1153
1251
  item.ordinal < startOrdinal &&
@@ -1264,7 +1362,7 @@ export class CompactionEngine {
1264
1362
  const maxTokens = Math.ceil(params.targetTokens * this.config.summaryMaxOverageFactor);
1265
1363
 
1266
1364
  if (summaryTokens > Math.ceil(params.targetTokens * 1.5)) {
1267
- console.warn(
1365
+ this.log.warn(
1268
1366
  `[lcm] summary exceeds target by ${Math.round((summaryTokens / params.targetTokens - 1) * 100)}%: ${summaryTokens} tokens vs target ${params.targetTokens}`,
1269
1367
  );
1270
1368
  }
@@ -1330,7 +1428,7 @@ export class CompactionEngine {
1330
1428
  summarize: CompactionSummarizeFn,
1331
1429
  previousSummaryContent?: string,
1332
1430
  summaryModel?: string,
1333
- ): Promise<{ summaryId: string; level: CompactionLevel; content: string } | null> {
1431
+ ): Promise<{ summaryId: string; level: CompactionLevel; content: string; removedTokens: number; addedTokens: number } | null> {
1334
1432
  // Fetch full message content for each context item
1335
1433
  const messageContents: { messageId: number; content: string; createdAt: Date; tokenCount: number }[] =
1336
1434
  [];
@@ -1369,7 +1467,7 @@ export class CompactionEngine {
1369
1467
  targetTokens: this.config.leafTargetTokens,
1370
1468
  });
1371
1469
  if (!summary) {
1372
- console.warn(
1470
+ this.log.warn(
1373
1471
  `[lcm] leaf compaction skipped summary write; conversationId=${conversationId}; chunkMessages=${messageContents.length}`,
1374
1472
  );
1375
1473
  return null;
@@ -1378,6 +1476,16 @@ export class CompactionEngine {
1378
1476
  // Persist the leaf summary
1379
1477
  const summaryId = generateSummaryId(summary.content);
1380
1478
  const tokenCount = estimateTokens(summary.content);
1479
+ // Note: removedTokens uses resolveMessageTokenCount values (which fall back to
1480
+ // estimateTokens for messages with token_count <= 0). This can diverge from
1481
+ // getContextTokenCount() which would sum the stored 0. The delta feeds into
1482
+ // stopping decisions (threshold checks, progress guards), but the divergence
1483
+ // is bounded to empty/corrupt messages (token_count=0) which are rare.
1484
+ // For summaries, removedTokens matches the DB exactly (same tokenCount column).
1485
+ const removedTokens = messageContents.reduce(
1486
+ (sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
1487
+ 0,
1488
+ );
1381
1489
 
1382
1490
  await this.summaryStore.withTransaction(async () => {
1383
1491
  await this.summaryStore.insertSummary({
@@ -1398,10 +1506,7 @@ export class CompactionEngine {
1398
1506
  : undefined,
1399
1507
  descendantCount: 0,
1400
1508
  descendantTokenCount: 0,
1401
- sourceMessageTokenCount: messageContents.reduce(
1402
- (sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
1403
- 0,
1404
- ),
1509
+ sourceMessageTokenCount: removedTokens,
1405
1510
  model: summaryModel,
1406
1511
  });
1407
1512
 
@@ -1421,8 +1526,9 @@ export class CompactionEngine {
1421
1526
  summaryId,
1422
1527
  });
1423
1528
  });
1529
+ this.invalidateContextCache(conversationId);
1424
1530
 
1425
- return { summaryId, level: summary.level, content: summary.content };
1531
+ return { summaryId, level: summary.level, content: summary.content, removedTokens, addedTokens: tokenCount };
1426
1532
  }
1427
1533
 
1428
1534
  // ── Private: Condensed Pass ──────────────────────────────────────────────
@@ -1479,7 +1585,7 @@ export class CompactionEngine {
1479
1585
  targetTokens: this.config.condensedTargetTokens,
1480
1586
  });
1481
1587
  if (!condensed) {
1482
- console.warn(
1588
+ this.log.warn(
1483
1589
  `[lcm] condensed compaction skipped summary write; conversationId=${conversationId}; depth=${targetDepth}; chunkSummaries=${summaryRecords.length}`,
1484
1590
  );
1485
1591
  return null;
@@ -1560,8 +1666,13 @@ export class CompactionEngine {
1560
1666
  summaryId,
1561
1667
  });
1562
1668
  });
1669
+ this.invalidateContextCache(conversationId);
1563
1670
 
1564
- return { summaryId, level: condensed.level };
1671
+ const removedTokens = summaryRecords.reduce(
1672
+ (sum, s) => sum + Math.max(0, Math.floor(s.tokenCount)),
1673
+ 0,
1674
+ );
1675
+ return { summaryId, level: condensed.level, removedTokens, addedTokens: tokenCount };
1565
1676
  }
1566
1677
 
1567
1678
  /** Emit compaction telemetry without mutating canonical conversation history. */
@@ -1638,7 +1749,7 @@ export class CompactionEngine {
1638
1749
  condensedPassOccurred: boolean;
1639
1750
  }): Promise<void> {
1640
1751
  const content = `LCM compaction ${input.pass} pass (${input.level}): ${input.tokensBefore} -> ${input.tokensAfter}`;
1641
- console.info(
1752
+ this.log.info(
1642
1753
  `[lcm] ${content} conversation=${input.conversationId} summary=${input.createdSummaryId}`,
1643
1754
  );
1644
1755
  }
package/src/db/config.ts CHANGED
@@ -1,6 +1,18 @@
1
1
  import { homedir } from "os";
2
2
  import { join } from "path";
3
3
 
4
+ export type CacheAwareCompactionConfig = {
5
+ enabled: boolean;
6
+ maxColdCacheCatchupPasses: number;
7
+ hotCachePressureFactor: number;
8
+ hotCacheBudgetHeadroomRatio: number;
9
+ };
10
+
11
+ export type DynamicLeafChunkTokensConfig = {
12
+ enabled: boolean;
13
+ max: number;
14
+ };
15
+
4
16
  export type LcmConfig = {
5
17
  enabled: boolean;
6
18
  databasePath: string;
@@ -32,10 +44,6 @@ export type LcmConfig = {
32
44
  largeFileSummaryProvider: string;
33
45
  /** Model override for large-file text summarization. */
34
46
  largeFileSummaryModel: string;
35
- /** Model override for conversation summarization. */
36
- summaryModel: string;
37
- /** Provider override for conversation summarization. */
38
- summaryProvider: string;
39
47
  /** Provider override for lcm_expand_query sub-agent. */
40
48
  expansionProvider: string;
41
49
  /** Model override for lcm_expand_query sub-agent. */
@@ -58,6 +66,12 @@ export type LcmConfig = {
58
66
  circuitBreakerThreshold: number;
59
67
  /** Cooldown in milliseconds before the circuit breaker auto-resets (default 30 min). */
60
68
  circuitBreakerCooldownMs: number;
69
+ /** Explicit fallback provider/model pairs for compaction summarization. */
70
+ fallbackProviders: Array<{ provider: string; model: string }>;
71
+ /** Cache-sensitive policy for incremental leaf compaction. */
72
+ cacheAwareCompaction: CacheAwareCompactionConfig;
73
+ /** Dynamic step-band policy for incremental leaf chunk sizing. */
74
+ dynamicLeafChunkTokens: DynamicLeafChunkTokensConfig;
61
75
  };
62
76
 
63
77
  /** Safely coerce an unknown value to a finite number, or return undefined. */
@@ -85,6 +99,39 @@ function parseFiniteNumber(value: string | undefined): number | undefined {
85
99
  return Number.isFinite(parsed) ? parsed : undefined;
86
100
  }
87
101
 
102
+ /** Parse fallback providers from env string (format: "provider/model,provider/model"). */
103
+ function parseFallbackProviders(value: string | undefined): Array<{ provider: string; model: string }> | undefined {
104
+ if (!value?.trim()) return undefined;
105
+ const entries: Array<{ provider: string; model: string }> = [];
106
+ for (const part of value.split(",")) {
107
+ const trimmed = part.trim();
108
+ if (!trimmed) continue;
109
+ const slashIdx = trimmed.indexOf("/");
110
+ if (slashIdx > 0 && slashIdx < trimmed.length - 1) {
111
+ const provider = trimmed.slice(0, slashIdx).trim();
112
+ const model = trimmed.slice(slashIdx + 1).trim();
113
+ if (provider && model) {
114
+ entries.push({ provider, model });
115
+ }
116
+ }
117
+ }
118
+ return entries.length > 0 ? entries : undefined;
119
+ }
120
+
121
+ /** Parse fallback providers from plugin config array (object items only). */
122
+ function toFallbackProviderArray(value: unknown): Array<{ provider: string; model: string }> | undefined {
123
+ if (!Array.isArray(value)) return undefined;
124
+ const entries: Array<{ provider: string; model: string }> = [];
125
+ for (const item of value) {
126
+ if (item && typeof item === "object" && !Array.isArray(item)) {
127
+ const p = toStr((item as Record<string, unknown>).provider);
128
+ const m = toStr((item as Record<string, unknown>).model);
129
+ if (p && m) entries.push({ provider: p, model: m });
130
+ }
131
+ }
132
+ return entries.length > 0 ? entries : undefined;
133
+ }
134
+
88
135
  /** Safely coerce an unknown value to a boolean, or return undefined. */
89
136
  function toBool(value: unknown): boolean | undefined {
90
137
  if (typeof value === "boolean") return value;
@@ -120,6 +167,12 @@ function toStrArray(value: unknown): string[] | undefined {
120
167
  .filter(Boolean);
121
168
  }
122
169
 
170
+ function toRecord(value: unknown): Record<string, unknown> | undefined {
171
+ return value && typeof value === "object" && !Array.isArray(value)
172
+ ? (value as Record<string, unknown>)
173
+ : undefined;
174
+ }
175
+
123
176
  /**
124
177
  * Resolve LCM configuration with three-tier precedence:
125
178
  * 1. Environment variables (highest — backward compat)
@@ -131,6 +184,8 @@ export function resolveLcmConfig(
131
184
  pluginConfig?: Record<string, unknown>,
132
185
  ): LcmConfig {
133
186
  const pc = pluginConfig ?? {};
187
+ const cacheAwareCompaction = toRecord(pc.cacheAwareCompaction);
188
+ const dynamicLeafChunkTokens = toRecord(pc.dynamicLeafChunkTokens);
134
189
  const resolvedLeafChunkTokens =
135
190
  parseFiniteInt(env.LCM_LEAF_CHUNK_TOKENS)
136
191
  ?? toNumber(pc.leafChunkTokens) ?? 20000;
@@ -142,6 +197,27 @@ export function resolveLcmConfig(
142
197
  env.LCM_DELEGATION_TIMEOUT_MS !== undefined
143
198
  ? toNumber(env.LCM_DELEGATION_TIMEOUT_MS)
144
199
  : undefined;
200
+ const resolvedDynamicLeafChunkMax = Math.max(
201
+ resolvedLeafChunkTokens,
202
+ parseFiniteInt(env.LCM_DYNAMIC_LEAF_CHUNK_TOKENS_MAX)
203
+ ?? toNumber(dynamicLeafChunkTokens?.max)
204
+ ?? Math.floor(resolvedLeafChunkTokens * 2),
205
+ );
206
+ const resolvedHotCachePressureFactor = Math.max(
207
+ 1,
208
+ parseFiniteNumber(env.LCM_HOT_CACHE_PRESSURE_FACTOR)
209
+ ?? toNumber(cacheAwareCompaction?.hotCachePressureFactor)
210
+ ?? 4,
211
+ );
212
+ const resolvedHotCacheBudgetHeadroomRatio = Math.min(
213
+ 0.95,
214
+ Math.max(
215
+ 0,
216
+ parseFiniteNumber(env.LCM_HOT_CACHE_BUDGET_HEADROOM_RATIO)
217
+ ?? toNumber(cacheAwareCompaction?.hotCacheBudgetHeadroomRatio)
218
+ ?? 0.2,
219
+ ),
220
+ );
145
221
 
146
222
  return {
147
223
  enabled:
@@ -243,5 +319,27 @@ export function resolveLcmConfig(
243
319
  circuitBreakerCooldownMs:
244
320
  parseFiniteInt(env.LCM_CIRCUIT_BREAKER_COOLDOWN_MS)
245
321
  ?? toNumber(pc.circuitBreakerCooldownMs) ?? 1_800_000,
322
+ fallbackProviders:
323
+ parseFallbackProviders(env.LCM_FALLBACK_PROVIDERS)
324
+ ?? toFallbackProviderArray(pc.fallbackProviders) ?? [],
325
+ cacheAwareCompaction: {
326
+ enabled:
327
+ env.LCM_CACHE_AWARE_COMPACTION_ENABLED !== undefined
328
+ ? env.LCM_CACHE_AWARE_COMPACTION_ENABLED !== "false"
329
+ : toBool(cacheAwareCompaction?.enabled) ?? true,
330
+ maxColdCacheCatchupPasses:
331
+ parseFiniteInt(env.LCM_MAX_COLD_CACHE_CATCHUP_PASSES)
332
+ ?? toNumber(cacheAwareCompaction?.maxColdCacheCatchupPasses)
333
+ ?? 2,
334
+ hotCachePressureFactor: resolvedHotCachePressureFactor,
335
+ hotCacheBudgetHeadroomRatio: resolvedHotCacheBudgetHeadroomRatio,
336
+ },
337
+ dynamicLeafChunkTokens: {
338
+ enabled:
339
+ env.LCM_DYNAMIC_LEAF_CHUNK_TOKENS_ENABLED !== undefined
340
+ ? env.LCM_DYNAMIC_LEAF_CHUNK_TOKENS_ENABLED === "true"
341
+ : toBool(dynamicLeafChunkTokens?.enabled) ?? true,
342
+ max: resolvedDynamicLeafChunkMax,
343
+ },
246
344
  };
247
345
  }
package/src/db/connection.ts CHANGED
@@ -13,7 +13,7 @@ function isInMemoryPath(dbPath: string): boolean {
13
13
  return normalized === ":memory:" || normalized.startsWith("file::memory:");
14
14
  }
15
15
 
16
- function normalizePath(dbPath: string): ConnectionKey {
16
+ export function normalizePath(dbPath: string): ConnectionKey {
17
17
  if (isInMemoryPath(dbPath)) {
18
18
  const trimmed = dbPath.trim();
19
19
  return trimmed.length > 0 ? trimmed : ":memory:";
@@ -32,6 +32,15 @@ function configureConnection(db: DatabaseSync): DatabaseSync {
32
32
  db.exec("PRAGMA journal_mode = WAL");
33
33
  db.exec(`PRAGMA busy_timeout = ${SQLITE_BUSY_TIMEOUT_MS}`);
34
34
  db.exec("PRAGMA foreign_keys = ON");
35
+ // 64MB page cache (default 2MB is severely undersized for multi-GB databases
36
+ // with concurrent agents). Memory is demand-allocated, released on close.
37
+ db.exec("PRAGMA cache_size = -65536");
38
+ // NORMAL is officially recommended for WAL mode — crash-safe for app crashes,
39
+ // only risks data loss on power failure (OS/kernel crash). The bootstrap
40
+ // process re-ingests any lost transactions from session files.
41
+ db.exec("PRAGMA synchronous = NORMAL");
42
+ // Keep temp tables/indexes in RAM (helps ordinal resequencing).
43
+ db.exec("PRAGMA temp_store = MEMORY");
35
44
  return db;
36
45
  }
37
46
 
@@ -66,6 +75,9 @@ function closeDatabase(db: DatabaseSync | undefined): void {
66
75
  return;
67
76
  }
68
77
  try {
78
+ // Update query planner statistics for tables that changed since last optimize.
79
+ // Separate try so a SQLITE_BUSY/SQLITE_READONLY from optimize doesn't skip close.
80
+ try { db.exec("PRAGMA optimize"); } catch { /* best-effort */ }
69
81
  db.close();
70
82
  } catch {
71
83
  // Ignore close failures; callers are shutting down anyway.
@@ -81,7 +93,13 @@ function closeDatabase(db: DatabaseSync | undefined): void {
81
93
  */
82
94
  export function createLcmDatabaseConnection(dbPath: string): DatabaseSync {
83
95
  ensureDbDirectory(dbPath);
84
- const db = configureConnection(new DatabaseSync(dbPath));
96
+ const db = new DatabaseSync(dbPath);
97
+ try {
98
+ configureConnection(db);
99
+ } catch (err) {
100
+ try { db.close(); } catch { /* ignore cleanup failure */ }
101
+ throw err;
102
+ }
85
103
  trackConnection(dbPath, db);
86
104
  return db;
87
105
  }