@martian-engineering/lossless-claw 0.6.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +26 -6
  2. package/docs/agent-tools.md +16 -5
  3. package/docs/configuration.md +223 -214
  4. package/openclaw.plugin.json +123 -0
  5. package/package.json +1 -1
  6. package/skills/lossless-claw/SKILL.md +3 -2
  7. package/skills/lossless-claw/references/architecture.md +12 -0
  8. package/skills/lossless-claw/references/config.md +135 -3
  9. package/skills/lossless-claw/references/diagnostics.md +13 -0
  10. package/src/assembler.ts +17 -5
  11. package/src/compaction.ts +161 -53
  12. package/src/db/config.ts +102 -4
  13. package/src/db/connection.ts +35 -7
  14. package/src/db/features.ts +24 -5
  15. package/src/db/migration.ts +257 -78
  16. package/src/engine.ts +1007 -110
  17. package/src/estimate-tokens.ts +80 -0
  18. package/src/lcm-log.ts +37 -0
  19. package/src/plugin/index.ts +493 -101
  20. package/src/plugin/lcm-command.ts +288 -7
  21. package/src/plugin/lcm-doctor-apply.ts +1 -3
  22. package/src/plugin/lcm-doctor-cleaners.ts +655 -0
  23. package/src/plugin/shared-init.ts +59 -0
  24. package/src/prune.ts +391 -0
  25. package/src/retrieval.ts +8 -9
  26. package/src/startup-banner-log.ts +1 -0
  27. package/src/store/compaction-telemetry-store.ts +156 -0
  28. package/src/store/conversation-store.ts +6 -1
  29. package/src/store/fts5-sanitize.ts +25 -4
  30. package/src/store/full-text-sort.ts +21 -0
  31. package/src/store/index.ts +8 -0
  32. package/src/store/summary-store.ts +21 -14
  33. package/src/summarize.ts +55 -34
  34. package/src/tools/lcm-describe-tool.ts +9 -4
  35. package/src/tools/lcm-expand-query-tool.ts +609 -200
  36. package/src/tools/lcm-expand-tool.ts +9 -4
  37. package/src/tools/lcm-grep-tool.ts +22 -8
  38. package/src/types.ts +1 -0
package/src/compaction.ts CHANGED
@@ -1,7 +1,9 @@
1
1
  import { createHash } from "node:crypto";
2
2
  import type { ConversationStore, CreateMessagePartInput } from "./store/conversation-store.js";
3
3
  import type { SummaryStore, SummaryRecord, ContextItemRecord } from "./store/summary-store.js";
4
+ import { estimateTokens, truncateTextToEstimatedTokens } from "./estimate-tokens.js";
4
5
  import { extractFileIdsFromContent } from "./large-files.js";
6
+ import { NOOP_LCM_LOGGER, type LcmLogger } from "./lcm-log.js";
5
7
  import { LcmProviderAuthError } from "./summarize.js";
6
8
 
7
9
  // ── Public types ─────────────────────────────────────────────────────────────
@@ -68,7 +70,14 @@ type CompactionSummarizeFn = (
68
70
  aggressive?: boolean,
69
71
  options?: CompactionSummarizeOptions,
70
72
  ) => Promise<string>;
71
- type PassResult = { summaryId: string; level: CompactionLevel };
73
+ type PassResult = {
74
+ summaryId: string;
75
+ level: CompactionLevel;
76
+ /** Token count of source items removed from context. */
77
+ removedTokens: number;
78
+ /** Token count of the newly created summary. */
79
+ addedTokens: number;
80
+ };
72
81
  type LeafChunkSelection = {
73
82
  items: ContextItemRecord[];
74
83
  rawTokensOutsideTail: number;
@@ -85,10 +94,6 @@ type CondensedPhaseCandidate = {
85
94
 
86
95
  // ── Helpers ──────────────────────────────────────────────────────────────────
87
96
 
88
- /** Estimate token count from character length (~4 chars per token). */
89
- function estimateTokens(content: string): number {
90
- return Math.ceil(content.length / 4);
91
- }
92
97
 
93
98
  /** Deterministically cap summary text so the persisted output stays within maxTokens. */
94
99
  function capSummaryText(
@@ -104,14 +109,14 @@ function capSummaryText(
104
109
  ];
105
110
 
106
111
  for (const suffix of suffixes) {
107
- const maxChars = Math.max(0, maxTokens * 4 - suffix.length);
108
- const capped = `${content.slice(0, maxChars)}${suffix}`;
112
+ const contentBudget = Math.max(0, maxTokens - estimateTokens(suffix));
113
+ const capped = `${truncateTextToEstimatedTokens(content, contentBudget)}${suffix}`;
109
114
  if (estimateTokens(capped) <= maxTokens) {
110
115
  return capped;
111
116
  }
112
117
  }
113
118
 
114
- return content.slice(0, Math.max(0, maxTokens * 4));
119
+ return truncateTextToEstimatedTokens(content, maxTokens);
115
120
  }
116
121
 
117
122
  /** Format a timestamp as `YYYY-MM-DD HH:mm TZ` for prompt source text. */
@@ -168,8 +173,8 @@ function generateSummaryId(content: string): string {
168
173
  );
169
174
  }
170
175
 
171
- /** Maximum characters for the deterministic fallback truncation (512 tokens * 4 chars). */
172
- const FALLBACK_MAX_CHARS = 512 * 4;
176
+ /** Maximum estimated tokens for the deterministic fallback truncation. */
177
+ const FALLBACK_MAX_TOKENS = 512;
173
178
  const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
174
179
 
175
180
  /**
@@ -335,12 +340,59 @@ function isMediaAttachmentPart(part: CreateMessagePartInput | { partType: string
335
340
  // ── CompactionEngine ─────────────────────────────────────────────────────────
336
341
 
337
342
  export class CompactionEngine {
343
+ /**
344
+ * Context items cache keyed by conversation ID, alive only while at least
345
+ * one compaction entry point is running; null otherwise, so readers then
346
+ * hit the store directly. While alive, every reader on this engine shares it.
347
+ *
348
+ * A reference count keeps overlapping compactions from destroying the
349
+ * cache under each other: each withContextCache increments
350
+ * on entry and decrements on exit; the cache is only destroyed
351
+ * when all users have exited.
352
+ */
353
+ private _contextItemsCache: Map<number, ContextItemRecord[]> | null = null;
354
+ private _contextItemsCacheRefCount = 0; // withContextCache calls currently in flight
355
+
338
356
  constructor(
339
357
  private conversationStore: ConversationStore,
340
358
  private summaryStore: SummaryStore,
341
359
  private config: CompactionConfig,
360
+ private log: LcmLogger = NOOP_LCM_LOGGER,
342
361
  ) {}
343
362
 
/**
 * Read a conversation's context items, going through the context items cache
 * when one is active.
 *
 * The active cache is captured in a local before any `await`: the cache field
 * can be reset to null by `withContextCache` teardown while the store read is
 * still pending, and writing through the field at that point would throw.
 *
 * @param conversationId Conversation whose context items are requested.
 * @returns The cached items when present, otherwise a fresh read from the summary store.
 */
private async getContextItemsCached(conversationId: number): Promise<ContextItemRecord[]> {
  const cache = this._contextItemsCache;
  if (!cache) {
    // No cache is active: always read through to the store.
    return this.summaryStore.getContextItems(conversationId);
  }

  const cached = cache.get(conversationId);
  if (cached !== undefined) {
    return cached;
  }

  const items = await this.summaryStore.getContextItems(conversationId);
  // Populate the captured map. If the cache was torn down during the await,
  // this write lands on a detached map and is simply discarded.
  cache.set(conversationId, items);
  return items;
}
375
+
/**
 * Drop the cached context items for one conversation after its context has
 * been mutated. Does nothing when no cache is active.
 */
private invalidateContextCache(conversationId: number): void {
  const activeCache = this._contextItemsCache;
  if (activeCache) {
    activeCache.delete(conversationId);
  }
}
380
+
/**
 * Run `fn` with the context items cache switched on.
 *
 * The cache is created on first entry and shared by overlapping calls. A
 * reference count tracks them, and the cache is dropped only once the last
 * one has finished, whether `fn` resolved or threw.
 */
private async withContextCache<T>(fn: () => Promise<T>): Promise<T> {
  this._contextItemsCache = this._contextItemsCache || new Map();
  this._contextItemsCacheRefCount += 1;
  try {
    return await fn();
  } finally {
    this._contextItemsCacheRefCount -= 1;
    const noUsersLeft = this._contextItemsCacheRefCount <= 0;
    if (noUsersLeft) {
      this._contextItemsCacheRefCount = 0;
      this._contextItemsCache = null;
    }
  }
}
395
+
344
396
  // ── evaluate ─────────────────────────────────────────────────────────────
345
397
 
346
398
  /** Evaluate whether compaction is needed. */
@@ -383,13 +435,13 @@ export class CompactionEngine {
383
435
  * `leafChunkTokens`. This lets callers trigger a soft incremental leaf pass
384
436
  * before the full context threshold is breached.
385
437
  */
386
- async evaluateLeafTrigger(conversationId: number): Promise<{
438
+ async evaluateLeafTrigger(conversationId: number, leafChunkTokensOverride?: number): Promise<{
387
439
  shouldCompact: boolean;
388
440
  rawTokensOutsideTail: number;
389
441
  threshold: number;
390
442
  }> {
391
443
  const rawTokensOutsideTail = await this.countRawTokensOutsideFreshTail(conversationId);
392
- const threshold = this.resolveLeafChunkTokens();
444
+ const threshold = this.resolveLeafChunkTokens(leafChunkTokensOverride);
393
445
  return {
394
446
  shouldCompact: rawTokensOutsideTail >= threshold,
395
447
  rawTokensOutsideTail,
@@ -409,7 +461,7 @@ export class CompactionEngine {
409
461
  hardTrigger?: boolean;
410
462
  summaryModel?: string;
411
463
  }): Promise<CompactionResult> {
412
- return this.compactFullSweep(input);
464
+ return this.withContextCache(() => this.compactFullSweep(input));
413
465
  }
414
466
 
415
467
  /**
@@ -421,6 +473,20 @@ export class CompactionEngine {
421
473
  conversationId: number;
422
474
  tokenBudget: number;
423
475
  summarize: CompactionSummarizeFn;
476
+ leafChunkTokens?: number;
477
+ force?: boolean;
478
+ previousSummaryContent?: string;
479
+ summaryModel?: string;
480
+ allowCondensedPasses?: boolean;
481
+ }): Promise<CompactionResult> {
482
+ return this.withContextCache(() => this._compactLeafImpl(input));
483
+ }
484
+
485
+ private async _compactLeafImpl(input: {
486
+ conversationId: number;
487
+ tokenBudget: number;
488
+ summarize: CompactionSummarizeFn;
489
+ leafChunkTokens?: number;
424
490
  force?: boolean;
425
491
  previousSummaryContent?: string;
426
492
  summaryModel?: string;
@@ -429,7 +495,7 @@ export class CompactionEngine {
429
495
 
430
496
  const tokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
431
497
  const threshold = Math.floor(this.config.contextThreshold * tokenBudget);
432
- const leafTrigger = await this.evaluateLeafTrigger(conversationId);
498
+ const leafTrigger = await this.evaluateLeafTrigger(conversationId, input.leafChunkTokens);
433
499
 
434
500
  if (!force && tokensBefore <= threshold && !leafTrigger.shouldCompact) {
435
501
  return {
@@ -440,7 +506,7 @@ export class CompactionEngine {
440
506
  };
441
507
  }
442
508
 
443
- const leafChunk = await this.selectOldestLeafChunk(conversationId);
509
+ const leafChunk = await this.selectOldestLeafChunk(conversationId, input.leafChunkTokens);
444
510
  if (leafChunk.items.length === 0) {
445
511
  return {
446
512
  actionTaken: false,
@@ -470,7 +536,8 @@ export class CompactionEngine {
470
536
  authFailure: true,
471
537
  };
472
538
  }
473
- const tokensAfterLeaf = await this.summaryStore.getContextTokenCount(conversationId);
539
+ // Delta tracking: compute token change from pass results instead of re-querying DB
540
+ const tokensAfterLeaf = tokensBefore - leafResult.removedTokens + leafResult.addedTokens;
474
541
 
475
542
  await this.persistCompactionEvents({
476
543
  conversationId,
@@ -488,7 +555,8 @@ export class CompactionEngine {
488
555
 
489
556
  const incrementalMaxDepth = this.resolveIncrementalMaxDepth();
490
557
  const condensedMinChunkTokens = this.resolveCondensedMinChunkTokens();
491
- if (incrementalMaxDepth > 0) {
558
+ let runningTokens = tokensAfterLeaf;
559
+ if (incrementalMaxDepth > 0 && input.allowCondensedPasses !== false) {
492
560
  for (let targetDepth = 0; targetDepth < incrementalMaxDepth; targetDepth++) {
493
561
  const fanout = this.resolveFanoutForDepth(targetDepth, false);
494
562
  const chunk = await this.selectOldestChunkAtDepth(conversationId, targetDepth);
@@ -496,7 +564,7 @@ export class CompactionEngine {
496
564
  break;
497
565
  }
498
566
 
499
- const passTokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
567
+ const passTokensBefore = runningTokens;
500
568
  const condenseResult = await this.condensedPass(
501
569
  conversationId,
502
570
  chunk.items,
@@ -507,7 +575,7 @@ export class CompactionEngine {
507
575
  if (!condenseResult) {
508
576
  break;
509
577
  }
510
- const passTokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
578
+ const passTokensAfter = passTokensBefore - condenseResult.removedTokens + condenseResult.addedTokens;
511
579
  await this.persistCompactionEvents({
512
580
  conversationId,
513
581
  tokensBefore: passTokensBefore,
@@ -518,6 +586,7 @@ export class CompactionEngine {
518
586
  });
519
587
 
520
588
  tokensAfter = passTokensAfter;
589
+ runningTokens = passTokensAfter;
521
590
  condensed = true;
522
591
  createdSummaryId = condenseResult.summaryId;
523
592
  level = condenseResult.level;
@@ -568,7 +637,7 @@ export class CompactionEngine {
568
637
  };
569
638
  }
570
639
 
571
- const contextItems = await this.summaryStore.getContextItems(conversationId);
640
+ const contextItems = await this.getContextItemsCached(conversationId);
572
641
  if (contextItems.length === 0) {
573
642
  return {
574
643
  actionTaken: false,
@@ -587,13 +656,16 @@ export class CompactionEngine {
587
656
  let hadAuthFailure = false;
588
657
 
589
658
  // Phase 1: leaf passes over oldest raw chunks outside the protected tail.
659
+ // Delta tracking: maintain a running token count instead of re-querying DB
660
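+ // after each pass: tokensAfter = tokensBefore - removed + added. This can drift from the DB total when a removed message has a stored token_count <= 0, because removed counts fall back to an estimate for those messages.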
661
+ let runningTokens = tokensBefore;
590
662
  while (true) {
591
663
  const leafChunk = await this.selectOldestLeafChunk(conversationId);
592
664
  if (leafChunk.items.length === 0) {
593
665
  break;
594
666
  }
595
667
 
596
- const passTokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
668
+ const passTokensBefore = runningTokens;
597
669
  const leafResult = await this.leafPass(
598
670
  conversationId,
599
671
  leafChunk.items,
@@ -605,7 +677,7 @@ export class CompactionEngine {
605
677
  hadAuthFailure = true;
606
678
  break;
607
679
  }
608
- const passTokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
680
+ const passTokensAfter = passTokensBefore - leafResult.removedTokens + leafResult.addedTokens;
609
681
  await this.persistCompactionEvents({
610
682
  conversationId,
611
683
  tokensBefore: passTokensBefore,
@@ -619,6 +691,7 @@ export class CompactionEngine {
619
691
  createdSummaryId = leafResult.summaryId;
620
692
  level = leafResult.level;
621
693
  previousSummaryContent = leafResult.content;
694
+ runningTokens = passTokensAfter;
622
695
 
623
696
  if (!force && passTokensAfter <= threshold) {
624
697
  previousTokens = passTokensAfter;
@@ -640,7 +713,7 @@ export class CompactionEngine {
640
713
  break;
641
714
  }
642
715
 
643
- const passTokensBefore = await this.summaryStore.getContextTokenCount(conversationId);
716
+ const passTokensBefore = runningTokens;
644
717
  const condenseResult = await this.condensedPass(
645
718
  conversationId,
646
719
  candidate.chunk.items,
@@ -652,7 +725,7 @@ export class CompactionEngine {
652
725
  hadAuthFailure = true;
653
726
  break;
654
727
  }
655
- const passTokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
728
+ const passTokensAfter = passTokensBefore - condenseResult.removedTokens + condenseResult.addedTokens;
656
729
  await this.persistCompactionEvents({
657
730
  conversationId,
658
731
  tokensBefore: passTokensBefore,
@@ -666,6 +739,7 @@ export class CompactionEngine {
666
739
  condensed = true;
667
740
  createdSummaryId = condenseResult.summaryId;
668
741
  level = condenseResult.level;
742
+ runningTokens = passTokensAfter;
669
743
 
670
744
  if (!force && passTokensAfter <= threshold) {
671
745
  previousTokens = passTokensAfter;
@@ -677,7 +751,7 @@ export class CompactionEngine {
677
751
  previousTokens = passTokensAfter;
678
752
  }
679
753
 
680
- const tokensAfter = await this.summaryStore.getContextTokenCount(conversationId);
754
+ const tokensAfter = runningTokens;
681
755
 
682
756
  return {
683
757
  actionTaken,
@@ -700,6 +774,17 @@ export class CompactionEngine {
700
774
  currentTokens?: number;
701
775
  summarize: CompactionSummarizeFn;
702
776
  summaryModel?: string;
777
+ }): Promise<{ success: boolean; rounds: number; finalTokens: number; authFailure?: boolean }> {
778
+ return this.withContextCache(() => this._compactUntilUnderImpl(input));
779
+ }
780
+
781
+ private async _compactUntilUnderImpl(input: {
782
+ conversationId: number;
783
+ tokenBudget: number;
784
+ targetTokens?: number;
785
+ currentTokens?: number;
786
+ summarize: CompactionSummarizeFn;
787
+ summaryModel?: string;
703
788
  }): Promise<{ success: boolean; rounds: number; finalTokens: number; authFailure?: boolean }> {
704
789
  const { conversationId, tokenBudget, summarize } = input;
705
790
  const targetTokens =
@@ -763,8 +848,8 @@ export class CompactionEngine {
763
848
  lastTokens = result.tokensAfter;
764
849
  }
765
850
 
766
- // Exhausted all rounds
767
- const finalTokens = await this.summaryStore.getContextTokenCount(conversationId);
851
+ // Exhausted all rounds — use the last known token count from compact() result
852
+ const finalTokens = lastTokens;
768
853
  return {
769
854
  success: finalTokens <= targetTokens,
770
855
  rounds: this.config.maxRounds,
@@ -775,7 +860,14 @@ export class CompactionEngine {
775
860
  // ── Private helpers ──────────────────────────────────────────────────────
776
861
 
777
862
  /** Normalize configured leaf chunk size to a safe positive integer. */
778
- private resolveLeafChunkTokens(): number {
863
+ private resolveLeafChunkTokens(leafChunkTokensOverride?: number): number {
864
+ if (
865
+ typeof leafChunkTokensOverride === "number" &&
866
+ Number.isFinite(leafChunkTokensOverride) &&
867
+ leafChunkTokensOverride > 0
868
+ ) {
869
+ return Math.floor(leafChunkTokensOverride);
870
+ }
779
871
  if (
780
872
  typeof this.config.leafChunkTokens === "number" &&
781
873
  Number.isFinite(this.config.leafChunkTokens) &&
@@ -838,7 +930,7 @@ export class CompactionEngine {
838
930
 
839
931
  /** Sum raw message tokens outside the protected fresh tail. */
840
932
  private async countRawTokensOutsideFreshTail(conversationId: number): Promise<number> {
841
- const contextItems = await this.summaryStore.getContextItems(conversationId);
933
+ const contextItems = await this.getContextItemsCached(conversationId);
842
934
  const freshTailOrdinal = this.resolveFreshTailOrdinal(contextItems);
843
935
  let rawTokens = 0;
844
936
 
@@ -861,10 +953,13 @@ export class CompactionEngine {
861
953
  * The selected chunk size is capped by `leafChunkTokens`, but we always pick
862
954
  * at least one message when any compactable message exists.
863
955
  */
864
- private async selectOldestLeafChunk(conversationId: number): Promise<LeafChunkSelection> {
865
- const contextItems = await this.summaryStore.getContextItems(conversationId);
956
+ private async selectOldestLeafChunk(
957
+ conversationId: number,
958
+ leafChunkTokensOverride?: number,
959
+ ): Promise<LeafChunkSelection> {
960
+ const contextItems = await this.getContextItemsCached(conversationId);
866
961
  const freshTailOrdinal = this.resolveFreshTailOrdinal(contextItems);
867
- const threshold = this.resolveLeafChunkTokens();
962
+ const threshold = this.resolveLeafChunkTokens(leafChunkTokensOverride);
868
963
 
869
964
  let rawTokensOutsideTail = 0;
870
965
  for (const item of contextItems) {
@@ -927,7 +1022,7 @@ export class CompactionEngine {
927
1022
  }
928
1023
 
929
1024
  const startOrdinal = Math.min(...messageItems.map((item) => item.ordinal));
930
- const priorSummaryItems = (await this.summaryStore.getContextItems(conversationId))
1025
+ const priorSummaryItems = (await this.getContextItemsCached(conversationId))
931
1026
  .filter(
932
1027
  (item) =>
933
1028
  item.ordinal < startOrdinal &&
@@ -1051,7 +1146,7 @@ export class CompactionEngine {
1051
1146
  hardTrigger: boolean;
1052
1147
  }): Promise<CondensedPhaseCandidate | null> {
1053
1148
  const { conversationId, hardTrigger } = params;
1054
- const contextItems = await this.summaryStore.getContextItems(conversationId);
1149
+ const contextItems = await this.getContextItemsCached(conversationId);
1055
1150
  const freshTailOrdinal = this.resolveFreshTailOrdinal(contextItems);
1056
1151
  const minChunkTokens = this.resolveCondensedMinChunkTokens();
1057
1152
  const depthLevels = await this.summaryStore.getDistinctDepthsInContext(conversationId, {
@@ -1088,7 +1183,7 @@ export class CompactionEngine {
1088
1183
  targetDepth: number,
1089
1184
  freshTailOrdinalOverride?: number,
1090
1185
  ): Promise<CondensedChunkSelection> {
1091
- const contextItems = await this.summaryStore.getContextItems(conversationId);
1186
+ const contextItems = await this.getContextItemsCached(conversationId);
1092
1187
  const freshTailOrdinal =
1093
1188
  typeof freshTailOrdinalOverride === "number"
1094
1189
  ? freshTailOrdinalOverride
@@ -1147,7 +1242,7 @@ export class CompactionEngine {
1147
1242
  }
1148
1243
 
1149
1244
  const startOrdinal = Math.min(...summaryItems.map((item) => item.ordinal));
1150
- const priorSummaryItems = (await this.summaryStore.getContextItems(conversationId))
1245
+ const priorSummaryItems = (await this.getContextItemsCached(conversationId))
1151
1246
  .filter(
1152
1247
  (item) =>
1153
1248
  item.ordinal < startOrdinal &&
@@ -1203,13 +1298,13 @@ export class CompactionEngine {
1203
1298
  }
1204
1299
  const inputTokens = Math.max(1, estimateTokens(sourceText));
1205
1300
  const buildDeterministicFallback = (): { content: string; level: CompactionLevel } => {
1206
- const truncated =
1207
- sourceText.length > FALLBACK_MAX_CHARS
1208
- ? sourceText.slice(0, FALLBACK_MAX_CHARS)
1209
- : sourceText;
1301
+ const suffix = `\n[Truncated from ${inputTokens} tokens]`;
1302
+ const truncated = truncateTextToEstimatedTokens(
1303
+ sourceText,
1304
+ Math.max(0, FALLBACK_MAX_TOKENS - estimateTokens(suffix)),
1305
+ );
1210
1306
  return {
1211
- content: `${truncated}
1212
- [Truncated from ${inputTokens} tokens]`,
1307
+ content: `${truncated}${suffix}`,
1213
1308
  level: "fallback",
1214
1309
  };
1215
1310
  };
@@ -1264,7 +1359,7 @@ export class CompactionEngine {
1264
1359
  const maxTokens = Math.ceil(params.targetTokens * this.config.summaryMaxOverageFactor);
1265
1360
 
1266
1361
  if (summaryTokens > Math.ceil(params.targetTokens * 1.5)) {
1267
- console.warn(
1362
+ this.log.warn(
1268
1363
  `[lcm] summary exceeds target by ${Math.round((summaryTokens / params.targetTokens - 1) * 100)}%: ${summaryTokens} tokens vs target ${params.targetTokens}`,
1269
1364
  );
1270
1365
  }
@@ -1330,7 +1425,7 @@ export class CompactionEngine {
1330
1425
  summarize: CompactionSummarizeFn,
1331
1426
  previousSummaryContent?: string,
1332
1427
  summaryModel?: string,
1333
- ): Promise<{ summaryId: string; level: CompactionLevel; content: string } | null> {
1428
+ ): Promise<{ summaryId: string; level: CompactionLevel; content: string; removedTokens: number; addedTokens: number } | null> {
1334
1429
  // Fetch full message content for each context item
1335
1430
  const messageContents: { messageId: number; content: string; createdAt: Date; tokenCount: number }[] =
1336
1431
  [];
@@ -1369,7 +1464,7 @@ export class CompactionEngine {
1369
1464
  targetTokens: this.config.leafTargetTokens,
1370
1465
  });
1371
1466
  if (!summary) {
1372
- console.warn(
1467
+ this.log.warn(
1373
1468
  `[lcm] leaf compaction skipped summary write; conversationId=${conversationId}; chunkMessages=${messageContents.length}`,
1374
1469
  );
1375
1470
  return null;
@@ -1378,6 +1473,16 @@ export class CompactionEngine {
1378
1473
  // Persist the leaf summary
1379
1474
  const summaryId = generateSummaryId(summary.content);
1380
1475
  const tokenCount = estimateTokens(summary.content);
1476
+ // Note: removedTokens uses resolveMessageTokenCount values (which fall back to
1477
+ // estimateTokens for messages with token_count <= 0). This can diverge from
1478
+ // getContextTokenCount() which would sum the stored 0. The delta feeds into
1479
+ // stopping decisions (threshold checks, progress guards), but the divergence
1480
+ // is bounded to empty/corrupt messages (token_count=0) which are rare.
1481
+ // For summaries, removedTokens matches the DB exactly (same tokenCount column).
1482
+ const removedTokens = messageContents.reduce(
1483
+ (sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
1484
+ 0,
1485
+ );
1381
1486
 
1382
1487
  await this.summaryStore.withTransaction(async () => {
1383
1488
  await this.summaryStore.insertSummary({
@@ -1398,10 +1503,7 @@ export class CompactionEngine {
1398
1503
  : undefined,
1399
1504
  descendantCount: 0,
1400
1505
  descendantTokenCount: 0,
1401
- sourceMessageTokenCount: messageContents.reduce(
1402
- (sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
1403
- 0,
1404
- ),
1506
+ sourceMessageTokenCount: removedTokens,
1405
1507
  model: summaryModel,
1406
1508
  });
1407
1509
 
@@ -1421,8 +1523,9 @@ export class CompactionEngine {
1421
1523
  summaryId,
1422
1524
  });
1423
1525
  });
1526
+ this.invalidateContextCache(conversationId);
1424
1527
 
1425
- return { summaryId, level: summary.level, content: summary.content };
1528
+ return { summaryId, level: summary.level, content: summary.content, removedTokens, addedTokens: tokenCount };
1426
1529
  }
1427
1530
 
1428
1531
  // ── Private: Condensed Pass ──────────────────────────────────────────────
@@ -1479,7 +1582,7 @@ export class CompactionEngine {
1479
1582
  targetTokens: this.config.condensedTargetTokens,
1480
1583
  });
1481
1584
  if (!condensed) {
1482
- console.warn(
1585
+ this.log.warn(
1483
1586
  `[lcm] condensed compaction skipped summary write; conversationId=${conversationId}; depth=${targetDepth}; chunkSummaries=${summaryRecords.length}`,
1484
1587
  );
1485
1588
  return null;
@@ -1560,8 +1663,13 @@ export class CompactionEngine {
1560
1663
  summaryId,
1561
1664
  });
1562
1665
  });
1666
+ this.invalidateContextCache(conversationId);
1563
1667
 
1564
- return { summaryId, level: condensed.level };
1668
+ const removedTokens = summaryRecords.reduce(
1669
+ (sum, s) => sum + Math.max(0, Math.floor(s.tokenCount)),
1670
+ 0,
1671
+ );
1672
+ return { summaryId, level: condensed.level, removedTokens, addedTokens: tokenCount };
1565
1673
  }
1566
1674
 
1567
1675
  /** Emit compaction telemetry without mutating canonical conversation history. */
@@ -1638,7 +1746,7 @@ export class CompactionEngine {
1638
1746
  condensedPassOccurred: boolean;
1639
1747
  }): Promise<void> {
1640
1748
  const content = `LCM compaction ${input.pass} pass (${input.level}): ${input.tokensBefore} -> ${input.tokensAfter}`;
1641
- console.info(
1749
+ this.log.info(
1642
1750
  `[lcm] ${content} conversation=${input.conversationId} summary=${input.createdSummaryId}`,
1643
1751
  );
1644
1752
  }
package/src/db/config.ts CHANGED
@@ -1,6 +1,18 @@
1
1
  import { homedir } from "os";
2
2
  import { join } from "path";
3
3
 
4
+ export type CacheAwareCompactionConfig = { // Cache-aware compaction settings as produced by resolveLcmConfig.
5
+ enabled: boolean; // Defaults to true when neither env nor plugin config sets it.
6
+ maxColdCacheCatchupPasses: number; // Defaults to 2.
7
+ hotCachePressureFactor: number; // Clamped to >= 1; defaults to 4.
8
+ hotCacheBudgetHeadroomRatio: number; // Clamped to [0, 0.95]; defaults to 0.2.
9
+ };
10
+
11
+ export type DynamicLeafChunkTokensConfig = { // Dynamic leaf chunk sizing settings as produced by resolveLcmConfig.
12
+ enabled: boolean; // Defaults to true when neither env nor plugin config sets it.
13
+ max: number; // Never below the resolved leafChunkTokens; defaults to twice that value.
14
+ };
15
+
4
16
  export type LcmConfig = {
5
17
  enabled: boolean;
6
18
  databasePath: string;
@@ -32,10 +44,6 @@ export type LcmConfig = {
32
44
  largeFileSummaryProvider: string;
33
45
  /** Model override for large-file text summarization. */
34
46
  largeFileSummaryModel: string;
35
- /** Model override for conversation summarization. */
36
- summaryModel: string;
37
- /** Provider override for conversation summarization. */
38
- summaryProvider: string;
39
47
  /** Provider override for lcm_expand_query sub-agent. */
40
48
  expansionProvider: string;
41
49
  /** Model override for lcm_expand_query sub-agent. */
@@ -58,6 +66,12 @@ export type LcmConfig = {
58
66
  circuitBreakerThreshold: number;
59
67
  /** Cooldown in milliseconds before the circuit breaker auto-resets (default 30 min). */
60
68
  circuitBreakerCooldownMs: number;
69
+ /** Explicit fallback provider/model pairs for compaction summarization. */
70
+ fallbackProviders: Array<{ provider: string; model: string }>;
71
+ /** Cache-sensitive policy for incremental leaf compaction. */
72
+ cacheAwareCompaction: CacheAwareCompactionConfig;
73
+ /** Dynamic step-band policy for incremental leaf chunk sizing. */
74
+ dynamicLeafChunkTokens: DynamicLeafChunkTokensConfig;
61
75
  };
62
76
 
63
77
  /** Safely coerce an unknown value to a finite number, or return undefined. */
@@ -85,6 +99,39 @@ function parseFiniteNumber(value: string | undefined): number | undefined {
85
99
  return Number.isFinite(parsed) ? parsed : undefined;
86
100
  }
87
101
 
/**
 * Parse the fallback provider list from an env string.
 *
 * Expected format: "provider/model,provider/model". Each comma-separated entry
 * is split at its first "/", so a model name may itself contain slashes.
 * Entries with no slash, or with nothing on either side of it, are ignored.
 *
 * @param value Raw env value, possibly undefined or blank.
 * @returns The parsed pairs in input order, or undefined when none are valid.
 */
function parseFallbackProviders(value: string | undefined): Array<{ provider: string; model: string }> | undefined {
  if (value === undefined || value.trim() === "") return undefined;

  const specs = value
    .split(",")
    .map((part) => part.trim())
    .filter((spec) => spec !== "");

  const entries = specs.flatMap((spec): Array<{ provider: string; model: string }> => {
    const slashIdx = spec.indexOf("/");
    const hasBothSides = slashIdx > 0 && slashIdx < spec.length - 1;
    if (!hasBothSides) return [];
    const provider = spec.slice(0, slashIdx).trim();
    const model = spec.slice(slashIdx + 1).trim();
    return provider && model ? [{ provider, model }] : [];
  });

  return entries.length > 0 ? entries : undefined;
}
120
+
/**
 * Parse fallback providers from a plugin config value.
 *
 * Only plain (non-array) object items are considered. An item is kept when
 * both its `provider` and `model` fields pass through `toStr` as truthy values.
 *
 * @param value Untyped plugin config value.
 * @returns The accepted pairs in input order, or undefined when the value is
 *   not an array or yields no valid pair.
 */
function toFallbackProviderArray(value: unknown): Array<{ provider: string; model: string }> | undefined {
  if (!Array.isArray(value)) return undefined;

  const entries: Array<{ provider: string; model: string }> = [];
  for (const item of value) {
    const isPlainObject = Boolean(item) && typeof item === "object" && !Array.isArray(item);
    if (!isPlainObject) continue;

    const record = item as Record<string, unknown>;
    const provider = toStr(record.provider);
    const model = toStr(record.model);
    if (provider && model) {
      entries.push({ provider, model });
    }
  }

  return entries.length > 0 ? entries : undefined;
}
134
+
88
135
  /** Safely coerce an unknown value to a boolean, or return undefined. */
89
136
  function toBool(value: unknown): boolean | undefined {
90
137
  if (typeof value === "boolean") return value;
@@ -120,6 +167,12 @@ function toStrArray(value: unknown): string[] | undefined {
120
167
  .filter(Boolean);
121
168
  }
122
169
 
/**
 * Narrow an unknown value to a plain key/value record.
 *
 * @returns The same object reference when `value` is a non-null, non-array
 *   object; undefined for everything else.
 */
function toRecord(value: unknown): Record<string, unknown> | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
175
+
123
176
  /**
124
177
  * Resolve LCM configuration with three-tier precedence:
125
178
  * 1. Environment variables (highest — backward compat)
@@ -131,6 +184,8 @@ export function resolveLcmConfig(
131
184
  pluginConfig?: Record<string, unknown>,
132
185
  ): LcmConfig {
133
186
  const pc = pluginConfig ?? {};
187
+ const cacheAwareCompaction = toRecord(pc.cacheAwareCompaction);
188
+ const dynamicLeafChunkTokens = toRecord(pc.dynamicLeafChunkTokens);
134
189
  const resolvedLeafChunkTokens =
135
190
  parseFiniteInt(env.LCM_LEAF_CHUNK_TOKENS)
136
191
  ?? toNumber(pc.leafChunkTokens) ?? 20000;
@@ -142,6 +197,27 @@ export function resolveLcmConfig(
142
197
  env.LCM_DELEGATION_TIMEOUT_MS !== undefined
143
198
  ? toNumber(env.LCM_DELEGATION_TIMEOUT_MS)
144
199
  : undefined;
200
+ const resolvedDynamicLeafChunkMax = Math.max(
201
+ resolvedLeafChunkTokens,
202
+ parseFiniteInt(env.LCM_DYNAMIC_LEAF_CHUNK_TOKENS_MAX)
203
+ ?? toNumber(dynamicLeafChunkTokens?.max)
204
+ ?? Math.floor(resolvedLeafChunkTokens * 2),
205
+ );
206
+ const resolvedHotCachePressureFactor = Math.max(
207
+ 1,
208
+ parseFiniteNumber(env.LCM_HOT_CACHE_PRESSURE_FACTOR)
209
+ ?? toNumber(cacheAwareCompaction?.hotCachePressureFactor)
210
+ ?? 4,
211
+ );
212
+ const resolvedHotCacheBudgetHeadroomRatio = Math.min(
213
+ 0.95,
214
+ Math.max(
215
+ 0,
216
+ parseFiniteNumber(env.LCM_HOT_CACHE_BUDGET_HEADROOM_RATIO)
217
+ ?? toNumber(cacheAwareCompaction?.hotCacheBudgetHeadroomRatio)
218
+ ?? 0.2,
219
+ ),
220
+ );
145
221
 
146
222
  return {
147
223
  enabled:
@@ -243,5 +319,27 @@ export function resolveLcmConfig(
243
319
  circuitBreakerCooldownMs:
244
320
  parseFiniteInt(env.LCM_CIRCUIT_BREAKER_COOLDOWN_MS)
245
321
  ?? toNumber(pc.circuitBreakerCooldownMs) ?? 1_800_000,
322
+ fallbackProviders:
323
+ parseFallbackProviders(env.LCM_FALLBACK_PROVIDERS)
324
+ ?? toFallbackProviderArray(pc.fallbackProviders) ?? [],
325
+ cacheAwareCompaction: {
326
+ enabled:
327
+ env.LCM_CACHE_AWARE_COMPACTION_ENABLED !== undefined
328
+ ? env.LCM_CACHE_AWARE_COMPACTION_ENABLED !== "false"
329
+ : toBool(cacheAwareCompaction?.enabled) ?? true,
330
+ maxColdCacheCatchupPasses:
331
+ parseFiniteInt(env.LCM_MAX_COLD_CACHE_CATCHUP_PASSES)
332
+ ?? toNumber(cacheAwareCompaction?.maxColdCacheCatchupPasses)
333
+ ?? 2,
334
+ hotCachePressureFactor: resolvedHotCachePressureFactor,
335
+ hotCacheBudgetHeadroomRatio: resolvedHotCacheBudgetHeadroomRatio,
336
+ },
337
+ dynamicLeafChunkTokens: {
338
+ enabled:
339
+ env.LCM_DYNAMIC_LEAF_CHUNK_TOKENS_ENABLED !== undefined
340
+ ? env.LCM_DYNAMIC_LEAF_CHUNK_TOKENS_ENABLED === "true"
341
+ : toBool(dynamicLeafChunkTokens?.enabled) ?? true,
342
+ max: resolvedDynamicLeafChunkMax,
343
+ },
246
344
  };
247
345
  }