@loreai/core 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/dist/bun/agents-file.d.ts.map +1 -1
  2. package/dist/bun/config.d.ts.map +1 -1
  3. package/dist/bun/curator.d.ts.map +1 -1
  4. package/dist/bun/db.d.ts +86 -1
  5. package/dist/bun/db.d.ts.map +1 -1
  6. package/dist/bun/distillation.d.ts +2 -13
  7. package/dist/bun/distillation.d.ts.map +1 -1
  8. package/dist/bun/embedding.d.ts +5 -1
  9. package/dist/bun/embedding.d.ts.map +1 -1
  10. package/dist/bun/git.d.ts.map +1 -1
  11. package/dist/bun/gradient.d.ts +13 -1
  12. package/dist/bun/gradient.d.ts.map +1 -1
  13. package/dist/bun/hosted.d.ts +36 -0
  14. package/dist/bun/hosted.d.ts.map +1 -0
  15. package/dist/bun/index.d.ts +3 -2
  16. package/dist/bun/index.d.ts.map +1 -1
  17. package/dist/bun/index.js +1049 -247
  18. package/dist/bun/index.js.map +4 -4
  19. package/dist/bun/lat-reader.d.ts.map +1 -1
  20. package/dist/bun/ltm.d.ts +99 -5
  21. package/dist/bun/ltm.d.ts.map +1 -1
  22. package/dist/bun/session-limiter.d.ts +26 -0
  23. package/dist/bun/session-limiter.d.ts.map +1 -0
  24. package/dist/bun/temporal.d.ts +2 -0
  25. package/dist/bun/temporal.d.ts.map +1 -1
  26. package/dist/node/agents-file.d.ts.map +1 -1
  27. package/dist/node/config.d.ts.map +1 -1
  28. package/dist/node/curator.d.ts.map +1 -1
  29. package/dist/node/db.d.ts +86 -1
  30. package/dist/node/db.d.ts.map +1 -1
  31. package/dist/node/distillation.d.ts +2 -13
  32. package/dist/node/distillation.d.ts.map +1 -1
  33. package/dist/node/embedding.d.ts +5 -1
  34. package/dist/node/embedding.d.ts.map +1 -1
  35. package/dist/node/git.d.ts.map +1 -1
  36. package/dist/node/gradient.d.ts +13 -1
  37. package/dist/node/gradient.d.ts.map +1 -1
  38. package/dist/node/hosted.d.ts +36 -0
  39. package/dist/node/hosted.d.ts.map +1 -0
  40. package/dist/node/index.d.ts +3 -2
  41. package/dist/node/index.d.ts.map +1 -1
  42. package/dist/node/index.js +1049 -247
  43. package/dist/node/index.js.map +4 -4
  44. package/dist/node/lat-reader.d.ts.map +1 -1
  45. package/dist/node/ltm.d.ts +99 -5
  46. package/dist/node/ltm.d.ts.map +1 -1
  47. package/dist/node/session-limiter.d.ts +26 -0
  48. package/dist/node/session-limiter.d.ts.map +1 -0
  49. package/dist/node/temporal.d.ts +2 -0
  50. package/dist/node/temporal.d.ts.map +1 -1
  51. package/dist/types/agents-file.d.ts.map +1 -1
  52. package/dist/types/config.d.ts.map +1 -1
  53. package/dist/types/curator.d.ts.map +1 -1
  54. package/dist/types/db.d.ts +86 -1
  55. package/dist/types/db.d.ts.map +1 -1
  56. package/dist/types/distillation.d.ts +2 -13
  57. package/dist/types/distillation.d.ts.map +1 -1
  58. package/dist/types/embedding.d.ts +5 -1
  59. package/dist/types/embedding.d.ts.map +1 -1
  60. package/dist/types/git.d.ts.map +1 -1
  61. package/dist/types/gradient.d.ts +13 -1
  62. package/dist/types/gradient.d.ts.map +1 -1
  63. package/dist/types/hosted.d.ts +36 -0
  64. package/dist/types/hosted.d.ts.map +1 -0
  65. package/dist/types/index.d.ts +3 -2
  66. package/dist/types/index.d.ts.map +1 -1
  67. package/dist/types/lat-reader.d.ts.map +1 -1
  68. package/dist/types/ltm.d.ts +99 -5
  69. package/dist/types/ltm.d.ts.map +1 -1
  70. package/dist/types/session-limiter.d.ts +26 -0
  71. package/dist/types/session-limiter.d.ts.map +1 -0
  72. package/dist/types/temporal.d.ts +2 -0
  73. package/dist/types/temporal.d.ts.map +1 -1
  74. package/package.json +3 -1
  75. package/src/agents-file.ts +12 -0
  76. package/src/config.ts +10 -5
  77. package/src/curator.ts +54 -2
  78. package/src/db.ts +386 -6
  79. package/src/distillation.ts +55 -14
  80. package/src/embedding.ts +71 -8
  81. package/src/git.ts +4 -0
  82. package/src/gradient.ts +227 -74
  83. package/src/hosted.ts +46 -0
  84. package/src/index.ts +12 -0
  85. package/src/lat-reader.ts +4 -0
  86. package/src/ltm.ts +480 -45
  87. package/src/session-limiter.ts +47 -0
  88. package/src/temporal.ts +10 -0
package/src/embedding.ts CHANGED
@@ -28,6 +28,27 @@ import type {
28
28
  * embedding calls but bounded enough to avoid minutes-long hangs. */
29
29
  const EMBED_TIMEOUT_MS = 10_000;
30
30
 
31
+ /**
32
+ * Safe per-text character limit for local ONNX inference. The Nomic v1.5 model
33
+ * supports up to 8192 tokens, but ONNX runtime OOMs on inputs near that ceiling
34
+ * (error codes 284432024, 287180544, 144786472). Pre-truncating to ~4096 tokens
35
+ * worth of characters keeps the tensor well within safe allocation bounds.
36
+ * The worker's `truncation: true` remains as a safety net.
37
+ */
38
+ const LOCAL_MAX_CHARS = 4096 * 4; // ~4096 tokens × ~4 chars/token
39
+
40
+ /**
41
+ * Truncate a string to LOCAL_MAX_CHARS without splitting a UTF-16 surrogate pair.
42
+ * If the cut falls on a high surrogate (0xD800-0xDBFF), backs up one char.
43
+ */
44
+ function safeLocalTruncate(text: string): string {
45
+ if (text.length <= LOCAL_MAX_CHARS) return text;
46
+ let end = LOCAL_MAX_CHARS;
47
+ const code = text.charCodeAt(end - 1);
48
+ if (code >= 0xD800 && code <= 0xDBFF) end--; // don't split surrogate pair
49
+ return text.slice(0, end);
50
+ }
51
+
31
52
  // ---------------------------------------------------------------------------
32
53
  // Provider interface
33
54
  // ---------------------------------------------------------------------------
@@ -272,7 +293,21 @@ class LocalProvider implements EmbeddingProvider {
272
293
  workerUrl = vendorWorkerUrl;
273
294
  }
274
295
  } else {
275
- workerUrl = new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
296
+ // In CJS bundles (gateway npm package), esbuild shims import.meta as
297
+ // an empty object {}, so import.meta.url is undefined. Fall back to
298
+ // __filename which esbuild defines in CJS output.
299
+ const selfUrl = typeof import.meta.url === "string" ? import.meta.url : undefined;
300
+ if (selfUrl) {
301
+ workerUrl = new URL(
302
+ `./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
303
+ selfUrl,
304
+ );
305
+ } else {
306
+ // CJS fallback: __filename is defined by esbuild's CJS output.
307
+ // The embedding-worker.cjs is built alongside the main bundle.
308
+ const { pathToFileURL } = await import("node:url");
309
+ workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
310
+ }
276
311
  }
277
312
 
278
313
  const vendor = vendorModelInfo();
@@ -318,9 +353,10 @@ class LocalProvider implements EmbeddingProvider {
318
353
  localProviderKnownBroken = true;
319
354
  if (!localProviderErrorLogged) {
320
355
  localProviderErrorLogged = true;
321
- log.info(
356
+ log.error(
322
357
  `local embedding provider failed to init: ${msg.error}. ` +
323
358
  `Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
359
+ new Error(`embedding worker init failed: ${msg.error}`),
324
360
  );
325
361
  }
326
362
  for (const [, p] of this.pendingRequests) {
@@ -337,6 +373,7 @@ class LocalProvider implements EmbeddingProvider {
337
373
  this.worker.on("error", (err: Error) => {
338
374
  this.workerInitError = err.message;
339
375
  this.workerReady = false;
376
+ log.error("embedding worker crashed:", err);
340
377
  for (const [, p] of this.pendingRequests) {
341
378
  p.reject(new LocalProviderUnavailableError(err));
342
379
  }
@@ -347,6 +384,10 @@ class LocalProvider implements EmbeddingProvider {
347
384
  this.worker.on("exit", (code) => {
348
385
  if (code !== 0 && !this.workerInitError) {
349
386
  this.workerInitError = `embedding worker exited with code ${code}`;
387
+ log.error(
388
+ this.workerInitError,
389
+ new Error(this.workerInitError),
390
+ );
350
391
  }
351
392
  this.workerReady = false;
352
393
  for (const [, p] of this.pendingRequests) {
@@ -382,9 +423,13 @@ class LocalProvider implements EmbeddingProvider {
382
423
  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
383
424
  await this.ensureWorker();
384
425
 
426
+ // Pre-truncate texts that exceed the safe ONNX inference limit.
427
+ // This prevents OOM on single inputs near the model's 8192-token max.
428
+ const truncated = texts.map(safeLocalTruncate);
429
+
385
430
  // Prepend Nomic task instruction prefix.
386
431
  const prefix = inputType === "document" ? "search_document: " : "search_query: ";
387
- const prefixed = texts.map((t) => prefix + t);
432
+ const prefixed = truncated.map((t) => prefix + t);
388
433
 
389
434
  const id = this.nextRequestId++;
390
435
  // Recall queries (single query-type texts) get high priority so they
@@ -706,14 +751,25 @@ type VectorHit = { id: string; similarity: number };
706
751
  * Search all knowledge entries with embeddings by cosine similarity.
707
752
  * Returns top-k entries sorted by similarity descending.
708
753
  * Pure brute-force — fine for <100 entries (microseconds).
754
+ *
755
+ * @param excludeCategories Optional category names to exclude from results.
756
+ * Useful when preferences are injected in a separate system block and
757
+ * shouldn't compete for vector search slots with context-bound entries.
709
758
  */
710
759
  export function vectorSearch(
711
760
  queryEmbedding: Float32Array,
712
761
  limit = 10,
762
+ excludeCategories?: string[],
713
763
  ): VectorHit[] {
764
+ let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
765
+ const params: string[] = [];
766
+ if (excludeCategories?.length) {
767
+ sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
768
+ params.push(...excludeCategories);
769
+ }
714
770
  const rows = db()
715
- .query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2")
716
- .all() as Array<{ id: string; embedding: Buffer }>;
771
+ .query(sql)
772
+ .all(...params) as Array<{ id: string; embedding: Buffer }>;
717
773
 
718
774
  const scored: VectorHit[] = [];
719
775
  for (const row of rows) {
@@ -817,6 +873,7 @@ export function embedKnowledgeEntry(
817
873
  title: string,
818
874
  content: string,
819
875
  ): void {
876
+ if (!isAvailable()) return;
820
877
  const text = `${title}\n${content}`;
821
878
  embed([text], "document")
822
879
  .then(([vec]) => {
@@ -825,7 +882,7 @@ export function embedKnowledgeEntry(
825
882
  .run(toBlob(vec), id);
826
883
  })
827
884
  .catch((err) => {
828
- log.info("embedding failed for knowledge entry", id, ":", err);
885
+ log.error("embedding failed for knowledge entry", id, ":", err);
829
886
  });
830
887
  }
831
888
 
@@ -838,6 +895,7 @@ export function embedDistillation(
838
895
  id: string,
839
896
  observations: string,
840
897
  ): void {
898
+ if (!isAvailable()) return;
841
899
  embed([observations], "document")
842
900
  .then(([vec]) => {
843
901
  db()
@@ -845,7 +903,7 @@ export function embedDistillation(
845
903
  .run(toBlob(vec), id);
846
904
  })
847
905
  .catch((err) => {
848
- log.info("embedding failed for distillation", id, ":", err);
906
+ log.error("embedding failed for distillation", id, ":", err);
849
907
  });
850
908
  }
851
909
 
@@ -859,6 +917,7 @@ export function embedTemporalMessage(
859
917
  id: string,
860
918
  content: string,
861
919
  ): void {
920
+ if (!isAvailable()) return;
862
921
  // Skip very short messages — they don't carry enough semantic signal
863
922
  // to be useful in vector search and would waste embedding capacity.
864
923
  if (content.length < 50) return;
@@ -870,7 +929,7 @@ export function embedTemporalMessage(
870
929
  .run(toBlob(vec), id);
871
930
  })
872
931
  .catch((err) => {
873
- log.info("embedding failed for temporal message", id, ":", err);
932
+ log.error("embedding failed for temporal message", id, ":", err);
874
933
  });
875
934
  }
876
935
 
@@ -1174,6 +1233,8 @@ export async function backfillEmbeddings(): Promise<number> {
1174
1233
  } catch (err) {
1175
1234
  // log.error sends to Sentry via captureException
1176
1235
  log.error(`embedding backfill batch failed (${batch.length} items):`, err);
1236
+ // Provider is dead — no point retrying remaining batches.
1237
+ if (err instanceof LocalProviderUnavailableError) break;
1177
1238
  }
1178
1239
  // No yieldToEventLoop() needed — embed() is truly async (worker thread).
1179
1240
  }
@@ -1234,6 +1295,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
1234
1295
  } catch (err) {
1235
1296
  // log.error sends to Sentry via captureException
1236
1297
  log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
1298
+ // Provider is dead — no point retrying remaining batches.
1299
+ if (err instanceof LocalProviderUnavailableError) break;
1237
1300
  }
1238
1301
 
1239
1302
  if (embedded >= nextProgressAt) {
package/src/git.ts CHANGED
@@ -13,6 +13,7 @@
13
13
  */
14
14
 
15
15
  import { execSync } from "child_process";
16
+ import { isHostedMode } from "./hosted";
16
17
 
17
18
  // ---------------------------------------------------------------------------
18
19
  // URL normalization
@@ -95,6 +96,9 @@ export function clearGitRemoteCache(): void {
95
96
  * subprocess calls — `git remote -v` only runs once per unique path.
96
97
  */
97
98
  export function getGitRemote(path: string): string | null {
99
+ // In hosted mode, never run git subprocesses with client-controlled cwd.
100
+ if (isHostedMode()) return null;
101
+
98
102
  const cached = gitRemoteCache.get(path);
99
103
  if (cached !== undefined) return cached;
100
104
 
package/src/gradient.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import type { LoreMessage, LorePart, LoreMessageWithParts, LoreToolPart, LoreTextPart, LoreToolState, LoreToolStateCompleted } from "./types";
2
2
  import { isTextPart, isReasoningPart, isToolPart } from "./types";
3
- import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
3
+ import { db, ensureProject, loadForceMinLayer, saveForceMinLayer, saveSessionTracking, loadSessionTracking } from "./db";
4
4
  import { config } from "./config";
5
5
  import { formatDistillations } from "./prompt";
6
6
  import { normalize } from "./markdown";
@@ -98,9 +98,42 @@ export function updateBustRate(
98
98
  cacheWrite: number,
99
99
  cacheRead: number,
100
100
  sessionID?: string,
101
+ lastLayer?: number,
101
102
  ): void {
102
103
  if (!sessionID) return;
103
104
  const state = getSessionState(sessionID);
105
+
106
+ // Layer 4 (emergency) is structurally a full cache write — feeding its
107
+ // bust stats into the EMA and cap adaptation creates a death spiral where
108
+ // the cap ratchets down to MIN_CONTEXT_FLOOR and prevents the session from
109
+ // ever fitting in layers 1-3 again. Skip EMA updates entirely.
110
+ // This check is BEFORE the total===0 guard so that the consecutiveLayer4
111
+ // counter is always updated regardless of whether usage was reported.
112
+ if (lastLayer === 4) {
113
+ state.consecutiveLayer4++;
114
+
115
+ // Recovery hatch: after 5+ consecutive Layer 4 turns, the shrunken cap
116
+ // may be what's trapping us. Relax it by 10% per turn to give layers
117
+ // 1-3 a chance to fit. From 130K floor: turns 5-9 → 143K→157K→173K→190K→209K.
118
+ if (
119
+ state.consecutiveLayer4 >= 5 &&
120
+ state.dynamicContextCap > 0 &&
121
+ maxContextTokensCeiling > 0
122
+ ) {
123
+ state.dynamicContextCap = Math.min(
124
+ maxContextTokensCeiling,
125
+ Math.floor(state.dynamicContextCap * 1.10),
126
+ );
127
+ }
128
+ return;
129
+ }
130
+
131
+ // Non-Layer-4 turn: reset the consecutive counter (also before total===0
132
+ // guard — a zero-usage non-L4 turn must not leave a stale count).
133
+ if (lastLayer !== undefined) {
134
+ state.consecutiveLayer4 = 0;
135
+ }
136
+
104
137
  const total = cacheWrite + cacheRead;
105
138
  if (total === 0) return;
106
139
 
@@ -253,6 +286,10 @@ type SessionState = {
253
286
  postIdleCompact: boolean;
254
287
  /** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
255
288
  consecutiveHighLayer: number;
289
+ /** Consecutive Layer 4 turns — used to skip bust-rate EMA updates
290
+ * (Layer 4 busts are structural, not a caching signal) and to trigger
291
+ * a recovery hatch that relaxes dynamicContextCap after prolonged trapping. */
292
+ consecutiveLayer4: number;
256
293
 
257
294
  // --- Cost-aware context cap dynamic state ---
258
295
 
@@ -298,6 +335,7 @@ function makeSessionState(): SessionState {
298
335
  cameOutOfIdle: false,
299
336
  postIdleCompact: false,
300
337
  consecutiveHighLayer: 0,
338
+ consecutiveLayer4: 0,
301
339
 
302
340
  bustRateEMA: -1,
303
341
  interBustIntervalEMA: -1,
@@ -319,6 +357,27 @@ function getSessionState(sessionID: string): SessionState {
319
357
  // forceMinLayer=2, but if OpenCode restarts before the next turn,
320
358
  // the in-memory escalation would be lost without this.
321
359
  state.forceMinLayer = loadForceMinLayer(sessionID) as SafetyLayer;
360
+
361
+ // Restore gradient calibration state from DB (v24) — avoids uncalibrated
362
+ // first turns after restart. Without this, dynamicContextCap reverts to
363
+ // the static ceiling, bustRateEMA is uninitialized, and lastTurnAt=0
364
+ // prevents onIdleResume() from detecting idle gaps.
365
+ //
366
+ // Atomic restore: lastTurnAt > 0 is the proxy for "gradient state was
367
+ // ever flushed to DB". Restore all fields together or none — avoids
368
+ // per-field sentinel fragility where a valid value (e.g. lastLayer=0)
369
+ // could be mistaken for "never persisted".
370
+ const persisted = loadSessionTracking(sessionID);
371
+ if (persisted && persisted.lastTurnAt > 0) {
372
+ state.dynamicContextCap = persisted.dynamicContextCap;
373
+ state.bustRateEMA = persisted.bustRateEMA;
374
+ state.interBustIntervalEMA = persisted.interBustIntervalEMA;
375
+ state.lastLayer = persisted.lastLayer as SafetyLayer;
376
+ state.lastKnownInput = persisted.lastKnownInput;
377
+ state.lastTurnAt = persisted.lastTurnAt;
378
+ state.lastBustAt = persisted.lastBustAt;
379
+ }
380
+
322
381
  sessionStates.set(sessionID, state);
323
382
  }
324
383
  return state;
@@ -584,6 +643,9 @@ export function inspectSessionState(sessionID: string): {
584
643
  postIdleCompact: boolean;
585
644
  lastTurnAt: number;
586
645
  distillationSnapshot: DistillationSnapshot | null;
646
+ bustRateEMA: number;
647
+ dynamicContextCap: number;
648
+ consecutiveLayer4: number;
587
649
  } | null {
588
650
  const state = sessionStates.get(sessionID);
589
651
  if (!state) return null;
@@ -594,6 +656,9 @@ export function inspectSessionState(sessionID: string): {
594
656
  postIdleCompact: state.postIdleCompact,
595
657
  lastTurnAt: state.lastTurnAt,
596
658
  distillationSnapshot: state.distillationSnapshot,
659
+ bustRateEMA: state.bustRateEMA,
660
+ dynamicContextCap: state.dynamicContextCap,
661
+ consecutiveLayer4: state.consecutiveLayer4,
597
662
  };
598
663
  }
599
664
 
@@ -606,6 +671,28 @@ export function setLastTurnAtForTest(sessionID: string, ms: number): void {
606
671
  getSessionState(sessionID).lastTurnAt = ms;
607
672
  }
608
673
 
674
+ /**
675
+ * Persist gradient calibration state to the session_state table.
676
+ *
677
+ * Designed to be called periodically (e.g. every 30s from the idle scheduler
678
+ * tick) rather than on every mutation, to avoid write amplification on the
679
+ * hot path. Max data loss on crash is one tick interval (~30s).
680
+ */
681
+ export function saveGradientState(sessionID: string): void {
682
+ const state = sessionStates.get(sessionID);
683
+ if (!state) return;
684
+
685
+ saveSessionTracking(sessionID, {
686
+ dynamicContextCap: state.dynamicContextCap,
687
+ bustRateEMA: state.bustRateEMA,
688
+ interBustIntervalEMA: state.interBustIntervalEMA,
689
+ lastLayer: state.lastLayer,
690
+ lastKnownInput: state.lastKnownInput,
691
+ lastTurnAt: state.lastTurnAt,
692
+ lastBustAt: state.lastBustAt,
693
+ });
694
+ }
695
+
609
696
  type Distillation = {
610
697
  id: string;
611
698
  observations: string;
@@ -1132,8 +1219,54 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
1132
1219
  ];
1133
1220
  }
1134
1221
 
1222
+ // --- Importance-aware distillation selection ---
1223
+ //
1224
+ // When a compression stage limits distillation count (distLimit < Infinity),
1225
+ // selects the most valuable distillations rather than blindly taking the last N.
1226
+ // Scoring: 70% recency (position in chronological order) + 30% content signal.
1227
+ // Results are re-sorted chronologically after selection so the prefix cache
1228
+ // (Approach C) remains byte-stable when the same distillations are selected.
1229
+ //
1230
+ // Content signals (lightweight keyword detection, no LLM call):
1231
+ // - Decisions: "decision"/"decided"/"chose" → +0.3
1232
+ // - Gotchas/bugs: "gotcha"/"bug"/"fix"/"error" → +0.2
1233
+ // - Architecture: "architecture"/"pattern" → +0.1
1234
+ // - Meta-distilled (gen >= 1): +0.2 (consolidation = higher value density)
1235
+
1236
+ const DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
1237
+ const GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
1238
+ const ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;
1239
+
1240
+ function importanceBonus(d: Distillation): number {
1241
+ let bonus = 0;
1242
+ if (DECISION_RE.test(d.observations)) bonus += 0.3;
1243
+ if (GOTCHA_RE.test(d.observations)) bonus += 0.2;
1244
+ if (ARCH_RE.test(d.observations)) bonus += 0.1;
1245
+ if (d.generation >= 1) bonus += 0.2;
1246
+ return Math.min(bonus, 1.0);
1247
+ }
1248
+
1249
+ function selectDistillations(all: Distillation[], limit: number): Distillation[] {
1250
+ if (all.length <= limit) return all;
1251
+
1252
+ // Recency: normalize to [0, 0.7] where oldest = 0.0, newest = 0.7.
1253
+ // Use (length - 1) as divisor so the last entry gets full recency weight.
1254
+ const maxIdx = all.length - 1;
1255
+ const scored = all.map((d, i) => ({
1256
+ d,
1257
+ score: (maxIdx > 0 ? (i / maxIdx) : 1) * 0.7 + importanceBonus(d) * 0.3,
1258
+ }));
1259
+
1260
+ // Keep top N by score, then re-sort chronologically (cache-safe).
1261
+ return scored
1262
+ .sort((a, b) => b.score - a.score)
1263
+ .slice(0, limit)
1264
+ .map((s) => s.d)
1265
+ .sort((a, b) => a.created_at - b.created_at);
1266
+ }
1267
+
1135
1268
  // Build a synthetic message pair containing the distilled history.
1136
- // Non-cached path — used by layers 2-4 which already cause full cache invalidation.
1269
+ // Non-cached path — used by layers 2+ which already cause full cache invalidation.
1137
1270
  function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
1138
1271
  if (!distillations.length) return [];
1139
1272
  const formatted = formatDistillations(distillations);
@@ -1324,7 +1457,7 @@ function tryFitStable(input: {
1324
1457
  rawBudget: number;
1325
1458
  sessionID: string;
1326
1459
  sessState: SessionState;
1327
- }): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
1460
+ }): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
1328
1461
  // If the prefix already overflows its budget there's no point trying.
1329
1462
  if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
1330
1463
  return null;
@@ -1425,6 +1558,25 @@ function tryFitStable(input: {
1425
1558
 
1426
1559
  export type SafetyLayer = 0 | 1 | 2 | 3 | 4;
1427
1560
 
1561
+ // --- Compression stage table ---
1562
+ // Defines the escalation path for layers 1-3. Each stage tries increasingly
1563
+ // aggressive compression: tool stripping, tighter budgets, distillation trimming.
1564
+ // Adding a new intermediate stage = one table entry.
1565
+ type CompressionStage = {
1566
+ strip: "none" | "old-tools" | "all-tools";
1567
+ rawFrac: number | null; // fraction of usable; null = use default rawBudget
1568
+ distFrac: number | null; // fraction of usable; null = use default distilledBudget
1569
+ distLimit: number; // Infinity = all, 5 = last 5, etc.
1570
+ protectedTurns: number; // turns exempt from tool stripping
1571
+ useStableWindow: boolean; // use tryFitStable (Approach B pin cache)
1572
+ };
1573
+
1574
+ const COMPRESSION_STAGES: CompressionStage[] = [
1575
+ { strip: "none", rawFrac: null, distFrac: null, distLimit: Infinity, protectedTurns: 0, useStableWindow: true },
1576
+ { strip: "old-tools", rawFrac: 0.50, distFrac: null, distLimit: Infinity, protectedTurns: 2, useStableWindow: false },
1577
+ { strip: "all-tools", rawFrac: 0.55, distFrac: 0.15, distLimit: 5, protectedTurns: 0, useStableWindow: false },
1578
+ ];
1579
+
1428
1580
  export type TransformResult = {
1429
1581
  messages: MessageWithParts[];
1430
1582
  layer: SafetyLayer;
@@ -1435,6 +1587,10 @@ export type TransformResult = {
1435
1587
  usable: number;
1436
1588
  distilledBudget: number;
1437
1589
  rawBudget: number;
1590
+ // Signals that the pipeline should re-run forSession() to refresh LTM
1591
+ // relevance scoring. Set on Layer 4 (emergency) where the context is
1592
+ // fully reset and mid-session knowledge may have changed relevance.
1593
+ refreshLtm: boolean;
1438
1594
  };
1439
1595
 
1440
1596
  // Per-session urgent distillation tracking.
@@ -1530,7 +1686,10 @@ function transformInner(input: {
1530
1686
  // Pinning to the *actual* last layer prevents all downward oscillation.
1531
1687
  // Only applied when calibrated (same session, per-session state) to avoid
1532
1688
  // affecting other sessions including worker sessions.
1533
- if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
1689
+ // Layer 4 (emergency) already blows the cache — stickiness there just traps
1690
+ // the session at emergency permanently. Only apply stickiness for layers 1-3
1691
+ // where dropping back would bust a warm cache.
1692
+ if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
1534
1693
  effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
1535
1694
  }
1536
1695
 
@@ -1608,6 +1767,7 @@ function transformInner(input: {
1608
1767
  usable,
1609
1768
  distilledBudget,
1610
1769
  rawBudget,
1770
+ refreshLtm: false,
1611
1771
  };
1612
1772
  }
1613
1773
 
@@ -1627,7 +1787,7 @@ function transformInner(input: {
1627
1787
 
1628
1788
  // Layer 1 uses the append-only cached prefix (Approach C) to keep the
1629
1789
  // distilled content byte-identical between distillation runs, preserving
1630
- // the prompt cache. Layers 2-4 already cause full cache invalidation via
1790
+ // the prompt cache. Layers 2+ already cause full cache invalidation via
1631
1791
  // tool stripping / message restructuring, so they use the non-cached path.
1632
1792
  const cached = sid
1633
1793
  ? distilledPrefixCached(distillations, sid, sessState)
@@ -1636,79 +1796,71 @@ function transformInner(input: {
1636
1796
  return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
1637
1797
  })();
1638
1798
 
1639
- // Layer 1: Normal budget allocation with lazy raw window eviction (Approach B).
1640
- // tryFitStable reuses the previous cutoff when it still fits, keeping the raw
1641
- // window byte-identical across turns for prompt caching. Only advances the
1642
- // cutoff when a genuinely oversized message forces eviction.
1643
- // Skipped when force-escalated to layer 2+ (previous attempt already failed at this level).
1644
- if (effectiveMinLayer <= 1) {
1645
- const layer1 = sid
1646
- ? tryFitStable({
1647
- messages: dedupMessages,
1648
- prefix: cached.messages,
1649
- prefixTokens: cached.tokens,
1650
- distilledBudget,
1651
- rawBudget,
1652
- sessionID: sid,
1653
- sessState,
1654
- })
1655
- : tryFit({
1656
- messages: dedupMessages,
1657
- prefix: cached.messages,
1658
- prefixTokens: cached.tokens,
1659
- distilledBudget,
1660
- rawBudget,
1661
- strip: "none",
1662
- });
1663
- if (fitsWithSafetyMargin(layer1)) {
1664
- if (cached.tokens === 0 && sid) {
1665
- urgentDistillationMap.set(sid, true);
1666
- }
1667
- return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
1799
+ // --- Compression stages (layers 1-3) ---
1800
+ // Data-driven table replaces three hardcoded layer blocks. Each stage
1801
+ // escalates tool stripping and/or tightens distillation budgets.
1802
+ // Stage 0 (layer 1): stable window (Approach B), no stripping
1803
+ // Stage 1 (layer 2): strip old tool outputs, protect last 2 turns
1804
+ // Stage 2 (layer 3): strip ALL tool outputs, keep only 5 distillations
1805
+ for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
1806
+ const stageLayer = (s + 1) as SafetyLayer;
1807
+ if (effectiveMinLayer > stageLayer) continue;
1808
+
1809
+ const stage = COMPRESSION_STAGES[s];
1810
+ const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
1811
+ const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
1812
+
1813
+ // Determine prefix: if distLimit is finite, re-render with trimmed distillations.
1814
+ // Otherwise use the cached prefix (Approach C, byte-identical for cache).
1815
+ let stagePrefix = cached.messages;
1816
+ let stagePrefixTokens = cached.tokens;
1817
+ if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
1818
+ const trimmed = selectDistillations(distillations, stage.distLimit);
1819
+ stagePrefix = distilledPrefix(trimmed);
1820
+ stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
1668
1821
  }
1669
- }
1670
1822
 
1671
- // Layer 1 didn't fit (or was force-skipped) reset the raw window cache.
1672
- // Layers 2-4 use full scans and already break the prompt cache.
1673
- sessState.rawWindowCache = null;
1823
+ // Stage 0 (layer 1) uses tryFitStable for Approach B pin cache.
1824
+ // Higher stages reset the raw window cache and use plain tryFit.
1825
+ let result: Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null;
1826
+ if (stage.useStableWindow && sid) {
1827
+ result = tryFitStable({
1828
+ messages: dedupMessages,
1829
+ prefix: stagePrefix,
1830
+ prefixTokens: stagePrefixTokens,
1831
+ distilledBudget: stageDistBudget,
1832
+ rawBudget: stageRawBudget,
1833
+ sessionID: sid,
1834
+ sessState,
1835
+ });
1836
+ } else {
1837
+ // Reset raw window cache when leaving stage 0 — higher stages use full
1838
+ // scans and already break the prompt cache. Must fire even when stage 1
1839
+ // is skipped via effectiveMinLayer (e.g. forceMinLayer = 3).
1840
+ sessState.rawWindowCache = null;
1841
+ result = tryFit({
1842
+ messages: dedupMessages,
1843
+ prefix: stagePrefix,
1844
+ prefixTokens: stagePrefixTokens,
1845
+ distilledBudget: stageDistBudget,
1846
+ rawBudget: stageRawBudget,
1847
+ strip: stage.strip,
1848
+ protectedTurns: stage.protectedTurns,
1849
+ });
1850
+ }
1674
1851
 
1675
- // Layer 2: Strip tool outputs from older messages, keep last 2 turns
1676
- // Skipped when force-escalated to layer 3+.
1677
- if (effectiveMinLayer <= 2) {
1678
- const layer2 = tryFit({
1679
- messages: dedupMessages,
1680
- prefix: cached.messages,
1681
- prefixTokens: cached.tokens,
1682
- distilledBudget,
1683
- rawBudget: Math.floor(usable * 0.5), // give raw more room
1684
- strip: "old-tools",
1685
- protectedTurns: 2,
1686
- });
1687
- if (fitsWithSafetyMargin(layer2)) {
1688
- if (sid) urgentDistillationMap.set(sid, true);
1689
- return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
1852
+ if (fitsWithSafetyMargin(result)) {
1853
+ // Trigger urgent distillation when: (a) higher stages always need it, or
1854
+ // (b) stage 0 with no distillations = first time in gradient mode.
1855
+ if (sid && (s > 0 || cached.tokens === 0)) {
1856
+ urgentDistillationMap.set(sid, true);
1857
+ }
1858
+ return { ...result!, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
1690
1859
  }
1691
1860
  }
1692
1861
 
1693
- // Layer 3: Strip ALL tool outputs, drop oldest distillations
1694
- const trimmedDistillations = distillations.slice(-5);
1695
- const trimmedPrefix = distilledPrefix(trimmedDistillations);
1696
- const trimmedPrefixTokens = trimmedPrefix.reduce(
1697
- (sum, m) => sum + estimateMessage(m),
1698
- 0,
1699
- );
1700
- const layer3 = tryFit({
1701
- messages: dedupMessages,
1702
- prefix: trimmedPrefix,
1703
- prefixTokens: trimmedPrefixTokens,
1704
- distilledBudget: Math.floor(usable * 0.15),
1705
- rawBudget: Math.floor(usable * 0.55),
1706
- strip: "all-tools",
1707
- });
1708
- if (fitsWithSafetyMargin(layer3)) {
1709
- if (sid) urgentDistillationMap.set(sid, true);
1710
- return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
1711
- }
1862
+ // All compression stages exhausted — reset raw window cache before emergency.
1863
+ sessState.rawWindowCache = null;
1712
1864
 
1713
1865
  // Layer 4: Emergency — last 2 distillations + token-budget raw tail.
1714
1866
  // We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
@@ -1724,7 +1876,7 @@ function transformInner(input: {
1724
1876
  // and must always return. Remaining budget is filled backward with older
1725
1877
  // messages.
1726
1878
  if (sid) urgentDistillationMap.set(sid, true);
1727
- const nuclearDistillations = distillations.slice(-2);
1879
+ const nuclearDistillations = selectDistillations(distillations, 2);
1728
1880
  const nuclearPrefix = distilledPrefix(nuclearDistillations);
1729
1881
  const nuclearPrefixTokens = nuclearPrefix.reduce(
1730
1882
  (sum, m) => sum + estimateMessage(m),
@@ -1773,6 +1925,7 @@ function transformInner(input: {
1773
1925
  usable,
1774
1926
  distilledBudget,
1775
1927
  rawBudget,
1928
+ refreshLtm: true,
1776
1929
  };
1777
1930
  }
1778
1931
 
@@ -1893,7 +2046,7 @@ function tryFit(input: {
1893
2046
  rawBudget: number;
1894
2047
  strip: "none" | "old-tools" | "all-tools";
1895
2048
  protectedTurns?: number;
1896
- }): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
2049
+ }): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
1897
2050
  // If distilled prefix exceeds its budget, fail this layer
1898
2051
  if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
1899
2052
  return null;