@animus-labs/cortex 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +73 -0
  3. package/dist/budget-guard.d.ts +75 -0
  4. package/dist/budget-guard.d.ts.map +1 -0
  5. package/dist/budget-guard.js +142 -0
  6. package/dist/budget-guard.js.map +1 -0
  7. package/dist/compaction/compaction.d.ts +99 -0
  8. package/dist/compaction/compaction.d.ts.map +1 -0
  9. package/dist/compaction/compaction.js +302 -0
  10. package/dist/compaction/compaction.js.map +1 -0
  11. package/dist/compaction/failsafe.d.ts +57 -0
  12. package/dist/compaction/failsafe.d.ts.map +1 -0
  13. package/dist/compaction/failsafe.js +135 -0
  14. package/dist/compaction/failsafe.js.map +1 -0
  15. package/dist/compaction/index.d.ts +381 -0
  16. package/dist/compaction/index.d.ts.map +1 -0
  17. package/dist/compaction/index.js +979 -0
  18. package/dist/compaction/index.js.map +1 -0
  19. package/dist/compaction/microcompaction.d.ts +219 -0
  20. package/dist/compaction/microcompaction.d.ts.map +1 -0
  21. package/dist/compaction/microcompaction.js +536 -0
  22. package/dist/compaction/microcompaction.js.map +1 -0
  23. package/dist/compaction/observational/buffering.d.ts +225 -0
  24. package/dist/compaction/observational/buffering.d.ts.map +1 -0
  25. package/dist/compaction/observational/buffering.js +354 -0
  26. package/dist/compaction/observational/buffering.js.map +1 -0
  27. package/dist/compaction/observational/constants.d.ts +70 -0
  28. package/dist/compaction/observational/constants.d.ts.map +1 -0
  29. package/dist/compaction/observational/constants.js +507 -0
  30. package/dist/compaction/observational/constants.js.map +1 -0
  31. package/dist/compaction/observational/index.d.ts +219 -0
  32. package/dist/compaction/observational/index.d.ts.map +1 -0
  33. package/dist/compaction/observational/index.js +641 -0
  34. package/dist/compaction/observational/index.js.map +1 -0
  35. package/dist/compaction/observational/observer.d.ts +97 -0
  36. package/dist/compaction/observational/observer.d.ts.map +1 -0
  37. package/dist/compaction/observational/observer.js +424 -0
  38. package/dist/compaction/observational/observer.js.map +1 -0
  39. package/dist/compaction/observational/recall-tool.d.ts +27 -0
  40. package/dist/compaction/observational/recall-tool.d.ts.map +1 -0
  41. package/dist/compaction/observational/recall-tool.js +93 -0
  42. package/dist/compaction/observational/recall-tool.js.map +1 -0
  43. package/dist/compaction/observational/reflector.d.ts +94 -0
  44. package/dist/compaction/observational/reflector.d.ts.map +1 -0
  45. package/dist/compaction/observational/reflector.js +167 -0
  46. package/dist/compaction/observational/reflector.js.map +1 -0
  47. package/dist/compaction/observational/types.d.ts +271 -0
  48. package/dist/compaction/observational/types.d.ts.map +1 -0
  49. package/dist/compaction/observational/types.js +15 -0
  50. package/dist/compaction/observational/types.js.map +1 -0
  51. package/dist/context-manager.d.ts +134 -0
  52. package/dist/context-manager.d.ts.map +1 -0
  53. package/dist/context-manager.js +170 -0
  54. package/dist/context-manager.js.map +1 -0
  55. package/dist/cortex-agent.d.ts +1020 -0
  56. package/dist/cortex-agent.d.ts.map +1 -0
  57. package/dist/cortex-agent.js +3589 -0
  58. package/dist/cortex-agent.js.map +1 -0
  59. package/dist/error-classifier.d.ts +48 -0
  60. package/dist/error-classifier.d.ts.map +1 -0
  61. package/dist/error-classifier.js +152 -0
  62. package/dist/error-classifier.js.map +1 -0
  63. package/dist/event-bridge.d.ts +166 -0
  64. package/dist/event-bridge.d.ts.map +1 -0
  65. package/dist/event-bridge.js +381 -0
  66. package/dist/event-bridge.js.map +1 -0
  67. package/dist/index.d.ts +55 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +57 -0
  70. package/dist/index.js.map +1 -0
  71. package/dist/mcp-client.d.ts +119 -0
  72. package/dist/mcp-client.d.ts.map +1 -0
  73. package/dist/mcp-client.js +474 -0
  74. package/dist/mcp-client.js.map +1 -0
  75. package/dist/model-wrapper.d.ts +58 -0
  76. package/dist/model-wrapper.d.ts.map +1 -0
  77. package/dist/model-wrapper.js +86 -0
  78. package/dist/model-wrapper.js.map +1 -0
  79. package/dist/noop-logger.d.ts +4 -0
  80. package/dist/noop-logger.d.ts.map +1 -0
  81. package/dist/noop-logger.js +8 -0
  82. package/dist/noop-logger.js.map +1 -0
  83. package/dist/prompt-diagnostics.d.ts +47 -0
  84. package/dist/prompt-diagnostics.d.ts.map +1 -0
  85. package/dist/prompt-diagnostics.js +230 -0
  86. package/dist/prompt-diagnostics.js.map +1 -0
  87. package/dist/provider-manager.d.ts +224 -0
  88. package/dist/provider-manager.d.ts.map +1 -0
  89. package/dist/provider-manager.js +563 -0
  90. package/dist/provider-manager.js.map +1 -0
  91. package/dist/provider-registry.d.ts +115 -0
  92. package/dist/provider-registry.d.ts.map +1 -0
  93. package/dist/provider-registry.js +305 -0
  94. package/dist/provider-registry.js.map +1 -0
  95. package/dist/schema-converter.d.ts +20 -0
  96. package/dist/schema-converter.d.ts.map +1 -0
  97. package/dist/schema-converter.js +48 -0
  98. package/dist/schema-converter.js.map +1 -0
  99. package/dist/skill-preprocessor.d.ts +46 -0
  100. package/dist/skill-preprocessor.d.ts.map +1 -0
  101. package/dist/skill-preprocessor.js +237 -0
  102. package/dist/skill-preprocessor.js.map +1 -0
  103. package/dist/skill-registry.d.ts +107 -0
  104. package/dist/skill-registry.d.ts.map +1 -0
  105. package/dist/skill-registry.js +330 -0
  106. package/dist/skill-registry.js.map +1 -0
  107. package/dist/skill-tool.d.ts +54 -0
  108. package/dist/skill-tool.d.ts.map +1 -0
  109. package/dist/skill-tool.js +88 -0
  110. package/dist/skill-tool.js.map +1 -0
  111. package/dist/sub-agent-manager.d.ts +90 -0
  112. package/dist/sub-agent-manager.d.ts.map +1 -0
  113. package/dist/sub-agent-manager.js +192 -0
  114. package/dist/sub-agent-manager.js.map +1 -0
  115. package/dist/token-estimator.d.ts +23 -0
  116. package/dist/token-estimator.d.ts.map +1 -0
  117. package/dist/token-estimator.js +27 -0
  118. package/dist/token-estimator.js.map +1 -0
  119. package/dist/tool-contract.d.ts +68 -0
  120. package/dist/tool-contract.d.ts.map +1 -0
  121. package/dist/tool-contract.js +35 -0
  122. package/dist/tool-contract.js.map +1 -0
  123. package/dist/tool-result-persistence.d.ts +89 -0
  124. package/dist/tool-result-persistence.d.ts.map +1 -0
  125. package/dist/tool-result-persistence.js +152 -0
  126. package/dist/tool-result-persistence.js.map +1 -0
  127. package/dist/tools/bash/index.d.ts +71 -0
  128. package/dist/tools/bash/index.d.ts.map +1 -0
  129. package/dist/tools/bash/index.js +485 -0
  130. package/dist/tools/bash/index.js.map +1 -0
  131. package/dist/tools/bash/interactive.d.ts +47 -0
  132. package/dist/tools/bash/interactive.d.ts.map +1 -0
  133. package/dist/tools/bash/interactive.js +262 -0
  134. package/dist/tools/bash/interactive.js.map +1 -0
  135. package/dist/tools/bash/safety.d.ts +149 -0
  136. package/dist/tools/bash/safety.d.ts.map +1 -0
  137. package/dist/tools/bash/safety.js +1116 -0
  138. package/dist/tools/bash/safety.js.map +1 -0
  139. package/dist/tools/edit.d.ts +57 -0
  140. package/dist/tools/edit.d.ts.map +1 -0
  141. package/dist/tools/edit.js +310 -0
  142. package/dist/tools/edit.js.map +1 -0
  143. package/dist/tools/glob.d.ts +34 -0
  144. package/dist/tools/glob.d.ts.map +1 -0
  145. package/dist/tools/glob.js +268 -0
  146. package/dist/tools/glob.js.map +1 -0
  147. package/dist/tools/grep.d.ts +53 -0
  148. package/dist/tools/grep.d.ts.map +1 -0
  149. package/dist/tools/grep.js +673 -0
  150. package/dist/tools/grep.js.map +1 -0
  151. package/dist/tools/index.d.ts +62 -0
  152. package/dist/tools/index.d.ts.map +1 -0
  153. package/dist/tools/index.js +52 -0
  154. package/dist/tools/index.js.map +1 -0
  155. package/dist/tools/read.d.ts +43 -0
  156. package/dist/tools/read.d.ts.map +1 -0
  157. package/dist/tools/read.js +459 -0
  158. package/dist/tools/read.js.map +1 -0
  159. package/dist/tools/runtime.d.ts +62 -0
  160. package/dist/tools/runtime.d.ts.map +1 -0
  161. package/dist/tools/runtime.js +116 -0
  162. package/dist/tools/runtime.js.map +1 -0
  163. package/dist/tools/shared/cwd-tracker.d.ts +32 -0
  164. package/dist/tools/shared/cwd-tracker.d.ts.map +1 -0
  165. package/dist/tools/shared/cwd-tracker.js +44 -0
  166. package/dist/tools/shared/cwd-tracker.js.map +1 -0
  167. package/dist/tools/shared/edit-history.d.ts +55 -0
  168. package/dist/tools/shared/edit-history.d.ts.map +1 -0
  169. package/dist/tools/shared/edit-history.js +72 -0
  170. package/dist/tools/shared/edit-history.js.map +1 -0
  171. package/dist/tools/shared/edit-matcher.d.ts +83 -0
  172. package/dist/tools/shared/edit-matcher.d.ts.map +1 -0
  173. package/dist/tools/shared/edit-matcher.js +359 -0
  174. package/dist/tools/shared/edit-matcher.js.map +1 -0
  175. package/dist/tools/shared/file-mutation-lock.d.ts +22 -0
  176. package/dist/tools/shared/file-mutation-lock.d.ts.map +1 -0
  177. package/dist/tools/shared/file-mutation-lock.js +35 -0
  178. package/dist/tools/shared/file-mutation-lock.js.map +1 -0
  179. package/dist/tools/shared/gitignore.d.ts +17 -0
  180. package/dist/tools/shared/gitignore.d.ts.map +1 -0
  181. package/dist/tools/shared/gitignore.js +59 -0
  182. package/dist/tools/shared/gitignore.js.map +1 -0
  183. package/dist/tools/shared/pdf-extractor.d.ts +96 -0
  184. package/dist/tools/shared/pdf-extractor.d.ts.map +1 -0
  185. package/dist/tools/shared/pdf-extractor.js +196 -0
  186. package/dist/tools/shared/pdf-extractor.js.map +1 -0
  187. package/dist/tools/shared/read-registry.d.ts +66 -0
  188. package/dist/tools/shared/read-registry.d.ts.map +1 -0
  189. package/dist/tools/shared/read-registry.js +65 -0
  190. package/dist/tools/shared/read-registry.js.map +1 -0
  191. package/dist/tools/shared/safe-env.d.ts +18 -0
  192. package/dist/tools/shared/safe-env.d.ts.map +1 -0
  193. package/dist/tools/shared/safe-env.js +70 -0
  194. package/dist/tools/shared/safe-env.js.map +1 -0
  195. package/dist/tools/sub-agent.d.ts +91 -0
  196. package/dist/tools/sub-agent.d.ts.map +1 -0
  197. package/dist/tools/sub-agent.js +89 -0
  198. package/dist/tools/sub-agent.js.map +1 -0
  199. package/dist/tools/task-output.d.ts +38 -0
  200. package/dist/tools/task-output.d.ts.map +1 -0
  201. package/dist/tools/task-output.js +186 -0
  202. package/dist/tools/task-output.js.map +1 -0
  203. package/dist/tools/tool-search/index.d.ts +40 -0
  204. package/dist/tools/tool-search/index.d.ts.map +1 -0
  205. package/dist/tools/tool-search/index.js +110 -0
  206. package/dist/tools/tool-search/index.js.map +1 -0
  207. package/dist/tools/tool-search/registry.d.ts +82 -0
  208. package/dist/tools/tool-search/registry.d.ts.map +1 -0
  209. package/dist/tools/tool-search/registry.js +238 -0
  210. package/dist/tools/tool-search/registry.js.map +1 -0
  211. package/dist/tools/undo-edit.d.ts +51 -0
  212. package/dist/tools/undo-edit.d.ts.map +1 -0
  213. package/dist/tools/undo-edit.js +231 -0
  214. package/dist/tools/undo-edit.js.map +1 -0
  215. package/dist/tools/web-fetch/cache.d.ts +49 -0
  216. package/dist/tools/web-fetch/cache.d.ts.map +1 -0
  217. package/dist/tools/web-fetch/cache.js +89 -0
  218. package/dist/tools/web-fetch/cache.js.map +1 -0
  219. package/dist/tools/web-fetch/index.d.ts +53 -0
  220. package/dist/tools/web-fetch/index.d.ts.map +1 -0
  221. package/dist/tools/web-fetch/index.js +513 -0
  222. package/dist/tools/web-fetch/index.js.map +1 -0
  223. package/dist/tools/write.d.ts +59 -0
  224. package/dist/tools/write.d.ts.map +1 -0
  225. package/dist/tools/write.js +316 -0
  226. package/dist/tools/write.js.map +1 -0
  227. package/dist/types.d.ts +881 -0
  228. package/dist/types.d.ts.map +1 -0
  229. package/dist/types.js +16 -0
  230. package/dist/types.js.map +1 -0
  231. package/dist/working-tags.d.ts +44 -0
  232. package/dist/working-tags.d.ts.map +1 -0
  233. package/dist/working-tags.js +103 -0
  234. package/dist/working-tags.js.map +1 -0
  235. package/package.json +87 -0
  236. package/src/budget-guard.ts +170 -0
  237. package/src/compaction/compaction.ts +386 -0
  238. package/src/compaction/failsafe.ts +185 -0
  239. package/src/compaction/index.ts +1199 -0
  240. package/src/compaction/microcompaction.ts +709 -0
  241. package/src/compaction/observational/buffering.ts +430 -0
  242. package/src/compaction/observational/constants.ts +532 -0
  243. package/src/compaction/observational/index.ts +837 -0
  244. package/src/compaction/observational/observer.ts +510 -0
  245. package/src/compaction/observational/recall-tool.ts +130 -0
  246. package/src/compaction/observational/reflector.ts +221 -0
  247. package/src/compaction/observational/types.ts +343 -0
  248. package/src/context-manager.ts +237 -0
  249. package/src/cortex-agent.ts +4297 -0
  250. package/src/error-classifier.ts +199 -0
  251. package/src/event-bridge.ts +508 -0
  252. package/src/index.ts +292 -0
  253. package/src/mcp-client.ts +582 -0
  254. package/src/model-wrapper.ts +128 -0
  255. package/src/noop-logger.ts +9 -0
  256. package/src/prompt-diagnostics.ts +296 -0
  257. package/src/provider-manager.ts +823 -0
  258. package/src/provider-registry.ts +386 -0
  259. package/src/schema-converter.ts +51 -0
  260. package/src/skill-preprocessor.ts +314 -0
  261. package/src/skill-registry.ts +378 -0
  262. package/src/skill-tool.ts +130 -0
  263. package/src/sub-agent-manager.ts +236 -0
  264. package/src/token-estimator.ts +26 -0
  265. package/src/tool-contract.ts +113 -0
  266. package/src/tool-result-persistence.ts +197 -0
  267. package/src/tools/bash/index.ts +633 -0
  268. package/src/tools/bash/interactive.ts +302 -0
  269. package/src/tools/bash/safety.ts +1297 -0
  270. package/src/tools/edit.ts +422 -0
  271. package/src/tools/glob.ts +330 -0
  272. package/src/tools/grep.ts +819 -0
  273. package/src/tools/index.ts +110 -0
  274. package/src/tools/read.ts +580 -0
  275. package/src/tools/runtime.ts +173 -0
  276. package/src/tools/shared/cwd-tracker.ts +50 -0
  277. package/src/tools/shared/edit-history.ts +96 -0
  278. package/src/tools/shared/edit-matcher.ts +457 -0
  279. package/src/tools/shared/file-mutation-lock.ts +40 -0
  280. package/src/tools/shared/gitignore.ts +61 -0
  281. package/src/tools/shared/pdf-extractor.ts +290 -0
  282. package/src/tools/shared/read-registry.ts +93 -0
  283. package/src/tools/shared/safe-env.ts +82 -0
  284. package/src/tools/sub-agent.ts +171 -0
  285. package/src/tools/task-output.ts +236 -0
  286. package/src/tools/tool-search/index.ts +167 -0
  287. package/src/tools/tool-search/registry.ts +278 -0
  288. package/src/tools/undo-edit.ts +314 -0
  289. package/src/tools/web-fetch/cache.ts +112 -0
  290. package/src/tools/web-fetch/index.ts +604 -0
  291. package/src/tools/write.ts +385 -0
  292. package/src/types.ts +1057 -0
  293. package/src/working-tags.ts +118 -0
@@ -0,0 +1,1199 @@
1
+ /**
2
+ * Compaction composition: wires all three layers into the transformContext chain.
3
+ *
4
+ * Layer 1 (Microcompaction): tool result trimming at threshold crossings
5
+ * Layer 2 (Compaction): conversation summarization via LLM
6
+ * Layer 3 (Failsafe): emergency truncation, purely mechanical
7
+ *
8
+ * All three layers run inside transformContext, which fires before every LLM
9
+ * call. Compaction is fully self-contained within Cortex; no external calls
10
+ * from the backend are needed to trigger it. Layer 2 fires when token usage
11
+ * exceeds 70% of the context window and a completeFn + source accessors are
12
+ * provided. Layer 3 fires whenever tokens exceed 90% of the model's context
13
+ * window.
14
+ *
15
+ * References:
16
+ * - compaction-strategy.md
17
+ * - phase-5-compaction.md (5.5)
18
+ */
19
+
20
+ import type { AgentMessage, AgentContext } from '../context-manager.js';
21
+ import type {
22
+ CortexLogger,
23
+ CortexCompactionConfig,
24
+ AdaptiveThresholdConfig,
25
+ CompactionResult,
26
+ CompactionTarget,
27
+ CompactionDegradedInfo,
28
+ CompactionExhaustedInfo,
29
+ } from '../types.js';
30
+ import { NOOP_LOGGER } from '../noop-logger.js';
31
+ import { estimateTokens } from '../token-estimator.js';
32
+ import { MicrocompactionEngine, MICROCOMPACTION_DEFAULTS, extractTextContent, isToolResultMessage, capToolResult, extractToolName, getToolCategory, applyBookend } from './microcompaction.js';
33
+ import {
34
+ runCompaction,
35
+ shouldCompact,
36
+ COMPACTION_DEFAULTS,
37
+ } from './compaction.js';
38
+ import type { CompleteFn, BeforeCompactionHandler, PostCompactionHandler, CompactionErrorHandler } from './compaction.js';
39
+ import {
40
+ emergencyTruncate,
41
+ shouldTruncate,
42
+ FAILSAFE_DEFAULTS,
43
+ } from './failsafe.js';
44
+ import { ObservationalMemoryEngine } from './observational/index.js';
45
+ import type { ObservationalMemoryConfig, ObservationalMemoryState, ObservationEvent, ReflectionEvent } from './observational/types.js';
46
+ import { PROVIDER_CACHE_CONFIG, type CacheRetention } from '../provider-registry.js';
47
+
48
+ // ---------------------------------------------------------------------------
49
+ // Re-exports for consumer convenience
50
+ // ---------------------------------------------------------------------------
51
+
52
+ export { MicrocompactionEngine, capToolResult } from './microcompaction.js';
53
+ export type { TrimAction, TrimState } from './microcompaction.js';
54
+ export { runCompaction, shouldCompact, partitionHistory, buildSummaryMessage } from './compaction.js';
55
+ export type { CompleteFn } from './compaction.js';
56
+ export { emergencyTruncate, shouldTruncate, isContextOverflow } from './failsafe.js';
57
+ export type { FailsafeTruncationResult } from './failsafe.js';
58
+ export { ObservationalMemoryEngine } from './observational/index.js';
59
+ export type { ObservationalMemoryConfig, ObservationalMemoryState, ObservationChunk, ObservationEvent, ReflectionEvent, RecallResult, RecallConfig } from './observational/types.js';
60
+ export { createRecallTool } from './observational/recall-tool.js';
61
+ // computeAdaptiveThreshold is defined below in this file and exported at the declaration site
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Default config
65
+ // ---------------------------------------------------------------------------
66
+
67
/**
 * Default adaptive-threshold settings.
 *
 * The three `*Reduction` values are amounts subtracted from the base Layer 2
 * threshold by `computeAdaptiveThreshold`, selected by how long ago the last
 * user interaction happened relative to the two window boundaries.
 */
export const ADAPTIVE_DEFAULTS: AdaptiveThresholdConfig = {
  enabled: true,
  // Interactions newer than 5 minutes count as "recent".
  recentWindowMs: 5 * 60 * 1000,
  // Interactions older than 30 minutes count as "fully idle".
  idleWindowMs: 30 * 60 * 1000,
  // Recent: threshold left unchanged.
  recentReduction: 0,
  // Between the two windows: threshold lowered by 0.10.
  moderateReduction: 0.1,
  // Fully idle (or no interaction recorded): threshold lowered by 0.20.
  idleReduction: 0.2,
};
75
+
76
/**
 * Complete default compaction configuration: one defaults object per layer
 * plus the adaptive-threshold defaults.
 *
 * The per-layer objects are referenced, not copied, so this value shares
 * state with the imported `*_DEFAULTS` constants.
 */
export const DEFAULT_COMPACTION_CONFIG: CortexCompactionConfig = {
  // Layer 1: tool result trimming.
  microcompaction: MICROCOMPACTION_DEFAULTS,
  // Layer 2: LLM-based conversation summarization.
  compaction: COMPACTION_DEFAULTS,
  // Layer 3: emergency truncation.
  failsafe: FAILSAFE_DEFAULTS,
  // Recency-based adjustment of the Layer 2 threshold.
  adaptive: ADAPTIVE_DEFAULTS,
};
82
+
83
/**
 * Build a full compaction config from partial overrides.
 *
 * Each layer section (`microcompaction`, `compaction`, `failsafe`,
 * `adaptive`) is shallow-merged over that section's defaults, so an override
 * only needs to list the fields it changes. `strategy` and `observational`
 * are copied onto the result only when explicitly provided.
 *
 * The returned object and its four section objects are always freshly
 * created, including when `partial` is omitted. Callers can therefore mutate
 * the result without altering `DEFAULT_COMPACTION_CONFIG` or the shared
 * `*_DEFAULTS` objects. The merge is shallow: any nested objects inside a
 * section are still shared by reference.
 *
 * @param partial - Optional overrides; omitted sections use the defaults
 * @returns A new, fully populated compaction config
 */
export function buildCompactionConfig(
  partial?: Partial<CortexCompactionConfig>,
): CortexCompactionConfig {
  // Route the "no overrides" case through the same merge path so it also
  // yields a fresh copy instead of the shared default object.
  const overrides: Partial<CortexCompactionConfig> = partial ?? {};

  const config: CortexCompactionConfig = {
    microcompaction: {
      ...MICROCOMPACTION_DEFAULTS,
      ...overrides.microcompaction,
    },
    compaction: {
      ...COMPACTION_DEFAULTS,
      ...overrides.compaction,
    },
    failsafe: {
      ...FAILSAFE_DEFAULTS,
      ...overrides.failsafe,
    },
    adaptive: {
      ...ADAPTIVE_DEFAULTS,
      ...overrides.adaptive,
    },
  };

  // Assign optional keys only when set, so the result never carries an
  // explicit `undefined` for them.
  if (overrides.strategy !== undefined) {
    config.strategy = overrides.strategy;
  }

  if (overrides.observational !== undefined) {
    config.observational = overrides.observational;
  }

  return config;
}
120
+
121
+ // ---------------------------------------------------------------------------
122
+ // Adaptive threshold calculation
123
+ // ---------------------------------------------------------------------------
124
+
125
/**
 * Compute the effective Layer 2 compaction threshold for the current level
 * of user activity.
 *
 * The longer it has been since the last interaction, the larger the amount
 * subtracted from the base threshold, so compaction fires sooner:
 *
 * - adaptive disabled: `baseThreshold` is returned untouched (not clamped)
 * - no interaction recorded (`null`): `idleReduction`
 * - elapsed < `recentWindowMs`: `recentReduction`
 * - elapsed < `idleWindowMs`: `moderateReduction`
 * - otherwise: `idleReduction`
 *
 * @param baseThreshold - The configured Layer 2 threshold (e.g., 0.70)
 * @param adaptiveConfig - Adaptive threshold configuration
 * @param lastInteractionTime - Timestamp (ms) of the last user interaction, or null if never
 * @param now - Current timestamp (ms), injectable for testing
 * @returns The adjusted threshold, clamped to be >= 0 whenever adaptive mode is enabled
 */
export function computeAdaptiveThreshold(
  baseThreshold: number,
  adaptiveConfig: AdaptiveThresholdConfig,
  lastInteractionTime: number | null,
  now: number = Date.now(),
): number {
  if (!adaptiveConfig.enabled) return baseThreshold;

  let reduction: number;
  if (lastInteractionTime === null) {
    // Nothing recorded yet is handled the same as a fully idle session.
    reduction = adaptiveConfig.idleReduction;
  } else {
    const sinceLastInteraction = now - lastInteractionTime;
    if (sinceLastInteraction < adaptiveConfig.recentWindowMs) {
      reduction = adaptiveConfig.recentReduction;
    } else if (sinceLastInteraction < adaptiveConfig.idleWindowMs) {
      reduction = adaptiveConfig.moderateReduction;
    } else {
      reduction = adaptiveConfig.idleReduction;
    }
  }

  // A reduction larger than the base must not produce a negative threshold.
  return Math.max(0, baseThreshold - reduction);
}
166
+
167
+ // ---------------------------------------------------------------------------
168
+ // CompactionManager
169
+ // ---------------------------------------------------------------------------
170
+
171
+ /**
172
+ * CompactionManager orchestrates all three compaction layers.
173
+ *
174
+ * It is stateful: it tracks the current token count and the microcompaction
175
+ * cache. The CortexAgent creates one instance and delegates all compaction
176
+ * decisions to it. Compaction is fully autonomous: all three layers run
177
+ * inside applyInTransformContext(), which fires before every LLM call.
178
+ */
179
+ export class CompactionManager {
180
+ private readonly config: CortexCompactionConfig;
181
+ private readonly microcompaction: MicrocompactionEngine;
182
+ private readonly slotCount: number;
183
+ private readonly _strategy: 'observational' | 'classic';
184
+ private observationalEngine: ObservationalMemoryEngine | null = null;
185
+
186
+ /** Post-hoc current-context token count, updated after each parent LLM call. */
187
+ private _currentContextTokenCount = 0;
188
+
189
+ /** Context budget for Layer 1/2 compaction decisions (may be artificially limited). */
190
+ private _contextWindow = 0;
191
+
192
+ /** Actual model context window for Layer 3 failsafe (never artificially limited). */
193
+ private _modelContextWindow = 0;
194
+
195
+ /**
196
+ * Timestamp (ms) of the last user interaction. Used by the adaptive
197
+ * threshold system to decide how aggressively to compact. Updated by
198
+ * the consumer (backend) when a message-triggered tick fires.
199
+ * Null means no interaction has been recorded yet.
200
+ */
201
+ private _lastInteractionTime: number | null = null;
202
+
203
+ /**
204
+ * Timestamp (ms) of the last LLM call. Used by L1 to decide whether the
205
+ * prompt cache has gone cold. Updated automatically in
206
+ * updateCurrentContextTokenCount() (which fires after every LLM response).
207
+ * Null means no LLM call has been recorded yet (treated as cold).
208
+ */
209
+ private _lastLlmCallTimestamp: number | null = null;
210
+
211
+ /**
212
+ * Effective cache TTL (ms) for the current provider + cache retention.
213
+ * Zero means caching is unsupported or disabled, in which case L1 treats
214
+ * the cache as perpetually cold (trim freely). Set via setCacheInfo().
215
+ */
216
+ private _providerCacheTtlMs = 0;
217
+
218
+ /** Consumer handlers for compaction lifecycle events. */
219
+ private beforeCompactionHandlers: BeforeCompactionHandler[] = [];
220
+ private postCompactionHandlers: PostCompactionHandler[] = [];
221
+ private compactionErrorHandlers: CompactionErrorHandler[] = [];
222
+ private compactionResultHandlers: Array<(result: CompactionResult) => void> = [];
223
+ private compactionDegradedHandlers: Array<(info: CompactionDegradedInfo) => void> = [];
224
+ private compactionExhaustedHandlers: Array<(info: CompactionExhaustedInfo) => void> = [];
225
+
226
+ /** Consecutive Layer 2 failure count for circuit breaker. Reset on success. */
227
+ private _consecutiveLayer2Failures = 0;
228
+
229
+ /** LLM completion function, set by CortexAgent. */
230
+ private completeFn: CompleteFn | null = null;
231
+
232
+ /** Logger for compaction diagnostics. */
233
+ private logger: CortexLogger = NOOP_LOGGER;
234
+
235
/**
 * Create a manager for the given compaction config.
 *
 * Always builds the microcompaction engine. The observational memory engine
 * is built only when the strategy resolves to 'observational', which is also
 * the fallback when `config.strategy` is unset.
 *
 * @param config - Fully populated compaction configuration
 * @param slotCount - Slot count stored on the manager
 */
constructor(
  config: CortexCompactionConfig,
  slotCount: number,
) {
  this.config = config;
  this.slotCount = slotCount;
  this.microcompaction = new MicrocompactionEngine(config.microcompaction);
  this._strategy = config.strategy ?? 'observational';

  // The classic strategy needs no observational engine; the field stays null.
  if (this._strategy !== 'observational') return;

  // NOTE(review): the engine receives slotCount - 1; the reason for the
  // offset is not visible in this file. Confirm against ObservationalMemoryEngine.
  const observationalConfig = config.observational ?? {};
  this.observationalEngine = new ObservationalMemoryEngine(
    observationalConfig,
    slotCount - 1,
  );
}
251
+
252
+ // -----------------------------------------------------------------------
253
+ // Configuration
254
+ // -----------------------------------------------------------------------
255
+
256
/** The compaction strategy resolved in the constructor. */
get strategy(): 'observational' | 'classic' {
  return this._strategy;
}
258
+
259
/**
 * Set the context budget, i.e. the effective limit used for Layer 1/2
 * compaction decisions. It may be smaller than the model's real context
 * window when a user-configured limit applies.
 *
 * The value is also forwarded to the observational engine when one exists.
 *
 * @param contextWindow - Context budget in tokens
 */
setContextWindow(contextWindow: number): void {
  this._contextWindow = contextWindow;

  const engine = this.observationalEngine;
  if (engine) {
    engine.setContextWindow(contextWindow);
  }
}
268
+
269
/**
 * Set the model's actual context window, used by the Layer 3 failsafe so
 * that it is not bound by an artificially reduced budget.
 *
 * The same value is forwarded to the observational engine as the utility
 * model context window, acting as a stand-in until a real value is supplied
 * through setUtilityModelContextWindow().
 *
 * NOTE(review): the forward is unconditional, so calling this after
 * setUtilityModelContextWindow() presumably replaces the utility value.
 * Confirm that call ordering is intended.
 *
 * @param modelContextWindow - The model's context window in tokens
 */
setModelContextWindow(modelContextWindow: number): void {
  this._modelContextWindow = modelContextWindow;

  const engine = this.observationalEngine;
  if (engine) {
    engine.setUtilityModelContextWindow(modelContextWindow);
  }
}
282
+
283
/**
 * Store the LLM completion function on the manager. Per the original
 * documentation it is used for Layer 2 summarization.
 *
 * @param fn - Completion function to store
 */
setCompleteFn(fn: CompleteFn): void {
  this.completeFn = fn;
}
289
+
290
/**
 * Hand the observational engine its LLM completion function (the utility
 * model). Does nothing when no observational engine exists.
 *
 * @param fn - Completion function for the observational engine
 */
setObservationalCompleteFn(fn: CompleteFn): void {
  const engine = this.observationalEngine;
  if (engine) {
    engine.setCompleteFn(fn);
  }
}
296
+
297
/**
 * Forward the utility model's context window to the observational engine.
 * Does nothing when no observational engine exists.
 *
 * @param utilityModelContextWindow - Utility model context window in tokens
 */
setUtilityModelContextWindow(utilityModelContextWindow: number): void {
  const engine = this.observationalEngine;
  if (engine) {
    engine.setUtilityModelContextWindow(utilityModelContextWindow);
  }
}
303
+
304
/**
 * Replace the diagnostics logger on this manager and, when present, on the
 * observational engine.
 *
 * @param logger - Logger that receives compaction diagnostics
 */
setLogger(logger: CortexLogger): void {
  this.logger = logger;

  const engine = this.observationalEngine;
  if (engine) {
    engine.setLogger(logger);
  }
}
311
+
312
/**
 * Record when the user last interacted with the system.
 *
 * getEffectiveThreshold() reads the stored value to apply the adaptive
 * threshold. The consumer is expected to call this only for
 * message-triggered ticks; because interval ticks leave it alone, the
 * timestamp ages naturally.
 *
 * @param timestamp - Time of the interaction, in ms
 */
setLastInteractionTime(timestamp: number): void {
  this._lastInteractionTime = timestamp;
}
321
+
322
/**
 * Timestamp (ms) of the last recorded user interaction; null until
 * setLastInteractionTime() has been called.
 */
get lastInteractionTime(): number | null {
  return this._lastInteractionTime;
}
328
+
329
/**
 * Resolve and store the effective prompt-cache TTL for a provider and
 * cache-retention setting.
 *
 * The TTL is set to 0 when the provider has no entry in
 * PROVIDER_CACHE_CONFIG, when the entry is marked unsupported, or when
 * retention is 'none'. Otherwise 'long' selects the entry's `longTtlMs` and
 * any other retention selects `shortTtlMs`.
 *
 * @param provider - The active provider name (e.g., "anthropic", "openai")
 * @param cacheRetention - The configured cache retention ('none' | 'short' | 'long')
 */
setCacheInfo(provider: string, cacheRetention: CacheRetention): void {
  const entry = PROVIDER_CACHE_CONFIG[provider];

  if (entry && entry.supported && cacheRetention !== 'none') {
    if (cacheRetention === 'long') {
      this._providerCacheTtlMs = entry.longTtlMs;
    } else {
      this._providerCacheTtlMs = entry.shortTtlMs;
    }
  } else {
    // Unknown provider, unsupported caching, or caching switched off.
    this._providerCacheTtlMs = 0;
  }
}
346
+
347
/**
 * Report whether the prompt cache should be treated as cold.
 *
 * The cache is cold when any of these hold:
 *   - the stored TTL is <= 0 (caching unsupported or disabled),
 *   - no LLM call timestamp has been recorded yet,
 *   - at least one full TTL has elapsed since the last LLM call.
 *
 * @param now - Current timestamp (ms), injectable for testing
 * @returns true when the cache is cold, false while it is still warm
 */
isCacheCold(now: number = Date.now()): boolean {
  const ttlMs = this._providerCacheTtlMs;
  const lastCallAt = this._lastLlmCallTimestamp;

  if (ttlMs <= 0 || lastCallAt === null) {
    return true;
  }

  const elapsedMs = now - lastCallAt;
  return elapsedMs >= ttlMs;
}
362
+
363
/**
 * Effective cache TTL (ms) last resolved by setCacheInfo().
 * Zero means caching is unsupported or disabled.
 */
get providerCacheTtlMs(): number {
  return this._providerCacheTtlMs;
}
370
+
371
/**
 * Timestamp (ms) of the most recent LLM call, as recorded by
 * updateCurrentContextTokenCount(); null when none has been recorded.
 */
get lastLlmCallTimestamp(): number | null {
  return this._lastLlmCallTimestamp;
}
377
+
378
/**
 * Effective Layer 2 compaction threshold for this manager: the configured
 * `compaction.threshold` passed through computeAdaptiveThreshold() together
 * with the adaptive settings and the last recorded interaction time.
 *
 * @param now - Current timestamp (ms), injectable for testing; when omitted
 *              computeAdaptiveThreshold() falls back to its own default
 * @returns The threshold to use for the Layer 2 decision
 */
getEffectiveThreshold(now?: number): number {
  const { compaction, adaptive } = this.config;
  const lastInteraction = this._lastInteractionTime;

  return computeAdaptiveThreshold(compaction.threshold, adaptive, lastInteraction, now);
}
392
+
393
+ // -----------------------------------------------------------------------
394
+ // Token Tracking
395
+ // -----------------------------------------------------------------------
396
+
397
/**
 * Record the post-hoc context token count reported by an LLM call.
 *
 * Besides storing the count, this stamps the current time as the last LLM
 * call, which isCacheCold() later compares against the cache TTL. It also
 * emits a debug log, plus a warning when the count falls below half of the
 * previous non-zero value.
 *
 * @param inputTokens - Input token count from the LLM usage data
 */
updateCurrentContextTokenCount(inputTokens: number): void {
  const previousCount = this._currentContextTokenCount;

  this._currentContextTokenCount = inputTokens;
  this._lastLlmCallTimestamp = Date.now();
  this.logger.debug('[Compaction] updateCurrentContextTokenCount', {
    prev: previousCount,
    inputTokens,
  });

  // Only a drop of more than half from a non-zero count is flagged; the
  // warning exists to help diagnose token-count display issues.
  const droppedByMoreThanHalf = previousCount > 0 && inputTokens < previousCount * 0.5;
  if (!droppedByMoreThanHalf) return;

  const dropPercent = ((1 - inputTokens / previousCount) * 100).toFixed(1);
  this.logger.warn('[Compaction] currentContextTokenCount dropped >50%', {
    prev: previousCount,
    inputTokens,
    drop: `${dropPercent}%`,
  });
}
417
+
418
/**
 * Post-hoc current-context token count, as last stored by
 * `updateCurrentContextTokenCount` or by a compaction/truncation pass.
 */
get currentContextTokenCount(): number {
  const tokenCount = this._currentContextTokenCount;
  return tokenCount;
}
424
+
425
/**
 * The context budget: the effective token limit used by Layer 1 and Layer 2.
 */
get contextWindow(): number {
  const budget = this._contextWindow;
  return budget;
}
431
+
432
/**
 * The model's actual context window, used by the Layer 3 failsafe.
 */
get modelContextWindow(): number {
  const modelWindow = this._modelContextWindow;
  return modelWindow;
}
438
+
439
/**
 * Current context usage as a fraction of the context budget
 * (post-hoc token count divided by the budget). Returns 0 when the budget
 * is not positive.
 */
get usageRatio(): number {
  const budget = this._contextWindow;
  return budget <= 0 ? 0 : this._currentContextTokenCount / budget;
}
446
+
447
/**
 * Estimate the current context size from a transformed AgentContext snapshot.
 *
 * The result is the larger of the heuristic estimate for `context` and the
 * post-hoc token count from the most recent parent turn. When no positive
 * post-hoc count is available, the heuristic estimate is returned on its own.
 * This mirrors the compaction decision logic so consumers can reason about
 * context pressure using the same semantics.
 *
 * @param context - The context snapshot to estimate
 * @returns Estimated token count for the current context
 */
estimateCurrentContextTokens(context: AgentContext): number {
  const heuristic = this.estimateContextTokens(context);
  const postHoc = this._currentContextTokenCount;

  if (postHoc > 0) {
    return Math.max(postHoc, heuristic);
  }
  return heuristic;
}
463
+
464
+ // -----------------------------------------------------------------------
465
+ // Event Handlers
466
+ // -----------------------------------------------------------------------
467
+
468
/**
 * Add a handler to the before-compaction list (invoked before compaction
 * starts, and awaited).
 *
 * @param handler - Callback to register
 */
onBeforeCompaction(handler: BeforeCompactionHandler): void {
  const registry = this.beforeCompactionHandlers;
  registry.push(handler);
}
474
+
475
/**
 * Add a handler to the post-compaction list (invoked after compaction
 * completes).
 *
 * @param handler - Callback to register
 */
onPostCompaction(handler: PostCompactionHandler): void {
  const registry = this.postCompactionHandlers;
  registry.push(handler);
}
481
+
482
/**
 * Add a handler to the compaction-error list (invoked if compaction fails).
 *
 * @param handler - Callback to register
 */
onCompactionError(handler: CompactionErrorHandler): void {
  const registry = this.compactionErrorHandlers;
  registry.push(handler);
}
488
+
489
/**
 * Add a handler that receives the CompactionResult of each successful
 * Layer 2 run (used for CortexAgent event emission).
 *
 * @param handler - Callback to register
 */
onCompactionResult(handler: (result: CompactionResult) => void): void {
  const registry = this.compactionResultHandlers;
  registry.push(handler);
}
495
+
496
/**
 * Add a handler that is notified when Layer 2 failed and Layer 3 was used as
 * the fallback.
 *
 * @param handler - Callback to register
 */
onCompactionDegraded(handler: (info: CompactionDegradedInfo) => void): void {
  const registry = this.compactionDegradedHandlers;
  registry.push(handler);
}
502
+
503
/**
 * Add a handler that is notified when all compaction layers have failed.
 *
 * @param handler - Callback to register
 */
onCompactionExhausted(handler: (info: CompactionExhaustedInfo) => void): void {
  const registry = this.compactionExhaustedHandlers;
  registry.push(handler);
}
509
+
510
+ // -----------------------------------------------------------------------
511
+ // Observational Memory
512
+ // -----------------------------------------------------------------------
513
+
514
/**
 * Forward a turn_end notification to the observational engine so it can run
 * its async buffer checks. Does nothing when no engine is present.
 */
onTurnEnd(totalTokens: number, contextWindow: number, messages: AgentMessage[], slotCount: number): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.onTurnEnd(totalTokens, contextWindow, messages, slotCount);
}
520
+
521
/**
 * Register an observation event handler on the observational engine.
 * Does nothing when no engine is present.
 */
onObservation(handler: (event: ObservationEvent) => void): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.onObservation(handler);
}
527
+
528
/**
 * Register a reflection event handler on the observational engine.
 * Does nothing when no engine is present.
 */
onReflection(handler: (event: ReflectionEvent) => void): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.onReflection(handler);
}
534
+
535
/**
 * Observational memory state for persistence, or `null` when no engine is
 * present or the engine reports no state.
 */
getObservationalMemoryState(): ObservationalMemoryState | null {
  const engine = this.observationalEngine;
  if (engine == null) {
    return null;
  }
  return engine.getState() ?? null;
}
541
+
542
/**
 * Hand a previously persisted observational memory state back to the engine.
 * Does nothing when no engine is present.
 */
restoreObservationalMemoryState(state: ObservationalMemoryState): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.restoreState(state);
}
548
+
549
/**
 * Force an observation cycle and wait for it to finish. Resolves without
 * doing any work when no engine is present.
 */
async triggerObservation(messages: AgentMessage[], slotCount: number): Promise<void> {
  const engine = this.observationalEngine;
  const pending = engine == null ? undefined : engine.triggerObservation(messages, slotCount);
  await pending;
}
555
+
556
/**
 * Ask the engine to start an initial async buffer on unobserved messages.
 * Intended for session resumption, to get a head start before the first
 * prompt(). Does nothing when no engine is present.
 */
kickstartBuffer(messages: AgentMessage[], slotCount: number): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.kickstartBuffer(messages, slotCount);
}
563
+
564
/**
 * Observation slot content string (for ContextManager.setSlot). Returns an
 * empty string when no engine is present or it has no slot content.
 */
getObservationSlotContent(): string {
  const engine = this.observationalEngine;
  if (engine == null) {
    return '';
  }
  return engine.getSlotContent() ?? '';
}
570
+
571
/**
 * Whether any observations exist, i.e. the engine's observation text is
 * non-empty. False when no engine is present.
 */
hasObservations(): boolean {
  const engine = this.observationalEngine;
  const observationText = (engine == null ? undefined : engine.getObservations()) ?? '';
  return observationText.length > 0;
}
577
+
578
/**
 * Whether the recall tool should be registered. False when no engine is
 * present.
 */
hasRecallTool(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.hasRecall() ?? false;
}
584
+
585
/**
 * Recall config reported by the engine, or `undefined` when no engine is
 * present.
 */
getRecallConfig() {
  const engine = this.observationalEngine;
  return engine == null ? undefined : engine.getRecallConfig();
}
591
+
592
/**
 * Current token count of activated observations only.
 * Returns 0 when no observational engine is present.
 */
getObservationTokenCount(): number {
  const engine = this.observationalEngine;
  if (engine == null) {
    return 0;
  }
  return engine.getObservationTokenCount() ?? 0;
}
599
+
600
+
601
/**
 * Whether the observer or reflector is currently running in the background.
 * Returns false when no observational engine is present.
 */
isObservationalProcessing(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.isProcessing() ?? false;
}
608
+
609
/**
 * Whether the observer specifically is in-flight. False when no engine is
 * present.
 */
isObserverInFlight(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.isObserverInFlight() ?? false;
}
615
+
616
/**
 * Whether the reflector specifically is in-flight. False when no engine is
 * present.
 */
isReflectorInFlight(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.isReflectorInFlight() ?? false;
}
622
+
623
+ // -----------------------------------------------------------------------
624
+ // Insertion-time cap
625
+ // -----------------------------------------------------------------------
626
+
627
/**
 * Cap a single tool result at insertion time (before it enters conversation
 * history) by delegating to the microcompaction engine.
 *
 * @param content - Raw tool result text
 * @returns The text returned by the engine's insertion cap
 */
capToolResult(content: string): string {
  const cappedContent = this.microcompaction.capAtInsertion(content);
  return cappedContent;
}
633
+
634
/**
 * Apply the insertion-time cap to all uncapped tool results in the source
 * messages array. The array is mutated in place: any message whose content
 * changes is replaced by a shallow copy carrying the new content, and
 * messages that do not change keep their original object identity.
 *
 * Called from the transformContext hook on `agent.state.messages` so that
 * Tier 1 capping is automatically applied when tool results enter
 * conversation history through pi-agent-core's internal tool execution loop.
 *
 * Phase 1 caps each tool-result part individually. Parts whose text already
 * contains the insertion marker are skipped, so a part is capped at most once.
 *
 * Phase 2 enforces an aggregate token budget per message
 * (`maxAggregateTurnTokens`, default 150,000; a non-positive value disables
 * the phase). When the tool-result parts of one message together exceed the
 * budget, parts are bookended from largest to smallest until the message fits
 * or the next part is no larger than half of `maxResultTokens`. When
 * `persistResult` is configured, the full text is persisted first and a
 * pointer to it is appended; if persisting fails, the part is still bookended.
 *
 * @param messages - The source messages array (mutated in place)
 * @param slotCount - Number of slot messages to skip at the start
 */
async applyInsertionCap(messages: AgentMessage[], slotCount: number): Promise<void> {
  const config = this.microcompaction.getConfig();

  // Phase 1: Individual per-result cap
  for (let i = slotCount; i < messages.length; i++) {
    const msg = messages[i]!;
    if (!isToolResultMessage(msg)) continue;
    if (typeof msg.content === 'string') continue;

    let modified = false;
    const newContent = msg.content.map(part => {
      const isLegacyToolResult = part.type === 'tool_result' && typeof part.text === 'string';
      const isRuntimeToolResultText = msg.role === 'toolResult' && part.type === 'text' && typeof part.text === 'string';
      if (!isLegacyToolResult && !isRuntimeToolResultText) {
        return part;
      }
      // Skip already-capped content
      if ((part.text as string).includes('tokens trimmed at insertion')) {
        return part;
      }
      const capped = capToolResult(part.text as string, config);
      if (capped !== part.text) {
        modified = true;
        return { ...part, text: capped };
      }
      return part;
    });

    if (modified) {
      messages[i] = { ...msg, content: newContent };
    }
  }

  // Phase 2: Aggregate per-message budget
  const aggregateLimit = config.maxAggregateTurnTokens ?? 150_000;
  if (aggregateLimit <= 0) return;

  for (let i = slotCount; i < messages.length; i++) {
    const msg = messages[i]!;
    if (!isToolResultMessage(msg)) continue;
    if (typeof msg.content === 'string') continue;

    const parts = msg.content;
    const partInfos: Array<{ index: number; tokens: number; text: string; toolName: string }> = [];
    let totalTokens = 0;

    for (let p = 0; p < parts.length; p++) {
      const part = parts[p]!;
      const isLegacyToolResult = part.type === 'tool_result' && typeof part.text === 'string';
      const isRuntimeToolResultText = msg.role === 'toolResult' && part.type === 'text' && typeof part.text === 'string';
      if (!isLegacyToolResult && !isRuntimeToolResultText) {
        continue;
      }

      const text = part.text as string;
      const tokens = estimateTokens(text);
      // Prefer a tool name carried on the part, then one derived from the
      // message, then a generic placeholder.
      const name = (typeof (part as Record<string, unknown>)['name'] === 'string'
        ? (part as Record<string, unknown>)['name'] as string
        : null) ?? extractToolName(msg) ?? 'unknown';
      partInfos.push({ index: p, tokens, text, toolName: name });
      totalTokens += tokens;
    }

    if (totalTokens <= aggregateLimit) continue;

    // Shrink the largest parts first so as few results as possible are touched.
    const sorted = [...partInfos].sort((a, b) => b.tokens - a.tokens);
    const newParts = [...parts];
    let currentTotal = totalTokens;
    let modified = false;

    for (const info of sorted) {
      if (currentTotal <= aggregateLimit) break;
      // The list is sorted descending, so every remaining part is small too.
      if (info.tokens <= config.maxResultTokens / 2) break;

      const part = newParts[info.index]!;
      const bookended = applyBookend(info.text, config.bookendMaxChars, config.bookendMaxChars, info.tokens);
      let replacement = bookended;

      if (config.persistResult) {
        const category = getToolCategory(info.toolName, config.toolCategories);
        try {
          const path = await config.persistResult(info.text, {
            toolName: info.toolName,
            messageIndex: i,
            category: category ?? 'rereadable',
          });
          replacement = `${bookended}\n\n[Full content persisted to ${path} -- use Read to access]`;
        } catch (err) {
          // Persisting is best-effort: keep the bookend-only replacement, but
          // record the failure instead of hiding it.
          this.logger.warn('[Compaction] persistResult failed, using bookend only', {
            toolName: info.toolName,
            messageIndex: i,
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      const newTokens = estimateTokens(replacement);
      currentTotal = currentTotal - info.tokens + newTokens;
      newParts[info.index] = { ...part, text: replacement };
      modified = true;
    }

    // Only replace the message when a part actually changed, so untouched
    // messages keep their identity.
    if (modified) {
      messages[i] = { ...msg, content: newParts };
    }
  }
}
748
+
749
+ // -----------------------------------------------------------------------
750
+ // transformContext hook
751
+ // -----------------------------------------------------------------------
752
+
753
/**
 * Apply the compaction layers to the context inside transformContext.
 *
 * This is the main entry point called from CortexAgent.getTransformContextHook().
 * Every layer is triggered from token estimates computed here, so no external
 * call is needed to start compaction. Returns the context untouched when the
 * context budget is not set or the history is empty.
 *
 * Execution order:
 *   1. Observational path (strategy is 'observational', an engine exists and
 *      both source accessors are supplied): the engine transforms the
 *      context, then Layer 1 (microcompaction) runs on the surviving history.
 *      Layer 2 is skipped on this path.
 *   2. Classic path (everything else): Layer 1 runs on the history, then
 *      Layer 2 (summarization) runs when a completion function and both
 *      source accessors are available and the total exceeds the effective
 *      threshold from getEffectiveThreshold(). Layer 2 works on the source
 *      transcript and is attempted up to `compaction.maxRetries` times
 *      (default 3), waiting `compaction.retryDelayMs` (default 2000 ms)
 *      between attempts.
 *   3. Layer 3 (failsafe): emergency truncation when the total exceeds
 *      `failsafe.threshold` of the model's context window (the budget is used
 *      when the model window is not set).
 *
 * Events: result handlers fire after a successful Layer 2. Degraded handlers
 * fire when Layer 2 failed and Layer 3 truncated. Exhausted handlers fire
 * when Layer 2 failed, Layer 3 did not run, and the total is still over the
 * Layer 2 threshold.
 *
 * @param context - The AgentContext from transformContext
 * @param getHistory - Function to get conversation history from the context
 * @param setHistory - Function to set conversation history in the context
 * @param getSourceHistory - Function to get the original transcript history (agent.state.messages post-slot)
 * @param setSourceHistory - Function to replace the original transcript history (agent.state.messages)
 * @returns Modified context with compacted history
 */
async applyInTransformContext(
  context: AgentContext,
  getHistory: (ctx: AgentContext) => AgentMessage[],
  setHistory: (ctx: AgentContext, history: AgentMessage[]) => AgentContext,
  getSourceHistory?: () => AgentMessage[],
  setSourceHistory?: (history: AgentMessage[]) => void,
): Promise<AgentContext> {
  if (this._contextWindow <= 0) {
    // contextWindow not set, skip compaction
    return context;
  }

  let history = getHistory(context);
  if (history.length === 0) {
    return context;
  }

  // Use the current transformed context estimate as a first-class input.
  // Post-hoc token tracking from the previous turn is useful, but it can be
  // stale when transformContext injects large ephemeral content on this turn.
  const estimatedCurrentTokens = this.estimateContextTokens(context);
  const currentTokens = this.estimateCurrentContextTokens(context);

  this.logger.debug('[Compaction] transformContext', {
    historyLen: history.length,
    currentContextTokens: this._currentContextTokenCount,
    heuristic: estimatedCurrentTokens,
    currentTokens,
    ctxWindow: this._contextWindow,
  });

  // Compute utilization and slot tokens (shared by both strategies and L3).
  // `history` was read from this same context just above, so it is reused
  // here. The budget is known to be positive because of the early return.
  const originalHistoryTokens = this.estimateHistoryTokens(history);
  const slotTokens = Math.max(0, currentTokens - originalHistoryTokens);
  const utilization = currentTokens / this._contextWindow;

  let layer2Failed = false;
  let lastLayer2Error: Error | undefined;
  let effectiveThreshold = 0;

  const cacheCold = this.isCacheCold();

  if (this._strategy === 'observational' && this.observationalEngine && getSourceHistory && setSourceHistory) {
    // Observational memory path: observer/reflector handle conversation
    // compression. L2 summarization is skipped, but L1 still runs in
    // cache-aware mode on the unobserved tail to trim large tool results
    // before they hit the LLM.
    context = await this.observationalEngine.applyInTransformContext(
      context, utilization, this.slotCount, getHistory, setHistory, getSourceHistory, setSourceHistory,
    );
    history = getHistory(context);

    // Run L1 on the surviving (post-observation) history. Cache-aware
    // gating ensures we only trim when the prompt cache has gone cold,
    // preserving cache hits during active use. Re-estimate from the
    // updated context so the observation slot's new size is reflected.
    const postObsTotal = this.estimateCurrentContextTokens(context);
    const trimmedHistory = await this.microcompaction.apply(
      history, this._contextWindow, postObsTotal, { cacheCold },
    );
    if (trimmedHistory !== history) {
      context = setHistory(context, trimmedHistory);
      history = trimmedHistory;
    }
  } else {
    // Classic path: L1 + L2

    // Layer 1: Microcompaction. Cache-aware gating: only trims when the
    // prompt cache is cold (or unsupported). When warm, returns history
    // untouched to preserve cache hits.
    history = await this.microcompaction.apply(
      history, this._contextWindow, currentTokens, { cacheCold },
    );

    // Layer 2: Conversation summarization.
    // Operates on the original transcript (agent.state.messages), not the
    // in-memory microcompacted context. After Layer 2 modifies the source,
    // we rebuild the context from the updated messages.
    const postMicroTokens = this.estimateHistoryTokens(history);
    const totalAfterMicro = slotTokens + postMicroTokens;

    effectiveThreshold = this.getEffectiveThreshold();

    this.logger.debug('[Compaction] Layer2 evaluation', {
      totalAfterMicro,
      threshold: effectiveThreshold,
      ratio: totalAfterMicro / this._contextWindow,
      completeFn: !!this.completeFn,
      srcAccessors: !!getSourceHistory && !!setSourceHistory,
      shouldCompact: shouldCompact(totalAfterMicro, this._contextWindow, effectiveThreshold),
    });

    if (
      this.completeFn &&
      getSourceHistory &&
      setSourceHistory &&
      shouldCompact(totalAfterMicro, this._contextWindow, effectiveThreshold)
    ) {
      const maxRetries = this.config.compaction.maxRetries ?? 3;
      const retryDelayMs = this.config.compaction.retryDelayMs ?? 2000;
      let succeeded = false;

      for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
          const sourceHistory = getSourceHistory();
          // Nothing to summarize. Leaving the loop here still counts as a
          // Layer 2 failure below, because `succeeded` stays false.
          if (sourceHistory.length === 0) break;

          const { newHistory: compactedSource, result } = await runCompaction(
            sourceHistory,
            this.config.compaction,
            this.completeFn,
            {
              onBeforeCompaction: this.beforeCompactionHandlers,
              onPostCompaction: this.postCompactionHandlers,
              onCompactionError: this.compactionErrorHandlers,
            },
            currentTokens, // pass actual full-context token count for accurate reporting
          );

          // Success: update state and reset failure counter
          setSourceHistory(compactedSource);
          this.microcompaction.resetCache();

          // result.tokensAfter now includes overhead (system prompt, slots,
          // tool definitions) since we passed actualContextTokens to
          // runCompaction. Use it directly to prevent the stale low value
          // that would cause re-triggering compaction on the next call.
          this._currentContextTokenCount = result.tokensAfter;

          this._consecutiveLayer2Failures = 0;

          // A throwing result handler must not undo a successful compaction.
          for (const handler of this.compactionResultHandlers) {
            try {
              handler(result);
            } catch (err) {
              this.logger.error('[Compaction] compactionResult handler threw', {
                error: err instanceof Error ? err.message : String(err),
              });
            }
          }

          // Rebuild context from updated source. L2 just rewrote history
          // wholesale, so any existing cache prefix is invalidated; treat as
          // cold so L1 can trim the rebuilt history if warranted.
          history = await this.microcompaction.apply(
            compactedSource,
            this._contextWindow,
            this._currentContextTokenCount,
            { cacheCold: true },
          );

          succeeded = true;
          break;
        } catch (err) {
          this._consecutiveLayer2Failures++;
          lastLayer2Error = err instanceof Error ? err : new Error(String(err));
          this.logger.warn('[Compaction] Layer2 retry failed', {
            attempt,
            maxRetries,
            error: lastLayer2Error.message,
          });

          // No delay after the final attempt.
          if (attempt < maxRetries) {
            await new Promise(resolve => setTimeout(resolve, retryDelayMs));
          }
        }
      }

      if (!succeeded) {
        layer2Failed = true;
      }
    }
  }

  // Layer 3: Emergency truncation against the model context window.
  // Uses the MODEL's actual context window, not the budget. Emergency
  // truncation should only fire when we're near the model's real limit,
  // not the user's artificial budget. Layer 1/2 handle the budget.
  // When observational memory is active, L3 operates on the post-slot
  // history (raw messages only). The observation slot lives in the slot
  // region and is naturally protected by slotCount.
  {
    const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
    const postLayerTokens = this.estimateHistoryTokens(history);
    const totalNow = slotTokens + postLayerTokens;

    if (shouldTruncate(totalNow, failsafeWindow, this.config.failsafe.threshold)) {
      // Force sync observation before L3 truncation to capture unobserved
      // content before it is dropped. The source history from getSourceHistory
      // is already post-slot, so pass 0 as slotCount.
      if (this._strategy === 'observational' && this.observationalEngine && getSourceHistory) {
        const sourceHistory = getSourceHistory();
        await this.observationalEngine.triggerObservation(sourceHistory, 0);
      }

      const truncResult = emergencyTruncate(
        history,
        failsafeWindow,
        slotTokens,
        this.config.failsafe.threshold,
      );
      history = truncResult.newHistory;

      // Emit degraded event if Layer 3 was used as fallback for Layer 2 failure
      if (layer2Failed) {
        const failures = this._consecutiveLayer2Failures;
        this._consecutiveLayer2Failures = 0;
        for (const handler of this.compactionDegradedHandlers) {
          try {
            handler({
              layer2Failures: failures,
              turnsDropped: truncResult.turnsRemoved,
            });
          } catch (err) {
            this.logger.error('[Compaction] compactionDegraded handler threw', {
              error: err instanceof Error ? err.message : String(err),
            });
          }
        }
      }
    } else if (layer2Failed) {
      // Layer 2 failed but Layer 3 didn't need to run. If tokens are still
      // over the Layer 2 budget, emit exhausted so the consumer can act.
      // History is unchanged since `totalNow` was computed, so reuse it.
      const stillOverBudget = shouldCompact(totalNow, this._contextWindow, effectiveThreshold);

      if (stillOverBudget) {
        const failures = this._consecutiveLayer2Failures;
        this._consecutiveLayer2Failures = 0;
        for (const handler of this.compactionExhaustedHandlers) {
          try {
            handler({
              error: lastLayer2Error ?? new Error('Layer 2 compaction failed'),
              layer2Failures: failures,
            });
          } catch (err) {
            this.logger.error('[Compaction] compactionExhausted handler threw', {
              error: err instanceof Error ? err.message : String(err),
            });
          }
        }
      }
    }
  }

  return setHistory(context, history);
}
1023
+
1024
+ // -----------------------------------------------------------------------
1025
+ // End-of-tick compaction check
1026
+ // -----------------------------------------------------------------------
1027
+
1028
/**
 * Manually check if compaction is needed and run it.
 *
 * This is a convenience API for consumers who want to trigger compaction
 * outside the agentic loop (e.g., for testing or manual maintenance).
 * The primary compaction trigger is `applyInTransformContext`, which runs
 * automatically before every LLM call.
 *
 * Compaction is considered needed when either the post-hoc token count or
 * the heuristic history estimate exceeds the effective Layer 2 threshold.
 * Layer 2 (summarization) is attempted once when a completion function is
 * available. If it is unavailable or fails, Layer 3 (emergency truncation)
 * runs when the post-hoc count exceeds the failsafe threshold of the model
 * window. Layer 2 failures and throwing result handlers are logged and never
 * rethrown.
 *
 * @param getHistory - Get current conversation history
 * @param setHistory - Replace conversation history
 * @returns CompactionResult if Layer 2 compaction ran successfully, null otherwise
 */
async checkAndRunCompaction(
  getHistory: () => AgentMessage[],
  setHistory: (history: AgentMessage[]) => void,
): Promise<CompactionResult | null> {
  if (this._contextWindow <= 0) return null;

  const history = getHistory();
  if (history.length === 0) return null;

  const estimatedTokens = this.estimateHistoryTokens(history);

  // Use adaptive threshold (adjusts based on interaction recency)
  const effectiveThreshold = this.getEffectiveThreshold();

  // Check the Layer 2 threshold against the post-hoc count first and fall
  // back to the heuristic estimate; bail out only when both are under it.
  if (
    !shouldCompact(this._currentContextTokenCount, this._contextWindow, effectiveThreshold) &&
    !shouldCompact(estimatedTokens, this._contextWindow, effectiveThreshold)
  ) {
    return null;
  }

  // Attempt Layer 2 (summarization)
  if (this.completeFn) {
    try {
      const actualTokens = Math.max(this._currentContextTokenCount, estimatedTokens);
      const { newHistory, result } = await runCompaction(
        history,
        this.config.compaction,
        this.completeFn,
        {
          onBeforeCompaction: this.beforeCompactionHandlers,
          onPostCompaction: this.postCompactionHandlers,
          onCompactionError: this.compactionErrorHandlers,
        },
        actualTokens, // pass full-context token count for accurate reporting
      );

      setHistory(newHistory);
      this.microcompaction.resetCache();

      // result.tokensAfter includes overhead since we passed actualTokens
      this._currentContextTokenCount = result.tokensAfter;

      // Emit result. A throwing handler must not undo a successful
      // compaction, so its error is logged rather than propagated.
      for (const handler of this.compactionResultHandlers) {
        try {
          handler(result);
        } catch (err) {
          this.logger.error('[Compaction] compactionResult handler threw', {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      return result;
    } catch (err) {
      // Layer 2 failed: record why, then fall through to Layer 3.
      this.logger.warn('[Compaction] Layer2 failed in checkAndRunCompaction', {
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  // Layer 3 fallback: emergency truncation (uses model's actual window)
  const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
  const slotTokens = this._currentContextTokenCount - estimatedTokens;
  if (shouldTruncate(this._currentContextTokenCount, failsafeWindow, this.config.failsafe.threshold)) {
    const result = emergencyTruncate(
      history,
      failsafeWindow,
      Math.max(0, slotTokens),
      this.config.failsafe.threshold,
    );
    setHistory(result.newHistory);
    this.microcompaction.resetCache();
    this._currentContextTokenCount = result.tokensAfter;
  }

  return null;
}
1117
+
1118
+ // -----------------------------------------------------------------------
1119
+ // Reactive overflow handling
1120
+ // -----------------------------------------------------------------------
1121
+
1122
/**
 * React to a context overflow error reported by the API by running emergency
 * truncation on the current history. Does nothing when the history is empty.
 *
 * After truncation the history is replaced, the microcompaction cache is
 * reset, and the stored token count is set to the truncation result.
 *
 * @param getHistory - Get current conversation history
 * @param setHistory - Replace conversation history
 */
handleOverflowError(
  getHistory: () => AgentMessage[],
  setHistory: (history: AgentMessage[]) => void,
): void {
  const currentHistory = getHistory();
  if (currentHistory.length === 0) {
    return;
  }

  // The API itself reported the overflow, so measure against the model's
  // actual window, falling back to the budget when the model window is unset.
  let failsafeWindow = this._contextWindow;
  if (this._modelContextWindow > 0) {
    failsafeWindow = this._modelContextWindow;
  }

  // Whatever the post-hoc count holds beyond the history estimate is treated
  // as slot overhead; it is never allowed to go negative.
  const historyTokens = this.estimateHistoryTokens(currentHistory);
  const overheadTokens = Math.max(0, this._currentContextTokenCount - historyTokens);

  const { newHistory, tokensAfter } = emergencyTruncate(
    currentHistory,
    failsafeWindow,
    overheadTokens,
    this.config.failsafe.threshold,
  );

  setHistory(newHistory);
  this.microcompaction.resetCache();
  this._currentContextTokenCount = tokensAfter;
}
1152
+
1153
+ // -----------------------------------------------------------------------
1154
+ // Cleanup
1155
+ // -----------------------------------------------------------------------
1156
+
1157
/**
 * Clear all state and handlers.
 *
 * Resets the microcompaction cache, aborts and drops the observational
 * engine, empties every handler list, drops the completion function, and
 * resets the token count, failure counter and both recorded timestamps
 * (last interaction and last LLM call).
 */
destroy(): void {
  this.microcompaction.resetCache();
  this.observationalEngine?.abort();
  this.observationalEngine = null;
  this.beforeCompactionHandlers = [];
  this.postCompactionHandlers = [];
  this.compactionErrorHandlers = [];
  this.compactionResultHandlers = [];
  this.compactionDegradedHandlers = [];
  this.compactionExhaustedHandlers = [];
  this.completeFn = null;
  this._currentContextTokenCount = 0;
  this._consecutiveLayer2Failures = 0;
  this._lastInteractionTime = null;
  // Forget the last LLM call as well, so isCacheCold() reports cold again
  // instead of judging cache warmth from a call made before destroy().
  this._lastLlmCallTimestamp = null;
}
1175
+
1176
+ // -----------------------------------------------------------------------
1177
+ // Internal helpers
1178
+ // -----------------------------------------------------------------------
1179
+
1180
/**
 * Estimate tokens for a set of history messages. The text content of every
 * message is joined with newlines and estimated as one string.
 */
private estimateHistoryTokens(history: AgentMessage[]): number {
  const perMessageText = history.map(message => extractTextContent(message));
  const transcript = perMessageText.join('\n');
  return estimateTokens(transcript);
}
1188
+
1189
/**
 * Estimate total context tokens from an AgentContext object: the system
 * prompt estimate plus a separate estimate for each message's text content.
 */
private estimateContextTokens(context: AgentContext): number {
  let runningTotal = estimateTokens(context.systemPrompt);
  for (const message of context.messages) {
    const messageText = extractTextContent(message);
    runningTotal += estimateTokens(messageText);
  }
  return runningTotal;
}
1199
+ }