@animus-labs/cortex 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +73 -0
  3. package/dist/budget-guard.d.ts +75 -0
  4. package/dist/budget-guard.d.ts.map +1 -0
  5. package/dist/budget-guard.js +142 -0
  6. package/dist/budget-guard.js.map +1 -0
  7. package/dist/compaction/compaction.d.ts +99 -0
  8. package/dist/compaction/compaction.d.ts.map +1 -0
  9. package/dist/compaction/compaction.js +302 -0
  10. package/dist/compaction/compaction.js.map +1 -0
  11. package/dist/compaction/failsafe.d.ts +57 -0
  12. package/dist/compaction/failsafe.d.ts.map +1 -0
  13. package/dist/compaction/failsafe.js +135 -0
  14. package/dist/compaction/failsafe.js.map +1 -0
  15. package/dist/compaction/index.d.ts +381 -0
  16. package/dist/compaction/index.d.ts.map +1 -0
  17. package/dist/compaction/index.js +979 -0
  18. package/dist/compaction/index.js.map +1 -0
  19. package/dist/compaction/microcompaction.d.ts +219 -0
  20. package/dist/compaction/microcompaction.d.ts.map +1 -0
  21. package/dist/compaction/microcompaction.js +536 -0
  22. package/dist/compaction/microcompaction.js.map +1 -0
  23. package/dist/compaction/observational/buffering.d.ts +225 -0
  24. package/dist/compaction/observational/buffering.d.ts.map +1 -0
  25. package/dist/compaction/observational/buffering.js +354 -0
  26. package/dist/compaction/observational/buffering.js.map +1 -0
  27. package/dist/compaction/observational/constants.d.ts +70 -0
  28. package/dist/compaction/observational/constants.d.ts.map +1 -0
  29. package/dist/compaction/observational/constants.js +507 -0
  30. package/dist/compaction/observational/constants.js.map +1 -0
  31. package/dist/compaction/observational/index.d.ts +219 -0
  32. package/dist/compaction/observational/index.d.ts.map +1 -0
  33. package/dist/compaction/observational/index.js +641 -0
  34. package/dist/compaction/observational/index.js.map +1 -0
  35. package/dist/compaction/observational/observer.d.ts +97 -0
  36. package/dist/compaction/observational/observer.d.ts.map +1 -0
  37. package/dist/compaction/observational/observer.js +424 -0
  38. package/dist/compaction/observational/observer.js.map +1 -0
  39. package/dist/compaction/observational/recall-tool.d.ts +27 -0
  40. package/dist/compaction/observational/recall-tool.d.ts.map +1 -0
  41. package/dist/compaction/observational/recall-tool.js +93 -0
  42. package/dist/compaction/observational/recall-tool.js.map +1 -0
  43. package/dist/compaction/observational/reflector.d.ts +94 -0
  44. package/dist/compaction/observational/reflector.d.ts.map +1 -0
  45. package/dist/compaction/observational/reflector.js +167 -0
  46. package/dist/compaction/observational/reflector.js.map +1 -0
  47. package/dist/compaction/observational/types.d.ts +271 -0
  48. package/dist/compaction/observational/types.d.ts.map +1 -0
  49. package/dist/compaction/observational/types.js +15 -0
  50. package/dist/compaction/observational/types.js.map +1 -0
  51. package/dist/context-manager.d.ts +134 -0
  52. package/dist/context-manager.d.ts.map +1 -0
  53. package/dist/context-manager.js +170 -0
  54. package/dist/context-manager.js.map +1 -0
  55. package/dist/cortex-agent.d.ts +1020 -0
  56. package/dist/cortex-agent.d.ts.map +1 -0
  57. package/dist/cortex-agent.js +3589 -0
  58. package/dist/cortex-agent.js.map +1 -0
  59. package/dist/error-classifier.d.ts +48 -0
  60. package/dist/error-classifier.d.ts.map +1 -0
  61. package/dist/error-classifier.js +152 -0
  62. package/dist/error-classifier.js.map +1 -0
  63. package/dist/event-bridge.d.ts +166 -0
  64. package/dist/event-bridge.d.ts.map +1 -0
  65. package/dist/event-bridge.js +381 -0
  66. package/dist/event-bridge.js.map +1 -0
  67. package/dist/index.d.ts +55 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +57 -0
  70. package/dist/index.js.map +1 -0
  71. package/dist/mcp-client.d.ts +119 -0
  72. package/dist/mcp-client.d.ts.map +1 -0
  73. package/dist/mcp-client.js +474 -0
  74. package/dist/mcp-client.js.map +1 -0
  75. package/dist/model-wrapper.d.ts +58 -0
  76. package/dist/model-wrapper.d.ts.map +1 -0
  77. package/dist/model-wrapper.js +86 -0
  78. package/dist/model-wrapper.js.map +1 -0
  79. package/dist/noop-logger.d.ts +4 -0
  80. package/dist/noop-logger.d.ts.map +1 -0
  81. package/dist/noop-logger.js +8 -0
  82. package/dist/noop-logger.js.map +1 -0
  83. package/dist/prompt-diagnostics.d.ts +47 -0
  84. package/dist/prompt-diagnostics.d.ts.map +1 -0
  85. package/dist/prompt-diagnostics.js +230 -0
  86. package/dist/prompt-diagnostics.js.map +1 -0
  87. package/dist/provider-manager.d.ts +224 -0
  88. package/dist/provider-manager.d.ts.map +1 -0
  89. package/dist/provider-manager.js +563 -0
  90. package/dist/provider-manager.js.map +1 -0
  91. package/dist/provider-registry.d.ts +115 -0
  92. package/dist/provider-registry.d.ts.map +1 -0
  93. package/dist/provider-registry.js +305 -0
  94. package/dist/provider-registry.js.map +1 -0
  95. package/dist/schema-converter.d.ts +20 -0
  96. package/dist/schema-converter.d.ts.map +1 -0
  97. package/dist/schema-converter.js +48 -0
  98. package/dist/schema-converter.js.map +1 -0
  99. package/dist/skill-preprocessor.d.ts +46 -0
  100. package/dist/skill-preprocessor.d.ts.map +1 -0
  101. package/dist/skill-preprocessor.js +237 -0
  102. package/dist/skill-preprocessor.js.map +1 -0
  103. package/dist/skill-registry.d.ts +107 -0
  104. package/dist/skill-registry.d.ts.map +1 -0
  105. package/dist/skill-registry.js +330 -0
  106. package/dist/skill-registry.js.map +1 -0
  107. package/dist/skill-tool.d.ts +54 -0
  108. package/dist/skill-tool.d.ts.map +1 -0
  109. package/dist/skill-tool.js +88 -0
  110. package/dist/skill-tool.js.map +1 -0
  111. package/dist/sub-agent-manager.d.ts +90 -0
  112. package/dist/sub-agent-manager.d.ts.map +1 -0
  113. package/dist/sub-agent-manager.js +192 -0
  114. package/dist/sub-agent-manager.js.map +1 -0
  115. package/dist/token-estimator.d.ts +23 -0
  116. package/dist/token-estimator.d.ts.map +1 -0
  117. package/dist/token-estimator.js +27 -0
  118. package/dist/token-estimator.js.map +1 -0
  119. package/dist/tool-contract.d.ts +68 -0
  120. package/dist/tool-contract.d.ts.map +1 -0
  121. package/dist/tool-contract.js +35 -0
  122. package/dist/tool-contract.js.map +1 -0
  123. package/dist/tool-result-persistence.d.ts +89 -0
  124. package/dist/tool-result-persistence.d.ts.map +1 -0
  125. package/dist/tool-result-persistence.js +152 -0
  126. package/dist/tool-result-persistence.js.map +1 -0
  127. package/dist/tools/bash/index.d.ts +71 -0
  128. package/dist/tools/bash/index.d.ts.map +1 -0
  129. package/dist/tools/bash/index.js +485 -0
  130. package/dist/tools/bash/index.js.map +1 -0
  131. package/dist/tools/bash/interactive.d.ts +47 -0
  132. package/dist/tools/bash/interactive.d.ts.map +1 -0
  133. package/dist/tools/bash/interactive.js +262 -0
  134. package/dist/tools/bash/interactive.js.map +1 -0
  135. package/dist/tools/bash/safety.d.ts +149 -0
  136. package/dist/tools/bash/safety.d.ts.map +1 -0
  137. package/dist/tools/bash/safety.js +1116 -0
  138. package/dist/tools/bash/safety.js.map +1 -0
  139. package/dist/tools/edit.d.ts +57 -0
  140. package/dist/tools/edit.d.ts.map +1 -0
  141. package/dist/tools/edit.js +310 -0
  142. package/dist/tools/edit.js.map +1 -0
  143. package/dist/tools/glob.d.ts +34 -0
  144. package/dist/tools/glob.d.ts.map +1 -0
  145. package/dist/tools/glob.js +268 -0
  146. package/dist/tools/glob.js.map +1 -0
  147. package/dist/tools/grep.d.ts +53 -0
  148. package/dist/tools/grep.d.ts.map +1 -0
  149. package/dist/tools/grep.js +673 -0
  150. package/dist/tools/grep.js.map +1 -0
  151. package/dist/tools/index.d.ts +62 -0
  152. package/dist/tools/index.d.ts.map +1 -0
  153. package/dist/tools/index.js +52 -0
  154. package/dist/tools/index.js.map +1 -0
  155. package/dist/tools/read.d.ts +43 -0
  156. package/dist/tools/read.d.ts.map +1 -0
  157. package/dist/tools/read.js +459 -0
  158. package/dist/tools/read.js.map +1 -0
  159. package/dist/tools/runtime.d.ts +62 -0
  160. package/dist/tools/runtime.d.ts.map +1 -0
  161. package/dist/tools/runtime.js +116 -0
  162. package/dist/tools/runtime.js.map +1 -0
  163. package/dist/tools/shared/cwd-tracker.d.ts +32 -0
  164. package/dist/tools/shared/cwd-tracker.d.ts.map +1 -0
  165. package/dist/tools/shared/cwd-tracker.js +44 -0
  166. package/dist/tools/shared/cwd-tracker.js.map +1 -0
  167. package/dist/tools/shared/edit-history.d.ts +55 -0
  168. package/dist/tools/shared/edit-history.d.ts.map +1 -0
  169. package/dist/tools/shared/edit-history.js +72 -0
  170. package/dist/tools/shared/edit-history.js.map +1 -0
  171. package/dist/tools/shared/edit-matcher.d.ts +83 -0
  172. package/dist/tools/shared/edit-matcher.d.ts.map +1 -0
  173. package/dist/tools/shared/edit-matcher.js +359 -0
  174. package/dist/tools/shared/edit-matcher.js.map +1 -0
  175. package/dist/tools/shared/file-mutation-lock.d.ts +22 -0
  176. package/dist/tools/shared/file-mutation-lock.d.ts.map +1 -0
  177. package/dist/tools/shared/file-mutation-lock.js +35 -0
  178. package/dist/tools/shared/file-mutation-lock.js.map +1 -0
  179. package/dist/tools/shared/gitignore.d.ts +17 -0
  180. package/dist/tools/shared/gitignore.d.ts.map +1 -0
  181. package/dist/tools/shared/gitignore.js +59 -0
  182. package/dist/tools/shared/gitignore.js.map +1 -0
  183. package/dist/tools/shared/pdf-extractor.d.ts +96 -0
  184. package/dist/tools/shared/pdf-extractor.d.ts.map +1 -0
  185. package/dist/tools/shared/pdf-extractor.js +196 -0
  186. package/dist/tools/shared/pdf-extractor.js.map +1 -0
  187. package/dist/tools/shared/read-registry.d.ts +66 -0
  188. package/dist/tools/shared/read-registry.d.ts.map +1 -0
  189. package/dist/tools/shared/read-registry.js +65 -0
  190. package/dist/tools/shared/read-registry.js.map +1 -0
  191. package/dist/tools/shared/safe-env.d.ts +18 -0
  192. package/dist/tools/shared/safe-env.d.ts.map +1 -0
  193. package/dist/tools/shared/safe-env.js +70 -0
  194. package/dist/tools/shared/safe-env.js.map +1 -0
  195. package/dist/tools/sub-agent.d.ts +91 -0
  196. package/dist/tools/sub-agent.d.ts.map +1 -0
  197. package/dist/tools/sub-agent.js +89 -0
  198. package/dist/tools/sub-agent.js.map +1 -0
  199. package/dist/tools/task-output.d.ts +38 -0
  200. package/dist/tools/task-output.d.ts.map +1 -0
  201. package/dist/tools/task-output.js +186 -0
  202. package/dist/tools/task-output.js.map +1 -0
  203. package/dist/tools/tool-search/index.d.ts +40 -0
  204. package/dist/tools/tool-search/index.d.ts.map +1 -0
  205. package/dist/tools/tool-search/index.js +110 -0
  206. package/dist/tools/tool-search/index.js.map +1 -0
  207. package/dist/tools/tool-search/registry.d.ts +82 -0
  208. package/dist/tools/tool-search/registry.d.ts.map +1 -0
  209. package/dist/tools/tool-search/registry.js +238 -0
  210. package/dist/tools/tool-search/registry.js.map +1 -0
  211. package/dist/tools/undo-edit.d.ts +51 -0
  212. package/dist/tools/undo-edit.d.ts.map +1 -0
  213. package/dist/tools/undo-edit.js +231 -0
  214. package/dist/tools/undo-edit.js.map +1 -0
  215. package/dist/tools/web-fetch/cache.d.ts +49 -0
  216. package/dist/tools/web-fetch/cache.d.ts.map +1 -0
  217. package/dist/tools/web-fetch/cache.js +89 -0
  218. package/dist/tools/web-fetch/cache.js.map +1 -0
  219. package/dist/tools/web-fetch/index.d.ts +53 -0
  220. package/dist/tools/web-fetch/index.d.ts.map +1 -0
  221. package/dist/tools/web-fetch/index.js +513 -0
  222. package/dist/tools/web-fetch/index.js.map +1 -0
  223. package/dist/tools/write.d.ts +59 -0
  224. package/dist/tools/write.d.ts.map +1 -0
  225. package/dist/tools/write.js +316 -0
  226. package/dist/tools/write.js.map +1 -0
  227. package/dist/types.d.ts +881 -0
  228. package/dist/types.d.ts.map +1 -0
  229. package/dist/types.js +16 -0
  230. package/dist/types.js.map +1 -0
  231. package/dist/working-tags.d.ts +44 -0
  232. package/dist/working-tags.d.ts.map +1 -0
  233. package/dist/working-tags.js +103 -0
  234. package/dist/working-tags.js.map +1 -0
  235. package/package.json +87 -0
  236. package/src/budget-guard.ts +170 -0
  237. package/src/compaction/compaction.ts +386 -0
  238. package/src/compaction/failsafe.ts +185 -0
  239. package/src/compaction/index.ts +1199 -0
  240. package/src/compaction/microcompaction.ts +709 -0
  241. package/src/compaction/observational/buffering.ts +430 -0
  242. package/src/compaction/observational/constants.ts +532 -0
  243. package/src/compaction/observational/index.ts +837 -0
  244. package/src/compaction/observational/observer.ts +510 -0
  245. package/src/compaction/observational/recall-tool.ts +130 -0
  246. package/src/compaction/observational/reflector.ts +221 -0
  247. package/src/compaction/observational/types.ts +343 -0
  248. package/src/context-manager.ts +237 -0
  249. package/src/cortex-agent.ts +4297 -0
  250. package/src/error-classifier.ts +199 -0
  251. package/src/event-bridge.ts +508 -0
  252. package/src/index.ts +292 -0
  253. package/src/mcp-client.ts +582 -0
  254. package/src/model-wrapper.ts +128 -0
  255. package/src/noop-logger.ts +9 -0
  256. package/src/prompt-diagnostics.ts +296 -0
  257. package/src/provider-manager.ts +823 -0
  258. package/src/provider-registry.ts +386 -0
  259. package/src/schema-converter.ts +51 -0
  260. package/src/skill-preprocessor.ts +314 -0
  261. package/src/skill-registry.ts +378 -0
  262. package/src/skill-tool.ts +130 -0
  263. package/src/sub-agent-manager.ts +236 -0
  264. package/src/token-estimator.ts +26 -0
  265. package/src/tool-contract.ts +113 -0
  266. package/src/tool-result-persistence.ts +197 -0
  267. package/src/tools/bash/index.ts +633 -0
  268. package/src/tools/bash/interactive.ts +302 -0
  269. package/src/tools/bash/safety.ts +1297 -0
  270. package/src/tools/edit.ts +422 -0
  271. package/src/tools/glob.ts +330 -0
  272. package/src/tools/grep.ts +819 -0
  273. package/src/tools/index.ts +110 -0
  274. package/src/tools/read.ts +580 -0
  275. package/src/tools/runtime.ts +173 -0
  276. package/src/tools/shared/cwd-tracker.ts +50 -0
  277. package/src/tools/shared/edit-history.ts +96 -0
  278. package/src/tools/shared/edit-matcher.ts +457 -0
  279. package/src/tools/shared/file-mutation-lock.ts +40 -0
  280. package/src/tools/shared/gitignore.ts +61 -0
  281. package/src/tools/shared/pdf-extractor.ts +290 -0
  282. package/src/tools/shared/read-registry.ts +93 -0
  283. package/src/tools/shared/safe-env.ts +82 -0
  284. package/src/tools/sub-agent.ts +171 -0
  285. package/src/tools/task-output.ts +236 -0
  286. package/src/tools/tool-search/index.ts +167 -0
  287. package/src/tools/tool-search/registry.ts +278 -0
  288. package/src/tools/undo-edit.ts +314 -0
  289. package/src/tools/web-fetch/cache.ts +112 -0
  290. package/src/tools/web-fetch/index.ts +604 -0
  291. package/src/tools/write.ts +385 -0
  292. package/src/types.ts +1057 -0
  293. package/src/working-tags.ts +118 -0
@@ -0,0 +1,979 @@
1
+ /**
2
+ * Compaction composition: wires all three layers into the transformContext chain.
3
+ *
4
+ * Layer 1 (Microcompaction): tool result trimming at threshold crossings
5
+ * Layer 2 (Compaction): conversation summarization via LLM
6
+ * Layer 3 (Failsafe): emergency truncation, purely mechanical
7
+ *
8
+ * All three layers run inside transformContext, which fires before every LLM
9
+ * call. Compaction is fully self-contained within Cortex; no external calls
10
+ * from the backend are needed to trigger it. Layer 2 fires when token usage
11
+ * exceeds 70% of the context window and a completeFn + source accessors are
12
+ * provided. Layer 3 fires whenever tokens exceed 90% of the model's context
13
+ * window.
14
+ *
15
+ * References:
16
+ * - compaction-strategy.md
17
+ * - phase-5-compaction.md (5.5)
18
+ */
19
+ import { NOOP_LOGGER } from '../noop-logger.js';
20
+ import { estimateTokens } from '../token-estimator.js';
21
+ import { MicrocompactionEngine, MICROCOMPACTION_DEFAULTS, extractTextContent, isToolResultMessage, capToolResult, extractToolName, getToolCategory, applyBookend } from './microcompaction.js';
22
+ import { runCompaction, shouldCompact, COMPACTION_DEFAULTS, } from './compaction.js';
23
+ import { emergencyTruncate, shouldTruncate, FAILSAFE_DEFAULTS, } from './failsafe.js';
24
+ import { ObservationalMemoryEngine } from './observational/index.js';
25
+ import { PROVIDER_CACHE_CONFIG } from '../provider-registry.js';
26
+ // ---------------------------------------------------------------------------
27
+ // Re-exports for consumer convenience
28
+ // ---------------------------------------------------------------------------
29
+ export { MicrocompactionEngine, capToolResult } from './microcompaction.js';
30
+ export { runCompaction, shouldCompact, partitionHistory, buildSummaryMessage } from './compaction.js';
31
+ export { emergencyTruncate, shouldTruncate, isContextOverflow } from './failsafe.js';
32
+ export { ObservationalMemoryEngine } from './observational/index.js';
33
+ export { createRecallTool } from './observational/recall-tool.js';
34
+ // computeAdaptiveThreshold is defined below in this file and exported at the declaration site
35
+ // ---------------------------------------------------------------------------
36
+ // Default config
37
+ // ---------------------------------------------------------------------------
38
/**
 * Default settings for the adaptive Layer 2 threshold.
 *
 * The `*WindowMs` values are durations in milliseconds; the `*Reduction`
 * values are the amounts subtracted from the base threshold for each
 * recency band.
 */
export const ADAPTIVE_DEFAULTS = {
    enabled: true,
    recentWindowMs: 5 * 60 * 1000, // 5 minutes
    idleWindowMs: 30 * 60 * 1000, // 30 minutes
    recentReduction: 0, // recent interaction: threshold unchanged
    moderateReduction: 0.1, // moderately idle: threshold lowered by 0.10
    idleReduction: 0.2, // fully idle: threshold lowered by 0.20
};
46
/**
 * Complete compaction configuration assembled from each layer's defaults
 * plus the adaptive-threshold defaults. Returned as-is by
 * buildCompactionConfig() when no overrides are supplied.
 */
export const DEFAULT_COMPACTION_CONFIG = {
    // Layer 1
    microcompaction: MICROCOMPACTION_DEFAULTS,
    // Layer 2
    compaction: COMPACTION_DEFAULTS,
    // Layer 3
    failsafe: FAILSAFE_DEFAULTS,
    // Recency-based adjustment of the Layer 2 threshold
    adaptive: ADAPTIVE_DEFAULTS,
};
52
/**
 * Produce a complete compaction config by layering caller overrides on top
 * of the per-section defaults.
 *
 * @param partial - Optional overrides. Each of `microcompaction`,
 *   `compaction`, `failsafe` and `adaptive` is shallow-merged over its
 *   defaults; `strategy` and `observational` are copied through only when
 *   they are not `undefined`.
 * @returns The shared DEFAULT_COMPACTION_CONFIG object when `partial` is
 *   falsy, otherwise a newly built config object.
 */
export function buildCompactionConfig(partial) {
    if (!partial) {
        return DEFAULT_COMPACTION_CONFIG;
    }
    // Shallow merge: keys present in `overrides` win over `defaults`.
    const withDefaults = (defaults, overrides) => ({ ...defaults, ...overrides });
    const config = {
        microcompaction: withDefaults(MICROCOMPACTION_DEFAULTS, partial.microcompaction),
        compaction: withDefaults(COMPACTION_DEFAULTS, partial.compaction),
        failsafe: withDefaults(FAILSAFE_DEFAULTS, partial.failsafe),
        adaptive: withDefaults(ADAPTIVE_DEFAULTS, partial.adaptive),
    };
    // Pass-through keys have no defaults; they are only set when provided.
    for (const key of ['strategy', 'observational']) {
        if (partial[key] !== undefined) {
            config[key] = partial[key];
        }
    }
    return config;
}
84
+ // ---------------------------------------------------------------------------
85
+ // Adaptive threshold calculation
86
+ // ---------------------------------------------------------------------------
87
/**
 * Derive the effective Layer 2 compaction threshold from how long ago the
 * user last interacted. The longer the idle period, the larger the reduction
 * subtracted from the base threshold, so compaction fires sooner.
 *
 * Bands (elapsed = now - lastInteractionTime):
 *   - elapsed < recentWindowMs  -> recentReduction
 *   - elapsed < idleWindowMs    -> moderateReduction
 *   - otherwise, or no interaction recorded (null) -> idleReduction
 *
 * @param baseThreshold - The configured Layer 2 threshold (e.g., 0.70)
 * @param adaptiveConfig - Adaptive threshold configuration
 * @param lastInteractionTime - Timestamp (ms) of the last user interaction, or null if never
 * @param now - Current timestamp (ms), injectable for testing
 * @returns `baseThreshold` unchanged when adaptive thresholds are disabled;
 *   otherwise the reduced threshold, clamped so it is never below 0
 */
export function computeAdaptiveThreshold(baseThreshold, adaptiveConfig, lastInteractionTime, now = Date.now()) {
    if (!adaptiveConfig.enabled) {
        return baseThreshold;
    }
    // Subtract a reduction from the base and clamp at zero.
    const lowerBy = (reduction) => Math.max(0, baseThreshold - reduction);
    // A session with no recorded interaction is treated as fully idle.
    if (lastInteractionTime === null) {
        return lowerBy(adaptiveConfig.idleReduction);
    }
    const idleForMs = now - lastInteractionTime;
    let reduction;
    if (idleForMs < adaptiveConfig.recentWindowMs) {
        reduction = adaptiveConfig.recentReduction;
    }
    else if (idleForMs < adaptiveConfig.idleWindowMs) {
        reduction = adaptiveConfig.moderateReduction;
    }
    else {
        reduction = adaptiveConfig.idleReduction;
    }
    return lowerBy(reduction);
}
118
+ // ---------------------------------------------------------------------------
119
+ // CompactionManager
120
+ // ---------------------------------------------------------------------------
121
+ /**
122
+ * CompactionManager orchestrates all three compaction layers.
123
+ *
124
+ * It is stateful: it tracks the current token count and the microcompaction
125
+ * cache. The CortexAgent creates one instance and delegates all compaction
126
+ * decisions to it. Compaction is fully autonomous: all three layers run
127
+ * inside applyInTransformContext(), which fires before every LLM call.
128
+ */
129
+ export class CompactionManager {
130
+ config;
131
+ microcompaction;
132
+ slotCount;
133
+ _strategy;
134
+ observationalEngine = null;
135
+ /** Post-hoc current-context token count, updated after each parent LLM call. */
136
+ _currentContextTokenCount = 0;
137
+ /** Context budget for Layer 1/2 compaction decisions (may be artificially limited). */
138
+ _contextWindow = 0;
139
+ /** Actual model context window for Layer 3 failsafe (never artificially limited). */
140
+ _modelContextWindow = 0;
141
+ /**
142
+ * Timestamp (ms) of the last user interaction. Used by the adaptive
143
+ * threshold system to decide how aggressively to compact. Updated by
144
+ * the consumer (backend) when a message-triggered tick fires.
145
+ * Null means no interaction has been recorded yet.
146
+ */
147
+ _lastInteractionTime = null;
148
+ /**
149
+ * Timestamp (ms) of the last LLM call. Used by L1 to decide whether the
150
+ * prompt cache has gone cold. Updated automatically in
151
+ * updateCurrentContextTokenCount() (which fires after every LLM response).
152
+ * Null means no LLM call has been recorded yet (treated as cold).
153
+ */
154
+ _lastLlmCallTimestamp = null;
155
+ /**
156
+ * Effective cache TTL (ms) for the current provider + cache retention.
157
+ * Zero means caching is unsupported or disabled, in which case L1 treats
158
+ * the cache as perpetually cold (trim freely). Set via setCacheInfo().
159
+ */
160
+ _providerCacheTtlMs = 0;
161
+ /** Consumer handlers for compaction lifecycle events. */
162
+ beforeCompactionHandlers = [];
163
+ postCompactionHandlers = [];
164
+ compactionErrorHandlers = [];
165
+ compactionResultHandlers = [];
166
+ compactionDegradedHandlers = [];
167
+ compactionExhaustedHandlers = [];
168
+ /** Consecutive Layer 2 failure count for circuit breaker. Reset on success. */
169
+ _consecutiveLayer2Failures = 0;
170
+ /** LLM completion function, set by CortexAgent. */
171
+ completeFn = null;
172
+ /** Logger for compaction diagnostics. */
173
+ logger = NOOP_LOGGER;
174
+ constructor(config, slotCount) {
175
+ this.config = config;
176
+ this.slotCount = slotCount;
177
+ this.microcompaction = new MicrocompactionEngine(config.microcompaction);
178
+ this._strategy = config.strategy ?? 'observational';
179
+ if (this._strategy === 'observational') {
180
+ this.observationalEngine = new ObservationalMemoryEngine(config.observational ?? {}, slotCount - 1);
181
+ }
182
+ }
183
+ // -----------------------------------------------------------------------
184
+ // Configuration
185
+ // -----------------------------------------------------------------------
186
+ /** Get the compaction strategy. */
187
+ get strategy() { return this._strategy; }
188
+ /**
189
+ * Set the context budget (the effective limit for Layer 1/2 compaction).
190
+ * This may be smaller than the model's actual context window when a
191
+ * user-configured limit is applied.
192
+ */
193
+ setContextWindow(contextWindow) {
194
+ this._contextWindow = contextWindow;
195
+ this.observationalEngine?.setContextWindow(contextWindow);
196
+ }
197
+ /**
198
+ * Set the model's actual context window (for Layer 3 failsafe only).
199
+ * Layer 3 emergency truncation uses this to avoid dropping messages
200
+ * when the model still has capacity, even if the user-configured
201
+ * budget has been exceeded.
202
+ *
203
+ * Also used as a proxy for the utility model context window until the
204
+ * actual utility model window is set via setUtilityModelContextWindow().
205
+ */
206
+ setModelContextWindow(modelContextWindow) {
207
+ this._modelContextWindow = modelContextWindow;
208
+ this.observationalEngine?.setUtilityModelContextWindow(modelContextWindow);
209
+ }
210
+ /**
211
+ * Set the LLM completion function for Layer 2 summarization.
212
+ */
213
+ setCompleteFn(fn) {
214
+ this.completeFn = fn;
215
+ }
216
+ /**
217
+ * Set the LLM completion function for observational memory (utility model).
218
+ */
219
+ setObservationalCompleteFn(fn) {
220
+ this.observationalEngine?.setCompleteFn(fn);
221
+ }
222
+ /**
223
+ * Update the utility model context window for observer/reflector clamps.
224
+ */
225
+ setUtilityModelContextWindow(utilityModelContextWindow) {
226
+ this.observationalEngine?.setUtilityModelContextWindow(utilityModelContextWindow);
227
+ }
228
+ /**
229
+ * Set a logger for compaction diagnostics.
230
+ */
231
+ setLogger(logger) {
232
+ this.logger = logger;
233
+ this.observationalEngine?.setLogger(logger);
234
+ }
235
+ /**
236
+ * Signal when the user last interacted with the system.
237
+ * The consumer (backend) calls this during GATHER when a message-triggered
238
+ * tick fires. For interval ticks, it is not called, so the timestamp
239
+ * naturally ages.
240
+ */
241
+ setLastInteractionTime(timestamp) {
242
+ this._lastInteractionTime = timestamp;
243
+ }
244
+ /**
245
+ * Get the timestamp of the last user interaction, or null if none recorded.
246
+ */
247
+ get lastInteractionTime() {
248
+ return this._lastInteractionTime;
249
+ }
250
+ /**
251
+ * Set the active provider and cache retention. Resolves the effective
252
+ * cache TTL from PROVIDER_CACHE_CONFIG and stores it for L1's cache-aware
253
+ * gating. Called by CortexAgent at construction, on provider changes, and
254
+ * on cache retention changes.
255
+ *
256
+ * @param provider - The active provider name (e.g., "anthropic", "openai")
257
+ * @param cacheRetention - The configured cache retention ('none' | 'short' | 'long')
258
+ */
259
+ setCacheInfo(provider, cacheRetention) {
260
+ const cfg = PROVIDER_CACHE_CONFIG[provider];
261
+ if (!cfg || !cfg.supported || cacheRetention === 'none') {
262
+ this._providerCacheTtlMs = 0;
263
+ return;
264
+ }
265
+ this._providerCacheTtlMs = cacheRetention === 'long' ? cfg.longTtlMs : cfg.shortTtlMs;
266
+ }
267
+ /**
268
+ * Check whether the prompt cache has gone cold (or is unused).
269
+ *
270
+ * Returns true when:
271
+ * - Caching is unsupported / disabled (TTL <= 0), OR
272
+ * - No LLM call has been recorded yet, OR
273
+ * - The elapsed time since the last LLM call >= the cache TTL.
274
+ *
275
+ * @param now - Current timestamp (ms), injectable for testing
276
+ */
277
+ isCacheCold(now = Date.now()) {
278
+ if (this._providerCacheTtlMs <= 0)
279
+ return true;
280
+ if (this._lastLlmCallTimestamp === null)
281
+ return true;
282
+ return (now - this._lastLlmCallTimestamp) >= this._providerCacheTtlMs;
283
+ }
284
+ /**
285
+ * Get the effective cache TTL (ms) for the current provider + retention.
286
+ * Zero means caching is unsupported or disabled.
287
+ */
288
+ get providerCacheTtlMs() {
289
+ return this._providerCacheTtlMs;
290
+ }
291
+ /**
292
+ * Get the timestamp of the last LLM call, or null if none recorded.
293
+ */
294
+ get lastLlmCallTimestamp() {
295
+ return this._lastLlmCallTimestamp;
296
+ }
297
+ /**
298
+ * Compute the effective Layer 2 compaction threshold, adjusted for
299
+ * interaction recency when adaptive thresholds are enabled.
300
+ *
301
+ * @param now - Current timestamp (ms), injectable for testing
302
+ */
303
+ getEffectiveThreshold(now) {
304
+ return computeAdaptiveThreshold(this.config.compaction.threshold, this.config.adaptive, this._lastInteractionTime, now);
305
+ }
306
+ // -----------------------------------------------------------------------
307
+ // Token Tracking
308
+ // -----------------------------------------------------------------------
309
+ /**
310
+ * Update the post-hoc current-context token count from LLM usage data.
311
+ */
312
+ updateCurrentContextTokenCount(inputTokens) {
313
+ const prev = this._currentContextTokenCount;
314
+ this._currentContextTokenCount = inputTokens;
315
+ // Track the LLM call timestamp so L1 can decide whether the prompt cache
316
+ // is still warm. updateCurrentContextTokenCount() is called after every
317
+ // parent LLM call, so this is the natural point to record it.
318
+ this._lastLlmCallTimestamp = Date.now();
319
+ this.logger.debug('[Compaction] updateCurrentContextTokenCount', { prev, inputTokens });
320
+ // Log significant drops to help diagnose token count display issues
321
+ if (prev > 0 && inputTokens < prev * 0.5) {
322
+ this.logger.warn('[Compaction] currentContextTokenCount dropped >50%', {
323
+ prev,
324
+ inputTokens,
325
+ drop: `${((1 - inputTokens / prev) * 100).toFixed(1)}%`,
326
+ });
327
+ }
328
+ }
329
+ /**
330
+ * Get the post-hoc current-context token count from the most recent parent turn.
331
+ */
332
+ get currentContextTokenCount() {
333
+ return this._currentContextTokenCount;
334
+ }
335
+ /**
336
+ * Get the context budget (effective limit for Layer 1/2).
337
+ */
338
+ get contextWindow() {
339
+ return this._contextWindow;
340
+ }
341
+ /**
342
+ * Get the model's actual context window (for Layer 3 failsafe).
343
+ */
344
+ get modelContextWindow() {
345
+ return this._modelContextWindow;
346
+ }
347
+ /**
348
+ * Get the current context usage ratio.
349
+ */
350
+ get usageRatio() {
351
+ if (this._contextWindow <= 0)
352
+ return 0;
353
+ return this._currentContextTokenCount / this._contextWindow;
354
+ }
355
+ /**
356
+ * Estimate current context tokens from a transformed AgentContext snapshot.
357
+ *
358
+ * Returns the larger of:
359
+ * - the heuristic estimate of the provided context snapshot
360
+ * - the post-hoc token count from the most recent parent turn
361
+ *
362
+ * This mirrors the compaction decision logic so consumers can reason about
363
+ * context pressure using the same semantics Cortex uses internally.
364
+ */
365
+ estimateCurrentContextTokens(context) {
366
+ const estimated = this.estimateContextTokens(context);
367
+ return this._currentContextTokenCount > 0
368
+ ? Math.max(this._currentContextTokenCount, estimated)
369
+ : estimated;
370
+ }
371
+ // -----------------------------------------------------------------------
372
+ // Event Handlers
373
+ // -----------------------------------------------------------------------
374
+ /**
375
+ * Register a handler called before compaction starts (awaited).
376
+ */
377
+ onBeforeCompaction(handler) {
378
+ this.beforeCompactionHandlers.push(handler);
379
+ }
380
+ /**
381
+ * Register a handler called after compaction completes.
382
+ */
383
+ onPostCompaction(handler) {
384
+ this.postCompactionHandlers.push(handler);
385
+ }
386
+ /**
387
+ * Register a handler called if compaction fails.
388
+ */
389
+ onCompactionError(handler) {
390
+ this.compactionErrorHandlers.push(handler);
391
+ }
392
+ /**
393
+ * Register a handler that receives the CompactionResult (for CortexAgent event emission).
394
+ */
395
+ onCompactionResult(handler) {
396
+ this.compactionResultHandlers.push(handler);
397
+ }
398
+ /**
399
+ * Register a handler called when Layer 2 failed and Layer 3 was used as fallback.
400
+ */
401
+ onCompactionDegraded(handler) {
402
+ this.compactionDegradedHandlers.push(handler);
403
+ }
404
+ /**
405
+ * Register a handler called when all compaction layers have failed.
406
+ */
407
+ onCompactionExhausted(handler) {
408
+ this.compactionExhaustedHandlers.push(handler);
409
+ }
410
+ // -----------------------------------------------------------------------
411
+ // Observational Memory
412
+ // -----------------------------------------------------------------------
413
+ /**
414
+ * Called at turn_end to trigger async buffer checks.
415
+ */
416
+ onTurnEnd(totalTokens, contextWindow, messages, slotCount) {
417
+ this.observationalEngine?.onTurnEnd(totalTokens, contextWindow, messages, slotCount);
418
+ }
419
+ /**
420
+ * Register observation event handler.
421
+ */
422
+ onObservation(handler) {
423
+ this.observationalEngine?.onObservation(handler);
424
+ }
425
+ /**
426
+ * Register reflection event handler.
427
+ */
428
+ onReflection(handler) {
429
+ this.observationalEngine?.onReflection(handler);
430
+ }
431
+ /**
432
+ * Get observational memory state for persistence.
433
+ */
434
+ getObservationalMemoryState() {
435
+ return this.observationalEngine?.getState() ?? null;
436
+ }
437
+ /**
438
+ * Restore observational memory state from a previous session.
439
+ */
440
+ restoreObservationalMemoryState(state) {
441
+ this.observationalEngine?.restoreState(state);
442
+ }
443
+ /**
444
+ * Force a synchronous observation cycle.
445
+ */
446
+ async triggerObservation(messages, slotCount) {
447
+ await this.observationalEngine?.triggerObservation(messages, slotCount);
448
+ }
449
+ /**
450
+ * Kick off an initial async buffer on unobserved messages.
451
+ * Called during session resumption for a head start before the first prompt().
452
+ */
453
+ kickstartBuffer(messages, slotCount) {
454
+ this.observationalEngine?.kickstartBuffer(messages, slotCount);
455
+ }
456
+ /**
457
+ * Get the observation slot content string (for ContextManager.setSlot).
458
+ */
459
+ getObservationSlotContent() {
460
+ return this.observationalEngine?.getSlotContent() ?? '';
461
+ }
462
+ /**
463
+ * Whether observations have been produced (non-empty observation text).
464
+ */
465
+ hasObservations() {
466
+ return (this.observationalEngine?.getObservations() ?? '').length > 0;
467
+ }
468
+ /**
469
+ * Whether the recall tool should be registered.
470
+ */
471
+ hasRecallTool() {
472
+ return this.observationalEngine?.hasRecall() ?? false;
473
+ }
474
+ /**
475
+ * Get the recall config if available.
476
+ */
477
+ getRecallConfig() {
478
+ return this.observationalEngine?.getRecallConfig();
479
+ }
480
+ /**
481
+ * Current token count of activated observations only.
482
+ * Returns 0 when not using the observational strategy.
483
+ */
484
+ getObservationTokenCount() {
485
+ return this.observationalEngine?.getObservationTokenCount() ?? 0;
486
+ }
487
+ /**
488
+ * Whether the observer or reflector is currently running in the background.
489
+ * Returns false when not using the observational strategy.
490
+ */
491
+ isObservationalProcessing() {
492
+ return this.observationalEngine?.isProcessing() ?? false;
493
+ }
494
+ /**
495
+ * Whether the observer specifically is in-flight.
496
+ */
497
+ isObserverInFlight() {
498
+ return this.observationalEngine?.isObserverInFlight() ?? false;
499
+ }
500
+ /**
501
+ * Whether the reflector specifically is in-flight.
502
+ */
503
+ isReflectorInFlight() {
504
+ return this.observationalEngine?.isReflectorInFlight() ?? false;
505
+ }
506
+ // -----------------------------------------------------------------------
507
+ // Insertion-time cap
508
+ // -----------------------------------------------------------------------
509
+ /**
510
+ * Cap a tool result at insertion time (before it enters conversation history).
511
+ */
512
+ capToolResult(content) {
513
+ return this.microcompaction.capAtInsertion(content);
514
+ }
515
+ /**
516
+ * Apply insertion-time cap to all uncapped tool results in the source
517
+ * messages array (mutates in place).
518
+ *
519
+ * Called from the transformContext hook on `agent.state.messages` so that
520
+ * Tier 1 capping is automatically applied when tool results enter
521
+ * conversation history through pi-agent-core's internal tool execution
522
+ * loop. The cap is applied at most once per tool result part; already
523
+ * capped content (containing the insertion marker) is skipped.
524
+ *
525
+ * @param messages - The source messages array (mutated in place)
526
+ * @param slotCount - Number of slot messages to skip at the start
527
+ */
528
+ async applyInsertionCap(messages, slotCount) {
529
+ const config = this.microcompaction.getConfig();
530
+ // Phase 1: Individual per-result cap
531
+ for (let i = slotCount; i < messages.length; i++) {
532
+ const msg = messages[i];
533
+ if (!isToolResultMessage(msg))
534
+ continue;
535
+ if (typeof msg.content === 'string')
536
+ continue;
537
+ let modified = false;
538
+ const newContent = msg.content.map(part => {
539
+ const isLegacyToolResult = part.type === 'tool_result' && typeof part.text === 'string';
540
+ const isRuntimeToolResultText = msg.role === 'toolResult' && part.type === 'text' && typeof part.text === 'string';
541
+ if (!isLegacyToolResult && !isRuntimeToolResultText) {
542
+ return part;
543
+ }
544
+ // Skip already-capped content
545
+ if (part.text.includes('tokens trimmed at insertion')) {
546
+ return part;
547
+ }
548
+ const capped = capToolResult(part.text, config);
549
+ if (capped !== part.text) {
550
+ modified = true;
551
+ return { ...part, text: capped };
552
+ }
553
+ return part;
554
+ });
555
+ if (modified) {
556
+ messages[i] = { ...msg, content: newContent };
557
+ }
558
+ }
559
+ // Phase 2: Aggregate per-message budget
560
+ const aggregateLimit = config.maxAggregateTurnTokens ?? 150_000;
561
+ if (aggregateLimit <= 0)
562
+ return;
563
+ for (let i = slotCount; i < messages.length; i++) {
564
+ const msg = messages[i];
565
+ if (!isToolResultMessage(msg))
566
+ continue;
567
+ if (typeof msg.content === 'string')
568
+ continue;
569
+ const parts = msg.content;
570
+ const partInfos = [];
571
+ let totalTokens = 0;
572
+ for (let p = 0; p < parts.length; p++) {
573
+ const part = parts[p];
574
+ const isLegacyToolResult = part.type === 'tool_result' && typeof part.text === 'string';
575
+ const isRuntimeToolResultText = msg.role === 'toolResult' && part.type === 'text' && typeof part.text === 'string';
576
+ if (!isLegacyToolResult && !isRuntimeToolResultText) {
577
+ continue;
578
+ }
579
+ const text = part.text;
580
+ const tokens = estimateTokens(text);
581
+ const name = (typeof part['name'] === 'string'
582
+ ? part['name']
583
+ : null) ?? extractToolName(msg) ?? 'unknown';
584
+ partInfos.push({ index: p, tokens, text, toolName: name });
585
+ totalTokens += tokens;
586
+ }
587
+ if (totalTokens <= aggregateLimit)
588
+ continue;
589
+ const sorted = [...partInfos].sort((a, b) => b.tokens - a.tokens);
590
+ const newParts = [...parts];
591
+ let currentTotal = totalTokens;
592
+ for (const info of sorted) {
593
+ if (currentTotal <= aggregateLimit)
594
+ break;
595
+ if (info.tokens <= config.maxResultTokens / 2)
596
+ break;
597
+ const part = newParts[info.index];
598
+ let replacement;
599
+ if (config.persistResult) {
600
+ const category = getToolCategory(info.toolName, config.toolCategories);
601
+ try {
602
+ const path = await config.persistResult(info.text, {
603
+ toolName: info.toolName,
604
+ messageIndex: i,
605
+ category: category ?? 'rereadable',
606
+ });
607
+ const bookended = applyBookend(info.text, config.bookendMaxChars, config.bookendMaxChars, info.tokens);
608
+ replacement = `${bookended}\n\n[Full content persisted to ${path} -- use Read to access]`;
609
+ }
610
+ catch {
611
+ replacement = applyBookend(info.text, config.bookendMaxChars, config.bookendMaxChars, info.tokens);
612
+ }
613
+ }
614
+ else {
615
+ replacement = applyBookend(info.text, config.bookendMaxChars, config.bookendMaxChars, info.tokens);
616
+ }
617
+ const newTokens = estimateTokens(replacement);
618
+ currentTotal = currentTotal - info.tokens + newTokens;
619
+ newParts[info.index] = { ...part, text: replacement };
620
+ }
621
+ messages[i] = { ...msg, content: newParts };
622
+ }
623
+ }
624
+ // -----------------------------------------------------------------------
625
+ // transformContext hook
626
+ // -----------------------------------------------------------------------
627
+ /**
628
+ * Apply compaction layers to the context in transformContext.
629
+ *
630
+ * This is the main entry point called from CortexAgent.getTransformContextHook().
631
+ * It is fully self-contained: all three compaction layers are integrated here,
632
+ * triggered autonomously based on token thresholds. No external calls from
633
+ * the backend are needed to trigger compaction.
634
+ *
635
+ * Execution order:
636
+ * 1. Layer 1 (microcompaction): tool result trimming at threshold crossings
637
+ * 2. Layer 2 (summarization): if tokens exceed 70% after Layer 1, run LLM
638
+ * summarization on agent.state.messages (the original transcript), then
639
+ * rebuild context from the updated messages
640
+ * 3. Layer 3 (failsafe): if tokens still exceed 90% after Layers 1-2,
641
+ * emergency truncation drops the oldest turns
642
+ *
643
+ * @param context - The AgentContext from transformContext
644
+ * @param getHistory - Function to get conversation history from the context
645
+ * @param setHistory - Function to set conversation history in the context
646
+ * @param getSourceHistory - Function to get the original transcript history (agent.state.messages post-slot)
647
+ * @param setSourceHistory - Function to replace the original transcript history (agent.state.messages)
648
+ * @returns Modified context with compacted history
649
+ */
650
+ async applyInTransformContext(context, getHistory, setHistory, getSourceHistory, setSourceHistory) {
651
+ if (this._contextWindow <= 0) {
652
+ // contextWindow not set, skip compaction
653
+ return context;
654
+ }
655
+ let history = getHistory(context);
656
+ if (history.length === 0) {
657
+ return context;
658
+ }
659
+ // Use the current transformed context estimate as a first-class input.
660
+ // Post-hoc token tracking from the previous turn is useful, but it can be
661
+ // stale when transformContext injects large ephemeral content on this turn.
662
+ const estimatedCurrentTokens = this.estimateContextTokens(context);
663
+ const currentTokens = this.estimateCurrentContextTokens(context);
664
+ this.logger.debug('[Compaction] transformContext', {
665
+ historyLen: history.length,
666
+ currentContextTokens: this._currentContextTokenCount,
667
+ heuristic: estimatedCurrentTokens,
668
+ currentTokens,
669
+ ctxWindow: this._contextWindow,
670
+ });
671
+ // Compute utilization and slot tokens (shared by both strategies and L3)
672
+ const originalHistoryTokens = this.estimateHistoryTokens(getHistory(context));
673
+ const slotTokens = Math.max(0, currentTokens - originalHistoryTokens);
674
+ const utilization = this._contextWindow > 0 ? currentTokens / this._contextWindow : 0;
675
+ let layer2Failed = false;
676
+ let lastLayer2Error;
677
+ let effectiveThreshold = 0;
678
+ const cacheCold = this.isCacheCold();
679
+ if (this._strategy === 'observational' && this.observationalEngine && getSourceHistory && setSourceHistory) {
680
+ // Observational memory path: observer/reflector handle conversation
681
+ // compression. L2 summarization is skipped, but L1 still runs in
682
+ // cache-aware mode on the unobserved tail to trim large tool results
683
+ // before they hit the LLM.
684
+ context = await this.observationalEngine.applyInTransformContext(context, utilization, this.slotCount, getHistory, setHistory, getSourceHistory, setSourceHistory);
685
+ history = getHistory(context);
686
+ // Run L1 on the surviving (post-observation) history. Cache-aware
687
+ // gating ensures we only trim when the prompt cache has gone cold,
688
+ // preserving cache hits during active use. Re-estimate from the
689
+ // updated context so the observation slot's new size is reflected.
690
+ const postObsTotal = this.estimateCurrentContextTokens(context);
691
+ const trimmedHistory = await this.microcompaction.apply(history, this._contextWindow, postObsTotal, { cacheCold });
692
+ if (trimmedHistory !== history) {
693
+ context = setHistory(context, trimmedHistory);
694
+ history = trimmedHistory;
695
+ }
696
+ }
697
+ else {
698
+ // Classic path: L1 + L2
699
+ // Layer 1: Microcompaction. Cache-aware gating: only trims when the
700
+ // prompt cache is cold (or unsupported). When warm, returns history
701
+ // untouched to preserve cache hits.
702
+ history = await this.microcompaction.apply(history, this._contextWindow, currentTokens, { cacheCold });
703
+ // Layer 2: Conversation summarization (70% threshold)
704
+ // Operates on the original transcript (agent.state.messages), not the
705
+ // in-memory microcompacted context. After Layer 2 modifies the source,
706
+ // we rebuild the context from the updated messages.
707
+ const postMicroTokens = this.estimateHistoryTokens(history);
708
+ const totalAfterMicro = slotTokens + postMicroTokens;
709
+ effectiveThreshold = this.getEffectiveThreshold();
710
+ this.logger.debug('[Compaction] Layer2 evaluation', {
711
+ totalAfterMicro,
712
+ threshold: effectiveThreshold,
713
+ ratio: totalAfterMicro / this._contextWindow,
714
+ completeFn: !!this.completeFn,
715
+ srcAccessors: !!getSourceHistory && !!setSourceHistory,
716
+ shouldCompact: shouldCompact(totalAfterMicro, this._contextWindow, effectiveThreshold),
717
+ });
718
+ if (this.completeFn &&
719
+ getSourceHistory &&
720
+ setSourceHistory &&
721
+ shouldCompact(totalAfterMicro, this._contextWindow, effectiveThreshold)) {
722
+ const maxRetries = this.config.compaction.maxRetries ?? 3;
723
+ const retryDelayMs = this.config.compaction.retryDelayMs ?? 2000;
724
+ let succeeded = false;
725
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
726
+ try {
727
+ const sourceHistory = getSourceHistory();
728
+ if (sourceHistory.length === 0)
729
+ break;
730
+ const { newHistory: compactedSource, result } = await runCompaction(sourceHistory, this.config.compaction, this.completeFn, {
731
+ onBeforeCompaction: this.beforeCompactionHandlers,
732
+ onPostCompaction: this.postCompactionHandlers,
733
+ onCompactionError: this.compactionErrorHandlers,
734
+ }, currentTokens);
735
+ // Success: update state and reset failure counter
736
+ setSourceHistory(compactedSource);
737
+ this.microcompaction.resetCache();
738
+ // result.tokensAfter now includes overhead (system prompt, slots,
739
+ // tool definitions) since we passed actualContextTokens to
740
+ // runCompaction. Use it directly to prevent the stale low value
741
+ // that would cause re-triggering compaction on the next call.
742
+ this._currentContextTokenCount = result.tokensAfter;
743
+ this._consecutiveLayer2Failures = 0;
744
+ for (const handler of this.compactionResultHandlers) {
745
+ try {
746
+ handler(result);
747
+ }
748
+ catch (err) {
749
+ this.logger.error('[Compaction] compactionResult handler threw', {
750
+ error: err instanceof Error ? err.message : String(err),
751
+ });
752
+ }
753
+ }
754
+ // Rebuild context from updated source. L2 just rewrote history
755
+ // wholesale, so any existing cache prefix is invalidated; treat as
756
+ // cold so L1 can trim the rebuilt history if warranted.
757
+ history = await this.microcompaction.apply(compactedSource, this._contextWindow, this._currentContextTokenCount, { cacheCold: true });
758
+ succeeded = true;
759
+ break;
760
+ }
761
+ catch (err) {
762
+ this._consecutiveLayer2Failures++;
763
+ lastLayer2Error = err instanceof Error ? err : new Error(String(err));
764
+ this.logger.warn('[Compaction] Layer2 retry failed', {
765
+ attempt,
766
+ maxRetries,
767
+ error: lastLayer2Error.message,
768
+ });
769
+ if (attempt < maxRetries) {
770
+ await new Promise(resolve => setTimeout(resolve, retryDelayMs));
771
+ }
772
+ }
773
+ }
774
+ if (!succeeded) {
775
+ layer2Failed = true;
776
+ }
777
+ }
778
+ }
779
+ // Layer 3: Emergency truncation (90% of model context window)
780
+ // Uses the MODEL's actual context window, not the budget. Emergency
781
+ // truncation should only fire when we're near the model's real limit,
782
+ // not the user's artificial budget. Layer 1/2 handle the budget.
783
+ // When observational memory is active, L3 operates on the post-slot
784
+ // history (raw messages only). The observation slot lives in the slot
785
+ // region and is naturally protected by slotCount.
786
+ {
787
+ const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
788
+ const postLayerTokens = this.estimateHistoryTokens(history);
789
+ const totalNow = slotTokens + postLayerTokens;
790
+ if (shouldTruncate(totalNow, failsafeWindow, this.config.failsafe.threshold)) {
791
+ // Force sync observation before L3 truncation to capture unobserved
792
+ // content before it is dropped. The source history from getSourceHistory
793
+ // is already post-slot, so pass 0 as slotCount.
794
+ if (this._strategy === 'observational' && this.observationalEngine && getSourceHistory) {
795
+ const sourceHistory = getSourceHistory();
796
+ await this.observationalEngine.triggerObservation(sourceHistory, 0);
797
+ }
798
+ const truncResult = emergencyTruncate(history, failsafeWindow, slotTokens, this.config.failsafe.threshold);
799
+ history = truncResult.newHistory;
800
+ // Emit degraded event if Layer 3 was used as fallback for Layer 2 failure
801
+ if (layer2Failed) {
802
+ const failures = this._consecutiveLayer2Failures;
803
+ this._consecutiveLayer2Failures = 0;
804
+ for (const handler of this.compactionDegradedHandlers) {
805
+ try {
806
+ handler({
807
+ layer2Failures: failures,
808
+ turnsDropped: truncResult.turnsRemoved,
809
+ });
810
+ }
811
+ catch (err) {
812
+ this.logger.error('[Compaction] compactionDegraded handler threw', {
813
+ error: err instanceof Error ? err.message : String(err),
814
+ });
815
+ }
816
+ }
817
+ }
818
+ }
819
+ else if (layer2Failed) {
820
+ // Layer 2 failed but Layer 3 didn't need to run. If tokens are still
821
+ // over the Layer 2 budget, emit exhausted so the consumer can act.
822
+ const postTokens = this.estimateHistoryTokens(history);
823
+ const stillOverBudget = shouldCompact(slotTokens + postTokens, this._contextWindow, effectiveThreshold);
824
+ if (stillOverBudget) {
825
+ const failures = this._consecutiveLayer2Failures;
826
+ this._consecutiveLayer2Failures = 0;
827
+ for (const handler of this.compactionExhaustedHandlers) {
828
+ try {
829
+ handler({
830
+ error: lastLayer2Error ?? new Error('Layer 2 compaction failed'),
831
+ layer2Failures: failures,
832
+ });
833
+ }
834
+ catch (err) {
835
+ this.logger.error('[Compaction] compactionExhausted handler threw', {
836
+ error: err instanceof Error ? err.message : String(err),
837
+ });
838
+ }
839
+ }
840
+ }
841
+ }
842
+ }
843
+ return setHistory(context, history);
844
+ }
845
+ // -----------------------------------------------------------------------
846
+ // End-of-tick compaction check
847
+ // -----------------------------------------------------------------------
848
+ /**
849
+ * Manually check if compaction is needed and run it.
850
+ *
851
+ * This is a convenience API for consumers who want to trigger compaction
852
+ * outside the agentic loop (e.g., for testing or manual maintenance).
853
+ * The primary compaction trigger is `applyInTransformContext`, which runs
854
+ * automatically before every LLM call.
855
+ *
856
+ * @param getHistory - Get current conversation history
857
+ * @param setHistory - Replace conversation history
858
+ * @returns CompactionResult if compaction ran, null otherwise
859
+ */
860
+ async checkAndRunCompaction(getHistory, setHistory) {
861
+ if (this._contextWindow <= 0)
862
+ return null;
863
+ const history = getHistory();
864
+ if (history.length === 0)
865
+ return null;
866
+ const estimatedTokens = this.estimateHistoryTokens(history);
867
+ // Use adaptive threshold (adjusts based on interaction recency)
868
+ const effectiveThreshold = this.getEffectiveThreshold();
869
+ // Check Layer 2 threshold
870
+ if (!shouldCompact(this._currentContextTokenCount, this._contextWindow, effectiveThreshold)) {
871
+ // Also check using heuristic estimation as fallback
872
+ if (!shouldCompact(estimatedTokens, this._contextWindow, effectiveThreshold)) {
873
+ return null;
874
+ }
875
+ }
876
+ // Attempt Layer 2 (summarization)
877
+ if (this.completeFn) {
878
+ try {
879
+ const actualTokens = Math.max(this._currentContextTokenCount, estimatedTokens);
880
+ const { newHistory, result } = await runCompaction(history, this.config.compaction, this.completeFn, {
881
+ onBeforeCompaction: this.beforeCompactionHandlers,
882
+ onPostCompaction: this.postCompactionHandlers,
883
+ onCompactionError: this.compactionErrorHandlers,
884
+ }, actualTokens);
885
+ setHistory(newHistory);
886
+ this.microcompaction.resetCache();
887
+ // result.tokensAfter includes overhead since we passed actualTokens
888
+ this._currentContextTokenCount = result.tokensAfter;
889
+ // Emit result
890
+ for (const handler of this.compactionResultHandlers) {
891
+ try {
892
+ handler(result);
893
+ }
894
+ catch {
895
+ // Swallow handler errors
896
+ }
897
+ }
898
+ return result;
899
+ }
900
+ catch {
901
+ // Layer 2 failed, fall through to Layer 3
902
+ }
903
+ }
904
+ // Layer 3 fallback: emergency truncation (uses model's actual window)
905
+ const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
906
+ const slotTokens = this._currentContextTokenCount - estimatedTokens;
907
+ if (shouldTruncate(this._currentContextTokenCount, failsafeWindow, this.config.failsafe.threshold)) {
908
+ const result = emergencyTruncate(history, failsafeWindow, Math.max(0, slotTokens), this.config.failsafe.threshold);
909
+ setHistory(result.newHistory);
910
+ this.microcompaction.resetCache();
911
+ this._currentContextTokenCount = result.tokensAfter;
912
+ }
913
+ return null;
914
+ }
915
+ // -----------------------------------------------------------------------
916
+ // Reactive overflow handling
917
+ // -----------------------------------------------------------------------
918
+ /**
919
+ * Handle a context overflow error by performing emergency truncation.
920
+ * Called when the API returns a context overflow error.
921
+ *
922
+ * @param getHistory - Get current conversation history
923
+ * @param setHistory - Replace conversation history
924
+ */
925
+ handleOverflowError(getHistory, setHistory) {
926
+ const history = getHistory();
927
+ if (history.length === 0)
928
+ return;
929
+ // API returned overflow error, so use the model's actual window
930
+ const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
931
+ const estimatedTokens = this.estimateHistoryTokens(history);
932
+ const slotTokens = Math.max(0, this._currentContextTokenCount - estimatedTokens);
933
+ const result = emergencyTruncate(history, failsafeWindow, slotTokens, this.config.failsafe.threshold);
934
+ setHistory(result.newHistory);
935
+ this.microcompaction.resetCache();
936
+ this._currentContextTokenCount = result.tokensAfter;
937
+ }
938
+ // -----------------------------------------------------------------------
939
+ // Cleanup
940
+ // -----------------------------------------------------------------------
941
+ /**
942
+ * Clear all state and handlers.
943
+ */
944
+ destroy() {
945
+ this.microcompaction.resetCache();
946
+ this.observationalEngine?.abort();
947
+ this.observationalEngine = null;
948
+ this.beforeCompactionHandlers = [];
949
+ this.postCompactionHandlers = [];
950
+ this.compactionErrorHandlers = [];
951
+ this.compactionResultHandlers = [];
952
+ this.compactionDegradedHandlers = [];
953
+ this.compactionExhaustedHandlers = [];
954
+ this.completeFn = null;
955
+ this._currentContextTokenCount = 0;
956
+ this._consecutiveLayer2Failures = 0;
957
+ this._lastInteractionTime = null;
958
+ }
959
+ // -----------------------------------------------------------------------
960
+ // Internal helpers
961
+ // -----------------------------------------------------------------------
962
+ /**
963
+ * Estimate tokens for a set of history messages.
964
+ */
965
+ estimateHistoryTokens(history) {
966
+ return estimateTokens(history.map(m => extractTextContent(m)).join('\n'));
967
+ }
968
+ /**
969
+ * Estimate total context tokens from an AgentContext object.
970
+ */
971
+ estimateContextTokens(context) {
972
+ let total = estimateTokens(context.systemPrompt);
973
+ for (const msg of context.messages) {
974
+ total += estimateTokens(extractTextContent(msg));
975
+ }
976
+ return total;
977
+ }
978
+ }
979
+ //# sourceMappingURL=index.js.map