@animus-labs/cortex 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +73 -0
- package/dist/budget-guard.d.ts +75 -0
- package/dist/budget-guard.d.ts.map +1 -0
- package/dist/budget-guard.js +142 -0
- package/dist/budget-guard.js.map +1 -0
- package/dist/compaction/compaction.d.ts +99 -0
- package/dist/compaction/compaction.d.ts.map +1 -0
- package/dist/compaction/compaction.js +302 -0
- package/dist/compaction/compaction.js.map +1 -0
- package/dist/compaction/failsafe.d.ts +57 -0
- package/dist/compaction/failsafe.d.ts.map +1 -0
- package/dist/compaction/failsafe.js +135 -0
- package/dist/compaction/failsafe.js.map +1 -0
- package/dist/compaction/index.d.ts +381 -0
- package/dist/compaction/index.d.ts.map +1 -0
- package/dist/compaction/index.js +979 -0
- package/dist/compaction/index.js.map +1 -0
- package/dist/compaction/microcompaction.d.ts +219 -0
- package/dist/compaction/microcompaction.d.ts.map +1 -0
- package/dist/compaction/microcompaction.js +536 -0
- package/dist/compaction/microcompaction.js.map +1 -0
- package/dist/compaction/observational/buffering.d.ts +225 -0
- package/dist/compaction/observational/buffering.d.ts.map +1 -0
- package/dist/compaction/observational/buffering.js +354 -0
- package/dist/compaction/observational/buffering.js.map +1 -0
- package/dist/compaction/observational/constants.d.ts +70 -0
- package/dist/compaction/observational/constants.d.ts.map +1 -0
- package/dist/compaction/observational/constants.js +507 -0
- package/dist/compaction/observational/constants.js.map +1 -0
- package/dist/compaction/observational/index.d.ts +219 -0
- package/dist/compaction/observational/index.d.ts.map +1 -0
- package/dist/compaction/observational/index.js +641 -0
- package/dist/compaction/observational/index.js.map +1 -0
- package/dist/compaction/observational/observer.d.ts +97 -0
- package/dist/compaction/observational/observer.d.ts.map +1 -0
- package/dist/compaction/observational/observer.js +424 -0
- package/dist/compaction/observational/observer.js.map +1 -0
- package/dist/compaction/observational/recall-tool.d.ts +27 -0
- package/dist/compaction/observational/recall-tool.d.ts.map +1 -0
- package/dist/compaction/observational/recall-tool.js +93 -0
- package/dist/compaction/observational/recall-tool.js.map +1 -0
- package/dist/compaction/observational/reflector.d.ts +94 -0
- package/dist/compaction/observational/reflector.d.ts.map +1 -0
- package/dist/compaction/observational/reflector.js +167 -0
- package/dist/compaction/observational/reflector.js.map +1 -0
- package/dist/compaction/observational/types.d.ts +271 -0
- package/dist/compaction/observational/types.d.ts.map +1 -0
- package/dist/compaction/observational/types.js +15 -0
- package/dist/compaction/observational/types.js.map +1 -0
- package/dist/context-manager.d.ts +134 -0
- package/dist/context-manager.d.ts.map +1 -0
- package/dist/context-manager.js +170 -0
- package/dist/context-manager.js.map +1 -0
- package/dist/cortex-agent.d.ts +1020 -0
- package/dist/cortex-agent.d.ts.map +1 -0
- package/dist/cortex-agent.js +3589 -0
- package/dist/cortex-agent.js.map +1 -0
- package/dist/error-classifier.d.ts +48 -0
- package/dist/error-classifier.d.ts.map +1 -0
- package/dist/error-classifier.js +152 -0
- package/dist/error-classifier.js.map +1 -0
- package/dist/event-bridge.d.ts +166 -0
- package/dist/event-bridge.d.ts.map +1 -0
- package/dist/event-bridge.js +381 -0
- package/dist/event-bridge.js.map +1 -0
- package/dist/index.d.ts +55 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +57 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp-client.d.ts +119 -0
- package/dist/mcp-client.d.ts.map +1 -0
- package/dist/mcp-client.js +474 -0
- package/dist/mcp-client.js.map +1 -0
- package/dist/model-wrapper.d.ts +58 -0
- package/dist/model-wrapper.d.ts.map +1 -0
- package/dist/model-wrapper.js +86 -0
- package/dist/model-wrapper.js.map +1 -0
- package/dist/noop-logger.d.ts +4 -0
- package/dist/noop-logger.d.ts.map +1 -0
- package/dist/noop-logger.js +8 -0
- package/dist/noop-logger.js.map +1 -0
- package/dist/prompt-diagnostics.d.ts +47 -0
- package/dist/prompt-diagnostics.d.ts.map +1 -0
- package/dist/prompt-diagnostics.js +230 -0
- package/dist/prompt-diagnostics.js.map +1 -0
- package/dist/provider-manager.d.ts +224 -0
- package/dist/provider-manager.d.ts.map +1 -0
- package/dist/provider-manager.js +563 -0
- package/dist/provider-manager.js.map +1 -0
- package/dist/provider-registry.d.ts +115 -0
- package/dist/provider-registry.d.ts.map +1 -0
- package/dist/provider-registry.js +305 -0
- package/dist/provider-registry.js.map +1 -0
- package/dist/schema-converter.d.ts +20 -0
- package/dist/schema-converter.d.ts.map +1 -0
- package/dist/schema-converter.js +48 -0
- package/dist/schema-converter.js.map +1 -0
- package/dist/skill-preprocessor.d.ts +46 -0
- package/dist/skill-preprocessor.d.ts.map +1 -0
- package/dist/skill-preprocessor.js +237 -0
- package/dist/skill-preprocessor.js.map +1 -0
- package/dist/skill-registry.d.ts +107 -0
- package/dist/skill-registry.d.ts.map +1 -0
- package/dist/skill-registry.js +330 -0
- package/dist/skill-registry.js.map +1 -0
- package/dist/skill-tool.d.ts +54 -0
- package/dist/skill-tool.d.ts.map +1 -0
- package/dist/skill-tool.js +88 -0
- package/dist/skill-tool.js.map +1 -0
- package/dist/sub-agent-manager.d.ts +90 -0
- package/dist/sub-agent-manager.d.ts.map +1 -0
- package/dist/sub-agent-manager.js +192 -0
- package/dist/sub-agent-manager.js.map +1 -0
- package/dist/token-estimator.d.ts +23 -0
- package/dist/token-estimator.d.ts.map +1 -0
- package/dist/token-estimator.js +27 -0
- package/dist/token-estimator.js.map +1 -0
- package/dist/tool-contract.d.ts +68 -0
- package/dist/tool-contract.d.ts.map +1 -0
- package/dist/tool-contract.js +35 -0
- package/dist/tool-contract.js.map +1 -0
- package/dist/tool-result-persistence.d.ts +89 -0
- package/dist/tool-result-persistence.d.ts.map +1 -0
- package/dist/tool-result-persistence.js +152 -0
- package/dist/tool-result-persistence.js.map +1 -0
- package/dist/tools/bash/index.d.ts +71 -0
- package/dist/tools/bash/index.d.ts.map +1 -0
- package/dist/tools/bash/index.js +485 -0
- package/dist/tools/bash/index.js.map +1 -0
- package/dist/tools/bash/interactive.d.ts +47 -0
- package/dist/tools/bash/interactive.d.ts.map +1 -0
- package/dist/tools/bash/interactive.js +262 -0
- package/dist/tools/bash/interactive.js.map +1 -0
- package/dist/tools/bash/safety.d.ts +149 -0
- package/dist/tools/bash/safety.d.ts.map +1 -0
- package/dist/tools/bash/safety.js +1116 -0
- package/dist/tools/bash/safety.js.map +1 -0
- package/dist/tools/edit.d.ts +57 -0
- package/dist/tools/edit.d.ts.map +1 -0
- package/dist/tools/edit.js +310 -0
- package/dist/tools/edit.js.map +1 -0
- package/dist/tools/glob.d.ts +34 -0
- package/dist/tools/glob.d.ts.map +1 -0
- package/dist/tools/glob.js +268 -0
- package/dist/tools/glob.js.map +1 -0
- package/dist/tools/grep.d.ts +53 -0
- package/dist/tools/grep.d.ts.map +1 -0
- package/dist/tools/grep.js +673 -0
- package/dist/tools/grep.js.map +1 -0
- package/dist/tools/index.d.ts +62 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +52 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/read.d.ts +43 -0
- package/dist/tools/read.d.ts.map +1 -0
- package/dist/tools/read.js +459 -0
- package/dist/tools/read.js.map +1 -0
- package/dist/tools/runtime.d.ts +62 -0
- package/dist/tools/runtime.d.ts.map +1 -0
- package/dist/tools/runtime.js +116 -0
- package/dist/tools/runtime.js.map +1 -0
- package/dist/tools/shared/cwd-tracker.d.ts +32 -0
- package/dist/tools/shared/cwd-tracker.d.ts.map +1 -0
- package/dist/tools/shared/cwd-tracker.js +44 -0
- package/dist/tools/shared/cwd-tracker.js.map +1 -0
- package/dist/tools/shared/edit-history.d.ts +55 -0
- package/dist/tools/shared/edit-history.d.ts.map +1 -0
- package/dist/tools/shared/edit-history.js +72 -0
- package/dist/tools/shared/edit-history.js.map +1 -0
- package/dist/tools/shared/edit-matcher.d.ts +83 -0
- package/dist/tools/shared/edit-matcher.d.ts.map +1 -0
- package/dist/tools/shared/edit-matcher.js +359 -0
- package/dist/tools/shared/edit-matcher.js.map +1 -0
- package/dist/tools/shared/file-mutation-lock.d.ts +22 -0
- package/dist/tools/shared/file-mutation-lock.d.ts.map +1 -0
- package/dist/tools/shared/file-mutation-lock.js +35 -0
- package/dist/tools/shared/file-mutation-lock.js.map +1 -0
- package/dist/tools/shared/gitignore.d.ts +17 -0
- package/dist/tools/shared/gitignore.d.ts.map +1 -0
- package/dist/tools/shared/gitignore.js +59 -0
- package/dist/tools/shared/gitignore.js.map +1 -0
- package/dist/tools/shared/pdf-extractor.d.ts +96 -0
- package/dist/tools/shared/pdf-extractor.d.ts.map +1 -0
- package/dist/tools/shared/pdf-extractor.js +196 -0
- package/dist/tools/shared/pdf-extractor.js.map +1 -0
- package/dist/tools/shared/read-registry.d.ts +66 -0
- package/dist/tools/shared/read-registry.d.ts.map +1 -0
- package/dist/tools/shared/read-registry.js +65 -0
- package/dist/tools/shared/read-registry.js.map +1 -0
- package/dist/tools/shared/safe-env.d.ts +18 -0
- package/dist/tools/shared/safe-env.d.ts.map +1 -0
- package/dist/tools/shared/safe-env.js +70 -0
- package/dist/tools/shared/safe-env.js.map +1 -0
- package/dist/tools/sub-agent.d.ts +91 -0
- package/dist/tools/sub-agent.d.ts.map +1 -0
- package/dist/tools/sub-agent.js +89 -0
- package/dist/tools/sub-agent.js.map +1 -0
- package/dist/tools/task-output.d.ts +38 -0
- package/dist/tools/task-output.d.ts.map +1 -0
- package/dist/tools/task-output.js +186 -0
- package/dist/tools/task-output.js.map +1 -0
- package/dist/tools/tool-search/index.d.ts +40 -0
- package/dist/tools/tool-search/index.d.ts.map +1 -0
- package/dist/tools/tool-search/index.js +110 -0
- package/dist/tools/tool-search/index.js.map +1 -0
- package/dist/tools/tool-search/registry.d.ts +82 -0
- package/dist/tools/tool-search/registry.d.ts.map +1 -0
- package/dist/tools/tool-search/registry.js +238 -0
- package/dist/tools/tool-search/registry.js.map +1 -0
- package/dist/tools/undo-edit.d.ts +51 -0
- package/dist/tools/undo-edit.d.ts.map +1 -0
- package/dist/tools/undo-edit.js +231 -0
- package/dist/tools/undo-edit.js.map +1 -0
- package/dist/tools/web-fetch/cache.d.ts +49 -0
- package/dist/tools/web-fetch/cache.d.ts.map +1 -0
- package/dist/tools/web-fetch/cache.js +89 -0
- package/dist/tools/web-fetch/cache.js.map +1 -0
- package/dist/tools/web-fetch/index.d.ts +53 -0
- package/dist/tools/web-fetch/index.d.ts.map +1 -0
- package/dist/tools/web-fetch/index.js +513 -0
- package/dist/tools/web-fetch/index.js.map +1 -0
- package/dist/tools/write.d.ts +59 -0
- package/dist/tools/write.d.ts.map +1 -0
- package/dist/tools/write.js +316 -0
- package/dist/tools/write.js.map +1 -0
- package/dist/types.d.ts +881 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +16 -0
- package/dist/types.js.map +1 -0
- package/dist/working-tags.d.ts +44 -0
- package/dist/working-tags.d.ts.map +1 -0
- package/dist/working-tags.js +103 -0
- package/dist/working-tags.js.map +1 -0
- package/package.json +87 -0
- package/src/budget-guard.ts +170 -0
- package/src/compaction/compaction.ts +386 -0
- package/src/compaction/failsafe.ts +185 -0
- package/src/compaction/index.ts +1199 -0
- package/src/compaction/microcompaction.ts +709 -0
- package/src/compaction/observational/buffering.ts +430 -0
- package/src/compaction/observational/constants.ts +532 -0
- package/src/compaction/observational/index.ts +837 -0
- package/src/compaction/observational/observer.ts +510 -0
- package/src/compaction/observational/recall-tool.ts +130 -0
- package/src/compaction/observational/reflector.ts +221 -0
- package/src/compaction/observational/types.ts +343 -0
- package/src/context-manager.ts +237 -0
- package/src/cortex-agent.ts +4297 -0
- package/src/error-classifier.ts +199 -0
- package/src/event-bridge.ts +508 -0
- package/src/index.ts +292 -0
- package/src/mcp-client.ts +582 -0
- package/src/model-wrapper.ts +128 -0
- package/src/noop-logger.ts +9 -0
- package/src/prompt-diagnostics.ts +296 -0
- package/src/provider-manager.ts +823 -0
- package/src/provider-registry.ts +386 -0
- package/src/schema-converter.ts +51 -0
- package/src/skill-preprocessor.ts +314 -0
- package/src/skill-registry.ts +378 -0
- package/src/skill-tool.ts +130 -0
- package/src/sub-agent-manager.ts +236 -0
- package/src/token-estimator.ts +26 -0
- package/src/tool-contract.ts +113 -0
- package/src/tool-result-persistence.ts +197 -0
- package/src/tools/bash/index.ts +633 -0
- package/src/tools/bash/interactive.ts +302 -0
- package/src/tools/bash/safety.ts +1297 -0
- package/src/tools/edit.ts +422 -0
- package/src/tools/glob.ts +330 -0
- package/src/tools/grep.ts +819 -0
- package/src/tools/index.ts +110 -0
- package/src/tools/read.ts +580 -0
- package/src/tools/runtime.ts +173 -0
- package/src/tools/shared/cwd-tracker.ts +50 -0
- package/src/tools/shared/edit-history.ts +96 -0
- package/src/tools/shared/edit-matcher.ts +457 -0
- package/src/tools/shared/file-mutation-lock.ts +40 -0
- package/src/tools/shared/gitignore.ts +61 -0
- package/src/tools/shared/pdf-extractor.ts +290 -0
- package/src/tools/shared/read-registry.ts +93 -0
- package/src/tools/shared/safe-env.ts +82 -0
- package/src/tools/sub-agent.ts +171 -0
- package/src/tools/task-output.ts +236 -0
- package/src/tools/tool-search/index.ts +167 -0
- package/src/tools/tool-search/registry.ts +278 -0
- package/src/tools/undo-edit.ts +314 -0
- package/src/tools/web-fetch/cache.ts +112 -0
- package/src/tools/web-fetch/index.ts +604 -0
- package/src/tools/write.ts +385 -0
- package/src/types.ts +1057 -0
- package/src/working-tags.ts +118 -0
|
@@ -0,0 +1,1199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compaction composition: wires all three layers into the transformContext chain.
|
|
3
|
+
*
|
|
4
|
+
* Layer 1 (Microcompaction): tool result trimming at threshold crossings
|
|
5
|
+
* Layer 2 (Compaction): conversation summarization via LLM
|
|
6
|
+
* Layer 3 (Failsafe): emergency truncation, purely mechanical
|
|
7
|
+
*
|
|
8
|
+
* All three layers run inside transformContext, which fires before every LLM
|
|
9
|
+
* call. Compaction is fully self-contained within Cortex; no external calls
|
|
10
|
+
* from the backend are needed to trigger it. Layer 2 fires when token usage
|
|
11
|
+
* exceeds 70% of the context window and a completeFn + source accessors are
|
|
12
|
+
* provided. Layer 3 fires whenever tokens exceed 90% of the model's context
|
|
13
|
+
* window.
|
|
14
|
+
*
|
|
15
|
+
* References:
|
|
16
|
+
* - compaction-strategy.md
|
|
17
|
+
* - phase-5-compaction.md (5.5)
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import type { AgentMessage, AgentContext } from '../context-manager.js';
|
|
21
|
+
import type {
|
|
22
|
+
CortexLogger,
|
|
23
|
+
CortexCompactionConfig,
|
|
24
|
+
AdaptiveThresholdConfig,
|
|
25
|
+
CompactionResult,
|
|
26
|
+
CompactionTarget,
|
|
27
|
+
CompactionDegradedInfo,
|
|
28
|
+
CompactionExhaustedInfo,
|
|
29
|
+
} from '../types.js';
|
|
30
|
+
import { NOOP_LOGGER } from '../noop-logger.js';
|
|
31
|
+
import { estimateTokens } from '../token-estimator.js';
|
|
32
|
+
import { MicrocompactionEngine, MICROCOMPACTION_DEFAULTS, extractTextContent, isToolResultMessage, capToolResult, extractToolName, getToolCategory, applyBookend } from './microcompaction.js';
|
|
33
|
+
import {
|
|
34
|
+
runCompaction,
|
|
35
|
+
shouldCompact,
|
|
36
|
+
COMPACTION_DEFAULTS,
|
|
37
|
+
} from './compaction.js';
|
|
38
|
+
import type { CompleteFn, BeforeCompactionHandler, PostCompactionHandler, CompactionErrorHandler } from './compaction.js';
|
|
39
|
+
import {
|
|
40
|
+
emergencyTruncate,
|
|
41
|
+
shouldTruncate,
|
|
42
|
+
FAILSAFE_DEFAULTS,
|
|
43
|
+
} from './failsafe.js';
|
|
44
|
+
import { ObservationalMemoryEngine } from './observational/index.js';
|
|
45
|
+
import type { ObservationalMemoryConfig, ObservationalMemoryState, ObservationEvent, ReflectionEvent } from './observational/types.js';
|
|
46
|
+
import { PROVIDER_CACHE_CONFIG, type CacheRetention } from '../provider-registry.js';
|
|
47
|
+
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// Re-exports for consumer convenience
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
export { MicrocompactionEngine, capToolResult } from './microcompaction.js';
|
|
53
|
+
export type { TrimAction, TrimState } from './microcompaction.js';
|
|
54
|
+
export { runCompaction, shouldCompact, partitionHistory, buildSummaryMessage } from './compaction.js';
|
|
55
|
+
export type { CompleteFn } from './compaction.js';
|
|
56
|
+
export { emergencyTruncate, shouldTruncate, isContextOverflow } from './failsafe.js';
|
|
57
|
+
export type { FailsafeTruncationResult } from './failsafe.js';
|
|
58
|
+
export { ObservationalMemoryEngine } from './observational/index.js';
|
|
59
|
+
export type { ObservationalMemoryConfig, ObservationalMemoryState, ObservationChunk, ObservationEvent, ReflectionEvent, RecallResult, RecallConfig } from './observational/types.js';
|
|
60
|
+
export { createRecallTool } from './observational/recall-tool.js';
|
|
61
|
+
// computeAdaptiveThreshold is defined below in this file and exported at the declaration site
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Default config
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
/**
 * Default adaptive-threshold settings.
 *
 * Window values are durations in milliseconds. Each `*Reduction` value is the
 * amount computeAdaptiveThreshold() subtracts from the base Layer 2 threshold
 * for the matching recency band.
 */
export const ADAPTIVE_DEFAULTS: AdaptiveThresholdConfig = {
  enabled: true,
  // Less than 5 minutes since the last interaction counts as "recent".
  recentWindowMs: 5 * 60 * 1_000,
  // Less than 30 minutes counts as "moderately idle"; anything longer is idle.
  idleWindowMs: 30 * 60 * 1_000,
  // Recent interaction: threshold is left unchanged.
  recentReduction: 0,
  // Moderately idle: threshold is lowered by 0.10.
  moderateReduction: 0.1,
  // Fully idle: threshold is lowered by 0.20.
  idleReduction: 0.2,
};
|
|
75
|
+
|
|
76
|
+
/**
 * Full default compaction config: one defaults object per section.
 *
 * Each section references the shared defaults object directly (no copy), and
 * neither `strategy` nor `observational` is set here.
 */
export const DEFAULT_COMPACTION_CONFIG: CortexCompactionConfig = {
  microcompaction: MICROCOMPACTION_DEFAULTS,
  compaction: COMPACTION_DEFAULTS,
  failsafe: FAILSAFE_DEFAULTS,
  adaptive: ADAPTIVE_DEFAULTS,
};
|
|
82
|
+
|
|
83
|
+
/**
 * Build a complete compaction config from optional partial overrides.
 *
 * Each section (microcompaction, compaction, failsafe, adaptive) is the
 * matching defaults object shallow-merged with the caller's override for that
 * section; override keys win. `strategy` and `observational` are copied onto
 * the result only when the caller supplied them.
 *
 * The result is always a freshly allocated object with freshly allocated
 * section objects, including when `partial` is omitted. Callers can therefore
 * adjust the returned config without altering DEFAULT_COMPACTION_CONFIG or the
 * per-section defaults. The merge is shallow: values nested inside a section
 * are still shared with the defaults or overrides they came from.
 *
 * @param partial - Optional overrides; omitted sections fall back to defaults
 * @returns A new, fully populated compaction config
 */
export function buildCompactionConfig(
  partial?: Partial<CortexCompactionConfig>,
): CortexCompactionConfig {
  // Treat "no overrides" as an empty override set so both paths share the same
  // copy-on-build behaviour instead of handing out the shared default object.
  const overrides: Partial<CortexCompactionConfig> = partial ?? {};

  const config: CortexCompactionConfig = {
    microcompaction: {
      ...MICROCOMPACTION_DEFAULTS,
      ...overrides.microcompaction,
    },
    compaction: {
      ...COMPACTION_DEFAULTS,
      ...overrides.compaction,
    },
    failsafe: {
      ...FAILSAFE_DEFAULTS,
      ...overrides.failsafe,
    },
    adaptive: {
      ...ADAPTIVE_DEFAULTS,
      ...overrides.adaptive,
    },
  };

  // Optional keys are only materialised when provided, so a config built
  // without them has no `strategy` / `observational` property at all.
  if (overrides.strategy !== undefined) {
    config.strategy = overrides.strategy;
  }

  if (overrides.observational !== undefined) {
    config.observational = overrides.observational;
  }

  return config;
}
|
|
120
|
+
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
// Adaptive threshold calculation
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
/**
 * Derive the effective Layer 2 compaction threshold from how recently the
 * user interacted. The longer the session has been idle, the larger the
 * reduction subtracted from the base threshold, so compaction fires sooner.
 *
 * Recency bands (elapsed = now - lastInteractionTime):
 *   - elapsed < recentWindowMs  -> recentReduction
 *   - elapsed < idleWindowMs    -> moderateReduction
 *   - otherwise, or no interaction recorded (null) -> idleReduction
 *
 * When adaptive thresholds are disabled the base threshold is returned as-is,
 * without clamping.
 *
 * @param baseThreshold - The configured Layer 2 threshold (e.g., 0.70)
 * @param adaptiveConfig - Adaptive threshold configuration
 * @param lastInteractionTime - Timestamp (ms) of the last user interaction, or null if never
 * @param now - Current timestamp (ms), injectable for testing
 * @returns The adjusted threshold, clamped to a minimum of 0 when adaptive is enabled
 */
export function computeAdaptiveThreshold(
  baseThreshold: number,
  adaptiveConfig: AdaptiveThresholdConfig,
  lastInteractionTime: number | null,
  now: number = Date.now(),
): number {
  if (!adaptiveConfig.enabled) {
    return baseThreshold;
  }

  const {
    recentWindowMs,
    idleWindowMs,
    recentReduction,
    moderateReduction,
    idleReduction,
  } = adaptiveConfig;

  // Choose the reduction for the current recency band, then clamp once.
  let reduction: number;
  if (lastInteractionTime === null) {
    // Nothing recorded yet is handled the same as a fully idle session.
    reduction = idleReduction;
  } else {
    const sinceLastInteraction = now - lastInteractionTime;
    if (sinceLastInteraction < recentWindowMs) {
      reduction = recentReduction;
    } else if (sinceLastInteraction < idleWindowMs) {
      reduction = moderateReduction;
    } else {
      reduction = idleReduction;
    }
  }

  return Math.max(0, baseThreshold - reduction);
}
|
|
166
|
+
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
// CompactionManager
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* CompactionManager orchestrates all three compaction layers.
|
|
173
|
+
*
|
|
174
|
+
* It is stateful: it tracks the current token count and the microcompaction
|
|
175
|
+
* cache. The CortexAgent creates one instance and delegates all compaction
|
|
176
|
+
* decisions to it. Compaction is fully autonomous: all three layers run
|
|
177
|
+
* inside applyInTransformContext(), which fires before every LLM call.
|
|
178
|
+
*/
|
|
179
|
+
export class CompactionManager {
|
|
180
|
+
private readonly config: CortexCompactionConfig;
|
|
181
|
+
private readonly microcompaction: MicrocompactionEngine;
|
|
182
|
+
private readonly slotCount: number;
|
|
183
|
+
private readonly _strategy: 'observational' | 'classic';
|
|
184
|
+
private observationalEngine: ObservationalMemoryEngine | null = null;
|
|
185
|
+
|
|
186
|
+
/** Post-hoc current-context token count, updated after each parent LLM call. */
|
|
187
|
+
private _currentContextTokenCount = 0;
|
|
188
|
+
|
|
189
|
+
/** Context budget for Layer 1/2 compaction decisions (may be artificially limited). */
|
|
190
|
+
private _contextWindow = 0;
|
|
191
|
+
|
|
192
|
+
/** Actual model context window for Layer 3 failsafe (never artificially limited). */
|
|
193
|
+
private _modelContextWindow = 0;
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Timestamp (ms) of the last user interaction. Used by the adaptive
|
|
197
|
+
* threshold system to decide how aggressively to compact. Updated by
|
|
198
|
+
* the consumer (backend) when a message-triggered tick fires.
|
|
199
|
+
* Null means no interaction has been recorded yet.
|
|
200
|
+
*/
|
|
201
|
+
private _lastInteractionTime: number | null = null;
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Timestamp (ms) of the last LLM call. Used by L1 to decide whether the
|
|
205
|
+
* prompt cache has gone cold. Updated automatically in
|
|
206
|
+
* updateCurrentContextTokenCount() (which fires after every LLM response).
|
|
207
|
+
* Null means no LLM call has been recorded yet (treated as cold).
|
|
208
|
+
*/
|
|
209
|
+
private _lastLlmCallTimestamp: number | null = null;
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Effective cache TTL (ms) for the current provider + cache retention.
|
|
213
|
+
* Zero means caching is unsupported or disabled, in which case L1 treats
|
|
214
|
+
* the cache as perpetually cold (trim freely). Set via setCacheInfo().
|
|
215
|
+
*/
|
|
216
|
+
private _providerCacheTtlMs = 0;
|
|
217
|
+
|
|
218
|
+
/** Consumer handlers for compaction lifecycle events. */
|
|
219
|
+
private beforeCompactionHandlers: BeforeCompactionHandler[] = [];
|
|
220
|
+
private postCompactionHandlers: PostCompactionHandler[] = [];
|
|
221
|
+
private compactionErrorHandlers: CompactionErrorHandler[] = [];
|
|
222
|
+
private compactionResultHandlers: Array<(result: CompactionResult) => void> = [];
|
|
223
|
+
private compactionDegradedHandlers: Array<(info: CompactionDegradedInfo) => void> = [];
|
|
224
|
+
private compactionExhaustedHandlers: Array<(info: CompactionExhaustedInfo) => void> = [];
|
|
225
|
+
|
|
226
|
+
/** Consecutive Layer 2 failure count for circuit breaker. Reset on success. */
|
|
227
|
+
private _consecutiveLayer2Failures = 0;
|
|
228
|
+
|
|
229
|
+
/** LLM completion function, set by CortexAgent. */
|
|
230
|
+
private completeFn: CompleteFn | null = null;
|
|
231
|
+
|
|
232
|
+
/** Logger for compaction diagnostics. */
|
|
233
|
+
private logger: CortexLogger = NOOP_LOGGER;
|
|
234
|
+
|
|
235
|
+
/**
 * Create a manager for the given compaction config.
 *
 * Always builds the microcompaction engine. The strategy defaults to
 * 'observational' when the config does not name one, and only that strategy
 * creates an ObservationalMemoryEngine; otherwise the engine stays null.
 *
 * NOTE(review): the observational engine receives `slotCount - 1` —
 * presumably the index of the last slot; confirm against the engine.
 *
 * @param config - Fully populated compaction config
 * @param slotCount - Number of slots, stored on the manager
 */
constructor(config: CortexCompactionConfig, slotCount: number) {
  this.config = config;
  this.slotCount = slotCount;
  this.microcompaction = new MicrocompactionEngine(config.microcompaction);

  const chosenStrategy = config.strategy ?? 'observational';
  this._strategy = chosenStrategy;

  if (chosenStrategy === 'observational') {
    const observationalConfig = config.observational ?? {};
    this.observationalEngine = new ObservationalMemoryEngine(
      observationalConfig,
      slotCount - 1,
    );
  }
}
|
|
251
|
+
|
|
252
|
+
// -----------------------------------------------------------------------
|
|
253
|
+
// Configuration
|
|
254
|
+
// -----------------------------------------------------------------------
|
|
255
|
+
|
|
256
|
+
/**
 * The compaction strategy chosen at construction time.
 */
get strategy(): 'observational' | 'classic' {
  return this._strategy;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Store the context budget used for Layer 1/2 compaction decisions and
 * forward it to the observational engine when one exists.
 *
 * The budget may be smaller than the model's real context window when a
 * user-configured limit applies.
 *
 * @param contextWindow - Context budget to use
 */
setContextWindow(contextWindow: number): void {
  this._contextWindow = contextWindow;

  const engine = this.observationalEngine;
  if (engine) {
    engine.setContextWindow(contextWindow);
  }
}
|
|
268
|
+
|
|
269
|
+
/**
 * Store the model's actual context window, used by the Layer 3 failsafe so
 * emergency truncation is judged against real model capacity rather than a
 * user-configured budget.
 *
 * The same value is also pushed to the observational engine as the utility
 * model context window, acting as a stand-in until
 * setUtilityModelContextWindow() supplies the real one.
 *
 * NOTE(review): the forward is unconditional, so calling this after
 * setUtilityModelContextWindow() hands the engine the primary model's window
 * again. Confirm whether callers always set the utility window last or the
 * engine handles precedence.
 *
 * @param modelContextWindow - The model's actual context window
 */
setModelContextWindow(modelContextWindow: number): void {
  this._modelContextWindow = modelContextWindow;

  const engine = this.observationalEngine;
  if (engine) {
    engine.setUtilityModelContextWindow(modelContextWindow);
  }
}
|
|
282
|
+
|
|
283
|
+
/**
 * Install the LLM completion function used for Layer 2 summarization.
 *
 * @param fn - Completion function to store on the manager
 */
setCompleteFn(fn: CompleteFn): void {
  this.completeFn = fn;
}
|
|
289
|
+
|
|
290
|
+
/**
 * Hand the observational memory engine its LLM completion function (the
 * utility model). Does nothing when no observational engine exists.
 *
 * @param fn - Completion function forwarded to the engine
 */
setObservationalCompleteFn(fn: CompleteFn): void {
  const engine = this.observationalEngine;
  if (engine) {
    engine.setCompleteFn(fn);
  }
}
|
|
296
|
+
|
|
297
|
+
/**
 * Forward the utility model's context window to the observational engine,
 * which uses it for observer/reflector clamps. Does nothing when no
 * observational engine exists.
 *
 * @param utilityModelContextWindow - Context window of the utility model
 */
setUtilityModelContextWindow(utilityModelContextWindow: number): void {
  const engine = this.observationalEngine;
  if (engine) {
    engine.setUtilityModelContextWindow(utilityModelContextWindow);
  }
}
|
|
303
|
+
|
|
304
|
+
/**
 * Replace the logger used for compaction diagnostics and share it with the
 * observational engine when one exists.
 *
 * @param logger - Logger to use from now on
 */
setLogger(logger: CortexLogger): void {
  this.logger = logger;

  const engine = this.observationalEngine;
  if (engine) {
    engine.setLogger(logger);
  }
}
|
|
311
|
+
|
|
312
|
+
/**
 * Record when the user last interacted with the system.
 *
 * Intended to be called by the consumer (backend) during GATHER for
 * message-triggered ticks only. Interval ticks do not call it, so the stored
 * timestamp simply ages between interactions.
 *
 * @param timestamp - Time of the interaction, in ms
 */
setLastInteractionTime(timestamp: number): void {
  this._lastInteractionTime = timestamp;
}
|
|
321
|
+
|
|
322
|
+
/**
 * Timestamp (ms) of the last recorded user interaction; null when none has
 * been recorded.
 */
get lastInteractionTime(): number | null {
  return this._lastInteractionTime;
}
|
|
328
|
+
|
|
329
|
+
/**
 * Record the active provider and cache retention by resolving them to an
 * effective cache TTL, which L1's cache-aware gating reads later.
 *
 * The TTL is looked up in PROVIDER_CACHE_CONFIG. It is set to 0 when the
 * provider has no entry, the entry is not marked supported, or retention is
 * 'none'. Otherwise 'long' selects the entry's long TTL and any other
 * retention selects its short TTL.
 *
 * Called by CortexAgent at construction, on provider changes, and on cache
 * retention changes.
 *
 * @param provider - The active provider name (e.g., "anthropic", "openai")
 * @param cacheRetention - The configured cache retention ('none' | 'short' | 'long')
 */
setCacheInfo(provider: string, cacheRetention: CacheRetention): void {
  const providerCache = PROVIDER_CACHE_CONFIG[provider];

  // Start from "caching off" and only raise the TTL when caching applies.
  let effectiveTtlMs = 0;
  if (providerCache && providerCache.supported && cacheRetention !== 'none') {
    if (cacheRetention === 'long') {
      effectiveTtlMs = providerCache.longTtlMs;
    } else {
      effectiveTtlMs = providerCache.shortTtlMs;
    }
  }

  this._providerCacheTtlMs = effectiveTtlMs;
}
|
|
346
|
+
|
|
347
|
+
/**
 * Report whether the prompt cache should be treated as cold.
 *
 * The cache is cold when any of these holds:
 *   - the effective TTL is <= 0 (caching unsupported or disabled);
 *   - no LLM call has been recorded yet;
 *   - at least one full TTL has elapsed since the last recorded LLM call.
 *
 * @param now - Current timestamp (ms), injectable for testing
 * @returns true when the cache is cold or unused, false while it is warm
 */
isCacheCold(now: number = Date.now()): boolean {
  const ttlMs = this._providerCacheTtlMs;
  const lastCallAt = this._lastLlmCallTimestamp;

  if (ttlMs <= 0 || lastCallAt === null) {
    return true;
  }

  const elapsedMs = now - lastCallAt;
  return elapsedMs >= ttlMs;
}
|
|
362
|
+
|
|
363
|
+
/**
 * Effective cache TTL (ms) resolved by setCacheInfo() for the current
 * provider and retention. A value of 0 means caching is unsupported or
 * disabled.
 */
get providerCacheTtlMs(): number {
  return this._providerCacheTtlMs;
}
|
|
370
|
+
|
|
371
|
+
/**
 * Timestamp (ms) of the most recently recorded LLM call; null when no call
 * has been recorded.
 */
get lastLlmCallTimestamp(): number | null {
  return this._lastLlmCallTimestamp;
}
|
|
377
|
+
|
|
378
|
+
/**
 * Effective Layer 2 compaction threshold for this manager.
 *
 * Delegates to computeAdaptiveThreshold() with the configured base threshold,
 * the adaptive settings, and the last recorded interaction time.
 *
 * @param now - Current timestamp (ms), injectable for testing; when omitted
 *   the callee's default (the current time) applies
 * @returns The threshold to use for the Layer 2 compaction decision
 */
getEffectiveThreshold(now?: number): number {
  const { compaction, adaptive } = this.config;
  const lastInteraction = this._lastInteractionTime;

  return computeAdaptiveThreshold(compaction.threshold, adaptive, lastInteraction, now);
}
|
|
392
|
+
|
|
393
|
+
// -----------------------------------------------------------------------
|
|
394
|
+
// Token Tracking
|
|
395
|
+
// -----------------------------------------------------------------------
|
|
396
|
+
|
|
397
|
+
/**
 * Record the post-hoc current-context token count reported by LLM usage data.
 *
 * Also stamps the time of the call so the cache-warmth check
 * (`isCacheCold`) has a reference point, and logs a warning when the count
 * falls to less than half of the previous non-zero value.
 *
 * @param inputTokens - Input token count reported for the latest LLM call
 */
updateCurrentContextTokenCount(inputTokens: number): void {
  const previousCount = this._currentContextTokenCount;
  this._currentContextTokenCount = inputTokens;

  // This method runs after every parent LLM call, which makes it the natural
  // place to remember when the prompt cache was last touched.
  this._lastLlmCallTimestamp = Date.now();

  this.logger.debug('[Compaction] updateCurrentContextTokenCount', {
    prev: previousCount,
    inputTokens,
  });

  // Surface large drops; they help diagnose token count display issues.
  const droppedByMoreThanHalf = previousCount > 0 && inputTokens < previousCount * 0.5;
  if (!droppedByMoreThanHalf) {
    return;
  }

  const dropPercent = ((1 - inputTokens / previousCount) * 100).toFixed(1);
  this.logger.warn('[Compaction] currentContextTokenCount dropped >50%', {
    prev: previousCount,
    inputTokens,
    drop: `${dropPercent}%`,
  });
}
|
|
417
|
+
|
|
418
|
+
/**
 * Post-hoc current-context token count, as last stored by
 * `updateCurrentContextTokenCount` or by a compaction/truncation pass.
 *
 * @returns The stored token count
 */
get currentContextTokenCount(): number {
  const tokenCount = this._currentContextTokenCount;
  return tokenCount;
}
|
|
424
|
+
|
|
425
|
+
/**
 * Context budget, i.e. the effective limit used for Layer 1 and Layer 2.
 *
 * @returns The configured budget in tokens
 */
get contextWindow(): number {
  const budget = this._contextWindow;
  return budget;
}
|
|
431
|
+
|
|
432
|
+
/**
 * The model's actual context window, used by the Layer 3 failsafe.
 *
 * @returns The stored model window in tokens
 */
get modelContextWindow(): number {
  const modelWindow = this._modelContextWindow;
  return modelWindow;
}
|
|
438
|
+
|
|
439
|
+
/**
 * Current context usage as a fraction of the context budget.
 *
 * @returns `currentContextTokenCount / contextWindow`, or `0` when the
 *          budget is zero or negative
 */
get usageRatio(): number {
  const budget = this._contextWindow;
  // Guard the division: an unset budget reports zero usage.
  return budget <= 0 ? 0 : this._currentContextTokenCount / budget;
}
|
|
446
|
+
|
|
447
|
+
/**
 * Estimate the current context size (tokens) for a transformed AgentContext.
 *
 * The result is the larger of the heuristic estimate of the given snapshot
 * and the post-hoc token count from the most recent parent turn. When no
 * post-hoc count is available (it is zero or negative) the heuristic
 * estimate is returned on its own.
 *
 * `applyInTransformContext` derives its pressure figure from this method, so
 * consumers see the same value the compaction decision uses.
 *
 * @param context - The context snapshot to estimate
 * @returns The estimated token count
 */
estimateCurrentContextTokens(context: AgentContext): number {
  const heuristic = this.estimateContextTokens(context);
  const postHoc = this._currentContextTokenCount;

  if (postHoc > 0) {
    return Math.max(postHoc, heuristic);
  }
  return heuristic;
}
|
|
463
|
+
|
|
464
|
+
// -----------------------------------------------------------------------
|
|
465
|
+
// Event Handlers
|
|
466
|
+
// -----------------------------------------------------------------------
|
|
467
|
+
|
|
468
|
+
/**
 * Register a before-compaction handler.
 *
 * The handler is appended to the list that is handed to `runCompaction` as
 * `onBeforeCompaction`.
 *
 * @param handler - Callback to add
 */
onBeforeCompaction(handler: BeforeCompactionHandler): void {
  const handlers = this.beforeCompactionHandlers;
  handlers.push(handler);
}
|
|
474
|
+
|
|
475
|
+
/**
 * Register a post-compaction handler.
 *
 * The handler is appended to the list that is handed to `runCompaction` as
 * `onPostCompaction`.
 *
 * @param handler - Callback to add
 */
onPostCompaction(handler: PostCompactionHandler): void {
  const handlers = this.postCompactionHandlers;
  handlers.push(handler);
}
|
|
481
|
+
|
|
482
|
+
/**
 * Register a compaction-error handler.
 *
 * The handler is appended to the list that is handed to `runCompaction` as
 * `onCompactionError`.
 *
 * @param handler - Callback to add
 */
onCompactionError(handler: CompactionErrorHandler): void {
  const handlers = this.compactionErrorHandlers;
  handlers.push(handler);
}
|
|
488
|
+
|
|
489
|
+
/**
 * Register a handler that receives the `CompactionResult` after a successful
 * Layer 2 run (used for CortexAgent event emission).
 *
 * @param handler - Callback invoked with the compaction result
 */
onCompactionResult(handler: (result: CompactionResult) => void): void {
  const handlers = this.compactionResultHandlers;
  handlers.push(handler);
}
|
|
495
|
+
|
|
496
|
+
/**
 * Register a handler for the "degraded" case: Layer 2 failed and Layer 3
 * truncation was used as the fallback.
 *
 * @param handler - Callback receiving the failure count and turns dropped
 */
onCompactionDegraded(handler: (info: CompactionDegradedInfo) => void): void {
  const handlers = this.compactionDegradedHandlers;
  handlers.push(handler);
}
|
|
502
|
+
|
|
503
|
+
/**
 * Register a handler for the "exhausted" case, i.e. when all compaction
 * layers have failed to bring the context back under budget.
 *
 * @param handler - Callback receiving the last error and failure count
 */
onCompactionExhausted(handler: (info: CompactionExhaustedInfo) => void): void {
  const handlers = this.compactionExhaustedHandlers;
  handlers.push(handler);
}
|
|
509
|
+
|
|
510
|
+
// -----------------------------------------------------------------------
|
|
511
|
+
// Observational Memory
|
|
512
|
+
// -----------------------------------------------------------------------
|
|
513
|
+
|
|
514
|
+
/**
 * Turn-end hook: forwards to the observational engine so it can run its
 * async buffer checks. Does nothing when no observational engine is set.
 *
 * @param totalTokens - Total token count, forwarded unchanged
 * @param contextWindow - Context window size, forwarded unchanged
 * @param messages - Conversation messages, forwarded unchanged
 * @param slotCount - Number of leading slot messages, forwarded unchanged
 */
onTurnEnd(totalTokens: number, contextWindow: number, messages: AgentMessage[], slotCount: number): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.onTurnEnd(totalTokens, contextWindow, messages, slotCount);
}
|
|
520
|
+
|
|
521
|
+
/**
 * Register an observation event handler on the observational engine.
 * Does nothing when no observational engine is set.
 *
 * @param handler - Callback receiving each observation event
 */
onObservation(handler: (event: ObservationEvent) => void): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.onObservation(handler);
}
|
|
527
|
+
|
|
528
|
+
/**
 * Register a reflection event handler on the observational engine.
 * Does nothing when no observational engine is set.
 *
 * @param handler - Callback receiving each reflection event
 */
onReflection(handler: (event: ReflectionEvent) => void): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.onReflection(handler);
}
|
|
534
|
+
|
|
535
|
+
/**
 * Snapshot the observational memory state for persistence.
 *
 * @returns The engine's state, or `null` when there is no observational
 *          engine or the engine reports no state
 */
getObservationalMemoryState(): ObservationalMemoryState | null {
  const engine = this.observationalEngine;
  if (engine == null) {
    return null;
  }
  return engine.getState() ?? null;
}
|
|
541
|
+
|
|
542
|
+
/**
 * Restore observational memory state captured in a previous session.
 * Does nothing when no observational engine is set.
 *
 * @param state - Previously persisted state to hand to the engine
 */
restoreObservationalMemoryState(state: ObservationalMemoryState): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.restoreState(state);
}
|
|
548
|
+
|
|
549
|
+
/**
 * Force an observation cycle and wait for it to finish.
 * Resolves without doing any work when no observational engine is set.
 *
 * @param messages - Conversation messages to observe
 * @param slotCount - Number of leading slot messages
 */
async triggerObservation(messages: AgentMessage[], slotCount: number): Promise<void> {
  // `pending` is undefined when there is no engine; awaiting it is then a no-op.
  const pending = this.observationalEngine?.triggerObservation(messages, slotCount);
  await pending;
}
|
|
555
|
+
|
|
556
|
+
/**
 * Kick off an initial async buffer over unobserved messages.
 *
 * Intended for session resumption, to give the observer a head start before
 * the first prompt(). Does nothing when no observational engine is set.
 *
 * @param messages - Conversation messages to buffer
 * @param slotCount - Number of leading slot messages
 */
kickstartBuffer(messages: AgentMessage[], slotCount: number): void {
  const engine = this.observationalEngine;
  if (engine == null) {
    return;
  }
  engine.kickstartBuffer(messages, slotCount);
}
|
|
563
|
+
|
|
564
|
+
/**
 * Observation slot content string (for ContextManager.setSlot).
 *
 * @returns The engine's slot content, or an empty string when there is no
 *          observational engine or it has no content
 */
getObservationSlotContent(): string {
  const engine = this.observationalEngine;
  if (engine == null) {
    return '';
  }
  return engine.getSlotContent() ?? '';
}
|
|
570
|
+
|
|
571
|
+
/**
 * Whether any observations have been produced.
 *
 * @returns `true` when the engine's observation text is non-empty; `false`
 *          when it is empty or there is no observational engine
 */
hasObservations(): boolean {
  const observationText = this.observationalEngine?.getObservations() ?? '';
  return observationText.length > 0;
}
|
|
577
|
+
|
|
578
|
+
/**
 * Whether the recall tool should be registered.
 *
 * @returns The engine's `hasRecall()` answer, or `false` when there is no
 *          observational engine
 */
hasRecallTool(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.hasRecall() ?? false;
}
|
|
584
|
+
|
|
585
|
+
/**
 * Recall configuration, if available.
 *
 * @returns The engine's recall config, or `undefined` when there is no
 *          observational engine
 */
getRecallConfig() {
  const engine = this.observationalEngine;
  return engine == null ? undefined : engine.getRecallConfig();
}
|
|
591
|
+
|
|
592
|
+
/**
 * Current token count of activated observations only.
 *
 * @returns The engine's count, or `0` when not using the observational strategy
 */
getObservationTokenCount(): number {
  const engine = this.observationalEngine;
  if (engine == null) {
    return 0;
  }
  return engine.getObservationTokenCount() ?? 0;
}
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
/**
 * Whether the observer or reflector is currently running in the background.
 *
 * @returns The engine's `isProcessing()` answer, or `false` when not using
 *          the observational strategy
 */
isObservationalProcessing(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.isProcessing() ?? false;
}
|
|
608
|
+
|
|
609
|
+
/**
 * Whether the observer specifically is in-flight.
 *
 * @returns The engine's answer, or `false` when there is no observational engine
 */
isObserverInFlight(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.isObserverInFlight() ?? false;
}
|
|
615
|
+
|
|
616
|
+
/**
 * Whether the reflector specifically is in-flight.
 *
 * @returns The engine's answer, or `false` when there is no observational engine
 */
isReflectorInFlight(): boolean {
  const engine = this.observationalEngine;
  if (engine == null) {
    return false;
  }
  return engine.isReflectorInFlight() ?? false;
}
|
|
622
|
+
|
|
623
|
+
// -----------------------------------------------------------------------
|
|
624
|
+
// Insertion-time cap
|
|
625
|
+
// -----------------------------------------------------------------------
|
|
626
|
+
|
|
627
|
+
/**
 * Cap a single tool result at insertion time, before it enters conversation
 * history. Delegates to the microcompaction engine's `capAtInsertion`.
 *
 * @param content - Raw tool result text
 * @returns The text returned by the microcompaction engine
 */
capToolResult(content: string): string {
  const capped = this.microcompaction.capAtInsertion(content);
  return capped;
}
|
|
633
|
+
|
|
634
|
+
/**
 * Apply the insertion-time cap to tool results in the source messages array.
 * Affected entries of `messages` are replaced in place with updated copies.
 *
 * Called from the transformContext hook on `agent.state.messages` so that
 * Tier 1 capping is applied automatically when tool results enter
 * conversation history through pi-agent-core's internal tool execution loop.
 *
 * Phase 1 caps each tool-result text part individually via `capToolResult`.
 * Parts whose text already contains the insertion marker are skipped, so a
 * part is capped at most once.
 *
 * Phase 2 enforces an aggregate token budget per tool-result message
 * (`maxAggregateTurnTokens`, default 150,000; a value <= 0 disables it).
 * While a message is over budget, its largest parts are replaced by a
 * bookended preview, largest first, stopping once the remaining parts are no
 * larger than half of `maxResultTokens`. When `persistResult` is configured
 * the full text is persisted first and the preview points at the returned
 * path; if persisting fails the failure is logged and the plain preview is
 * used instead.
 *
 * A message is only written back when at least one of its parts actually
 * changed, so untouched messages keep their object identity.
 *
 * @param messages - The source messages array (mutated in place)
 * @param slotCount - Number of slot messages to skip at the start
 */
async applyInsertionCap(messages: AgentMessage[], slotCount: number): Promise<void> {
  const config = this.microcompaction.getConfig();

  // Phase 1: Individual per-result cap
  for (let i = slotCount; i < messages.length; i++) {
    const msg = messages[i]!;
    if (!isToolResultMessage(msg)) continue;
    if (typeof msg.content === 'string') continue;

    let modified = false;
    const newContent = msg.content.map(part => {
      // Two shapes carry tool output text: legacy `tool_result` parts and
      // `text` parts inside a runtime `toolResult` message.
      const isLegacyToolResult = part.type === 'tool_result' && typeof part.text === 'string';
      const isRuntimeToolResultText = msg.role === 'toolResult' && part.type === 'text' && typeof part.text === 'string';
      if (!isLegacyToolResult && !isRuntimeToolResultText) {
        return part;
      }
      // Skip already-capped content (detected by the insertion marker text).
      if ((part.text as string).includes('tokens trimmed at insertion')) {
        return part;
      }
      const capped = capToolResult(part.text as string, config);
      if (capped !== part.text) {
        modified = true;
        return { ...part, text: capped };
      }
      return part;
    });

    if (modified) {
      messages[i] = { ...msg, content: newContent };
    }
  }

  // Phase 2: Aggregate per-message budget
  const aggregateLimit = config.maxAggregateTurnTokens ?? 150_000;
  if (aggregateLimit <= 0) return;

  for (let i = slotCount; i < messages.length; i++) {
    const msg = messages[i]!;
    if (!isToolResultMessage(msg)) continue;
    if (typeof msg.content === 'string') continue;

    const parts = msg.content;
    const partInfos: Array<{ index: number; tokens: number; text: string; toolName: string }> = [];
    let totalTokens = 0;

    for (let p = 0; p < parts.length; p++) {
      const part = parts[p]!;
      const isLegacyToolResult = part.type === 'tool_result' && typeof part.text === 'string';
      const isRuntimeToolResultText = msg.role === 'toolResult' && part.type === 'text' && typeof part.text === 'string';
      if (!isLegacyToolResult && !isRuntimeToolResultText) {
        continue;
      }

      const text = part.text as string;
      const tokens = estimateTokens(text);
      // Prefer a name carried on the part itself, then one derived from the
      // message, then a placeholder.
      const name = (typeof (part as Record<string, unknown>)['name'] === 'string'
        ? (part as Record<string, unknown>)['name'] as string
        : null) ?? extractToolName(msg) ?? 'unknown';
      partInfos.push({ index: p, tokens, text, toolName: name });
      totalTokens += tokens;
    }

    if (totalTokens <= aggregateLimit) continue;

    // Shrink the largest parts first until the message fits the budget.
    const sorted = [...partInfos].sort((a, b) => b.tokens - a.tokens);
    const newParts = [...parts];
    let currentTotal = totalTokens;
    let replacedAny = false;

    for (const info of sorted) {
      if (currentTotal <= aggregateLimit) break;
      // Remaining parts are all at most this size (sorted descending), so
      // there is nothing left worth trimming.
      if (info.tokens <= config.maxResultTokens / 2) break;

      const part = newParts[info.index]!;
      const bookended = applyBookend(info.text, config.bookendMaxChars, config.bookendMaxChars, info.tokens);
      let replacement = bookended;

      if (config.persistResult) {
        const category = getToolCategory(info.toolName, config.toolCategories);
        try {
          const path = await config.persistResult(info.text, {
            toolName: info.toolName,
            messageIndex: i,
            category: category ?? 'rereadable',
          });
          replacement = `${bookended}\n\n[Full content persisted to ${path} -- use Read to access]`;
        } catch (err) {
          // Persisting is best-effort: keep the bookended preview, but leave
          // a trace so lost full-text copies can be diagnosed.
          this.logger.warn('[Compaction] persistResult failed, using bookended preview only', {
            toolName: info.toolName,
            messageIndex: i,
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      const newTokens = estimateTokens(replacement);
      currentTotal = currentTotal - info.tokens + newTokens;
      newParts[info.index] = { ...part, text: replacement };
      replacedAny = true;
    }

    // Only swap the message when something changed; otherwise a message that
    // stays over budget would be re-allocated on every call.
    if (replacedAny) {
      messages[i] = { ...msg, content: newParts };
    }
  }
}
|
|
748
|
+
|
|
749
|
+
// -----------------------------------------------------------------------
|
|
750
|
+
// transformContext hook
|
|
751
|
+
// -----------------------------------------------------------------------
|
|
752
|
+
|
|
753
|
+
/**
 * Apply compaction layers to the context in transformContext.
 *
 * This is the main entry point called from CortexAgent.getTransformContextHook().
 * All compaction layers are driven from here based on token thresholds; no
 * external call is needed to trigger compaction.
 *
 * Execution order:
 *   1. Observational strategy (when active and both source accessors are
 *      given): the observational engine transforms the context, then
 *      Layer 1 (microcompaction) runs on the surviving history.
 *      Otherwise, the classic path:
 *        - Layer 1 (microcompaction): tool result trimming, gated on whether
 *          the prompt cache is cold.
 *        - Layer 2 (summarization): when the post-Layer-1 total crosses the
 *          effective threshold, LLM summarization runs on the original
 *          transcript (retried up to `maxRetries` times) and the working
 *          history is rebuilt from the compacted source.
 *   2. Layer 3 (failsafe): when the total still crosses the failsafe
 *      threshold of the model's actual window, emergency truncation runs.
 *
 * Events: "degraded" handlers fire when Layer 3 ran after a Layer 2 failure;
 * "exhausted" handlers fire when Layer 2 failed, Layer 3 did not run, and the
 * context is still over the Layer 2 threshold.
 *
 * @param context - The AgentContext from transformContext
 * @param getHistory - Function to get conversation history from the context
 * @param setHistory - Function to set conversation history in the context
 * @param getSourceHistory - Function to get the original transcript history (agent.state.messages post-slot)
 * @param setSourceHistory - Function to replace the original transcript history (agent.state.messages)
 * @returns Modified context with compacted history; the input context is
 *          returned untouched when the context window is unset or history is empty
 */
async applyInTransformContext(
  context: AgentContext,
  getHistory: (ctx: AgentContext) => AgentMessage[],
  setHistory: (ctx: AgentContext, history: AgentMessage[]) => AgentContext,
  getSourceHistory?: () => AgentMessage[],
  setSourceHistory?: (history: AgentMessage[]) => void,
): Promise<AgentContext> {
  if (this._contextWindow <= 0) {
    // contextWindow not set, skip compaction
    return context;
  }

  let history = getHistory(context);
  if (history.length === 0) {
    return context;
  }

  // Use the current transformed context estimate as a first-class input.
  // Post-hoc token tracking from the previous turn is useful, but it can be
  // stale when transformContext injects large ephemeral content on this turn.
  // The heuristic is computed once and combined with the post-hoc count the
  // same way estimateCurrentContextTokens() does.
  const estimatedCurrentTokens = this.estimateContextTokens(context);
  const currentTokens = this._currentContextTokenCount > 0
    ? Math.max(this._currentContextTokenCount, estimatedCurrentTokens)
    : estimatedCurrentTokens;

  this.logger.debug('[Compaction] transformContext', {
    historyLen: history.length,
    currentContextTokens: this._currentContextTokenCount,
    heuristic: estimatedCurrentTokens,
    currentTokens,
    ctxWindow: this._contextWindow,
  });

  // Compute utilization and slot tokens (shared by both strategies and L3).
  // Slot tokens are whatever the full-context figure holds beyond the history.
  const originalHistoryTokens = this.estimateHistoryTokens(history);
  const slotTokens = Math.max(0, currentTokens - originalHistoryTokens);
  const utilization = this._contextWindow > 0 ? currentTokens / this._contextWindow : 0;

  let layer2Failed = false;
  let lastLayer2Error: Error | undefined;
  let effectiveThreshold = 0;

  const cacheCold = this.isCacheCold();

  if (this._strategy === 'observational' && this.observationalEngine && getSourceHistory && setSourceHistory) {
    // Observational memory path: observer/reflector handle conversation
    // compression. L2 summarization is skipped, but L1 still runs in
    // cache-aware mode on the unobserved tail to trim large tool results
    // before they hit the LLM.
    context = await this.observationalEngine.applyInTransformContext(
      context, utilization, this.slotCount, getHistory, setHistory, getSourceHistory, setSourceHistory,
    );
    history = getHistory(context);

    // Run L1 on the surviving (post-observation) history. Cache-aware
    // gating ensures we only trim when the prompt cache has gone cold,
    // preserving cache hits during active use. Re-estimate from the
    // updated context so the observation slot's new size is reflected.
    // NOTE(review): `slotTokens` above was measured before the engine ran and
    // is reused by the Layer 3 check below — confirm that is intended.
    const postObsTotal = this.estimateCurrentContextTokens(context);
    const trimmedHistory = await this.microcompaction.apply(
      history, this._contextWindow, postObsTotal, { cacheCold },
    );
    if (trimmedHistory !== history) {
      context = setHistory(context, trimmedHistory);
      history = trimmedHistory;
    }
  } else {
    // Classic path: L1 + L2

    // Layer 1: Microcompaction. Cache-aware gating: only trims when the
    // prompt cache is cold (or unsupported). When warm, returns history
    // untouched to preserve cache hits.
    history = await this.microcompaction.apply(
      history, this._contextWindow, currentTokens, { cacheCold },
    );

    // Layer 2: Conversation summarization.
    // Operates on the original transcript (agent.state.messages), not the
    // in-memory microcompacted context. After Layer 2 modifies the source,
    // we rebuild the context from the updated messages.
    const postMicroTokens = this.estimateHistoryTokens(history);
    const totalAfterMicro = slotTokens + postMicroTokens;

    effectiveThreshold = this.getEffectiveThreshold();
    const overLayer2Threshold = shouldCompact(totalAfterMicro, this._contextWindow, effectiveThreshold);

    this.logger.debug('[Compaction] Layer2 evaluation', {
      totalAfterMicro,
      threshold: effectiveThreshold,
      ratio: totalAfterMicro / this._contextWindow,
      completeFn: !!this.completeFn,
      srcAccessors: !!getSourceHistory && !!setSourceHistory,
      shouldCompact: overLayer2Threshold,
    });

    if (
      this.completeFn &&
      getSourceHistory &&
      setSourceHistory &&
      overLayer2Threshold
    ) {
      const maxRetries = this.config.compaction.maxRetries ?? 3;
      const retryDelayMs = this.config.compaction.retryDelayMs ?? 2000;
      let succeeded = false;

      for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
          const sourceHistory = getSourceHistory();
          // An empty transcript leaves nothing to summarize; the loop ends
          // without success, so this is reported as a Layer 2 failure below.
          if (sourceHistory.length === 0) break;

          const { newHistory: compactedSource, result } = await runCompaction(
            sourceHistory,
            this.config.compaction,
            this.completeFn,
            {
              onBeforeCompaction: this.beforeCompactionHandlers,
              onPostCompaction: this.postCompactionHandlers,
              onCompactionError: this.compactionErrorHandlers,
            },
            currentTokens, // pass actual full-context token count for accurate reporting
          );

          // Success: update state and reset failure counter
          setSourceHistory(compactedSource);
          this.microcompaction.resetCache();

          // result.tokensAfter now includes overhead (system prompt, slots,
          // tool definitions) since we passed actualContextTokens to
          // runCompaction. Use it directly to prevent the stale low value
          // that would cause re-triggering compaction on the next call.
          this._currentContextTokenCount = result.tokensAfter;

          this._consecutiveLayer2Failures = 0;

          // A throwing listener must not undo a successful compaction.
          for (const handler of this.compactionResultHandlers) {
            try {
              handler(result);
            } catch (err) {
              this.logger.error('[Compaction] compactionResult handler threw', {
                error: err instanceof Error ? err.message : String(err),
              });
            }
          }

          // Rebuild context from updated source. L2 just rewrote history
          // wholesale, so any existing cache prefix is invalidated; treat as
          // cold so L1 can trim the rebuilt history if warranted.
          history = await this.microcompaction.apply(
            compactedSource,
            this._contextWindow,
            this._currentContextTokenCount,
            { cacheCold: true },
          );

          succeeded = true;
          break;
        } catch (err) {
          this._consecutiveLayer2Failures++;
          lastLayer2Error = err instanceof Error ? err : new Error(String(err));
          this.logger.warn('[Compaction] Layer2 retry failed', {
            attempt,
            maxRetries,
            error: lastLayer2Error.message,
          });

          // Back off before the next attempt, but not after the last one.
          if (attempt < maxRetries) {
            await new Promise(resolve => setTimeout(resolve, retryDelayMs));
          }
        }
      }

      if (!succeeded) {
        layer2Failed = true;
      }
    }
  }

  // Layer 3: Emergency truncation.
  // Uses the MODEL's actual context window, not the budget. Emergency
  // truncation should only fire when we're near the model's real limit,
  // not the user's artificial budget. Layer 1/2 handle the budget.
  // When observational memory is active, L3 operates on the post-slot
  // history (raw messages only). The observation slot lives in the slot
  // region and is naturally protected by slotCount.
  {
    const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
    const postLayerTokens = this.estimateHistoryTokens(history);
    const totalNow = slotTokens + postLayerTokens;

    if (shouldTruncate(totalNow, failsafeWindow, this.config.failsafe.threshold)) {
      // Force sync observation before L3 truncation to capture unobserved
      // content before it is dropped. The source history from getSourceHistory
      // is already post-slot, so pass 0 as slotCount.
      if (this._strategy === 'observational' && this.observationalEngine && getSourceHistory) {
        try {
          const sourceHistory = getSourceHistory();
          await this.observationalEngine.triggerObservation(sourceHistory, 0);
        } catch (err) {
          // The failsafe has to run even if the observer cannot: a failed
          // observation must not leave the context over the model's limit.
          this.logger.error('[Compaction] pre-truncation observation failed', {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      const truncResult = emergencyTruncate(
        history,
        failsafeWindow,
        slotTokens,
        this.config.failsafe.threshold,
      );
      history = truncResult.newHistory;

      // Emit degraded event if Layer 3 was used as fallback for Layer 2 failure
      if (layer2Failed) {
        const failures = this._consecutiveLayer2Failures;
        this._consecutiveLayer2Failures = 0;
        for (const handler of this.compactionDegradedHandlers) {
          try {
            handler({
              layer2Failures: failures,
              turnsDropped: truncResult.turnsRemoved,
            });
          } catch (err) {
            this.logger.error('[Compaction] compactionDegraded handler threw', {
              error: err instanceof Error ? err.message : String(err),
            });
          }
        }
      }
    } else if (layer2Failed) {
      // Layer 2 failed but Layer 3 didn't need to run. If tokens are still
      // over the Layer 2 budget, emit exhausted so the consumer can act.
      const postTokens = this.estimateHistoryTokens(history);
      const stillOverBudget = shouldCompact(slotTokens + postTokens, this._contextWindow, effectiveThreshold);

      if (stillOverBudget) {
        const failures = this._consecutiveLayer2Failures;
        this._consecutiveLayer2Failures = 0;
        for (const handler of this.compactionExhaustedHandlers) {
          try {
            handler({
              error: lastLayer2Error ?? new Error('Layer 2 compaction failed'),
              layer2Failures: failures,
            });
          } catch (err) {
            this.logger.error('[Compaction] compactionExhausted handler threw', {
              error: err instanceof Error ? err.message : String(err),
            });
          }
        }
      }
    }
  }

  return setHistory(context, history);
}
|
|
1023
|
+
|
|
1024
|
+
// -----------------------------------------------------------------------
|
|
1025
|
+
// End-of-tick compaction check
|
|
1026
|
+
// -----------------------------------------------------------------------
|
|
1027
|
+
|
|
1028
|
+
/**
 * Manually check if compaction is needed and run it.
 *
 * This is a convenience API for consumers who want to trigger compaction
 * outside the agentic loop (e.g., for testing or manual maintenance).
 * The primary compaction trigger is `applyInTransformContext`, which runs
 * automatically before every LLM call.
 *
 * Compaction is attempted when either the post-hoc token count or the
 * heuristic history estimate crosses the effective Layer 2 threshold.
 * Layer 2 (summarization) runs first when a completion function is
 * available. If it is unavailable or fails, Layer 3 (emergency truncation)
 * runs when the larger of the two token figures crosses the failsafe
 * threshold of the model's actual window.
 *
 * @param getHistory - Get current conversation history
 * @param setHistory - Replace conversation history
 * @returns The CompactionResult when Layer 2 summarization succeeded;
 *          `null` otherwise, including when only Layer 3 truncation ran
 */
async checkAndRunCompaction(
  getHistory: () => AgentMessage[],
  setHistory: (history: AgentMessage[]) => void,
): Promise<CompactionResult | null> {
  if (this._contextWindow <= 0) return null;

  const history = getHistory();
  if (history.length === 0) return null;

  const estimatedTokens = this.estimateHistoryTokens(history);

  // Use adaptive threshold (adjusts based on interaction recency)
  const effectiveThreshold = this.getEffectiveThreshold();

  // Check Layer 2 threshold: the post-hoc count first, then the heuristic
  // estimate as a fallback.
  if (
    !shouldCompact(this._currentContextTokenCount, this._contextWindow, effectiveThreshold) &&
    !shouldCompact(estimatedTokens, this._contextWindow, effectiveThreshold)
  ) {
    return null;
  }

  // Attempt Layer 2 (summarization)
  if (this.completeFn) {
    try {
      const actualTokens = Math.max(this._currentContextTokenCount, estimatedTokens);
      const { newHistory, result } = await runCompaction(
        history,
        this.config.compaction,
        this.completeFn,
        {
          onBeforeCompaction: this.beforeCompactionHandlers,
          onPostCompaction: this.postCompactionHandlers,
          onCompactionError: this.compactionErrorHandlers,
        },
        actualTokens, // pass full-context token count for accurate reporting
      );

      setHistory(newHistory);
      this.microcompaction.resetCache();

      // result.tokensAfter includes overhead since we passed actualTokens
      this._currentContextTokenCount = result.tokensAfter;

      // Emit result. A throwing listener must not undo a successful
      // compaction, but it is logged rather than dropped silently.
      for (const handler of this.compactionResultHandlers) {
        try {
          handler(result);
        } catch (err) {
          this.logger.error('[Compaction] compactionResult handler threw', {
            error: err instanceof Error ? err.message : String(err),
          });
        }
      }

      return result;
    } catch (err) {
      // Layer 2 failed, fall through to Layer 3
      this.logger.warn('[Compaction] Layer2 failed in checkAndRunCompaction, falling back to Layer3', {
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  // Layer 3 fallback: emergency truncation (uses model's actual window).
  // The decision uses the larger of the post-hoc count and the heuristic
  // estimate, matching the trigger above, so a zero or stale post-hoc count
  // cannot hide an over-limit history.
  const failsafeWindow = this._modelContextWindow > 0 ? this._modelContextWindow : this._contextWindow;
  const totalTokens = Math.max(this._currentContextTokenCount, estimatedTokens);
  const slotTokens = Math.max(0, this._currentContextTokenCount - estimatedTokens);
  if (shouldTruncate(totalTokens, failsafeWindow, this.config.failsafe.threshold)) {
    const result = emergencyTruncate(
      history,
      failsafeWindow,
      slotTokens,
      this.config.failsafe.threshold,
    );
    setHistory(result.newHistory);
    this.microcompaction.resetCache();
    this._currentContextTokenCount = result.tokensAfter;
  }

  return null;
}
|
|
1117
|
+
|
|
1118
|
+
// -----------------------------------------------------------------------
|
|
1119
|
+
// Reactive overflow handling
|
|
1120
|
+
// -----------------------------------------------------------------------
|
|
1121
|
+
|
|
1122
|
+
/**
 * React to a context overflow error by running emergency truncation on the
 * current history. Intended to be called after the API has reported a
 * context overflow.
 *
 * Does nothing when the history is empty. Otherwise the truncated history is
 * written back through `setHistory`, the microcompaction cache is reset, and
 * the tracked context token count is replaced by the truncation result.
 *
 * @param getHistory - Returns the current conversation history
 * @param setHistory - Replaces the conversation history
 */
handleOverflowError(
  getHistory: () => AgentMessage[],
  setHistory: (history: AgentMessage[]) => void,
): void {
  const messages = getHistory();
  if (messages.length !== 0) {
    // The API itself rejected the request, so prefer the model's real window
    // whenever one is known; otherwise fall back to the configured window.
    let windowSize: number;
    if (this._modelContextWindow > 0) {
      windowSize = this._modelContextWindow;
    } else {
      windowSize = this._contextWindow;
    }

    // Portion of the tracked token count that the history estimate does not
    // account for, floored at zero.
    const historyEstimate = this.estimateHistoryTokens(messages);
    const nonHistoryTokens = Math.max(0, this._currentContextTokenCount - historyEstimate);

    const truncation = emergencyTruncate(
      messages,
      windowSize,
      nonHistoryTokens,
      this.config.failsafe.threshold,
    );

    setHistory(truncation.newHistory);
    this.microcompaction.resetCache();
    this._currentContextTokenCount = truncation.tokensAfter;
  }
}
|
|
1152
|
+
|
|
1153
|
+
// -----------------------------------------------------------------------
|
|
1154
|
+
// Cleanup
|
|
1155
|
+
// -----------------------------------------------------------------------
|
|
1156
|
+
|
|
1157
|
+
/**
 * Tear the manager down: reset the microcompaction cache, abort and drop the
 * observational engine (if one exists), remove every registered handler,
 * forget the completion function, and zero the tracked counters.
 */
destroy(): void {
  this.microcompaction.resetCache();

  // Abort the engine before any other field is cleared.
  const engine = this.observationalEngine;
  if (engine != null) {
    engine.abort();
  }
  this.observationalEngine = null;

  // Each handler list gets its own fresh empty array.
  this.beforeCompactionHandlers = [];
  this.postCompactionHandlers = [];
  this.compactionErrorHandlers = [];
  this.compactionResultHandlers = [];
  this.compactionDegradedHandlers = [];
  this.compactionExhaustedHandlers = [];

  // Remaining state.
  this.completeFn = null;
  this._currentContextTokenCount = 0;
  this._consecutiveLayer2Failures = 0;
  this._lastInteractionTime = null;
}
|
|
1175
|
+
|
|
1176
|
+
// -----------------------------------------------------------------------
|
|
1177
|
+
// Internal helpers
|
|
1178
|
+
// -----------------------------------------------------------------------
|
|
1179
|
+
|
|
1180
|
+
/**
 * Estimate the token count of a list of history messages.
 *
 * The text content of every message is extracted, the pieces are joined with
 * newlines, and the combined string is passed to `estimateTokens` once.
 *
 * @param history - Messages to measure
 * @returns The estimate for the combined text
 */
private estimateHistoryTokens(history: AgentMessage[]): number {
  const texts = history.map((message) => extractTextContent(message));
  const combined = texts.join('\n');
  return estimateTokens(combined);
}
|
|
1188
|
+
|
|
1189
|
+
/**
 * Estimate the total tokens of an AgentContext: the system prompt estimate
 * plus a separate estimate for the text content of each message.
 *
 * @param context - Context whose system prompt and messages are measured
 * @returns Sum of the individual estimates
 */
private estimateContextTokens(context: AgentContext): number {
  const promptTokens = estimateTokens(context.systemPrompt);
  return context.messages.reduce(
    (runningTotal, message) => runningTotal + estimateTokens(extractTextContent(message)),
    promptTokens,
  );
}
|
|
1199
|
+
}
|