mcp-agent-foundry 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +350 -52
- package/dist/background/index.d.ts +33 -0
- package/dist/background/index.d.ts.map +1 -0
- package/dist/background/index.js +33 -0
- package/dist/background/index.js.map +1 -0
- package/dist/background/task-runner.d.ts +177 -0
- package/dist/background/task-runner.d.ts.map +1 -0
- package/dist/background/task-runner.js +551 -0
- package/dist/background/task-runner.js.map +1 -0
- package/dist/background/types.d.ts +135 -0
- package/dist/background/types.d.ts.map +1 -0
- package/dist/background/types.js +8 -0
- package/dist/background/types.js.map +1 -0
- package/dist/failover/health-tracker.d.ts +81 -1
- package/dist/failover/health-tracker.d.ts.map +1 -1
- package/dist/failover/health-tracker.js +229 -1
- package/dist/failover/health-tracker.js.map +1 -1
- package/dist/hooks/hook-executor.d.ts +77 -0
- package/dist/hooks/hook-executor.d.ts.map +1 -0
- package/dist/hooks/hook-executor.js +308 -0
- package/dist/hooks/hook-executor.js.map +1 -0
- package/dist/hooks/hook-manager.d.ts +140 -0
- package/dist/hooks/hook-manager.d.ts.map +1 -0
- package/dist/hooks/hook-manager.js +520 -0
- package/dist/hooks/hook-manager.js.map +1 -0
- package/dist/hooks/index.d.ts +10 -0
- package/dist/hooks/index.d.ts.map +1 -0
- package/dist/hooks/index.js +10 -0
- package/dist/hooks/index.js.map +1 -0
- package/dist/hooks/types.d.ts +221 -0
- package/dist/hooks/types.d.ts.map +1 -0
- package/dist/hooks/types.js +31 -0
- package/dist/hooks/types.js.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/mcp/auto-mode.d.ts +221 -0
- package/dist/mcp/auto-mode.d.ts.map +1 -0
- package/dist/mcp/auto-mode.js +436 -0
- package/dist/mcp/auto-mode.js.map +1 -0
- package/dist/mcp/index.d.ts +14 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +22 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/tools/tasks/delete-task.d.ts +25 -0
- package/dist/mcp/tools/tasks/delete-task.d.ts.map +1 -0
- package/dist/mcp/tools/tasks/delete-task.js +148 -0
- package/dist/mcp/tools/tasks/delete-task.js.map +1 -0
- package/dist/mcp/tools/tasks/index.d.ts +2 -0
- package/dist/mcp/tools/tasks/index.d.ts.map +1 -1
- package/dist/mcp/tools/tasks/index.js +6 -0
- package/dist/mcp/tools/tasks/index.js.map +1 -1
- package/dist/observability/debug-logger.d.ts +209 -0
- package/dist/observability/debug-logger.d.ts.map +1 -0
- package/dist/observability/debug-logger.js +430 -0
- package/dist/observability/debug-logger.js.map +1 -0
- package/dist/observability/index.d.ts +12 -0
- package/dist/observability/index.d.ts.map +1 -0
- package/dist/observability/index.js +12 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability/logger.d.ts +180 -0
- package/dist/observability/logger.d.ts.map +1 -1
- package/dist/observability/logger.js +158 -0
- package/dist/observability/logger.js.map +1 -1
- package/dist/router/context-manager.d.ts +214 -1
- package/dist/router/context-manager.d.ts.map +1 -1
- package/dist/router/context-manager.js +759 -2
- package/dist/router/context-manager.js.map +1 -1
- package/dist/router/context-types.d.ts +182 -0
- package/dist/router/context-types.d.ts.map +1 -0
- package/dist/router/context-types.js +8 -0
- package/dist/router/context-types.js.map +1 -0
- package/dist/server.d.ts +41 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +165 -0
- package/dist/server.js.map +1 -1
- package/dist/skills/hot-reloader.d.ts +104 -0
- package/dist/skills/hot-reloader.d.ts.map +1 -0
- package/dist/skills/hot-reloader.js +314 -0
- package/dist/skills/hot-reloader.js.map +1 -0
- package/dist/skills/index.d.ts +14 -0
- package/dist/skills/index.d.ts.map +1 -0
- package/dist/skills/index.js +16 -0
- package/dist/skills/index.js.map +1 -0
- package/dist/skills/skill-executor.d.ts +96 -0
- package/dist/skills/skill-executor.d.ts.map +1 -0
- package/dist/skills/skill-executor.js +289 -0
- package/dist/skills/skill-executor.js.map +1 -0
- package/dist/skills/skill-loader.d.ts +147 -0
- package/dist/skills/skill-loader.d.ts.map +1 -0
- package/dist/skills/skill-loader.js +579 -0
- package/dist/skills/skill-loader.js.map +1 -0
- package/dist/skills/types.d.ts +198 -0
- package/dist/skills/types.d.ts.map +1 -0
- package/dist/skills/types.js +21 -0
- package/dist/skills/types.js.map +1 -0
- package/dist/tasks/coordinator.d.ts +22 -1
- package/dist/tasks/coordinator.d.ts.map +1 -1
- package/dist/tasks/coordinator.js +83 -0
- package/dist/tasks/coordinator.js.map +1 -1
- package/dist/tasks/state-coordinator.d.ts +19 -0
- package/dist/tasks/state-coordinator.d.ts.map +1 -1
- package/dist/tasks/state-coordinator.js +40 -0
- package/dist/tasks/state-coordinator.js.map +1 -1
- package/package.json +1 -1
package/dist/router/context-manager.js (+759 -2)
@@ -1,3 +1,760 @@
The hunk replaces the previous two-line body with the new implementation below; only the trailing source-map comment carries over unchanged.

/**
 * Context Manager
 *
 * Manages context window tracking, auto-compaction, and truncation
 * for multi-provider agent orchestration. Tracks token usage across
 * different providers and models with varying context limits.
 *
 * Key features:
 * - Per-provider/model context limit tracking
 * - Automatic compaction when approaching limits
 * - Smart truncation that preserves important content
 * - Context visualization for debugging
 * - Event emission for monitoring
 */
// ============================================================================
// Default Model Limits
// ============================================================================
/**
 * Default context limits for known models.
 * Values are in tokens.
 */
const DEFAULT_MODEL_LIMITS = {
    // OpenAI
    'gpt-4o': 128000,
    'gpt-4o-mini': 128000,
    'gpt-4-turbo': 128000,
    'gpt-4-turbo-preview': 128000,
    'gpt-4': 8192,
    'gpt-3.5-turbo': 16385,
    'o1': 128000,
    'o1-mini': 128000,
    'o1-preview': 128000,
    'o3-mini': 128000,
    // Anthropic
    'claude-3-opus': 200000,
    'claude-3-opus-20240229': 200000,
    'claude-3-sonnet': 200000,
    'claude-3-sonnet-20240229': 200000,
    'claude-3-haiku': 200000,
    'claude-3-haiku-20240307': 200000,
    'claude-3-5-sonnet': 200000,
    'claude-3-5-sonnet-20240620': 200000,
    'claude-3-5-sonnet-20241022': 200000,
    'claude-sonnet-4': 200000,
    'claude-sonnet-4-20250514': 200000,
    'claude-opus-4': 200000,
    'claude-opus-4-20250514': 200000,
    'claude-opus-4-5-20251101': 200000,
    // Google
    'gemini-2.0-flash': 1000000,
    'gemini-2.0-flash-exp': 1000000,
    'gemini-1.5-pro': 1000000,
    'gemini-1.5-pro-latest': 1000000,
    'gemini-1.5-flash': 1000000,
    'gemini-1.5-flash-latest': 1000000,
    'gemini-pro': 32768,
    // DeepSeek
    'deepseek-chat': 128000,
    'deepseek-coder': 128000,
    'deepseek-reasoner': 128000,
    // Groq
    'llama-3.3-70b-versatile': 32768,
    'llama-3.1-70b-versatile': 32768,
    'llama-3.1-8b-instant': 8192,
    'mixtral-8x7b-32768': 32768,
    'gemma2-9b-it': 8192,
    // Perplexity
    'llama-3.1-sonar-large-128k-online': 127072,
    'llama-3.1-sonar-small-128k-online': 127072,
    'llama-3.1-sonar-huge-128k-online': 127072,
    // Together/Fireworks (common models)
    'meta-llama/Llama-3.1-70B-Instruct': 131072,
    'meta-llama/Llama-3.1-8B-Instruct': 131072,
    'mistralai/Mixtral-8x7B-Instruct-v0.1': 32768,
    // Ollama (local models - conservative defaults)
    'llama3': 8192,
    'llama3:70b': 8192,
    'codellama': 16384,
    'mistral': 8192,
    'mixtral': 32768,
};
/**
 * Characters per token ratio by provider.
 * Used for token estimation when exact counting isn't available.
 */
const CHARS_PER_TOKEN = {
    anthropic: 3.5,
    openai: 4.0,
    google: 4.0,
    deepseek: 4.0,
    groq: 4.0,
    perplexity: 4.0,
    together: 4.0,
    fireworks: 4.0,
    ollama: 4.0,
    default: 4.0,
};
// ============================================================================
// Default Configuration
// ============================================================================
const DEFAULT_CONFIG = {
    defaultContextLimit: 128000,
    autoCompactThreshold: 0.9,
    blockingThreshold: 0.98,
    maxToolResultTokens: 4096,
    maxMessageTokens: 16384,
    modelLimits: {},
};
// ============================================================================
// Context Manager Class
// ============================================================================
/**
 * Manages context window tracking and optimization for multi-provider
 * agent orchestration.
 *
 * @example
 * ```typescript
 * const contextManager = new ContextManager({ autoCompactThreshold: 0.85 }, logger);
 *
 * // Check context state before making a request
 * const state = contextManager.getState('openai', 'gpt-4o', messages);
 * if (state.isBlocked) {
 *   // Context too full, need to compact
 *   messages = contextManager.compact(messages);
 * }
 *
 * // Get detailed breakdown for debugging
 * const breakdown = contextManager.getBreakdown(messages);
 * console.log(contextManager.visualize(state, breakdown));
 * ```
 */
export class ContextManager {
    config;
    logger;
    listeners = new Set();
    /**
     * Create a new ContextManager instance.
     *
     * @param config - Partial configuration (merged with defaults)
     * @param logger - Logger instance for observability
     */
    constructor(config, logger) {
        this.config = {
            ...DEFAULT_CONFIG,
            ...config,
            modelLimits: {
                ...DEFAULT_MODEL_LIMITS,
                ...config.modelLimits,
            },
        };
        this.logger = logger;
        this.logger.debug('ContextManager initialized', {
            autoCompactThreshold: this.config.autoCompactThreshold,
            blockingThreshold: this.config.blockingThreshold,
            defaultContextLimit: this.config.defaultContextLimit,
        });
    }
    // ==========================================================================
    // Public Methods - State Management
    // ==========================================================================
    /**
     * Get the current context state for a provider/model combination.
     *
     * @param provider - Provider name (e.g., 'openai', 'anthropic')
     * @param model - Model name (e.g., 'gpt-4o', 'claude-3-opus')
     * @param messages - Current conversation messages
     * @returns Current context state with usage metrics
     */
    getState(provider, model, messages) {
        const maxTokens = this.getModelLimit(provider, model);
        const usedTokens = this.estimateMessagesTokens(messages, provider);
        const usedPercentage = (usedTokens / maxTokens) * 100;
        const remainingPercentage = 100 - usedPercentage;
        const remainingTokens = Math.max(0, maxTokens - usedTokens);
        const isNearLimit = usedPercentage >= this.config.autoCompactThreshold * 100;
        const isBlocked = usedPercentage >= this.config.blockingThreshold * 100;
        const state = {
            provider,
            model,
            usedTokens,
            maxTokens,
            usedPercentage: Math.round(usedPercentage * 100) / 100,
            remainingPercentage: Math.round(remainingPercentage * 100) / 100,
            remainingTokens,
            isNearLimit,
            isBlocked,
        };
        // Emit events based on state
        if (isBlocked) {
            this.emit({ type: 'context_blocked', state });
        }
        else if (isNearLimit) {
            this.emit({
                type: 'context_warning',
                state,
                message: `Context usage at ${usedPercentage.toFixed(1)}%, approaching limit`,
            });
        }
        return state;
    }
    /**
     * Check if the context should be auto-compacted.
     *
     * @param state - Current context state
     * @returns True if compaction should be triggered
     */
    shouldCompact(state) {
        return state.usedPercentage >= this.config.autoCompactThreshold * 100;
    }
    /**
     * Check if the context is blocked (too full for new messages).
     *
     * @param state - Current context state
     * @returns True if context is blocked
     */
    isBlocked(state) {
        return state.isBlocked;
    }
    // ==========================================================================
    // Public Methods - Token Estimation
    // ==========================================================================
    /**
     * Estimate the number of tokens in a string.
     *
     * Uses a simple character-based approximation. For more accurate counting,
     * consider integrating tiktoken or a provider-specific tokenizer.
     *
     * @param text - Text to estimate tokens for
     * @param provider - Provider name for provider-specific ratios
     * @returns Estimated token count
     */
    estimateTokens(text, provider = 'default') {
        if (!text)
            return 0;
        const charsPerToken = CHARS_PER_TOKEN[provider] ?? CHARS_PER_TOKEN['default'] ?? 4.0;
        return Math.ceil(text.length / charsPerToken);
    }
    /**
     * Estimate tokens for a content block.
     *
     * @param block - Content block to analyze
     * @param provider - Provider name for estimation
     * @returns Estimated token count
     */
    estimateContentBlockTokens(block, provider = 'default') {
        let tokens = 0;
        const blockType = block.type;
        switch (blockType) {
            case 'text':
                tokens = this.estimateTokens(block.text ?? '', provider);
                break;
            case 'tool_use': {
                // Tool name + input JSON
                // Use type assertion to access name property
                const name = 'name' in block ? block.name : undefined;
                tokens = this.estimateTokens(name ?? '', provider);
                if (block.input) {
                    tokens += this.estimateTokens(JSON.stringify(block.input), provider);
                }
                break;
            }
            case 'tool_result':
                tokens = this.estimateTokens(block.content ?? '', provider);
                break;
            case 'image':
                // Images are typically 85 tokens for low detail, up to 1445 for high detail
                // Use a conservative estimate
                tokens = 500;
                break;
            default:
                // Unknown block type, estimate based on any text-like content
                if ('text' in block && typeof block.text === 'string') {
                    tokens = this.estimateTokens(block.text, provider);
                }
        }
        return tokens;
    }
    /**
     * Estimate tokens for a message.
     *
     * @param message - Message to analyze
     * @param provider - Provider name for estimation
     * @returns Estimated token count
     */
    estimateMessageTokens(message, provider = 'default') {
        // Role token overhead (approximately 4 tokens for role markers)
        let tokens = 4;
        if (typeof message.content === 'string') {
            tokens += this.estimateTokens(message.content, provider);
        }
        else if (Array.isArray(message.content)) {
            for (const block of message.content) {
                tokens += this.estimateContentBlockTokens(block, provider);
            }
        }
        return tokens;
    }
    /**
     * Estimate total tokens for an array of messages.
     *
     * @param messages - Messages to analyze
     * @param provider - Provider name for estimation
     * @returns Total estimated token count
     */
    estimateMessagesTokens(messages, provider = 'default') {
        let total = 0;
        for (const message of messages) {
            total += this.estimateMessageTokens(message, provider);
        }
        // Add overhead for message structure (approximately 3 tokens per message boundary)
        total += messages.length * 3;
        return total;
    }
    // ==========================================================================
    // Public Methods - Model Limits
    // ==========================================================================
    /**
     * Get the context limit for a specific model.
     *
     * Looks up the model in the configured limits, falling back to
     * DEFAULT_MODEL_LIMITS, then to the default context limit.
     *
     * @param provider - Provider name
     * @param model - Model name
     * @returns Context limit in tokens
     */
    getModelLimit(provider, model) {
        // Check configured limits first
        if (this.config.modelLimits[model]) {
            return this.config.modelLimits[model];
        }
        // Check default limits
        if (DEFAULT_MODEL_LIMITS[model]) {
            return DEFAULT_MODEL_LIMITS[model];
        }
        // Try to match by prefix (e.g., 'gpt-4o-2024-05-13' matches 'gpt-4o')
        for (const [knownModel, limit] of Object.entries(DEFAULT_MODEL_LIMITS)) {
            if (model.startsWith(knownModel) || knownModel.startsWith(model)) {
                return limit;
            }
        }
        // Provider-specific defaults
        const providerDefaults = {
            anthropic: 200000,
            google: 1000000,
            openai: 128000,
            deepseek: 128000,
            groq: 32768,
            perplexity: 127072,
            together: 32768,
            fireworks: 32768,
            ollama: 8192,
        };
        if (providerDefaults[provider]) {
            this.logger.debug('Using provider default limit', {
                provider,
                model,
                limit: providerDefaults[provider],
            });
            return providerDefaults[provider];
        }
        // Fall back to configured default
        this.logger.warn('Unknown model, using default context limit', {
            provider,
            model,
            limit: this.config.defaultContextLimit,
        });
        return this.config.defaultContextLimit;
    }
    // ==========================================================================
    // Public Methods - Truncation
    // ==========================================================================
    /**
     * Truncate content to fit within a token limit.
     *
     * @param content - Content to truncate
     * @param maxTokens - Maximum tokens allowed
     * @param options - Truncation options
     * @returns Truncated content
     */
    truncate(content, maxTokens, options) {
        const result = this.truncateWithResult(content, maxTokens, options);
        return result.content;
    }
    /**
     * Truncate content with detailed result information.
     *
     * @param content - Content to truncate
     * @param maxTokens - Maximum tokens allowed
     * @param options - Truncation options
     * @returns Truncation result with metadata
     */
    truncateWithResult(content, maxTokens, options) {
        const opts = {
            maxTokens,
            position: options?.position ?? 'end',
            suffix: options?.suffix ?? '... [truncated]',
            prefix: options?.prefix ?? '',
            preserveCodeBlocks: options?.preserveCodeBlocks ?? true,
        };
        const originalTokens = this.estimateTokens(content);
        if (originalTokens <= maxTokens) {
            return {
                content,
                originalTokens,
                truncatedTokens: originalTokens,
                wasTruncated: false,
            };
        }
        // Calculate how many characters we can keep
        const charsPerToken = CHARS_PER_TOKEN['default'] ?? 4.0;
        const suffixTokens = this.estimateTokens(opts.suffix ?? '');
        const prefixTokens = this.estimateTokens(opts.prefix ?? '');
        const availableTokens = maxTokens - suffixTokens - prefixTokens;
        const maxChars = Math.floor(availableTokens * charsPerToken);
        let truncated;
        switch (opts.position) {
            case 'start':
                truncated = opts.prefix + content.slice(-maxChars) + (opts.suffix ?? '');
                break;
            case 'middle': {
                const halfChars = Math.floor(maxChars / 2);
                const start = content.slice(0, halfChars);
                const end = content.slice(-halfChars);
                truncated = start + (opts.suffix ?? '') + end;
                break;
            }
            case 'end':
            default:
                truncated = (opts.prefix ?? '') + content.slice(0, maxChars) + (opts.suffix ?? '');
                break;
        }
        const truncatedTokens = this.estimateTokens(truncated);
        this.emit({
            type: 'truncation_applied',
            originalTokens,
            truncatedTokens,
        });
        return {
            content: truncated,
            originalTokens,
            truncatedTokens,
            wasTruncated: true,
        };
    }
    // ==========================================================================
    // Public Methods - Compaction
    // ==========================================================================
    /**
     * Compact messages to reduce context usage.
     *
     * Strategy:
     * 1. Truncate long tool results
     * 2. Truncate long individual messages
     * 3. Summarize/drop older messages if still over limit
     *
     * @param messages - Messages to compact
     * @param targetReduction - Target reduction percentage (0-1, default 0.3)
     * @returns Compacted messages array
     */
    compact(messages, targetReduction = 0.3) {
        const originalTokens = this.estimateMessagesTokens(messages);
        const targetTokens = originalTokens * (1 - targetReduction);
        this.logger.info('Starting context compaction', {
            originalTokens,
            targetTokens,
            messageCount: messages.length,
        });
        let compactedMessages = [...messages];
        let currentTokens = originalTokens;
        let strategy = 'truncate';
        // Step 1: Truncate long tool results
        compactedMessages = this.truncateToolResults(compactedMessages);
        currentTokens = this.estimateMessagesTokens(compactedMessages);
        if (currentTokens <= targetTokens) {
            this.emitCompactionResult(originalTokens, currentTokens, messages.length, compactedMessages.length, strategy);
            return compactedMessages;
        }
        // Step 2: Truncate long messages
        compactedMessages = this.truncateLongMessages(compactedMessages);
        currentTokens = this.estimateMessagesTokens(compactedMessages);
        if (currentTokens <= targetTokens) {
            this.emitCompactionResult(originalTokens, currentTokens, messages.length, compactedMessages.length, strategy);
            return compactedMessages;
        }
        // Step 3: Drop older messages (keep first message as context, and recent messages)
        strategy = 'hybrid';
        const keepRecent = Math.max(4, Math.ceil(messages.length * 0.3));
        const firstMessage = compactedMessages[0];
        const recentMessages = compactedMessages.slice(-keepRecent);
        if (firstMessage && compactedMessages.length > keepRecent + 1) {
            // Keep first message (usually system context) and recent messages
            compactedMessages = [firstMessage, ...recentMessages];
            // Add a summary marker
            const summaryMessage = {
                role: 'assistant',
                content: '[Previous conversation context has been summarized to reduce length]',
            };
            compactedMessages.splice(1, 0, summaryMessage);
        }
        currentTokens = this.estimateMessagesTokens(compactedMessages);
        this.emitCompactionResult(originalTokens, currentTokens, messages.length, compactedMessages.length, strategy);
        return compactedMessages;
    }
    // ==========================================================================
    // Public Methods - Breakdown & Visualization
    // ==========================================================================
    /**
     * Get a detailed breakdown of context usage by category.
     *
     * @param messages - Messages to analyze
     * @param provider - Provider name for estimation
     * @returns Context breakdown by category
     */
    getBreakdown(messages, provider = 'default') {
        const breakdown = {
            systemPrompt: 0,
            messages: 0,
            toolResults: 0,
            images: 0,
            total: 0,
        };
        for (const message of messages) {
            const isSystemPrompt = this.isSystemPromptMessage(message);
            if (typeof message.content === 'string') {
                const tokens = this.estimateTokens(message.content, provider);
                if (isSystemPrompt) {
                    breakdown.systemPrompt += tokens;
                }
                else {
                    breakdown.messages += tokens;
                }
            }
            else if (Array.isArray(message.content)) {
                for (const block of message.content) {
                    const tokens = this.estimateContentBlockTokens(block, provider);
                    if (block.type === 'tool_result') {
                        breakdown.toolResults += tokens;
                    }
                    else if (block.type === 'image') {
                        breakdown.images += tokens;
                    }
                    else if (isSystemPrompt) {
                        breakdown.systemPrompt += tokens;
                    }
                    else {
                        breakdown.messages += tokens;
                    }
                }
            }
        }
        breakdown.total = breakdown.systemPrompt + breakdown.messages + breakdown.toolResults + breakdown.images;
        return breakdown;
    }
    /**
     * Generate a human-readable visualization of context usage.
     *
     * @param state - Current context state
     * @param breakdown - Context breakdown by category
     * @returns Formatted visualization string
     */
    visualize(state, breakdown) {
        const lines = [
            '='.repeat(60),
            'CONTEXT USAGE REPORT',
            '='.repeat(60),
            '',
            `Provider: ${state.provider}`,
            `Model: ${state.model}`,
            '',
            '--- Token Usage ---',
            `Used: ${state.usedTokens.toLocaleString()} / ${state.maxTokens.toLocaleString()} (${state.usedPercentage.toFixed(1)}%)`,
            `Remaining: ${state.remainingTokens.toLocaleString()} (${state.remainingPercentage.toFixed(1)}%)`,
            '',
            '--- Usage Breakdown ---',
            `System Prompt: ${breakdown.systemPrompt.toLocaleString()} tokens (${this.formatPercentage(breakdown.systemPrompt, breakdown.total)})`,
            `Messages: ${breakdown.messages.toLocaleString()} tokens (${this.formatPercentage(breakdown.messages, breakdown.total)})`,
            `Tool Results: ${breakdown.toolResults.toLocaleString()} tokens (${this.formatPercentage(breakdown.toolResults, breakdown.total)})`,
            `Images: ${breakdown.images.toLocaleString()} tokens (${this.formatPercentage(breakdown.images, breakdown.total)})`,
            `Total: ${breakdown.total.toLocaleString()} tokens`,
            '',
            '--- Status ---',
            `Near Limit: ${state.isNearLimit ? 'YES (consider compacting)' : 'No'}`,
            `Blocked: ${state.isBlocked ? 'YES (compaction required)' : 'No'}`,
            '',
            this.generateUsageBar(state.usedPercentage),
            '='.repeat(60),
        ];
        return lines.join('\n');
    }
    // ==========================================================================
    // Public Methods - Event Handling
    // ==========================================================================
    /**
     * Add an event listener for context events.
     *
     * @param listener - Event listener function
     */
    addEventListener(listener) {
        this.listeners.add(listener);
    }
    /**
     * Remove an event listener.
     *
     * @param listener - Event listener to remove
     */
    removeEventListener(listener) {
        this.listeners.delete(listener);
    }
    // ==========================================================================
    // Private Methods
    // ==========================================================================
    /**
     * Emit a context event to all listeners.
     */
    emit(event) {
        for (const listener of this.listeners) {
            try {
                listener(event);
            }
            catch (err) {
                const error = err instanceof Error ? err : new Error(String(err));
                this.logger.error('Error in context event listener', { error });
            }
        }
    }
    /**
     * Truncate tool results that exceed the configured limit.
     */
    truncateToolResults(messages) {
        return messages.map((message) => {
            if (typeof message.content === 'string') {
                return message;
            }
            const truncatedContent = message.content.map((block) => {
                if (block.type === 'tool_result' && block.content) {
                    const tokens = this.estimateTokens(block.content);
                    if (tokens > this.config.maxToolResultTokens) {
                        return {
                            ...block,
                            content: this.truncate(block.content, this.config.maxToolResultTokens),
                        };
                    }
                }
                return block;
            });
            return { ...message, content: truncatedContent };
        });
    }
    /**
     * Truncate individual messages that exceed the configured limit.
     */
    truncateLongMessages(messages) {
        return messages.map((message) => {
            if (typeof message.content === 'string') {
                const tokens = this.estimateTokens(message.content);
                if (tokens > this.config.maxMessageTokens) {
                    return {
                        ...message,
                        content: this.truncate(message.content, this.config.maxMessageTokens),
                    };
                }
                return message;
            }
            // For content blocks, truncate text blocks that are too long
            const truncatedContent = message.content.map((block) => {
                if (block.type === 'text' && block.text) {
                    const tokens = this.estimateTokens(block.text);
                    if (tokens > this.config.maxMessageTokens) {
                        return {
                            ...block,
                            text: this.truncate(block.text, this.config.maxMessageTokens),
                        };
                    }
                }
                return block;
            });
            return { ...message, content: truncatedContent };
        });
    }
    /**
     * Check if a message is a system prompt.
     */
    isSystemPromptMessage(message) {
        // Check for explicit metadata
        if ('metadata' in message && message.metadata?.isSystemPrompt) {
            return true;
        }
        // Check for system role
        if (message.role === 'system') {
            return true;
        }
        // Check for <system> tags in content
        if (typeof message.content === 'string') {
            return message.content.includes('<system>') || message.content.includes('</system>');
        }
        return false;
    }
    /**
     * Emit a compaction result event.
     */
    emitCompactionResult(originalTokens, compactedTokens, originalCount, compactedCount, strategy) {
        const result = {
            originalTokens,
            compactedTokens,
            tokensSaved: originalTokens - compactedTokens,
            reductionPercentage: ((originalTokens - compactedTokens) / originalTokens) * 100,
            originalMessageCount: originalCount,
            compactedMessageCount: compactedCount,
            strategy,
        };
        this.logger.info('Context compaction completed', {
            tokensSaved: result.tokensSaved,
            reductionPercentage: result.reductionPercentage.toFixed(1),
            strategy,
        });
        this.emit({ type: 'compact_completed', result });
    }
    /**
     * Format a percentage for display.
     */
    formatPercentage(value, total) {
        if (total === 0)
            return '0.0%';
        return ((value / total) * 100).toFixed(1) + '%';
    }
    /**
     * Generate a visual progress bar for context usage.
     */
    generateUsageBar(percentage) {
        const width = 50;
        const filled = Math.round((percentage / 100) * width);
        const empty = width - filled;
        let bar;
        if (percentage >= 98) {
            bar = '!'.repeat(filled) + '-'.repeat(empty);
        }
        else if (percentage >= 90) {
            bar = '#'.repeat(filled) + '-'.repeat(empty);
        }
        else {
            bar = '='.repeat(filled) + '-'.repeat(empty);
        }
        return `[${bar}] ${percentage.toFixed(1)}%`;
    }
}
// ============================================================================
// Factory Function
// ============================================================================
/**
 * Create a ContextManager instance.
 *
 * @param config - Partial configuration (merged with defaults)
 * @param logger - Logger instance
 * @returns Configured ContextManager instance
 */
export function createContextManager(config, logger) {
    return new ContextManager(config, logger);
}
//# sourceMappingURL=context-manager.js.map
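
For orientation, a minimal sketch of how the new context-manager surface might be driven from application code. Only the API visible in the diff above (createContextManager, getState, shouldCompact, compact, addEventListener, getBreakdown, visualize) is taken as given; the import path and the console-backed logger are assumptions, since the manager only requires an object with debug/info/warn/error methods.

```typescript
// Minimal sketch, not the package's documented usage.
import { createContextManager } from 'mcp-agent-foundry'; // hypothetical entry point

const logger = {
  debug: (msg: string, meta?: unknown) => console.debug(msg, meta),
  info: (msg: string, meta?: unknown) => console.info(msg, meta),
  warn: (msg: string, meta?: unknown) => console.warn(msg, meta),
  error: (msg: string, meta?: unknown) => console.error(msg, meta),
};

const contextManager = createContextManager(
  // Defaults compact at 90% usage and block at 98%; add a limit for an unknown model.
  { autoCompactThreshold: 0.85, modelLimits: { 'my-custom-model': 32768 } },
  logger,
);

// Watch the events the manager emits: context_warning, context_blocked,
// truncation_applied, compact_completed.
contextManager.addEventListener((event) => {
  if (event.type === 'context_warning') {
    logger.warn(event.message, { usedPercentage: event.state.usedPercentage });
  }
});

const messages = [
  { role: 'system', content: 'You are a deployment assistant.' },
  { role: 'user', content: 'Summarize the last release.' },
];

// Before each provider call: measure usage against the model's context limit and
// compact (truncate tool results and long messages, then drop older turns)
// once the auto-compact threshold is crossed.
const state = contextManager.getState('openai', 'gpt-4o', messages);
const conversation = contextManager.shouldCompact(state)
  ? contextManager.compact(messages, 0.3) // aim for roughly a 30% reduction
  : messages;

// Human-readable report for debugging.
console.log(contextManager.visualize(state, contextManager.getBreakdown(conversation, 'openai')));
```

visualize() returns the plain-text CONTEXT USAGE REPORT with the 50-character usage bar, which is convenient to drop into logs while tuning the thresholds.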
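
Token counts in that flow come from a character-ratio heuristic rather than a real tokenizer: roughly 3.5 characters per token for Anthropic and 4.0 for everything else, plus about 4 tokens of role overhead per message and 3 tokens per message boundary. Continuing the sketch above (contextManager and logger are reused, so this is still an assumption-laden illustration), the arithmetic and the standalone truncation helpers look like this:

```typescript
// Worked numbers for the heuristic: estimateTokens = ceil(length / charsPerToken).
const sample = 'x'.repeat(10_000);
const anthropicEstimate = contextManager.estimateTokens(sample, 'anthropic'); // ceil(10000 / 3.5) = 2858
const defaultEstimate = contextManager.estimateTokens(sample);                // ceil(10000 / 4.0) = 2500
// With ~4 tokens of role overhead and ~3 tokens of structure per message, twenty such
// messages on a 200,000-token Claude model land near 57,300 tokens (~29% usage),
// well under the 90% auto-compact and 98% blocking defaults.

// Standalone truncation with metadata. position 'end' keeps the head of the string,
// 'start' keeps the tail, and 'middle' keeps both ends around the marker;
// the default marker is '... [truncated]'.
const result = contextManager.truncateWithResult(sample, 1024, {
  position: 'middle',
  suffix: '\n... [truncated] ...\n',
});
if (result.wasTruncated) {
  logger.info('Truncated oversized tool output', {
    originalTokens: result.originalTokens,   // 2500 for the sample above
    truncatedTokens: result.truncatedTokens, // close to the 1024-token budget
  });
}
```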