lynkr 3.0.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -21
- package/README.md +626 -145
- package/docs/index.md +150 -18
- package/install.sh +63 -16
- package/package.json +2 -2
- package/scripts/setup.js +117 -43
- package/src/api/router.js +78 -0
- package/src/clients/openrouter-utils.js +51 -7
- package/src/config/index.js +51 -0
- package/src/context/budget.js +326 -0
- package/src/context/compression.js +397 -0
- package/src/memory/format.js +156 -0
- package/src/memory/retriever.js +55 -14
- package/src/memory/search.js +36 -12
- package/src/memory/store.js +61 -13
- package/src/memory/surprise.js +56 -15
- package/src/orchestrator/index.js +189 -2
- package/src/prompts/system.js +320 -0
- package/src/tools/index.js +9 -0
- package/src/tools/smart-selection.js +356 -0
- package/src/tools/truncate.js +105 -0
- package/src/utils/tokens.js +217 -0
- package/test/llamacpp-integration.test.js +198 -0
- package/test/memory/extractor.test.js +34 -6
- package/test/memory/retriever.test.js +45 -15
- package/test/memory/retriever.test.js.bak +585 -0
- package/test/memory/search.test.js +160 -12
- package/test/memory/search.test.js.bak +389 -0
- package/test/memory/store.test.js +57 -25
- package/test/memory/store.test.js.bak +312 -0
- package/test/memory/surprise.test.js +1 -1
package/src/api/router.js
CHANGED
|
@@ -305,4 +305,82 @@ router.get("/v1/agents/:executionId", (req, res) => {
|
|
|
305
305
|
}
|
|
306
306
|
});
|
|
307
307
|
|
|
308
|
+
// Per-session token usage: aggregate statistics plus a per-turn breakdown.
router.get("/api/sessions/:sessionId/tokens", (req, res) => {
  try {
    // Required at request time rather than at module load.
    const tokenUtils = require("../utils/tokens");
    const { sessionId } = req.params;
    const session = getSession(sessionId);

    // Unknown session id -> 404 instead of an empty statistics object.
    if (!session) {
      return res.status(404).json({ error: "Session not found" });
    }

    const stats = tokenUtils.getSessionTokenStats(session);

    // Maps one recorded turn onto the public response shape.
    const describeTurn = (turn) => {
      return {
        turn: turn.turn,
        timestamp: turn.timestamp,
        model: turn.model,
        estimated: turn.estimated.total,
        actual: {
          input: turn.actual.inputTokens,
          output: turn.actual.outputTokens,
          cached: turn.actual.cacheReadTokens,
          total: turn.actual.totalTokens
        },
        // Per-turn cost is reported with six decimal places of precision.
        cost: Number.parseFloat(turn.cost.total.toFixed(6))
      };
    };

    // Session-level summary; total cost is rounded to four decimal places.
    const summary = {
      turns: stats.turns,
      totalTokens: stats.totalTokens,
      totalCost: Number.parseFloat(stats.totalCost.toFixed(4)),
      averageTokensPerTurn: stats.averageTokensPerTurn,
      cacheHitRate: `${Number.parseFloat(stats.cacheHitRate)}%`
    };

    res.json({
      sessionId,
      stats: summary,
      breakdown: stats.breakdown.map(describeTurn)
    });
  } catch (error) {
    // Any failure while building the report is surfaced as a 500 with the error message.
    res.status(500).json({ error: error.message });
  }
});
|
|
348
|
+
|
|
349
|
+
// Token usage totals aggregated across every session returned by getAllSessions().
router.get("/api/tokens/stats", (req, res) => {
  try {
    // Both modules are required at request time rather than at module load.
    const tokenUtils = require("../utils/tokens");
    const { getAllSessions } = require("../sessions");

    // Running totals; only sessions with at least one recorded turn contribute.
    const totals = { tokens: 0, cost: 0, turns: 0, sessions: 0 };

    for (const session of getAllSessions()) {
      const stats = tokenUtils.getSessionTokenStats(session);
      if (!(stats.turns > 0)) {
        continue;
      }
      totals.tokens += stats.totalTokens;
      totals.cost += stats.totalCost;
      totals.turns += stats.turns;
      totals.sessions += 1;
    }

    // Averages fall back to 0 so an empty store never divides by zero.
    const averageTokensPerTurn =
      totals.turns > 0 ? Math.round(totals.tokens / totals.turns) : 0;
    const averageTokensPerSession =
      totals.sessions > 0 ? Math.round(totals.tokens / totals.sessions) : 0;

    res.json({
      global: {
        sessions: totals.sessions,
        turns: totals.turns,
        totalTokens: totals.tokens,
        totalCost: Number.parseFloat(totals.cost.toFixed(4)),
        averageTokensPerTurn,
        averageTokensPerSession
      }
    });
  } catch (error) {
    // Any failure while aggregating is surfaced as a 500 with the error message.
    res.status(500).json({ error: error.message });
  }
});
|
|
385
|
+
|
|
308
386
|
module.exports = router;
|
|
@@ -244,16 +244,60 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
|
|
|
244
244
|
const message = choice.message || {};
|
|
245
245
|
const contentBlocks = [];
|
|
246
246
|
|
|
247
|
-
//
|
|
247
|
+
// Check if there are tool calls present
|
|
248
|
+
const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
|
|
249
|
+
|
|
250
|
+
// Helper function to detect if content is a JSON representation of a tool call
|
|
251
|
+
// Some models (like llama.cpp) may output tool calls in both content AND tool_calls
|
|
252
|
+
const isToolCallJson = (text) => {
|
|
253
|
+
if (!text) return false;
|
|
254
|
+
const trimmed = text.trim();
|
|
255
|
+
// Check if it looks like a JSON object containing tool/function call
|
|
256
|
+
// Matches various formats:
|
|
257
|
+
// - {"type": "function", "function": {"name": "X", "parameters": {...}}}
|
|
258
|
+
// - {"function": "X", "parameters": {...}}
|
|
259
|
+
// - {"tool": "X", "input": {...}}
|
|
260
|
+
return (trimmed.startsWith('{') || trimmed.startsWith('[')) &&
|
|
261
|
+
(trimmed.includes('"function"') || trimmed.includes('"tool"') ||
|
|
262
|
+
(trimmed.includes('"type"') && trimmed.includes('"parameters"'))) &&
|
|
263
|
+
(trimmed.includes('"parameters"') || trimmed.includes('"input"') ||
|
|
264
|
+
trimmed.includes('"arguments"'));
|
|
265
|
+
};
|
|
266
|
+
|
|
267
|
+
// Add text content if present, but skip if it's a duplicate/malformed tool call JSON
|
|
248
268
|
if (message.content && message.content.trim()) {
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
269
|
+
const looksLikeToolJson = isToolCallJson(message.content);
|
|
270
|
+
|
|
271
|
+
// Skip content in two cases:
|
|
272
|
+
// 1. We have proper tool_calls AND content duplicates them (original fix)
|
|
273
|
+
// 2. Content looks like tool call JSON but we DON'T have tool_calls
|
|
274
|
+
// (model incorrectly output JSON instead of structured tool_calls)
|
|
275
|
+
if (looksLikeToolJson) {
|
|
276
|
+
if (hasToolCalls) {
|
|
277
|
+
// Case 1: Duplicate - model provided both content and tool_calls
|
|
278
|
+
logger.debug({
|
|
279
|
+
contentPreview: message.content.substring(0, 100),
|
|
280
|
+
toolCallCount: message.tool_calls.length
|
|
281
|
+
}, "Skipping text content that duplicates tool_calls (llama.cpp quirk)");
|
|
282
|
+
} else {
|
|
283
|
+
// Case 2: Malformed - model only provided JSON in content, not structured tool_calls
|
|
284
|
+
// This is a model error - it should have used tool_calls, not raw JSON
|
|
285
|
+
logger.warn({
|
|
286
|
+
contentPreview: message.content.substring(0, 200)
|
|
287
|
+
}, "Model output tool call as JSON text instead of structured tool_calls - filtering out malformed output");
|
|
288
|
+
}
|
|
289
|
+
// Skip this content block in both cases
|
|
290
|
+
} else {
|
|
291
|
+
// Normal text content - include it
|
|
292
|
+
contentBlocks.push({
|
|
293
|
+
type: "text",
|
|
294
|
+
text: message.content
|
|
295
|
+
});
|
|
296
|
+
}
|
|
253
297
|
}
|
|
254
298
|
|
|
255
299
|
// Add tool calls if present
|
|
256
|
-
if (
|
|
300
|
+
if (hasToolCalls) {
|
|
257
301
|
for (const toolCall of message.tool_calls) {
|
|
258
302
|
const func = toolCall.function || {};
|
|
259
303
|
let input = {};
|
|
@@ -288,7 +332,7 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
|
|
|
288
332
|
|
|
289
333
|
// Determine stop reason
|
|
290
334
|
let stopReason = "end_turn";
|
|
291
|
-
if (
|
|
335
|
+
if (hasToolCalls) {
|
|
292
336
|
stopReason = "tool_use";
|
|
293
337
|
} else if (choice.finish_reason === "length") {
|
|
294
338
|
stopReason = "max_tokens";
|
package/src/config/index.js
CHANGED
|
@@ -135,6 +135,28 @@ const memoryExtractionEnabled = process.env.MEMORY_EXTRACTION_ENABLED !== "false
|
|
|
135
135
|
const memoryDecayEnabled = process.env.MEMORY_DECAY_ENABLED !== "false"; // default true
|
|
136
136
|
const memoryDecayHalfLifeDays = Number.parseInt(process.env.MEMORY_DECAY_HALF_LIFE ?? "30", 10);
|
|
137
137
|
|
|
138
|
+
/**
 * Parse a base-10 integer setting from an environment variable value.
 *
 * A malformed value (e.g. TOKEN_BUDGET_MAX=abc) would otherwise parse to NaN
 * and flow into the exported config, where numeric comparisons against it are
 * always false. Falling back to the documented default keeps the feature
 * working instead of silently switching it off.
 *
 * @param {string|undefined} rawValue - Raw environment variable value
 * @param {number} fallback - Value used when the variable is unset or unparsable
 * @returns {number} Parsed integer, or `fallback` when parsing yields NaN
 */
function parseIntEnvOrDefault(rawValue, fallback) {
  if (rawValue == null) {
    return fallback;
  }
  const parsed = Number.parseInt(rawValue, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// Token optimization settings
// Boolean flags are opt-out: anything other than the literal string "false" enables them.
const tokenTrackingEnabled = process.env.TOKEN_TRACKING_ENABLED !== "false"; // default true
const toolTruncationEnabled = process.env.TOOL_TRUNCATION_ENABLED !== "false"; // default true
const memoryFormat = (process.env.MEMORY_FORMAT ?? "compact").toLowerCase();
const memoryDedupEnabled = process.env.MEMORY_DEDUP_ENABLED !== "false"; // default true
const memoryDedupLookback = parseIntEnvOrDefault(process.env.MEMORY_DEDUP_LOOKBACK, 5);
const systemPromptMode = (process.env.SYSTEM_PROMPT_MODE ?? "dynamic").toLowerCase();
const toolDescriptions = (process.env.TOOL_DESCRIPTIONS ?? "minimal").toLowerCase();
const historyCompressionEnabled = process.env.HISTORY_COMPRESSION_ENABLED !== "false"; // default true
const historyKeepRecentTurns = parseIntEnvOrDefault(process.env.HISTORY_KEEP_RECENT_TURNS, 10);
const historySummarizeOlder = process.env.HISTORY_SUMMARIZE_OLDER !== "false"; // default true
const tokenBudgetWarning = parseIntEnvOrDefault(process.env.TOKEN_BUDGET_WARNING, 100000);
const tokenBudgetMax = parseIntEnvOrDefault(process.env.TOKEN_BUDGET_MAX, 180000);
const tokenBudgetEnforcement = process.env.TOKEN_BUDGET_ENFORCEMENT !== "false"; // default true

// Smart tool selection configuration (always enabled)
const smartToolSelectionMode = (process.env.SMART_TOOL_SELECTION_MODE ?? "heuristic").toLowerCase();
const smartToolSelectionTokenBudget = parseIntEnvOrDefault(
  process.env.SMART_TOOL_SELECTION_TOKEN_BUDGET,
  2500
);
|
|
159
|
+
|
|
138
160
|
// Only require Databricks credentials if it's the primary provider or used as fallback
|
|
139
161
|
if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
|
|
140
162
|
throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
|
|
@@ -531,6 +553,9 @@ const config = {
|
|
|
531
553
|
injectionFormat: ["system", "assistant_preamble"].includes(memoryInjectionFormat)
|
|
532
554
|
? memoryInjectionFormat
|
|
533
555
|
: "system",
|
|
556
|
+
format: memoryFormat,
|
|
557
|
+
dedupEnabled: memoryDedupEnabled,
|
|
558
|
+
dedupLookback: memoryDedupLookback,
|
|
534
559
|
extraction: {
|
|
535
560
|
enabled: memoryExtractionEnabled,
|
|
536
561
|
},
|
|
@@ -539,6 +564,32 @@ const config = {
|
|
|
539
564
|
halfLifeDays: Number.isNaN(memoryDecayHalfLifeDays) ? 30 : memoryDecayHalfLifeDays,
|
|
540
565
|
},
|
|
541
566
|
},
|
|
567
|
+
tokenTracking: {
|
|
568
|
+
enabled: tokenTrackingEnabled,
|
|
569
|
+
},
|
|
570
|
+
toolTruncation: {
|
|
571
|
+
enabled: toolTruncationEnabled,
|
|
572
|
+
},
|
|
573
|
+
systemPrompt: {
|
|
574
|
+
mode: systemPromptMode,
|
|
575
|
+
toolDescriptions: toolDescriptions,
|
|
576
|
+
},
|
|
577
|
+
historyCompression: {
|
|
578
|
+
enabled: historyCompressionEnabled,
|
|
579
|
+
keepRecentTurns: historyKeepRecentTurns,
|
|
580
|
+
summarizeOlder: historySummarizeOlder,
|
|
581
|
+
},
|
|
582
|
+
tokenBudget: {
|
|
583
|
+
warning: tokenBudgetWarning,
|
|
584
|
+
max: tokenBudgetMax,
|
|
585
|
+
enforcement: tokenBudgetEnforcement,
|
|
586
|
+
},
|
|
587
|
+
smartToolSelection: {
|
|
588
|
+
enabled: true, // HARDCODED - always enabled
|
|
589
|
+
mode: smartToolSelectionMode,
|
|
590
|
+
tokenBudget: smartToolSelectionTokenBudget,
|
|
591
|
+
minimalMode: false, // HARDCODED - disabled
|
|
592
|
+
},
|
|
542
593
|
};
|
|
543
594
|
|
|
544
595
|
module.exports = config;
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Budget Management
|
|
3
|
+
*
|
|
4
|
+
* Enforces token budgets and applies adaptive compression
|
|
5
|
+
* when payloads approach or exceed limits.
|
|
6
|
+
*
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const logger = require('../logger');
|
|
10
|
+
const config = require('../config');
|
|
11
|
+
const tokens = require('../utils/tokens');
|
|
12
|
+
const historyCompression = require('./compression');
|
|
13
|
+
const systemPrompt = require('../prompts/system');
|
|
14
|
+
|
|
15
|
+
/**
 * Check a request payload against the token budget.
 *
 * Thresholds are resolved in order: explicit argument, then
 * `config.tokenBudget`, then the built-in defaults (100000 / 180000).
 * Candidates that are null, undefined or NaN are skipped, so a threshold that
 * failed to parse cannot silently disable the check (every `>=` comparison
 * against NaN is false).
 *
 * @param {Object} payload - Request payload
 * @param {number} [warningThreshold] - Warning threshold (tokens)
 * @param {number} [maxThreshold] - Maximum threshold (tokens)
 * @returns {Object} Budget check result: the token estimate, both resolved
 *   thresholds, `atWarning` / `overMax` / `needsCompression` flags, and the
 *   usage ratios `warningLevel` / `maxLevel` as two-decimal strings
 */
function checkBudget(payload, warningThreshold = null, maxThreshold = null) {
  // First usable candidate wins; NaN is treated like "not provided".
  const resolveThreshold = (candidates, fallback) =>
    candidates.find((value) => value != null && !Number.isNaN(value)) ?? fallback;

  const warningLimit = resolveThreshold(
    [warningThreshold, config.tokenBudget?.warning],
    100000
  );
  const maxLimit = resolveThreshold(
    [maxThreshold, config.tokenBudget?.max],
    180000
  );

  const estimated = tokens.countPayloadTokens(payload);
  const totalTokens = estimated.total;

  const warningLevel = totalTokens / warningLimit;
  const maxLevel = totalTokens / maxLimit;

  return {
    estimated,
    totalTokens,
    warningThreshold: warningLimit,
    maxThreshold: maxLimit,
    atWarning: totalTokens >= warningLimit,
    overMax: totalTokens >= maxLimit,
    warningLevel: warningLevel.toFixed(2),
    maxLevel: maxLevel.toFixed(2),
    // Compression starts as soon as the warning threshold is reached.
    needsCompression: totalTokens >= warningLimit
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Enforce the token budget with adaptive compression.
 *
 * Once the payload reaches the warning threshold, progressively more
 * aggressive strategies are applied to a deep copy of it. After each applied
 * strategy the copy is re-measured and the function returns as soon as the
 * copy is below the maximum threshold. If every strategy has been tried and
 * the copy is still over the maximum, an error is logged and the best-effort
 * copy is returned anyway.
 *
 * @param {Object} payload - Request payload (never mutated; work happens on a copy)
 * @param {Object} [options] - Budget options
 * @param {number} [options.warningThreshold] - Token count at which compression starts
 * @param {number} [options.maxThreshold] - Token count the payload should stay under
 * @param {boolean} [options.enforcement] - When false, the payload is returned untouched
 * @returns {Object} Optimized payload and statistics
 */
function enforceBudget(payload, options = {}) {
  const opts = {
    warningThreshold: options.warningThreshold ?? config.tokenBudget?.warning ?? 100000,
    maxThreshold: options.maxThreshold ?? config.tokenBudget?.max ?? 180000,
    enforcement: options.enforcement ?? config.tokenBudget?.enforcement ?? true,
  };

  if (!opts.enforcement) {
    return { payload, compressed: false, strategy: 'none' };
  }

  const initialCheck = checkBudget(payload, opts.warningThreshold, opts.maxThreshold);

  if (!initialCheck.needsCompression) {
    return {
      payload,
      compressed: false,
      strategy: 'none',
      budget: initialCheck
    };
  }

  // Clone payload to avoid modifying original
  const optimized = JSON.parse(JSON.stringify(payload));
  const strategy = [];

  // Character cap used by the last-resort system prompt truncation.
  const systemPromptCharLimit = 5000;

  // Re-measures the working copy against the same thresholds.
  const recheck = () => checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);

  logger.info({
    initialTokens: initialCheck.totalTokens,
    warningThreshold: opts.warningThreshold,
    maxThreshold: opts.maxThreshold,
    overBudget: initialCheck.totalTokens - opts.maxThreshold
  }, 'Token budget exceeded, applying adaptive compression');

  // Strategy 1: Compress history more aggressively
  if (optimized.messages && optimized.messages.length > 10) {
    optimized.messages = historyCompression.compressHistory(optimized.messages, {
      keepRecentTurns: 5, // More aggressive: keep only 5 recent
      summarizeOlder: true,
      enabled: true
    });
    strategy.push('aggressive_history_compression');

    const afterHistory = recheck();
    if (!afterHistory.overMax) {
      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterHistory);
    }
  }

  // Strategy 2: Further compress history (keep only 3 turns)
  if (optimized.messages && optimized.messages.length > 5) {
    optimized.messages = historyCompression.compressHistory(optimized.messages, {
      keepRecentTurns: 3, // Very aggressive: keep only 3
      summarizeOlder: true,
      enabled: true
    });
    strategy.push('extreme_history_compression');

    const afterExtreme = recheck();
    if (!afterExtreme.overMax) {
      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterExtreme);
    }
  }

  // Strategy 3: Compress system prompt aggressively
  if (optimized.system) {
    optimized.system = compressSystemPromptAggressively(optimized.system, optimized);
    strategy.push('aggressive_system_compression');

    const afterSystem = recheck();
    if (!afterSystem.overMax) {
      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterSystem);
    }
  }

  // Strategy 4: Remove tool descriptions entirely (keep only names/schemas)
  if (optimized.tools && optimized.tools.length > 0) {
    optimized.tools = optimized.tools.map(tool => ({
      name: tool.name,
      input_schema: tool.input_schema
      // Remove description entirely
    }));
    strategy.push('remove_tool_descriptions');

    const afterTools = recheck();
    if (!afterTools.overMax) {
      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterTools);
    }
  }

  // Strategy 5: Reduce tools to essential only
  if (optimized.tools && optimized.tools.length > 5) {
    const essentialTools = ['Read', 'Write', 'Edit', 'Bash', 'Grep'];
    optimized.tools = optimized.tools.filter(t => essentialTools.includes(t.name));
    strategy.push('reduce_to_essential_tools');

    const afterToolReduction = recheck();
    if (!afterToolReduction.overMax) {
      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterToolReduction);
    }
  }

  // Strategy 6: Last resort - truncate system prompt
  if (optimized.system) {
    const systemText = typeof optimized.system === 'string'
      ? optimized.system
      : systemPrompt.flattenBlocks(optimized.system);

    // Only truncate when something is actually cut off. Appending the notice
    // to a prompt that already fits would grow the payload and mislabel it.
    if (systemText.length > systemPromptCharLimit) {
      optimized.system = systemText.substring(0, systemPromptCharLimit) + '\n\n[System prompt truncated due to token budget]';
      strategy.push('truncate_system_prompt');

      const afterTruncate = recheck();
      if (!afterTruncate.overMax) {
        return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterTruncate);
      }
    }
  }

  // Final check
  const finalCheck = recheck();

  if (finalCheck.overMax) {
    logger.error({
      initialTokens: initialCheck.totalTokens,
      finalTokens: finalCheck.totalTokens,
      maxThreshold: opts.maxThreshold,
      strategiesApplied: strategy
    }, 'Failed to compress payload within token budget');
  }

  return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, finalCheck);
}
|
|
189
|
+
|
|
190
|
+
/**
 * Build the result object for a completed budget enforcement run and log a
 * summary of what was saved.
 *
 * @param {Object} original - Payload before compression (not read here)
 * @param {Object} optimized - Payload after the applied strategies
 * @param {string[]} strategy - Names of the strategies applied, in order
 * @param {Object} initialCheck - Budget check taken before compression
 * @param {Object} finalCheck - Budget check taken after compression
 * @returns {Object} Optimized payload, joined strategy string, both budget
 *   checks and a `stats` summary
 */
function finalizeBudgetEnforcement(original, optimized, strategy, initialCheck, finalCheck) {
  const initialTokens = initialCheck.totalTokens;
  const finalTokens = finalCheck.totalTokens;
  const saved = initialTokens - finalTokens;

  // Share of the initial size that was removed, as a one-decimal string.
  let percentage = '0.0';
  if (initialTokens > 0) {
    percentage = ((saved / initialTokens) * 100).toFixed(1);
  }

  const stats = { initialTokens, finalTokens, saved, percentage };

  logger.info(
    {
      strategiesApplied: strategy,
      ...stats,
      nowWithinBudget: !finalCheck.overMax
    },
    'Budget enforcement completed'
  );

  return {
    payload: optimized,
    compressed: true,
    strategy: strategy.join(' -> '),
    initialBudget: initialCheck,
    finalBudget: finalCheck,
    stats
  };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Compress system prompt aggressively for budget enforcement.
 *
 * Strips `<example>` blocks, a fixed set of verbose heading sections and HTML
 * comments, then normalizes whitespace. Whitespace normalization runs last so
 * that gaps left behind by the removed content are collapsed as well.
 *
 * @param {string|Array} systemPromptContent - System prompt text, or content
 *   blocks that are flattened to text via `systemPrompt.flattenBlocks`
 * @param {Object} payload - Request payload (not read by this function)
 * @returns {string} Compressed system prompt text
 */
function compressSystemPromptAggressively(systemPromptContent, payload) {
  let text = typeof systemPromptContent === 'string'
    ? systemPromptContent
    : systemPrompt.flattenBlocks(systemPromptContent);

  // Remove all examples
  text = text.replace(/<example>[\s\S]*?<\/example>/g, '');

  // Remove verbose sections
  text = text.replace(/# (Background|Context|Examples|Notes|Tips|Guidelines)[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, '');

  // Remove comments (before whitespace cleanup, so the holes they leave are collapsed too)
  text = text.replace(/<!--[\s\S]*?-->/g, '');

  // Remove excessive whitespace: trim trailing spaces first so that
  // whitespace-only lines count as blank when collapsing newline runs.
  text = text.replace(/[ \t]+\n/g, '\n');
  text = text.replace(/\n{3,}/g, '\n\n');

  return text;
}
|
|
246
|
+
|
|
247
|
+
/**
 * Split a total token budget into per-section allowances.
 *
 * Each allowance is a fixed fraction of the budget, rounded down:
 * 15% system prompt, 10% tool definitions, 60% message history and
 * 15% reserved for output.
 *
 * @param {number} totalBudget - Total token budget
 * @returns {Object} Allocation breakdown with `system`, `tools`, `messages` and `output`
 */
function getAllocation(totalBudget = 180000) {
  const share = (fraction) => Math.floor(totalBudget * fraction);

  return {
    system: share(0.15),
    tools: share(0.10),
    messages: share(0.60),
    output: share(0.15),
  };
}
|
|
263
|
+
|
|
264
|
+
/**
 * Report how a payload's estimated tokens are spread across its sections and
 * how that compares with the recommended allocation.
 *
 * @param {Object} payload - Request payload
 * @returns {Object} Budget breakdown analysis: raw `usage` counts, the
 *   `allocation` derived from the configured maximum (180000 when unset),
 *   per-section `percentages` as one-decimal strings, and `recommendations`
 */
function analyzeBudgetUsage(payload) {
  const usage = tokens.countPayloadTokens(payload);
  const grandTotal = usage.total;
  const allocation = getAllocation(config.tokenBudget?.max ?? 180000);

  // Share of the whole payload taken by one section; '0.0' for an empty payload.
  const shareOfTotal = (sectionTokens) => {
    if (grandTotal > 0) {
      return ((sectionTokens / grandTotal) * 100).toFixed(1);
    }
    return '0.0';
  };

  const percentages = {
    system: shareOfTotal(usage.system),
    tools: shareOfTotal(usage.tools),
    messages: shareOfTotal(usage.messages),
  };

  return {
    usage,
    allocation,
    percentages,
    recommendations: generateRecommendations(usage, allocation)
  };
}
|
|
287
|
+
|
|
288
|
+
/**
 * List the sections whose token usage is above their recommended allocation,
 * each paired with the setting suggested to bring it down.
 *
 * @param {Object} breakdown - Token counts per section (`system`, `tools`, `messages`)
 * @param {Object} allocation - Recommended token allowance per section
 * @returns {Array<Object>} One `{ section, issue, suggestion }` entry per
 *   over-allocated section, in system / tools / messages order
 */
function generateRecommendations(breakdown, allocation) {
  // [section, issue, suggestion] - evaluated in this order.
  const advice = [
    [
      'system',
      'System prompt exceeds recommended allocation',
      'Enable dynamic system prompts (SYSTEM_PROMPT_MODE=dynamic)'
    ],
    [
      'tools',
      'Tool definitions exceed recommended allocation',
      'Enable minimal tool descriptions (TOOL_DESCRIPTIONS=minimal)'
    ],
    [
      'messages',
      'Message history exceeds recommended allocation',
      'Enable history compression (HISTORY_COMPRESSION_ENABLED=true)'
    ]
  ];

  return advice
    .filter(([section]) => breakdown[section] > allocation[section])
    .map(([section, issue, suggestion]) => ({ section, issue, suggestion }));
}
|
|
320
|
+
|
|
321
|
+
module.exports = {
|
|
322
|
+
checkBudget,
|
|
323
|
+
enforceBudget,
|
|
324
|
+
getAllocation,
|
|
325
|
+
analyzeBudgetUsage,
|
|
326
|
+
};
|