@juspay/neurolink 9.14.0 → 9.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +15 -15
- package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/adapters/video/videoAnalyzer.js +10 -8
- package/dist/auth/anthropicOAuth.d.ts +377 -0
- package/dist/auth/anthropicOAuth.js +914 -0
- package/dist/auth/index.d.ts +20 -0
- package/dist/auth/index.js +29 -0
- package/dist/auth/tokenStore.d.ts +225 -0
- package/dist/auth/tokenStore.js +521 -0
- package/dist/cli/commands/auth.d.ts +50 -0
- package/dist/cli/commands/auth.js +1115 -0
- package/dist/cli/commands/setup-anthropic.js +1 -14
- package/dist/cli/commands/setup-azure.js +1 -12
- package/dist/cli/commands/setup-bedrock.js +1 -9
- package/dist/cli/commands/setup-google-ai.js +1 -12
- package/dist/cli/commands/setup-openai.js +1 -14
- package/dist/cli/commands/workflow.d.ts +27 -0
- package/dist/cli/commands/workflow.js +216 -0
- package/dist/cli/factories/authCommandFactory.d.ts +52 -0
- package/dist/cli/factories/authCommandFactory.js +146 -0
- package/dist/cli/factories/commandFactory.d.ts +6 -0
- package/dist/cli/factories/commandFactory.js +171 -22
- package/dist/cli/index.js +0 -1
- package/dist/cli/parser.js +14 -2
- package/dist/cli/utils/maskCredential.d.ts +11 -0
- package/dist/cli/utils/maskCredential.js +23 -0
- package/dist/constants/contextWindows.js +107 -16
- package/dist/constants/enums.d.ts +119 -15
- package/dist/constants/enums.js +182 -22
- package/dist/constants/index.d.ts +3 -1
- package/dist/constants/index.js +11 -1
- package/dist/context/budgetChecker.js +1 -1
- package/dist/context/contextCompactor.js +31 -4
- package/dist/context/emergencyTruncation.d.ts +21 -0
- package/dist/context/emergencyTruncation.js +88 -0
- package/dist/context/errorDetection.d.ts +16 -0
- package/dist/context/errorDetection.js +48 -1
- package/dist/context/errors.d.ts +19 -0
- package/dist/context/errors.js +21 -0
- package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/core/baseProvider.js +306 -200
- package/dist/core/conversationMemoryManager.js +104 -61
- package/dist/core/evaluationProviders.js +16 -33
- package/dist/core/factory.js +237 -164
- package/dist/core/modules/GenerationHandler.js +175 -116
- package/dist/core/modules/MessageBuilder.js +222 -170
- package/dist/core/modules/StreamHandler.d.ts +1 -0
- package/dist/core/modules/StreamHandler.js +95 -27
- package/dist/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/core/modules/TelemetryHandler.js +25 -7
- package/dist/core/modules/ToolsManager.js +115 -191
- package/dist/core/redisConversationMemoryManager.js +418 -282
- package/dist/factories/providerRegistry.d.ts +5 -0
- package/dist/factories/providerRegistry.js +20 -2
- package/dist/index.d.ts +3 -3
- package/dist/index.js +4 -2
- package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
- package/dist/lib/auth/anthropicOAuth.d.ts +377 -0
- package/dist/lib/auth/anthropicOAuth.js +915 -0
- package/dist/lib/auth/index.d.ts +20 -0
- package/dist/lib/auth/index.js +30 -0
- package/dist/lib/auth/tokenStore.d.ts +225 -0
- package/dist/lib/auth/tokenStore.js +522 -0
- package/dist/lib/constants/contextWindows.js +107 -16
- package/dist/lib/constants/enums.d.ts +119 -15
- package/dist/lib/constants/enums.js +182 -22
- package/dist/lib/constants/index.d.ts +3 -1
- package/dist/lib/constants/index.js +11 -1
- package/dist/lib/context/budgetChecker.js +1 -1
- package/dist/lib/context/contextCompactor.js +31 -4
- package/dist/lib/context/emergencyTruncation.d.ts +21 -0
- package/dist/lib/context/emergencyTruncation.js +89 -0
- package/dist/lib/context/errorDetection.d.ts +16 -0
- package/dist/lib/context/errorDetection.js +48 -1
- package/dist/lib/context/errors.d.ts +19 -0
- package/dist/lib/context/errors.js +22 -0
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/lib/core/baseProvider.js +306 -200
- package/dist/lib/core/conversationMemoryManager.js +104 -61
- package/dist/lib/core/evaluationProviders.js +16 -33
- package/dist/lib/core/factory.js +237 -164
- package/dist/lib/core/modules/GenerationHandler.js +175 -116
- package/dist/lib/core/modules/MessageBuilder.js +222 -170
- package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
- package/dist/lib/core/modules/StreamHandler.js +95 -27
- package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/lib/core/modules/TelemetryHandler.js +25 -7
- package/dist/lib/core/modules/ToolsManager.js +115 -191
- package/dist/lib/core/redisConversationMemoryManager.js +418 -282
- package/dist/lib/factories/providerRegistry.d.ts +5 -0
- package/dist/lib/factories/providerRegistry.js +20 -2
- package/dist/lib/index.d.ts +3 -3
- package/dist/lib/index.js +4 -2
- package/dist/lib/mcp/externalServerManager.js +66 -0
- package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/lib/mcp/mcpClientFactory.js +16 -0
- package/dist/lib/mcp/toolDiscoveryService.js +32 -6
- package/dist/lib/mcp/toolRegistry.js +193 -123
- package/dist/lib/models/anthropicModels.d.ts +267 -0
- package/dist/lib/models/anthropicModels.js +528 -0
- package/dist/lib/neurolink.d.ts +6 -0
- package/dist/lib/neurolink.js +1162 -646
- package/dist/lib/providers/amazonBedrock.d.ts +1 -1
- package/dist/lib/providers/amazonBedrock.js +521 -319
- package/dist/lib/providers/anthropic.d.ts +123 -2
- package/dist/lib/providers/anthropic.js +873 -27
- package/dist/lib/providers/anthropicBaseProvider.js +77 -17
- package/dist/lib/providers/googleAiStudio.d.ts +1 -1
- package/dist/lib/providers/googleAiStudio.js +292 -227
- package/dist/lib/providers/googleVertex.d.ts +36 -1
- package/dist/lib/providers/googleVertex.js +553 -260
- package/dist/lib/providers/ollama.js +329 -278
- package/dist/lib/providers/openAI.js +77 -19
- package/dist/lib/providers/sagemaker/parsers.js +3 -3
- package/dist/lib/providers/sagemaker/streaming.js +3 -3
- package/dist/lib/proxy/proxyFetch.js +81 -48
- package/dist/lib/rag/ChunkerFactory.js +1 -1
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/markdownChunker.js +174 -2
- package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
- package/dist/lib/rag/ragIntegration.d.ts +18 -1
- package/dist/lib/rag/ragIntegration.js +94 -14
- package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
- package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/lib/telemetry/attributes.d.ts +52 -0
- package/dist/lib/telemetry/attributes.js +61 -0
- package/dist/lib/telemetry/index.d.ts +3 -0
- package/dist/lib/telemetry/index.js +3 -0
- package/dist/lib/telemetry/telemetryService.d.ts +6 -0
- package/dist/lib/telemetry/telemetryService.js +6 -0
- package/dist/lib/telemetry/tracers.d.ts +15 -0
- package/dist/lib/telemetry/tracers.js +17 -0
- package/dist/lib/telemetry/withSpan.d.ts +9 -0
- package/dist/lib/telemetry/withSpan.js +35 -0
- package/dist/lib/types/contextTypes.d.ts +10 -0
- package/dist/lib/types/errors.d.ts +62 -0
- package/dist/lib/types/errors.js +107 -0
- package/dist/lib/types/index.d.ts +2 -1
- package/dist/lib/types/index.js +2 -0
- package/dist/lib/types/providers.d.ts +107 -0
- package/dist/lib/types/providers.js +69 -0
- package/dist/lib/types/streamTypes.d.ts +14 -0
- package/dist/lib/types/subscriptionTypes.d.ts +893 -0
- package/dist/lib/types/subscriptionTypes.js +8 -0
- package/dist/lib/utils/conversationMemory.js +121 -82
- package/dist/lib/utils/logger.d.ts +5 -0
- package/dist/lib/utils/logger.js +50 -2
- package/dist/lib/utils/messageBuilder.js +22 -42
- package/dist/lib/utils/modelDetection.js +3 -3
- package/dist/lib/utils/providerConfig.d.ts +167 -0
- package/dist/lib/utils/providerConfig.js +619 -9
- package/dist/lib/utils/providerRetry.d.ts +41 -0
- package/dist/lib/utils/providerRetry.js +114 -0
- package/dist/lib/utils/retryability.d.ts +14 -0
- package/dist/lib/utils/retryability.js +23 -0
- package/dist/lib/utils/sanitizers/svg.js +4 -5
- package/dist/lib/utils/tokenEstimation.d.ts +11 -1
- package/dist/lib/utils/tokenEstimation.js +19 -4
- package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
- package/dist/mcp/externalServerManager.js +66 -0
- package/dist/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/mcp/mcpClientFactory.js +16 -0
- package/dist/mcp/toolDiscoveryService.js +32 -6
- package/dist/mcp/toolRegistry.js +193 -123
- package/dist/models/anthropicModels.d.ts +267 -0
- package/dist/models/anthropicModels.js +527 -0
- package/dist/neurolink.d.ts +6 -0
- package/dist/neurolink.js +1162 -646
- package/dist/providers/amazonBedrock.d.ts +1 -1
- package/dist/providers/amazonBedrock.js +521 -319
- package/dist/providers/anthropic.d.ts +123 -2
- package/dist/providers/anthropic.js +873 -27
- package/dist/providers/anthropicBaseProvider.js +77 -17
- package/dist/providers/googleAiStudio.d.ts +1 -1
- package/dist/providers/googleAiStudio.js +292 -227
- package/dist/providers/googleVertex.d.ts +36 -1
- package/dist/providers/googleVertex.js +553 -260
- package/dist/providers/ollama.js +329 -278
- package/dist/providers/openAI.js +77 -19
- package/dist/providers/sagemaker/parsers.js +3 -3
- package/dist/providers/sagemaker/streaming.js +3 -3
- package/dist/proxy/proxyFetch.js +81 -48
- package/dist/rag/ChunkerFactory.js +1 -1
- package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/rag/chunking/markdownChunker.js +174 -2
- package/dist/rag/pipeline/contextAssembly.js +2 -1
- package/dist/rag/ragIntegration.d.ts +18 -1
- package/dist/rag/ragIntegration.js +94 -14
- package/dist/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/server/abstract/baseServerAdapter.js +4 -1
- package/dist/server/adapters/fastifyAdapter.js +35 -30
- package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/telemetry/attributes.d.ts +52 -0
- package/dist/telemetry/attributes.js +60 -0
- package/dist/telemetry/index.d.ts +3 -0
- package/dist/telemetry/index.js +3 -0
- package/dist/telemetry/telemetryService.d.ts +6 -0
- package/dist/telemetry/telemetryService.js +6 -0
- package/dist/telemetry/tracers.d.ts +15 -0
- package/dist/telemetry/tracers.js +16 -0
- package/dist/telemetry/withSpan.d.ts +9 -0
- package/dist/telemetry/withSpan.js +34 -0
- package/dist/types/contextTypes.d.ts +10 -0
- package/dist/types/errors.d.ts +62 -0
- package/dist/types/errors.js +107 -0
- package/dist/types/index.d.ts +2 -1
- package/dist/types/index.js +2 -0
- package/dist/types/providers.d.ts +107 -0
- package/dist/types/providers.js +69 -0
- package/dist/types/streamTypes.d.ts +14 -0
- package/dist/types/subscriptionTypes.d.ts +893 -0
- package/dist/types/subscriptionTypes.js +7 -0
- package/dist/utils/conversationMemory.js +121 -82
- package/dist/utils/logger.d.ts +5 -0
- package/dist/utils/logger.js +50 -2
- package/dist/utils/messageBuilder.js +22 -42
- package/dist/utils/modelDetection.js +3 -3
- package/dist/utils/providerConfig.d.ts +167 -0
- package/dist/utils/providerConfig.js +619 -9
- package/dist/utils/providerRetry.d.ts +41 -0
- package/dist/utils/providerRetry.js +113 -0
- package/dist/utils/retryability.d.ts +14 -0
- package/dist/utils/retryability.js +22 -0
- package/dist/utils/sanitizers/svg.js +4 -5
- package/dist/utils/tokenEstimation.d.ts +11 -1
- package/dist/utils/tokenEstimation.js +19 -4
- package/dist/utils/videoAnalysisProcessor.js +7 -3
- package/dist/workflow/config.d.ts +26 -26
- package/package.json +2 -1
|
@@ -36,7 +36,7 @@ export function checkContextBudget(params) {
|
|
|
36
36
|
? toolDefinitions.reduce((sum, tool) => {
|
|
37
37
|
try {
|
|
38
38
|
const serialized = JSON.stringify(tool);
|
|
39
|
-
return sum +
|
|
39
|
+
return sum + estimateTokens(serialized, provider);
|
|
40
40
|
}
|
|
41
41
|
catch {
|
|
42
42
|
return sum + TOKENS_PER_TOOL_DEFINITION;
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* Stage 3: LLM Summarization (expensive -- requires LLM call)
|
|
9
9
|
* Stage 4: Sliding Window Truncation (fallback -- no LLM call)
|
|
10
10
|
*/
|
|
11
|
+
import { trace, SpanStatusCode } from "@opentelemetry/api";
|
|
11
12
|
import { estimateMessagesTokens } from "../utils/tokenEstimation.js";
|
|
12
13
|
import { logger } from "../utils/logger.js";
|
|
13
14
|
import { pruneToolOutputs } from "./stages/toolOutputPruner.js";
|
|
@@ -112,15 +113,36 @@ export class ContextCompactor {
|
|
|
112
113
|
saved: stageTokensBefore - stageTokensAfter,
|
|
113
114
|
});
|
|
114
115
|
}
|
|
115
|
-
catch {
|
|
116
|
-
|
|
116
|
+
catch (error) {
|
|
117
|
+
// Capture the actual error for debugging
|
|
118
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
119
|
+
const errorName = error instanceof Error ? error.name : "UnknownError";
|
|
120
|
+
logger.warn("[Compaction] Stage 3 (summarize) FAILED", {
|
|
117
121
|
requestId,
|
|
118
|
-
|
|
122
|
+
error: errorMessage,
|
|
123
|
+
errorName,
|
|
119
124
|
tokensBefore: stageTokensBefore,
|
|
120
125
|
tokensAfter: stageTokensBefore,
|
|
121
126
|
saved: 0,
|
|
122
127
|
});
|
|
123
|
-
//
|
|
128
|
+
// Record on OTel span for trace visibility
|
|
129
|
+
const activeSpan = trace.getActiveSpan();
|
|
130
|
+
if (activeSpan) {
|
|
131
|
+
activeSpan.addEvent("compaction.stage3.failed", {
|
|
132
|
+
"error.message": errorMessage,
|
|
133
|
+
"error.name": errorName,
|
|
134
|
+
"stage.tokens_before": stageTokensBefore,
|
|
135
|
+
});
|
|
136
|
+
if (error instanceof Error) {
|
|
137
|
+
activeSpan.recordException(error);
|
|
138
|
+
}
|
|
139
|
+
// NLK-GAP-005 fix: set error status alongside recordException
|
|
140
|
+
activeSpan.setStatus({
|
|
141
|
+
code: SpanStatusCode.ERROR,
|
|
142
|
+
message: `Compaction stage 3 (summarize) failed: ${errorMessage}`,
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
// Fall through to Stage 4 truncation as before
|
|
124
146
|
}
|
|
125
147
|
}
|
|
126
148
|
// Stage 4: Sliding Window Truncation (fallback)
|
|
@@ -129,6 +151,11 @@ export class ContextCompactor {
|
|
|
129
151
|
const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
|
|
130
152
|
const truncResult = truncateWithSlidingWindow(currentMessages, {
|
|
131
153
|
fraction: this.config.truncationFraction,
|
|
154
|
+
currentTokens: stageTokensBefore,
|
|
155
|
+
targetTokens: targetTokens,
|
|
156
|
+
provider: provider,
|
|
157
|
+
adaptiveBuffer: 0.15,
|
|
158
|
+
maxIterations: 3,
|
|
132
159
|
});
|
|
133
160
|
if (truncResult.truncated) {
|
|
134
161
|
currentMessages = truncResult.messages;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Emergency Content Truncation
|
|
3
|
+
*
|
|
4
|
+
* When message-level removal (sliding window) can't fit context into budget,
|
|
5
|
+
* this truncates the CONTENT of the longest messages as a last resort.
|
|
6
|
+
*/
|
|
7
|
+
import type { ChatMessage } from "../types/conversation.js";
|
|
8
|
+
/**
|
|
9
|
+
* Emergency content truncation: truncate the content of the longest messages
|
|
10
|
+
* to fit within the available token budget.
|
|
11
|
+
*
|
|
12
|
+
* Strategy: Sort messages by content length (descending), truncate each
|
|
13
|
+
* to a proportional share of the available budget until total fits.
|
|
14
|
+
*/
|
|
15
|
+
export declare function emergencyContentTruncation(messages: ChatMessage[], availableTokensForHistory: number, breakdown: {
|
|
16
|
+
systemPrompt: number;
|
|
17
|
+
conversationHistory: number;
|
|
18
|
+
currentPrompt: number;
|
|
19
|
+
toolDefinitions: number;
|
|
20
|
+
fileAttachments: number;
|
|
21
|
+
}, provider?: string): ChatMessage[];
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Emergency Content Truncation
|
|
3
|
+
*
|
|
4
|
+
* When message-level removal (sliding window) can't fit context into budget,
|
|
5
|
+
* this truncates the CONTENT of the longest messages as a last resort.
|
|
6
|
+
*/
|
|
7
|
+
import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../utils/tokenEstimation.js";
|
|
8
|
+
import { logger } from "../utils/logger.js";
|
|
9
|
+
/**
|
|
10
|
+
* Emergency content truncation: truncate the content of the longest messages
|
|
11
|
+
* to fit within the available token budget.
|
|
12
|
+
*
|
|
13
|
+
* Strategy: Sort messages by content length (descending), truncate each
|
|
14
|
+
* to a proportional share of the available budget until total fits.
|
|
15
|
+
*/
|
|
16
|
+
export function emergencyContentTruncation(messages, availableTokensForHistory, breakdown, provider) {
|
|
17
|
+
// Budget available for conversation history specifically
|
|
18
|
+
const historyBudget = availableTokensForHistory -
|
|
19
|
+
breakdown.systemPrompt -
|
|
20
|
+
breakdown.currentPrompt -
|
|
21
|
+
breakdown.toolDefinitions -
|
|
22
|
+
breakdown.fileAttachments;
|
|
23
|
+
if (historyBudget <= 0) {
|
|
24
|
+
// No room for history: return empty to guarantee budget safety
|
|
25
|
+
return [];
|
|
26
|
+
}
|
|
27
|
+
const currentHistoryTokens = estimateMessagesTokens(messages, provider);
|
|
28
|
+
if (currentHistoryTokens <= historyBudget) {
|
|
29
|
+
return messages; // Already fits
|
|
30
|
+
}
|
|
31
|
+
// Calculate per-message budgets proportional to original size,
|
|
32
|
+
// but cap large messages to free space for others
|
|
33
|
+
const result = [...messages];
|
|
34
|
+
const reductionNeeded = currentHistoryTokens - historyBudget;
|
|
35
|
+
const reductionRatio = reductionNeeded / currentHistoryTokens;
|
|
36
|
+
// Sort indices by content length descending (truncate biggest first)
|
|
37
|
+
const sortedIndices = result
|
|
38
|
+
.map((msg, idx) => ({ idx, len: msg.content.length }))
|
|
39
|
+
.sort((a, b) => b.len - a.len);
|
|
40
|
+
let tokensSaved = 0;
|
|
41
|
+
for (const { idx } of sortedIndices) {
|
|
42
|
+
if (tokensSaved >= reductionNeeded) {
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
const msg = result[idx];
|
|
46
|
+
// Don't truncate system messages or very short messages
|
|
47
|
+
if (msg.role === "system" || msg.content.length < 200) {
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
const msgTokens = estimateTokens(msg.content, provider);
|
|
51
|
+
const targetTokens = Math.floor(msgTokens * (1 - reductionRatio - 0.05));
|
|
52
|
+
if (targetTokens < msgTokens && targetTokens > 50) {
|
|
53
|
+
const truncated = truncateToTokenBudget(msg.content, targetTokens, provider);
|
|
54
|
+
if (truncated.truncated) {
|
|
55
|
+
const savedThisMsg = msgTokens - estimateTokens(truncated.text, provider);
|
|
56
|
+
tokensSaved += savedThisMsg;
|
|
57
|
+
result[idx] = {
|
|
58
|
+
...msg,
|
|
59
|
+
content: truncated.text,
|
|
60
|
+
metadata: { ...msg.metadata, truncated: true },
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
logger.info("[EmergencyTruncation] Content truncation complete", {
|
|
66
|
+
tokensSaved,
|
|
67
|
+
reductionNeeded,
|
|
68
|
+
messagesModified: result.filter((m, i) => m !== messages[i]).length,
|
|
69
|
+
});
|
|
70
|
+
// Final safety check: guarantee returned history fits budget
|
|
71
|
+
if (estimateMessagesTokens(result, provider) <= historyBudget) {
|
|
72
|
+
return result;
|
|
73
|
+
}
|
|
74
|
+
// Hard fallback: keep newest non-system messages that fit
|
|
75
|
+
const fallback = [];
|
|
76
|
+
for (let i = result.length - 1; i >= 0; i--) {
|
|
77
|
+
const msg = result[i];
|
|
78
|
+
if (msg.role === "system") {
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
fallback.unshift(msg);
|
|
82
|
+
if (estimateMessagesTokens(fallback, provider) > historyBudget) {
|
|
83
|
+
fallback.shift();
|
|
84
|
+
break;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
return fallback;
|
|
88
|
+
}
|
|
@@ -12,3 +12,19 @@ export declare function isContextOverflowError(error: unknown): boolean;
|
|
|
12
12
|
* Identify which provider produced the context overflow error.
|
|
13
13
|
*/
|
|
14
14
|
export declare function getContextOverflowProvider(error: unknown): string | null;
|
|
15
|
+
/**
|
|
16
|
+
* Extract actual token counts from provider overflow error messages.
|
|
17
|
+
*
|
|
18
|
+
* Many providers include the actual/max token counts in their error messages:
|
|
19
|
+
* - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
|
|
20
|
+
* - Anthropic: "prompt is too long: 180000 tokens > 200000 token limit"
|
|
21
|
+
* - Google: "exceeds the maximum number of tokens (180000 > 100000)"
|
|
22
|
+
*/
|
|
23
|
+
export declare function parseProviderOverflowDetails(error: unknown): {
|
|
24
|
+
actualTokens: number;
|
|
25
|
+
budgetTokens: number;
|
|
26
|
+
} | null;
|
|
27
|
+
/**
|
|
28
|
+
* Extract error message from various error formats.
|
|
29
|
+
*/
|
|
30
|
+
export declare function extractErrorMessage(error: unknown): string | null;
|
|
@@ -86,10 +86,57 @@ export function getContextOverflowProvider(error) {
|
|
|
86
86
|
}
|
|
87
87
|
return null;
|
|
88
88
|
}
|
|
89
|
+
/**
|
|
90
|
+
* Extract actual token counts from provider overflow error messages.
|
|
91
|
+
*
|
|
92
|
+
* Many providers include the actual/max token counts in their error messages:
|
|
93
|
+
* - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
|
|
94
|
+
* - Anthropic: "prompt is too long: 180000 tokens > 200000 token limit"
|
|
95
|
+
* - Google: "exceeds the maximum number of tokens (180000 > 100000)"
|
|
96
|
+
*/
|
|
97
|
+
export function parseProviderOverflowDetails(error) {
|
|
98
|
+
const message = extractErrorMessage(error);
|
|
99
|
+
if (!message) {
|
|
100
|
+
return null;
|
|
101
|
+
}
|
|
102
|
+
// Guard against excessively long inputs that could slow regex matching
|
|
103
|
+
if (message.length > 2000) {
|
|
104
|
+
return null;
|
|
105
|
+
}
|
|
106
|
+
// OpenAI pattern: "resulted in X tokens" + "maximum context length is Y"
|
|
107
|
+
// Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
|
|
108
|
+
const openaiActual = message.match(/resulted\s+in\s+(\d[\d,]{0,19})\s*tokens/i);
|
|
109
|
+
const openaiMax = message.match(/maximum\s+context\s+length\s+is\s+(\d[\d,]{0,19})/i);
|
|
110
|
+
if (openaiActual && openaiMax) {
|
|
111
|
+
return {
|
|
112
|
+
actualTokens: parseInt(openaiActual[1].replace(/,/g, ""), 10),
|
|
113
|
+
budgetTokens: parseInt(openaiMax[1].replace(/,/g, ""), 10),
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
// Anthropic pattern: "X tokens > Y token limit" or "X tokens, limit Y"
|
|
117
|
+
// Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
|
|
118
|
+
const anthropicMatch = message.match(/(\d[\d,]{0,19})\s*tokens?\s*[>:]\s*(\d[\d,]{0,19})/i);
|
|
119
|
+
if (anthropicMatch) {
|
|
120
|
+
return {
|
|
121
|
+
actualTokens: parseInt(anthropicMatch[1].replace(/,/g, ""), 10),
|
|
122
|
+
budgetTokens: parseInt(anthropicMatch[2].replace(/,/g, ""), 10),
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
// Google pattern: "X > Y" or "X exceeds Y"
|
|
126
|
+
// Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
|
|
127
|
+
const googleMatch = message.match(/(\d[\d,]{0,19})\s*(?:>|exceeds)\s*(\d[\d,]{0,19})/i);
|
|
128
|
+
if (googleMatch) {
|
|
129
|
+
return {
|
|
130
|
+
actualTokens: parseInt(googleMatch[1].replace(/,/g, ""), 10),
|
|
131
|
+
budgetTokens: parseInt(googleMatch[2].replace(/,/g, ""), 10),
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
return null;
|
|
135
|
+
}
|
|
89
136
|
/**
|
|
90
137
|
* Extract error message from various error formats.
|
|
91
138
|
*/
|
|
92
|
-
function extractErrorMessage(error) {
|
|
139
|
+
export function extractErrorMessage(error) {
|
|
93
140
|
if (!error) {
|
|
94
141
|
return null;
|
|
95
142
|
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-specific error classes for budget and overflow scenarios.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Thrown when context exceeds model budget after all compaction stages,
|
|
6
|
+
* preventing wasteful API calls to providers that will reject the request.
|
|
7
|
+
*/
|
|
8
|
+
export declare class ContextBudgetExceededError extends Error {
|
|
9
|
+
readonly estimatedTokens: number;
|
|
10
|
+
readonly availableTokens: number;
|
|
11
|
+
readonly stagesUsed: string[];
|
|
12
|
+
readonly breakdown: Record<string, number>;
|
|
13
|
+
constructor(message: string, details: {
|
|
14
|
+
estimatedTokens: number;
|
|
15
|
+
availableTokens: number;
|
|
16
|
+
stagesUsed: string[];
|
|
17
|
+
breakdown: Record<string, number>;
|
|
18
|
+
});
|
|
19
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-specific error classes for budget and overflow scenarios.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Thrown when context exceeds model budget after all compaction stages,
|
|
6
|
+
* preventing wasteful API calls to providers that will reject the request.
|
|
7
|
+
*/
|
|
8
|
+
export class ContextBudgetExceededError extends Error {
|
|
9
|
+
estimatedTokens;
|
|
10
|
+
availableTokens;
|
|
11
|
+
stagesUsed;
|
|
12
|
+
breakdown;
|
|
13
|
+
constructor(message, details) {
|
|
14
|
+
super(message);
|
|
15
|
+
this.name = "ContextBudgetExceededError";
|
|
16
|
+
this.estimatedTokens = details.estimatedTokens;
|
|
17
|
+
this.availableTokens = details.availableTokens;
|
|
18
|
+
this.stagesUsed = details.stagesUsed;
|
|
19
|
+
this.breakdown = details.breakdown;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
@@ -4,6 +4,12 @@
|
|
|
4
4
|
* Non-destructive fallback: tags oldest messages as truncated
|
|
5
5
|
* instead of deleting them. Always preserves first message pair.
|
|
6
6
|
* Removes messages in pairs to maintain role alternation.
|
|
7
|
+
*
|
|
8
|
+
* Features:
|
|
9
|
+
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 3 passes.
|
|
11
|
+
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
|
+
* message content proportionally instead of returning no-op.
|
|
7
13
|
*/
|
|
8
14
|
import type { ChatMessage } from "../../types/conversation.js";
|
|
9
15
|
import type { TruncationConfig, TruncationResult } from "../../types/contextTypes.js";
|
|
@@ -4,39 +4,174 @@
|
|
|
4
4
|
* Non-destructive fallback: tags oldest messages as truncated
|
|
5
5
|
* instead of deleting them. Always preserves first message pair.
|
|
6
6
|
* Removes messages in pairs to maintain role alternation.
|
|
7
|
+
*
|
|
8
|
+
* Features:
|
|
9
|
+
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 3 passes.
|
|
11
|
+
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
|
+
* message content proportionally instead of returning no-op.
|
|
7
13
|
*/
|
|
8
14
|
import { randomUUID } from "crypto";
|
|
15
|
+
import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../../utils/tokenEstimation.js";
|
|
16
|
+
import { logger } from "../../utils/logger.js";
|
|
9
17
|
const TRUNCATION_MARKER_CONTENT = "[Earlier conversation history was truncated to fit within context limits]";
|
|
18
|
+
/**
|
|
19
|
+
* For conversations with <= 4 messages that exceed token budget,
|
|
20
|
+
* truncate the CONTENT of the longest messages rather than removing messages.
|
|
21
|
+
*
|
|
22
|
+
* Strategy:
|
|
23
|
+
* 1. Calculate each message's proportional share of the token budget
|
|
24
|
+
* 2. Truncate messages that exceed their share using truncateToTokenBudget()
|
|
25
|
+
* 3. Never truncate messages below 200 tokens (preserve minimum context)
|
|
26
|
+
*/
|
|
27
|
+
function truncateSmallConversation(messages, config) {
|
|
28
|
+
// If no target tokens provided, we can't do content truncation
|
|
29
|
+
if (!config?.targetTokens) {
|
|
30
|
+
return { truncated: false, messages, messagesRemoved: 0 };
|
|
31
|
+
}
|
|
32
|
+
const provider = config.provider;
|
|
33
|
+
const targetTokens = config.targetTokens;
|
|
34
|
+
const currentTokens = estimateMessagesTokens(messages, provider);
|
|
35
|
+
if (currentTokens <= targetTokens) {
|
|
36
|
+
return { truncated: false, messages, messagesRemoved: 0 };
|
|
37
|
+
}
|
|
38
|
+
const MINIMUM_MSG_TOKENS = 200;
|
|
39
|
+
const FRAMING_OVERHEAD = 24 + messages.length * 4; // conversation + per-message overhead
|
|
40
|
+
// Available budget for actual content
|
|
41
|
+
const contentBudget = targetTokens - FRAMING_OVERHEAD;
|
|
42
|
+
if (contentBudget <= 0) {
|
|
43
|
+
return { truncated: false, messages, messagesRemoved: 0 };
|
|
44
|
+
}
|
|
45
|
+
// Calculate current content tokens per message
|
|
46
|
+
const msgTokens = messages.map((msg) => estimateTokens(msg.content, provider));
|
|
47
|
+
const totalContentTokens = msgTokens.reduce((sum, t) => sum + t, 0);
|
|
48
|
+
// Each message gets a proportional share of the content budget
|
|
49
|
+
const result = [...messages];
|
|
50
|
+
let totalSaved = 0;
|
|
51
|
+
for (let i = 0; i < result.length; i++) {
|
|
52
|
+
const msg = result[i];
|
|
53
|
+
// Don't truncate system/summary messages
|
|
54
|
+
if (msg.role === "system" || msg.metadata?.isSummary) {
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
const proportionalBudget = Math.floor((msgTokens[i] / totalContentTokens) * contentBudget);
|
|
58
|
+
const msgBudget = Math.max(MINIMUM_MSG_TOKENS, proportionalBudget);
|
|
59
|
+
if (msgTokens[i] > msgBudget) {
|
|
60
|
+
const truncated = truncateToTokenBudget(msg.content, msgBudget, provider);
|
|
61
|
+
if (truncated.truncated) {
|
|
62
|
+
totalSaved += msgTokens[i] - estimateTokens(truncated.text, provider);
|
|
63
|
+
result[i] = {
|
|
64
|
+
...msg,
|
|
65
|
+
content: truncated.text,
|
|
66
|
+
metadata: { ...msg.metadata, truncated: true },
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
if (totalSaved > 0) {
|
|
72
|
+
const finalTokens = estimateMessagesTokens(result, provider);
|
|
73
|
+
logger.info("[Truncation] Small conversation content truncated", {
|
|
74
|
+
messageCount: messages.length,
|
|
75
|
+
tokensSaved: totalSaved,
|
|
76
|
+
targetTokens,
|
|
77
|
+
finalTokens,
|
|
78
|
+
});
|
|
79
|
+
return {
|
|
80
|
+
truncated: finalTokens <= targetTokens,
|
|
81
|
+
messages: result,
|
|
82
|
+
messagesRemoved: 0, // No messages removed, only content truncated
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
return { truncated: false, messages, messagesRemoved: 0 };
|
|
86
|
+
}
|
|
10
87
|
export function truncateWithSlidingWindow(messages, config) {
|
|
11
|
-
const fraction = config?.fraction ?? 0.5;
|
|
12
88
|
if (messages.length <= 4) {
|
|
13
|
-
|
|
89
|
+
// Delegate to content truncation for small conversations (BUG-005)
|
|
90
|
+
return truncateSmallConversation(messages, config);
|
|
91
|
+
}
|
|
92
|
+
// ADAPTIVE MODE: calculate fraction from actual overage (PERF-001)
|
|
93
|
+
let fraction;
|
|
94
|
+
if (config?.currentTokens &&
|
|
95
|
+
config?.targetTokens &&
|
|
96
|
+
config.currentTokens > config.targetTokens) {
|
|
97
|
+
const overageRatio = (config.currentTokens - config.targetTokens) / config.currentTokens;
|
|
98
|
+
const buffer = config?.adaptiveBuffer ?? 0.15;
|
|
99
|
+
// Required fraction = overage ratio + buffer, clamped to [0.1, 0.9]
|
|
100
|
+
fraction = Math.min(0.9, Math.max(0.1, overageRatio + buffer));
|
|
101
|
+
logger.info("[Truncation] Adaptive fraction calculated", {
|
|
102
|
+
currentTokens: config.currentTokens,
|
|
103
|
+
targetTokens: config.targetTokens,
|
|
104
|
+
overageRatio: Math.round(overageRatio * 100),
|
|
105
|
+
fraction: Math.round(fraction * 100),
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
else {
|
|
109
|
+
// Fallback to configured or default fraction
|
|
110
|
+
fraction = config?.fraction ?? 0.5;
|
|
14
111
|
}
|
|
15
112
|
// Always preserve first user-assistant pair
|
|
16
113
|
const firstPair = messages.slice(0, 2);
|
|
17
|
-
// Calculate how many messages to remove from the middle
|
|
18
114
|
const remainingMessages = messages.slice(2);
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
115
|
+
// ITERATIVE: if first pass isn't enough, increase fraction
|
|
116
|
+
const maxIterations = config?.maxIterations ?? 3;
|
|
117
|
+
let currentFraction = fraction;
|
|
118
|
+
for (let iteration = 0; iteration < maxIterations; iteration++) {
|
|
119
|
+
const removeCount = Math.floor(remainingMessages.length * currentFraction);
|
|
120
|
+
const evenRemoveCount = removeCount - (removeCount % 2);
|
|
121
|
+
if (evenRemoveCount <= 0) {
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
const keptAfterTruncation = remainingMessages.slice(evenRemoveCount);
|
|
125
|
+
const truncationMarker = {
|
|
126
|
+
id: `truncation-${randomUUID()}`,
|
|
127
|
+
role: "system",
|
|
128
|
+
content: TRUNCATION_MARKER_CONTENT,
|
|
129
|
+
timestamp: new Date().toISOString(),
|
|
130
|
+
metadata: { isSummary: false, truncated: true },
|
|
131
|
+
};
|
|
132
|
+
const candidateMessages = [
|
|
133
|
+
...firstPair,
|
|
134
|
+
truncationMarker,
|
|
135
|
+
...keptAfterTruncation,
|
|
136
|
+
];
|
|
137
|
+
// If we have token targets, verify the result fits
|
|
138
|
+
if (config?.targetTokens) {
|
|
139
|
+
const candidateTokens = estimateMessagesTokens(candidateMessages, config.provider);
|
|
140
|
+
if (candidateTokens <= config.targetTokens) {
|
|
141
|
+
return {
|
|
142
|
+
truncated: true,
|
|
143
|
+
messages: candidateMessages,
|
|
144
|
+
messagesRemoved: evenRemoveCount,
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
// Not enough -- increase fraction by 25% for next iteration
|
|
148
|
+
currentFraction = Math.min(0.95, currentFraction + 0.25);
|
|
149
|
+
continue;
|
|
150
|
+
}
|
|
151
|
+
// No token targets -- single-pass with calculated fraction
|
|
152
|
+
return {
|
|
153
|
+
truncated: true,
|
|
154
|
+
messages: candidateMessages,
|
|
155
|
+
messagesRemoved: evenRemoveCount,
|
|
156
|
+
};
|
|
24
157
|
}
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
158
|
+
// All iterations exhausted -- return best effort (most aggressive truncation)
|
|
159
|
+
const maxRemove = Math.floor(remainingMessages.length * 0.95);
|
|
160
|
+
const evenMaxRemove = maxRemove - (maxRemove % 2);
|
|
161
|
+
if (evenMaxRemove > 0) {
|
|
162
|
+
const keptMessages = remainingMessages.slice(evenMaxRemove);
|
|
163
|
+
const truncationMarker = {
|
|
164
|
+
id: `truncation-${randomUUID()}`,
|
|
165
|
+
role: "system",
|
|
166
|
+
content: TRUNCATION_MARKER_CONTENT,
|
|
167
|
+
timestamp: new Date().toISOString(),
|
|
168
|
+
metadata: { isSummary: false, truncated: true },
|
|
169
|
+
};
|
|
170
|
+
return {
|
|
34
171
|
truncated: true,
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
messagesRemoved: evenRemoveCount,
|
|
41
|
-
};
|
|
172
|
+
messages: [...firstPair, truncationMarker, ...keptMessages],
|
|
173
|
+
messagesRemoved: evenMaxRemove,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
return { truncated: false, messages, messagesRemoved: 0 };
|
|
42
177
|
}
|