@juspay/neurolink 9.15.0 → 9.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +22 -20
- package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/adapters/video/videoAnalyzer.js +10 -8
- package/dist/cli/commands/setup-anthropic.js +1 -14
- package/dist/cli/commands/setup-azure.js +1 -12
- package/dist/cli/commands/setup-bedrock.js +1 -9
- package/dist/cli/commands/setup-google-ai.js +1 -12
- package/dist/cli/commands/setup-openai.js +1 -14
- package/dist/cli/commands/workflow.d.ts +27 -0
- package/dist/cli/commands/workflow.js +216 -0
- package/dist/cli/factories/commandFactory.js +79 -20
- package/dist/cli/index.js +0 -1
- package/dist/cli/parser.js +4 -1
- package/dist/cli/utils/maskCredential.d.ts +11 -0
- package/dist/cli/utils/maskCredential.js +23 -0
- package/dist/constants/contextWindows.js +107 -16
- package/dist/constants/enums.d.ts +99 -15
- package/dist/constants/enums.js +152 -22
- package/dist/context/budgetChecker.js +1 -1
- package/dist/context/contextCompactor.js +31 -4
- package/dist/context/emergencyTruncation.d.ts +21 -0
- package/dist/context/emergencyTruncation.js +88 -0
- package/dist/context/errorDetection.d.ts +16 -0
- package/dist/context/errorDetection.js +48 -1
- package/dist/context/errors.d.ts +19 -0
- package/dist/context/errors.js +21 -0
- package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/context/stages/structuredSummarizer.js +2 -2
- package/dist/core/baseProvider.js +306 -200
- package/dist/core/conversationMemoryManager.js +104 -61
- package/dist/core/evaluationProviders.js +16 -33
- package/dist/core/factory.js +237 -164
- package/dist/core/modules/GenerationHandler.js +175 -116
- package/dist/core/modules/MessageBuilder.js +222 -170
- package/dist/core/modules/StreamHandler.d.ts +1 -0
- package/dist/core/modules/StreamHandler.js +95 -27
- package/dist/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/core/modules/TelemetryHandler.js +25 -7
- package/dist/core/modules/ToolsManager.js +115 -191
- package/dist/core/redisConversationMemoryManager.js +418 -282
- package/dist/factories/providerRegistry.d.ts +5 -0
- package/dist/factories/providerRegistry.js +20 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +4 -2
- package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
- package/dist/lib/constants/contextWindows.js +107 -16
- package/dist/lib/constants/enums.d.ts +99 -15
- package/dist/lib/constants/enums.js +152 -22
- package/dist/lib/context/budgetChecker.js +1 -1
- package/dist/lib/context/contextCompactor.js +31 -4
- package/dist/lib/context/emergencyTruncation.d.ts +21 -0
- package/dist/lib/context/emergencyTruncation.js +89 -0
- package/dist/lib/context/errorDetection.d.ts +16 -0
- package/dist/lib/context/errorDetection.js +48 -1
- package/dist/lib/context/errors.d.ts +19 -0
- package/dist/lib/context/errors.js +22 -0
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/lib/context/stages/structuredSummarizer.js +2 -2
- package/dist/lib/core/baseProvider.js +306 -200
- package/dist/lib/core/conversationMemoryManager.js +104 -61
- package/dist/lib/core/evaluationProviders.js +16 -33
- package/dist/lib/core/factory.js +237 -164
- package/dist/lib/core/modules/GenerationHandler.js +175 -116
- package/dist/lib/core/modules/MessageBuilder.js +222 -170
- package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
- package/dist/lib/core/modules/StreamHandler.js +95 -27
- package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/lib/core/modules/TelemetryHandler.js +25 -7
- package/dist/lib/core/modules/ToolsManager.js +115 -191
- package/dist/lib/core/redisConversationMemoryManager.js +418 -282
- package/dist/lib/factories/providerRegistry.d.ts +5 -0
- package/dist/lib/factories/providerRegistry.js +20 -2
- package/dist/lib/index.d.ts +2 -2
- package/dist/lib/index.js +4 -2
- package/dist/lib/mcp/externalServerManager.js +66 -0
- package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/lib/mcp/mcpClientFactory.js +16 -0
- package/dist/lib/mcp/toolDiscoveryService.js +32 -6
- package/dist/lib/mcp/toolRegistry.js +193 -123
- package/dist/lib/neurolink.d.ts +6 -0
- package/dist/lib/neurolink.js +1162 -646
- package/dist/lib/providers/amazonBedrock.d.ts +1 -1
- package/dist/lib/providers/amazonBedrock.js +521 -319
- package/dist/lib/providers/anthropic.js +73 -17
- package/dist/lib/providers/anthropicBaseProvider.js +77 -17
- package/dist/lib/providers/googleAiStudio.d.ts +1 -1
- package/dist/lib/providers/googleAiStudio.js +292 -227
- package/dist/lib/providers/googleVertex.d.ts +36 -1
- package/dist/lib/providers/googleVertex.js +553 -260
- package/dist/lib/providers/ollama.js +329 -278
- package/dist/lib/providers/openAI.js +77 -19
- package/dist/lib/providers/sagemaker/parsers.js +3 -3
- package/dist/lib/providers/sagemaker/streaming.js +3 -3
- package/dist/lib/proxy/proxyFetch.js +81 -48
- package/dist/lib/rag/ChunkerFactory.js +1 -1
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/markdownChunker.js +174 -2
- package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
- package/dist/lib/rag/ragIntegration.d.ts +18 -1
- package/dist/lib/rag/ragIntegration.js +94 -14
- package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
- package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/lib/telemetry/attributes.d.ts +52 -0
- package/dist/lib/telemetry/attributes.js +61 -0
- package/dist/lib/telemetry/index.d.ts +3 -0
- package/dist/lib/telemetry/index.js +3 -0
- package/dist/lib/telemetry/telemetryService.d.ts +6 -0
- package/dist/lib/telemetry/telemetryService.js +6 -0
- package/dist/lib/telemetry/tracers.d.ts +15 -0
- package/dist/lib/telemetry/tracers.js +17 -0
- package/dist/lib/telemetry/withSpan.d.ts +9 -0
- package/dist/lib/telemetry/withSpan.js +35 -0
- package/dist/lib/types/contextTypes.d.ts +10 -0
- package/dist/lib/types/streamTypes.d.ts +14 -0
- package/dist/lib/utils/conversationMemory.js +123 -84
- package/dist/lib/utils/logger.d.ts +5 -0
- package/dist/lib/utils/logger.js +50 -2
- package/dist/lib/utils/messageBuilder.js +22 -42
- package/dist/lib/utils/modelDetection.js +3 -3
- package/dist/lib/utils/providerRetry.d.ts +41 -0
- package/dist/lib/utils/providerRetry.js +114 -0
- package/dist/lib/utils/retryability.d.ts +14 -0
- package/dist/lib/utils/retryability.js +23 -0
- package/dist/lib/utils/sanitizers/svg.js +4 -5
- package/dist/lib/utils/tokenEstimation.d.ts +11 -1
- package/dist/lib/utils/tokenEstimation.js +19 -4
- package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
- package/dist/mcp/externalServerManager.js +66 -0
- package/dist/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/mcp/mcpClientFactory.js +16 -0
- package/dist/mcp/toolDiscoveryService.js +32 -6
- package/dist/mcp/toolRegistry.js +193 -123
- package/dist/neurolink.d.ts +6 -0
- package/dist/neurolink.js +1162 -646
- package/dist/providers/amazonBedrock.d.ts +1 -1
- package/dist/providers/amazonBedrock.js +521 -319
- package/dist/providers/anthropic.js +73 -17
- package/dist/providers/anthropicBaseProvider.js +77 -17
- package/dist/providers/googleAiStudio.d.ts +1 -1
- package/dist/providers/googleAiStudio.js +292 -227
- package/dist/providers/googleVertex.d.ts +36 -1
- package/dist/providers/googleVertex.js +553 -260
- package/dist/providers/ollama.js +329 -278
- package/dist/providers/openAI.js +77 -19
- package/dist/providers/sagemaker/parsers.js +3 -3
- package/dist/providers/sagemaker/streaming.js +3 -3
- package/dist/proxy/proxyFetch.js +81 -48
- package/dist/rag/ChunkerFactory.js +1 -1
- package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/rag/chunking/markdownChunker.js +174 -2
- package/dist/rag/pipeline/contextAssembly.js +2 -1
- package/dist/rag/ragIntegration.d.ts +18 -1
- package/dist/rag/ragIntegration.js +94 -14
- package/dist/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/server/abstract/baseServerAdapter.js +4 -1
- package/dist/server/adapters/fastifyAdapter.js +35 -30
- package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/telemetry/attributes.d.ts +52 -0
- package/dist/telemetry/attributes.js +60 -0
- package/dist/telemetry/index.d.ts +3 -0
- package/dist/telemetry/index.js +3 -0
- package/dist/telemetry/telemetryService.d.ts +6 -0
- package/dist/telemetry/telemetryService.js +6 -0
- package/dist/telemetry/tracers.d.ts +15 -0
- package/dist/telemetry/tracers.js +16 -0
- package/dist/telemetry/withSpan.d.ts +9 -0
- package/dist/telemetry/withSpan.js +34 -0
- package/dist/types/contextTypes.d.ts +10 -0
- package/dist/types/streamTypes.d.ts +14 -0
- package/dist/utils/conversationMemory.js +123 -84
- package/dist/utils/logger.d.ts +5 -0
- package/dist/utils/logger.js +50 -2
- package/dist/utils/messageBuilder.js +22 -42
- package/dist/utils/modelDetection.js +3 -3
- package/dist/utils/providerRetry.d.ts +41 -0
- package/dist/utils/providerRetry.js +113 -0
- package/dist/utils/retryability.d.ts +14 -0
- package/dist/utils/retryability.js +22 -0
- package/dist/utils/sanitizers/svg.js +4 -5
- package/dist/utils/tokenEstimation.d.ts +11 -1
- package/dist/utils/tokenEstimation.js +19 -4
- package/dist/utils/videoAnalysisProcessor.js +7 -3
- package/dist/workflow/config.d.ts +26 -26
- package/package.json +1 -1
package/dist/constants/enums.js
CHANGED
|
@@ -28,22 +28,36 @@ export var AIProviderName;
|
|
|
28
28
|
export var OpenRouterModels;
|
|
29
29
|
(function (OpenRouterModels) {
|
|
30
30
|
// Anthropic Claude models
|
|
31
|
+
OpenRouterModels["CLAUDE_OPUS_4_6"] = "anthropic/claude-opus-4.6";
|
|
32
|
+
OpenRouterModels["CLAUDE_SONNET_4_6"] = "anthropic/claude-sonnet-4.6";
|
|
33
|
+
OpenRouterModels["CLAUDE_SONNET_4_5"] = "anthropic/claude-sonnet-4.5";
|
|
34
|
+
OpenRouterModels["CLAUDE_HAIKU_4_5"] = "anthropic/claude-haiku-4.5";
|
|
31
35
|
OpenRouterModels["CLAUDE_3_5_SONNET"] = "anthropic/claude-3-5-sonnet";
|
|
32
36
|
OpenRouterModels["CLAUDE_3_5_HAIKU"] = "anthropic/claude-3-5-haiku";
|
|
33
37
|
OpenRouterModels["CLAUDE_3_OPUS"] = "anthropic/claude-3-opus";
|
|
34
38
|
// OpenAI models
|
|
39
|
+
OpenRouterModels["GPT_5_2"] = "openai/gpt-5.2";
|
|
40
|
+
OpenRouterModels["GPT_5"] = "openai/gpt-5";
|
|
35
41
|
OpenRouterModels["GPT_4O"] = "openai/gpt-4o";
|
|
36
42
|
OpenRouterModels["GPT_4O_MINI"] = "openai/gpt-4o-mini";
|
|
37
43
|
OpenRouterModels["GPT_4_TURBO"] = "openai/gpt-4-turbo";
|
|
38
44
|
// Google models
|
|
45
|
+
OpenRouterModels["GEMINI_3_1_PRO_PREVIEW"] = "google/gemini-3.1-pro-preview";
|
|
46
|
+
OpenRouterModels["GEMINI_3_PRO_PREVIEW"] = "google/gemini-3-pro-preview";
|
|
47
|
+
OpenRouterModels["GEMINI_3_FLASH_PREVIEW"] = "google/gemini-3-flash-preview";
|
|
48
|
+
OpenRouterModels["GEMINI_2_5_FLASH"] = "google/gemini-2.5-flash";
|
|
49
|
+
OpenRouterModels["GEMINI_2_5_FLASH_LITE"] = "google/gemini-2.5-flash-lite";
|
|
39
50
|
OpenRouterModels["GEMINI_2_0_FLASH"] = "google/gemini-2.0-flash";
|
|
40
|
-
OpenRouterModels["GEMINI_1_5_PRO"] = "google/gemini-1.5-pro";
|
|
41
51
|
// Meta Llama models
|
|
42
52
|
OpenRouterModels["LLAMA_3_1_70B"] = "meta-llama/llama-3.1-70b-instruct";
|
|
43
53
|
OpenRouterModels["LLAMA_3_1_8B"] = "meta-llama/llama-3.1-8b-instruct";
|
|
44
54
|
// Mistral models
|
|
45
55
|
OpenRouterModels["MISTRAL_LARGE"] = "mistralai/mistral-large";
|
|
46
56
|
OpenRouterModels["MIXTRAL_8X7B"] = "mistralai/mixtral-8x7b-instruct";
|
|
57
|
+
// DeepSeek models
|
|
58
|
+
OpenRouterModels["DEEPSEEK_R1"] = "deepseek/deepseek-r1";
|
|
59
|
+
// xAI models
|
|
60
|
+
OpenRouterModels["GROK_4_1_FAST"] = "xai/grok-4.1-fast";
|
|
47
61
|
})(OpenRouterModels || (OpenRouterModels = {}));
|
|
48
62
|
/**
|
|
49
63
|
* Supported Models for Amazon Bedrock
|
|
@@ -53,7 +67,10 @@ export var BedrockModels;
|
|
|
53
67
|
// ============================================================================
|
|
54
68
|
// ANTHROPIC CLAUDE MODELS
|
|
55
69
|
// ============================================================================
|
|
56
|
-
// Claude 4.
|
|
70
|
+
// Claude 4.6 Series (Latest - February 2026)
|
|
71
|
+
BedrockModels["CLAUDE_4_6_OPUS"] = "anthropic.claude-opus-4-6-v1:0";
|
|
72
|
+
BedrockModels["CLAUDE_4_6_SONNET"] = "anthropic.claude-sonnet-4-6";
|
|
73
|
+
// Claude 4.5 Series (September-November 2025)
|
|
57
74
|
BedrockModels["CLAUDE_4_5_OPUS"] = "anthropic.claude-opus-4-5-20251124-v1:0";
|
|
58
75
|
BedrockModels["CLAUDE_4_5_SONNET"] = "anthropic.claude-sonnet-4-5-20250929-v1:0";
|
|
59
76
|
BedrockModels["CLAUDE_4_5_HAIKU"] = "anthropic.claude-haiku-4-5-20251001-v1:0";
|
|
@@ -160,20 +177,66 @@ export var BedrockModels;
|
|
|
160
177
|
// AI21 Labs Models
|
|
161
178
|
BedrockModels["JAMBA_1_5_LARGE"] = "ai21.jamba-1-5-large-v1:0";
|
|
162
179
|
BedrockModels["JAMBA_1_5_MINI"] = "ai21.jamba-1-5-mini-v1:0";
|
|
180
|
+
// ============================================================================
|
|
181
|
+
// NEW PROVIDERS (February 2026)
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// Writer Models
|
|
184
|
+
BedrockModels["WRITER_PALMYRA_X5"] = "writer.palmyra-x5-v1:0";
|
|
185
|
+
BedrockModels["WRITER_PALMYRA_X4"] = "writer.palmyra-x4-v1:0";
|
|
186
|
+
// MiniMax Models
|
|
187
|
+
BedrockModels["MINIMAX_M2_1"] = "minimax.minimax-m2.1";
|
|
188
|
+
BedrockModels["MINIMAX_M2"] = "minimax.minimax-m2";
|
|
189
|
+
// Moonshot AI (Kimi) Models
|
|
190
|
+
BedrockModels["KIMI_K2_THINKING"] = "moonshot.kimi-k2-thinking";
|
|
191
|
+
BedrockModels["KIMI_K2_5"] = "moonshotai.kimi-k2.5";
|
|
192
|
+
// NVIDIA Nemotron Models
|
|
193
|
+
BedrockModels["NVIDIA_NEMOTRON_NANO_3_30B"] = "nvidia.nemotron-nano-3-30b";
|
|
194
|
+
BedrockModels["NVIDIA_NEMOTRON_NANO_12B_V2"] = "nvidia.nemotron-nano-12b-v2";
|
|
195
|
+
BedrockModels["NVIDIA_NEMOTRON_NANO_9B_V2"] = "nvidia.nemotron-nano-9b-v2";
|
|
196
|
+
// OpenAI Open Source Models (Apache 2.0)
|
|
197
|
+
BedrockModels["OPENAI_GPT_OSS_120B"] = "openai.gpt-oss-120b-1:0";
|
|
198
|
+
BedrockModels["OPENAI_GPT_OSS_20B"] = "openai.gpt-oss-20b-1:0";
|
|
199
|
+
// Z.AI GLM Models
|
|
200
|
+
BedrockModels["GLM_4_7"] = "zai.glm-4.7";
|
|
201
|
+
BedrockModels["GLM_4_7_FLASH"] = "zai.glm-4.7-flash";
|
|
202
|
+
// Cohere Embedding & Reranking
|
|
203
|
+
BedrockModels["COHERE_EMBED_ENGLISH_V3"] = "cohere.embed-english-v3";
|
|
204
|
+
BedrockModels["COHERE_EMBED_MULTILINGUAL_V3"] = "cohere.embed-multilingual-v3";
|
|
205
|
+
BedrockModels["COHERE_EMBED_V4"] = "cohere.embed-v4:0";
|
|
206
|
+
BedrockModels["COHERE_RERANK_V3_5"] = "cohere.rerank-v3-5:0";
|
|
207
|
+
// Amazon Rerank
|
|
208
|
+
BedrockModels["AMAZON_RERANK_V1"] = "amazon.rerank-v1:0";
|
|
209
|
+
// Mistral Devstral 2
|
|
210
|
+
BedrockModels["DEVSTRAL_2_123B"] = "mistral.devstral-2-123b";
|
|
163
211
|
})(BedrockModels || (BedrockModels = {}));
|
|
164
212
|
/**
|
|
165
213
|
* Supported Models for OpenAI
|
|
166
214
|
*/
|
|
167
215
|
export var OpenAIModels;
|
|
168
216
|
(function (OpenAIModels) {
|
|
169
|
-
// GPT-5.
|
|
217
|
+
// GPT-5.3 Series (Released February 2026) - Latest coding models
|
|
218
|
+
OpenAIModels["GPT_5_3_CODEX"] = "gpt-5.3-codex";
|
|
219
|
+
// GPT-5.2 Series (Released December 11, 2025) - Flagship models
|
|
170
220
|
OpenAIModels["GPT_5_2"] = "gpt-5.2";
|
|
171
221
|
OpenAIModels["GPT_5_2_CHAT_LATEST"] = "gpt-5.2-chat-latest";
|
|
172
222
|
OpenAIModels["GPT_5_2_PRO"] = "gpt-5.2-pro";
|
|
223
|
+
OpenAIModels["GPT_5_2_CODEX"] = "gpt-5.2-codex";
|
|
224
|
+
// GPT-5.1 Series (Released October 2025)
|
|
225
|
+
OpenAIModels["GPT_5_1"] = "gpt-5.1";
|
|
226
|
+
OpenAIModels["GPT_5_1_CHAT_LATEST"] = "gpt-5.1-chat-latest";
|
|
227
|
+
OpenAIModels["GPT_5_1_CODEX"] = "gpt-5.1-codex";
|
|
228
|
+
OpenAIModels["GPT_5_1_CODEX_MAX"] = "gpt-5.1-codex-max";
|
|
229
|
+
OpenAIModels["GPT_5_1_CODEX_MINI"] = "gpt-5.1-codex-mini";
|
|
173
230
|
// GPT-5 Series (Released August 7, 2025)
|
|
174
231
|
OpenAIModels["GPT_5"] = "gpt-5";
|
|
175
232
|
OpenAIModels["GPT_5_MINI"] = "gpt-5-mini";
|
|
176
233
|
OpenAIModels["GPT_5_NANO"] = "gpt-5-nano";
|
|
234
|
+
OpenAIModels["GPT_5_PRO"] = "gpt-5-pro";
|
|
235
|
+
OpenAIModels["GPT_5_CHAT_LATEST"] = "gpt-5-chat-latest";
|
|
236
|
+
OpenAIModels["GPT_5_CODEX"] = "gpt-5-codex";
|
|
237
|
+
// GPT Open Source (Apache 2.0 - January 2026, Responses API only)
|
|
238
|
+
OpenAIModels["GPT_OSS_120B"] = "gpt-oss-120b";
|
|
239
|
+
OpenAIModels["GPT_OSS_20B"] = "gpt-oss-20b";
|
|
177
240
|
// GPT-4.1 Series (Released April 14, 2025)
|
|
178
241
|
OpenAIModels["GPT_4_1"] = "gpt-4.1";
|
|
179
242
|
OpenAIModels["GPT_4_1_MINI"] = "gpt-4.1-mini";
|
|
@@ -201,7 +264,12 @@ export var OpenAIModels;
|
|
|
201
264
|
*/
|
|
202
265
|
export var AzureOpenAIModels;
|
|
203
266
|
(function (AzureOpenAIModels) {
|
|
204
|
-
// GPT-5.
|
|
267
|
+
// GPT-5.2 Series (Latest - December 2025)
|
|
268
|
+
AzureOpenAIModels["GPT_5_2"] = "gpt-5.2";
|
|
269
|
+
AzureOpenAIModels["GPT_5_2_CHAT"] = "gpt-5.2-chat";
|
|
270
|
+
AzureOpenAIModels["GPT_5_2_PRO"] = "gpt-5.2-pro";
|
|
271
|
+
AzureOpenAIModels["GPT_5_2_CODEX"] = "gpt-5.2-codex";
|
|
272
|
+
// GPT-5.1 Series (October 2025)
|
|
205
273
|
AzureOpenAIModels["GPT_5_1"] = "gpt-5.1";
|
|
206
274
|
AzureOpenAIModels["GPT_5_1_CHAT"] = "gpt-5.1-chat";
|
|
207
275
|
AzureOpenAIModels["GPT_5_1_CODEX"] = "gpt-5.1-codex";
|
|
@@ -244,7 +312,10 @@ export var AzureOpenAIModels;
|
|
|
244
312
|
*/
|
|
245
313
|
export var VertexModels;
|
|
246
314
|
(function (VertexModels) {
|
|
247
|
-
// Claude 4.
|
|
315
|
+
// Claude 4.6 Series (Latest - February 2026)
|
|
316
|
+
VertexModels["CLAUDE_4_6_OPUS"] = "claude-opus-4-6";
|
|
317
|
+
VertexModels["CLAUDE_4_6_SONNET"] = "claude-sonnet-4-6";
|
|
318
|
+
// Claude 4.5 Series (September-November 2025)
|
|
248
319
|
VertexModels["CLAUDE_4_5_OPUS"] = "claude-opus-4-5@20251124";
|
|
249
320
|
VertexModels["CLAUDE_4_5_SONNET"] = "claude-sonnet-4-5@20250929";
|
|
250
321
|
VertexModels["CLAUDE_4_5_HAIKU"] = "claude-haiku-4-5@20251001";
|
|
@@ -260,33 +331,29 @@ export var VertexModels;
|
|
|
260
331
|
VertexModels["CLAUDE_3_SONNET"] = "claude-3-sonnet-20240229";
|
|
261
332
|
VertexModels["CLAUDE_3_OPUS"] = "claude-3-opus-20240229";
|
|
262
333
|
VertexModels["CLAUDE_3_HAIKU"] = "claude-3-haiku-20240307";
|
|
334
|
+
// Gemini 3.1 Series (Released February 2026)
|
|
335
|
+
VertexModels["GEMINI_3_1_PRO_PREVIEW"] = "gemini-3.1-pro-preview";
|
|
263
336
|
// Gemini 3 Series (Preview)
|
|
264
|
-
/** Gemini 3 Pro - Base model with adaptive thinking */
|
|
265
337
|
VertexModels["GEMINI_3_PRO"] = "gemini-3-pro";
|
|
266
|
-
/** Gemini 3 Pro Preview - Versioned preview (November 2025) */
|
|
267
338
|
VertexModels["GEMINI_3_PRO_PREVIEW_11_2025"] = "gemini-3-pro-preview-11-2025";
|
|
268
|
-
/** Gemini 3 Pro Latest - Auto-updated alias (always points to latest preview) */
|
|
269
339
|
VertexModels["GEMINI_3_PRO_LATEST"] = "gemini-3-pro-latest";
|
|
270
|
-
/** Gemini 3 Pro Preview - Generic preview (legacy) */
|
|
271
340
|
VertexModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
|
|
272
|
-
/** Gemini 3 Flash - Base model with adaptive thinking */
|
|
273
341
|
VertexModels["GEMINI_3_FLASH"] = "gemini-3-flash";
|
|
274
|
-
/** Gemini 3 Flash Preview - Versioned preview */
|
|
275
342
|
VertexModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
|
|
276
|
-
/** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
|
|
277
343
|
VertexModels["GEMINI_3_FLASH_LATEST"] = "gemini-3-flash-latest";
|
|
278
|
-
// Gemini 2.5 Series (
|
|
344
|
+
// Gemini 2.5 Series (GA)
|
|
279
345
|
VertexModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
|
|
280
346
|
VertexModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
|
|
281
347
|
VertexModels["GEMINI_2_5_FLASH_LITE"] = "gemini-2.5-flash-lite";
|
|
282
348
|
VertexModels["GEMINI_2_5_FLASH_IMAGE"] = "gemini-2.5-flash-image";
|
|
283
|
-
// Gemini 2.0 Series
|
|
349
|
+
// Gemini 2.0 Series (Deprecated - retiring Jun 2026)
|
|
284
350
|
VertexModels["GEMINI_2_0_FLASH"] = "gemini-2.0-flash";
|
|
285
351
|
VertexModels["GEMINI_2_0_FLASH_001"] = "gemini-2.0-flash-001";
|
|
286
|
-
/** Gemini 2.0 Flash Lite - GA, production-ready, cost-optimized */
|
|
287
352
|
VertexModels["GEMINI_2_0_FLASH_LITE"] = "gemini-2.0-flash-lite";
|
|
288
|
-
// Gemini 1.5 Series (
|
|
353
|
+
// Gemini 1.5 Series (Retired - returns 404)
|
|
354
|
+
/** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
|
|
289
355
|
VertexModels["GEMINI_1_5_PRO"] = "gemini-1.5-pro-002";
|
|
356
|
+
/** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
|
|
290
357
|
VertexModels["GEMINI_1_5_FLASH"] = "gemini-1.5-flash-002";
|
|
291
358
|
})(VertexModels || (VertexModels = {}));
|
|
292
359
|
/**
|
|
@@ -294,27 +361,33 @@ export var VertexModels;
|
|
|
294
361
|
*/
|
|
295
362
|
export var GoogleAIModels;
|
|
296
363
|
(function (GoogleAIModels) {
|
|
297
|
-
// Gemini 3 Series
|
|
364
|
+
// Gemini 3.1 Series (Released February 2026)
|
|
365
|
+
GoogleAIModels["GEMINI_3_1_PRO_PREVIEW"] = "gemini-3.1-pro-preview";
|
|
366
|
+
// Gemini 3 Series (Preview)
|
|
298
367
|
GoogleAIModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
|
|
299
368
|
GoogleAIModels["GEMINI_3_PRO_IMAGE_PREVIEW"] = "gemini-3-pro-image-preview";
|
|
300
369
|
GoogleAIModels["GEMINI_3_FLASH"] = "gemini-3-flash";
|
|
301
370
|
GoogleAIModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
|
|
302
|
-
// Gemini 2.5 Series
|
|
371
|
+
// Gemini 2.5 Series (GA)
|
|
303
372
|
GoogleAIModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
|
|
304
373
|
GoogleAIModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
|
|
305
374
|
GoogleAIModels["GEMINI_2_5_FLASH_LITE"] = "gemini-2.5-flash-lite";
|
|
306
375
|
GoogleAIModels["GEMINI_2_5_FLASH_IMAGE"] = "gemini-2.5-flash-image";
|
|
307
|
-
GoogleAIModels["
|
|
308
|
-
|
|
376
|
+
GoogleAIModels["GEMINI_2_5_FLASH_PREVIEW_TTS"] = "gemini-2.5-flash-preview-tts";
|
|
377
|
+
GoogleAIModels["GEMINI_2_5_PRO_PREVIEW_TTS"] = "gemini-2.5-pro-preview-tts";
|
|
378
|
+
// Gemini 2.0 Series (Deprecated - retiring Jun 2026)
|
|
309
379
|
GoogleAIModels["GEMINI_2_0_FLASH"] = "gemini-2.0-flash";
|
|
310
380
|
GoogleAIModels["GEMINI_2_0_FLASH_001"] = "gemini-2.0-flash-001";
|
|
311
381
|
GoogleAIModels["GEMINI_2_0_FLASH_LITE"] = "gemini-2.0-flash-lite";
|
|
312
382
|
GoogleAIModels["GEMINI_2_0_FLASH_IMAGE"] = "gemini-2.0-flash-preview-image-generation";
|
|
313
|
-
// Gemini 1.5 Series (
|
|
383
|
+
// Gemini 1.5 Series (Retired - returns 404)
|
|
384
|
+
/** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
|
|
314
385
|
GoogleAIModels["GEMINI_1_5_PRO"] = "gemini-1.5-pro";
|
|
386
|
+
/** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
|
|
315
387
|
GoogleAIModels["GEMINI_1_5_FLASH"] = "gemini-1.5-flash";
|
|
316
388
|
// Embedding Models
|
|
317
389
|
GoogleAIModels["GEMINI_EMBEDDING"] = "gemini-embedding-001";
|
|
390
|
+
/** @deprecated Shutdown Jan 2026. Use gemini-embedding-001 */
|
|
318
391
|
GoogleAIModels["TEXT_EMBEDDING_004"] = "text-embedding-004";
|
|
319
392
|
})(GoogleAIModels || (GoogleAIModels = {}));
|
|
320
393
|
/**
|
|
@@ -322,7 +395,10 @@ export var GoogleAIModels;
|
|
|
322
395
|
*/
|
|
323
396
|
export var AnthropicModels;
|
|
324
397
|
(function (AnthropicModels) {
|
|
325
|
-
// Claude 4.
|
|
398
|
+
// Claude 4.6 Series (Latest - February 2026)
|
|
399
|
+
AnthropicModels["CLAUDE_OPUS_4_6"] = "claude-opus-4-6";
|
|
400
|
+
AnthropicModels["CLAUDE_SONNET_4_6"] = "claude-sonnet-4-6";
|
|
401
|
+
// Claude 4.5 Series (September-November 2025)
|
|
326
402
|
AnthropicModels["CLAUDE_OPUS_4_5"] = "claude-opus-4-5-20251101";
|
|
327
403
|
AnthropicModels["CLAUDE_SONNET_4_5"] = "claude-sonnet-4-5-20250929";
|
|
328
404
|
AnthropicModels["CLAUDE_4_5_HAIKU"] = "claude-haiku-4-5-20251001";
|
|
@@ -375,6 +451,17 @@ export var MistralModels;
|
|
|
375
451
|
// Voxtral (Audio)
|
|
376
452
|
MistralModels["VOXTRAL_SMALL_LATEST"] = "voxtral-small-latest";
|
|
377
453
|
MistralModels["VOXTRAL_MINI_LATEST"] = "voxtral-mini-latest";
|
|
454
|
+
// Devstral 2 Series (December 2025)
|
|
455
|
+
MistralModels["DEVSTRAL_2"] = "devstral-2512";
|
|
456
|
+
MistralModels["DEVSTRAL_SMALL_2"] = "devstral-small-2512";
|
|
457
|
+
// Magistral Versioned (September 2025)
|
|
458
|
+
MistralModels["MAGISTRAL_MEDIUM_2509"] = "magistral-medium-2509";
|
|
459
|
+
MistralModels["MAGISTRAL_SMALL_2509"] = "magistral-small-2509";
|
|
460
|
+
// Voxtral Transcribe 2 (February 2026)
|
|
461
|
+
MistralModels["VOXTRAL_MINI_TRANSCRIBE_2"] = "voxtral-mini-2602";
|
|
462
|
+
// OCR (December 2025)
|
|
463
|
+
MistralModels["MISTRAL_OCR_3"] = "mistral-ocr-2512";
|
|
464
|
+
MistralModels["MISTRAL_OCR_LATEST"] = "mistral-ocr-latest";
|
|
378
465
|
// Specialized Models
|
|
379
466
|
MistralModels["MISTRAL_NEMO"] = "mistral-nemo";
|
|
380
467
|
MistralModels["MISTRAL_EMBED"] = "mistral-embed";
|
|
@@ -464,6 +551,24 @@ export var OllamaModels;
|
|
|
464
551
|
OllamaModels["MIXTRAL_8X22B"] = "mixtral:8x22b";
|
|
465
552
|
// Enterprise Models
|
|
466
553
|
OllamaModels["COMMAND_R_PLUS"] = "command-r-plus:104b";
|
|
554
|
+
// Z.AI GLM-5 - Flagship reasoning model (February 2026)
|
|
555
|
+
OllamaModels["GLM_5_LATEST"] = "glm-5:latest";
|
|
556
|
+
// Kimi-K2.5 - Moonshot AI multimodal agentic model
|
|
557
|
+
OllamaModels["KIMI_K2_5_LATEST"] = "kimi-k2.5:latest";
|
|
558
|
+
// Qwen 3.5 - Multimodal native agents (February 2026)
|
|
559
|
+
OllamaModels["QWEN3_5_LATEST"] = "qwen3.5:latest";
|
|
560
|
+
// Qwen3-Coder - Coding-focused agentic model
|
|
561
|
+
OllamaModels["QWEN3_CODER_LATEST"] = "qwen3-coder:latest";
|
|
562
|
+
OllamaModels["QWEN3_CODER_30B"] = "qwen3-coder:30b";
|
|
563
|
+
// DeepSeek-V3.2 - Enhanced reasoning
|
|
564
|
+
OllamaModels["DEEPSEEK_V3_2_LATEST"] = "deepseek-v3.2:latest";
|
|
565
|
+
// NVIDIA Nemotron 3 Nano - Hybrid MoE, 1M context
|
|
566
|
+
OllamaModels["NEMOTRON_3_NANO_LATEST"] = "nemotron-3-nano:latest";
|
|
567
|
+
OllamaModels["NEMOTRON_3_NANO_30B"] = "nemotron-3-nano:30b";
|
|
568
|
+
// SmolLM3 - Compact dual-mode reasoning (HuggingFace)
|
|
569
|
+
OllamaModels["SMOLLM3_3B"] = "smollm3:3b";
|
|
570
|
+
// GPT-OSS - Open-source GPT (Apache 2.0)
|
|
571
|
+
OllamaModels["GPT_OSS_LATEST"] = "gpt-oss:latest";
|
|
467
572
|
})(OllamaModels || (OllamaModels = {}));
|
|
468
573
|
/**
|
|
469
574
|
* Common Models for LiteLLM Proxy
|
|
@@ -513,6 +618,16 @@ export var LiteLLMModels;
|
|
|
513
618
|
// AWS Bedrock via LiteLLM
|
|
514
619
|
LiteLLMModels["BEDROCK_CLAUDE_3_5_SONNET"] = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0";
|
|
515
620
|
LiteLLMModels["BEDROCK_CLAUDE_3_HAIKU"] = "bedrock/anthropic.claude-3-haiku-20240307-v1:0";
|
|
621
|
+
// OpenAI GPT-5.2 via LiteLLM
|
|
622
|
+
LiteLLMModels["OPENAI_GPT_5_2"] = "openai/gpt-5.2";
|
|
623
|
+
LiteLLMModels["OPENAI_GPT_5_2_CODEX"] = "openai/gpt-5.2-codex";
|
|
624
|
+
// Anthropic Claude 4.6 via LiteLLM
|
|
625
|
+
LiteLLMModels["ANTHROPIC_CLAUDE_OPUS_4_6"] = "anthropic/claude-opus-4-6";
|
|
626
|
+
LiteLLMModels["ANTHROPIC_CLAUDE_SONNET_4_6"] = "anthropic/claude-sonnet-4-6";
|
|
627
|
+
// Google Gemini 3 via LiteLLM
|
|
628
|
+
LiteLLMModels["GEMINI_3_1_PRO"] = "gemini/gemini-3.1-pro-preview";
|
|
629
|
+
// xAI via LiteLLM
|
|
630
|
+
LiteLLMModels["XAI_GROK_4_1_FAST"] = "xai/grok-4.1-fast";
|
|
516
631
|
// Perplexity AI via LiteLLM
|
|
517
632
|
LiteLLMModels["PERPLEXITY_SONAR_PRO"] = "perplexity/sonar-pro";
|
|
518
633
|
LiteLLMModels["PERPLEXITY_SONAR_REASONING_PRO"] = "perplexity/sonar-reasoning-pro";
|
|
@@ -588,6 +703,17 @@ export var HuggingFaceModels;
|
|
|
588
703
|
// BLOOM
|
|
589
704
|
HuggingFaceModels["BLOOM_7B1"] = "bigscience/bloom-7b1";
|
|
590
705
|
HuggingFaceModels["BLOOM_1B3"] = "bigscience/bloom-1b3";
|
|
706
|
+
// Z.AI GLM-5 (February 2026)
|
|
707
|
+
HuggingFaceModels["GLM_5"] = "zai-org/GLM-5";
|
|
708
|
+
// Qwen 3.5 Multimodal (February 2026)
|
|
709
|
+
HuggingFaceModels["QWEN_3_5_397B_A17B"] = "Qwen/Qwen3.5-397B-A17B";
|
|
710
|
+
// NVIDIA Nemotron 3 Nano
|
|
711
|
+
HuggingFaceModels["NEMOTRON_3_NANO_30B"] = "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16";
|
|
712
|
+
// HuggingFace SmolLM3
|
|
713
|
+
HuggingFaceModels["SMOLLM3_3B"] = "HuggingFaceTB/SmolLM3-3B";
|
|
714
|
+
// Falcon 3 Series
|
|
715
|
+
HuggingFaceModels["FALCON_3_7B_INSTRUCT"] = "tiiuae/Falcon3-7B-Instruct";
|
|
716
|
+
HuggingFaceModels["FALCON_3_10B_INSTRUCT"] = "tiiuae/Falcon3-10B-Instruct";
|
|
591
717
|
})(HuggingFaceModels || (HuggingFaceModels = {}));
|
|
592
718
|
/**
|
|
593
719
|
* Supported Models for AWS SageMaker JumpStart
|
|
@@ -616,6 +742,10 @@ export var SageMakerModels;
|
|
|
616
742
|
SageMakerModels["FALCON_3_10B"] = "tii-falcon-3-10b-instruct";
|
|
617
743
|
SageMakerModels["FALCON_40B"] = "tii-falcon-40b-instruct";
|
|
618
744
|
SageMakerModels["FALCON_180B"] = "tii-falcon-180b";
|
|
745
|
+
// NVIDIA Nemotron 3 Nano (February 2026)
|
|
746
|
+
SageMakerModels["NEMOTRON_3_NANO_30B"] = "nvidia-nemotron-3-nano-30b";
|
|
747
|
+
// Qwen3 VL - Vision-language
|
|
748
|
+
SageMakerModels["QWEN3_VL_8B"] = "qwen3-vl-8b-instruct";
|
|
619
749
|
})(SageMakerModels || (SageMakerModels = {}));
|
|
620
750
|
/**
|
|
621
751
|
* API Versions for various providers
|
|
@@ -36,7 +36,7 @@ export function checkContextBudget(params) {
|
|
|
36
36
|
? toolDefinitions.reduce((sum, tool) => {
|
|
37
37
|
try {
|
|
38
38
|
const serialized = JSON.stringify(tool);
|
|
39
|
-
return sum +
|
|
39
|
+
return sum + estimateTokens(serialized, provider);
|
|
40
40
|
}
|
|
41
41
|
catch {
|
|
42
42
|
return sum + TOKENS_PER_TOOL_DEFINITION;
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* Stage 3: LLM Summarization (expensive -- requires LLM call)
|
|
9
9
|
* Stage 4: Sliding Window Truncation (fallback -- no LLM call)
|
|
10
10
|
*/
|
|
11
|
+
import { trace, SpanStatusCode } from "@opentelemetry/api";
|
|
11
12
|
import { estimateMessagesTokens } from "../utils/tokenEstimation.js";
|
|
12
13
|
import { logger } from "../utils/logger.js";
|
|
13
14
|
import { pruneToolOutputs } from "./stages/toolOutputPruner.js";
|
|
@@ -112,15 +113,36 @@ export class ContextCompactor {
|
|
|
112
113
|
saved: stageTokensBefore - stageTokensAfter,
|
|
113
114
|
});
|
|
114
115
|
}
|
|
115
|
-
catch {
|
|
116
|
-
|
|
116
|
+
catch (error) {
|
|
117
|
+
// Capture the actual error for debugging
|
|
118
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
119
|
+
const errorName = error instanceof Error ? error.name : "UnknownError";
|
|
120
|
+
logger.warn("[Compaction] Stage 3 (summarize) FAILED", {
|
|
117
121
|
requestId,
|
|
118
|
-
|
|
122
|
+
error: errorMessage,
|
|
123
|
+
errorName,
|
|
119
124
|
tokensBefore: stageTokensBefore,
|
|
120
125
|
tokensAfter: stageTokensBefore,
|
|
121
126
|
saved: 0,
|
|
122
127
|
});
|
|
123
|
-
//
|
|
128
|
+
// Record on OTel span for trace visibility
|
|
129
|
+
const activeSpan = trace.getActiveSpan();
|
|
130
|
+
if (activeSpan) {
|
|
131
|
+
activeSpan.addEvent("compaction.stage3.failed", {
|
|
132
|
+
"error.message": errorMessage,
|
|
133
|
+
"error.name": errorName,
|
|
134
|
+
"stage.tokens_before": stageTokensBefore,
|
|
135
|
+
});
|
|
136
|
+
if (error instanceof Error) {
|
|
137
|
+
activeSpan.recordException(error);
|
|
138
|
+
}
|
|
139
|
+
// NLK-GAP-005 fix: set error status alongside recordException
|
|
140
|
+
activeSpan.setStatus({
|
|
141
|
+
code: SpanStatusCode.ERROR,
|
|
142
|
+
message: `Compaction stage 3 (summarize) failed: ${errorMessage}`,
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
// Fall through to Stage 4 truncation as before
|
|
124
146
|
}
|
|
125
147
|
}
|
|
126
148
|
// Stage 4: Sliding Window Truncation (fallback)
|
|
@@ -129,6 +151,11 @@ export class ContextCompactor {
|
|
|
129
151
|
const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
|
|
130
152
|
const truncResult = truncateWithSlidingWindow(currentMessages, {
|
|
131
153
|
fraction: this.config.truncationFraction,
|
|
154
|
+
currentTokens: stageTokensBefore,
|
|
155
|
+
targetTokens: targetTokens,
|
|
156
|
+
provider: provider,
|
|
157
|
+
adaptiveBuffer: 0.15,
|
|
158
|
+
maxIterations: 3,
|
|
132
159
|
});
|
|
133
160
|
if (truncResult.truncated) {
|
|
134
161
|
currentMessages = truncResult.messages;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Emergency Content Truncation
|
|
3
|
+
*
|
|
4
|
+
* When message-level removal (sliding window) can't fit context into budget,
|
|
5
|
+
* this truncates the CONTENT of the longest messages as a last resort.
|
|
6
|
+
*/
|
|
7
|
+
import type { ChatMessage } from "../types/conversation.js";
|
|
8
|
+
/**
|
|
9
|
+
* Emergency content truncation: truncate the content of the longest messages
|
|
10
|
+
* to fit within the available token budget.
|
|
11
|
+
*
|
|
12
|
+
* Strategy: Sort messages by content length (descending), truncate each
|
|
13
|
+
* to a proportional share of the available budget until total fits.
|
|
14
|
+
*/
|
|
15
|
+
export declare function emergencyContentTruncation(messages: ChatMessage[], availableTokensForHistory: number, breakdown: {
|
|
16
|
+
systemPrompt: number;
|
|
17
|
+
conversationHistory: number;
|
|
18
|
+
currentPrompt: number;
|
|
19
|
+
toolDefinitions: number;
|
|
20
|
+
fileAttachments: number;
|
|
21
|
+
}, provider?: string): ChatMessage[];
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Emergency Content Truncation
|
|
3
|
+
*
|
|
4
|
+
* When message-level removal (sliding window) can't fit context into budget,
|
|
5
|
+
* this truncates the CONTENT of the longest messages as a last resort.
|
|
6
|
+
*/
|
|
7
|
+
import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../utils/tokenEstimation.js";
|
|
8
|
+
import { logger } from "../utils/logger.js";
|
|
9
|
+
/**
 * Emergency content truncation (last resort after message-level removal).
 *
 * Shortens the text of the largest messages so that the history fits into the
 * token budget left once the system prompt, current prompt, tool definitions
 * and file attachments are subtracted from `availableTokensForHistory`
 * (`breakdown.conversationHistory` is not read here).
 *
 * Messages with role "system" and messages shorter than 200 characters are
 * never shortened. A shortened message is replaced by a copy carrying
 * `metadata.truncated = true`; the caller's array is not modified.
 *
 * @param messages - Conversation history; every entry needs `role` and `content`.
 * @param availableTokensForHistory - Token allowance before the `breakdown` deductions.
 * @param breakdown - Token counts of the non-history parts of the request.
 * @param provider - Optional provider name forwarded to the token estimators.
 * @returns `messages` itself when it already fits; otherwise a copy with
 *   shortened contents when that fits; otherwise the newest contiguous run of
 *   non-system messages that fits (an empty array when nothing fits or no
 *   budget is left).
 */
export function emergencyContentTruncation(messages, availableTokensForHistory, breakdown, provider) {
    const { systemPrompt, currentPrompt, toolDefinitions, fileAttachments } = breakdown;
    // Whatever is left after the fixed request parts is the history's share.
    const historyBudget = availableTokensForHistory - systemPrompt - currentPrompt - toolDefinitions - fileAttachments;
    if (historyBudget <= 0) {
        // Nothing left for history at all: an empty history is the only safe answer.
        return [];
    }
    const currentHistoryTokens = estimateMessagesTokens(messages, provider);
    if (currentHistoryTokens <= historyBudget) {
        // Already within budget, hand back the input untouched.
        return messages;
    }
    const shortened = [...messages];
    const reductionNeeded = currentHistoryTokens - historyBudget;
    const reductionRatio = reductionNeeded / currentHistoryTokens;
    // Visit messages from longest to shortest content (stable for equal lengths).
    const contentLengths = shortened.map((entry) => entry.content.length);
    const visitOrder = contentLengths
        .map((_, position) => position)
        .sort((left, right) => contentLengths[right] - contentLengths[left]);
    let tokensSaved = 0;
    for (const position of visitOrder) {
        if (tokensSaved >= reductionNeeded) {
            break;
        }
        const original = shortened[position];
        // System messages and very short messages are left alone.
        const eligible = original.role !== "system" && !(original.content.length < 200);
        if (!eligible) {
            continue;
        }
        const originalTokens = estimateTokens(original.content, provider);
        // Shrink by the overall ratio plus an extra 5% safety margin.
        const targetTokens = Math.floor(originalTokens * (1 - reductionRatio - 0.05));
        // Skip when the target is not a real reduction or would leave 50 tokens or fewer.
        if (!(targetTokens < originalTokens && targetTokens > 50)) {
            continue;
        }
        const cut = truncateToTokenBudget(original.content, targetTokens, provider);
        if (!cut.truncated) {
            continue;
        }
        tokensSaved += originalTokens - estimateTokens(cut.text, provider);
        shortened[position] = {
            ...original,
            content: cut.text,
            metadata: { ...original.metadata, truncated: true },
        };
    }
    // A slot differs from the input only when its message was replaced above.
    const messagesModified = shortened.reduce((count, entry, position) => (entry !== messages[position] ? count + 1 : count), 0);
    logger.info("[EmergencyTruncation] Content truncation complete", {
        tokensSaved,
        reductionNeeded,
        messagesModified,
    });
    // Safety check: only return the shortened history if it really fits.
    if (estimateMessagesTokens(shortened, provider) <= historyBudget) {
        return shortened;
    }
    // Hard fallback: walk from newest to oldest, keeping non-system messages
    // until the next one would push the kept set over budget.
    const newestThatFit = [];
    for (const candidate of [...shortened].reverse()) {
        if (candidate.role === "system") {
            continue;
        }
        newestThatFit.unshift(candidate);
        if (estimateMessagesTokens(newestThatFit, provider) > historyBudget) {
            newestThatFit.shift();
            break;
        }
    }
    return newestThatFit;
}
|
|
@@ -12,3 +12,19 @@ export declare function isContextOverflowError(error: unknown): boolean;
|
|
|
12
12
|
* Identify which provider produced the context overflow error.
|
|
13
13
|
*/
|
|
14
14
|
export declare function getContextOverflowProvider(error: unknown): string | null;
|
|
15
|
+
/**
|
|
16
|
+
* Extract actual token counts from provider overflow error messages.
|
|
17
|
+
*
|
|
18
|
+
* Many providers include the actual/max token counts in their error messages:
|
|
19
|
+
* - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
|
|
20
|
+
* - Anthropic: "prompt is too long: 210000 tokens > 200000 token limit"
|
|
21
|
+
* - Google: "exceeds the maximum number of tokens (180000 > 100000)"
|
|
22
|
+
*/
|
|
23
|
+
export declare function parseProviderOverflowDetails(error: unknown): {
|
|
24
|
+
actualTokens: number;
|
|
25
|
+
budgetTokens: number;
|
|
26
|
+
} | null;
|
|
27
|
+
/**
|
|
28
|
+
* Extract error message from various error formats.
|
|
29
|
+
*/
|
|
30
|
+
export declare function extractErrorMessage(error: unknown): string | null;
|
|
@@ -86,10 +86,57 @@ export function getContextOverflowProvider(error) {
|
|
|
86
86
|
}
|
|
87
87
|
return null;
|
|
88
88
|
}
|
|
89
|
+
/**
 * Pull the actual and maximum token counts out of a provider's
 * context-overflow error message.
 *
 * Message shapes that are recognised (numbers may contain thousands commas):
 * - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
 * - Anthropic: "prompt is too long: 210000 tokens > 200000 token limit"
 * - Google: "exceeds the maximum number of tokens (180000 > 100000)"
 *
 * @param error - Any thrown value; its text is obtained via `extractErrorMessage`.
 * @returns `{ actualTokens, budgetTokens }` for the first shape that matches,
 *   or `null` when there is no message, the message is longer than 2000
 *   characters, or no shape matches.
 */
export function parseProviderOverflowDetails(error) {
    const message = extractErrorMessage(error);
    // No text to inspect, or text long enough that regex scanning is not worth the risk.
    if (!message || message.length > 2000) {
        return null;
    }
    // "145,000" -> 145000
    const toCount = (digits) => parseInt(digits.replace(/,/g, ""), 10);
    // All number groups use a single bounded character class to avoid
    // polynomial backtracking (CodeQL: js/polynomial-redos).
    // OpenAI states the two numbers in separate sentences, so both must be present.
    const openaiActual = message.match(/resulted\s+in\s+(\d[\d,]{0,19})\s*tokens/i);
    const openaiMax = message.match(/maximum\s+context\s+length\s+is\s+(\d[\d,]{0,19})/i);
    if (openaiActual && openaiMax) {
        return {
            actualTokens: toCount(openaiActual[1]),
            budgetTokens: toCount(openaiMax[1]),
        };
    }
    // Remaining shapes carry "actual" then "limit" in a single match; order matters.
    const actualThenLimitPatterns = [
        // Anthropic: "X tokens > Y" or "X tokens: Y"
        /(\d[\d,]{0,19})\s*tokens?\s*[>:]\s*(\d[\d,]{0,19})/i,
        // Google: "X > Y" or "X exceeds Y"
        /(\d[\d,]{0,19})\s*(?:>|exceeds)\s*(\d[\d,]{0,19})/i,
    ];
    for (const pattern of actualThenLimitPatterns) {
        const found = message.match(pattern);
        if (found) {
            return {
                actualTokens: toCount(found[1]),
                budgetTokens: toCount(found[2]),
            };
        }
    }
    return null;
}
|
|
89
136
|
/**
|
|
90
137
|
* Extract error message from various error formats.
|
|
91
138
|
*/
|
|
92
|
-
function extractErrorMessage(error) {
|
|
139
|
+
export function extractErrorMessage(error) {
|
|
93
140
|
if (!error) {
|
|
94
141
|
return null;
|
|
95
142
|
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-specific error classes for budget and overflow scenarios.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Thrown when context exceeds model budget after all compaction stages,
|
|
6
|
+
* preventing wasteful API calls to providers that will reject the request.
|
|
7
|
+
*/
|
|
8
|
+
export declare class ContextBudgetExceededError extends Error {
|
|
9
|
+
readonly estimatedTokens: number;
|
|
10
|
+
readonly availableTokens: number;
|
|
11
|
+
readonly stagesUsed: string[];
|
|
12
|
+
readonly breakdown: Record<string, number>;
|
|
13
|
+
constructor(message: string, details: {
|
|
14
|
+
estimatedTokens: number;
|
|
15
|
+
availableTokens: number;
|
|
16
|
+
stagesUsed: string[];
|
|
17
|
+
breakdown: Record<string, number>;
|
|
18
|
+
});
|
|
19
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-specific error classes for budget and overflow scenarios.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Thrown when context exceeds model budget after all compaction stages,
 * preventing wasteful API calls to providers that will reject the request.
 *
 * Besides the message, the error exposes the values passed in `details`:
 * - `estimatedTokens`: estimated size of the request
 * - `availableTokens`: budget the request had to fit into
 * - `stagesUsed`: names of the compaction stages that were applied
 * - `breakdown`: token counts keyed by request part
 *
 * The `details` values are stored by reference (not copied).
 */
export class ContextBudgetExceededError extends Error {
    estimatedTokens;
    availableTokens;
    stagesUsed;
    breakdown;
    /**
     * @param message - Human-readable description of the overflow.
     * @param details - Token figures and compaction stages to attach to the error.
     */
    constructor(message, details) {
        super(message);
        // Re-attach the real prototype: when a build down-levels classes to ES5,
        // `super(...)` on Error returns a plain Error and `instanceof` checks
        // against this class (or subclasses) would otherwise fail.
        Object.setPrototypeOf(this, new.target.prototype);
        this.name = "ContextBudgetExceededError";
        this.estimatedTokens = details.estimatedTokens;
        this.availableTokens = details.availableTokens;
        this.stagesUsed = details.stagesUsed;
        this.breakdown = details.breakdown;
    }
}
|
|
@@ -4,6 +4,12 @@
|
|
|
4
4
|
* Non-destructive fallback: tags oldest messages as truncated
|
|
5
5
|
* instead of deleting them. Always preserves first message pair.
|
|
6
6
|
* Removes messages in pairs to maintain role alternation.
|
|
7
|
+
*
|
|
8
|
+
* Features:
|
|
9
|
+
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 3 passes.
|
|
11
|
+
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
|
+
* message content proportionally instead of returning no-op.
|
|
7
13
|
*/
|
|
8
14
|
import type { ChatMessage } from "../../types/conversation.js";
|
|
9
15
|
import type { TruncationConfig, TruncationResult } from "../../types/contextTypes.js";
|