@juspay/neurolink 9.15.0 → 9.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +22 -20
  3. package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
  4. package/dist/adapters/video/videoAnalyzer.js +10 -8
  5. package/dist/cli/commands/setup-anthropic.js +1 -14
  6. package/dist/cli/commands/setup-azure.js +1 -12
  7. package/dist/cli/commands/setup-bedrock.js +1 -9
  8. package/dist/cli/commands/setup-google-ai.js +1 -12
  9. package/dist/cli/commands/setup-openai.js +1 -14
  10. package/dist/cli/commands/workflow.d.ts +27 -0
  11. package/dist/cli/commands/workflow.js +216 -0
  12. package/dist/cli/factories/commandFactory.js +79 -20
  13. package/dist/cli/index.js +0 -1
  14. package/dist/cli/parser.js +4 -1
  15. package/dist/cli/utils/maskCredential.d.ts +11 -0
  16. package/dist/cli/utils/maskCredential.js +23 -0
  17. package/dist/constants/contextWindows.js +107 -16
  18. package/dist/constants/enums.d.ts +99 -15
  19. package/dist/constants/enums.js +152 -22
  20. package/dist/context/budgetChecker.js +1 -1
  21. package/dist/context/contextCompactor.js +31 -4
  22. package/dist/context/emergencyTruncation.d.ts +21 -0
  23. package/dist/context/emergencyTruncation.js +88 -0
  24. package/dist/context/errorDetection.d.ts +16 -0
  25. package/dist/context/errorDetection.js +48 -1
  26. package/dist/context/errors.d.ts +19 -0
  27. package/dist/context/errors.js +21 -0
  28. package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
  29. package/dist/context/stages/slidingWindowTruncator.js +159 -24
  30. package/dist/context/stages/structuredSummarizer.js +2 -2
  31. package/dist/core/baseProvider.js +306 -200
  32. package/dist/core/conversationMemoryManager.js +104 -61
  33. package/dist/core/evaluationProviders.js +16 -33
  34. package/dist/core/factory.js +237 -164
  35. package/dist/core/modules/GenerationHandler.js +175 -116
  36. package/dist/core/modules/MessageBuilder.js +222 -170
  37. package/dist/core/modules/StreamHandler.d.ts +1 -0
  38. package/dist/core/modules/StreamHandler.js +95 -27
  39. package/dist/core/modules/TelemetryHandler.d.ts +10 -1
  40. package/dist/core/modules/TelemetryHandler.js +25 -7
  41. package/dist/core/modules/ToolsManager.js +115 -191
  42. package/dist/core/redisConversationMemoryManager.js +418 -282
  43. package/dist/factories/providerRegistry.d.ts +5 -0
  44. package/dist/factories/providerRegistry.js +20 -2
  45. package/dist/index.d.ts +2 -2
  46. package/dist/index.js +4 -2
  47. package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
  48. package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
  49. package/dist/lib/constants/contextWindows.js +107 -16
  50. package/dist/lib/constants/enums.d.ts +99 -15
  51. package/dist/lib/constants/enums.js +152 -22
  52. package/dist/lib/context/budgetChecker.js +1 -1
  53. package/dist/lib/context/contextCompactor.js +31 -4
  54. package/dist/lib/context/emergencyTruncation.d.ts +21 -0
  55. package/dist/lib/context/emergencyTruncation.js +89 -0
  56. package/dist/lib/context/errorDetection.d.ts +16 -0
  57. package/dist/lib/context/errorDetection.js +48 -1
  58. package/dist/lib/context/errors.d.ts +19 -0
  59. package/dist/lib/context/errors.js +22 -0
  60. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
  61. package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
  62. package/dist/lib/context/stages/structuredSummarizer.js +2 -2
  63. package/dist/lib/core/baseProvider.js +306 -200
  64. package/dist/lib/core/conversationMemoryManager.js +104 -61
  65. package/dist/lib/core/evaluationProviders.js +16 -33
  66. package/dist/lib/core/factory.js +237 -164
  67. package/dist/lib/core/modules/GenerationHandler.js +175 -116
  68. package/dist/lib/core/modules/MessageBuilder.js +222 -170
  69. package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
  70. package/dist/lib/core/modules/StreamHandler.js +95 -27
  71. package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
  72. package/dist/lib/core/modules/TelemetryHandler.js +25 -7
  73. package/dist/lib/core/modules/ToolsManager.js +115 -191
  74. package/dist/lib/core/redisConversationMemoryManager.js +418 -282
  75. package/dist/lib/factories/providerRegistry.d.ts +5 -0
  76. package/dist/lib/factories/providerRegistry.js +20 -2
  77. package/dist/lib/index.d.ts +2 -2
  78. package/dist/lib/index.js +4 -2
  79. package/dist/lib/mcp/externalServerManager.js +66 -0
  80. package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
  81. package/dist/lib/mcp/mcpClientFactory.js +16 -0
  82. package/dist/lib/mcp/toolDiscoveryService.js +32 -6
  83. package/dist/lib/mcp/toolRegistry.js +193 -123
  84. package/dist/lib/neurolink.d.ts +6 -0
  85. package/dist/lib/neurolink.js +1162 -646
  86. package/dist/lib/providers/amazonBedrock.d.ts +1 -1
  87. package/dist/lib/providers/amazonBedrock.js +521 -319
  88. package/dist/lib/providers/anthropic.js +73 -17
  89. package/dist/lib/providers/anthropicBaseProvider.js +77 -17
  90. package/dist/lib/providers/googleAiStudio.d.ts +1 -1
  91. package/dist/lib/providers/googleAiStudio.js +292 -227
  92. package/dist/lib/providers/googleVertex.d.ts +36 -1
  93. package/dist/lib/providers/googleVertex.js +553 -260
  94. package/dist/lib/providers/ollama.js +329 -278
  95. package/dist/lib/providers/openAI.js +77 -19
  96. package/dist/lib/providers/sagemaker/parsers.js +3 -3
  97. package/dist/lib/providers/sagemaker/streaming.js +3 -3
  98. package/dist/lib/proxy/proxyFetch.js +81 -48
  99. package/dist/lib/rag/ChunkerFactory.js +1 -1
  100. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
  101. package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
  102. package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
  103. package/dist/lib/rag/chunking/markdownChunker.js +174 -2
  104. package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
  105. package/dist/lib/rag/ragIntegration.d.ts +18 -1
  106. package/dist/lib/rag/ragIntegration.js +94 -14
  107. package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
  108. package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
  109. package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
  110. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
  111. package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
  112. package/dist/lib/telemetry/attributes.d.ts +52 -0
  113. package/dist/lib/telemetry/attributes.js +61 -0
  114. package/dist/lib/telemetry/index.d.ts +3 -0
  115. package/dist/lib/telemetry/index.js +3 -0
  116. package/dist/lib/telemetry/telemetryService.d.ts +6 -0
  117. package/dist/lib/telemetry/telemetryService.js +6 -0
  118. package/dist/lib/telemetry/tracers.d.ts +15 -0
  119. package/dist/lib/telemetry/tracers.js +17 -0
  120. package/dist/lib/telemetry/withSpan.d.ts +9 -0
  121. package/dist/lib/telemetry/withSpan.js +35 -0
  122. package/dist/lib/types/contextTypes.d.ts +10 -0
  123. package/dist/lib/types/streamTypes.d.ts +14 -0
  124. package/dist/lib/utils/conversationMemory.js +123 -84
  125. package/dist/lib/utils/logger.d.ts +5 -0
  126. package/dist/lib/utils/logger.js +50 -2
  127. package/dist/lib/utils/messageBuilder.js +22 -42
  128. package/dist/lib/utils/modelDetection.js +3 -3
  129. package/dist/lib/utils/providerRetry.d.ts +41 -0
  130. package/dist/lib/utils/providerRetry.js +114 -0
  131. package/dist/lib/utils/retryability.d.ts +14 -0
  132. package/dist/lib/utils/retryability.js +23 -0
  133. package/dist/lib/utils/sanitizers/svg.js +4 -5
  134. package/dist/lib/utils/tokenEstimation.d.ts +11 -1
  135. package/dist/lib/utils/tokenEstimation.js +19 -4
  136. package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
  137. package/dist/mcp/externalServerManager.js +66 -0
  138. package/dist/mcp/mcpCircuitBreaker.js +24 -0
  139. package/dist/mcp/mcpClientFactory.js +16 -0
  140. package/dist/mcp/toolDiscoveryService.js +32 -6
  141. package/dist/mcp/toolRegistry.js +193 -123
  142. package/dist/neurolink.d.ts +6 -0
  143. package/dist/neurolink.js +1162 -646
  144. package/dist/providers/amazonBedrock.d.ts +1 -1
  145. package/dist/providers/amazonBedrock.js +521 -319
  146. package/dist/providers/anthropic.js +73 -17
  147. package/dist/providers/anthropicBaseProvider.js +77 -17
  148. package/dist/providers/googleAiStudio.d.ts +1 -1
  149. package/dist/providers/googleAiStudio.js +292 -227
  150. package/dist/providers/googleVertex.d.ts +36 -1
  151. package/dist/providers/googleVertex.js +553 -260
  152. package/dist/providers/ollama.js +329 -278
  153. package/dist/providers/openAI.js +77 -19
  154. package/dist/providers/sagemaker/parsers.js +3 -3
  155. package/dist/providers/sagemaker/streaming.js +3 -3
  156. package/dist/proxy/proxyFetch.js +81 -48
  157. package/dist/rag/ChunkerFactory.js +1 -1
  158. package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
  159. package/dist/rag/chunkers/MarkdownChunker.js +213 -9
  160. package/dist/rag/chunking/markdownChunker.d.ts +16 -0
  161. package/dist/rag/chunking/markdownChunker.js +174 -2
  162. package/dist/rag/pipeline/contextAssembly.js +2 -1
  163. package/dist/rag/ragIntegration.d.ts +18 -1
  164. package/dist/rag/ragIntegration.js +94 -14
  165. package/dist/rag/retrieval/vectorQueryTool.js +21 -4
  166. package/dist/server/abstract/baseServerAdapter.js +4 -1
  167. package/dist/server/adapters/fastifyAdapter.js +35 -30
  168. package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
  169. package/dist/services/server/ai/observability/instrumentation.js +39 -0
  170. package/dist/telemetry/attributes.d.ts +52 -0
  171. package/dist/telemetry/attributes.js +60 -0
  172. package/dist/telemetry/index.d.ts +3 -0
  173. package/dist/telemetry/index.js +3 -0
  174. package/dist/telemetry/telemetryService.d.ts +6 -0
  175. package/dist/telemetry/telemetryService.js +6 -0
  176. package/dist/telemetry/tracers.d.ts +15 -0
  177. package/dist/telemetry/tracers.js +16 -0
  178. package/dist/telemetry/withSpan.d.ts +9 -0
  179. package/dist/telemetry/withSpan.js +34 -0
  180. package/dist/types/contextTypes.d.ts +10 -0
  181. package/dist/types/streamTypes.d.ts +14 -0
  182. package/dist/utils/conversationMemory.js +123 -84
  183. package/dist/utils/logger.d.ts +5 -0
  184. package/dist/utils/logger.js +50 -2
  185. package/dist/utils/messageBuilder.js +22 -42
  186. package/dist/utils/modelDetection.js +3 -3
  187. package/dist/utils/providerRetry.d.ts +41 -0
  188. package/dist/utils/providerRetry.js +113 -0
  189. package/dist/utils/retryability.d.ts +14 -0
  190. package/dist/utils/retryability.js +22 -0
  191. package/dist/utils/sanitizers/svg.js +4 -5
  192. package/dist/utils/tokenEstimation.d.ts +11 -1
  193. package/dist/utils/tokenEstimation.js +19 -4
  194. package/dist/utils/videoAnalysisProcessor.js +7 -3
  195. package/dist/workflow/config.d.ts +26 -26
  196. package/package.json +1 -1
@@ -28,22 +28,36 @@ export var AIProviderName;
28
28
  export var OpenRouterModels;
29
29
  (function (OpenRouterModels) {
30
30
  // Anthropic Claude models
31
+ OpenRouterModels["CLAUDE_OPUS_4_6"] = "anthropic/claude-opus-4.6";
32
+ OpenRouterModels["CLAUDE_SONNET_4_6"] = "anthropic/claude-sonnet-4.6";
33
+ OpenRouterModels["CLAUDE_SONNET_4_5"] = "anthropic/claude-sonnet-4.5";
34
+ OpenRouterModels["CLAUDE_HAIKU_4_5"] = "anthropic/claude-haiku-4.5";
31
35
  OpenRouterModels["CLAUDE_3_5_SONNET"] = "anthropic/claude-3-5-sonnet";
32
36
  OpenRouterModels["CLAUDE_3_5_HAIKU"] = "anthropic/claude-3-5-haiku";
33
37
  OpenRouterModels["CLAUDE_3_OPUS"] = "anthropic/claude-3-opus";
34
38
  // OpenAI models
39
+ OpenRouterModels["GPT_5_2"] = "openai/gpt-5.2";
40
+ OpenRouterModels["GPT_5"] = "openai/gpt-5";
35
41
  OpenRouterModels["GPT_4O"] = "openai/gpt-4o";
36
42
  OpenRouterModels["GPT_4O_MINI"] = "openai/gpt-4o-mini";
37
43
  OpenRouterModels["GPT_4_TURBO"] = "openai/gpt-4-turbo";
38
44
  // Google models
45
+ OpenRouterModels["GEMINI_3_1_PRO_PREVIEW"] = "google/gemini-3.1-pro-preview";
46
+ OpenRouterModels["GEMINI_3_PRO_PREVIEW"] = "google/gemini-3-pro-preview";
47
+ OpenRouterModels["GEMINI_3_FLASH_PREVIEW"] = "google/gemini-3-flash-preview";
48
+ OpenRouterModels["GEMINI_2_5_FLASH"] = "google/gemini-2.5-flash";
49
+ OpenRouterModels["GEMINI_2_5_FLASH_LITE"] = "google/gemini-2.5-flash-lite";
39
50
  OpenRouterModels["GEMINI_2_0_FLASH"] = "google/gemini-2.0-flash";
40
- OpenRouterModels["GEMINI_1_5_PRO"] = "google/gemini-1.5-pro";
41
51
  // Meta Llama models
42
52
  OpenRouterModels["LLAMA_3_1_70B"] = "meta-llama/llama-3.1-70b-instruct";
43
53
  OpenRouterModels["LLAMA_3_1_8B"] = "meta-llama/llama-3.1-8b-instruct";
44
54
  // Mistral models
45
55
  OpenRouterModels["MISTRAL_LARGE"] = "mistralai/mistral-large";
46
56
  OpenRouterModels["MIXTRAL_8X7B"] = "mistralai/mixtral-8x7b-instruct";
57
+ // DeepSeek models
58
+ OpenRouterModels["DEEPSEEK_R1"] = "deepseek/deepseek-r1";
59
+ // xAI models
60
+ OpenRouterModels["GROK_4_1_FAST"] = "xai/grok-4.1-fast";
47
61
  })(OpenRouterModels || (OpenRouterModels = {}));
48
62
  /**
49
63
  * Supported Models for Amazon Bedrock
@@ -53,7 +67,10 @@ export var BedrockModels;
53
67
  // ============================================================================
54
68
  // ANTHROPIC CLAUDE MODELS
55
69
  // ============================================================================
56
- // Claude 4.5 Series (Latest - September-November 2025)
70
+ // Claude 4.6 Series (Latest - February 2026)
71
+ BedrockModels["CLAUDE_4_6_OPUS"] = "anthropic.claude-opus-4-6-v1:0";
72
+ BedrockModels["CLAUDE_4_6_SONNET"] = "anthropic.claude-sonnet-4-6";
73
+ // Claude 4.5 Series (September-November 2025)
57
74
  BedrockModels["CLAUDE_4_5_OPUS"] = "anthropic.claude-opus-4-5-20251124-v1:0";
58
75
  BedrockModels["CLAUDE_4_5_SONNET"] = "anthropic.claude-sonnet-4-5-20250929-v1:0";
59
76
  BedrockModels["CLAUDE_4_5_HAIKU"] = "anthropic.claude-haiku-4-5-20251001-v1:0";
@@ -160,20 +177,66 @@ export var BedrockModels;
160
177
  // AI21 Labs Models
161
178
  BedrockModels["JAMBA_1_5_LARGE"] = "ai21.jamba-1-5-large-v1:0";
162
179
  BedrockModels["JAMBA_1_5_MINI"] = "ai21.jamba-1-5-mini-v1:0";
180
+ // ============================================================================
181
+ // NEW PROVIDERS (February 2026)
182
+ // ============================================================================
183
+ // Writer Models
184
+ BedrockModels["WRITER_PALMYRA_X5"] = "writer.palmyra-x5-v1:0";
185
+ BedrockModels["WRITER_PALMYRA_X4"] = "writer.palmyra-x4-v1:0";
186
+ // MiniMax Models
187
+ BedrockModels["MINIMAX_M2_1"] = "minimax.minimax-m2.1";
188
+ BedrockModels["MINIMAX_M2"] = "minimax.minimax-m2";
189
+ // Moonshot AI (Kimi) Models
190
+ BedrockModels["KIMI_K2_THINKING"] = "moonshot.kimi-k2-thinking";
191
+ BedrockModels["KIMI_K2_5"] = "moonshotai.kimi-k2.5";
192
+ // NVIDIA Nemotron Models
193
+ BedrockModels["NVIDIA_NEMOTRON_NANO_3_30B"] = "nvidia.nemotron-nano-3-30b";
194
+ BedrockModels["NVIDIA_NEMOTRON_NANO_12B_V2"] = "nvidia.nemotron-nano-12b-v2";
195
+ BedrockModels["NVIDIA_NEMOTRON_NANO_9B_V2"] = "nvidia.nemotron-nano-9b-v2";
196
+ // OpenAI Open Source Models (Apache 2.0)
197
+ BedrockModels["OPENAI_GPT_OSS_120B"] = "openai.gpt-oss-120b-1:0";
198
+ BedrockModels["OPENAI_GPT_OSS_20B"] = "openai.gpt-oss-20b-1:0";
199
+ // Z.AI GLM Models
200
+ BedrockModels["GLM_4_7"] = "zai.glm-4.7";
201
+ BedrockModels["GLM_4_7_FLASH"] = "zai.glm-4.7-flash";
202
+ // Cohere Embedding & Reranking
203
+ BedrockModels["COHERE_EMBED_ENGLISH_V3"] = "cohere.embed-english-v3";
204
+ BedrockModels["COHERE_EMBED_MULTILINGUAL_V3"] = "cohere.embed-multilingual-v3";
205
+ BedrockModels["COHERE_EMBED_V4"] = "cohere.embed-v4:0";
206
+ BedrockModels["COHERE_RERANK_V3_5"] = "cohere.rerank-v3-5:0";
207
+ // Amazon Rerank
208
+ BedrockModels["AMAZON_RERANK_V1"] = "amazon.rerank-v1:0";
209
+ // Mistral Devstral 2
210
+ BedrockModels["DEVSTRAL_2_123B"] = "mistral.devstral-2-123b";
163
211
  })(BedrockModels || (BedrockModels = {}));
164
212
  /**
165
213
  * Supported Models for OpenAI
166
214
  */
167
215
  export var OpenAIModels;
168
216
  (function (OpenAIModels) {
169
- // GPT-5.2 Series (Released December 11, 2025) - Latest flagship models
217
+ // GPT-5.3 Series (Released February 2026) - Latest coding models
218
+ OpenAIModels["GPT_5_3_CODEX"] = "gpt-5.3-codex";
219
+ // GPT-5.2 Series (Released December 11, 2025) - Flagship models
170
220
  OpenAIModels["GPT_5_2"] = "gpt-5.2";
171
221
  OpenAIModels["GPT_5_2_CHAT_LATEST"] = "gpt-5.2-chat-latest";
172
222
  OpenAIModels["GPT_5_2_PRO"] = "gpt-5.2-pro";
223
+ OpenAIModels["GPT_5_2_CODEX"] = "gpt-5.2-codex";
224
+ // GPT-5.1 Series (Released October 2025)
225
+ OpenAIModels["GPT_5_1"] = "gpt-5.1";
226
+ OpenAIModels["GPT_5_1_CHAT_LATEST"] = "gpt-5.1-chat-latest";
227
+ OpenAIModels["GPT_5_1_CODEX"] = "gpt-5.1-codex";
228
+ OpenAIModels["GPT_5_1_CODEX_MAX"] = "gpt-5.1-codex-max";
229
+ OpenAIModels["GPT_5_1_CODEX_MINI"] = "gpt-5.1-codex-mini";
173
230
  // GPT-5 Series (Released August 7, 2025)
174
231
  OpenAIModels["GPT_5"] = "gpt-5";
175
232
  OpenAIModels["GPT_5_MINI"] = "gpt-5-mini";
176
233
  OpenAIModels["GPT_5_NANO"] = "gpt-5-nano";
234
+ OpenAIModels["GPT_5_PRO"] = "gpt-5-pro";
235
+ OpenAIModels["GPT_5_CHAT_LATEST"] = "gpt-5-chat-latest";
236
+ OpenAIModels["GPT_5_CODEX"] = "gpt-5-codex";
237
+ // GPT Open Source (Apache 2.0 - January 2026, Responses API only)
238
+ OpenAIModels["GPT_OSS_120B"] = "gpt-oss-120b";
239
+ OpenAIModels["GPT_OSS_20B"] = "gpt-oss-20b";
177
240
  // GPT-4.1 Series (Released April 14, 2025)
178
241
  OpenAIModels["GPT_4_1"] = "gpt-4.1";
179
242
  OpenAIModels["GPT_4_1_MINI"] = "gpt-4.1-mini";
@@ -201,7 +264,12 @@ export var OpenAIModels;
201
264
  */
202
265
  export var AzureOpenAIModels;
203
266
  (function (AzureOpenAIModels) {
204
- // GPT-5.1 Series (Latest - December 2025)
267
+ // GPT-5.2 Series (Latest - December 2025)
268
+ AzureOpenAIModels["GPT_5_2"] = "gpt-5.2";
269
+ AzureOpenAIModels["GPT_5_2_CHAT"] = "gpt-5.2-chat";
270
+ AzureOpenAIModels["GPT_5_2_PRO"] = "gpt-5.2-pro";
271
+ AzureOpenAIModels["GPT_5_2_CODEX"] = "gpt-5.2-codex";
272
+ // GPT-5.1 Series (October 2025)
205
273
  AzureOpenAIModels["GPT_5_1"] = "gpt-5.1";
206
274
  AzureOpenAIModels["GPT_5_1_CHAT"] = "gpt-5.1-chat";
207
275
  AzureOpenAIModels["GPT_5_1_CODEX"] = "gpt-5.1-codex";
@@ -244,7 +312,10 @@ export var AzureOpenAIModels;
244
312
  */
245
313
  export var VertexModels;
246
314
  (function (VertexModels) {
247
- // Claude 4.5 Series (Latest - December 2025)
315
+ // Claude 4.6 Series (Latest - February 2026)
316
+ VertexModels["CLAUDE_4_6_OPUS"] = "claude-opus-4-6";
317
+ VertexModels["CLAUDE_4_6_SONNET"] = "claude-sonnet-4-6";
318
+ // Claude 4.5 Series (September-November 2025)
248
319
  VertexModels["CLAUDE_4_5_OPUS"] = "claude-opus-4-5@20251124";
249
320
  VertexModels["CLAUDE_4_5_SONNET"] = "claude-sonnet-4-5@20250929";
250
321
  VertexModels["CLAUDE_4_5_HAIKU"] = "claude-haiku-4-5@20251001";
@@ -260,33 +331,29 @@ export var VertexModels;
260
331
  VertexModels["CLAUDE_3_SONNET"] = "claude-3-sonnet-20240229";
261
332
  VertexModels["CLAUDE_3_OPUS"] = "claude-3-opus-20240229";
262
333
  VertexModels["CLAUDE_3_HAIKU"] = "claude-3-haiku-20240307";
334
+ // Gemini 3.1 Series (Released February 2026)
335
+ VertexModels["GEMINI_3_1_PRO_PREVIEW"] = "gemini-3.1-pro-preview";
263
336
  // Gemini 3 Series (Preview)
264
- /** Gemini 3 Pro - Base model with adaptive thinking */
265
337
  VertexModels["GEMINI_3_PRO"] = "gemini-3-pro";
266
- /** Gemini 3 Pro Preview - Versioned preview (November 2025) */
267
338
  VertexModels["GEMINI_3_PRO_PREVIEW_11_2025"] = "gemini-3-pro-preview-11-2025";
268
- /** Gemini 3 Pro Latest - Auto-updated alias (always points to latest preview) */
269
339
  VertexModels["GEMINI_3_PRO_LATEST"] = "gemini-3-pro-latest";
270
- /** Gemini 3 Pro Preview - Generic preview (legacy) */
271
340
  VertexModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
272
- /** Gemini 3 Flash - Base model with adaptive thinking */
273
341
  VertexModels["GEMINI_3_FLASH"] = "gemini-3-flash";
274
- /** Gemini 3 Flash Preview - Versioned preview */
275
342
  VertexModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
276
- /** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
277
343
  VertexModels["GEMINI_3_FLASH_LATEST"] = "gemini-3-flash-latest";
278
- // Gemini 2.5 Series (Latest - 2025)
344
+ // Gemini 2.5 Series (GA)
279
345
  VertexModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
280
346
  VertexModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
281
347
  VertexModels["GEMINI_2_5_FLASH_LITE"] = "gemini-2.5-flash-lite";
282
348
  VertexModels["GEMINI_2_5_FLASH_IMAGE"] = "gemini-2.5-flash-image";
283
- // Gemini 2.0 Series
349
+ // Gemini 2.0 Series (Deprecated - retiring Jun 2026)
284
350
  VertexModels["GEMINI_2_0_FLASH"] = "gemini-2.0-flash";
285
351
  VertexModels["GEMINI_2_0_FLASH_001"] = "gemini-2.0-flash-001";
286
- /** Gemini 2.0 Flash Lite - GA, production-ready, cost-optimized */
287
352
  VertexModels["GEMINI_2_0_FLASH_LITE"] = "gemini-2.0-flash-lite";
288
- // Gemini 1.5 Series (Legacy support)
353
+ // Gemini 1.5 Series (Retired - returns 404)
354
+ /** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
289
355
  VertexModels["GEMINI_1_5_PRO"] = "gemini-1.5-pro-002";
356
+ /** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
290
357
  VertexModels["GEMINI_1_5_FLASH"] = "gemini-1.5-flash-002";
291
358
  })(VertexModels || (VertexModels = {}));
292
359
  /**
@@ -294,27 +361,33 @@ export var VertexModels;
294
361
  */
295
362
  export var GoogleAIModels;
296
363
  (function (GoogleAIModels) {
297
- // Gemini 3 Series
364
+ // Gemini 3.1 Series (Released February 2026)
365
+ GoogleAIModels["GEMINI_3_1_PRO_PREVIEW"] = "gemini-3.1-pro-preview";
366
+ // Gemini 3 Series (Preview)
298
367
  GoogleAIModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
299
368
  GoogleAIModels["GEMINI_3_PRO_IMAGE_PREVIEW"] = "gemini-3-pro-image-preview";
300
369
  GoogleAIModels["GEMINI_3_FLASH"] = "gemini-3-flash";
301
370
  GoogleAIModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
302
- // Gemini 2.5 Series
371
+ // Gemini 2.5 Series (GA)
303
372
  GoogleAIModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
304
373
  GoogleAIModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
305
374
  GoogleAIModels["GEMINI_2_5_FLASH_LITE"] = "gemini-2.5-flash-lite";
306
375
  GoogleAIModels["GEMINI_2_5_FLASH_IMAGE"] = "gemini-2.5-flash-image";
307
- GoogleAIModels["GEMINI_2_5_FLASH_LIVE"] = "gemini-2.5-flash-native-audio-preview-09-2025";
308
- // Gemini 2.0 Series
376
+ GoogleAIModels["GEMINI_2_5_FLASH_PREVIEW_TTS"] = "gemini-2.5-flash-preview-tts";
377
+ GoogleAIModels["GEMINI_2_5_PRO_PREVIEW_TTS"] = "gemini-2.5-pro-preview-tts";
378
+ // Gemini 2.0 Series (Deprecated - retiring Jun 2026)
309
379
  GoogleAIModels["GEMINI_2_0_FLASH"] = "gemini-2.0-flash";
310
380
  GoogleAIModels["GEMINI_2_0_FLASH_001"] = "gemini-2.0-flash-001";
311
381
  GoogleAIModels["GEMINI_2_0_FLASH_LITE"] = "gemini-2.0-flash-lite";
312
382
  GoogleAIModels["GEMINI_2_0_FLASH_IMAGE"] = "gemini-2.0-flash-preview-image-generation";
313
- // Gemini 1.5 Series (Legacy)
383
+ // Gemini 1.5 Series (Retired - returns 404)
384
+ /** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
314
385
  GoogleAIModels["GEMINI_1_5_PRO"] = "gemini-1.5-pro";
386
+ /** @deprecated Retired Sep 2025. Use Gemini 2.5+ */
315
387
  GoogleAIModels["GEMINI_1_5_FLASH"] = "gemini-1.5-flash";
316
388
  // Embedding Models
317
389
  GoogleAIModels["GEMINI_EMBEDDING"] = "gemini-embedding-001";
390
+ /** @deprecated Shutdown Jan 2026. Use gemini-embedding-001 */
318
391
  GoogleAIModels["TEXT_EMBEDDING_004"] = "text-embedding-004";
319
392
  })(GoogleAIModels || (GoogleAIModels = {}));
320
393
  /**
@@ -322,7 +395,10 @@ export var GoogleAIModels;
322
395
  */
323
396
  export var AnthropicModels;
324
397
  (function (AnthropicModels) {
325
- // Claude 4.5 Series (Latest - September-November 2025)
398
+ // Claude 4.6 Series (Latest - February 2026)
399
+ AnthropicModels["CLAUDE_OPUS_4_6"] = "claude-opus-4-6";
400
+ AnthropicModels["CLAUDE_SONNET_4_6"] = "claude-sonnet-4-6";
401
+ // Claude 4.5 Series (September-November 2025)
326
402
  AnthropicModels["CLAUDE_OPUS_4_5"] = "claude-opus-4-5-20251101";
327
403
  AnthropicModels["CLAUDE_SONNET_4_5"] = "claude-sonnet-4-5-20250929";
328
404
  AnthropicModels["CLAUDE_4_5_HAIKU"] = "claude-haiku-4-5-20251001";
@@ -375,6 +451,17 @@ export var MistralModels;
375
451
  // Voxtral (Audio)
376
452
  MistralModels["VOXTRAL_SMALL_LATEST"] = "voxtral-small-latest";
377
453
  MistralModels["VOXTRAL_MINI_LATEST"] = "voxtral-mini-latest";
454
+ // Devstral 2 Series (December 2025)
455
+ MistralModels["DEVSTRAL_2"] = "devstral-2512";
456
+ MistralModels["DEVSTRAL_SMALL_2"] = "devstral-small-2512";
457
+ // Magistral Versioned (September 2025)
458
+ MistralModels["MAGISTRAL_MEDIUM_2509"] = "magistral-medium-2509";
459
+ MistralModels["MAGISTRAL_SMALL_2509"] = "magistral-small-2509";
460
+ // Voxtral Transcribe 2 (February 2026)
461
+ MistralModels["VOXTRAL_MINI_TRANSCRIBE_2"] = "voxtral-mini-2602";
462
+ // OCR (December 2025)
463
+ MistralModels["MISTRAL_OCR_3"] = "mistral-ocr-2512";
464
+ MistralModels["MISTRAL_OCR_LATEST"] = "mistral-ocr-latest";
378
465
  // Specialized Models
379
466
  MistralModels["MISTRAL_NEMO"] = "mistral-nemo";
380
467
  MistralModels["MISTRAL_EMBED"] = "mistral-embed";
@@ -464,6 +551,24 @@ export var OllamaModels;
464
551
  OllamaModels["MIXTRAL_8X22B"] = "mixtral:8x22b";
465
552
  // Enterprise Models
466
553
  OllamaModels["COMMAND_R_PLUS"] = "command-r-plus:104b";
554
+ // Z.AI GLM-5 - Flagship reasoning model (February 2026)
555
+ OllamaModels["GLM_5_LATEST"] = "glm-5:latest";
556
+ // Kimi-K2.5 - Moonshot AI multimodal agentic model
557
+ OllamaModels["KIMI_K2_5_LATEST"] = "kimi-k2.5:latest";
558
+ // Qwen 3.5 - Multimodal native agents (February 2026)
559
+ OllamaModels["QWEN3_5_LATEST"] = "qwen3.5:latest";
560
+ // Qwen3-Coder - Coding-focused agentic model
561
+ OllamaModels["QWEN3_CODER_LATEST"] = "qwen3-coder:latest";
562
+ OllamaModels["QWEN3_CODER_30B"] = "qwen3-coder:30b";
563
+ // DeepSeek-V3.2 - Enhanced reasoning
564
+ OllamaModels["DEEPSEEK_V3_2_LATEST"] = "deepseek-v3.2:latest";
565
+ // NVIDIA Nemotron 3 Nano - Hybrid MoE, 1M context
566
+ OllamaModels["NEMOTRON_3_NANO_LATEST"] = "nemotron-3-nano:latest";
567
+ OllamaModels["NEMOTRON_3_NANO_30B"] = "nemotron-3-nano:30b";
568
+ // SmolLM3 - Compact dual-mode reasoning (HuggingFace)
569
+ OllamaModels["SMOLLM3_3B"] = "smollm3:3b";
570
+ // GPT-OSS - Open-source GPT (Apache 2.0)
571
+ OllamaModels["GPT_OSS_LATEST"] = "gpt-oss:latest";
467
572
  })(OllamaModels || (OllamaModels = {}));
468
573
  /**
469
574
  * Common Models for LiteLLM Proxy
@@ -513,6 +618,16 @@ export var LiteLLMModels;
513
618
  // AWS Bedrock via LiteLLM
514
619
  LiteLLMModels["BEDROCK_CLAUDE_3_5_SONNET"] = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0";
515
620
  LiteLLMModels["BEDROCK_CLAUDE_3_HAIKU"] = "bedrock/anthropic.claude-3-haiku-20240307-v1:0";
621
+ // OpenAI GPT-5.2 via LiteLLM
622
+ LiteLLMModels["OPENAI_GPT_5_2"] = "openai/gpt-5.2";
623
+ LiteLLMModels["OPENAI_GPT_5_2_CODEX"] = "openai/gpt-5.2-codex";
624
+ // Anthropic Claude 4.6 via LiteLLM
625
+ LiteLLMModels["ANTHROPIC_CLAUDE_OPUS_4_6"] = "anthropic/claude-opus-4-6";
626
+ LiteLLMModels["ANTHROPIC_CLAUDE_SONNET_4_6"] = "anthropic/claude-sonnet-4-6";
627
+ // Google Gemini 3 via LiteLLM
628
+ LiteLLMModels["GEMINI_3_1_PRO"] = "gemini/gemini-3.1-pro-preview";
629
+ // xAI via LiteLLM
630
+ LiteLLMModels["XAI_GROK_4_1_FAST"] = "xai/grok-4.1-fast";
516
631
  // Perplexity AI via LiteLLM
517
632
  LiteLLMModels["PERPLEXITY_SONAR_PRO"] = "perplexity/sonar-pro";
518
633
  LiteLLMModels["PERPLEXITY_SONAR_REASONING_PRO"] = "perplexity/sonar-reasoning-pro";
@@ -588,6 +703,17 @@ export var HuggingFaceModels;
588
703
  // BLOOM
589
704
  HuggingFaceModels["BLOOM_7B1"] = "bigscience/bloom-7b1";
590
705
  HuggingFaceModels["BLOOM_1B3"] = "bigscience/bloom-1b3";
706
+ // Z.AI GLM-5 (February 2026)
707
+ HuggingFaceModels["GLM_5"] = "zai-org/GLM-5";
708
+ // Qwen 3.5 Multimodal (February 2026)
709
+ HuggingFaceModels["QWEN_3_5_397B_A17B"] = "Qwen/Qwen3.5-397B-A17B";
710
+ // NVIDIA Nemotron 3 Nano
711
+ HuggingFaceModels["NEMOTRON_3_NANO_30B"] = "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16";
712
+ // HuggingFace SmolLM3
713
+ HuggingFaceModels["SMOLLM3_3B"] = "HuggingFaceTB/SmolLM3-3B";
714
+ // Falcon 3 Series
715
+ HuggingFaceModels["FALCON_3_7B_INSTRUCT"] = "tiiuae/Falcon3-7B-Instruct";
716
+ HuggingFaceModels["FALCON_3_10B_INSTRUCT"] = "tiiuae/Falcon3-10B-Instruct";
591
717
  })(HuggingFaceModels || (HuggingFaceModels = {}));
592
718
  /**
593
719
  * Supported Models for AWS SageMaker JumpStart
@@ -616,6 +742,10 @@ export var SageMakerModels;
616
742
  SageMakerModels["FALCON_3_10B"] = "tii-falcon-3-10b-instruct";
617
743
  SageMakerModels["FALCON_40B"] = "tii-falcon-40b-instruct";
618
744
  SageMakerModels["FALCON_180B"] = "tii-falcon-180b";
745
+ // NVIDIA Nemotron 3 Nano (February 2026)
746
+ SageMakerModels["NEMOTRON_3_NANO_30B"] = "nvidia-nemotron-3-nano-30b";
747
+ // Qwen3 VL - Vision-language
748
+ SageMakerModels["QWEN3_VL_8B"] = "qwen3-vl-8b-instruct";
619
749
  })(SageMakerModels || (SageMakerModels = {}));
620
750
  /**
621
751
  * API Versions for various providers
@@ -36,7 +36,7 @@ export function checkContextBudget(params) {
36
36
  ? toolDefinitions.reduce((sum, tool) => {
37
37
  try {
38
38
  const serialized = JSON.stringify(tool);
39
- return sum + Math.ceil(serialized.length / 4);
39
+ return sum + estimateTokens(serialized, provider);
40
40
  }
41
41
  catch {
42
42
  return sum + TOKENS_PER_TOOL_DEFINITION;
@@ -8,6 +8,7 @@
8
8
  * Stage 3: LLM Summarization (expensive -- requires LLM call)
9
9
  * Stage 4: Sliding Window Truncation (fallback -- no LLM call)
10
10
  */
11
+ import { trace, SpanStatusCode } from "@opentelemetry/api";
11
12
  import { estimateMessagesTokens } from "../utils/tokenEstimation.js";
12
13
  import { logger } from "../utils/logger.js";
13
14
  import { pruneToolOutputs } from "./stages/toolOutputPruner.js";
@@ -112,15 +113,36 @@ export class ContextCompactor {
112
113
  saved: stageTokensBefore - stageTokensAfter,
113
114
  });
114
115
  }
115
- catch {
116
- logger.info("[Compaction] Stage 3 (summarize)", {
116
+ catch (error) {
117
+ // Capture the actual error for debugging
118
+ const errorMessage = error instanceof Error ? error.message : String(error);
119
+ const errorName = error instanceof Error ? error.name : "UnknownError";
120
+ logger.warn("[Compaction] Stage 3 (summarize) FAILED", {
117
121
  requestId,
118
- ran: false,
122
+ error: errorMessage,
123
+ errorName,
119
124
  tokensBefore: stageTokensBefore,
120
125
  tokensAfter: stageTokensBefore,
121
126
  saved: 0,
122
127
  });
123
- // Summarization failed, fall through to truncation
128
+ // Record on OTel span for trace visibility
129
+ const activeSpan = trace.getActiveSpan();
130
+ if (activeSpan) {
131
+ activeSpan.addEvent("compaction.stage3.failed", {
132
+ "error.message": errorMessage,
133
+ "error.name": errorName,
134
+ "stage.tokens_before": stageTokensBefore,
135
+ });
136
+ if (error instanceof Error) {
137
+ activeSpan.recordException(error);
138
+ }
139
+ // NLK-GAP-005 fix: set error status alongside recordException
140
+ activeSpan.setStatus({
141
+ code: SpanStatusCode.ERROR,
142
+ message: `Compaction stage 3 (summarize) failed: ${errorMessage}`,
143
+ });
144
+ }
145
+ // Fall through to Stage 4 truncation as before
124
146
  }
125
147
  }
126
148
  // Stage 4: Sliding Window Truncation (fallback)
@@ -129,6 +151,11 @@ export class ContextCompactor {
129
151
  const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
130
152
  const truncResult = truncateWithSlidingWindow(currentMessages, {
131
153
  fraction: this.config.truncationFraction,
154
+ currentTokens: stageTokensBefore,
155
+ targetTokens: targetTokens,
156
+ provider: provider,
157
+ adaptiveBuffer: 0.15,
158
+ maxIterations: 3,
132
159
  });
133
160
  if (truncResult.truncated) {
134
161
  currentMessages = truncResult.messages;
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Emergency Content Truncation
3
+ *
4
+ * When message-level removal (sliding window) can't fit context into budget,
5
+ * this truncates the CONTENT of the longest messages as a last resort.
6
+ */
7
+ import type { ChatMessage } from "../types/conversation.js";
8
/**
 * Emergency content truncation: truncate the content of the longest messages
 * to fit within the available token budget.
 *
 * Strategy: Sort messages by content length (descending), truncate each
 * to a proportional share of the available budget until total fits.
 *
 * @param messages - Conversation history to shrink (the input array is not mutated).
 * @param availableTokensForHistory - Total token budget before fixed costs are deducted.
 * @param breakdown - Estimated token cost of each fixed context component;
 *   every field except `conversationHistory` is subtracted from the budget
 *   to compute the share left for history.
 * @param provider - Optional provider id for provider-specific token estimation.
 * @returns A message array guaranteed to fit the remaining history budget
 *   (may be empty when no budget is left for history at all).
 */
export declare function emergencyContentTruncation(messages: ChatMessage[], availableTokensForHistory: number, breakdown: {
    systemPrompt: number;
    conversationHistory: number;
    currentPrompt: number;
    toolDefinitions: number;
    fileAttachments: number;
}, provider?: string): ChatMessage[];
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Emergency Content Truncation
3
+ *
4
+ * When message-level removal (sliding window) can't fit context into budget,
5
+ * this truncates the CONTENT of the longest messages as a last resort.
6
+ */
7
+ import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../utils/tokenEstimation.js";
8
+ import { logger } from "../utils/logger.js";
9
/**
 * Emergency content truncation: shrink the CONTENT of the longest messages
 * so the conversation history fits within the available token budget.
 *
 * Strategy: rank messages by content length (largest first) and clip each
 * candidate by the global overage ratio (plus a 5% safety margin) until
 * enough tokens are saved. If the result still overflows, fall back to
 * keeping only the newest non-system messages that fit.
 *
 * @param messages - Conversation history to shrink (never mutated).
 * @param availableTokensForHistory - Total token budget before fixed costs.
 * @param breakdown - Fixed token costs deducted from the budget
 *   (everything except `conversationHistory`).
 * @param provider - Optional provider id for provider-specific estimation.
 * @returns A new message array guaranteed to fit the history budget.
 */
export function emergencyContentTruncation(messages, availableTokensForHistory, breakdown, provider) {
    // Budget left for history once the fixed context costs are deducted.
    const fixedCosts = breakdown.systemPrompt +
        breakdown.currentPrompt +
        breakdown.toolDefinitions +
        breakdown.fileAttachments;
    const historyBudget = availableTokensForHistory - fixedCosts;
    if (historyBudget <= 0) {
        // No room for history at all: return empty to guarantee budget safety.
        return [];
    }
    const historyTokens = estimateMessagesTokens(messages, provider);
    if (historyTokens <= historyBudget) {
        // Already within budget; nothing to do.
        return messages;
    }
    const working = [...messages];
    const deficit = historyTokens - historyBudget;
    const shrinkRatio = deficit / historyTokens;
    // Visit the longest messages first so the biggest payloads absorb the cut.
    const byLengthDesc = working
        .map((msg, idx) => ({ idx, len: msg.content.length }))
        .sort((a, b) => b.len - a.len);
    let saved = 0;
    for (const { idx } of byLengthDesc) {
        if (saved >= deficit) {
            break;
        }
        const candidate = working[idx];
        // Leave system messages and short messages untouched.
        if (candidate.role === "system" || candidate.content.length < 200) {
            continue;
        }
        const beforeTokens = estimateTokens(candidate.content, provider);
        // Shrink by the global ratio plus a 5% safety margin.
        const target = Math.floor(beforeTokens * (1 - shrinkRatio - 0.05));
        if (target >= beforeTokens || target <= 50) {
            continue;
        }
        const clipped = truncateToTokenBudget(candidate.content, target, provider);
        if (!clipped.truncated) {
            continue;
        }
        saved += beforeTokens - estimateTokens(clipped.text, provider);
        working[idx] = {
            ...candidate,
            content: clipped.text,
            metadata: { ...candidate.metadata, truncated: true },
        };
    }
    logger.info("[EmergencyTruncation] Content truncation complete", {
        tokensSaved: saved,
        reductionNeeded: deficit,
        messagesModified: working.filter((m, i) => m !== messages[i]).length,
    });
    // Final safety check: only hand back the clipped set if it actually fits.
    if (estimateMessagesTokens(working, provider) <= historyBudget) {
        return working;
    }
    // Hard fallback: walk newest-to-oldest, keeping non-system messages until
    // adding one more would exceed the budget.
    const kept = [];
    for (let i = working.length - 1; i >= 0; i--) {
        const msg = working[i];
        if (msg.role === "system") {
            continue;
        }
        kept.unshift(msg);
        if (estimateMessagesTokens(kept, provider) > historyBudget) {
            kept.shift();
            break;
        }
    }
    return kept;
}
@@ -12,3 +12,19 @@ export declare function isContextOverflowError(error: unknown): boolean;
12
12
  * Identify which provider produced the context overflow error.
13
13
  */
14
14
  export declare function getContextOverflowProvider(error: unknown): string | null;
15
+ /**
16
+ * Extract actual token counts from provider overflow error messages.
17
+ *
18
+ * Many providers include the actual/max token counts in their error messages:
19
+ * - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
20
+ * - Anthropic: "prompt is too long: 180000 tokens > 200000 token limit"
21
+ * - Google: "exceeds the maximum number of tokens (180000 > 100000)"
22
+ */
23
+ export declare function parseProviderOverflowDetails(error: unknown): {
24
+ actualTokens: number;
25
+ budgetTokens: number;
26
+ } | null;
27
+ /**
28
+ * Extract error message from various error formats.
29
+ */
30
+ export declare function extractErrorMessage(error: unknown): string | null;
@@ -86,10 +86,57 @@ export function getContextOverflowProvider(error) {
86
86
  }
87
87
  return null;
88
88
  }
89
+ /**
90
+ * Extract actual token counts from provider overflow error messages.
91
+ *
92
+ * Many providers include the actual/max token counts in their error messages:
93
+ * - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
94
+ * - Anthropic: "prompt is too long: 180000 tokens > 200000 token limit"
95
+ * - Google: "exceeds the maximum number of tokens (180000 > 100000)"
96
+ */
97
+ export function parseProviderOverflowDetails(error) {
98
+ const message = extractErrorMessage(error);
99
+ if (!message) {
100
+ return null;
101
+ }
102
+ // Guard against excessively long inputs that could slow regex matching
103
+ if (message.length > 2000) {
104
+ return null;
105
+ }
106
+ // OpenAI pattern: "resulted in X tokens" + "maximum context length is Y"
107
+ // Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
108
+ const openaiActual = message.match(/resulted\s+in\s+(\d[\d,]{0,19})\s*tokens/i);
109
+ const openaiMax = message.match(/maximum\s+context\s+length\s+is\s+(\d[\d,]{0,19})/i);
110
+ if (openaiActual && openaiMax) {
111
+ return {
112
+ actualTokens: parseInt(openaiActual[1].replace(/,/g, ""), 10),
113
+ budgetTokens: parseInt(openaiMax[1].replace(/,/g, ""), 10),
114
+ };
115
+ }
116
+ // Anthropic pattern: "X tokens > Y token limit" or "X tokens, limit Y"
117
+ // Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
118
+ const anthropicMatch = message.match(/(\d[\d,]{0,19})\s*tokens?\s*[>:]\s*(\d[\d,]{0,19})/i);
119
+ if (anthropicMatch) {
120
+ return {
121
+ actualTokens: parseInt(anthropicMatch[1].replace(/,/g, ""), 10),
122
+ budgetTokens: parseInt(anthropicMatch[2].replace(/,/g, ""), 10),
123
+ };
124
+ }
125
+ // Google pattern: "X > Y" or "X exceeds Y"
126
+ // Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
127
+ const googleMatch = message.match(/(\d[\d,]{0,19})\s*(?:>|exceeds)\s*(\d[\d,]{0,19})/i);
128
+ if (googleMatch) {
129
+ return {
130
+ actualTokens: parseInt(googleMatch[1].replace(/,/g, ""), 10),
131
+ budgetTokens: parseInt(googleMatch[2].replace(/,/g, ""), 10),
132
+ };
133
+ }
134
+ return null;
135
+ }
89
136
  /**
90
137
  * Extract error message from various error formats.
91
138
  */
92
- function extractErrorMessage(error) {
139
+ export function extractErrorMessage(error) {
93
140
  if (!error) {
94
141
  return null;
95
142
  }
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Context-specific error classes for budget and overflow scenarios.
3
+ */
4
/**
 * Thrown when context exceeds model budget after all compaction stages,
 * preventing wasteful API calls to providers that will reject the request.
 */
export declare class ContextBudgetExceededError extends Error {
    /** Estimated token count of the over-budget request. */
    readonly estimatedTokens: number;
    /** Token budget the request had to fit within. */
    readonly availableTokens: number;
    /** Compaction stages attempted before the error was raised. */
    readonly stagesUsed: string[];
    /** Per-component token breakdown captured at the point of failure. */
    readonly breakdown: Record<string, number>;
    constructor(message: string, details: {
        estimatedTokens: number;
        availableTokens: number;
        stagesUsed: string[];
        breakdown: Record<string, number>;
    });
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Context-specific error classes for budget and overflow scenarios.
3
+ */
4
/**
 * Raised when the assembled context still exceeds the model's budget after
 * every compaction stage has run, allowing callers to fail fast instead of
 * sending a request the provider is guaranteed to reject.
 */
export class ContextBudgetExceededError extends Error {
    estimatedTokens;
    availableTokens;
    stagesUsed;
    breakdown;
    /**
     * @param message - Human-readable description of the overflow.
     * @param details - Token accounting captured at the point of failure.
     */
    constructor(message, details) {
        super(message);
        this.name = "ContextBudgetExceededError";
        const { estimatedTokens, availableTokens, stagesUsed, breakdown } = details;
        this.estimatedTokens = estimatedTokens;
        this.availableTokens = availableTokens;
        this.stagesUsed = stagesUsed;
        this.breakdown = breakdown;
    }
}
@@ -4,6 +4,12 @@
4
4
  * Non-destructive fallback: tags oldest messages as truncated
5
5
  * instead of deleting them. Always preserves first message pair.
6
6
  * Removes messages in pairs to maintain role alternation.
7
+ *
8
+ * Features:
9
+ * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
+ * instead of fixed 50%, with iterative refinement up to 3 passes.
11
+ * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
+ * message content proportionally instead of returning no-op.
7
13
  */
8
14
  import type { ChatMessage } from "../../types/conversation.js";
9
15
  import type { TruncationConfig, TruncationResult } from "../../types/contextTypes.js";