@juspay/neurolink 9.14.0 → 9.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +15 -15
  3. package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
  4. package/dist/adapters/video/videoAnalyzer.js +10 -8
  5. package/dist/auth/anthropicOAuth.d.ts +377 -0
  6. package/dist/auth/anthropicOAuth.js +914 -0
  7. package/dist/auth/index.d.ts +20 -0
  8. package/dist/auth/index.js +29 -0
  9. package/dist/auth/tokenStore.d.ts +225 -0
  10. package/dist/auth/tokenStore.js +521 -0
  11. package/dist/cli/commands/auth.d.ts +50 -0
  12. package/dist/cli/commands/auth.js +1115 -0
  13. package/dist/cli/commands/setup-anthropic.js +1 -14
  14. package/dist/cli/commands/setup-azure.js +1 -12
  15. package/dist/cli/commands/setup-bedrock.js +1 -9
  16. package/dist/cli/commands/setup-google-ai.js +1 -12
  17. package/dist/cli/commands/setup-openai.js +1 -14
  18. package/dist/cli/commands/workflow.d.ts +27 -0
  19. package/dist/cli/commands/workflow.js +216 -0
  20. package/dist/cli/factories/authCommandFactory.d.ts +52 -0
  21. package/dist/cli/factories/authCommandFactory.js +146 -0
  22. package/dist/cli/factories/commandFactory.d.ts +6 -0
  23. package/dist/cli/factories/commandFactory.js +171 -22
  24. package/dist/cli/index.js +0 -1
  25. package/dist/cli/parser.js +14 -2
  26. package/dist/cli/utils/maskCredential.d.ts +11 -0
  27. package/dist/cli/utils/maskCredential.js +23 -0
  28. package/dist/constants/contextWindows.js +107 -16
  29. package/dist/constants/enums.d.ts +119 -15
  30. package/dist/constants/enums.js +182 -22
  31. package/dist/constants/index.d.ts +3 -1
  32. package/dist/constants/index.js +11 -1
  33. package/dist/context/budgetChecker.js +1 -1
  34. package/dist/context/contextCompactor.js +31 -4
  35. package/dist/context/emergencyTruncation.d.ts +21 -0
  36. package/dist/context/emergencyTruncation.js +88 -0
  37. package/dist/context/errorDetection.d.ts +16 -0
  38. package/dist/context/errorDetection.js +48 -1
  39. package/dist/context/errors.d.ts +19 -0
  40. package/dist/context/errors.js +21 -0
  41. package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
  42. package/dist/context/stages/slidingWindowTruncator.js +159 -24
  43. package/dist/core/baseProvider.js +306 -200
  44. package/dist/core/conversationMemoryManager.js +104 -61
  45. package/dist/core/evaluationProviders.js +16 -33
  46. package/dist/core/factory.js +237 -164
  47. package/dist/core/modules/GenerationHandler.js +175 -116
  48. package/dist/core/modules/MessageBuilder.js +222 -170
  49. package/dist/core/modules/StreamHandler.d.ts +1 -0
  50. package/dist/core/modules/StreamHandler.js +95 -27
  51. package/dist/core/modules/TelemetryHandler.d.ts +10 -1
  52. package/dist/core/modules/TelemetryHandler.js +25 -7
  53. package/dist/core/modules/ToolsManager.js +115 -191
  54. package/dist/core/redisConversationMemoryManager.js +418 -282
  55. package/dist/factories/providerRegistry.d.ts +5 -0
  56. package/dist/factories/providerRegistry.js +20 -2
  57. package/dist/index.d.ts +3 -3
  58. package/dist/index.js +4 -2
  59. package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
  60. package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
  61. package/dist/lib/auth/anthropicOAuth.d.ts +377 -0
  62. package/dist/lib/auth/anthropicOAuth.js +915 -0
  63. package/dist/lib/auth/index.d.ts +20 -0
  64. package/dist/lib/auth/index.js +30 -0
  65. package/dist/lib/auth/tokenStore.d.ts +225 -0
  66. package/dist/lib/auth/tokenStore.js +522 -0
  67. package/dist/lib/constants/contextWindows.js +107 -16
  68. package/dist/lib/constants/enums.d.ts +119 -15
  69. package/dist/lib/constants/enums.js +182 -22
  70. package/dist/lib/constants/index.d.ts +3 -1
  71. package/dist/lib/constants/index.js +11 -1
  72. package/dist/lib/context/budgetChecker.js +1 -1
  73. package/dist/lib/context/contextCompactor.js +31 -4
  74. package/dist/lib/context/emergencyTruncation.d.ts +21 -0
  75. package/dist/lib/context/emergencyTruncation.js +89 -0
  76. package/dist/lib/context/errorDetection.d.ts +16 -0
  77. package/dist/lib/context/errorDetection.js +48 -1
  78. package/dist/lib/context/errors.d.ts +19 -0
  79. package/dist/lib/context/errors.js +22 -0
  80. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
  81. package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
  82. package/dist/lib/core/baseProvider.js +306 -200
  83. package/dist/lib/core/conversationMemoryManager.js +104 -61
  84. package/dist/lib/core/evaluationProviders.js +16 -33
  85. package/dist/lib/core/factory.js +237 -164
  86. package/dist/lib/core/modules/GenerationHandler.js +175 -116
  87. package/dist/lib/core/modules/MessageBuilder.js +222 -170
  88. package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
  89. package/dist/lib/core/modules/StreamHandler.js +95 -27
  90. package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
  91. package/dist/lib/core/modules/TelemetryHandler.js +25 -7
  92. package/dist/lib/core/modules/ToolsManager.js +115 -191
  93. package/dist/lib/core/redisConversationMemoryManager.js +418 -282
  94. package/dist/lib/factories/providerRegistry.d.ts +5 -0
  95. package/dist/lib/factories/providerRegistry.js +20 -2
  96. package/dist/lib/index.d.ts +3 -3
  97. package/dist/lib/index.js +4 -2
  98. package/dist/lib/mcp/externalServerManager.js +66 -0
  99. package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
  100. package/dist/lib/mcp/mcpClientFactory.js +16 -0
  101. package/dist/lib/mcp/toolDiscoveryService.js +32 -6
  102. package/dist/lib/mcp/toolRegistry.js +193 -123
  103. package/dist/lib/models/anthropicModels.d.ts +267 -0
  104. package/dist/lib/models/anthropicModels.js +528 -0
  105. package/dist/lib/neurolink.d.ts +6 -0
  106. package/dist/lib/neurolink.js +1162 -646
  107. package/dist/lib/providers/amazonBedrock.d.ts +1 -1
  108. package/dist/lib/providers/amazonBedrock.js +521 -319
  109. package/dist/lib/providers/anthropic.d.ts +123 -2
  110. package/dist/lib/providers/anthropic.js +873 -27
  111. package/dist/lib/providers/anthropicBaseProvider.js +77 -17
  112. package/dist/lib/providers/googleAiStudio.d.ts +1 -1
  113. package/dist/lib/providers/googleAiStudio.js +292 -227
  114. package/dist/lib/providers/googleVertex.d.ts +36 -1
  115. package/dist/lib/providers/googleVertex.js +553 -260
  116. package/dist/lib/providers/ollama.js +329 -278
  117. package/dist/lib/providers/openAI.js +77 -19
  118. package/dist/lib/providers/sagemaker/parsers.js +3 -3
  119. package/dist/lib/providers/sagemaker/streaming.js +3 -3
  120. package/dist/lib/proxy/proxyFetch.js +81 -48
  121. package/dist/lib/rag/ChunkerFactory.js +1 -1
  122. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
  123. package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
  124. package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
  125. package/dist/lib/rag/chunking/markdownChunker.js +174 -2
  126. package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
  127. package/dist/lib/rag/ragIntegration.d.ts +18 -1
  128. package/dist/lib/rag/ragIntegration.js +94 -14
  129. package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
  130. package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
  131. package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
  132. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
  133. package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
  134. package/dist/lib/telemetry/attributes.d.ts +52 -0
  135. package/dist/lib/telemetry/attributes.js +61 -0
  136. package/dist/lib/telemetry/index.d.ts +3 -0
  137. package/dist/lib/telemetry/index.js +3 -0
  138. package/dist/lib/telemetry/telemetryService.d.ts +6 -0
  139. package/dist/lib/telemetry/telemetryService.js +6 -0
  140. package/dist/lib/telemetry/tracers.d.ts +15 -0
  141. package/dist/lib/telemetry/tracers.js +17 -0
  142. package/dist/lib/telemetry/withSpan.d.ts +9 -0
  143. package/dist/lib/telemetry/withSpan.js +35 -0
  144. package/dist/lib/types/contextTypes.d.ts +10 -0
  145. package/dist/lib/types/errors.d.ts +62 -0
  146. package/dist/lib/types/errors.js +107 -0
  147. package/dist/lib/types/index.d.ts +2 -1
  148. package/dist/lib/types/index.js +2 -0
  149. package/dist/lib/types/providers.d.ts +107 -0
  150. package/dist/lib/types/providers.js +69 -0
  151. package/dist/lib/types/streamTypes.d.ts +14 -0
  152. package/dist/lib/types/subscriptionTypes.d.ts +893 -0
  153. package/dist/lib/types/subscriptionTypes.js +8 -0
  154. package/dist/lib/utils/conversationMemory.js +121 -82
  155. package/dist/lib/utils/logger.d.ts +5 -0
  156. package/dist/lib/utils/logger.js +50 -2
  157. package/dist/lib/utils/messageBuilder.js +22 -42
  158. package/dist/lib/utils/modelDetection.js +3 -3
  159. package/dist/lib/utils/providerConfig.d.ts +167 -0
  160. package/dist/lib/utils/providerConfig.js +619 -9
  161. package/dist/lib/utils/providerRetry.d.ts +41 -0
  162. package/dist/lib/utils/providerRetry.js +114 -0
  163. package/dist/lib/utils/retryability.d.ts +14 -0
  164. package/dist/lib/utils/retryability.js +23 -0
  165. package/dist/lib/utils/sanitizers/svg.js +4 -5
  166. package/dist/lib/utils/tokenEstimation.d.ts +11 -1
  167. package/dist/lib/utils/tokenEstimation.js +19 -4
  168. package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
  169. package/dist/mcp/externalServerManager.js +66 -0
  170. package/dist/mcp/mcpCircuitBreaker.js +24 -0
  171. package/dist/mcp/mcpClientFactory.js +16 -0
  172. package/dist/mcp/toolDiscoveryService.js +32 -6
  173. package/dist/mcp/toolRegistry.js +193 -123
  174. package/dist/models/anthropicModels.d.ts +267 -0
  175. package/dist/models/anthropicModels.js +527 -0
  176. package/dist/neurolink.d.ts +6 -0
  177. package/dist/neurolink.js +1162 -646
  178. package/dist/providers/amazonBedrock.d.ts +1 -1
  179. package/dist/providers/amazonBedrock.js +521 -319
  180. package/dist/providers/anthropic.d.ts +123 -2
  181. package/dist/providers/anthropic.js +873 -27
  182. package/dist/providers/anthropicBaseProvider.js +77 -17
  183. package/dist/providers/googleAiStudio.d.ts +1 -1
  184. package/dist/providers/googleAiStudio.js +292 -227
  185. package/dist/providers/googleVertex.d.ts +36 -1
  186. package/dist/providers/googleVertex.js +553 -260
  187. package/dist/providers/ollama.js +329 -278
  188. package/dist/providers/openAI.js +77 -19
  189. package/dist/providers/sagemaker/parsers.js +3 -3
  190. package/dist/providers/sagemaker/streaming.js +3 -3
  191. package/dist/proxy/proxyFetch.js +81 -48
  192. package/dist/rag/ChunkerFactory.js +1 -1
  193. package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
  194. package/dist/rag/chunkers/MarkdownChunker.js +213 -9
  195. package/dist/rag/chunking/markdownChunker.d.ts +16 -0
  196. package/dist/rag/chunking/markdownChunker.js +174 -2
  197. package/dist/rag/pipeline/contextAssembly.js +2 -1
  198. package/dist/rag/ragIntegration.d.ts +18 -1
  199. package/dist/rag/ragIntegration.js +94 -14
  200. package/dist/rag/retrieval/vectorQueryTool.js +21 -4
  201. package/dist/server/abstract/baseServerAdapter.js +4 -1
  202. package/dist/server/adapters/fastifyAdapter.js +35 -30
  203. package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
  204. package/dist/services/server/ai/observability/instrumentation.js +39 -0
  205. package/dist/telemetry/attributes.d.ts +52 -0
  206. package/dist/telemetry/attributes.js +60 -0
  207. package/dist/telemetry/index.d.ts +3 -0
  208. package/dist/telemetry/index.js +3 -0
  209. package/dist/telemetry/telemetryService.d.ts +6 -0
  210. package/dist/telemetry/telemetryService.js +6 -0
  211. package/dist/telemetry/tracers.d.ts +15 -0
  212. package/dist/telemetry/tracers.js +16 -0
  213. package/dist/telemetry/withSpan.d.ts +9 -0
  214. package/dist/telemetry/withSpan.js +34 -0
  215. package/dist/types/contextTypes.d.ts +10 -0
  216. package/dist/types/errors.d.ts +62 -0
  217. package/dist/types/errors.js +107 -0
  218. package/dist/types/index.d.ts +2 -1
  219. package/dist/types/index.js +2 -0
  220. package/dist/types/providers.d.ts +107 -0
  221. package/dist/types/providers.js +69 -0
  222. package/dist/types/streamTypes.d.ts +14 -0
  223. package/dist/types/subscriptionTypes.d.ts +893 -0
  224. package/dist/types/subscriptionTypes.js +7 -0
  225. package/dist/utils/conversationMemory.js +121 -82
  226. package/dist/utils/logger.d.ts +5 -0
  227. package/dist/utils/logger.js +50 -2
  228. package/dist/utils/messageBuilder.js +22 -42
  229. package/dist/utils/modelDetection.js +3 -3
  230. package/dist/utils/providerConfig.d.ts +167 -0
  231. package/dist/utils/providerConfig.js +619 -9
  232. package/dist/utils/providerRetry.d.ts +41 -0
  233. package/dist/utils/providerRetry.js +113 -0
  234. package/dist/utils/retryability.d.ts +14 -0
  235. package/dist/utils/retryability.js +22 -0
  236. package/dist/utils/sanitizers/svg.js +4 -5
  237. package/dist/utils/tokenEstimation.d.ts +11 -1
  238. package/dist/utils/tokenEstimation.js +19 -4
  239. package/dist/utils/videoAnalysisProcessor.js +7 -3
  240. package/dist/workflow/config.d.ts +26 -26
  241. package/package.json +2 -1
@@ -36,7 +36,7 @@ export function checkContextBudget(params) {
36
36
  ? toolDefinitions.reduce((sum, tool) => {
37
37
  try {
38
38
  const serialized = JSON.stringify(tool);
39
- return sum + Math.ceil(serialized.length / 4);
39
+ return sum + estimateTokens(serialized, provider);
40
40
  }
41
41
  catch {
42
42
  return sum + TOKENS_PER_TOOL_DEFINITION;
@@ -8,6 +8,7 @@
8
8
  * Stage 3: LLM Summarization (expensive -- requires LLM call)
9
9
  * Stage 4: Sliding Window Truncation (fallback -- no LLM call)
10
10
  */
11
+ import { trace, SpanStatusCode } from "@opentelemetry/api";
11
12
  import { estimateMessagesTokens } from "../utils/tokenEstimation.js";
12
13
  import { logger } from "../utils/logger.js";
13
14
  import { pruneToolOutputs } from "./stages/toolOutputPruner.js";
@@ -112,15 +113,36 @@ export class ContextCompactor {
112
113
  saved: stageTokensBefore - stageTokensAfter,
113
114
  });
114
115
  }
115
- catch {
116
- logger.info("[Compaction] Stage 3 (summarize)", {
116
+ catch (error) {
117
+ // Capture the actual error for debugging
118
+ const errorMessage = error instanceof Error ? error.message : String(error);
119
+ const errorName = error instanceof Error ? error.name : "UnknownError";
120
+ logger.warn("[Compaction] Stage 3 (summarize) FAILED", {
117
121
  requestId,
118
- ran: false,
122
+ error: errorMessage,
123
+ errorName,
119
124
  tokensBefore: stageTokensBefore,
120
125
  tokensAfter: stageTokensBefore,
121
126
  saved: 0,
122
127
  });
123
- // Summarization failed, fall through to truncation
128
+ // Record on OTel span for trace visibility
129
+ const activeSpan = trace.getActiveSpan();
130
+ if (activeSpan) {
131
+ activeSpan.addEvent("compaction.stage3.failed", {
132
+ "error.message": errorMessage,
133
+ "error.name": errorName,
134
+ "stage.tokens_before": stageTokensBefore,
135
+ });
136
+ if (error instanceof Error) {
137
+ activeSpan.recordException(error);
138
+ }
139
+ // NLK-GAP-005 fix: set error status alongside recordException
140
+ activeSpan.setStatus({
141
+ code: SpanStatusCode.ERROR,
142
+ message: `Compaction stage 3 (summarize) failed: ${errorMessage}`,
143
+ });
144
+ }
145
+ // Fall through to Stage 4 truncation as before
124
146
  }
125
147
  }
126
148
  // Stage 4: Sliding Window Truncation (fallback)
@@ -129,6 +151,11 @@ export class ContextCompactor {
129
151
  const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
130
152
  const truncResult = truncateWithSlidingWindow(currentMessages, {
131
153
  fraction: this.config.truncationFraction,
154
+ currentTokens: stageTokensBefore,
155
+ targetTokens: targetTokens,
156
+ provider: provider,
157
+ adaptiveBuffer: 0.15,
158
+ maxIterations: 3,
132
159
  });
133
160
  if (truncResult.truncated) {
134
161
  currentMessages = truncResult.messages;
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Emergency Content Truncation
3
+ *
4
+ * When message-level removal (sliding window) can't fit context into budget,
5
+ * this truncates the CONTENT of the longest messages as a last resort.
6
+ */
7
+ import type { ChatMessage } from "../types/conversation.js";
8
+ /**
9
+ * Emergency content truncation: truncate the content of the longest messages
10
+ * to fit within the available token budget.
11
+ *
12
+ * Strategy: Sort messages by content length (descending), truncate each
13
+ * to a proportional share of the available budget until total fits.
14
+ */
15
+ export declare function emergencyContentTruncation(messages: ChatMessage[], availableTokensForHistory: number, breakdown: {
16
+ systemPrompt: number;
17
+ conversationHistory: number;
18
+ currentPrompt: number;
19
+ toolDefinitions: number;
20
+ fileAttachments: number;
21
+ }, provider?: string): ChatMessage[];
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Emergency Content Truncation
3
+ *
4
+ * When message-level removal (sliding window) can't fit context into budget,
5
+ * this truncates the CONTENT of the longest messages as a last resort.
6
+ */
7
+ import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../utils/tokenEstimation.js";
8
+ import { logger } from "../utils/logger.js";
9
+ /**
10
+ * Emergency content truncation: truncate the content of the longest messages
11
+ * to fit within the available token budget.
12
+ *
13
+ * Strategy: Sort messages by content length (descending), truncate each
14
+ * to a proportional share of the available budget until total fits.
15
+ */
16
+ export function emergencyContentTruncation(messages, availableTokensForHistory, breakdown, provider) {
17
+ // Budget available for conversation history specifically
18
+ const historyBudget = availableTokensForHistory -
19
+ breakdown.systemPrompt -
20
+ breakdown.currentPrompt -
21
+ breakdown.toolDefinitions -
22
+ breakdown.fileAttachments;
23
+ if (historyBudget <= 0) {
24
+ // No room for history: return empty to guarantee budget safety
25
+ return [];
26
+ }
27
+ const currentHistoryTokens = estimateMessagesTokens(messages, provider);
28
+ if (currentHistoryTokens <= historyBudget) {
29
+ return messages; // Already fits
30
+ }
31
+ // Calculate per-message budgets proportional to original size,
32
+ // but cap large messages to free space for others
33
+ const result = [...messages];
34
+ const reductionNeeded = currentHistoryTokens - historyBudget;
35
+ const reductionRatio = reductionNeeded / currentHistoryTokens;
36
+ // Sort indices by content length descending (truncate biggest first)
37
+ const sortedIndices = result
38
+ .map((msg, idx) => ({ idx, len: msg.content.length }))
39
+ .sort((a, b) => b.len - a.len);
40
+ let tokensSaved = 0;
41
+ for (const { idx } of sortedIndices) {
42
+ if (tokensSaved >= reductionNeeded) {
43
+ break;
44
+ }
45
+ const msg = result[idx];
46
+ // Don't truncate system messages or very short messages
47
+ if (msg.role === "system" || msg.content.length < 200) {
48
+ continue;
49
+ }
50
+ const msgTokens = estimateTokens(msg.content, provider);
51
+ const targetTokens = Math.floor(msgTokens * (1 - reductionRatio - 0.05));
52
+ if (targetTokens < msgTokens && targetTokens > 50) {
53
+ const truncated = truncateToTokenBudget(msg.content, targetTokens, provider);
54
+ if (truncated.truncated) {
55
+ const savedThisMsg = msgTokens - estimateTokens(truncated.text, provider);
56
+ tokensSaved += savedThisMsg;
57
+ result[idx] = {
58
+ ...msg,
59
+ content: truncated.text,
60
+ metadata: { ...msg.metadata, truncated: true },
61
+ };
62
+ }
63
+ }
64
+ }
65
+ logger.info("[EmergencyTruncation] Content truncation complete", {
66
+ tokensSaved,
67
+ reductionNeeded,
68
+ messagesModified: result.filter((m, i) => m !== messages[i]).length,
69
+ });
70
+ // Final safety check: guarantee returned history fits budget
71
+ if (estimateMessagesTokens(result, provider) <= historyBudget) {
72
+ return result;
73
+ }
74
+ // Hard fallback: keep newest non-system messages that fit
75
+ const fallback = [];
76
+ for (let i = result.length - 1; i >= 0; i--) {
77
+ const msg = result[i];
78
+ if (msg.role === "system") {
79
+ continue;
80
+ }
81
+ fallback.unshift(msg);
82
+ if (estimateMessagesTokens(fallback, provider) > historyBudget) {
83
+ fallback.shift();
84
+ break;
85
+ }
86
+ }
87
+ return fallback;
88
+ }
@@ -12,3 +12,19 @@ export declare function isContextOverflowError(error: unknown): boolean;
12
12
  * Identify which provider produced the context overflow error.
13
13
  */
14
14
  export declare function getContextOverflowProvider(error: unknown): string | null;
15
+ /**
16
+ * Extract actual token counts from provider overflow error messages.
17
+ *
18
+ * Many providers include the actual/max token counts in their error messages:
19
+ * - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
20
+ * - Anthropic: "prompt is too long: 180000 tokens > 200000 token limit"
21
+ * - Google: "exceeds the maximum number of tokens (180000 > 100000)"
22
+ */
23
+ export declare function parseProviderOverflowDetails(error: unknown): {
24
+ actualTokens: number;
25
+ budgetTokens: number;
26
+ } | null;
27
+ /**
28
+ * Extract error message from various error formats.
29
+ */
30
+ export declare function extractErrorMessage(error: unknown): string | null;
@@ -86,10 +86,57 @@ export function getContextOverflowProvider(error) {
86
86
  }
87
87
  return null;
88
88
  }
89
+ /**
90
+ * Extract actual token counts from provider overflow error messages.
91
+ *
92
+ * Many providers include the actual/max token counts in their error messages:
93
+ * - OpenAI: "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens."
94
+ * - Anthropic: "prompt is too long: 180000 tokens > 200000 token limit"
95
+ * - Google: "exceeds the maximum number of tokens (180000 > 100000)"
96
+ */
97
+ export function parseProviderOverflowDetails(error) {
98
+ const message = extractErrorMessage(error);
99
+ if (!message) {
100
+ return null;
101
+ }
102
+ // Guard against excessively long inputs that could slow regex matching
103
+ if (message.length > 2000) {
104
+ return null;
105
+ }
106
+ // OpenAI pattern: "resulted in X tokens" + "maximum context length is Y"
107
+ // Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
108
+ const openaiActual = message.match(/resulted\s+in\s+(\d[\d,]{0,19})\s*tokens/i);
109
+ const openaiMax = message.match(/maximum\s+context\s+length\s+is\s+(\d[\d,]{0,19})/i);
110
+ if (openaiActual && openaiMax) {
111
+ return {
112
+ actualTokens: parseInt(openaiActual[1].replace(/,/g, ""), 10),
113
+ budgetTokens: parseInt(openaiMax[1].replace(/,/g, ""), 10),
114
+ };
115
+ }
116
+ // Anthropic pattern: "X tokens > Y token limit" or "X tokens, limit Y"
117
+ // Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
118
+ const anthropicMatch = message.match(/(\d[\d,]{0,19})\s*tokens?\s*[>:]\s*(\d[\d,]{0,19})/i);
119
+ if (anthropicMatch) {
120
+ return {
121
+ actualTokens: parseInt(anthropicMatch[1].replace(/,/g, ""), 10),
122
+ budgetTokens: parseInt(anthropicMatch[2].replace(/,/g, ""), 10),
123
+ };
124
+ }
125
+ // Google pattern: "X > Y" or "X exceeds Y"
126
+ // Use single character-class number groups to prevent ReDoS (CodeQL: js/polynomial-redos)
127
+ const googleMatch = message.match(/(\d[\d,]{0,19})\s*(?:>|exceeds)\s*(\d[\d,]{0,19})/i);
128
+ if (googleMatch) {
129
+ return {
130
+ actualTokens: parseInt(googleMatch[1].replace(/,/g, ""), 10),
131
+ budgetTokens: parseInt(googleMatch[2].replace(/,/g, ""), 10),
132
+ };
133
+ }
134
+ return null;
135
+ }
89
136
  /**
90
137
  * Extract error message from various error formats.
91
138
  */
92
- function extractErrorMessage(error) {
139
+ export function extractErrorMessage(error) {
93
140
  if (!error) {
94
141
  return null;
95
142
  }
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Context-specific error classes for budget and overflow scenarios.
3
+ */
4
+ /**
5
+ * Thrown when context exceeds model budget after all compaction stages,
6
+ * preventing wasteful API calls to providers that will reject the request.
7
+ */
8
+ export declare class ContextBudgetExceededError extends Error {
9
+ readonly estimatedTokens: number;
10
+ readonly availableTokens: number;
11
+ readonly stagesUsed: string[];
12
+ readonly breakdown: Record<string, number>;
13
+ constructor(message: string, details: {
14
+ estimatedTokens: number;
15
+ availableTokens: number;
16
+ stagesUsed: string[];
17
+ breakdown: Record<string, number>;
18
+ });
19
+ }
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Context-specific error classes for budget and overflow scenarios.
3
+ */
4
+ /**
5
+ * Thrown when context exceeds model budget after all compaction stages,
6
+ * preventing wasteful API calls to providers that will reject the request.
7
+ */
8
+ export class ContextBudgetExceededError extends Error {
9
+ estimatedTokens;
10
+ availableTokens;
11
+ stagesUsed;
12
+ breakdown;
13
+ constructor(message, details) {
14
+ super(message);
15
+ this.name = "ContextBudgetExceededError";
16
+ this.estimatedTokens = details.estimatedTokens;
17
+ this.availableTokens = details.availableTokens;
18
+ this.stagesUsed = details.stagesUsed;
19
+ this.breakdown = details.breakdown;
20
+ }
21
+ }
@@ -4,6 +4,12 @@
4
4
  * Non-destructive fallback: tags oldest messages as truncated
5
5
  * instead of deleting them. Always preserves first message pair.
6
6
  * Removes messages in pairs to maintain role alternation.
7
+ *
8
+ * Features:
9
+ * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
+ * instead of fixed 50%, with iterative refinement up to 3 passes.
11
+ * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
+ * message content proportionally instead of returning no-op.
7
13
  */
8
14
  import type { ChatMessage } from "../../types/conversation.js";
9
15
  import type { TruncationConfig, TruncationResult } from "../../types/contextTypes.js";
@@ -4,39 +4,174 @@
4
4
  * Non-destructive fallback: tags oldest messages as truncated
5
5
  * instead of deleting them. Always preserves first message pair.
6
6
  * Removes messages in pairs to maintain role alternation.
7
+ *
8
+ * Features:
9
+ * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
+ * instead of fixed 50%, with iterative refinement up to 3 passes.
11
+ * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
+ * message content proportionally instead of returning no-op.
7
13
  */
8
14
  import { randomUUID } from "crypto";
15
+ import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../../utils/tokenEstimation.js";
16
+ import { logger } from "../../utils/logger.js";
9
17
  const TRUNCATION_MARKER_CONTENT = "[Earlier conversation history was truncated to fit within context limits]";
18
+ /**
19
+ * For conversations with <= 4 messages that exceed token budget,
20
+ * truncate the CONTENT of the longest messages rather than removing messages.
21
+ *
22
+ * Strategy:
23
+ * 1. Calculate each message's proportional share of the token budget
24
+ * 2. Truncate messages that exceed their share using truncateToTokenBudget()
25
+ * 3. Never truncate messages below 200 tokens (preserve minimum context)
26
+ */
27
+ function truncateSmallConversation(messages, config) {
28
+ // If no target tokens provided, we can't do content truncation
29
+ if (!config?.targetTokens) {
30
+ return { truncated: false, messages, messagesRemoved: 0 };
31
+ }
32
+ const provider = config.provider;
33
+ const targetTokens = config.targetTokens;
34
+ const currentTokens = estimateMessagesTokens(messages, provider);
35
+ if (currentTokens <= targetTokens) {
36
+ return { truncated: false, messages, messagesRemoved: 0 };
37
+ }
38
+ const MINIMUM_MSG_TOKENS = 200;
39
+ const FRAMING_OVERHEAD = 24 + messages.length * 4; // conversation + per-message overhead
40
+ // Available budget for actual content
41
+ const contentBudget = targetTokens - FRAMING_OVERHEAD;
42
+ if (contentBudget <= 0) {
43
+ return { truncated: false, messages, messagesRemoved: 0 };
44
+ }
45
+ // Calculate current content tokens per message
46
+ const msgTokens = messages.map((msg) => estimateTokens(msg.content, provider));
47
+ const totalContentTokens = msgTokens.reduce((sum, t) => sum + t, 0);
48
+ // Each message gets a proportional share of the content budget
49
+ const result = [...messages];
50
+ let totalSaved = 0;
51
+ for (let i = 0; i < result.length; i++) {
52
+ const msg = result[i];
53
+ // Don't truncate system/summary messages
54
+ if (msg.role === "system" || msg.metadata?.isSummary) {
55
+ continue;
56
+ }
57
+ const proportionalBudget = Math.floor((msgTokens[i] / totalContentTokens) * contentBudget);
58
+ const msgBudget = Math.max(MINIMUM_MSG_TOKENS, proportionalBudget);
59
+ if (msgTokens[i] > msgBudget) {
60
+ const truncated = truncateToTokenBudget(msg.content, msgBudget, provider);
61
+ if (truncated.truncated) {
62
+ totalSaved += msgTokens[i] - estimateTokens(truncated.text, provider);
63
+ result[i] = {
64
+ ...msg,
65
+ content: truncated.text,
66
+ metadata: { ...msg.metadata, truncated: true },
67
+ };
68
+ }
69
+ }
70
+ }
71
+ if (totalSaved > 0) {
72
+ const finalTokens = estimateMessagesTokens(result, provider);
73
+ logger.info("[Truncation] Small conversation content truncated", {
74
+ messageCount: messages.length,
75
+ tokensSaved: totalSaved,
76
+ targetTokens,
77
+ finalTokens,
78
+ });
79
+ return {
80
+ truncated: finalTokens <= targetTokens,
81
+ messages: result,
82
+ messagesRemoved: 0, // No messages removed, only content truncated
83
+ };
84
+ }
85
+ return { truncated: false, messages, messagesRemoved: 0 };
86
+ }
10
87
  export function truncateWithSlidingWindow(messages, config) {
11
- const fraction = config?.fraction ?? 0.5;
12
88
  if (messages.length <= 4) {
13
- return { truncated: false, messages, messagesRemoved: 0 };
89
+ // Delegate to content truncation for small conversations (BUG-005)
90
+ return truncateSmallConversation(messages, config);
91
+ }
92
+ // ADAPTIVE MODE: calculate fraction from actual overage (PERF-001)
93
+ let fraction;
94
+ if (config?.currentTokens &&
95
+ config?.targetTokens &&
96
+ config.currentTokens > config.targetTokens) {
97
+ const overageRatio = (config.currentTokens - config.targetTokens) / config.currentTokens;
98
+ const buffer = config?.adaptiveBuffer ?? 0.15;
99
+ // Required fraction = overage ratio + buffer, clamped to [0.1, 0.9]
100
+ fraction = Math.min(0.9, Math.max(0.1, overageRatio + buffer));
101
+ logger.info("[Truncation] Adaptive fraction calculated", {
102
+ currentTokens: config.currentTokens,
103
+ targetTokens: config.targetTokens,
104
+ overageRatio: Math.round(overageRatio * 100),
105
+ fraction: Math.round(fraction * 100),
106
+ });
107
+ }
108
+ else {
109
+ // Fallback to configured or default fraction
110
+ fraction = config?.fraction ?? 0.5;
14
111
  }
15
112
  // Always preserve first user-assistant pair
16
113
  const firstPair = messages.slice(0, 2);
17
- // Calculate how many messages to remove from the middle
18
114
  const remainingMessages = messages.slice(2);
19
- const removeCount = Math.floor(remainingMessages.length * fraction);
20
- // Ensure we remove an even number to maintain role alternation
21
- const evenRemoveCount = removeCount - (removeCount % 2);
22
- if (evenRemoveCount <= 0) {
23
- return { truncated: false, messages, messagesRemoved: 0 };
115
+ // ITERATIVE: if first pass isn't enough, increase fraction
116
+ const maxIterations = config?.maxIterations ?? 3;
117
+ let currentFraction = fraction;
118
+ for (let iteration = 0; iteration < maxIterations; iteration++) {
119
+ const removeCount = Math.floor(remainingMessages.length * currentFraction);
120
+ const evenRemoveCount = removeCount - (removeCount % 2);
121
+ if (evenRemoveCount <= 0) {
122
+ break;
123
+ }
124
+ const keptAfterTruncation = remainingMessages.slice(evenRemoveCount);
125
+ const truncationMarker = {
126
+ id: `truncation-${randomUUID()}`,
127
+ role: "system",
128
+ content: TRUNCATION_MARKER_CONTENT,
129
+ timestamp: new Date().toISOString(),
130
+ metadata: { isSummary: false, truncated: true },
131
+ };
132
+ const candidateMessages = [
133
+ ...firstPair,
134
+ truncationMarker,
135
+ ...keptAfterTruncation,
136
+ ];
137
+ // If we have token targets, verify the result fits
138
+ if (config?.targetTokens) {
139
+ const candidateTokens = estimateMessagesTokens(candidateMessages, config.provider);
140
+ if (candidateTokens <= config.targetTokens) {
141
+ return {
142
+ truncated: true,
143
+ messages: candidateMessages,
144
+ messagesRemoved: evenRemoveCount,
145
+ };
146
+ }
147
+ // Not enough -- increase fraction by 25% for next iteration
148
+ currentFraction = Math.min(0.95, currentFraction + 0.25);
149
+ continue;
150
+ }
151
+ // No token targets -- single-pass with calculated fraction
152
+ return {
153
+ truncated: true,
154
+ messages: candidateMessages,
155
+ messagesRemoved: evenRemoveCount,
156
+ };
24
157
  }
25
- const keptAfterTruncation = remainingMessages.slice(evenRemoveCount);
26
- // Create truncation marker
27
- const truncationMarker = {
28
- id: `truncation-${randomUUID()}`,
29
- role: "system",
30
- content: TRUNCATION_MARKER_CONTENT,
31
- timestamp: new Date().toISOString(),
32
- metadata: {
33
- isSummary: false,
158
+ // All iterations exhausted -- return best effort (most aggressive truncation)
159
+ const maxRemove = Math.floor(remainingMessages.length * 0.95);
160
+ const evenMaxRemove = maxRemove - (maxRemove % 2);
161
+ if (evenMaxRemove > 0) {
162
+ const keptMessages = remainingMessages.slice(evenMaxRemove);
163
+ const truncationMarker = {
164
+ id: `truncation-${randomUUID()}`,
165
+ role: "system",
166
+ content: TRUNCATION_MARKER_CONTENT,
167
+ timestamp: new Date().toISOString(),
168
+ metadata: { isSummary: false, truncated: true },
169
+ };
170
+ return {
34
171
  truncated: true,
35
- },
36
- };
37
- return {
38
- truncated: true,
39
- messages: [...firstPair, truncationMarker, ...keptAfterTruncation],
40
- messagesRemoved: evenRemoveCount,
41
- };
172
+ messages: [...firstPair, truncationMarker, ...keptMessages],
173
+ messagesRemoved: evenMaxRemove,
174
+ };
175
+ }
176
+ return { truncated: false, messages, messagesRemoved: 0 };
42
177
  }