@juspay/neurolink 9.14.0 → 9.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +15 -15
  3. package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
  4. package/dist/adapters/video/videoAnalyzer.js +10 -8
  5. package/dist/auth/anthropicOAuth.d.ts +377 -0
  6. package/dist/auth/anthropicOAuth.js +914 -0
  7. package/dist/auth/index.d.ts +20 -0
  8. package/dist/auth/index.js +29 -0
  9. package/dist/auth/tokenStore.d.ts +225 -0
  10. package/dist/auth/tokenStore.js +521 -0
  11. package/dist/cli/commands/auth.d.ts +50 -0
  12. package/dist/cli/commands/auth.js +1115 -0
  13. package/dist/cli/commands/setup-anthropic.js +1 -14
  14. package/dist/cli/commands/setup-azure.js +1 -12
  15. package/dist/cli/commands/setup-bedrock.js +1 -9
  16. package/dist/cli/commands/setup-google-ai.js +1 -12
  17. package/dist/cli/commands/setup-openai.js +1 -14
  18. package/dist/cli/commands/workflow.d.ts +27 -0
  19. package/dist/cli/commands/workflow.js +216 -0
  20. package/dist/cli/factories/authCommandFactory.d.ts +52 -0
  21. package/dist/cli/factories/authCommandFactory.js +146 -0
  22. package/dist/cli/factories/commandFactory.d.ts +6 -0
  23. package/dist/cli/factories/commandFactory.js +171 -22
  24. package/dist/cli/index.js +0 -1
  25. package/dist/cli/parser.js +14 -2
  26. package/dist/cli/utils/maskCredential.d.ts +11 -0
  27. package/dist/cli/utils/maskCredential.js +23 -0
  28. package/dist/constants/contextWindows.js +107 -16
  29. package/dist/constants/enums.d.ts +119 -15
  30. package/dist/constants/enums.js +182 -22
  31. package/dist/constants/index.d.ts +3 -1
  32. package/dist/constants/index.js +11 -1
  33. package/dist/context/budgetChecker.js +1 -1
  34. package/dist/context/contextCompactor.js +31 -4
  35. package/dist/context/emergencyTruncation.d.ts +21 -0
  36. package/dist/context/emergencyTruncation.js +88 -0
  37. package/dist/context/errorDetection.d.ts +16 -0
  38. package/dist/context/errorDetection.js +48 -1
  39. package/dist/context/errors.d.ts +19 -0
  40. package/dist/context/errors.js +21 -0
  41. package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
  42. package/dist/context/stages/slidingWindowTruncator.js +159 -24
  43. package/dist/core/baseProvider.js +306 -200
  44. package/dist/core/conversationMemoryManager.js +104 -61
  45. package/dist/core/evaluationProviders.js +16 -33
  46. package/dist/core/factory.js +237 -164
  47. package/dist/core/modules/GenerationHandler.js +175 -116
  48. package/dist/core/modules/MessageBuilder.js +222 -170
  49. package/dist/core/modules/StreamHandler.d.ts +1 -0
  50. package/dist/core/modules/StreamHandler.js +95 -27
  51. package/dist/core/modules/TelemetryHandler.d.ts +10 -1
  52. package/dist/core/modules/TelemetryHandler.js +25 -7
  53. package/dist/core/modules/ToolsManager.js +115 -191
  54. package/dist/core/redisConversationMemoryManager.js +418 -282
  55. package/dist/factories/providerRegistry.d.ts +5 -0
  56. package/dist/factories/providerRegistry.js +20 -2
  57. package/dist/index.d.ts +3 -3
  58. package/dist/index.js +4 -2
  59. package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
  60. package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
  61. package/dist/lib/auth/anthropicOAuth.d.ts +377 -0
  62. package/dist/lib/auth/anthropicOAuth.js +915 -0
  63. package/dist/lib/auth/index.d.ts +20 -0
  64. package/dist/lib/auth/index.js +30 -0
  65. package/dist/lib/auth/tokenStore.d.ts +225 -0
  66. package/dist/lib/auth/tokenStore.js +522 -0
  67. package/dist/lib/constants/contextWindows.js +107 -16
  68. package/dist/lib/constants/enums.d.ts +119 -15
  69. package/dist/lib/constants/enums.js +182 -22
  70. package/dist/lib/constants/index.d.ts +3 -1
  71. package/dist/lib/constants/index.js +11 -1
  72. package/dist/lib/context/budgetChecker.js +1 -1
  73. package/dist/lib/context/contextCompactor.js +31 -4
  74. package/dist/lib/context/emergencyTruncation.d.ts +21 -0
  75. package/dist/lib/context/emergencyTruncation.js +89 -0
  76. package/dist/lib/context/errorDetection.d.ts +16 -0
  77. package/dist/lib/context/errorDetection.js +48 -1
  78. package/dist/lib/context/errors.d.ts +19 -0
  79. package/dist/lib/context/errors.js +22 -0
  80. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
  81. package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
  82. package/dist/lib/core/baseProvider.js +306 -200
  83. package/dist/lib/core/conversationMemoryManager.js +104 -61
  84. package/dist/lib/core/evaluationProviders.js +16 -33
  85. package/dist/lib/core/factory.js +237 -164
  86. package/dist/lib/core/modules/GenerationHandler.js +175 -116
  87. package/dist/lib/core/modules/MessageBuilder.js +222 -170
  88. package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
  89. package/dist/lib/core/modules/StreamHandler.js +95 -27
  90. package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
  91. package/dist/lib/core/modules/TelemetryHandler.js +25 -7
  92. package/dist/lib/core/modules/ToolsManager.js +115 -191
  93. package/dist/lib/core/redisConversationMemoryManager.js +418 -282
  94. package/dist/lib/factories/providerRegistry.d.ts +5 -0
  95. package/dist/lib/factories/providerRegistry.js +20 -2
  96. package/dist/lib/index.d.ts +3 -3
  97. package/dist/lib/index.js +4 -2
  98. package/dist/lib/mcp/externalServerManager.js +66 -0
  99. package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
  100. package/dist/lib/mcp/mcpClientFactory.js +16 -0
  101. package/dist/lib/mcp/toolDiscoveryService.js +32 -6
  102. package/dist/lib/mcp/toolRegistry.js +193 -123
  103. package/dist/lib/models/anthropicModels.d.ts +267 -0
  104. package/dist/lib/models/anthropicModels.js +528 -0
  105. package/dist/lib/neurolink.d.ts +6 -0
  106. package/dist/lib/neurolink.js +1162 -646
  107. package/dist/lib/providers/amazonBedrock.d.ts +1 -1
  108. package/dist/lib/providers/amazonBedrock.js +521 -319
  109. package/dist/lib/providers/anthropic.d.ts +123 -2
  110. package/dist/lib/providers/anthropic.js +873 -27
  111. package/dist/lib/providers/anthropicBaseProvider.js +77 -17
  112. package/dist/lib/providers/googleAiStudio.d.ts +1 -1
  113. package/dist/lib/providers/googleAiStudio.js +292 -227
  114. package/dist/lib/providers/googleVertex.d.ts +36 -1
  115. package/dist/lib/providers/googleVertex.js +553 -260
  116. package/dist/lib/providers/ollama.js +329 -278
  117. package/dist/lib/providers/openAI.js +77 -19
  118. package/dist/lib/providers/sagemaker/parsers.js +3 -3
  119. package/dist/lib/providers/sagemaker/streaming.js +3 -3
  120. package/dist/lib/proxy/proxyFetch.js +81 -48
  121. package/dist/lib/rag/ChunkerFactory.js +1 -1
  122. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
  123. package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
  124. package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
  125. package/dist/lib/rag/chunking/markdownChunker.js +174 -2
  126. package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
  127. package/dist/lib/rag/ragIntegration.d.ts +18 -1
  128. package/dist/lib/rag/ragIntegration.js +94 -14
  129. package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
  130. package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
  131. package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
  132. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
  133. package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
  134. package/dist/lib/telemetry/attributes.d.ts +52 -0
  135. package/dist/lib/telemetry/attributes.js +61 -0
  136. package/dist/lib/telemetry/index.d.ts +3 -0
  137. package/dist/lib/telemetry/index.js +3 -0
  138. package/dist/lib/telemetry/telemetryService.d.ts +6 -0
  139. package/dist/lib/telemetry/telemetryService.js +6 -0
  140. package/dist/lib/telemetry/tracers.d.ts +15 -0
  141. package/dist/lib/telemetry/tracers.js +17 -0
  142. package/dist/lib/telemetry/withSpan.d.ts +9 -0
  143. package/dist/lib/telemetry/withSpan.js +35 -0
  144. package/dist/lib/types/contextTypes.d.ts +10 -0
  145. package/dist/lib/types/errors.d.ts +62 -0
  146. package/dist/lib/types/errors.js +107 -0
  147. package/dist/lib/types/index.d.ts +2 -1
  148. package/dist/lib/types/index.js +2 -0
  149. package/dist/lib/types/providers.d.ts +107 -0
  150. package/dist/lib/types/providers.js +69 -0
  151. package/dist/lib/types/streamTypes.d.ts +14 -0
  152. package/dist/lib/types/subscriptionTypes.d.ts +893 -0
  153. package/dist/lib/types/subscriptionTypes.js +8 -0
  154. package/dist/lib/utils/conversationMemory.js +121 -82
  155. package/dist/lib/utils/logger.d.ts +5 -0
  156. package/dist/lib/utils/logger.js +50 -2
  157. package/dist/lib/utils/messageBuilder.js +22 -42
  158. package/dist/lib/utils/modelDetection.js +3 -3
  159. package/dist/lib/utils/providerConfig.d.ts +167 -0
  160. package/dist/lib/utils/providerConfig.js +619 -9
  161. package/dist/lib/utils/providerRetry.d.ts +41 -0
  162. package/dist/lib/utils/providerRetry.js +114 -0
  163. package/dist/lib/utils/retryability.d.ts +14 -0
  164. package/dist/lib/utils/retryability.js +23 -0
  165. package/dist/lib/utils/sanitizers/svg.js +4 -5
  166. package/dist/lib/utils/tokenEstimation.d.ts +11 -1
  167. package/dist/lib/utils/tokenEstimation.js +19 -4
  168. package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
  169. package/dist/mcp/externalServerManager.js +66 -0
  170. package/dist/mcp/mcpCircuitBreaker.js +24 -0
  171. package/dist/mcp/mcpClientFactory.js +16 -0
  172. package/dist/mcp/toolDiscoveryService.js +32 -6
  173. package/dist/mcp/toolRegistry.js +193 -123
  174. package/dist/models/anthropicModels.d.ts +267 -0
  175. package/dist/models/anthropicModels.js +527 -0
  176. package/dist/neurolink.d.ts +6 -0
  177. package/dist/neurolink.js +1162 -646
  178. package/dist/providers/amazonBedrock.d.ts +1 -1
  179. package/dist/providers/amazonBedrock.js +521 -319
  180. package/dist/providers/anthropic.d.ts +123 -2
  181. package/dist/providers/anthropic.js +873 -27
  182. package/dist/providers/anthropicBaseProvider.js +77 -17
  183. package/dist/providers/googleAiStudio.d.ts +1 -1
  184. package/dist/providers/googleAiStudio.js +292 -227
  185. package/dist/providers/googleVertex.d.ts +36 -1
  186. package/dist/providers/googleVertex.js +553 -260
  187. package/dist/providers/ollama.js +329 -278
  188. package/dist/providers/openAI.js +77 -19
  189. package/dist/providers/sagemaker/parsers.js +3 -3
  190. package/dist/providers/sagemaker/streaming.js +3 -3
  191. package/dist/proxy/proxyFetch.js +81 -48
  192. package/dist/rag/ChunkerFactory.js +1 -1
  193. package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
  194. package/dist/rag/chunkers/MarkdownChunker.js +213 -9
  195. package/dist/rag/chunking/markdownChunker.d.ts +16 -0
  196. package/dist/rag/chunking/markdownChunker.js +174 -2
  197. package/dist/rag/pipeline/contextAssembly.js +2 -1
  198. package/dist/rag/ragIntegration.d.ts +18 -1
  199. package/dist/rag/ragIntegration.js +94 -14
  200. package/dist/rag/retrieval/vectorQueryTool.js +21 -4
  201. package/dist/server/abstract/baseServerAdapter.js +4 -1
  202. package/dist/server/adapters/fastifyAdapter.js +35 -30
  203. package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
  204. package/dist/services/server/ai/observability/instrumentation.js +39 -0
  205. package/dist/telemetry/attributes.d.ts +52 -0
  206. package/dist/telemetry/attributes.js +60 -0
  207. package/dist/telemetry/index.d.ts +3 -0
  208. package/dist/telemetry/index.js +3 -0
  209. package/dist/telemetry/telemetryService.d.ts +6 -0
  210. package/dist/telemetry/telemetryService.js +6 -0
  211. package/dist/telemetry/tracers.d.ts +15 -0
  212. package/dist/telemetry/tracers.js +16 -0
  213. package/dist/telemetry/withSpan.d.ts +9 -0
  214. package/dist/telemetry/withSpan.js +34 -0
  215. package/dist/types/contextTypes.d.ts +10 -0
  216. package/dist/types/errors.d.ts +62 -0
  217. package/dist/types/errors.js +107 -0
  218. package/dist/types/index.d.ts +2 -1
  219. package/dist/types/index.js +2 -0
  220. package/dist/types/providers.d.ts +107 -0
  221. package/dist/types/providers.js +69 -0
  222. package/dist/types/streamTypes.d.ts +14 -0
  223. package/dist/types/subscriptionTypes.d.ts +893 -0
  224. package/dist/types/subscriptionTypes.js +7 -0
  225. package/dist/utils/conversationMemory.js +121 -82
  226. package/dist/utils/logger.d.ts +5 -0
  227. package/dist/utils/logger.js +50 -2
  228. package/dist/utils/messageBuilder.js +22 -42
  229. package/dist/utils/modelDetection.js +3 -3
  230. package/dist/utils/providerConfig.d.ts +167 -0
  231. package/dist/utils/providerConfig.js +619 -9
  232. package/dist/utils/providerRetry.d.ts +41 -0
  233. package/dist/utils/providerRetry.js +113 -0
  234. package/dist/utils/retryability.d.ts +14 -0
  235. package/dist/utils/retryability.js +22 -0
  236. package/dist/utils/sanitizers/svg.js +4 -5
  237. package/dist/utils/tokenEstimation.d.ts +11 -1
  238. package/dist/utils/tokenEstimation.js +19 -4
  239. package/dist/utils/videoAnalysisProcessor.js +7 -3
  240. package/dist/workflow/config.d.ts +26 -26
  241. package/package.json +2 -1
@@ -19,7 +19,9 @@ import { ErrorCategory, ErrorSeverity } from "./constants/enums.js";
19
19
  import { CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, NANOSECOND_TO_MS_DIVISOR, PERFORMANCE_THRESHOLDS, PROVIDER_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, TOOL_TIMEOUTS, } from "./constants/index.js";
20
20
  import { checkContextBudget } from "./context/budgetChecker.js";
21
21
  import { ContextCompactor, } from "./context/contextCompactor.js";
22
- import { isContextOverflowError } from "./context/errorDetection.js";
22
+ import { isContextOverflowError, getContextOverflowProvider, parseProviderOverflowDetails, } from "./context/errorDetection.js";
23
+ import { ContextBudgetExceededError } from "./context/errors.js";
24
+ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
23
25
  import { repairToolPairs } from "./context/toolPairRepair.js";
24
26
  import { SYSTEM_LIMITS } from "./core/constants.js";
25
27
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
@@ -51,9 +53,60 @@ import { BinaryTaskClassifier } from "./utils/taskClassifier.js";
51
53
  // Tool detection and execution imports
52
54
  // Transformation utilities
53
55
  import { extractToolNames, optimizeToolForCollection, transformAvailableTools, transformParamsForLogging, transformToolExecutions, transformToolExecutionsForMCP, transformToolsForMCP, transformToolsToDescriptions, transformToolsToExpectedFormat, } from "./utils/transformationUtils.js";
56
+ import { InvalidModelError, AuthenticationError, AuthorizationError, } from "./types/errors.js";
54
57
  import { isNonNullObject } from "./utils/typeUtils.js";
58
+ import { NON_RETRYABLE_HTTP_STATUS_CODES } from "./utils/retryability.js";
59
+ import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
60
+ import { tracers } from "./telemetry/tracers.js";
61
+ import { ATTR } from "./telemetry/attributes.js";
55
62
  import { getWorkflow } from "./workflow/core/workflowRegistry.js";
56
63
  import { runWorkflow } from "./workflow/core/workflowRunner.js";
64
+ /**
65
+ * Check if an error is a non-retryable provider error that should immediately
66
+ * stop the retry/fallback chain. These errors represent permanent failures
67
+ * (e.g., model not found, authentication failed) where retrying with the
68
+ * same configuration will never succeed.
69
+ *
70
+ * This prevents wasting tokens and latency on guaranteed-to-fail retries.
71
+ * For example, without this check a NOT_FOUND error for a model causes 6 retries of a 418KB
72
+ * message, wasting ~628,000 tokens and adding 10+ seconds of latency.
73
+ */
74
+ function isNonRetryableProviderError(error) {
75
+ // Check for typed error classes from providers
76
+ if (error instanceof InvalidModelError) {
77
+ return true;
78
+ }
79
+ if (error instanceof AuthenticationError) {
80
+ return true;
81
+ }
82
+ if (error instanceof AuthorizationError) {
83
+ return true;
84
+ }
85
+ // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
86
+ if (error && typeof error === "object") {
87
+ const err = error;
88
+ const status = typeof err.status === "number"
89
+ ? err.status
90
+ : typeof err.statusCode === "number"
91
+ ? err.statusCode
92
+ : undefined;
93
+ if (status && NON_RETRYABLE_HTTP_STATUS_CODES.includes(status)) {
94
+ return true;
95
+ }
96
+ }
97
+ // Check error message for NOT_FOUND / PERMISSION_DENIED / UNAUTHENTICATED patterns (catches wrapped errors)
98
+ if (error instanceof Error) {
99
+ const msg = error.message;
100
+ if (msg.includes("NOT_FOUND") ||
101
+ msg.includes("Model Not Found") ||
102
+ msg.includes("model not found") ||
103
+ msg.includes("PERMISSION_DENIED") ||
104
+ msg.includes("UNAUTHENTICATED")) {
105
+ return true;
106
+ }
107
+ }
108
+ return false;
109
+ }
57
110
  /**
58
111
  * NeuroLink - Universal AI Development Platform
59
112
  *
@@ -128,6 +181,7 @@ import { runWorkflow } from "./workflow/core/workflowRunner.js";
128
181
  */
129
182
  export class NeuroLink {
130
183
  mcpInitialized = false;
184
+ mcpInitPromise = null;
131
185
  emitter = new EventEmitter();
132
186
  toolRegistry;
133
187
  autoDiscoveredServerInfos = [];
@@ -161,8 +215,6 @@ export class NeuroLink {
161
215
  result: result, // Enhanced: include actual result
162
216
  error: error, // Enhanced: include error if present
163
217
  });
164
- // ADD: Bedrock-compatible tool:end event (positional parameters)
165
- this.emitter.emit("tool:end", toolName, success ? result : error);
166
218
  }
167
219
  // Conversation memory support
168
220
  conversationMemory;
@@ -191,6 +243,7 @@ export class NeuroLink {
191
243
  if (options.context &&
192
244
  typeof options.context === "object" &&
193
245
  options.context !== null) {
246
+ let callbackExecuted = false;
194
247
  try {
195
248
  const ctx = options.context;
196
249
  // Trigger context scoping if any meaningful Langfuse field is present
@@ -231,6 +284,7 @@ export class NeuroLink {
231
284
  ...(customAttributes !== undefined && { customAttributes }),
232
285
  }, async () => {
233
286
  try {
287
+ callbackExecuted = true;
234
288
  const result = await callback();
235
289
  resolve(result);
236
290
  }
@@ -242,6 +296,12 @@ export class NeuroLink {
242
296
  }
243
297
  }
244
298
  catch (error) {
299
+ if (callbackExecuted) {
300
+ // Callback was executed inside Langfuse context but failed — do NOT retry
301
+ // Re-throw to avoid double API calls and preserve error context
302
+ throw error;
303
+ }
304
+ // Langfuse context setup itself failed — graceful degradation, run without context
245
305
  logger.warn("Failed to set Langfuse context from options", {
246
306
  error: error instanceof Error ? error.message : String(error),
247
307
  });
@@ -995,6 +1055,31 @@ Current user's request: ${currentInput}`;
995
1055
  * Uses isolated async context to prevent hanging
996
1056
  */
997
1057
  async initializeMCP() {
1058
+ // Skip if already initialized — prevents redundant re-init on every generate call
1059
+ if (this.mcpInitialized) {
1060
+ return;
1061
+ }
1062
+ // Deduplicate concurrent initialization attempts — if an init is already
1063
+ // in-flight, coalesce callers onto the same promise instead of running
1064
+ // a second parallel initialization.
1065
+ if (this.mcpInitPromise) {
1066
+ return this.mcpInitPromise;
1067
+ }
1068
+ this.mcpInitPromise = this.performMCPInitializationOnce();
1069
+ try {
1070
+ await this.mcpInitPromise;
1071
+ }
1072
+ finally {
1073
+ // Clear the in-flight promise so a future call (e.g. after cleanup/reset)
1074
+ // can re-initialize if needed.
1075
+ this.mcpInitPromise = null;
1076
+ }
1077
+ }
1078
+ /**
1079
+ * Actual one-shot MCP initialization logic. Called at most once per
1080
+ * NeuroLink instance lifetime (unless cleanup() resets the flag).
1081
+ */
1082
+ async performMCPInitializationOnce() {
998
1083
  const mcpInitId = `mcp-init-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
999
1084
  const mcpInitStartTime = Date.now();
1000
1085
  const mcpInitHrTimeStart = process.hrtime.bigint();
@@ -1640,279 +1725,321 @@ Current user's request: ${currentInput}`;
1640
1725
  * @since 1.0.0
1641
1726
  */
1642
1727
  async generate(optionsOrPrompt) {
1643
- const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
1644
- // Convert string prompt to full options
1645
- const options = typeof optionsOrPrompt === "string"
1646
- ? { input: { text: optionsOrPrompt } }
1647
- : optionsOrPrompt;
1648
- // Validate prompt
1649
- if (!options.input?.text || typeof options.input.text !== "string") {
1650
- throw new Error("Input text is required and must be a non-empty string");
1651
- }
1652
- // Check budget limit before making API call
1653
- if (options.maxBudgetUsd !== undefined &&
1654
- options.maxBudgetUsd > 0 &&
1655
- this._sessionCostUsd >= options.maxBudgetUsd) {
1656
- throw new NeuroLinkError({
1657
- code: "SESSION_BUDGET_EXCEEDED",
1658
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
1659
- category: ErrorCategory.VALIDATION,
1660
- severity: ErrorSeverity.HIGH,
1661
- retriable: false,
1662
- context: {
1663
- spent: this._sessionCostUsd,
1664
- limit: options.maxBudgetUsd,
1665
- },
1666
- });
1667
- }
1668
- // Check if workflow is requested
1669
- if (options.workflow || options.workflowConfig) {
1670
- return await this.generateWithWorkflow(options);
1671
- }
1672
- // Check if PPT output mode is requested
1673
- if (options.output?.mode === "ppt") {
1674
- return await this.generateWithPPT(options);
1675
- }
1676
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
1677
- return await this.setLangfuseContextFromOptions(options, async () => {
1678
- if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1679
- options.context?.userId) {
1680
- try {
1681
- const mem0 = await this.ensureMem0Ready();
1682
- if (!mem0) {
1683
- logger.debug("Mem0 not available, continuing without memory retrieval");
1684
- }
1685
- else {
1686
- const memories = await mem0.search(options.input.text, {
1687
- user_id: options.context.userId,
1688
- limit: 5,
1689
- });
1690
- if (memories && memories.length > 0) {
1691
- // Enhance the input with memory context
1692
- const memoryContext = this.extractMemoryContext(memories);
1693
- options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
1694
- }
1695
- }
1696
- }
1697
- catch (error) {
1698
- logger.warn("Mem0 memory retrieval failed:", error);
1699
- }
1700
- }
1701
- // Memory retrieval
1702
- if (this.conversationMemoryConfig?.conversationMemory?.memory?.enabled &&
1703
- options.context?.userId) {
1704
- try {
1705
- options.input.text = await this.retrieveMemory(options.input.text, options.context.userId);
1706
- logger.debug("Memory retrieval successful");
1707
- }
1708
- catch (error) {
1709
- logger.warn("Memory retrieval failed:", error);
1728
+ return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, async (generateSpan) => {
1729
+ try {
1730
+ const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
1731
+ // Convert string prompt to full options
1732
+ const options = typeof optionsOrPrompt === "string"
1733
+ ? { input: { text: optionsOrPrompt } }
1734
+ : optionsOrPrompt;
1735
+ // Set span attributes for observability
1736
+ generateSpan.setAttribute("neurolink.provider", options.provider || "default");
1737
+ generateSpan.setAttribute("neurolink.model", options.model || "default");
1738
+ generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
1739
+ ? optionsOrPrompt.length
1740
+ : options.input?.text?.length || 0);
1741
+ generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
1742
+ // Validate prompt
1743
+ if (!options.input?.text || typeof options.input.text !== "string") {
1744
+ throw new Error("Input text is required and must be a non-empty string");
1710
1745
  }
1711
- }
1712
- const startTime = Date.now();
1713
- // Apply orchestration if enabled and no specific provider/model requested
1714
- if (this.enableOrchestration && !options.provider && !options.model) {
1715
- try {
1716
- const orchestratedOptions = await this.applyOrchestration(options);
1717
- logger.debug("Orchestration applied", {
1718
- originalProvider: options.provider || "auto",
1719
- orchestratedProvider: orchestratedOptions.provider,
1720
- orchestratedModel: orchestratedOptions.model,
1721
- prompt: options.input.text.substring(0, 100),
1746
+ // Check budget limit before making API call
1747
+ if (options.maxBudgetUsd !== undefined &&
1748
+ options.maxBudgetUsd > 0 &&
1749
+ this._sessionCostUsd >= options.maxBudgetUsd) {
1750
+ throw new NeuroLinkError({
1751
+ code: "SESSION_BUDGET_EXCEEDED",
1752
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
1753
+ category: ErrorCategory.VALIDATION,
1754
+ severity: ErrorSeverity.HIGH,
1755
+ retriable: false,
1756
+ context: {
1757
+ spent: this._sessionCostUsd,
1758
+ limit: options.maxBudgetUsd,
1759
+ },
1722
1760
  });
1723
- // Use orchestrated options
1724
- Object.assign(options, orchestratedOptions);
1725
1761
  }
1726
- catch (error) {
1727
- logger.warn("Orchestration failed, continuing with original options", {
1728
- error: error instanceof Error ? error.message : String(error),
1729
- originalProvider: options.provider || "auto",
1730
- });
1731
- // Continue with original options if orchestration fails
1762
+ // Check if workflow is requested
1763
+ if (options.workflow || options.workflowConfig) {
1764
+ return await this.generateWithWorkflow(options);
1732
1765
  }
1733
- }
1734
- // Emit generation start event (NeuroLink format - keep existing)
1735
- this.emitter.emit("generation:start", {
1736
- provider: options.provider || "auto",
1737
- timestamp: startTime,
1738
- });
1739
- // ADD: Bedrock-compatible response:start event
1740
- this.emitter.emit("response:start");
1741
- // ADD: Bedrock-compatible message event
1742
- this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
1743
- // Process factory configuration
1744
- const factoryResult = processFactoryOptions(options);
1745
- // Validate factory configuration if present
1746
- if (factoryResult.hasFactoryConfig && options.factoryConfig) {
1747
- const validation = validateFactoryConfig(options.factoryConfig);
1748
- if (!validation.isValid) {
1749
- logger.warn("Invalid factory configuration detected", {
1750
- errors: validation.errors,
1751
- });
1752
- // Continue with warning rather than throwing - graceful degradation
1766
+ // Check if PPT output mode is requested
1767
+ if (options.output?.mode === "ppt") {
1768
+ const pptResult = await this.generateWithPPT(options);
1769
+ generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
1770
+ if (pptResult.analytics) {
1771
+ generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
1772
+ generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
1773
+ generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
1774
+ }
1775
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
1776
+ return pptResult;
1753
1777
  }
1754
- }
1755
- // RAG Integration: If rag config is provided, prepare the RAG search tool
1756
- if (options.rag?.files?.length) {
1757
- try {
1758
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
1759
- const ragResult = await prepareRAGTool(options.rag, options.provider);
1760
- // Inject the RAG tool into the tools record
1761
- if (!options.tools) {
1762
- options.tools = {};
1778
+ // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
1779
+ return await this.setLangfuseContextFromOptions(options, async () => {
1780
+ if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1781
+ options.context?.userId) {
1782
+ try {
1783
+ const mem0 = await this.ensureMem0Ready();
1784
+ if (!mem0) {
1785
+ logger.debug("Mem0 not available, continuing without memory retrieval");
1786
+ }
1787
+ else {
1788
+ const memories = await mem0.search(options.input.text, {
1789
+ user_id: options.context.userId,
1790
+ limit: 5,
1791
+ });
1792
+ if (memories && memories.length > 0) {
1793
+ // Enhance the input with memory context
1794
+ const memoryContext = this.extractMemoryContext(memories);
1795
+ options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
1796
+ }
1797
+ }
1798
+ }
1799
+ catch (error) {
1800
+ logger.warn("Mem0 memory retrieval failed:", error);
1801
+ }
1802
+ }
1803
+ // Memory retrieval
1804
+ if (this.conversationMemoryConfig?.conversationMemory?.memory
1805
+ ?.enabled &&
1806
+ options.context?.userId) {
1807
+ try {
1808
+ options.input.text = await this.retrieveMemory(options.input.text, options.context.userId);
1809
+ logger.debug("Memory retrieval successful");
1810
+ }
1811
+ catch (error) {
1812
+ logger.warn("Memory retrieval failed:", error);
1813
+ }
1763
1814
  }
1764
- options.tools[ragResult.toolName] =
1765
- ragResult.tool;
1766
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
1767
- const ragSystemInstruction = [
1768
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
1769
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
1770
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
1771
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
1772
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
1773
- ].join(" ");
1774
- options.systemPrompt =
1775
- (options.systemPrompt || "") + ragSystemInstruction;
1776
- logger.info("[RAG] Tool injected into generate()", {
1777
- toolName: ragResult.toolName,
1778
- filesLoaded: ragResult.filesLoaded,
1779
- chunksIndexed: ragResult.chunksIndexed,
1815
+ const startTime = Date.now();
1816
+ // Apply orchestration if enabled and no specific provider/model requested
1817
+ if (this.enableOrchestration &&
1818
+ !options.provider &&
1819
+ !options.model) {
1820
+ try {
1821
+ const orchestratedOptions = await this.applyOrchestration(options);
1822
+ logger.debug("Orchestration applied", {
1823
+ originalProvider: options.provider || "auto",
1824
+ orchestratedProvider: orchestratedOptions.provider,
1825
+ orchestratedModel: orchestratedOptions.model,
1826
+ prompt: options.input.text.substring(0, 100),
1827
+ });
1828
+ // Use orchestrated options
1829
+ Object.assign(options, orchestratedOptions);
1830
+ }
1831
+ catch (error) {
1832
+ logger.warn("Orchestration failed, continuing with original options", {
1833
+ error: error instanceof Error ? error.message : String(error),
1834
+ originalProvider: options.provider || "auto",
1835
+ });
1836
+ // Continue with original options if orchestration fails
1837
+ }
1838
+ }
1839
+ // Emit generation start event (NeuroLink format - keep existing)
1840
+ this.emitter.emit("generation:start", {
1841
+ provider: options.provider || "auto",
1842
+ timestamp: startTime,
1780
1843
  });
1781
- }
1782
- catch (error) {
1783
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
1784
- error: error instanceof Error ? error.message : String(error),
1844
+ // ADD: Bedrock-compatible response:start event
1845
+ this.emitter.emit("response:start");
1846
+ // ADD: Bedrock-compatible message event
1847
+ this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
1848
+ // Process factory configuration
1849
+ const factoryResult = processFactoryOptions(options);
1850
+ // Validate factory configuration if present
1851
+ if (factoryResult.hasFactoryConfig && options.factoryConfig) {
1852
+ const validation = validateFactoryConfig(options.factoryConfig);
1853
+ if (!validation.isValid) {
1854
+ logger.warn("Invalid factory configuration detected", {
1855
+ errors: validation.errors,
1856
+ });
1857
+ // Continue with warning rather than throwing - graceful degradation
1858
+ }
1859
+ }
1860
+ // RAG Integration: If rag config is provided, prepare the RAG search tool
1861
+ if (options.rag?.files?.length) {
1862
+ try {
1863
+ const { prepareRAGTool } = await import("./rag/ragIntegration.js");
1864
+ const ragResult = await prepareRAGTool(options.rag, options.provider);
1865
+ // Inject the RAG tool into the tools record
1866
+ if (!options.tools) {
1867
+ options.tools = {};
1868
+ }
1869
+ options.tools[ragResult.toolName] =
1870
+ ragResult.tool;
1871
+ // Inject RAG-aware system prompt so the AI uses the RAG tool first
1872
+ const ragSystemInstruction = [
1873
+ `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
1874
+ `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
1875
+ `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
1876
+ `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
1877
+ `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
1878
+ ].join(" ");
1879
+ options.systemPrompt =
1880
+ (options.systemPrompt || "") + ragSystemInstruction;
1881
+ logger.info("[RAG] Tool injected into generate()", {
1882
+ toolName: ragResult.toolName,
1883
+ filesLoaded: ragResult.filesLoaded,
1884
+ chunksIndexed: ragResult.chunksIndexed,
1885
+ });
1886
+ }
1887
+ catch (error) {
1888
+ logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
1889
+ error: error instanceof Error ? error.message : String(error),
1890
+ });
1891
+ }
1892
+ }
1893
+ // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
1894
+ const baseOptions = {
1895
+ prompt: options.input.text,
1896
+ provider: options.provider,
1897
+ model: options.model,
1898
+ temperature: options.temperature,
1899
+ maxTokens: options.maxTokens,
1900
+ systemPrompt: options.systemPrompt,
1901
+ schema: options.schema,
1902
+ output: options.output,
1903
+ tools: options.tools, // Includes RAG tools if rag config was provided
1904
+ disableTools: options.disableTools,
1905
+ toolFilter: options.toolFilter,
1906
+ excludeTools: options.excludeTools,
1907
+ maxSteps: options.maxSteps,
1908
+ toolChoice: options.toolChoice,
1909
+ prepareStep: options.prepareStep,
1910
+ enableAnalytics: options.enableAnalytics,
1911
+ enableEvaluation: options.enableEvaluation,
1912
+ context: options.context,
1913
+ evaluationDomain: options.evaluationDomain,
1914
+ toolUsageContext: options.toolUsageContext,
1915
+ input: options.input, // This includes text, images, and content arrays
1916
+ region: options.region,
1917
+ tts: options.tts,
1918
+ fileRegistry: this.fileRegistry,
1919
+ abortSignal: options.abortSignal,
1920
+ skipToolPromptInjection: options.skipToolPromptInjection,
1921
+ };
1922
+ // Auto-map top-level sessionId/userId to context for convenience
1923
+ // Tests and users may pass sessionId/userId as top-level options
1924
+ const extraContext = options;
1925
+ if (extraContext.sessionId || extraContext.userId) {
1926
+ baseOptions.context = {
1927
+ ...baseOptions.context,
1928
+ ...(extraContext.sessionId && !baseOptions.context?.sessionId
1929
+ ? { sessionId: extraContext.sessionId }
1930
+ : {}),
1931
+ ...(extraContext.userId && !baseOptions.context?.userId
1932
+ ? { userId: extraContext.userId }
1933
+ : {}),
1934
+ };
1935
+ }
1936
+ // Apply factory enhancement using centralized utilities
1937
+ const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
1938
+ // Pass conversation memory config if available
1939
+ if (this.conversationMemory) {
1940
+ textOptions.conversationMemoryConfig =
1941
+ this.conversationMemory.config;
1942
+ // Include original prompt for context summarization
1943
+ textOptions.originalPrompt = originalPrompt;
1944
+ }
1945
+ // Detect and execute domain-specific tools
1946
+ const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
1947
+ // Update prompt with tool results if available
1948
+ if (enhancedPrompt !== textOptions.prompt) {
1949
+ textOptions.prompt = enhancedPrompt;
1950
+ logger.debug("Enhanced prompt with tool results", {
1951
+ originalLength: options.input.text.length,
1952
+ enhancedLength: enhancedPrompt.length,
1953
+ toolResults: toolResults.length,
1954
+ });
1955
+ }
1956
+ // Use redesigned generation logic
1957
+ const textResult = await this.generateTextInternal(textOptions);
1958
+ // Emit generation completion event (NeuroLink format - enhanced with content)
1959
+ this.emitter.emit("generation:end", {
1960
+ provider: textResult.provider,
1961
+ responseTime: Date.now() - startTime,
1962
+ toolsUsed: textResult.toolsUsed,
1963
+ timestamp: Date.now(),
1964
+ result: textResult, // Enhanced: include full result
1785
1965
  });
1786
- }
1787
- }
1788
- // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
1789
- const baseOptions = {
1790
- prompt: options.input.text,
1791
- provider: options.provider,
1792
- model: options.model,
1793
- temperature: options.temperature,
1794
- maxTokens: options.maxTokens,
1795
- systemPrompt: options.systemPrompt,
1796
- schema: options.schema,
1797
- output: options.output,
1798
- tools: options.tools, // Includes RAG tools if rag config was provided
1799
- disableTools: options.disableTools,
1800
- toolFilter: options.toolFilter,
1801
- excludeTools: options.excludeTools,
1802
- maxSteps: options.maxSteps,
1803
- toolChoice: options.toolChoice,
1804
- prepareStep: options.prepareStep,
1805
- enableAnalytics: options.enableAnalytics,
1806
- enableEvaluation: options.enableEvaluation,
1807
- context: options.context,
1808
- evaluationDomain: options.evaluationDomain,
1809
- toolUsageContext: options.toolUsageContext,
1810
- input: options.input, // This includes text, images, and content arrays
1811
- region: options.region,
1812
- tts: options.tts,
1813
- fileRegistry: this.fileRegistry,
1814
- abortSignal: options.abortSignal,
1815
- skipToolPromptInjection: options.skipToolPromptInjection,
1816
- };
1817
- // Auto-map top-level sessionId/userId to context for convenience
1818
- // Tests and users may pass sessionId/userId as top-level options
1819
- const extraContext = options;
1820
- if (extraContext.sessionId || extraContext.userId) {
1821
- baseOptions.context = {
1822
- ...baseOptions.context,
1823
- ...(extraContext.sessionId && !baseOptions.context?.sessionId
1824
- ? { sessionId: extraContext.sessionId }
1825
- : {}),
1826
- ...(extraContext.userId && !baseOptions.context?.userId
1827
- ? { userId: extraContext.userId }
1828
- : {}),
1829
- };
1830
- }
1831
- // Apply factory enhancement using centralized utilities
1832
- const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
1833
- // Pass conversation memory config if available
1834
- if (this.conversationMemory) {
1835
- textOptions.conversationMemoryConfig = this.conversationMemory.config;
1836
- // Include original prompt for context summarization
1837
- textOptions.originalPrompt = originalPrompt;
1966
+ // ADD: Bedrock-compatible response:end event with content
1967
+ this.emitter.emit("response:end", textResult.content || "");
1968
+ // ADD: Bedrock-compatible message event
1969
+ this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
1970
+ // Convert back to GenerateResult
1971
+ const generateResult = {
1972
+ content: textResult.content,
1973
+ finishReason: textResult.finishReason,
1974
+ provider: textResult.provider,
1975
+ model: textResult.model,
1976
+ usage: textResult.usage
1977
+ ? {
1978
+ input: textResult.usage.input || 0,
1979
+ output: textResult.usage.output || 0,
1980
+ total: textResult.usage.total || 0,
1981
+ }
1982
+ : undefined,
1983
+ responseTime: textResult.responseTime,
1984
+ toolsUsed: textResult.toolsUsed,
1985
+ toolExecutions: transformToolExecutions(textResult.toolExecutions),
1986
+ enhancedWithTools: textResult.enhancedWithTools,
1987
+ availableTools: transformAvailableTools(textResult.availableTools),
1988
+ analytics: textResult.analytics,
1989
+ // CRITICAL FIX: Include imageOutput for image generation models
1990
+ imageOutput: textResult.imageOutput,
1991
+ evaluation: textResult.evaluation
1992
+ ? {
1993
+ ...textResult.evaluation,
1994
+ isOffTopic: textResult.evaluation
1995
+ .isOffTopic ?? false,
1996
+ alertSeverity: textResult.evaluation
1997
+ .alertSeverity ??
1998
+ "none",
1999
+ reasoning: textResult.evaluation
2000
+ .reasoning ?? "No evaluation provided",
2001
+ evaluationModel: textResult.evaluation
2002
+ .evaluationModel ?? "unknown",
2003
+ evaluationTime: textResult.evaluation
2004
+ .evaluationTime ?? Date.now(),
2005
+ // Include evaluationDomain from original options
2006
+ evaluationDomain: textResult.evaluation
2007
+ .evaluationDomain ??
2008
+ textOptions.evaluationDomain ??
2009
+ factoryResult.domainType,
2010
+ }
2011
+ : undefined,
2012
+ audio: textResult.audio,
2013
+ video: textResult.video,
2014
+ ppt: textResult.ppt,
2015
+ };
2016
+ // Accumulate session cost for budget tracking
2017
+ if (generateResult.analytics?.cost &&
2018
+ generateResult.analytics.cost > 0) {
2019
+ this._sessionCostUsd += generateResult.analytics.cost;
2020
+ }
2021
+ this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
2022
+ // Set completion span attributes
2023
+ generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2024
+ generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2025
+ generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2026
+ generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2027
+ generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2028
+ generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2029
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2030
+ return generateResult;
2031
+ });
1838
2032
  }
1839
- // Detect and execute domain-specific tools
1840
- const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
1841
- // Update prompt with tool results if available
1842
- if (enhancedPrompt !== textOptions.prompt) {
1843
- textOptions.prompt = enhancedPrompt;
1844
- logger.debug("Enhanced prompt with tool results", {
1845
- originalLength: options.input.text.length,
1846
- enhancedLength: enhancedPrompt.length,
1847
- toolResults: toolResults.length,
2033
+ catch (error) {
2034
+ generateSpan.setStatus({
2035
+ code: SpanStatusCode.ERROR,
2036
+ message: error instanceof Error ? error.message : String(error),
1848
2037
  });
2038
+ throw error;
1849
2039
  }
1850
- // Use redesigned generation logic
1851
- const textResult = await this.generateTextInternal(textOptions);
1852
- // Emit generation completion event (NeuroLink format - enhanced with content)
1853
- this.emitter.emit("generation:end", {
1854
- provider: textResult.provider,
1855
- responseTime: Date.now() - startTime,
1856
- toolsUsed: textResult.toolsUsed,
1857
- timestamp: Date.now(),
1858
- result: textResult, // Enhanced: include full result
1859
- });
1860
- // ADD: Bedrock-compatible response:end event with content
1861
- this.emitter.emit("response:end", textResult.content || "");
1862
- // ADD: Bedrock-compatible message event
1863
- this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
1864
- // Convert back to GenerateResult
1865
- const generateResult = {
1866
- content: textResult.content,
1867
- finishReason: textResult.finishReason,
1868
- provider: textResult.provider,
1869
- model: textResult.model,
1870
- usage: textResult.usage
1871
- ? {
1872
- input: textResult.usage.input || 0,
1873
- output: textResult.usage.output || 0,
1874
- total: textResult.usage.total || 0,
1875
- }
1876
- : undefined,
1877
- responseTime: textResult.responseTime,
1878
- toolsUsed: textResult.toolsUsed,
1879
- toolExecutions: transformToolExecutions(textResult.toolExecutions),
1880
- enhancedWithTools: textResult.enhancedWithTools,
1881
- availableTools: transformAvailableTools(textResult.availableTools),
1882
- analytics: textResult.analytics,
1883
- // CRITICAL FIX: Include imageOutput for image generation models
1884
- imageOutput: textResult.imageOutput,
1885
- evaluation: textResult.evaluation
1886
- ? {
1887
- ...textResult.evaluation,
1888
- isOffTopic: textResult.evaluation
1889
- .isOffTopic ?? false,
1890
- alertSeverity: textResult.evaluation
1891
- .alertSeverity ??
1892
- "none",
1893
- reasoning: textResult.evaluation
1894
- .reasoning ?? "No evaluation provided",
1895
- evaluationModel: textResult.evaluation
1896
- .evaluationModel ?? "unknown",
1897
- evaluationTime: textResult.evaluation
1898
- .evaluationTime ?? Date.now(),
1899
- // Include evaluationDomain from original options
1900
- evaluationDomain: textResult.evaluation
1901
- .evaluationDomain ??
1902
- textOptions.evaluationDomain ??
1903
- factoryResult.domainType,
1904
- }
1905
- : undefined,
1906
- audio: textResult.audio,
1907
- video: textResult.video,
1908
- ppt: textResult.ppt,
1909
- };
1910
- // Accumulate session cost for budget tracking
1911
- if (generateResult.analytics?.cost && generateResult.analytics.cost > 0) {
1912
- this._sessionCostUsd += generateResult.analytics.cost;
2040
+ finally {
2041
+ generateSpan.end();
1913
2042
  }
1914
- this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
1915
- return generateResult;
1916
2043
  });
1917
2044
  }
1918
2045
  /**
@@ -2243,148 +2370,235 @@ Current user's request: ${currentInput}`;
2243
2370
  * 5. Store conversation turn for future context
2244
2371
  */
2245
2372
  async generateTextInternal(options) {
2246
- const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2247
- const existingRequestId = options.context?.requestId;
2248
- const requestId = typeof existingRequestId === "string" && existingRequestId
2249
- ? existingRequestId
2250
- : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2251
- options.context = { ...options.context, requestId };
2252
- const generateInternalStartTime = Date.now();
2253
- const generateInternalHrTimeStart = process.hrtime.bigint();
2254
- const functionTag = "NeuroLink.generateTextInternal";
2255
- this.logGenerateTextInternalStart(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, options, functionTag);
2256
- this.emitGenerationStartEvents(options);
2257
- try {
2258
- await this.initializeConversationMemoryForGeneration(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart);
2259
- const mcpResult = await this.attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
2260
- if (mcpResult) {
2261
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS (MCP path)`, {
2262
- provider: mcpResult.provider,
2263
- model: mcpResult.model,
2264
- responseTimeMs: Date.now() - generateInternalStartTime,
2265
- tokensUsed: mcpResult.usage?.total || 0,
2266
- toolsUsed: mcpResult.toolsUsed?.length || 0,
2267
- ...(mcpResult.usage?.cacheCreationTokens !== undefined && {
2268
- cacheCreationTokens: mcpResult.usage.cacheCreationTokens,
2269
- }),
2270
- ...(mcpResult.usage?.cacheReadTokens !== undefined && {
2271
- cacheReadTokens: mcpResult.usage.cacheReadTokens,
2272
- }),
2273
- ...(mcpResult.usage?.cacheSavingsPercent !== undefined && {
2274
- cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
2275
- }),
2276
- });
2277
- await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
2278
- this.emitter.emit("response:end", mcpResult.content || "");
2279
- return mcpResult;
2280
- }
2281
- if (options.abortSignal?.aborted) {
2282
- throw new DOMException("The operation was aborted", "AbortError");
2283
- }
2284
- const directResult = await this.directProviderGeneration(options);
2285
- logger.debug(`[${functionTag}] Direct generation successful`);
2286
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS`, {
2287
- provider: directResult.provider,
2288
- model: directResult.model,
2289
- responseTimeMs: Date.now() - generateInternalStartTime,
2290
- tokensUsed: directResult.usage?.total || 0,
2291
- toolsUsed: directResult.toolsUsed?.length || 0,
2292
- ...(directResult.usage?.cacheCreationTokens !== undefined && {
2293
- cacheCreationTokens: directResult.usage.cacheCreationTokens,
2294
- }),
2295
- ...(directResult.usage?.cacheReadTokens !== undefined && {
2296
- cacheReadTokens: directResult.usage.cacheReadTokens,
2297
- }),
2298
- ...(directResult.usage?.cacheSavingsPercent !== undefined && {
2299
- cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
2300
- }),
2301
- });
2302
- await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
2303
- this.emitter.emit("response:end", directResult.content || "");
2304
- this.emitter.emit("message", `Text generation completed successfully`);
2305
- return directResult;
2306
- }
2307
- catch (error) {
2308
- // Check if this is a context overflow error - attempt recovery
2309
- if (isContextOverflowError(error) && this.conversationMemory) {
2310
- logger.warn(`[${functionTag}] Context overflow detected, attempting aggressive compaction`, {
2311
- error: error instanceof Error ? error.message : String(error),
2312
- });
2373
+ return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, async (internalSpan) => {
2374
+ try {
2375
+ const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2376
+ const existingRequestId = options.context?.requestId;
2377
+ const requestId = typeof existingRequestId === "string" && existingRequestId
2378
+ ? existingRequestId
2379
+ : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2380
+ options.context = { ...options.context, requestId };
2381
+ const generateInternalStartTime = Date.now();
2382
+ const generateInternalHrTimeStart = process.hrtime.bigint();
2383
+ const functionTag = "NeuroLink.generateTextInternal";
2384
+ // Set span attributes for internal generation
2385
+ internalSpan.setAttribute("neurolink.request_id", requestId);
2386
+ internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
2387
+ internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
2388
+ internalSpan.setAttribute("neurolink.model", options.model || "default");
2389
+ this.logGenerateTextInternalStart(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, options, functionTag);
2390
+ this.emitGenerationStartEvents(options);
2313
2391
  try {
2314
- const conversationMessages = await getConversationMessages(this.conversationMemory, options);
2315
- // Calculate a meaningful compaction target from the model's budget
2316
- const recoveryBudget = checkContextBudget({
2317
- provider: options.provider || "openai",
2318
- model: options.model,
2319
- maxTokens: options.maxTokens,
2320
- currentPrompt: options.prompt,
2321
- systemPrompt: options.systemPrompt,
2322
- });
2323
- const compactionTarget = Math.floor(recoveryBudget.availableInputTokens * 0.7);
2324
- const compactor = new ContextCompactor({
2325
- enableSummarize: false, // Skip LLM call for recovery
2326
- truncationFraction: 0.75, // Aggressive truncation
2392
+ await this.initializeConversationMemoryForGeneration(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart);
2393
+ const mcpResult = await this.attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
2394
+ if (mcpResult) {
2395
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS (MCP path)`, {
2396
+ provider: mcpResult.provider,
2397
+ model: mcpResult.model,
2398
+ responseTimeMs: Date.now() - generateInternalStartTime,
2399
+ tokensUsed: mcpResult.usage?.total || 0,
2400
+ toolsUsed: mcpResult.toolsUsed?.length || 0,
2401
+ ...(mcpResult.usage?.cacheCreationTokens !== undefined && {
2402
+ cacheCreationTokens: mcpResult.usage.cacheCreationTokens,
2403
+ }),
2404
+ ...(mcpResult.usage?.cacheReadTokens !== undefined && {
2405
+ cacheReadTokens: mcpResult.usage.cacheReadTokens,
2406
+ }),
2407
+ ...(mcpResult.usage?.cacheSavingsPercent !== undefined && {
2408
+ cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
2409
+ }),
2410
+ });
2411
+ await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
2412
+ this.emitter.emit("response:end", mcpResult.content || "");
2413
+ internalSpan.setAttribute("neurolink.path", "mcp");
2414
+ internalSpan.setAttribute("neurolink.tokens.input", mcpResult.usage?.input || 0);
2415
+ internalSpan.setAttribute("neurolink.tokens.output", mcpResult.usage?.output || 0);
2416
+ internalSpan.setAttribute("neurolink.result_provider", mcpResult.provider || "unknown");
2417
+ internalSpan.setStatus({ code: SpanStatusCode.OK });
2418
+ return mcpResult;
2419
+ }
2420
+ if (options.abortSignal?.aborted) {
2421
+ throw new DOMException("The operation was aborted", "AbortError");
2422
+ }
2423
+ // Save original messages for smart overflow recovery (Solution 6)
2424
+ // directProviderGeneration may compact messages; if provider still rejects,
2425
+ // the catch block needs the originals for a more effective retry
2426
+ if (this.conversationMemory) {
2427
+ const originalMessages = await getConversationMessages(this.conversationMemory, options);
2428
+ options._originalConversationMessages = originalMessages
2429
+ ? [...originalMessages]
2430
+ : undefined;
2431
+ }
2432
+ const directResult = await this.directProviderGeneration(options);
2433
+ logger.debug(`[${functionTag}] Direct generation successful`);
2434
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS`, {
2435
+ provider: directResult.provider,
2436
+ model: directResult.model,
2437
+ responseTimeMs: Date.now() - generateInternalStartTime,
2438
+ tokensUsed: directResult.usage?.total || 0,
2439
+ toolsUsed: directResult.toolsUsed?.length || 0,
2440
+ ...(directResult.usage?.cacheCreationTokens !== undefined && {
2441
+ cacheCreationTokens: directResult.usage.cacheCreationTokens,
2442
+ }),
2443
+ ...(directResult.usage?.cacheReadTokens !== undefined && {
2444
+ cacheReadTokens: directResult.usage.cacheReadTokens,
2445
+ }),
2446
+ ...(directResult.usage?.cacheSavingsPercent !== undefined && {
2447
+ cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
2448
+ }),
2327
2449
  });
2328
- const compactionResult = await compactor.compact(conversationMessages, compactionTarget, undefined, options.context?.requestId);
2329
- if (compactionResult.compacted) {
2330
- const repairedResult = repairToolPairs(compactionResult.messages);
2331
- logger.info(`[${functionTag}] Aggressive compaction complete, retrying`, {
2332
- tokensSaved: compactionResult.tokensSaved,
2333
- compactionTarget,
2450
+ await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
2451
+ this.emitter.emit("response:end", directResult.content || "");
2452
+ this.emitter.emit("message", `Text generation completed successfully`);
2453
+ internalSpan.setAttribute("neurolink.path", "direct");
2454
+ internalSpan.setAttribute("neurolink.tokens.input", directResult.usage?.input || 0);
2455
+ internalSpan.setAttribute("neurolink.tokens.output", directResult.usage?.output || 0);
2456
+ internalSpan.setAttribute("neurolink.result_provider", directResult.provider || "unknown");
2457
+ internalSpan.setStatus({ code: SpanStatusCode.OK });
2458
+ return directResult;
2459
+ }
2460
+ catch (error) {
2461
+ // Check if this is a context overflow error - attempt recovery
2462
+ if (isContextOverflowError(error) && this.conversationMemory) {
2463
+ logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
2464
+ error: error instanceof Error ? error.message : String(error),
2465
+ overflowProvider: getContextOverflowProvider(error),
2334
2466
  });
2335
- // Retry with compacted context - pass compacted messages to avoid re-fetching
2336
- return await this.directProviderGeneration({
2337
- ...options,
2338
- conversationMessages: repairedResult.messages,
2467
+ try {
2468
+ // IMPROVEMENT 1: Extract actual token count from provider error if available
2469
+ const actualOverflow = parseProviderOverflowDetails(error);
2470
+ // IMPROVEMENT 2: Use ORIGINAL messages (not already-compacted ones)
2471
+ const originalMessages = options._originalConversationMessages ??
2472
+ (await getConversationMessages(this.conversationMemory, options));
2473
+ // IMPROVEMENT 3: Calculate precise reduction target
2474
+ const recoveryBudget = checkContextBudget({
2475
+ provider: options.provider || "openai",
2476
+ model: options.model,
2477
+ maxTokens: options.maxTokens,
2478
+ currentPrompt: options.prompt,
2479
+ systemPrompt: options.systemPrompt,
2480
+ });
2481
+ // Use provider's reported token count if available (more accurate than our estimate)
2482
+ const actualTokens = actualOverflow?.actualTokens ??
2483
+ recoveryBudget.estimatedInputTokens;
2484
+ const budgetTokens = actualOverflow?.budgetTokens ??
2485
+ recoveryBudget.availableInputTokens;
2486
+ // Target = 70% of budget (aggressive safety margin for recovery)
2487
+ const compactionTarget = Math.floor(budgetTokens * 0.7);
2488
+ // IMPROVEMENT 4: Calculate adaptive truncation fraction from actual numbers
2489
+ const requiredReduction = actualTokens > 0
2490
+ ? (actualTokens - compactionTarget) / actualTokens
2491
+ : 0.5;
2492
+ const compactor = new ContextCompactor({
2493
+ enableSummarize: false, // Skip LLM call for recovery (speed)
2494
+ enablePrune: true,
2495
+ enableDeduplicate: true,
2496
+ enableTruncate: true,
2497
+ truncationFraction: Math.min(0.9, requiredReduction + 0.15),
2498
+ });
2499
+ const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
2500
+ if (compactionResult.compacted) {
2501
+ const repairedResult = repairToolPairs(compactionResult.messages);
2502
+ // IMPROVEMENT 5: Verify BEFORE retrying
2503
+ const verifyBudget = checkContextBudget({
2504
+ provider: options.provider || "openai",
2505
+ model: options.model,
2506
+ maxTokens: options.maxTokens,
2507
+ systemPrompt: options.systemPrompt,
2508
+ currentPrompt: options.prompt,
2509
+ conversationMessages: repairedResult.messages,
2510
+ });
2511
+ if (!verifyBudget.withinBudget) {
2512
+ logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
2513
+ estimatedTokens: verifyBudget.estimatedInputTokens,
2514
+ availableTokens: verifyBudget.availableInputTokens,
2515
+ });
2516
+ throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
2517
+ `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
2518
+ `but budget is ${budgetTokens} tokens.`, {
2519
+ estimatedTokens: compactionResult.tokensAfter,
2520
+ availableTokens: budgetTokens,
2521
+ stagesUsed: compactionResult.stagesUsed,
2522
+ breakdown: verifyBudget.breakdown,
2523
+ });
2524
+ }
2525
+ logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
2526
+ tokensSaved: compactionResult.tokensSaved,
2527
+ compactionTarget,
2528
+ verifiedTokens: verifyBudget.estimatedInputTokens,
2529
+ verifiedBudget: verifyBudget.availableInputTokens,
2530
+ });
2531
+ // Single verified retry
2532
+ return await this.directProviderGeneration({
2533
+ ...options,
2534
+ conversationMessages: repairedResult.messages,
2535
+ });
2536
+ }
2537
+ }
2538
+ catch (retryError) {
2539
+ // If the retry error is our own ContextBudgetExceededError, re-throw it
2540
+ if (retryError instanceof ContextBudgetExceededError) {
2541
+ throw retryError;
2542
+ }
2543
+ logger.error(`[${functionTag}] Recovery attempt failed`, {
2544
+ error: retryError instanceof Error
2545
+ ? retryError.message
2546
+ : String(retryError),
2547
+ });
2548
+ }
2549
+ }
2550
+ // If the generation was aborted (e.g., coding task short-circuit via AbortController),
2551
+ // still store the conversation turn so that:
2552
+ // 1. The Redis conversation entry is created (if first turn)
2553
+ // 2. setImmediate triggers generateConversationTitle() for the session
2554
+ // 3. The caller's syncTitleFromRedis() can find the SDK-generated title
2555
+ if (isAbortError(error)) {
2556
+ logger.info(`[${functionTag}] Generation aborted — storing conversation turn for title generation`, {
2557
+ hasMemory: !!this.conversationMemory,
2558
+ memoryType: this.conversationMemory?.constructor?.name || "NONE",
2559
+ sessionId: options.context?.sessionId ||
2560
+ "unknown",
2339
2561
  });
2562
+ try {
2563
+ const abortedResult = {
2564
+ content: "[generation was interrupted]",
2565
+ provider: options.provider || "unknown",
2566
+ model: options.model || "unknown",
2567
+ responseTime: Date.now() - generateInternalStartTime,
2568
+ };
2569
+ await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(generateInternalStartTime), requestId), 5000);
2570
+ }
2571
+ catch (storeError) {
2572
+ logger.warn(`[${functionTag}] Failed to store conversation turn after abort`, {
2573
+ error: storeError instanceof Error
2574
+ ? storeError.message
2575
+ : String(storeError),
2576
+ });
2577
+ }
2340
2578
  }
2341
- }
2342
- catch (retryError) {
2343
- logger.error(`[${functionTag}] Recovery attempt also failed`, {
2344
- error: retryError instanceof Error
2345
- ? retryError.message
2346
- : String(retryError),
2347
- });
2579
+ else {
2580
+ logger.error(`[${functionTag}] All generation methods failed`, {
2581
+ error: error instanceof Error ? error.message : String(error),
2582
+ });
2583
+ }
2584
+ this.emitter.emit("response:end", "");
2585
+ this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
2586
+ throw error;
2348
2587
  }
2349
2588
  }
2350
- // If the generation was aborted (e.g., coding task short-circuit via AbortController),
2351
- // still store the conversation turn so that:
2352
- // 1. The Redis conversation entry is created (if first turn)
2353
- // 2. setImmediate triggers generateConversationTitle() for the session
2354
- // 3. The caller's syncTitleFromRedis() can find the SDK-generated title
2355
- if (isAbortError(error)) {
2356
- logger.info(`[${functionTag}] Generation aborted — storing conversation turn for title generation`, {
2357
- hasMemory: !!this.conversationMemory,
2358
- memoryType: this.conversationMemory?.constructor?.name || "NONE",
2359
- sessionId: options.context?.sessionId ||
2360
- "unknown",
2589
+ catch (spanError) {
2590
+ internalSpan.setStatus({
2591
+ code: SpanStatusCode.ERROR,
2592
+ message: spanError instanceof Error
2593
+ ? spanError.message
2594
+ : String(spanError),
2361
2595
  });
2362
- try {
2363
- const abortedResult = {
2364
- content: "[generation was interrupted]",
2365
- provider: options.provider || "unknown",
2366
- model: options.model || "unknown",
2367
- responseTime: Date.now() - generateInternalStartTime,
2368
- };
2369
- await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(generateInternalStartTime), requestId), 5000);
2370
- }
2371
- catch (storeError) {
2372
- logger.warn(`[${functionTag}] Failed to store conversation turn after abort`, {
2373
- error: storeError instanceof Error
2374
- ? storeError.message
2375
- : String(storeError),
2376
- });
2377
- }
2596
+ throw spanError;
2378
2597
  }
2379
- else {
2380
- logger.error(`[${functionTag}] All generation methods failed`, {
2381
- error: error instanceof Error ? error.message : String(error),
2382
- });
2598
+ finally {
2599
+ internalSpan.end();
2383
2600
  }
2384
- this.emitter.emit("response:end", "");
2385
- this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
2386
- throw error;
2387
- }
2601
+ });
2388
2602
  }
2389
2603
  /**
2390
2604
  * Log generateTextInternal start with comprehensive analysis
@@ -2517,6 +2731,7 @@ Current user's request: ${currentInput}`;
2517
2731
  error.message.includes("Model tried to call unavailable tool"));
2518
2732
  const isNonRetryable = isContextOverflowError(error) ||
2519
2733
  isToolError ||
2734
+ isNonRetryableProviderError(error) ||
2520
2735
  (error instanceof Error &&
2521
2736
  error.isRetryable ===
2522
2737
  false) ||
@@ -2616,12 +2831,15 @@ Current user's request: ${currentInput}`;
2616
2831
  originalPromptLength: options.systemPrompt?.length || 0,
2617
2832
  enhancedPromptLength: enhancedSystemPrompt.length,
2618
2833
  skippedToolInjection: !!options.skipToolPromptInjection,
2619
- enhancedPromptPreview: enhancedSystemPrompt.substring(0, 500) + "...",
2834
+ enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
2620
2835
  });
2621
- logger.debug("[Observability] Full system prompt", {
2836
+ logger.debug("[Observability] System prompt metadata", {
2622
2837
  requestId,
2623
2838
  systemPromptLength: enhancedSystemPrompt.length,
2624
- systemPrompt: enhancedSystemPrompt,
2839
+ systemPromptHash: enhancedSystemPrompt.length > 0
2840
+ ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...`
2841
+ : "empty",
2842
+ hasCustomSystemPrompt: !!options.systemPrompt,
2625
2843
  });
2626
2844
  // Get conversation messages for context
2627
2845
  let conversationMessages = await getConversationMessages(this.conversationMemory, options);
@@ -2708,8 +2926,51 @@ Current user's request: ${currentInput}`;
2708
2926
  tokensSaved: compactionResult.tokensSaved,
2709
2927
  });
2710
2928
  }
2929
+ // POST-COMPACTION BUDGET RE-CHECK (BUG-003 fix)
2930
+ const postCompactBudget = checkContextBudget({
2931
+ provider: providerName,
2932
+ model: options.model,
2933
+ maxTokens: options.maxTokens,
2934
+ systemPrompt: enhancedSystemPrompt,
2935
+ conversationMessages: conversationMessages,
2936
+ currentPrompt: options.prompt,
2937
+ toolDefinitions: availableTools,
2938
+ });
2939
+ if (!postCompactBudget.withinBudget) {
2940
+ const overageRatio = postCompactBudget.usageRatio - 1.0;
2941
+ logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
2942
+ requestId,
2943
+ estimatedTokens: postCompactBudget.estimatedInputTokens,
2944
+ availableTokens: postCompactBudget.availableInputTokens,
2945
+ overagePercent: Math.round(overageRatio * 100),
2946
+ stagesUsedInCompaction: compactionResult.stagesUsed,
2947
+ });
2948
+ // Emergency: truncate the content of the longest messages
2949
+ conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
2950
+ // Final check after emergency truncation
2951
+ const finalBudget = checkContextBudget({
2952
+ provider: providerName,
2953
+ model: options.model,
2954
+ maxTokens: options.maxTokens,
2955
+ systemPrompt: enhancedSystemPrompt,
2956
+ conversationMessages: conversationMessages,
2957
+ currentPrompt: options.prompt,
2958
+ toolDefinitions: availableTools,
2959
+ });
2960
+ if (!finalBudget.withinBudget) {
2961
+ throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
2962
+ `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
2963
+ `Budget: ${finalBudget.availableInputTokens} tokens. ` +
2964
+ `Conversation is too large to fit in the model's context window.`, {
2965
+ estimatedTokens: finalBudget.estimatedInputTokens,
2966
+ availableTokens: finalBudget.availableInputTokens,
2967
+ stagesUsed: compactionResult.stagesUsed,
2968
+ breakdown: finalBudget.breakdown,
2969
+ });
2970
+ }
2971
+ }
2711
2972
  }
2712
- // Create provider and generate
2973
+ // Create provider and generate (with confidence that context fits)
2713
2974
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
2714
2975
  this, // Pass SDK instance
2715
2976
  options.region);
@@ -2882,6 +3143,47 @@ Current user's request: ${currentInput}`;
2882
3143
  const repairedResult = repairToolPairs(compactionResult.messages);
2883
3144
  conversationMessages = repairedResult.messages;
2884
3145
  }
3146
+ // POST-COMPACTION BUDGET RE-CHECK (BUG-003 fix)
3147
+ const postCompactBudget = checkContextBudget({
3148
+ provider: providerName,
3149
+ model: options.model,
3150
+ maxTokens: options.maxTokens,
3151
+ systemPrompt: options.systemPrompt,
3152
+ conversationMessages: conversationMessages,
3153
+ currentPrompt: options.prompt,
3154
+ toolDefinitions: options.tools
3155
+ ? Object.values(options.tools)
3156
+ : undefined,
3157
+ });
3158
+ if (!postCompactBudget.withinBudget) {
3159
+ logger.warn("[NeuroLink] directProviderGeneration: post-compaction still over budget, emergency truncation", {
3160
+ estimatedTokens: postCompactBudget.estimatedInputTokens,
3161
+ availableTokens: postCompactBudget.availableInputTokens,
3162
+ overagePercent: Math.round((postCompactBudget.usageRatio - 1.0) * 100),
3163
+ });
3164
+ conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3165
+ const finalBudget = checkContextBudget({
3166
+ provider: providerName,
3167
+ model: options.model,
3168
+ maxTokens: options.maxTokens,
3169
+ systemPrompt: options.systemPrompt,
3170
+ conversationMessages: conversationMessages,
3171
+ currentPrompt: options.prompt,
3172
+ toolDefinitions: options.tools
3173
+ ? Object.values(options.tools)
3174
+ : undefined,
3175
+ });
3176
+ if (!finalBudget.withinBudget) {
3177
+ throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3178
+ `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3179
+ `Budget: ${finalBudget.availableInputTokens} tokens.`, {
3180
+ estimatedTokens: finalBudget.estimatedInputTokens,
3181
+ availableTokens: finalBudget.availableInputTokens,
3182
+ stagesUsed: compactionResult.stagesUsed,
3183
+ breakdown: finalBudget.breakdown,
3184
+ });
3185
+ }
3186
+ }
2885
3187
  }
2886
3188
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
2887
3189
  this, // Pass SDK instance
@@ -2930,6 +3232,16 @@ Current user's request: ${currentInput}`;
2930
3232
  logger.debug(`[${functionTag}] AbortError detected on provider ${providerName}, stopping fallback`);
2931
3233
  throw error;
2932
3234
  }
3235
+ // Circuit breaker for non-retryable errors (model not found, auth failed, etc.)
3236
+ // These errors are permanent — retrying with the same config will always fail
3237
+ // and wastes tokens/latency (e.g., 6 retries of 418KB = ~628K wasted tokens)
3238
+ if (isNonRetryableProviderError(error)) {
3239
+ logger.warn(`[${functionTag}] Non-retryable error from provider ${providerName}, stopping fallback chain`, {
3240
+ error: error instanceof Error ? error.message : String(error),
3241
+ errorType: error instanceof Error ? error.constructor.name : typeof error,
3242
+ });
3243
+ throw error instanceof Error ? error : new Error(String(error));
3244
+ }
2933
3245
  lastError = error instanceof Error ? error : new Error(String(error));
2934
3246
  logger.warn(`[${functionTag}] Provider ${providerName} failed`, {
2935
3247
  error: lastError.message,
@@ -3109,92 +3421,206 @@ Current user's request: ${currentInput}`;
3109
3421
  * @throws {Error} When conversation memory operations fail (if enabled)
3110
3422
  */
3111
3423
  async stream(options) {
3112
- const startTime = Date.now();
3113
- const hrTimeStart = process.hrtime.bigint();
3114
- const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3115
- const originalPrompt = options.input.text; // Store the original prompt for memory storage
3116
- // Inject file registry for lazy on-demand file processing
3117
- options.fileRegistry = this.fileRegistry;
3118
- await this.validateStreamInput(options);
3119
- this.emitStreamStartEvents(options, startTime);
3120
- // Check if workflow is requested
3121
- if (options.workflow || options.workflowConfig) {
3122
- return await this.streamWithWorkflow(options, startTime);
3123
- }
3124
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
3125
- return await this.setLangfuseContextFromOptions(options, async () => {
3126
- try {
3127
- // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
3128
- const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
3129
- const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
3130
- let accumulatedContent = "";
3131
- let chunkCount = 0;
3132
- // Set up event capture listeners
3133
- const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
3134
- const metadata = {
3135
- fallbackAttempted: false,
3136
- guardrailsBlocked: false,
3137
- error: undefined,
3138
- };
3139
- const self = this;
3140
- const processedStream = (async function* () {
3141
- try {
3142
- for await (const chunk of mcpStream) {
3143
- chunkCount++;
3144
- if (chunk &&
3145
- "content" in chunk &&
3146
- typeof chunk.content === "string") {
3147
- accumulatedContent += chunk.content;
3148
- self.emitter.emit("response:chunk", chunk.content);
3424
+ // Manual span lifecycle: the span must stay open until the stream is fully consumed,
3425
+ // NOT when the StreamResult object is returned. withSpan would end the span too early
3426
+ // because streaming results resolve lazily via the async generator.
3427
+ const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
3428
+ kind: SpanKind.INTERNAL,
3429
+ attributes: {
3430
+ [ATTR.NL_PROVIDER]: options.provider || "default",
3431
+ [ATTR.GEN_AI_MODEL]: options.model || "default",
3432
+ [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
3433
+ [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
3434
+ [ATTR.NL_STREAM_MODE]: true,
3435
+ },
3436
+ });
3437
+ const spanStartTime = Date.now();
3438
+ try {
3439
+ const startTime = Date.now();
3440
+ const hrTimeStart = process.hrtime.bigint();
3441
+ const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3442
+ const originalPrompt = options.input.text; // Store the original prompt for memory storage
3443
+ // Inject file registry for lazy on-demand file processing
3444
+ options.fileRegistry = this.fileRegistry;
3445
+ await this.validateStreamInput(options);
3446
+ // Check budget limit before making API call
3447
+ if (options.maxBudgetUsd !== undefined &&
3448
+ options.maxBudgetUsd > 0 &&
3449
+ this._sessionCostUsd >= options.maxBudgetUsd) {
3450
+ throw new NeuroLinkError({
3451
+ code: "SESSION_BUDGET_EXCEEDED",
3452
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
3453
+ category: ErrorCategory.VALIDATION,
3454
+ severity: ErrorSeverity.HIGH,
3455
+ retriable: false,
3456
+ context: {
3457
+ spent: this._sessionCostUsd,
3458
+ limit: options.maxBudgetUsd,
3459
+ },
3460
+ });
3461
+ }
3462
+ this.emitStreamStartEvents(options, startTime);
3463
+ // Check if workflow is requested
3464
+ if (options.workflow || options.workflowConfig) {
3465
+ const result = await this.streamWithWorkflow(options, startTime);
3466
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3467
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
3468
+ streamSpan.end();
3469
+ return result;
3470
+ }
3471
+ // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
3472
+ return await this.setLangfuseContextFromOptions(options, async () => {
3473
+ try {
3474
+ // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
3475
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
3476
+ const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
3477
+ // Update span with resolved provider name
3478
+ streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
3479
+ let accumulatedContent = "";
3480
+ let chunkCount = 0;
3481
+ // Set up event capture listeners
3482
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
3483
+ const metadata = {
3484
+ fallbackAttempted: false,
3485
+ guardrailsBlocked: false,
3486
+ error: undefined,
3487
+ };
3488
+ const self = this;
3489
+ const streamStartTime = Date.now();
3490
+ const sessionId = enhancedOptions.context
3491
+ ?.sessionId;
3492
+ const processedStream = (async function* () {
3493
+ let streamError = undefined;
3494
+ try {
3495
+ for await (const chunk of mcpStream) {
3496
+ chunkCount++;
3497
+ if (chunk &&
3498
+ "content" in chunk &&
3499
+ typeof chunk.content === "string") {
3500
+ accumulatedContent += chunk.content;
3501
+ self.emitter.emit("response:chunk", chunk.content);
3502
+ // Emit stream:chunk event (Observability Solution 8)
3503
+ self.emitter.emit("stream:chunk", {
3504
+ type: "stream:chunk",
3505
+ content: chunk.content,
3506
+ metadata: {
3507
+ chunkIndex: chunkCount,
3508
+ totalLength: accumulatedContent.length,
3509
+ },
3510
+ timestamp: Date.now(),
3511
+ });
3512
+ }
3513
+ yield chunk;
3514
+ }
3515
+ if (chunkCount === 0 && !metadata.fallbackAttempted) {
3516
+ yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
3517
+ accumulatedContent += content;
3518
+ });
3149
3519
  }
3150
- yield chunk;
3520
+ // Emit stream:complete event (Observability Solution 8)
3521
+ self.emitter.emit("stream:complete", {
3522
+ type: "stream:complete",
3523
+ content: accumulatedContent,
3524
+ metadata: {
3525
+ chunkCount,
3526
+ totalLength: accumulatedContent.length,
3527
+ durationMs: Date.now() - streamStartTime,
3528
+ sessionId,
3529
+ },
3530
+ timestamp: Date.now(),
3531
+ });
3151
3532
  }
3152
- if (chunkCount === 0 && !metadata.fallbackAttempted) {
3153
- yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
3154
- accumulatedContent += content;
3533
+ catch (error) {
3534
+ streamError = error;
3535
+ // Emit stream:error event (Observability Solution 8)
3536
+ self.emitter.emit("stream:error", {
3537
+ type: "stream:error",
3538
+ content: error instanceof Error ? error.message : String(error),
3539
+ metadata: {
3540
+ chunkCount,
3541
+ totalLength: accumulatedContent.length,
3542
+ durationMs: Date.now() - streamStartTime,
3543
+ errorName: error instanceof Error ? error.name : "UnknownError",
3544
+ sessionId,
3545
+ },
3546
+ timestamp: Date.now(),
3155
3547
  });
3548
+ throw error;
3156
3549
  }
3157
- }
3158
- finally {
3159
- cleanupListeners();
3160
- if (accumulatedContent.trim()) {
3161
- logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
3162
- provider: providerName,
3163
- model: enhancedOptions.model,
3164
- responseTimeMs: Date.now() - startTime,
3165
- contentLength: accumulatedContent.length,
3166
- fallback: metadata.fallbackAttempted,
3550
+ finally {
3551
+ cleanupListeners();
3552
+ // Finalize span now that the stream is fully consumed
3553
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3554
+ streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
3555
+ streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, metadata.error || streamError ? "error" : "stop");
3556
+ if (metadata.error || streamError) {
3557
+ streamSpan.setStatus({
3558
+ code: SpanStatusCode.ERROR,
3559
+ message: metadata.error ||
3560
+ (streamError instanceof Error
3561
+ ? streamError.message
3562
+ : String(streamError)),
3563
+ });
3564
+ }
3565
+ else {
3566
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
3567
+ }
3568
+ streamSpan.end();
3569
+ if (accumulatedContent.trim()) {
3570
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
3571
+ provider: providerName,
3572
+ model: enhancedOptions.model,
3573
+ responseTimeMs: Date.now() - startTime,
3574
+ contentLength: accumulatedContent.length,
3575
+ fallback: metadata.fallbackAttempted,
3576
+ });
3577
+ }
3578
+ await self.storeStreamConversationMemory({
3579
+ enhancedOptions,
3580
+ providerName,
3581
+ originalPrompt,
3582
+ accumulatedContent,
3583
+ startTime,
3584
+ eventSequence,
3167
3585
  });
3168
3586
  }
3169
- await self.storeStreamConversationMemory({
3170
- enhancedOptions,
3171
- providerName,
3172
- originalPrompt,
3173
- accumulatedContent,
3174
- startTime,
3175
- eventSequence,
3176
- });
3587
+ })();
3588
+ const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
3589
+ const responseTime = Date.now() - startTime;
3590
+ // Accumulate session cost for budget tracking
3591
+ if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
3592
+ this._sessionCostUsd += streamResult.analytics.cost;
3177
3593
  }
3178
- })();
3179
- const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
3180
- const responseTime = Date.now() - startTime;
3181
- this.emitStreamEndEvents(streamResult);
3182
- return this.createStreamResponse(streamResult, processedStream, {
3183
- providerName,
3184
- options,
3185
- startTime,
3186
- responseTime,
3187
- streamId,
3188
- fallback: metadata.fallbackAttempted,
3189
- guardrailsBlocked: metadata.guardrailsBlocked,
3190
- error: metadata.error,
3191
- events: eventSequence,
3192
- });
3193
- }
3194
- catch (error) {
3195
- return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
3594
+ this.emitStreamEndEvents(streamResult);
3595
+ return this.createStreamResponse(streamResult, processedStream, {
3596
+ providerName,
3597
+ options,
3598
+ startTime,
3599
+ responseTime,
3600
+ streamId,
3601
+ fallback: metadata.fallbackAttempted,
3602
+ guardrailsBlocked: metadata.guardrailsBlocked,
3603
+ error: metadata.error,
3604
+ events: eventSequence,
3605
+ });
3606
+ }
3607
+ catch (error) {
3608
+ return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
3609
+ }
3610
+ });
3611
+ }
3612
+ catch (error) {
3613
+ // End span on error before re-throwing
3614
+ streamSpan.setStatus({
3615
+ code: SpanStatusCode.ERROR,
3616
+ message: error instanceof Error ? error.message : String(error),
3617
+ });
3618
+ if (error instanceof Error) {
3619
+ streamSpan.recordException(error);
3196
3620
  }
3197
- });
3621
+ streamSpan.end();
3622
+ throw error;
3623
+ }
3198
3624
  }
3199
3625
  /**
3200
3626
  * Prepare stream options: initialize memory, MCP, Mem0 retrieval, orchestration,
@@ -3264,6 +3690,39 @@ Current user's request: ${currentInput}`;
3264
3690
  }
3265
3691
  // Auto-disable tools for Ollama models that don't support them
3266
3692
  await this.autoDisableOllamaStreamTools(options);
3693
+ // RAG Integration: If rag config is provided, prepare the RAG search tool
3694
+ if (options.rag?.files?.length) {
3695
+ try {
3696
+ const { prepareRAGTool } = await import("./rag/ragIntegration.js");
3697
+ const ragResult = await prepareRAGTool(options.rag, options.provider);
3698
+ // Inject the RAG tool into the tools record
3699
+ if (!options.tools) {
3700
+ options.tools = {};
3701
+ }
3702
+ options.tools[ragResult.toolName] =
3703
+ ragResult.tool;
3704
+ // Inject RAG-aware system prompt so the AI uses the RAG tool first
3705
+ const ragSystemInstruction = [
3706
+ `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
3707
+ `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
3708
+ `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
3709
+ `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
3710
+ `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
3711
+ ].join(" ");
3712
+ options.systemPrompt =
3713
+ (options.systemPrompt || "") + ragSystemInstruction;
3714
+ logger.info("[RAG] Tool injected into stream()", {
3715
+ toolName: ragResult.toolName,
3716
+ filesLoaded: ragResult.filesLoaded,
3717
+ chunksIndexed: ragResult.chunksIndexed,
3718
+ });
3719
+ }
3720
+ catch (error) {
3721
+ logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
3722
+ error: error instanceof Error ? error.message : String(error),
3723
+ });
3724
+ }
3725
+ }
3267
3726
  const factoryResult = processStreamingFactoryOptions(options);
3268
3727
  const enhancedOptions = createCleanStreamOptions(options);
3269
3728
  if (options.input?.text) {
@@ -4368,186 +4827,242 @@ Current user's request: ${currentInput}`;
4368
4827
  async executeTool(toolName, params = {}, options) {
4369
4828
  const functionTag = "NeuroLink.executeTool";
4370
4829
  const executionStartTime = Date.now();
4371
- // Debug: Log tool execution attempt
4372
- logger.debug(`[${functionTag}] Tool execution requested:`, {
4373
- toolName,
4374
- params: isNonNullObject(params)
4375
- ? transformParamsForLogging(params)
4376
- : params,
4377
- hasExternalManager: !!this.externalServerManager,
4378
- });
4379
- // 🔧 PARAMETER TRACE: Log tool execution details for debugging
4380
- logger.debug(`Tool execution detailed analysis`, {
4381
- toolName,
4382
- executionStartTime,
4383
- paramsAnalysis: {
4384
- type: typeof params,
4385
- isNull: params === null,
4386
- isUndefined: params === undefined,
4387
- isEmpty: params &&
4388
- typeof params === "object" &&
4389
- Object.keys(params).length === 0,
4390
- keys: params && typeof params === "object"
4391
- ? Object.keys(params)
4392
- : "NOT_OBJECT",
4393
- keysLength: params && typeof params === "object"
4394
- ? Object.keys(params).length
4395
- : 0,
4830
+ // Determine tool type for span attributes
4831
+ const externalTools = this.externalServerManager.getAllTools();
4832
+ const externalTool = externalTools.find((tool) => tool.name === toolName);
4833
+ const toolType = externalTool
4834
+ ? "mcp"
4835
+ : this.getCustomTools().has(toolName)
4836
+ ? "custom"
4837
+ : "external";
4838
+ // Compute truncated input size for the span
4839
+ const inputStr = typeof params === "string"
4840
+ ? params
4841
+ : params
4842
+ ? JSON.stringify(params)
4843
+ : "";
4844
+ const inputSize = inputStr.length;
4845
+ const truncatedInput = inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr;
4846
+ return tracers.mcp.startActiveSpan("neurolink.tool.execute", {
4847
+ attributes: {
4848
+ "tool.name": toolName,
4849
+ "tool.type": toolType,
4850
+ "tool.input_size": inputSize,
4851
+ "tool.input_preview": truncatedInput,
4396
4852
  },
4397
- isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
4398
- options,
4399
- hasExternalManager: !!this.externalServerManager,
4400
- });
4401
- // Emit tool start event (NeuroLink format - keep existing)
4402
- this.emitter.emit("tool:start", {
4403
- toolName,
4404
- timestamp: executionStartTime,
4405
- input: params, // Enhanced: add input parameters
4406
- });
4407
- // ADD: Bedrock-compatible tool:start event (positional parameters)
4408
- this.emitter.emit("tool:start", toolName, params);
4409
- // Set default options
4410
- const finalOptions = {
4411
- timeout: options?.timeout || TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS, // 30 second default timeout
4412
- maxRetries: options?.maxRetries || RETRY_ATTEMPTS.DEFAULT, // Default 2 retries for retriable errors
4413
- retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS, // 1 second delay between retries
4414
- authContext: options?.authContext, // Pass through authentication context
4415
- };
4416
- // Track memory usage for tool execution
4417
- const { MemoryManager } = await import("./utils/performance.js");
4418
- const startMemory = MemoryManager.getMemoryUsageMB();
4419
- // Get or create circuit breaker for this tool
4420
- if (!this.toolCircuitBreakers.has(toolName)) {
4421
- this.toolCircuitBreakers.set(toolName, new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS));
4422
- }
4423
- const circuitBreaker = this.toolCircuitBreakers.get(toolName);
4424
- // Initialize metrics for this tool if not exists
4425
- if (!this.toolExecutionMetrics.has(toolName)) {
4426
- this.toolExecutionMetrics.set(toolName, {
4427
- totalExecutions: 0,
4428
- successfulExecutions: 0,
4429
- failedExecutions: 0,
4430
- averageExecutionTime: 0,
4431
- lastExecutionTime: 0,
4432
- });
4433
- }
4434
- const metrics = this.toolExecutionMetrics.get(toolName);
4435
- if (metrics) {
4436
- metrics.totalExecutions++;
4437
- }
4438
- try {
4439
- mcpLogger.debug(`[${functionTag}] Executing tool: ${toolName}`, {
4440
- toolName,
4441
- params,
4442
- options: finalOptions,
4443
- circuitBreakerState: circuitBreaker?.getState(),
4444
- });
4445
- // Execute with circuit breaker, timeout, and retry logic
4446
- if (!circuitBreaker) {
4447
- throw new Error(`Circuit breaker not initialized for tool: ${toolName}`);
4448
- }
4449
- const result = await circuitBreaker.execute(async () => {
4450
- return await withRetry(async () => {
4451
- return await withTimeout(this.executeToolInternal(toolName, params, finalOptions), finalOptions.timeout, ErrorFactory.toolTimeout(toolName, finalOptions.timeout));
4452
- }, {
4453
- maxAttempts: finalOptions.maxRetries + 1, // +1 for initial attempt
4454
- delayMs: finalOptions.retryDelayMs,
4455
- isRetriable: isRetriableError,
4456
- onRetry: (attempt, error) => {
4457
- mcpLogger.warn(`[${functionTag}] Retrying tool execution (attempt ${attempt})`, {
4458
- toolName,
4459
- error: error.message,
4460
- attempt,
4461
- });
4853
+ }, async (toolSpan) => {
4854
+ try {
4855
+ // Debug: Log tool execution attempt
4856
+ logger.debug(`[${functionTag}] Tool execution requested:`, {
4857
+ toolName,
4858
+ params: isNonNullObject(params)
4859
+ ? transformParamsForLogging(params)
4860
+ : params,
4861
+ hasExternalManager: !!this.externalServerManager,
4862
+ });
4863
+ // 🔧 PARAMETER TRACE: Log tool execution details for debugging
4864
+ logger.debug(`Tool execution detailed analysis`, {
4865
+ toolName,
4866
+ executionStartTime,
4867
+ paramsAnalysis: {
4868
+ type: typeof params,
4869
+ isNull: params === null,
4870
+ isUndefined: params === undefined,
4871
+ isEmpty: params &&
4872
+ typeof params === "object" &&
4873
+ Object.keys(params).length === 0,
4874
+ keys: params && typeof params === "object"
4875
+ ? Object.keys(params)
4876
+ : "NOT_OBJECT",
4877
+ keysLength: params && typeof params === "object"
4878
+ ? Object.keys(params).length
4879
+ : 0,
4462
4880
  },
4881
+ isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
4882
+ options,
4883
+ hasExternalManager: !!this.externalServerManager,
4463
4884
  });
4464
- });
4465
- // Update success metrics
4466
- const executionTime = Date.now() - executionStartTime;
4467
- if (metrics) {
4468
- metrics.successfulExecutions++;
4469
- metrics.lastExecutionTime = executionTime;
4470
- metrics.averageExecutionTime =
4471
- (metrics.averageExecutionTime * (metrics.successfulExecutions - 1) +
4472
- executionTime) /
4473
- metrics.successfulExecutions;
4474
- }
4475
- // Track memory usage
4476
- const endMemory = MemoryManager.getMemoryUsageMB();
4477
- const memoryDelta = endMemory.heapUsed - startMemory.heapUsed;
4478
- if (memoryDelta > 20) {
4479
- mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
4885
+ // Emit tool start event (NeuroLink format - keep existing)
4886
+ this.emitter.emit("tool:start", {
4480
4887
  toolName,
4481
- memoryDelta,
4482
- executionTime,
4888
+ timestamp: executionStartTime,
4889
+ input: params, // Enhanced: add input parameters
4483
4890
  });
4484
- }
4485
- mcpLogger.debug(`[${functionTag}] Tool executed successfully`, {
4486
- toolName,
4487
- executionTime,
4488
- memoryDelta,
4489
- circuitBreakerState: circuitBreaker?.getState(),
4490
- });
4491
- // Emit tool end event using the helper method
4492
- this.emitToolEndEvent(toolName, executionStartTime, true, result);
4493
- return result;
4494
- }
4495
- catch (error) {
4496
- // Update failure metrics
4497
- if (metrics) {
4498
- metrics.failedExecutions++;
4499
- }
4500
- const executionTime = Date.now() - executionStartTime;
4501
- // Create structured error
4502
- let structuredError;
4503
- if (error instanceof NeuroLinkError) {
4504
- structuredError = error;
4505
- }
4506
- else if (error instanceof Error) {
4507
- // Categorize the error based on the message
4508
- if (error.message.includes("timeout")) {
4509
- structuredError = ErrorFactory.toolTimeout(toolName, finalOptions.timeout);
4891
+ // Set default options
4892
+ const finalOptions = {
4893
+ timeout: options?.timeout || TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS, // 30 second default timeout
4894
+ maxRetries: options?.maxRetries || RETRY_ATTEMPTS.DEFAULT, // Default 2 retries for retriable errors
4895
+ retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS, // 1 second delay between retries
4896
+ authContext: options?.authContext, // Pass through authentication context
4897
+ };
4898
+ // Track memory usage for tool execution
4899
+ const { MemoryManager } = await import("./utils/performance.js");
4900
+ const startMemory = MemoryManager.getMemoryUsageMB();
4901
+ // Get or create circuit breaker for this tool
4902
+ if (!this.toolCircuitBreakers.has(toolName)) {
4903
+ this.toolCircuitBreakers.set(toolName, new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS));
4510
4904
  }
4511
- else if (error.message.includes("not found")) {
4512
- const availableTools = await this.getAllAvailableTools();
4513
- structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((t) => ({ name: t.name }))));
4905
+ const circuitBreaker = this.toolCircuitBreakers.get(toolName);
4906
+ // Initialize metrics for this tool if not exists
4907
+ if (!this.toolExecutionMetrics.has(toolName)) {
4908
+ this.toolExecutionMetrics.set(toolName, {
4909
+ totalExecutions: 0,
4910
+ successfulExecutions: 0,
4911
+ failedExecutions: 0,
4912
+ averageExecutionTime: 0,
4913
+ lastExecutionTime: 0,
4914
+ });
4514
4915
  }
4515
- else if (error.message.includes("validation") ||
4516
- error.message.includes("parameter")) {
4517
- structuredError = ErrorFactory.invalidParameters(toolName, error, params);
4916
+ const metrics = this.toolExecutionMetrics.get(toolName);
4917
+ if (metrics) {
4918
+ metrics.totalExecutions++;
4518
4919
  }
4519
- else if (error.message.includes("network") ||
4520
- error.message.includes("connection")) {
4521
- structuredError = ErrorFactory.networkError(toolName, error);
4920
+ try {
4921
+ mcpLogger.debug(`[${functionTag}] Executing tool: ${toolName}`, {
4922
+ toolName,
4923
+ params,
4924
+ options: finalOptions,
4925
+ circuitBreakerState: circuitBreaker?.getState(),
4926
+ });
4927
+ // Execute with circuit breaker, timeout, and retry logic
4928
+ if (!circuitBreaker) {
4929
+ throw new Error(`Circuit breaker not initialized for tool: ${toolName}`);
4930
+ }
4931
+ const result = await circuitBreaker.execute(async () => {
4932
+ return await withRetry(async () => {
4933
+ return await withTimeout(this.executeToolInternal(toolName, params, finalOptions), finalOptions.timeout, ErrorFactory.toolTimeout(toolName, finalOptions.timeout));
4934
+ }, {
4935
+ maxAttempts: finalOptions.maxRetries + 1, // +1 for initial attempt
4936
+ delayMs: finalOptions.retryDelayMs,
4937
+ isRetriable: isRetriableError,
4938
+ onRetry: (attempt, error) => {
4939
+ mcpLogger.warn(`[${functionTag}] Retrying tool execution (attempt ${attempt})`, {
4940
+ toolName,
4941
+ error: error.message,
4942
+ attempt,
4943
+ });
4944
+ },
4945
+ });
4946
+ });
4947
+ // Update success metrics
4948
+ const executionTime = Date.now() - executionStartTime;
4949
+ if (metrics) {
4950
+ metrics.successfulExecutions++;
4951
+ metrics.lastExecutionTime = executionTime;
4952
+ metrics.averageExecutionTime =
4953
+ (metrics.averageExecutionTime *
4954
+ (metrics.successfulExecutions - 1) +
4955
+ executionTime) /
4956
+ metrics.successfulExecutions;
4957
+ }
4958
+ // Track memory usage
4959
+ const endMemory = MemoryManager.getMemoryUsageMB();
4960
+ const memoryDelta = endMemory.heapUsed - startMemory.heapUsed;
4961
+ if (memoryDelta > 20) {
4962
+ mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
4963
+ toolName,
4964
+ memoryDelta,
4965
+ executionTime,
4966
+ });
4967
+ }
4968
+ mcpLogger.debug(`[${functionTag}] Tool executed successfully`, {
4969
+ toolName,
4970
+ executionTime,
4971
+ memoryDelta,
4972
+ circuitBreakerState: circuitBreaker?.getState(),
4973
+ });
4974
+ // Emit tool end event using the helper method
4975
+ this.emitToolEndEvent(toolName, executionStartTime, true, result);
4976
+ // Set span success attributes
4977
+ // Check if result has isError flag (MCP tool error result)
4978
+ const isToolError = result &&
4979
+ typeof result === "object" &&
4980
+ "isError" in result &&
4981
+ result.isError === true;
4982
+ toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
4983
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
4984
+ return result;
4522
4985
  }
4523
- else {
4524
- structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
4986
+ catch (error) {
4987
+ // Update failure metrics
4988
+ if (metrics) {
4989
+ metrics.failedExecutions++;
4990
+ }
4991
+ const executionTime = Date.now() - executionStartTime;
4992
+ // Create structured error
4993
+ let structuredError;
4994
+ if (error instanceof NeuroLinkError) {
4995
+ structuredError = error;
4996
+ }
4997
+ else if (error instanceof Error) {
4998
+ // Categorize the error based on the message
4999
+ if (error.message.includes("timeout")) {
5000
+ structuredError = ErrorFactory.toolTimeout(toolName, finalOptions.timeout);
5001
+ }
5002
+ else if (error.message.includes("not found")) {
5003
+ const availableTools = await this.getAllAvailableTools();
5004
+ structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((t) => ({ name: t.name }))));
5005
+ }
5006
+ else if (error.message.includes("validation") ||
5007
+ error.message.includes("parameter")) {
5008
+ structuredError = ErrorFactory.invalidParameters(toolName, error, params);
5009
+ }
5010
+ else if (error.message.includes("network") ||
5011
+ error.message.includes("connection")) {
5012
+ structuredError = ErrorFactory.networkError(toolName, error);
5013
+ }
5014
+ else {
5015
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
5016
+ }
5017
+ }
5018
+ else {
5019
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
5020
+ }
5021
+ // ADD: Centralized error event emission
5022
+ this.emitter.emit("error", structuredError);
5023
+ // Emit tool end event using the helper method
5024
+ this.emitToolEndEvent(toolName, executionStartTime, false, undefined, structuredError);
5025
+ // Add execution context to structured error
5026
+ structuredError = new NeuroLinkError({
5027
+ ...structuredError,
5028
+ context: {
5029
+ ...structuredError.context,
5030
+ executionTime,
5031
+ params,
5032
+ options: finalOptions,
5033
+ circuitBreakerState: circuitBreaker?.getState(),
5034
+ circuitBreakerFailures: circuitBreaker?.getFailureCount(),
5035
+ metrics: { ...metrics },
5036
+ },
5037
+ });
5038
+ // Log structured error
5039
+ logStructuredError(structuredError);
5040
+ // Record error on span
5041
+ toolSpan.setAttribute("tool.result.status", "error");
5042
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
5043
+ toolSpan.recordException(structuredError);
5044
+ toolSpan.setStatus({
5045
+ code: SpanStatusCode.ERROR,
5046
+ message: structuredError.message,
5047
+ });
5048
+ throw structuredError;
4525
5049
  }
4526
5050
  }
4527
- else {
4528
- structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
5051
+ catch (outerError) {
5052
+ // If the error was not already recorded on the span (from inner catch), record it
5053
+ if (!(outerError instanceof NeuroLinkError)) {
5054
+ const errMsg = outerError instanceof Error
5055
+ ? outerError.message
5056
+ : String(outerError);
5057
+ toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
5058
+ toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
5059
+ }
5060
+ throw outerError;
4529
5061
  }
4530
- // ADD: Centralized error event emission
4531
- this.emitter.emit("error", structuredError);
4532
- // Emit tool end event using the helper method
4533
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined, structuredError);
4534
- // Add execution context to structured error
4535
- structuredError = new NeuroLinkError({
4536
- ...structuredError,
4537
- context: {
4538
- ...structuredError.context,
4539
- executionTime,
4540
- params,
4541
- options: finalOptions,
4542
- circuitBreakerState: circuitBreaker?.getState(),
4543
- circuitBreakerFailures: circuitBreaker?.getFailureCount(),
4544
- metrics: { ...metrics },
4545
- },
4546
- });
4547
- // Log structured error
4548
- logStructuredError(structuredError);
4549
- throw structuredError;
4550
- }
5062
+ finally {
5063
+ toolSpan.end();
5064
+ }
5065
+ });
4551
5066
  }
4552
5067
  /**
4553
5068
  * Internal tool execution method (extracted for better error handling)
@@ -5934,6 +6449,7 @@ Current user's request: ${currentInput}`;
5934
6449
  try {
5935
6450
  logger.debug("[NeuroLink] Resetting initialization state...");
5936
6451
  this.mcpInitialized = false;
6452
+ this.mcpInitPromise = null;
5937
6453
  this.conversationMemoryNeedsInit = false;
5938
6454
  logger.debug("[NeuroLink] Initialization state reset successfully");
5939
6455
  }